From b282631e367f474bbd9e5d92e2aa54b3ce44efe8 Mon Sep 17 00:00:00 2001 From: Will Schmidt Date: Wed, 18 Apr 2012 14:52:25 -0500 Subject: Simplify power6 wordcopy by adding [fwd|bwd]_align_merge macros. --- sysdeps/powerpc/powerpc64/power6/wordcopy.c | 234 ++++++---------------------- 1 file changed, 44 insertions(+), 190 deletions(-) (limited to 'sysdeps/powerpc/powerpc64/power6') diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c index 184904db5f..b18b97faab 100644 --- a/sysdeps/powerpc/powerpc64/power6/wordcopy.c +++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c @@ -1,5 +1,5 @@ /* _memcopy.c -- subroutines for memory copy functions. - Copyright (C) 1991, 1996 Free Software Foundation, Inc. + Copyright (C) 1991-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Torbjorn Granlund (tege@sics.se). @@ -59,6 +59,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len) while (len != 0); } +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + + /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to block beginning at DSTP with LEN `op_t' words (not LEN bytes!). DSTP should be aligned for memory operations on `op_t's, but SRCP must @@ -103,109 +118,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8)); - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (1); break; case 2: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16)); - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (2); break; case 3: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24)); - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (3); break; case 4: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32)); - ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (4); break; case 5: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40)); - ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (5); break; case 6: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48)); - ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (6); break; case 7: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56)); - ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (7); break; } @@ -250,6 +181,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len) while (len != 0); } +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + /* _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). DSTP should be aligned for memory @@ -294,116 +239,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8)); - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (1); break; case 2: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16)); - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (2); break; case 3: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24)); - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (3); break; case 4: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32)); - ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (4); break; case 5: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40)); - ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (5); break; case 6: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48)); - ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (6); break; case 7: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56)); - ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (7); break; } } -- cgit 1.4.1