diff options
author | Will Schmidt <will_schmidt@vnet.ibm.com> | 2012-04-18 14:52:25 -0500 |
---|---|---|
committer | Ryan S. Arnold <rsa@linux.vnet.ibm.com> | 2012-04-18 14:52:25 -0500 |
commit | b282631e367f474bbd9e5d92e2aa54b3ce44efe8 (patch) | |
tree | 8baaa0d7b178a0679aa1c1f2a37cc8508c20cd7d | |
parent | 95aa737cf9fd7f0c31de62950b253861b863dc50 (diff) | |
download | glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.tar.gz glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.tar.xz glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.zip |
Simplify power6 wordcopy by adding [fwd|bwd]_align_merge macros.
-rw-r--r-- | ChangeLog | 8 |
-rw-r--r-- | sysdeps/powerpc/powerpc32/power6/wordcopy.c | 117 |
-rw-r--r-- | sysdeps/powerpc/powerpc64/power6/wordcopy.c | 234 |
3 files changed, 87 insertions, 272 deletions
diff --git a/ChangeLog b/ChangeLog index e56e7e704a..cdd29819f3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2012-04-18 Will Schmidt <will_schmidt@vnet.ibm.com> + + * sysdeps/powerpc/powerpc64/power6/wordcopy.c: Add fwd_align_merge and + bwd_align_merge macros. + (_wordcopy_fwd_dest_aligned): Use fwd_align_merge macro calls. + (_wordcopy_bwd_dest_aligned): Use bwd_align_merge macro calls. + * sysdeps/powerpc/powerpc32/power6/wordcopy.c: Likewise. + 2012-04-18 David S. Miller <davem@davemloft.net> * sysdeps/sparc/sparc64/memcopy.h: Delete. diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c index 2594b1d578..d0df56f22c 100644 --- a/sysdeps/powerpc/powerpc32/power6/wordcopy.c +++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c @@ -1,5 +1,5 @@ /* _memcopy.c -- subroutines for memory copy functions. - Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc. + Copyright (C) 1991-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Torbjorn Granlund (tege@sics.se). Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com). @@ -65,6 +65,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len) DSTP should be aligned for memory operations on `op_t's, but SRCP must *not* be aligned. 
*/ +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + void _wordcopy_fwd_dest_aligned (dstp, srcp, len) long int dstp; @@ -104,49 +118,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8)); - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (1); break; case 2: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16)); - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (2); break; case 3: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24)); - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (3); break; } @@ -191,6 +169,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len) while (len != 0); } +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + /* _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). 
DSTP should be aligned for memory @@ -235,52 +227,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8)); - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (1); break; case 2: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16)); - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (2); break; case 3: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24)); - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (3); break; } } diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c index 184904db5f..b18b97faab 100644 --- a/sysdeps/powerpc/powerpc64/power6/wordcopy.c +++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c @@ -1,5 +1,5 @@ /* _memcopy.c -- subroutines for memory copy functions. - Copyright (C) 1991, 1996 Free Software Foundation, Inc. + Copyright (C) 1991-2012 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Torbjorn Granlund (tege@sics.se). 
@@ -59,6 +59,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len) while (len != 0); } +#define fwd_align_merge(align) \ + do \ + { \ + a1 = ((op_t *) srcp)[1]; \ + a2 = ((op_t *) srcp)[2]; \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \ + a0 = a2; \ + srcp += 2 * OPSIZ; \ + dstp += 2 * OPSIZ; \ + len -= 2; \ + } \ + while (len != 0) + + /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to block beginning at DSTP with LEN `op_t' words (not LEN bytes!). DSTP should be aligned for memory operations on `op_t's, but SRCP must @@ -103,109 +118,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8)); - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (1); break; case 2: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16)); - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (2); break; case 3: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24)); - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (3); break; case 4: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32)); - ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (4); break; case 5: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 40, a1, 
(64-40)); - ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (5); break; case 6: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48)); - ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (6); break; case 7: - do - { - a1 = ((op_t *) srcp)[1]; - a2 = ((op_t *) srcp)[2]; - ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56)); - ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56)); - a0 = a2; - - srcp += 2 * OPSIZ; - dstp += 2 * OPSIZ; - len -= 2; - } - while (len != 0); + fwd_align_merge (7); break; } @@ -250,6 +181,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len) while (len != 0); } +#define bwd_align_merge(align) \ + do \ + { \ + srcp -= 2 * OPSIZ; \ + dstp -= 2 * OPSIZ; \ + a1 = ((op_t *) srcp)[1]; \ + a0 = ((op_t *) srcp)[0]; \ + ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8)); \ + ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8)); \ + a2 = a0; \ + len -= 2; \ + } \ + while (len != 0) + /* _wordcopy_bwd_dest_aligned -- Copy block finishing right before SRCP to block finishing right before DSTP with LEN `op_t' words (not LEN bytes!). 
DSTP should be aligned for memory @@ -294,116 +239,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len) switch (align) { case 1: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8)); - ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (1); break; case 2: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16)); - ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (2); break; case 3: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24)); - ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (3); break; case 4: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32)); - ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (4); break; case 5: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40)); - ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (5); break; case 6: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = ((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48)); - ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (6); break; case 7: - do - { - srcp -= 2 * OPSIZ; - dstp -= 2 * OPSIZ; - - a1 = ((op_t *) srcp)[1]; - a0 = 
((op_t *) srcp)[0]; - ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56)); - ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56)); - a2 = a0; - - len -= 2; - } - while (len != 0); + bwd_align_merge (7); break; } } |