about summary refs log tree commit diff
diff options
context:
space:
mode:
authorWill Schmidt <will_schmidt@vnet.ibm.com>2012-04-18 14:52:25 -0500
committerRyan S. Arnold <rsa@linux.vnet.ibm.com>2012-04-18 14:52:25 -0500
commitb282631e367f474bbd9e5d92e2aa54b3ce44efe8 (patch)
tree8baaa0d7b178a0679aa1c1f2a37cc8508c20cd7d
parent95aa737cf9fd7f0c31de62950b253861b863dc50 (diff)
downloadglibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.tar.gz
glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.tar.xz
glibc-b282631e367f474bbd9e5d92e2aa54b3ce44efe8.zip
Simplify power6 wordcopy by adding [fwd|bwd]_align_merge macros.
-rw-r--r--ChangeLog8
-rw-r--r--sysdeps/powerpc/powerpc32/power6/wordcopy.c117
-rw-r--r--sysdeps/powerpc/powerpc64/power6/wordcopy.c234
3 files changed, 87 insertions, 272 deletions
diff --git a/ChangeLog b/ChangeLog
index e56e7e704a..cdd29819f3 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,11 @@
+2012-04-18  Will Schmidt  <will_schmidt@vnet.ibm.com>
+
+	* sysdeps/powerpc/powerpc64/power6/wordcopy.c: Add fwd_align_merge and
+	bwd_align_merge macros.
+	(_wordcopy_fwd_dest_aligned): Use fwd_align_merge macro calls.
+	(_wordcopy_bwd_dest_aligned): Use bwd_align_merge macro calls.
+	* sysdeps/powerpc/powerpc32/power6/wordcopy.c: Likewise.
+
 2012-04-18  David S. Miller  <davem@davemloft.net>
 
 	* sysdeps/sparc/sparc64/memcopy.h: Delete.
diff --git a/sysdeps/powerpc/powerpc32/power6/wordcopy.c b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
index 2594b1d578..d0df56f22c 100644
--- a/sysdeps/powerpc/powerpc32/power6/wordcopy.c
+++ b/sysdeps/powerpc/powerpc32/power6/wordcopy.c
@@ -1,5 +1,5 @@
 /* _memcopy.c -- subroutines for memory copy functions.
-   Copyright (C) 1991, 1996, 2006 Free Software Foundation, Inc.
+   Copyright (C) 1991-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Torbjorn Granlund (tege@sics.se).
    Updated for POWER6 by Steven Munroe (sjmunroe@us.ibm.com).
@@ -65,6 +65,20 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
    DSTP should be aligned for memory operations on `op_t's, but SRCP must
    *not* be aligned.  */
 
+#define fwd_align_merge(align)                                         \
+  do                                                                   \
+    {                                                                  \
+      a1 = ((op_t *) srcp)[1];                                         \
+      a2 = ((op_t *) srcp)[2];                                         \
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8));      \
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8));      \
+      a0 = a2;                                                         \
+      srcp += 2 * OPSIZ;                                               \
+      dstp += 2 * OPSIZ;                                               \
+      len -= 2;                                                                \
+    }                                                                  \
+  while (len != 0)
+
 void
 _wordcopy_fwd_dest_aligned (dstp, srcp, len)
      long int dstp;
@@ -104,49 +118,13 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (3);
       break;
     }
 
@@ -191,6 +169,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
   while (len != 0);
 }
 
+#define bwd_align_merge(align)                                         \
+  do                                                                   \
+    {                                                                  \
+      srcp -= 2 * OPSIZ;                                               \
+      dstp -= 2 * OPSIZ;                                               \
+      a1 = ((op_t *) srcp)[1];                                         \
+      a0 = ((op_t *) srcp)[0];                                         \
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (32-align*8));      \
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (32-align*8));      \
+      a2 = a0;                                                         \
+      len -= 2;                                                                \
+    }                                                                  \
+  while (len != 0)
+
 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
    before SRCP to block finishing right before DSTP with LEN `op_t'
    words (not LEN bytes!).  DSTP should be aligned for memory
@@ -235,52 +227,13 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (32-8));
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (32-8));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (32-16));
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (32-16));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (32-24));
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (32-24));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (3);
       break;
     }
 }
diff --git a/sysdeps/powerpc/powerpc64/power6/wordcopy.c b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
index 184904db5f..b18b97faab 100644
--- a/sysdeps/powerpc/powerpc64/power6/wordcopy.c
+++ b/sysdeps/powerpc/powerpc64/power6/wordcopy.c
@@ -1,5 +1,5 @@
 /* _memcopy.c -- subroutines for memory copy functions.
-   Copyright (C) 1991, 1996 Free Software Foundation, Inc.
+   Copyright (C) 1991-2012 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Torbjorn Granlund (tege@sics.se).
 
@@ -59,6 +59,21 @@ _wordcopy_fwd_aligned (dstp, srcp, len)
   while (len != 0);
 }
 
+#define fwd_align_merge(align)                                         \
+  do                                                                   \
+    {                                                                  \
+      a1 = ((op_t *) srcp)[1];                                         \
+      a2 = ((op_t *) srcp)[2];                                         \
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));      \
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));      \
+      a0 = a2;                                                         \
+      srcp += 2 * OPSIZ;                                               \
+      dstp += 2 * OPSIZ;                                               \
+      len -= 2;                                                                \
+    }                                                                  \
+  while (len != 0)
+
+
 /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to
    block beginning at DSTP with LEN `op_t' words (not LEN bytes!).
    DSTP should be aligned for memory operations on `op_t's, but SRCP must
@@ -103,109 +118,25 @@ _wordcopy_fwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (3);
       break;
     case 4:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
-          ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (4);
       break;
     case 5:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
-          ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (5);
       break;
     case 6:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
-          ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (6);
       break;
     case 7:
-      do
-        {
-          a1 = ((op_t *) srcp)[1];
-          a2 = ((op_t *) srcp)[2];
-          ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
-          ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
-          a0 = a2;
-    
-          srcp += 2 * OPSIZ;
-          dstp += 2 * OPSIZ;
-          len -= 2;
-        }
-      while (len != 0);
+      fwd_align_merge (7);
       break;
     }
 
@@ -250,6 +181,20 @@ _wordcopy_bwd_aligned (dstp, srcp, len)
   while (len != 0);
 }
 
+#define bwd_align_merge(align)                                         \
+  do                                                                   \
+    {                                                                  \
+      srcp -= 2 * OPSIZ;                                               \
+      dstp -= 2 * OPSIZ;                                               \
+      a1 = ((op_t *) srcp)[1];                                         \
+      a0 = ((op_t *) srcp)[0];                                         \
+      ((op_t *) dstp)[1] = MERGE (a1, align*8, a2, (64-align*8));      \
+      ((op_t *) dstp)[0] = MERGE (a0, align*8, a1, (64-align*8));      \
+      a2 = a0;                                                         \
+      len -= 2;                                                                \
+    }                                                                  \
+  while (len != 0)
+
 /* _wordcopy_bwd_dest_aligned -- Copy block finishing right
    before SRCP to block finishing right before DSTP with LEN `op_t'
    words (not LEN bytes!).  DSTP should be aligned for memory
@@ -294,116 +239,25 @@ _wordcopy_bwd_dest_aligned (dstp, srcp, len)
   switch (align)
     {
     case 1:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 8, a2, (64-8));
-          ((op_t *) dstp)[0] = MERGE (a0, 8, a1, (64-8));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (1);
       break;
     case 2:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 16, a2, (64-16));
-          ((op_t *) dstp)[0] = MERGE (a0, 16, a1, (64-16));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (2);
       break;
     case 3:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 24, a2, (64-24));
-          ((op_t *) dstp)[0] = MERGE (a0, 24, a1, (64-24));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (3);
       break;
     case 4:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 32, a2, (64-32));
-          ((op_t *) dstp)[0] = MERGE (a0, 32, a1, (64-32));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (4);
       break;
     case 5:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 40, a2, (64-40));
-          ((op_t *) dstp)[0] = MERGE (a0, 40, a1, (64-40));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (5);
       break;
     case 6:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 48, a2, (64-48));
-          ((op_t *) dstp)[0] = MERGE (a0, 48, a1, (64-48));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (6);
       break;
     case 7:
-      do
-        {
-          srcp -= 2 * OPSIZ;
-          dstp -= 2 * OPSIZ;
-    
-          a1 = ((op_t *) srcp)[1];
-          a0 = ((op_t *) srcp)[0];
-          ((op_t *) dstp)[1] = MERGE (a1, 56, a2, (64-56));
-          ((op_t *) dstp)[0] = MERGE (a0, 56, a1, (64-56));
-          a2 = a0;
-    
-          len -= 2;
-        }
-      while (len != 0);
+      bwd_align_merge (7);
       break;
     }
 }