diff options
Diffstat (limited to 'sysdeps/powerpc/powerpc64/power4')
-rw-r--r-- | sysdeps/powerpc/powerpc64/power4/fpu/mpa.c | 6 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c | 2 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power4/memcmp.S | 26 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power4/memcpy.S | 18 | ||||
-rw-r--r-- | sysdeps/powerpc/powerpc64/power4/strncmp.S | 2 |
5 files changed, 27 insertions, 27 deletions
diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c index f167969ea3..b6f8341afa 100644 --- a/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c +++ b/sysdeps/powerpc/powerpc64/power4/fpu/mpa.c @@ -409,9 +409,9 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { if (k > p2) {i1=k-p2; i2=p2+1; } else {i1=1; i2=k; } #if 1 - /* rearange this inner loop to allow the fmadd instructions to be + /* rearrange this inner loop to allow the fmadd instructions to be independent and execute in parallel on processors that have - dual symetrical FP pipelines. */ + dual symmetrical FP pipelines. */ if (i1 < (i2-1)) { /* make sure we have at least 2 iterations */ @@ -437,7 +437,7 @@ void __mul(const mp_no *x, const mp_no *y, mp_no *z, int p) { zk += x->d[i1]*y->d[i1]; } #else - /* The orginal code. */ + /* The original code. */ for (i=i1,j=i2-1; i<i2; i++,j--) zk += X[i]*Y[j]; #endif diff --git a/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c index 098e19a5f0..7c97d95817 100644 --- a/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c +++ b/sysdeps/powerpc/powerpc64/power4/fpu/slowpow.c @@ -59,7 +59,7 @@ __slowpow (double x, double y, double z) res1 = (double) (ldpp - ldeps); if (res != res1) /* if result still not accurate enough */ - { /* use mpa for higher persision. */ + { /* use mpa for higher precision. */ mp_no mpx, mpy, mpz, mpw, mpp, mpr, mpr1; static const mp_no eps = { -3, {1.0, 4.0} }; int p; diff --git a/sysdeps/powerpc/powerpc64/power4/memcmp.S b/sysdeps/powerpc/powerpc64/power4/memcmp.S index 65922813bd..7df52f810b 100644 --- a/sysdeps/powerpc/powerpc64/power4/memcmp.S +++ b/sysdeps/powerpc/powerpc64/power4/memcmp.S @@ -53,7 +53,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) beq- cr6, L(zeroLength) dcbt 0,rSTR1 dcbt 0,rSTR2 -/* If less than 8 bytes or not aligned, use the unalligned +/* If less than 8 bytes or not aligned, use the unaligned byte loop. */ blt cr1, L(bytealigned) std rWORD8,-8(r1) @@ -62,7 +62,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) cfi_offset(rWORD7,-16) bne L(unaligned) /* At this point we know both strings have the same alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then we are already double word aligned and can perform the DWaligned loop. @@ -70,7 +70,7 @@ EALIGN (BP_SYM(memcmp), 4, 0) Otherwise we know the two strings have the same alignment (but not yet DW). So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -152,8 +152,8 @@ L(DWaligned): L(dP1): mtctr rTMP /* Power4 wants mtctr 1st in dispatch group */ /* Normally we'd use rWORD7/rWORD8 here, but since we might exit early - (8-15 byte compare), we want to use only volitile registers. This - means we can avoid restoring non-volitile registers since we did not + (8-15 byte compare), we want to use only volatile registers. This + means we can avoid restoring non-volatile registers since we did not change any on the early exit path. The key here is the non-early exit path only cares about the condition code (cr5), not about which register pair was used. */ @@ -215,7 +215,7 @@ L(dP2e): bne cr5, L(dLcr5) b L(dLoop2) /* Again we are on a early exit path (16-23 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP2x): @@ -256,7 +256,7 @@ L(dP3e): bne cr6, L(dLcr6) b L(dLoop1) /* Again we are on a early exit path (24-31 byte compare), we want to - only use volitile registers and avoid restoring non-volitile + only use volatile registers and avoid restoring non-volatile registers. */ .align 4 L(dP3x): @@ -340,7 +340,7 @@ L(d04): beq L(zeroLength) /* At this point we have a remainder of 1 to 7 bytes to compare. Since we are aligned it is safe to load the whole double word, and use - shift right double to elliminate bits beyond the compare length. */ + shift right double to eliminate bits beyond the compare length. */ L(d00): ld rWORD1, 8(rSTR1) ld rWORD2, 8(rSTR2) @@ -496,15 +496,15 @@ L(zeroLength): .align 4 /* At this point we know the strings have different alignment and the - compare length is at least 8 bytes. rBITDIF containes the low order + compare length is at least 8 bytes. rBITDIF contains the low order 3 bits of rSTR1 and cr5 contains the result of the logical compare of rBITDIF to 0. If rBITDIF == 0 then rStr1 is double word aligned and can perform the DWunaligned loop. - Otherwise we know that rSTR1 is not aready DW aligned yet. + Otherwise we know that rSTR1 is not already DW aligned yet. So we can force the string addresses to the next lower DW boundary and special case this first DW word using shift left to - ellimiate bits preceeding the first byte. Since we want to join the + eliminate bits preceding the first byte. Since we want to join the normal (DWaligned) compare loop, starting at the second double word, we need to adjust the length (rN) and special case the loop versioning for the first DW. This insures that the loop count is @@ -537,7 +537,7 @@ L(unaligned): clrrdi rSTR2, rSTR2, 3 std r26,-48(r1) cfi_offset(r26,-48) -/* Compute the leaft/right shift counts for the unalign rSTR2, +/* Compute the left/right shift counts for the unalign rSTR2, compensating for the logical (DW aligned) start of rSTR1. */ clrldi rSHL, r27, 61 clrrdi rSTR1, rSTR1, 3 @@ -876,7 +876,7 @@ L(du14): sldi. rN, rN, 3 bne cr5, L(duLcr5) /* At this point we have a remainder of 1 to 7 bytes to compare. We use - shift right double to elliminate bits beyond the compare length. + shift right double to eliminate bits beyond the compare length. This allows the use of double word subtract to compute the final result. diff --git a/sysdeps/powerpc/powerpc64/power4/memcpy.S b/sysdeps/powerpc/powerpc64/power4/memcpy.S index 9d0b478029..734434af07 100644 --- a/sysdeps/powerpc/powerpc64/power4/memcpy.S +++ b/sysdeps/powerpc/powerpc64/power4/memcpy.S @@ -28,11 +28,11 @@ with the appropriate combination of byte and halfword load/stores. There is minimal effort to optimize the alignment of short moves. The 64-bit implementations of POWER3 and POWER4 do a reasonable job - of handling unligned load/stores that do not cross 32-byte boundries. + of handling unaligned load/stores that do not cross 32-byte boundaries. Longer moves (>= 32-bytes) justify the effort to get at least the destination doubleword (8-byte) aligned. Further optimization is - posible when both source and destination are doubleword aligned. + possible when both source and destination are doubleword aligned. Each case has a optimized unrolled loop. */ .machine power4 @@ -44,9 +44,9 @@ EALIGN (BP_SYM (memcpy), 5, 0) std 3,-16(1) std 31,-8(1) cfi_offset(31,-8) - andi. 11,3,7 /* check alignement of dst. */ + andi. 11,3,7 /* check alignment of dst. */ clrldi 0,0,61 /* Number of bytes until the 1st doubleword of dst. */ - clrldi 10,4,61 /* check alignement of src. */ + clrldi 10,4,61 /* check alignment of src. */ cmpldi cr6,5,8 ble- cr1,.L2 /* If move < 32 bytes use short move code. */ cmpld cr6,10,11 @@ -57,7 +57,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) beq .L0 subf 31,0,5 - /* Move 0-7 bytes as needed to get the destination doubleword alligned. */ + /* Move 0-7 bytes as needed to get the destination doubleword aligned. */ 1: bf 31,2f lbz 6,0(12) addi 12,12,1 @@ -74,10 +74,10 @@ EALIGN (BP_SYM (memcpy), 5, 0) stw 6,0(3) addi 3,3,4 0: - clrldi 10,12,61 /* check alignement of src again. */ + clrldi 10,12,61 /* check alignment of src again. */ srdi 9,31,3 /* Number of full double words remaining. */ - /* Copy doublewords from source to destination, assumpting the + /* Copy doublewords from source to destination, assuming the destination is aligned on a doubleword boundary. At this point we know there are at least 25 bytes left (32-7) to copy. @@ -154,7 +154,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) add 12,12,0 /* At this point we have a tail of 0-7 bytes and we know that the - destiniation is double word aligned. */ + destination is double word aligned. */ 4: bf 29,2f lwz 6,0(12) addi 12,12,4 @@ -284,7 +284,7 @@ EALIGN (BP_SYM (memcpy), 5, 0) bne cr6,4f /* Would have liked to use use ld/std here but the 630 processors are slow for load/store doubles that are not at least word aligned. - Unaligned Load/Store word execute with only a 1 cycle penaltity. */ + Unaligned Load/Store word execute with only a 1 cycle penalty. */ lwz 6,0(4) lwz 7,4(4) stw 6,0(3) diff --git a/sysdeps/powerpc/powerpc64/power4/strncmp.S b/sysdeps/powerpc/powerpc64/power4/strncmp.S index 0940571e8d..19877fa785 100644 --- a/sysdeps/powerpc/powerpc64/power4/strncmp.S +++ b/sysdeps/powerpc/powerpc64/power4/strncmp.S @@ -52,7 +52,7 @@ EALIGN (BP_SYM(strncmp), 4, 0) cmpldi cr1, rN, 0 lis rFEFE, -0x101 bne L(unaligned) -/* We are doubleword alligned so set up for two loops. first a double word +/* We are doubleword aligned so set up for two loops. first a double word loop, then fall into the byte loop if any residual. */ srdi. rTMP, rN, 3 clrldi rN, rN, 61 |