about summary refs log tree commit diff
path: root/sysdeps/tile/tilegx/memcpy.c
diff options
context:
space:
mode:
author Adhemerval Zanella <adhemerval.zanella@linaro.org> 2017-12-13 11:43:39 -0200
committer Adhemerval Zanella <adhemerval.zanella@linaro.org> 2017-12-20 16:55:26 -0200
commit 24d1d8ec9e529ed66c49e17366fe5a889d483670 (patch)
tree a80a960c7f0b071cff127c37d76814413e3a956d /sysdeps/tile/tilegx/memcpy.c
parent ed95f6114928f00f5f8fe76ccf2ec9e77872cbad (diff)
download glibc-24d1d8ec9e529ed66c49e17366fe5a889d483670.tar.gz
glibc-24d1d8ec9e529ed66c49e17366fe5a889d483670.tar.xz
glibc-24d1d8ec9e529ed66c49e17366fe5a889d483670.zip
Simplify tilegx sysdeps folder
With tilepro support removal we can now simplify internal tile support by
moving the directory structure to avoid the unnecessary directory levels
in tile/tilegx both on generic and linux folders.

Checked with a build for tilegx-linux-gnu and tilegx-linux-gnu-32 with
and without the patch; a disassembly shows no difference in the
generated binaries.

	* stdlib/bug-getcontext.c (do_test): Remove tilepro mention in
	comment.
	* sysdeps/tile/preconfigure: Remove tilegx folder.
	* sysdeps/tile/tilegx/Implies: Move definitions to ...
	* sysdeps/tile/Implies: ... here.
	* sysdeps/tile/tilegx/Makefile: Move rules to ...
	* sysdeps/tile/Makefile: ... here.
	* sysdeps/tile/tilegx/atomic-machine.h: Move definitions to ...
	* sysdeps/tile/atomic-machine.h: ... here.  Add include guards.
	* sysdeps/tile/tilegx/bits/wordsize.h: Move to ...
	* sysdeps/tile/bits/wordsize.h: ... here.
	* sysdeps/tile/tilegx/*: Move to ...
	* sysdeps/tile/*: ... here.
	* sysdeps/tile/tilegx/tilegx32/Implies: Move to ...
	* sysdeps/tile/tilegx32/Implies: ... here.
	* sysdeps/tile/tilegx/tilegx64/Implies: Move to ...
	* sysdeps/tile/tilegx64/Implies: ... here.
	* sysdeps/unix/sysv/linux/tile/tilegx/Makefile: Move definitions
	to ...
	* sysdeps/unix/sysv/linux/tile/Makefile: ... here.
	* sysdeps/unix/sysv/linux/tile/tilegx/*: Move to ...
	* sysdeps/unix/sysv/linux/tile/*: ... here.
	* sysdeps/unix/sysv/linux/tile/tilegx/tilegx32/*: Move to ...
	* sysdeps/unix/sysv/linux/tile/tilegx32/*: ... here.
	* sysdeps/unix/sysv/linux/tile/tilegx/tilegx64/*: Move to ...
	* sysdeps/unix/sysv/linux/tile/tilegx64/*: ... here.
Diffstat (limited to 'sysdeps/tile/tilegx/memcpy.c')
-rw-r--r--sysdeps/tile/tilegx/memcpy.c272
1 files changed, 0 insertions, 272 deletions
diff --git a/sysdeps/tile/tilegx/memcpy.c b/sysdeps/tile/tilegx/memcpy.c
deleted file mode 100644
index c1a2a29860..0000000000
--- a/sysdeps/tile/tilegx/memcpy.c
+++ /dev/null
@@ -1,272 +0,0 @@
-/* Copyright (C) 2011-2017 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Chris Metcalf <cmetcalf@tilera.com>, 2011.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library.  If not, see
-   <http://www.gnu.org/licenses/>.  */
-
-#include <string.h>
-#include <stdint.h>
-#include <stdlib.h>
-#include <memcopy.h>
-#include <arch/chip.h>
-
-/* How many cache lines ahead should we prefetch? */
-#define PREFETCH_LINES_AHEAD 3
-
-void * inhibit_loop_to_libcall
-__memcpy (void *__restrict dstv, const void *__restrict srcv, size_t n)
-{ /* Copy n bytes from srcv to dstv; every exit path returns dstv.  */
-  char *__restrict dst1 = (char *) dstv;
-  const char *__restrict src1 = (const char *) srcv;
-  const char *__restrict src1_end; /* Last source byte to copy. */
-  const char *__restrict prefetch; /* Next source address to prefetch. */
-  op_t *__restrict dst8; /* 8-byte pointer to destination memory. */
-  op_t final; /* Final bytes to write to trailing word, if any */
-  long i;
-
-  if (n < 16) /* Small copy: byte-at-a-time, no alignment or prefetch. */
-    {
-      for (; n; n--)
-        *dst1++ = *src1++;
-      return dstv;
-    }
-
-  /* Address of the last source byte we will copy.  Don't prefetch
-     past this.  */
-  src1_end = src1 + n - 1;
-
-  /* Prefetch a few lines ahead; wrap to src1 instead of passing the end. */
-  prefetch = src1;
-  for (i = 0; i < PREFETCH_LINES_AHEAD; i++)
-    {
-      __insn_prefetch (prefetch);
-      prefetch += CHIP_L2_LINE_SIZE ();
-      prefetch = (prefetch < src1_end) ? prefetch : src1;
-    }
-
-  /* Copy single bytes until dst1 is aligned to sizeof (op_t). */
-  for (; (uintptr_t) dst1 & (sizeof (op_t) - 1); n--)
-    *dst1++ = *src1++;
-
-  /* 8-byte pointer to destination memory. */
-  dst8 = (op_t *) dst1;
-
-  if (__builtin_expect ((uintptr_t) src1 & (sizeof (op_t) - 1), 0))
-    {
-      /* Misaligned copy.  Use glibc's _wordcopy_fwd_dest_aligned, but
-         inline it to avoid prologue/epilogue.  TODO: Consider
-         prefetching and using wh64 as well.  */
-      void * srci;
-      op_t a0, a1, a2, a3; /* Rotating window of loaded source words. */
-      long int dstp = (long int) dst1;
-      long int srcp = (long int) src1;
-      long int len = n / OPSIZ; /* Whole words left to store. */
-
-      /* Save the initial source pointer so we know the number of
-         bytes to shift for merging two unaligned results.  */
-      srci = (void *) srcp;
-
-      /* Make SRCP aligned by rounding it down to the beginning of the
-         `op_t' it points in the middle of.  */
-      srcp &= -OPSIZ;
-
-      switch (len % 4) /* Pick the entry point into the unrolled loop. */
-	{
-	case 2:
-	  a1 = ((op_t *) srcp)[0];
-	  a2 = ((op_t *) srcp)[1];
-	  len += 2; /* Bias len to a multiple of 4 for the loop test. */
-	  srcp += 2 * OPSIZ;
-	  goto do1;
-	case 3:
-	  a0 = ((op_t *) srcp)[0];
-	  a1 = ((op_t *) srcp)[1];
-	  len += 1;
-	  srcp += 2 * OPSIZ;
-	  goto do2;
-	case 0:
-	  if (OP_T_THRES <= 3 * OPSIZ && len == 0)
-	    return dstv;
-	  a3 = ((op_t *) srcp)[0];
-	  a0 = ((op_t *) srcp)[1];
-	  len += 0;
-	  srcp += 2 * OPSIZ;
-	  goto do3;
-	case 1:
-	  a2 = ((op_t *) srcp)[0];
-	  a3 = ((op_t *) srcp)[1];
-	  srcp += 2 * OPSIZ;
-	  len -= 1;
-	  if (OP_T_THRES <= 3 * OPSIZ && len == 0)
-	    goto do0;
-	  goto do4;			/* No-op.  */
-	}
-
-      do
-	{
-	do4:
-	  a0 = ((op_t *) srcp)[0];
-	  a2 = __insn_dblalign (a2, a3, srci);
-	  ((op_t *) dstp)[0] = a2;
-	  srcp += OPSIZ;
-	  dstp += OPSIZ;
-	do3:
-	  a1 = ((op_t *) srcp)[0];
-	  a3 = __insn_dblalign (a3, a0, srci);
-	  ((op_t *) dstp)[0] = a3;
-	  srcp += OPSIZ;
-	  dstp += OPSIZ;
-	do2:
-	  a2 = ((op_t *) srcp)[0];
-	  a0 = __insn_dblalign (a0, a1, srci);
-	  ((op_t *) dstp)[0] = a0;
-	  srcp += OPSIZ;
-	  dstp += OPSIZ;
-	do1:
-	  a3 = ((op_t *) srcp)[0];
-	  a1 = __insn_dblalign (a1, a2, srci);
-	  ((op_t *) dstp)[0] = a1;
-	  srcp += OPSIZ;
-	  dstp += OPSIZ;
-	  len -= 4; /* A full pass through the loop body stores four words. */
-	}
-      while (len != 0);
-
-      /* This is the right position for do0.  Please don't move
-         it into the loop.  */
-    do0:
-      ((op_t *) dstp)[0] = __insn_dblalign (a2, a3, srci);
-
-      n = n % OPSIZ; /* Trailing bytes left after the whole words. */
-      if (n == 0)
-	return dstv;
-
-      a0 = ((const char *) srcp <= src1_end) ? ((op_t *) srcp)[0] : 0;
-
-      final = __insn_dblalign (a3, a0, srci);
-      dst8 = (op_t *)(dstp + OPSIZ); /* Just past the word stored at do0. */
-    }
-  else
-    {
-      /* Aligned copy. */
-
-      const op_t *__restrict src8 = (const op_t *) src1;
-
-      /* src8 and dst8 are both word-aligned. */
-      if (n >= CHIP_L2_LINE_SIZE ())
-        {
-          /* Copy until 'dst' is cache-line-aligned. */
-          for (; (uintptr_t) dst8 & (CHIP_L2_LINE_SIZE () - 1);
-               n -= sizeof (op_t))
-            *dst8++ = *src8++;
-
-          for (; n >= CHIP_L2_LINE_SIZE ();)
-	    {
-	      op_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
-
-	      /* Prefetch and advance to next line to prefetch, but
-		 don't go past the end.  */
-	      __insn_prefetch (prefetch);
-	      prefetch += CHIP_L2_LINE_SIZE ();
-	      prefetch = (prefetch < src1_end) ? prefetch :
-		(const char *) src8;
-
-	      /* Do all the loads before wh64.  This is necessary if
-		 [src8, src8+7] and [dst8, dst8+7] share the same
-		 cache line and dst8 <= src8, as can be the case when
-		 called from memmove, or with code tested on x86 whose
-		 memcpy always works with forward copies.  */
-	      tmp0 = *src8++;
-	      tmp1 = *src8++;
-	      tmp2 = *src8++;
-	      tmp3 = *src8++;
-	      tmp4 = *src8++;
-	      tmp5 = *src8++;
-	      tmp6 = *src8++;
-	      tmp7 = *src8++;
-
-	      __insn_wh64 (dst8);
-
-	      *dst8++ = tmp0;
-	      *dst8++ = tmp1;
-	      *dst8++ = tmp2;
-	      *dst8++ = tmp3;
-	      *dst8++ = tmp4;
-	      *dst8++ = tmp5;
-	      *dst8++ = tmp6;
-	      *dst8++ = tmp7;
-
-	      n -= 64; /* One cache line; the #error below pins the size. */
-	    }
-#if CHIP_L2_LINE_SIZE() != 64
-# error "Fix code that assumes particular L2 cache line size."
-#endif
-        }
-
-      for (; n >= sizeof (op_t); n -= sizeof (op_t))
-        *dst8++ = *src8++;
-
-      if (__builtin_expect (n == 0, 1))
-        return dstv;
-
-      final = *src8; /* Whole-word load; only the first n bytes are stored. */
-    }
-
-  /* n != 0 if we get here.  Write out any trailing bytes. */
-  dst1 = (char *) dst8;
-#ifndef __BIG_ENDIAN__ /* Little-endian: consume final from the low end. */
-  if (n & 4)
-    {
-      *(uint32_t *) dst1 = final;
-      dst1 += 4;
-      final >>= 32;
-      n &= 3;
-    }
-  if (n & 2)
-    {
-      *(uint16_t *) dst1 = final;
-      dst1 += 2;
-      final >>= 16;
-      n &= 1;
-    }
-  if (n)
-    *(uint8_t *) dst1 = final;
-#else /* Big-endian: consume final from the high end. */
-  if (n & 4)
-    {
-      *(uint32_t *) dst1 = final >> 32;
-      dst1 += 4;
-    }
-  else
-    {
-      final >>= 32;
-    }
-  if (n & 2)
-    {
-      *(uint16_t *) dst1 = final >> 16;
-      dst1 += 2;
-    }
-  else
-    {
-      final >>= 16;
-    }
-  if (n & 1)
-    *(uint8_t *) dst1 = final >> 8;
-#endif
-
-  return dstv;
-}
-weak_alias (__memcpy, memcpy)
-libc_hidden_builtin_def (memcpy)