about summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r--NEWS8
-rw-r--r--Rules17
-rw-r--r--elf/dl-tunables.list3
-rw-r--r--elf/tst-rtld-list-tunables.exp2
-rw-r--r--malloc/Makefile8
-rw-r--r--malloc/arena.c4
-rw-r--r--malloc/malloc.c31
-rw-r--r--manual/tunables.texi7
-rw-r--r--sysdeps/generic/malloc-hugepages.c8
-rw-r--r--sysdeps/generic/malloc-hugepages.h7
-rw-r--r--sysdeps/unix/sysv/linux/malloc-hugepages.c127
11 files changed, 207 insertions, 15 deletions
diff --git a/NEWS b/NEWS
index 3b94dd209c..c7200cd4e8 100644
--- a/NEWS
+++ b/NEWS
@@ -93,9 +93,11 @@ Major new features:
   configuration.
 
 * On Linux, a new tunable, glibc.malloc.hugetlb, can be used to
-  make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk calls.
-  Setting this might improve performance with Transparent Huge Pages madvise
-  mode depending of the workload.
+  either make malloc issue madvise plus MADV_HUGEPAGE on mmap and sbrk
+  or use huge pages directly through mmap calls with the MAP_HUGETLB
+  flag.  The former can improve performance when Transparent Huge Pages
+  is set to 'madvise' mode while the latter uses the system reserved
+  huge pages.
 
 Deprecated and removed features, and other changes affecting compatibility:
 
diff --git a/Rules b/Rules
index 5f5d9ba4cc..be34982daa 100644
--- a/Rules
+++ b/Rules
@@ -158,6 +158,7 @@ tests: $(tests:%=$(objpfx)%.out) $(tests-internal:%=$(objpfx)%.out) \
        $(tests-mcheck:%=$(objpfx)%-mcheck.out) \
        $(tests-malloc-check:%=$(objpfx)%-malloc-check.out) \
        $(tests-malloc-hugetlb1:%=$(objpfx)%-malloc-hugetlb1.out) \
+       $(tests-malloc-hugetlb2:%=$(objpfx)%-malloc-hugetlb2.out) \
        $(tests-special) $(tests-printers-out)
 xtests: tests $(xtests:%=$(objpfx)%.out) $(xtests-special)
 endif
@@ -170,6 +171,7 @@ else
 tests-expected = $(tests) $(tests-internal) $(tests-printers) \
 	$(tests-container) $(tests-malloc-check:%=%-malloc-check) \
 	$(tests-malloc-hugetlb1:%=%-malloc-hugetlb1) \
+	$(tests-malloc-hugetlb2:%=%-malloc-hugetlb2) \
 	$(tests-mcheck:%=%-mcheck)
 endif
 tests:
@@ -199,6 +201,7 @@ endif
 binaries-mcheck-tests = $(tests-mcheck:%=%-mcheck)
 binaries-malloc-check-tests = $(tests-malloc-check:%=%-malloc-check)
 binaries-malloc-hugetlb1-tests = $(tests-malloc-hugetlb1:%=%-malloc-hugetlb1)
+binaries-malloc-hugetlb2-tests = $(tests-malloc-hugetlb2:%=%-malloc-hugetlb2)
 else
 binaries-all-notests =
 binaries-all-tests = $(tests) $(tests-internal) $(xtests) $(test-srcs)
@@ -211,6 +214,7 @@ binaries-pie-notests =
 binaries-mcheck-tests =
 binaries-malloc-check-tests =
 binaries-malloc-hugetlb1-tests =
+binaries-malloc-hugetlb2-tests =
 endif
 
 binaries-pie = $(binaries-pie-tests) $(binaries-pie-notests)
@@ -259,6 +263,14 @@ $(addprefix $(objpfx),$(binaries-malloc-hugetlb1-tests)): %-malloc-hugetlb1: %.o
 	$(+link-tests)
 endif
 
+ifneq "$(strip $(binaries-malloc-hugetlb2-tests))" ""
+$(addprefix $(objpfx),$(binaries-malloc-hugetlb2-tests)): %-malloc-hugetlb2: %.o \
+  $(link-extra-libs-tests) \
+  $(sort $(filter $(common-objpfx)lib%,$(link-libc))) \
+  $(addprefix $(csu-objpfx),start.o) $(+preinit) $(+postinit)
+	$(+link-tests)
+endif # binaries-malloc-hugetlb2-tests
+
 ifneq "$(strip $(binaries-pie-tests))" ""
 $(addprefix $(objpfx),$(binaries-pie-tests)): %: %.o \
   $(link-extra-libs-tests) \
@@ -302,6 +314,11 @@ $(1)-malloc-hugetlb1-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=1
 endef
 $(foreach t,$(tests-malloc-hugetlb1),$(eval $(call malloc-hugetlb1-ENVS,$(t))))
 
+# Run every malloc-hugetlb2 test with GLIBC_TUNABLES=glibc.malloc.hugetlb=2.
+define malloc-hugetlb2-ENVS
+$(1)-malloc-hugetlb2-ENV += GLIBC_TUNABLES=glibc.malloc.hugetlb=2
+endef
+$(foreach t,$(tests-malloc-hugetlb2),$(eval $(call malloc-hugetlb2-ENVS,$(t))))
 
 # mcheck tests need the debug DSO to support -lmcheck.
 define mcheck-ENVS
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
index d1fd3f3e91..845d521a43 100644
--- a/elf/dl-tunables.list
+++ b/elf/dl-tunables.list
@@ -93,9 +93,8 @@ glibc {
       security_level: SXID_IGNORE
     }
     hugetlb {
-      type: INT_32
+      type: SIZE_T
       minval: 0
-      maxval: 1
     }
   }
   cpu {
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
index d8e363f2c5..cdfdb56a94 100644
--- a/elf/tst-rtld-list-tunables.exp
+++ b/elf/tst-rtld-list-tunables.exp
@@ -1,7 +1,7 @@
 glibc.malloc.arena_max: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.arena_test: 0x0 (min: 0x1, max: 0x[f]+)
 glibc.malloc.check: 0 (min: 0, max: 3)
-glibc.malloc.hugetlb: 0 (min: 0, max: 1)
+glibc.malloc.hugetlb: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mmap_max: 0 (min: 0, max: 2147483647)
 glibc.malloc.mmap_threshold: 0x0 (min: 0x0, max: 0x[f]+)
 glibc.malloc.mxfast: 0x0 (min: 0x0, max: 0x[f]+)
diff --git a/malloc/Makefile b/malloc/Makefile
index 0137595e17..e9a6666d22 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -78,9 +78,9 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
 tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
 				  $(tests-static),$(tests))
 
-# Run all testes with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 that check the
-# Transparent Huge Pages support.  We need exclude some tests that define
-# the ENV vars.
+# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb={1,2} which check
+# the Transparent Huge Pages support (1) or automatic huge page support (2).
+# We need to exclude some tests that define the ENV vars.
 tests-exclude-hugetlb1 = \
 	tst-compathooks-off \
 	tst-compathooks-on \
@@ -93,6 +93,8 @@ tests-exclude-hugetlb1 = \
 	tst-mallocstate
 tests-malloc-hugetlb1 = \
 	$(filter-out $(tests-exclude-hugetlb1), $(tests))
+tests-malloc-hugetlb2 = \
+	$(filter-out $(tests-exclude-hugetlb1), $(tests))
 
 # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
 ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
diff --git a/malloc/arena.c b/malloc/arena.c
index cd00c7bef4..9a6e1af2bd 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -230,7 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
 TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
 #endif
 TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
-TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
+TUNABLE_CALLBACK_FNDECL (set_hugetlb, size_t)
 #else
 /* Initialization routine. */
 #include <string.h>
@@ -331,7 +331,7 @@ ptmalloc_init (void)
 	       TUNABLE_CALLBACK (set_tcache_unsorted_limit));
 # endif
   TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
-  TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
+  TUNABLE_GET (hugetlb, size_t, TUNABLE_CALLBACK (set_hugetlb));
 #else
   if (__glibc_likely (_environ != NULL))
     {
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 6b6ec53db1..75efdc2ee7 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1883,6 +1883,10 @@ struct malloc_par
 #if HAVE_TUNABLES
   /* Transparent Large Page support.  */
   INTERNAL_SIZE_T thp_pagesize;
+  /* A value other than 0 means to align mmap allocations to hp_pagesize
+     and to add hp_flags to the mmap flags.  */
+  INTERNAL_SIZE_T hp_pagesize;
+  int hp_flags;
 #endif
 
   /* Memory map support */
@@ -2440,7 +2444,10 @@ sysmalloc_mmap (INTERNAL_SIZE_T nb, size_t pagesize, int extra_flags, mstate av)
   if (mm == MAP_FAILED)
     return mm;
 
-  madvise_thp (mm, size);
+#ifdef MAP_HUGETLB
+  if (!(extra_flags & MAP_HUGETLB))
+#endif /* Without MAP_HUGETLB the THP advice is issued unconditionally.  */
+    madvise_thp (mm, size);
 
   /*
     The offset to the start of the mmapped region is stored in the prev_size
@@ -2528,7 +2535,18 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
       || ((unsigned long) (nb) >= (unsigned long) (mp_.mmap_threshold)
 	  && (mp_.n_mmaps < mp_.n_mmaps_max)))
     {
-      char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
+      char *mm;
+#if HAVE_TUNABLES
+      if (mp_.hp_pagesize > 0 && nb >= mp_.hp_pagesize)
+	{
+	  /* There is no need to issue the THP madvise call if Huge Pages are
+	     used directly.  */
+	  mm = sysmalloc_mmap (nb, mp_.hp_pagesize, mp_.hp_flags, av);
+	  if (mm != MAP_FAILED)
+	    return mm;
+	}
+#endif
+      mm = sysmalloc_mmap (nb, pagesize, 0, av);
       if (mm != MAP_FAILED)
 	return mm;
       tried_mmap = true;
@@ -2609,7 +2627,9 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
         }
       else if (!tried_mmap)
 	{
-	  /* We can at least try to use to mmap memory.  */
+	  /* We can at least try to mmap memory.  If new_heap fails
+	     it is unlikely that trying to allocate huge pages will
+	     succeed.  */
 	  char *mm = sysmalloc_mmap (nb, pagesize, 0, av);
 	  if (mm != MAP_FAILED)
 	    return mm;
@@ -5383,7 +5403,7 @@ do_set_mxfast (size_t value)
 
 #if HAVE_TUNABLES
 static __always_inline int
-do_set_hugetlb (int32_t value)
+do_set_hugetlb (size_t value)
 {
   if (value == 1)
     {
@@ -5395,6 +5415,9 @@ do_set_hugetlb (int32_t value)
       if (thp_mode == malloc_thp_mode_madvise)
 	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
     }
+  else if (value >= 2)
+    __malloc_hugepage_config (value == 2 ? 0 : value, &mp_.hp_pagesize,
+			      &mp_.hp_flags);
   return 0;
 }
 #endif
diff --git a/manual/tunables.texi b/manual/tunables.texi
index 9ca6e3f603..58a47b2e9b 100644
--- a/manual/tunables.texi
+++ b/manual/tunables.texi
@@ -278,6 +278,13 @@ default value is @code{0}, which disables any additional support on
 Setting its value to @code{1} enables the use of @code{madvise} with
 @code{MADV_HUGEPAGE} after memory allocation with @code{mmap}.  It is enabled
 only if the system supports Transparent Huge Page (currently only on Linux).
+
+Setting its value to @code{2} enables the direct use of huge pages by
+calling @code{mmap} with the @code{MAP_HUGETLB} flag.  The huge page size
+to use will be the default one provided by the system.  A value larger than
+@code{2} specifies the huge page size, which will be matched against the
+sizes supported by the system.  If the provided value is invalid,
+@code{MAP_HUGETLB} will not be used.
 @end deftp
 
 @node Dynamic Linking Tunables
diff --git a/sysdeps/generic/malloc-hugepages.c b/sysdeps/generic/malloc-hugepages.c
index 8fb459a263..946284a33c 100644
--- a/sysdeps/generic/malloc-hugepages.c
+++ b/sysdeps/generic/malloc-hugepages.c
@@ -29,3 +29,11 @@ __malloc_thp_mode (void)
 {
   return malloc_thp_mode_not_supported;
 }
+
+/* Report no usable huge page: set both *PAGESIZE and *FLAGS to 0.  */
+void
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+{
+  *pagesize = 0;
+  *flags = 0;
+}
diff --git a/sysdeps/generic/malloc-hugepages.h b/sysdeps/generic/malloc-hugepages.h
index f5a442e328..75cda3796a 100644
--- a/sysdeps/generic/malloc-hugepages.h
+++ b/sysdeps/generic/malloc-hugepages.h
@@ -34,4 +34,11 @@ enum malloc_thp_mode_t
 
 enum malloc_thp_mode_t __malloc_thp_mode (void) attribute_hidden;
 
+/* Store in PAGESIZE the supported huge page size selected by REQUESTED,
+   along with the required extra mmap flags in FLAGS.  Requesting the value
+   of 0 returns the default huge page size, otherwise the value will be
+   matched against the sizes supported by the system.  */
+void __malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+     attribute_hidden;
+
 #endif /* _MALLOC_HUGEPAGES_H */
diff --git a/sysdeps/unix/sysv/linux/malloc-hugepages.c b/sysdeps/unix/sysv/linux/malloc-hugepages.c
index 7497e07260..0e05291d61 100644
--- a/sysdeps/unix/sysv/linux/malloc-hugepages.c
+++ b/sysdeps/unix/sysv/linux/malloc-hugepages.c
@@ -17,8 +17,10 @@
    not, see <https://www.gnu.org/licenses/>.  */
 
 #include <intprops.h>
+#include <dirent.h>
 #include <malloc-hugepages.h>
 #include <not-cancel.h>
+#include <sys/mman.h>
 
 unsigned long int
 __malloc_default_thp_pagesize (void)
@@ -72,3 +74,128 @@ __malloc_thp_mode (void)
     }
   return malloc_thp_mode_not_supported;
 }
+
+static size_t /* Default huge page size in bytes, or 0 if not found.  */
+malloc_default_hugepage_size (void)
+{
+  int fd = __open64_nocancel ("/proc/meminfo", O_RDONLY);
+  if (fd == -1)
+    return 0;
+
+  size_t hpsize = 0;
+
+  char buf[512];
+  off64_t off = 0;
+  while (1)
+    {
+      ssize_t r = __pread64_nocancel (fd, buf, sizeof (buf) - 1, off);
+      if (r < 0)
+	break;
+      buf[r] = '\0';
+
+      /* If the tag is not found, resume reading after the last full line.  */
+      const char *s = strstr (buf, "Hugepagesize:");
+      if (s == NULL)
+	{
+	  char *nl = strrchr (buf, '\n');
+	  if (nl == NULL)
+	    break;
+	  off += (nl + 1) - buf;
+	  continue;
+	}
+
+      /* The matching line has the form "Hugepagesize:   NUMBER kB"; skip
+	 the tag, then accumulate the decimal digits, ignoring spaces.  */
+      s += sizeof ("Hugepagesize: ") - 1;
+      for (int i = 0; (s[i] >= '0' && s[i] <= '9') || s[i] == ' '; i++)
+	{
+	  if (s[i] == ' ')
+	    continue;
+	  hpsize *= 10;
+	  hpsize += s[i] - '0';
+	}
+      hpsize *= 1024; /* kB to bytes.  */
+      break;
+    }
+
+  __close_nocancel (fd);
+
+  return hpsize;
+}
+
+static inline int /* mmap flags requesting huge pages of PAGESIZE bytes.  */
+hugepage_flags (size_t pagesize)
+{
+  return MAP_HUGETLB | (__builtin_ctzll (pagesize) << MAP_HUGE_SHIFT);
+}
+
+void
+__malloc_hugepage_config (size_t requested, size_t *pagesize, int *flags)
+{
+  *pagesize = 0; /* Results when no matching huge page size is found.  */
+  *flags = 0;
+
+  if (requested == 0) /* Use the default size from /proc/meminfo.  */
+    {
+      *pagesize = malloc_default_hugepage_size ();
+      if (*pagesize != 0)
+	*flags = hugepage_flags (*pagesize);
+      return;
+    }
+
+  /* Each directory entry names a supported huge page size in the form
+     hugepages-<size>kB.  */
+  int dirfd = __open64_nocancel ("/sys/kernel/mm/hugepages",
+				 O_RDONLY | O_DIRECTORY, 0);
+  if (dirfd == -1)
+    return;
+
+  char buffer[1024];
+  while (true)
+    {
+#if !IS_IN(libc)
+# define __getdents64 getdents64
+#endif
+      ssize_t ret = __getdents64 (dirfd, buffer, sizeof (buffer));
+      if (ret == -1)
+	break; /* Read error.  */
+      else if (ret == 0)
+        break; /* No more directory entries.  */
+
+      bool found = false;
+      char *begin = buffer, *end = buffer + ret;
+      while (begin != end)
+        {
+          unsigned short int d_reclen;
+          memcpy (&d_reclen, begin + offsetof (struct dirent64, d_reclen),
+                  sizeof (d_reclen));
+          const char *dname = begin + offsetof (struct dirent64, d_name);
+          begin += d_reclen; /* Advance to the next entry.  */
+
+          if (dname[0] == '.'
+	      || strncmp (dname, "hugepages-", sizeof ("hugepages-") - 1) != 0)
+            continue;
+
+	  size_t hpsize = 0;
+	  const char *sizestr = dname + sizeof ("hugepages-") - 1;
+	  for (int i = 0; sizestr[i] >= '0' && sizestr[i] <= '9'; i++)
+	    {
+	      hpsize *= 10;
+	      hpsize += sizestr[i] - '0';
+	    }
+	  hpsize *= 1024; /* kB to bytes.  */
+
+	  if (hpsize == requested)
+	    {
+	      *pagesize = hpsize;
+	      *flags = hugepage_flags (*pagesize);
+	      found = true;
+	      break;
+	    }
+        }
+      if (found)
+	break;
+    }
+
+  __close_nocancel (dirfd);
+}