summary refs log tree commit diff
path: root/malloc
diff options
context:
space:
mode:
authorAdhemerval Zanella <adhemerval.zanella@linaro.org>2021-08-13 08:36:29 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2021-12-15 17:35:14 -0300
commit5f6d8d97c69748180f0031dfa385aff75062c4d5 (patch)
tree97837a8eab4175385c32bacb10dc633b9070f266 /malloc
parentcb976fba4c51ede7bf8cee5035888527c308dfbc (diff)
downloadglibc-5f6d8d97c69748180f0031dfa385aff75062c4d5.tar.gz
glibc-5f6d8d97c69748180f0031dfa385aff75062c4d5.tar.xz
glibc-5f6d8d97c69748180f0031dfa385aff75062c4d5.zip
malloc: Add madvise support for Transparent Huge Pages
Linux Transparent Huge Pages (THP) currently supports three different
states: 'never', 'madvise', and 'always'.  The 'never' is
self-explanatory and 'always' will enable THP for all anonymous
pages.  However, 'madvise' is still the default for some systems and
in such cases THP will only be used if the memory range is explicitly
advised by the program through a madvise(MADV_HUGEPAGE) call.

To enable it a new tunable is provided, 'glibc.malloc.hugetlb',
where setting it to a value different from 0 enables the madvise call.

This patch issues the madvise(MADV_HUGEPAGE) call after a successful
mmap() call at sysmalloc() with sizes larger than the default huge
page size.  The madvise() call is disabled if the system does not
support THP or if it has the mode set to "never".  On Linux only one
page size is supported for THP, even if the architecture supports
multiple sizes.

To test it, a new rule, tests-malloc-hugetlb1, is added, which runs the
added tests with the required GLIBC_TUNABLES setting.

Checked on x86_64-linux-gnu.

Reviewed-by: DJ Delorie <dj@redhat.com>
Diffstat (limited to 'malloc')
-rw-r--r--malloc/Makefile16
-rw-r--r--malloc/arena.c5
-rw-r--r--malloc/malloc-internal.h1
-rw-r--r--malloc/malloc.c47
4 files changed, 69 insertions, 0 deletions
diff --git a/malloc/Makefile b/malloc/Makefile
index 63cd7c0734..0137595e17 100644
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -78,6 +78,22 @@ tests-exclude-malloc-check = tst-malloc-check tst-malloc-usable \
 tests-malloc-check = $(filter-out $(tests-exclude-malloc-check) \
 				  $(tests-static),$(tests))
 
+# Run all tests with GLIBC_TUNABLES=glibc.malloc.hugetlb=1 to check the
+# Transparent Huge Pages support.  We need to exclude some tests that define
+# the ENV vars.
+tests-exclude-hugetlb1 = \
+	tst-compathooks-off \
+	tst-compathooks-on \
+	tst-interpose-nothread \
+	tst-interpose-thread \
+	tst-interpose-static-nothread \
+	tst-interpose-static-thread \
+	tst-malloc-usable \
+	tst-malloc-usable-tunables \
+	tst-mallocstate
+tests-malloc-hugetlb1 = \
+	$(filter-out $(tests-exclude-hugetlb1), $(tests))
+
 # -lmcheck needs __malloc_initialize_hook, which was deprecated in 2.24.
 ifeq ($(have-GLIBC_2.23)$(build-shared),yesyes)
 # Tests that don't play well with mcheck.  They are either bugs in mcheck or
diff --git a/malloc/arena.c b/malloc/arena.c
index 78ef4cf18c..cd00c7bef4 100644
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -230,6 +230,7 @@ TUNABLE_CALLBACK_FNDECL (set_tcache_count, size_t)
 TUNABLE_CALLBACK_FNDECL (set_tcache_unsorted_limit, size_t)
 #endif
 TUNABLE_CALLBACK_FNDECL (set_mxfast, size_t)
+TUNABLE_CALLBACK_FNDECL (set_hugetlb, int32_t)
 #else
 /* Initialization routine. */
 #include <string.h>
@@ -330,6 +331,7 @@ ptmalloc_init (void)
 	       TUNABLE_CALLBACK (set_tcache_unsorted_limit));
 # endif
   TUNABLE_GET (mxfast, size_t, TUNABLE_CALLBACK (set_mxfast));
+  TUNABLE_GET (hugetlb, int32_t, TUNABLE_CALLBACK (set_hugetlb));
 #else
   if (__glibc_likely (_environ != NULL))
     {
@@ -508,6 +510,9 @@ new_heap (size_t size, size_t top_pad)
       __munmap (p2, HEAP_MAX_SIZE);
       return 0;
     }
+
+  madvise_thp (p2, size);
+
   h = (heap_info *) p2;
   h->size = size;
   h->mprotect_size = size;
diff --git a/malloc/malloc-internal.h b/malloc/malloc-internal.h
index 0c7b5a183c..7493e34d86 100644
--- a/malloc/malloc-internal.h
+++ b/malloc/malloc-internal.h
@@ -22,6 +22,7 @@
 #include <malloc-machine.h>
 #include <malloc-sysdep.h>
 #include <malloc-size.h>
+#include <malloc-hugepages.h>
 
 /* Called in the parent process before a fork.  */
 void __malloc_fork_lock_parent (void) attribute_hidden;
diff --git a/malloc/malloc.c b/malloc/malloc.c
index 095d97a3be..c75841b841 100644
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1880,6 +1880,11 @@ struct malloc_par
   INTERNAL_SIZE_T arena_test;
   INTERNAL_SIZE_T arena_max;
 
+#if HAVE_TUNABLES
+  /* Transparent Huge Page support.  */
+  INTERNAL_SIZE_T thp_pagesize;
+#endif
+
   /* Memory map support */
   int n_mmaps;
   int n_mmaps_max;
@@ -2008,6 +2013,20 @@ free_perturb (char *p, size_t n)
 
 #include <stap-probe.h>
 
+/* ----------- Routines dealing with transparent huge pages ----------- */
+
+static inline void
+madvise_thp (void *p, INTERNAL_SIZE_T size)
+{
+#if HAVE_TUNABLES && defined (MADV_HUGEPAGE)
+  /* Do not consider areas smaller than a huge page or if the tunable is
+     not active.  */
+  if (mp_.thp_pagesize == 0 || size < mp_.thp_pagesize)
+    return;
+  __madvise (p, size, MADV_HUGEPAGE);
+#endif
+}
+
 /* ------------------- Support for multiple arenas -------------------- */
 #include "arena.c"
 
@@ -2445,6 +2464,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 
           if (mm != MAP_FAILED)
             {
+	      madvise_thp (mm, size);
+
               /*
                  The offset to the start of the mmapped region is stored
                  in the prev_size field of the chunk. This allows us to adjust
@@ -2606,6 +2627,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
       if (size > 0)
         {
           brk = (char *) (MORECORE (size));
+	  if (brk != (char *) (MORECORE_FAILURE))
+	    madvise_thp (brk, size);
           LIBC_PROBE (memory_sbrk_more, 2, brk, size);
         }
 
@@ -2637,6 +2660,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
 
               if (mbrk != MAP_FAILED)
                 {
+		  madvise_thp (mbrk, size);
+
                   /* We do not need, and cannot use, another sbrk call to find end */
                   brk = mbrk;
                   snd_brk = brk + size;
@@ -2748,6 +2773,8 @@ sysmalloc (INTERNAL_SIZE_T nb, mstate av)
                       correction = 0;
                       snd_brk = (char *) (MORECORE (0));
                     }
+		  else
+		    madvise_thp (snd_brk, correction);
                 }
 
               /* handle non-contiguous cases */
@@ -2988,6 +3015,8 @@ mremap_chunk (mchunkptr p, size_t new_size)
   if (cp == MAP_FAILED)
     return 0;
 
+  madvise_thp (cp, new_size);
+
   p = (mchunkptr) (cp + offset);
 
   assert (aligned_OK (chunk2mem (p)));
@@ -5316,6 +5345,24 @@ do_set_mxfast (size_t value)
   return 0;
 }
 
+#if HAVE_TUNABLES
+static __always_inline int
+do_set_hugetlb (int32_t value)
+{
+  if (value == 1)
+    {
+      enum malloc_thp_mode_t thp_mode = __malloc_thp_mode ();
+      /*
+	 Only enable THP madvise usage if system does support it and
+	 has 'madvise' mode.  Otherwise the madvise() call is wasteful.
+       */
+      if (thp_mode == malloc_thp_mode_madvise)
+	mp_.thp_pagesize = __malloc_default_thp_pagesize ();
+    }
+  return 0;
+}
+#endif
+
 int
 __libc_mallopt (int param_number, int value)
 {