about summary refs log tree commit diff
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-06-15 10:20:21 -0400
committerUlrich Drepper <drepper@gmail.com>2011-06-15 21:06:19 -0400
commita9e836b0406c259979fbb31eb6650d33c2bbefbc (patch)
treecf1d8206337cecad31d25a6fc2153e382cef2414
parent2666d441c2d8107b1987b869714189af64b954c6 (diff)
downloadglibc-a9e836b0406c259979fbb31eb6650d33c2bbefbc.tar.gz
glibc-a9e836b0406c259979fbb31eb6650d33c2bbefbc.tar.xz
glibc-a9e836b0406c259979fbb31eb6650d33c2bbefbc.zip
Optimize hash table generation in makedb
-rw-r--r--ChangeLog5
-rw-r--r--nss/makedb.c125
2 files changed, 107 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index 60ad9ae1f0..ed75109d2b 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+2011-06-15  Ulrich Drepper  <drepper@gmail.com>
+
+	* nss/makedb.c (compute_tables): Check result of multiple hash table
+	sizes to minimize maximum chain length.
+
 2011-06-14  Ulrich Drepper  <drepper@gmail.com>
 
 	* Versions.def: Add entry for libnss_db.
diff --git a/nss/makedb.c b/nss/makedb.c
index 687414b74f..ebb6215355 100644
--- a/nss/makedb.c
+++ b/nss/makedb.c
@@ -63,7 +63,7 @@ struct database
   char *keystrtab;
 } *databases;
 static size_t ndatabases;
-static size_t nhashentries;
+static size_t nhashentries_total;
 static size_t valstrlen;
 static void *valstrtree;
 static char *valstrtab;
@@ -542,6 +542,37 @@ copy_valstr (const void *nodep, const VISIT which, const int depth)
 }
 
 
+static int
+is_prime (size_t candidate)
+{
+  /* No even number and none less than 10 will be passed here.  */
+  size_t divn = 3;
+  size_t sq = divn * divn;
+
+  while (sq < candidate && candidate % divn != 0)
+    {
+      ++divn;
+      sq += 4 * divn;
+      ++divn;
+    }
+
+  return candidate % divn != 0;
+}
+
+
+static size_t
+next_prime (size_t seed)
+{
+  /* Make it definitely odd.  */
+  seed |= 1;
+
+  while (!is_prime (seed))
+    seed += 2;
+
+  return seed;
+}
+
+
 static void
 compute_tables (void)
 {
@@ -558,15 +589,23 @@ compute_tables (void)
 	/* We simply use an odd number large than twice the number of
 	   elements to store in the hash table for the size.  This gives
 	   enough efficiency.  */
-	db->nhashentries = db->nentries * 2 + 1;
-	db->hashtable = xmalloc (db->nhashentries * sizeof (stridx_t));
-	memset (db->hashtable, '\xff', db->nhashentries * sizeof (stridx_t));
-	db->keyidxtab = xmalloc (db->nhashentries * sizeof (stridx_t));
-	memset (db->keyidxtab, '\xff', db->nhashentries * sizeof (stridx_t));
-	db->keystrtab = xmalloc (db->keystrlen);
-
-	size_t max_chainlength = 0;
-	char *wp = db->keystrtab;
+#define TEST_RANGE 30
+	size_t nhashentries_min = next_prime (MAX (db->nentries,
+						   db->nentries
+						   * 2 - TEST_RANGE));
+	size_t nhashentries_max = MAX (nhashentries_min, db->nentries * 4);
+	size_t nhashentries_best = nhashentries_min;
+	size_t chainlength_best = db->nentries;
+
+	db->hashtable = xmalloc (2 * nhashentries_max * sizeof (stridx_t)
+				 + db->keystrlen);
+	db->keyidxtab = db->hashtable + nhashentries_max;
+	db->keystrtab = (char *) (db->keyidxtab + nhashentries_max);
+
+	size_t max_chainlength;
+	char *wp;
+	size_t nhashentries;
+	bool copy_string = false;
 
 	void add_key(const void *nodep, const VISIT which, const int depth)
 	{
@@ -575,18 +614,24 @@ compute_tables (void)
 
 	  const struct dbentry *dbe = *(const struct dbentry **) nodep;
 
-	  ptrdiff_t stridx = wp - db->keystrtab;
-	  wp = stpcpy (wp, dbe->str) + 1;
+	  ptrdiff_t stridx;
+	  if (copy_string)
+	    {
+	      stridx = wp - db->keystrtab;
+	      wp = stpcpy (wp, dbe->str) + 1;
+	    }
+	  else
+	    stridx = 0;
 
-	  size_t hidx = dbe->hashval % db->nhashentries;
-	  size_t hval2 = 1 + dbe->hashval % (db->nhashentries - 2);
+	  size_t hidx = dbe->hashval % nhashentries;
+	  size_t hval2 = 1 + dbe->hashval % (nhashentries - 2);
 	  size_t chainlength = 0;
 
 	  while (db->hashtable[hidx] != ~((stridx_t) 0))
 	    {
 	      ++chainlength;
-	      if ((hidx += hval2) >= db->nhashentries)
-		hidx -= db->nhashentries;
+	      if ((hidx += hval2) >= nhashentries)
+		hidx -= nhashentries;
 	    }
 
 	  db->hashtable[hidx] = dbe->validx;
@@ -595,11 +640,45 @@ compute_tables (void)
 	  max_chainlength = MAX (max_chainlength, chainlength);
 	}
 
-	twalk (db->entries, add_key);
+	nhashentries = nhashentries_min;
+	for (size_t cnt = 0; cnt < TEST_RANGE; ++cnt)
+	  {
+	    memset (db->hashtable, '\xff', nhashentries * sizeof (stridx_t));
+
+	    max_chainlength = 0;
+	    wp = db->keystrtab;
+
+	    twalk (db->entries, add_key);
+
+	    if (max_chainlength == 0)
+	      {
+		/* No need to look further, this is as good as it gets.  */
+		nhashentries_best = nhashentries;
+		break;
+	      }
+
+	    if (max_chainlength < chainlength_best)
+	      {
+		chainlength_best = max_chainlength;
+		nhashentries_best = nhashentries;
+	      }
+
+	    nhashentries = next_prime (nhashentries + 1);
+	    if (nhashentries > nhashentries_max)
+	      break;
+	  }
+
+	/* Recompute the best table again, this time fill in the strings.  */
+	nhashentries = nhashentries_best;
+	memset (db->hashtable, '\xff',
+		2 * nhashentries_max * sizeof (stridx_t));
+	copy_string = true;
+	wp = db->keystrtab;
 
-	// XXX if hash length is too long resize table and start again
+	twalk (db->entries, add_key);
 
-	nhashentries += db->nhashentries;
+	db->nhashentries = nhashentries_best;
+	nhashentries_total += nhashentries_best;
     }
 }
 
@@ -626,7 +705,7 @@ write_output (int fd)
   iov[1].iov_len = valstrlen;
   file_offset += valstrlen;
 
-  size_t keydataoffset = file_offset + nhashentries * sizeof (stridx_t);
+  size_t keydataoffset = file_offset + nhashentries_total * sizeof (stridx_t);
   for (struct database *db = databases; db != NULL; db = db->next)
     if (db->entries != NULL)
       {
@@ -639,13 +718,13 @@ write_output (int fd)
 	header->dbs[filled_dbs].hashsize = db->nhashentries;
 
 	iov[2 + filled_dbs].iov_base = db->hashtable;
-	iov[2 + filled_dbs].iov_len = db-> nhashentries * sizeof (stridx_t);
+	iov[2 + filled_dbs].iov_len = db->nhashentries * sizeof (stridx_t);
 	header->dbs[filled_dbs].hashoffset = file_offset;
 	file_offset += iov[2 + filled_dbs].iov_len;
 
 	iov[2 + ndatabases + filled_dbs * 2].iov_base = db->keyidxtab;
 	iov[2 + ndatabases + filled_dbs * 2].iov_len
-	  = db-> nhashentries * sizeof (stridx_t);
+	  = db->nhashentries * sizeof (stridx_t);
 	header->dbs[filled_dbs].keyidxoffset = keydataoffset;
 	keydataoffset += iov[2 + ndatabases + filled_dbs * 2].iov_len;
 
@@ -659,7 +738,7 @@ write_output (int fd)
 
   assert (filled_dbs == ndatabases);
   assert (file_offset == (iov[0].iov_len + iov[1].iov_len
-			  + nhashentries * sizeof (stridx_t)));
+			  + nhashentries_total * sizeof (stridx_t)));
   header->allocate = file_offset;
 
   if (writev (fd, iov, 2 + ndatabases * 3) != keydataoffset)