about summary refs log tree commit diff
diff options
context:
space:
mode:
authorBart Schaefer <barts@users.sourceforge.net>2012-04-10 01:17:02 +0000
committerBart Schaefer <barts@users.sourceforge.net>2012-04-10 01:17:02 +0000
commit4a4d9f3cbe72d3976f2df7053208c671c22c0410 (patch)
tree1cb72d24966c2c35c36ae41cd976005d7970d3ce
parent246a63d9d32e3291ef4993719565d0f087f795a3 (diff)
downloadzsh-4a4d9f3cbe72d3976f2df7053208c671c22c0410.tar.gz
zsh-4a4d9f3cbe72d3976f2df7053208c671c22c0410.tar.xz
zsh-4a4d9f3cbe72d3976f2df7053208c671c22c0410.zip
30383, users/16991 (Vaclav), users/17000: Improve speed of arrayuniq() by
implementing a hash seive algorithm; add test to exercise it.
-rw-r--r--ChangeLog20
-rw-r--r--Src/params.c92
-rw-r--r--Test/D04parameter.ztst10
3 files changed, 118 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index 49879afd5..ad2d39e7f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2012-04-09  Barton E. Schaefer  <schaefer@zsh.org>
+
+	* unposted: Test/D04parameter.ztst: hash seive needs more than 10
+	array elements for arrayuniq() testing.  This test will need to
+	be tweaked if that size changes.
+
+	* unposted (see users/17000): Src/params.c: fix allocation bug in
+	16991 by using heap memory for hash nodes; throw an error if out
+	of heap; pull hash table creation out into a helper function and
+	use arrlen() to count the array.
+
+	* Václav Zeman: users/16991: Src/params.c: implement hash-table
+	seive variant of arrayuniq() to improve speed at cost of space,
+	falls back on the constant-space version for small arrays.
+
+	* 30383: Src/params.c: improve the constant-space variant of
+	arrayuniq() by optimizing shifts.
+
 2012-04-01  Peter Stephenson  <p.w.stephenson@ntlworld.com>
 
 	* users/16944: Functions/Zle/url-quote-magic: some more "local"s
@@ -16146,5 +16164,5 @@
 
 *****************************************************
 * This is used by the shell to define $ZSH_PATCHLEVEL
-* $Revision: 1.5620 $
+* $Revision: 1.5621 $
 *****************************************************
diff --git a/Src/params.c b/Src/params.c
index 8a0ed5143..d960c22cf 100644
--- a/Src/params.c
+++ b/Src/params.c
@@ -3456,18 +3456,104 @@ tiedarrunsetfn(Param pm, UNUSED(int exp))
 
 /**/
 static void
-arrayuniq(char **x, int freeok)
+simple_arrayuniq(char **x, int freeok)
 {
     char **t, **p = x;
+    char *hole = "";
 
+    /* Find duplicates and replace them with holes */
     while (*++p)
 	for (t = x; t < p; t++)
-	    if (!strcmp(*p, *t)) {
+	    if (*t != hole && !strcmp(*p, *t)) {
 		if (freeok)
 		    zsfree(*p);
-		for (t = p--; (*t = t[1]) != NULL; t++);
+		*p = hole;
 		break;
 	    }
+    /* Swap non-holes into holes in optimal jumps */
+    for (p = t = x; *t != NULL; t++) {
+	if (*t == hole) {
+	    while (*p == hole)
+		++p;
+	    if ((*t = *p) != NULL)
+		*p++ = hole;
+	} else if (p == t)
+	    p++;
+    }
+    /* Erase all the remaining holes, just in case */
+    while (++t < p)
+	*t = NULL;
+}
+
+/**/
+static void
+arrayuniq_freenode(HashNode hn)
+{
+    (void)hn;
+}
+
+/**/
+static HashTable
+newuniqtable(zlong size)
+{
+    HashTable ht = newhashtable((int)size, "arrayuniq", NULL);
+    /* ??? error checking */
+
+    ht->hash        = hasher;
+    ht->emptytable  = emptyhashtable;
+    ht->filltable   = NULL;
+    ht->cmpnodes    = strcmp;
+    ht->addnode     = addhashnode;
+    ht->getnode     = gethashnode2;
+    ht->getnode2    = gethashnode2;
+    ht->removenode  = removehashnode;
+    ht->disablenode = disablehashnode;
+    ht->enablenode  = enablehashnode;
+    ht->freenode    = arrayuniq_freenode;
+    ht->printnode   = NULL;
+
+    return ht;
+}
+
+/**/
+static void
+arrayuniq(char **x, int freeok)
+{
+    char **it, **write_it;
+    zlong array_size = arrlen(x);
+    HashTable ht;
+
+    if (array_size == 0)
+	return;
+    if (array_size < 10 || !(ht = newuniqtable(array_size + 1))) {
+	/* fallback to simpler routine */
+	simple_arrayuniq(x, freeok);
+	return;
+    }
+
+    for (it = x, write_it = x; *it;) {
+	if (! gethashnode(ht, *it)) {
+	    HashNode new_node = zhalloc(sizeof(struct hashnode));
+	    if (!new_node) {
+		/* Oops, out of heap memory, no way to recover */
+		zerr("out of memory in arrayuniq");
+		break;
+	    }
+	    (void) addhashnode2(ht, *it, new_node);
+	    *write_it = *it;
+	    if (it != write_it)
+		*it = NULL;
+	    ++write_it;
+	}
+	else {
+	    if (freeok)
+		zsfree(*it);
+	    *it = NULL;
+	}
+	++it;
+    }
+    
+    deletehashtable(ht);
 }
 
 /**/
diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst
index 8bc37ff4c..cc2d6aecd 100644
--- a/Test/D04parameter.ztst
+++ b/Test/D04parameter.ztst
@@ -1076,6 +1076,16 @@
 >i
 >willJOYCE
 
+  print -l JAMES${(u)${=:-$(echo yes yes she said yes i will yes she said she will and yes she did yes)}}JOYCE
+0:New hash seive unique algorithm for arrays of more than 10 elements
+>JAMESyes
+>she
+>said
+>i
+>will
+>and
+>didJOYCE
+
   foo=
   print "${${foo}/?*/replacement}"
 0:Quoted zero-length strings are handled properly