From 4a4d9f3cbe72d3976f2df7053208c671c22c0410 Mon Sep 17 00:00:00 2001 From: Bart Schaefer Date: Tue, 10 Apr 2012 01:17:02 +0000 Subject: 30383, users/16991 (Vaclav), users/17000: Improve speed of arrayuniq() by implementing a hash seive algorithm; add test to exercise it. --- ChangeLog | 20 ++++++++++- Src/params.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++++-- Test/D04parameter.ztst | 10 ++++++ 3 files changed, 118 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 49879afd5..ad2d39e7f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2012-04-09 Barton E. Schaefer + + * unposted: Test/D04parameter.ztst: hash seive needs more than 10 + array elements for arrayuniq() testing. This test will need to + be tweaked if that size changes. + + * unposted (see users/17000): Src/params.c: fix allocation bug in + 16991 by using heap memory for hash nodes; throw an error if out + of heap; pull hash table creation out into a helper function and + use arrlen() to count the array. + + * Václav Zeman: users/16991: Src/params.c: implement hash-table + seive variant of arrayuniq() to improve speed at cost of space, + falls back on the constant-space version for small arrays. + + * 30383: Src/params.c: improve the constant-space variant of + arrayuniq() by optimizing shifts. + 2012-04-01 Peter Stephenson * users/16944: Functions/Zle/url-quote-magic: some more "local"s @@ -16146,5 +16164,5 @@ ***************************************************** * This is used by the shell to define $ZSH_PATCHLEVEL -* $Revision: 1.5620 $ +* $Revision: 1.5621 $ ***************************************************** diff --git a/Src/params.c b/Src/params.c index 8a0ed5143..d960c22cf 100644 --- a/Src/params.c +++ b/Src/params.c @@ -3456,18 +3456,104 @@ tiedarrunsetfn(Param pm, UNUSED(int exp)) /**/ static void -arrayuniq(char **x, int freeok) +simple_arrayuniq(char **x, int freeok) { char **t, **p = x; + char *hole = ""; + /* Find duplicates and replace them with holes */ while (*++p) for (t = x; t < p; t++) - if (!strcmp(*p, *t)) { + if (*t != hole && !strcmp(*p, *t)) { if (freeok) zsfree(*p); - for (t = p--; (*t = t[1]) != NULL; t++); + *p = hole; break; } + /* Swap non-holes into holes in optimal jumps */ + for (p = t = x; *t != NULL; t++) { + if (*t == hole) { + while (*p == hole) + ++p; + if ((*t = *p) != NULL) + *p++ = hole; + } else if (p == t) + p++; + } + /* Erase all the remaining holes, just in case */ + while (++t < p) + *t = NULL; +} + +/**/ +static void +arrayuniq_freenode(HashNode hn) +{ + (void)hn; +} + +/**/ +static HashTable +newuniqtable(zlong size) +{ + HashTable ht = newhashtable((int)size, "arrayuniq", NULL); + /* ??? error checking */ + + ht->hash = hasher; + ht->emptytable = emptyhashtable; + ht->filltable = NULL; + ht->cmpnodes = strcmp; + ht->addnode = addhashnode; + ht->getnode = gethashnode2; + ht->getnode2 = gethashnode2; + ht->removenode = removehashnode; + ht->disablenode = disablehashnode; + ht->enablenode = enablehashnode; + ht->freenode = arrayuniq_freenode; + ht->printnode = NULL; + + return ht; +} + +/**/ +static void +arrayuniq(char **x, int freeok) +{ + char **it, **write_it; + zlong array_size = arrlen(x); + HashTable ht; + + if (array_size == 0) + return; + if (array_size < 10 || !(ht = newuniqtable(array_size + 1))) { + /* fallback to simpler routine */ + simple_arrayuniq(x, freeok); + return; + } + + for (it = x, write_it = x; *it;) { + if (! gethashnode(ht, *it)) { + HashNode new_node = zhalloc(sizeof(struct hashnode)); + if (!new_node) { + /* Oops, out of heap memory, no way to recover */ + zerr("out of memory in arrayuniq"); + break; + } + (void) addhashnode2(ht, *it, new_node); + *write_it = *it; + if (it != write_it) + *it = NULL; + ++write_it; + } + else { + if (freeok) + zsfree(*it); + *it = NULL; + } + ++it; + } + + deletehashtable(ht); } /**/ diff --git a/Test/D04parameter.ztst b/Test/D04parameter.ztst index 8bc37ff4c..cc2d6aecd 100644 --- a/Test/D04parameter.ztst +++ b/Test/D04parameter.ztst @@ -1076,6 +1076,16 @@ >i >willJOYCE + print -l JAMES${(u)${=:-$(echo yes yes she said yes i will yes she said she will and yes she did yes)}}JOYCE +0:New hash seive unique algorithm for arrays of more than 10 elements +>JAMESyes +>she +>said +>i +>will +>and +>didJOYCE + foo= print "${${foo}/?*/replacement}" 0:Quoted zero-length strings are handled properly -- cgit 1.4.1