diff options
author | Sebastian Gniazdowski <psprint2@fastmail.com> | 2017-02-16 08:03:37 -0800 |
---|---|---|
committer | Peter Stephenson <pws@zsh.org> | 2017-02-17 09:22:02 +0000 |
commit | e9ce00174921354099d96813fc73d62243923904 (patch) | |
tree | cc863e8f99e26fe34eac5bd618c308c3eeb6b93a /Src/Modules | |
parent | 63f086d167960a27ecdbcb762179e2c2bf8a29f5 (diff) | |
download | zsh-e9ce00174921354099d96813fc73d62243923904.tar.gz zsh-e9ce00174921354099d96813fc73d62243923904.tar.xz zsh-e9ce00174921354099d96813fc73d62243923904.zip |
40558, 40562: General improvements to zsh/db/gdbm module
Diffstat (limited to 'Src/Modules')
-rw-r--r-- | Src/Modules/db_gdbm.c | 507 | ||||
-rw-r--r-- | Src/Modules/db_gdbm.mdd | 2 |
2 files changed, 437 insertions, 72 deletions
diff --git a/Src/Modules/db_gdbm.c b/Src/Modules/db_gdbm.c index 310e32948..596a8ae24 100644 --- a/Src/Modules/db_gdbm.c +++ b/Src/Modules/db_gdbm.c @@ -6,6 +6,9 @@ * Copyright (c) 2008 Clint Adams * All rights reserved. * + * Modifications copyright (c) 2017 Sebastian Gniazdowski + * All rights reserved. + * * Permission is hereby granted, without written agreement and without * license or royalty fees, to use, copy, modify, and distribute this * software and to distribute modified versions of this software for any @@ -31,6 +34,15 @@ #include "db_gdbm.mdh" #include "db_gdbm.pro" +#ifndef PM_UPTODATE +#define PM_UPTODATE (1<<19) /* Parameter has up-to-date data (e.g. loaded from DB) */ +#endif + +static Param createhash( char *name, int flags ); +static int append_tied_name( const char *name ); +static int remove_tied_name( const char *name ); +char *unmetafy_zalloc(const char *to_copy, int *new_len); + /* * Make sure we have all the bits I'm using for memory mapping, otherwise * I don't know what I'm doing. @@ -41,8 +53,34 @@ static char *backtype = "db/gdbm"; -static const struct gsu_scalar gdbm_gsu = -{ gdbmgetfn, gdbmsetfn, gdbmunsetfn }; +/* + * Longer GSU structure, to carry GDBM_FILE of owning + * database. Every parameter (hash value) receives GSU + * pointer and thus also receives GDBM_FILE - this way + * parameters can access proper database. + * + * Main HashTable parameter has the same instance of + * the custom GSU struct in u.hash->tmpdata field. + * When database is closed, `dbf` field is set to NULL + * and hash values know to not access database when + * being unset (total purge at zuntie). + * + * When database closing is ended, custom GSU struct + * is freed. Only new ztie creates new custom GSU + * struct instance. + */ + +struct gsu_scalar_ext { + struct gsu_scalar std; + GDBM_FILE dbf; + char *dbfile_path; +}; + +/* Source structure - will be copied to allocated one, + * with `dbf` filled. `dbf` allocation <-> gsu allocation. */ +static const struct gsu_scalar_ext gdbm_gsu_ext = +{ { gdbmgetfn, gdbmsetfn, gdbmunsetfn }, 0, 0 }; + /**/ static const struct gsu_hash gdbm_hash_gsu = { hashgetfn, gdbmhashsetfn, gdbmhashunsetfn }; @@ -50,6 +88,17 @@ static const struct gsu_hash gdbm_hash_gsu = static struct builtin bintab[] = { BUILTIN("ztie", 0, bin_ztie, 1, -1, 0, "d:f:r", NULL), BUILTIN("zuntie", 0, bin_zuntie, 1, -1, 0, "u", NULL), + BUILTIN("zgdbmpath", 0, bin_zgdbmpath, 1, -1, 0, "", NULL), +}; + +#define ROARRPARAMDEF(name, var) \ + { name, PM_ARRAY | PM_READONLY, (void *) var, NULL, NULL, NULL, NULL } + +/* Holds names of all tied parameters */ +char **zgdbm_tied; + +static struct paramdef patab[] = { + ROARRPARAMDEF( "zgdbm_tied", &zgdbm_tied ), }; /**/ @@ -77,8 +126,7 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func)) } /* Here should be a lookup of the backend type against - * a registry. - */ + * a registry, if generam DB mechanism is to be added */ if (strcmp(OPT_ARG(ops, 'd'), backtype) != 0) { zwarnnam(nam, "unsupported backend type `%s'", OPT_ARG(ops, 'd')); return 1; @@ -92,7 +140,8 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func)) /* * Unset any existing parameter. Note there's no implicit * "local" here, but if the existing parameter is local - * that will be reflected in the new one. + * then new parameter will be also local without following + * unset. * * We need to do this before attempting to open the DB * in case this variable is already tied to a DB. @@ -106,15 +155,15 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func)) } dbf = gdbm_open(resource_name, 0, read_write, 0666, 0); - if(dbf) - addmodulefd(gdbm_fdesc(dbf), FDT_INTERNAL); - else { + if(dbf) { + addmodulefd(gdbm_fdesc(dbf), FDT_MODULE); + append_tied_name(pmname); + } else { zwarnnam(nam, "error opening database file %s", resource_name); return 1; } - if (!(tied_param = createspecialhash(pmname, &getgdbmnode, &scangdbmkeys, - pmflags))) { + if (!(tied_param = createhash(pmname, pmflags))) { zwarnnam(nam, "cannot create the requested parameter %s", pmname); fdtable[gdbm_fdesc(dbf)] = FDT_UNUSED; gdbm_close(dbf); @@ -122,8 +171,23 @@ bin_ztie(char *nam, char **args, Options ops, UNUSED(int func)) } tied_param->gsu.h = &gdbm_hash_gsu; - tied_param->u.hash->tmpdata = (void *)dbf; + /* Allocate parameter sub-gsu, fill dbf field. + * dbf allocation is 1 to 1 accompanied by + * gsu_scalar_ext allocation. */ + + struct gsu_scalar_ext *dbf_carrier = (struct gsu_scalar_ext *) zalloc(sizeof(struct gsu_scalar_ext)); + dbf_carrier->std = gdbm_gsu_ext.std; + dbf_carrier->dbf = dbf; + tied_param->u.hash->tmpdata = (void *)dbf_carrier; + + /* Fill also file path field */ + if (*resource_name != '/') { + /* Code copied from check_autoload() */ + resource_name = zhtricat(metafy(zgetcwd(), -1, META_HEAPDUP), "/", resource_name); + resource_name = xsymlink(resource_name, 1); + } + dbf_carrier->dbfile_path = ztrdup(resource_name); return 0; } @@ -162,6 +226,53 @@ bin_zuntie(char *nam, char **args, Options ops, UNUSED(int func)) } /**/ +static int +bin_zgdbmpath(char *nam, char **args, Options ops, UNUSED(int func)) +{ + Param pm; + char *pmname; + + pmname = *args; + + if (!pmname) { + zwarnnam(nam, "parameter name (whose path is to be written to $REPLY) is required"); + return 1; + } + + pm = (Param) paramtab->getnode(paramtab, pmname); + if(!pm) { + zwarnnam(nam, "no such parameter: %s", pmname); + return 1; + } + + if (pm->gsu.h != &gdbm_hash_gsu) { + zwarnnam(nam, "not a tied gdbm parameter: %s", pmname); + return 1; + } + + /* Paranoia, it *will* be always set */ + if (((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbfile_path) { + setsparam("REPLY", ztrdup(((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbfile_path)); + } else { + setsparam("REPLY", ztrdup("")); + } + + return 0; +} + +/* + * The param is actual param in hash – always, because + * getgdbmnode creates every new key seen. However, it + * might be not PM_UPTODATE - which means that database + * wasn't yet queried. + * + * It will be left in this state if database doesn't + * contain such key. That might be a drawback, maybe + * setting to empty value has sense, as no other writer + * can exist. This would remove subtle hcalloc(1) leak. + */ + +/**/ static char * gdbmgetfn(Param pm) { @@ -169,18 +280,56 @@ gdbmgetfn(Param pm) int ret; GDBM_FILE dbf; - key.dptr = pm->node.nam; - key.dsize = strlen(key.dptr); + /* Key already retrieved? There is no sense of asking the + * database again, because: + * - there can be only multiple readers + * - so, no writer + reader use is allowed + * + * Thus: + * - if we are writers, we for sure have newest copy of data + * - if we are readers, we for sure have newest copy of data + */ + if ( pm->node.flags & PM_UPTODATE ) { + return pm->u.str ? pm->u.str : (char *) hcalloc(1); + } + + /* Unmetafy key. GDBM fits nice into this + * process, as it uses length of data */ + int umlen = 0; + char *umkey = unmetafy_zalloc(pm->node.nam,¨en); + + key.dptr = umkey; + key.dsize = umlen; + + dbf = ((struct gsu_scalar_ext *)pm->gsu.s)->dbf; + + if((ret = gdbm_exists(dbf, key))) { + /* We have data – store it, return it */ + pm->node.flags |= PM_UPTODATE; - dbf = (GDBM_FILE)(pm->u.hash->tmpdata); - ret = gdbm_exists(dbf, key); - if(ret) { content = gdbm_fetch(dbf, key); - } else { - content.dptr = dupstring(""); + + /* Ensure there's no leak */ + if (pm->u.str) { + zsfree(pm->u.str); + } + + /* Metafy returned data. All fits - metafy + * can obtain data length to avoid using \0 */ + pm->u.str = metafy(content.dptr, content.dsize, META_DUP); + + /* Free key, restoring its original length */ + zsfree(umkey); + + /* Can return pointer, correctly saved inside hash */ + return pm->u.str; } - return content.dptr; + /* Free key */ + zsfree(umkey); + + /* Can this be "" ? */ + return (char *) hcalloc(1); } /**/ @@ -190,78 +339,126 @@ gdbmsetfn(Param pm, char *val) datum key, content; GDBM_FILE dbf; - key.dptr = pm->node.nam; - key.dsize = strlen(key.dptr); - content.dptr = val; - content.dsize = strlen(content.dptr); + /* Set is done on parameter and on database. + * See the allowed workers / readers comment + * at gdbmgetfn() */ + + /* Parameter */ + if (pm->u.str) { + zsfree(pm->u.str); + pm->u.str = NULL; + pm->node.flags &= ~(PM_UPTODATE); + } + + if (val) { + pm->u.str = ztrdup(val); + pm->node.flags |= PM_UPTODATE; + } + + /* Database */ + dbf = ((struct gsu_scalar_ext *)pm->gsu.s)->dbf; + if (dbf) { + int umlen = 0; + char *umkey = unmetafy_zalloc(pm->node.nam,¨en); - dbf = (GDBM_FILE)(pm->u.hash->tmpdata); - (void)gdbm_store(dbf, key, content, GDBM_REPLACE); + key.dptr = umkey; + key.dsize = umlen; + + if (val) { + /* Unmetafy with exact zalloc size */ + char *umval = unmetafy_zalloc(val,¨en); + + /* Store */ + content.dptr = umval; + content.dsize = umlen; + (void)gdbm_store(dbf, key, content, GDBM_REPLACE); + + /* Free */ + zsfree(umval); + } else { + (void)gdbm_delete(dbf, key); + } + + /* Free key */ + zsfree(umkey); + } } /**/ static void gdbmunsetfn(Param pm, UNUSED(int um)) { - datum key; - GDBM_FILE dbf; - - key.dptr = pm->node.nam; - key.dsize = strlen(key.dptr); - - dbf = (GDBM_FILE)(pm->u.hash->tmpdata); - (void)gdbm_delete(dbf, key); + /* Set with NULL */ + gdbmsetfn(pm, NULL); } /**/ static HashNode getgdbmnode(HashTable ht, const char *name) { - int len; - char *nameu; - Param pm = NULL; - - nameu = dupstring(name); - unmetafy(nameu, &len); - - pm = (Param) hcalloc(sizeof(struct param)); - pm->node.nam = nameu; - pm->node.flags = PM_SCALAR; - pm->gsu.s = &gdbm_gsu; - pm->u.hash = ht; + HashNode hn = gethashnode2( ht, name ); + Param val_pm = (Param) hn; + + /* Entry for key doesn't exist? Create it now, + * it will be interfacing between the database + * and Zsh - through special gdbm_gsu. So, any + * seen key results in new interfacing parameter. + * + * Previous code was returning heap arena Param + * that wasn't actually added to the hash. It was + * plainly name / database-key holder. Here we + * add the Param to its hash, it is not PM_UPTODATE. + * It will be loaded from database *and filled* + * or left in that state if the database doesn't + * contain it. + * + * No heap arena memory is used, memory usage is + * now limited - by number of distinct keys seen, + * not by number of key *uses*. + * */ + + if ( ! val_pm ) { + val_pm = (Param) zshcalloc( sizeof (*val_pm) ); + val_pm->node.flags = PM_SCALAR | PM_HASHELEM; /* no PM_UPTODATE */ + val_pm->gsu.s = (GsuScalar) ht->tmpdata; + ht->addnode( ht, ztrdup( name ), val_pm ); // sets pm->node.nam + } - return &pm->node; + return (HashNode) val_pm; } /**/ static void scangdbmkeys(HashTable ht, ScanFunc func, int flags) { - Param pm = NULL; - datum key, content; - GDBM_FILE dbf = (GDBM_FILE)(ht->tmpdata); - - pm = (Param) hcalloc(sizeof(struct param)); - - pm->node.flags = PM_SCALAR; - pm->gsu.s = &nullsetscalar_gsu; + datum key; + GDBM_FILE dbf = ((struct gsu_scalar_ext *)ht->tmpdata)->dbf; + /* Iterate keys adding them to hash, so + * we have Param to use in `func` */ key = gdbm_firstkey(dbf); while(key.dptr) { - content = gdbm_fetch(dbf, key); - - pm->node.nam = key.dptr; - pm->u.str = content.dptr; - pm->gsu.s = &nullsetscalar_gsu; + /* This returns database-interfacing Param, + * it will return u.str or first fetch data + * if not PM_UPTODATE (newly created) */ + char *zkey = metafy(key.dptr, key.dsize, META_DUP); + HashNode hn = getgdbmnode(ht, zkey); + zsfree( zkey ); - func(&pm->node, flags); + func(hn, flags); + /* Iterate - no problem as interfacing Param + * will do at most only fetches, not stores */ key = gdbm_nextkey(dbf, key); } } +/* + * Replace database with new hash + */ + /**/ static void gdbmhashsetfn(Param pm, HashTable ht) @@ -274,7 +471,7 @@ gdbmhashsetfn(Param pm, HashTable ht) if (!pm->u.hash || pm->u.hash == ht) return; - if (!(dbf = (GDBM_FILE)(pm->u.hash->tmpdata))) + if (!(dbf = ((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbf)) return; key = gdbm_firstkey(dbf); @@ -292,6 +489,9 @@ gdbmhashsetfn(Param pm, HashTable ht) if (!ht) return; + /* Put new strings into database, waiting + * for their interfacing-Params to be created */ + for (i = 0; i < ht->hsize; i++) for (hn = ht->nodes[i]; hn; hn = hn->next) { struct value v; @@ -301,16 +501,29 @@ gdbmhashsetfn(Param pm, HashTable ht) v.arr = NULL; v.pm = (Param) hn; - key.dptr = v.pm->node.nam; - key.dsize = strlen(key.dptr); + /* Unmetafy key */ + int umlen = 0; + char *umkey = unmetafy_zalloc(v.pm->node.nam,¨en); + + key.dptr = umkey; + key.dsize = umlen; queue_signals(); - content.dptr = getstrvalue(&v); - content.dsize = strlen(content.dptr); + /* Unmetafy */ + char *umval = unmetafy_zalloc(getstrvalue(&v),¨en); + /* Store */ + content.dptr = umval; + content.dsize = umlen; (void)gdbm_store(dbf, key, content, GDBM_REPLACE); + /* Free - thanks to unmetafy_zalloc size of + * the strings is exact zalloc size - can + * pass to zsfree */ + zsfree(umval); + zsfree(umkey); + unqueue_signals(); } } @@ -319,15 +532,19 @@ gdbmhashsetfn(Param pm, HashTable ht) static void gdbmuntie(Param pm) { - GDBM_FILE dbf = (GDBM_FILE)(pm->u.hash->tmpdata); + GDBM_FILE dbf = ((struct gsu_scalar_ext *)pm->u.hash->tmpdata)->dbf; HashTable ht = pm->u.hash; if (dbf) { /* paranoia */ fdtable[gdbm_fdesc(dbf)] = FDT_UNUSED; - gdbm_close(dbf); - } + gdbm_close(dbf); - ht->tmpdata = NULL; + /* Let hash fields know there's no backend */ + ((struct gsu_scalar_ext *)ht->tmpdata)->dbf = NULL; + + /* Remove from list of tied parameters */ + remove_tied_name(pm->node.nam); + } /* for completeness ... createspecialhash() should have an inverse */ ht->getnode = ht->getnode2 = gethashnode2; @@ -342,8 +559,20 @@ static void gdbmhashunsetfn(Param pm, UNUSED(int exp)) { gdbmuntie(pm); - /* hash table is now normal, so proceed normally... */ + + /* Remember custom GSU structure assigned to + * u.hash->tmpdata before hash gets deleted */ + struct gsu_scalar_ext * gsu_ext = pm->u.hash->tmpdata; + + /* Uses normal unsetter. Will delete all owned + * parameters and also hashtable. */ pm->gsu.h->setfn(pm, NULL); + + /* Don't need custom GSU structure with its + * GDBM_FILE pointer anymore */ + zsfree( gsu_ext->dbfile_path ); + zfree( gsu_ext, sizeof(struct gsu_scalar_ext)); + pm->node.flags |= PM_UNSET; } @@ -355,7 +584,7 @@ static struct features module_features = { bintab, sizeof(bintab)/sizeof(*bintab), NULL, 0, NULL, 0, - NULL, 0, + patab, sizeof(patab)/sizeof(*patab), 0 }; @@ -385,6 +614,7 @@ enables_(Module m, int **enables) int boot_(UNUSED(Module m)) { + zgdbm_tied = zshcalloc((1) * sizeof(char *)); return 0; } @@ -392,6 +622,7 @@ boot_(UNUSED(Module m)) int cleanup_(Module m) { + /* This frees `zgdbm_tied` */ return setfeatureenables(m, &module_features, NULL); } @@ -401,3 +632,137 @@ finish_(UNUSED(Module m)) { return 0; } + +/********************* + * Utility functions * + *********************/ + +static Param createhash( char *name, int flags ) { + Param pm; + HashTable ht; + + pm = createparam(name, PM_SPECIAL | PM_HASHED); + if (!pm) { + return NULL; + } + + if (pm->old) + pm->level = locallevel; + + /* This creates standard hash. */ + ht = pm->u.hash = newparamtable(32, name); + if (!pm->u.hash) { + paramtab->removenode(paramtab, name); + paramtab->freenode(&pm->node); + zwarnnam(name, "Out of memory when allocating hash"); + } + + /* These provide special features */ + ht->getnode = ht->getnode2 = getgdbmnode; + ht->scantab = scangdbmkeys; + + return pm; +} + +/* + * Adds parameter name to `zgdbm_tied` + */ + +static int append_tied_name( const char *name ) { + int old_len = arrlen(zgdbm_tied); + char **new_zgdbm_tied = zshcalloc( (old_len+2) * sizeof(char *)); + + /* Copy */ + char **p = zgdbm_tied; + char **dst = new_zgdbm_tied; + while (*p) { + *dst++ = *p++; + } + + /* Append new one */ + *dst = ztrdup(name); + + /* Substitute, free old one */ + zfree(zgdbm_tied, sizeof(char *) * (old_len + 1)); + zgdbm_tied = new_zgdbm_tied; + + return 0; +} + +/* + * Removes parameter name from `zgdbm_tied` + */ + +static int remove_tied_name( const char *name ) { + int old_len = arrlen(zgdbm_tied); + + /* Two stage, to always have arrlen() == zfree-size - 1. + * Could do allocation and revert when `not found`, but + * what would be better about that. */ + + /* Find one to remove */ + char **p = zgdbm_tied; + while (*p) { + if (0==strcmp(name,*p)) { + break; + } + p++; + } + + /* Copy x+1 to x */ + while (*p) { + *p=*(p+1); + p++; + } + + /* Second stage. Size changed? Only old_size-1 + * change is possible, but.. paranoia way */ + int new_len = arrlen(zgdbm_tied); + if (new_len != old_len) { + char **new_zgdbm_tied = zshcalloc((new_len+1) * sizeof(char *)); + + /* Copy */ + p = zgdbm_tied; + char **dst = new_zgdbm_tied; + while (*p) { + *dst++ = *p++; + } + *dst = NULL; + + /* Substitute, free old one */ + zfree(zgdbm_tied, sizeof(char *) * (old_len + 1)); + zgdbm_tied = new_zgdbm_tied; + } + + return 0; +} + +/* + * Unmetafy that: + * - duplicates bufer to work on it, + * - does zalloc of exact size for the new string, + * - restores work buffer to original content, to restore strlen + * + * No zsfree()-confusing string will be produced. + */ +char *unmetafy_zalloc(const char *to_copy, int *new_len) { + char *work, *to_return; + int my_new_len = 0; + + work = ztrdup(to_copy); + work = unmetafy(work,&my_new_len); + + if (new_len) + *new_len = my_new_len; + + /* This string can be correctly zsfree()-d */ + to_return = (char *) zalloc((my_new_len+1)*sizeof(char)); + memcpy(to_return, work, sizeof(char)*my_new_len); // memcpy handles $'\0' + to_return[my_new_len]='\0'; + + /* Restore original strlen and correctly free */ + strcpy(work, to_copy); + zsfree(work); + + return to_return; +} diff --git a/Src/Modules/db_gdbm.mdd b/Src/Modules/db_gdbm.mdd index ce7926bd9..210c22177 100644 --- a/Src/Modules/db_gdbm.mdd +++ b/Src/Modules/db_gdbm.mdd @@ -7,6 +7,6 @@ fi ' load=no -autofeatures="b:ztie b:zuntie" +autofeatures="b:ztie b:zuntie b:zgdbmpath p:zgdbm_tied" objects="db_gdbm.o" |