From 12e246495c2f40ca289b27a552d548318f255472 Mon Sep 17 00:00:00 2001
From: Peter Stephenson <pws@users.sourceforge.net>
Date: Fri, 3 Jun 2011 19:54:43 +0000
Subject: 29413: "print -S" for saving to history with lexical word split

---
 ChangeLog           |   8 +-
 Doc/Zsh/builtins.yo |  11 ++-
 Src/builtin.c       |  44 +++++++---
 Src/hist.c          | 239 ++++++++++++++++++++++++++++++----------------------
 4 files changed, 187 insertions(+), 115 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 2c9bed84c..abc2736e1 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+2011-06-03  Peter Stephenson  <p.w.stephenson@ntlworld.com>
+
+	* 29413: Doc/Zsh/builtins.yo, Src/builtin.c, Src/hist.c: print
+	-S takes a single argument, applies lexical history word
+	splitting, and puts it on the history.
+
 2011-06-02  Frank Terbeck  <ft@bewatermyfriend.org>
 
 	* 29434: Doc/Zsh/contrib.yo: Use PLUS() to avoid a + being
@@ -14925,5 +14931,5 @@
 
 *****************************************************
 * This is used by the shell to define $ZSH_PATCHLEVEL
-* $Revision: 1.5355 $
+* $Revision: 1.5356 $
 *****************************************************
diff --git a/Doc/Zsh/builtins.yo b/Doc/Zsh/builtins.yo
index 7170b13b9..5d717479a 100644
--- a/Doc/Zsh/builtins.yo
+++ b/Doc/Zsh/builtins.yo
@@ -914,7 +914,7 @@ and the new directory stack is not printed.  This is useful for calls to
 tt(popd) that do not change the environment seen by an interactive user.
 )
 findex(print)
-xitem(tt(print) [ tt(-abcDilmnNoOpPrsz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
+xitem(tt(print) [ tt(-abcDilmnNoOpPrsSz) ] [ tt(-u) var(n) ] [ tt(-f) var(format) ] [ tt(-C) var(cols) ])
 item(  [ tt(-R) [ tt(-en) ]] [ var(arg) ... ])(
 With the `tt(-f)' option the arguments are printed as described by tt(printf).
 With no flags or with the flag `tt(-)', the arguments are printed on
@@ -994,6 +994,15 @@ tt(-R); all other arguments and options are printed.
 )
 item(tt(-s))(
 Place the results in the history list instead of on the standard output.
+Each argument to the tt(print) command is treated as a single word in the
+history, regardless of its content.
+)
+item(tt(-S))(
+Place the results in the history list instead of on the standard output.
+In this case only a single argument is allowed; it will be split into
+words as if it were a full shell command line.  The effect is
+similar to reading the line from a history file with the
+tt(HIST_LEX_WORDS) option active.
 )
 item(tt(-u) var(n))(
 Print the arguments to file descriptor var(n).
diff --git a/Src/builtin.c b/Src/builtin.c
index fc98eb1b1..9b34ef7c0 100644
--- a/Src/builtin.c
+++ b/Src/builtin.c
@@ -99,7 +99,7 @@ static struct builtin builtins[] =
 #endif
 
     BUILTIN("popd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 1, BIN_POPD, "q", NULL),
-    BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsu:z-", NULL),
+    BUILTIN("print", BINF_PRINTOPTS, bin_print, 0, -1, BIN_PRINT, "abcC:Df:ilmnNoOpPrRsSu:z-", NULL),
     BUILTIN("printf", 0, bin_print, 1, -1, BIN_PRINTF, NULL, NULL),
     BUILTIN("pushd", BINF_SKIPINVALID | BINF_SKIPDASH | BINF_DASHDASHVALID, bin_cd, 0, 2, BIN_PUSHD, "qsPL", NULL),
     BUILTIN("pushln", 0, bin_print, 0, -1, BIN_PRINT, NULL, "-nz"),
@@ -3965,25 +3965,50 @@ bin_print(char *name, char **args, Options ops, int func)
 	    return 0;
 	}
 	/* -s option -- add the arguments to the history list */
-	if (OPT_ISSET(ops,'s')) {
+	if (OPT_ISSET(ops,'s') || OPT_ISSET(ops,'S')) {
 	    int nwords = 0, nlen, iwords;
 	    char **pargs = args;
 
 	    queue_signals();
-	    ent = prepnexthistent();
 	    while (*pargs++)
 		nwords++;
-	    if ((ent->nwords = nwords)) {
-		ent->words = (short *)zalloc(nwords*2*sizeof(short));
-		nlen = iwords = 0;
-		for (pargs = args; *pargs; pargs++) {
-		    ent->words[iwords++] = nlen;
-		    nlen += strlen(*pargs);
-		    ent->words[iwords++] = nlen;
-		    nlen++;
+	    if (nwords) {
+		if (OPT_ISSET(ops,'S')) {
+		    /* -S: one argument, split into words by the lexer */
+		    int wordsize;
+		    short *words;
+		    if (nwords > 1) {
+			zwarnnam(name, "option -S takes a single argument");
+			/* balance the queue_signals() above */
+			unqueue_signals();
+			return 1;
+		    }
+		    words = NULL;
+		    wordsize = 0;
+		    /* nwords comes back as twice the word count */
+		    histsplitwords(*args, &words, &wordsize, &nwords, 1);
+		    ent = prepnexthistent();
+		    ent->words = (short *)zalloc(nwords*sizeof(short));
+		    memcpy(ent->words, words, nwords*sizeof(short));
+		    free(words);
+		    ent->nwords = nwords/2;
+		} else {
+		    /* -s: each argument is a single history word */
+		    ent = prepnexthistent();
+		    ent->words = (short *)zalloc(nwords*2*sizeof(short));
+		    ent->nwords = nwords;
+		    nlen = iwords = 0;
+		    for (pargs = args; *pargs; pargs++) {
+			ent->words[iwords++] = nlen;
+			nlen += strlen(*pargs);
+			ent->words[iwords++] = nlen;
+			nlen++;
+		    }
 		}
-	    } else
+	    } else {
+		ent = prepnexthistent();
 		ent->words = (short *)NULL;
+	    }
 	    ent->node.nam = zjoin(args, ' ', 0);
 	    ent->stim = ent->ftim = time(NULL);
 	    ent->node.flags = 0;
diff --git a/Src/hist.c b/Src/hist.c
index 01a97da2b..87bfde882 100644
--- a/Src/hist.c
+++ b/Src/hist.c
@@ -2338,110 +2338,11 @@ readhistfile(char *fn, int err, int readflags)
 	    /*
 	     * Divide up the words.
 	     */
-	    nwordpos = 0;
 	    start = pt;
 	    uselex = isset(HISTLEXWORDS) && !(readflags & HFILE_FAST);
-	    if (uselex) {
-		/*
-		 * Attempt to do this using the lexer.
-		 */
-		LinkList wordlist = bufferwords(NULL, pt, NULL,
-						LEXFLAGS_COMMENTS_KEEP);
-		LinkNode wordnode;
-		int nwords_max;
-		nwords_max = 2 * countlinknodes(wordlist);
-		if (nwords_max > nwords) {
-		    nwords = nwords_max;
-		    words = (short *)realloc(words, nwords*sizeof(short));
-		}
-		for (wordnode = firstnode(wordlist);
-		     wordnode;
-		     incnode(wordnode)) {
-		    char *word = getdata(wordnode);
-
-		    for (;;) {
-			/*
-			 * Not really an oddity: "\\\n" is
-			 * removed from input as if whitespace.
-			 */
-			if (inblank(*pt))
-			    pt++;
-			else if (pt[0] == '\\' && pt[1] == '\n')
-			    pt += 2;
-			else
-			    break;
-		    }
-		    if (!strpfx(word, pt)) {
-			int bad = 0;
-			/*
-			 * Oddity 1: newlines turn into semicolons.
-			 */
-			if (!strcmp(word, ";"))
-			    continue;
-			while (*pt) {
-			    if (!*word) {
-				bad = 1;
-				break;
-			    }
-			    /*
-			     * Oddity 2: !'s turn into |'s.
-			     */
-			    if (*pt == *word ||
-				(*pt == '!' && *word == '|')) {
-				pt++;
-				word++;
-			    } else {
-				bad = 1;
-				break;
-			    }
-			}
-			if (bad) {
-#ifdef DEBUG
-			    dputs(ERRMSG("bad wordsplit reading history: "
-					 "%s\nat: %s\nword: %s"),
-				  start, pt, word);
-#endif
-			    pt = start;
-			    nwordpos = 0;
-			    uselex = 0;
-			    break;
-			}
-		    } else if (!strcmp(word, ";") && strpfx(";;", pt)) {
-			/*
-			 * Don't get confused between a semicolon that's
-			 * probably really a newline and a double
-			 * semicolon that's terminating a case.
-			 */
-			continue;
-		    }
-		    words[nwordpos++] = pt - start;
-		    pt += strlen(word);
-		    words[nwordpos++] = pt - start;
-		}
+	    histsplitwords(pt, &words, &nwords, &nwordpos, uselex);
+	    if (uselex)
 		freeheap();
-	    }
-	    if (!uselex) {
-		do {
-		    for (;;) {
-			if (inblank(*pt))
-			    pt++;
-			else if (pt[0] == '\\' && pt[1] == '\n')
-			    pt += 2;
-			else
-			    break;
-		    }
-		    if (*pt) {
-			if (nwordpos >= nwords)
-			    words = (short *)
-				realloc(words, (nwords += 64)*sizeof(short));
-			words[nwordpos++] = pt - start;
-			while (*pt && !inblank(*pt))
-			    pt++;
-			words[nwordpos++] = pt - start;
-		    }
-		} while (*pt);
-
-	    }
 
 	    he->nwords = nwordpos/2;
 	    if (he->nwords) {
@@ -3141,6 +3042,142 @@ bufferwords(LinkList list, char *buf, int *index, int flags)
     return list;
 }
 
+/*
+ * Split up a line into words for use in a history entry.
+ *
+ * lineptr is the line to be split.
+ *
+ * *wordsp and *nwordsp are an array allocated to hold word positions
+ * and its length; both are updated if the array has to grow.  The
+ * array holds a start and an end offset for each word, so *nwordsp
+ * counts twice the number of words it can hold.  *nwordsp may be
+ * zero, in which case the array will be allocated.
+ *
+ * *nwordposp returns the used length of *wordsp in the same units as
+ * *nwordsp, i.e. twice the number of words in the input line.
+ *
+ * If uselex is 1, attempt to do this using the lexical analyser.
+ * This is more accurate, but slower; for reading history files it's
+ * controlled by the option HISTLEXWORDS.  If this fails (which
+ * indicates a bug in the shell) it falls back to whitespace-separated
+ * strings, printing a message if in debug mode.
+ *
+ * If uselex is 0, just look for whitespace-separated words; the only
+ * special handling is for a backslash-newline combination as used
+ * by the history file format to save multiline buffers.
+ */
+/**/
+mod_export void
+histsplitwords(char *lineptr, short **wordsp, int *nwordsp, int *nwordposp,
+	       int uselex)
+{
+    int nwords = *nwordsp, nwordpos = 0;
+    short *words = *wordsp;
+    char *start = lineptr;
+
+    if (uselex) {
+	LinkList wordlist = bufferwords(NULL, lineptr, NULL,
+					LEXFLAGS_COMMENTS_KEEP);
+	LinkNode wordnode;
+	int nwords_max;
+
+	nwords_max = 2 * countlinknodes(wordlist);
+	if (nwords_max > nwords) {
+	    *nwordsp = nwords = nwords_max;
+	    *wordsp = words = (short *)zrealloc(words, nwords*sizeof(short));
+	}
+	for (wordnode = firstnode(wordlist);
+	     wordnode;
+	     incnode(wordnode)) {
+	    char *word = getdata(wordnode);
+
+	    for (;;) {
+		/*
+		 * Skip blanks before the word; a backslash-newline
+		 * pair is dropped from the input just like whitespace.
+		 */
+		if (inblank(*lineptr))
+		    lineptr++;
+		else if (lineptr[0] == '\\' && lineptr[1] == '\n')
+		    lineptr += 2;
+		else
+		    break;
+	    }
+	    if (!strpfx(word, lineptr)) {
+		int bad = 0;
+		/*
+		 * Oddity 1: newlines come back as ";" words; skip them.
+		 */
+		if (!strcmp(word, ";"))
+		    continue;
+		while (*lineptr) {
+		    if (!*word) {
+			bad = 1;
+			break;
+		    }
+		    /*
+		     * Oddity 2: a "!" in the line may come back as "|".
+		     */
+		    if (*lineptr == *word ||
+			(*lineptr == '!' && *word == '|')) {
+			lineptr++;
+			word++;
+		    } else {
+			bad = 1;
+			break;
+		    }
+		}
+		if (bad) {
+#ifdef DEBUG
+		    dputs(ERRMSG("bad wordsplit reading history: "
+				 "%s\nat: %s\nword: %s"),
+			  start, lineptr, word);
+#endif
+		    lineptr = start;
+		    nwordpos = 0;
+		    uselex = 0;	/* redo with the whitespace split below */
+		    break;
+		}
+	    } else if (!strcmp(word, ";") && strpfx(";;", lineptr)) {
+		/*
+		 * Don't get confused between a semicolon that's
+		 * probably really a newline and a double
+		 * semicolon that's terminating a case.
+		 */
+		continue;
+	    }
+	    words[nwordpos++] = lineptr - start;
+	    lineptr += strlen(word);
+	    words[nwordpos++] = lineptr - start;
+	}
+    }
+    if (!uselex) {
+	do {
+	    for (;;) {
+		if (inblank(*lineptr))
+		    lineptr++;
+		else if (lineptr[0] == '\\' && lineptr[1] == '\n')
+		    lineptr += 2;
+		else
+		    break;
+	    }
+	    if (*lineptr) {
+		if (nwordpos >= nwords) {
+		    *nwordsp = nwords = nwords + 64; /* grow by 64 slots */
+		    *wordsp = words = (short *)
+			zrealloc(words, nwords*sizeof(*words));
+		}
+		words[nwordpos++] = lineptr - start;
+		while (*lineptr && !inblank(*lineptr))
+		    lineptr++;
+		words[nwordpos++] = lineptr - start;
+	    }
+	} while (*lineptr);
+    }
+
+    *nwordposp = nwordpos;
+}
+
 /* Move the current history list out of the way and prepare a fresh history
  * list using hf for HISTFILE, hs for HISTSIZE, and shs for SAVEHIST.  If
  * the hf value is an empty string, HISTFILE will be unset from the new
-- 
cgit 1.4.1