/*
 * lex.c - lexical analysis
 *
 * This file is part of zsh, the Z shell.
 *
 * Copyright (c) 1992-1997 Paul Falstad
 * All rights reserved.
 *
 * Permission is hereby granted, without written agreement and without
 * license or royalty fees, to use, copy, modify, and distribute this
 * software and to distribute modified versions of this software for any
 * purpose, provided that the above copyright notice and the following
 * two paragraphs appear in all copies of this software.
 *
 * In no event shall Paul Falstad or the Zsh Development Group be liable
 * to any party for direct, indirect, special, incidental, or consequential
 * damages arising out of the use of this software and its documentation,
 * even if Paul Falstad and the Zsh Development Group have been advised of
 * the possibility of such damage.
 *
 * Paul Falstad and the Zsh Development Group specifically disclaim any
 * warranties, including, but not limited to, the implied warranties of
 * merchantability and fitness for a particular purpose.  The software
 * provided hereunder is on an "as is" basis, and Paul Falstad and the
 * Zsh Development Group have no obligation to provide maintenance,
 * support, updates, enhancements, or modifications.
 *
 */

#include "zsh.mdh"
#include "lex.pro"

/*
 * Initial size, in bytes, of the heap buffer allocated for a token
 * string; add() doubles the buffer whenever it fills up.
 */
#define LEX_HEAP_SIZE (32)

/*
 * tokens: the literal character for each token byte, indexed by
 * (token - Pound).
 */

/**/
mod_export char ztokens[] = "#$^*(())$=|{}[]`<>>?~`,'\"\\\\";

/* parts of the current token */

/* Untokenized text of the current token, set by exalias() */
/**/
char *zshlextext;
/* Tokenized string of the current token, or NULL for punctuation tokens */
/**/
mod_export char *tokstr;
/* Type of the current token */
/**/
mod_export enum lextok tok;
/* File descriptor prefix of a redirection token; -1 if none was given */
/**/
mod_export int tokfd;

/*
 * Line number at which the first character of a token was found.
 * We always set this in gettok(), which is always called from
 * zshlex() unless we have reached an error.  So it is always
 * valid when parsing.  It is not useful during execution
 * of the parsed structure.
 */

/**/
zlong toklineno;

/* lexical analyzer error flag */

/**/
mod_export int lexstop;

/* if != 0, this is the first line of the command */

/**/
mod_export int isfirstln;

/* if != 0, this is the first char of the command (not including white space) */

/**/
int isfirstch;

/* flag that an alias should be expanded after expansion ending in space */

/**/
int inalmore;

/*
 * Don't do spelling correction.
 * Bit 1 (value 2) is only valid for the current word.  It's
 * set when we detect a lookahead that stops the word from
 * needing correction, and zshlex() clears it again after
 * each token by masking the variable with 1.
 */

/**/
int nocorrect;

/*
 * TBD: the following exported variables are part of the non-interface
 * with ZLE for completion.  They are poorly named and the whole
 * scheme is incredibly brittle.  One piece of robustness is applied:
 * the variables are only set if LEXFLAGS_ZLE is set.  Improvements
 * should therefore concentrate on areas with this flag set.
 *
 * Cursor position and line length in zle when the line is
 * metafied for access from the main shell.
 */

/**/
mod_export int zlemetacs, zlemetall;

/*
 * inwhat says what exactly we are in
 * (its value is one of the IN_* things).
 */

/**/
mod_export int inwhat;

/* 1 if x added to complete in a blank between words */

/**/
mod_export int addedx;

/* wb and we hold the beginning/end position of the word we are completing. */

/**/
mod_export int wb, we;

/*
 * Value of inbufct recorded by gettok() at the start of each token
 * (adjusted by one for a quoted history "bang"); only set when
 * LEXFLAGS_ZLE is in effect.
 */
/**/
mod_export int wordbeg;

/*
 * Values of inbufct recorded by the SETPARBEGIN / SETPAREND macros
 * around a backquoted section.
 */
/**/
mod_export int parbegin;

/**/
mod_export int parend;

/* 1 if aliases should not be expanded */

/**/
mod_export int noaliases;

/*
 * If non-zero, we are parsing a line sent to us by the editor, or some
 * other string that's not part of standard command input (e.g. eval is
 * part of normal command input).
 *
 * Set of bits from LEXFLAGS_*.
 *
 * Note that although it is passed into the lexer as an input, the
 * lexer can set it to zero after finding the word it's searching for.
 * This only happens if the line being parsed actually does come from
 * ZLE, and hence the bit LEXFLAGS_ZLE is set.
*/ /**/ mod_export int lexflags; /* don't recognize comments */ /**/ mod_export int nocomments; /* add raw input characters while parsing command substitution */ /**/ static int lex_add_raw; /* variables associated with the above */ static char *tokstr_raw; static struct lexbufstate lexbuf_raw; /* text of punctuation tokens */ /**/ mod_export char *tokstrings[WHILE + 1] = { NULL, /* NULLTOK 0 */ ";", /* SEPER */ "\\n", /* NEWLIN */ ";", /* SEMI */ ";;", /* DSEMI */ "&", /* AMPER 5 */ "(", /* INPAR */ ")", /* OUTPAR */ "||", /* DBAR */ "&&", /* DAMPER */ ">", /* OUTANG 10 */ ">|", /* OUTANGBANG */ ">>", /* DOUTANG */ ">>|", /* DOUTANGBANG */ "<", /* INANG */ "<>", /* INOUTANG 15 */ "<<", /* DINANG */ "<<-", /* DINANGDASH */ "<&", /* INANGAMP */ ">&", /* OUTANGAMP */ "&>", /* AMPOUTANG 20 */ "&>|", /* OUTANGAMPBANG */ ">>&", /* DOUTANGAMP */ ">>&|", /* DOUTANGAMPBANG */ "<<<", /* TRINANG */ "|", /* BAR 25 */ "|&", /* BARAMP */ "()", /* INOUTPAR */ "((", /* DINPAR */ "))", /* DOUTPAR */ "&|", /* AMPERBANG 30 */ ";&", /* SEMIAMP */ ";|", /* SEMIBAR */ }; /* lexical state */ static int dbparens; static struct lexbufstate lexbuf = { NULL, 256, 0 }; /* save lexical context */ /**/ void lex_context_save(struct lex_stack *ls, int toplevel) { (void)toplevel; ls->dbparens = dbparens; ls->isfirstln = isfirstln; ls->isfirstch = isfirstch; ls->lexflags = lexflags; ls->tok = tok; ls->tokstr = tokstr; ls->zshlextext = zshlextext; ls->lexbuf = lexbuf; ls->lex_add_raw = lex_add_raw; ls->tokstr_raw = tokstr_raw; ls->lexbuf_raw = lexbuf_raw; ls->lexstop = lexstop; ls->toklineno = toklineno; tokstr = zshlextext = lexbuf.ptr = NULL; lexbuf.siz = 256; tokstr_raw = lexbuf_raw.ptr = NULL; lexbuf_raw.siz = lexbuf_raw.len = lex_add_raw = 0; } /* restore lexical context */ /**/ mod_export void lex_context_restore(const struct lex_stack *ls, int toplevel) { (void)toplevel; dbparens = ls->dbparens; isfirstln = ls->isfirstln; isfirstch = ls->isfirstch; lexflags = ls->lexflags; tok = ls->tok; 
tokstr = ls->tokstr; zshlextext = ls->zshlextext; lexbuf = ls->lexbuf; lex_add_raw = ls->lex_add_raw; tokstr_raw = ls->tokstr_raw; lexbuf_raw = ls->lexbuf_raw; lexstop = ls->lexstop; toklineno = ls->toklineno; } /**/ void zshlex(void) { if (tok == LEXERR) return; do tok = gettok(); while (tok != ENDINPUT && exalias()); nocorrect &= 1; if (tok == NEWLIN || tok == ENDINPUT) { while (hdocs) { struct heredocs *next = hdocs->next; char *doc, *munged_term; hwbegin(0); cmdpush(hdocs->type == REDIR_HEREDOC ? CS_HEREDOC : CS_HEREDOCD); munged_term = dupstring(hdocs->str); STOPHIST doc = gethere(&munged_term, hdocs->type); ALLOWHIST cmdpop(); hwend(); if (!doc) { zerr("here document too large"); while (hdocs) { next = hdocs->next; zfree(hdocs, sizeof(struct heredocs)); hdocs = next; } tok = LEXERR; break; } setheredoc(hdocs->pc, REDIR_HERESTR, doc, hdocs->str, munged_term); zfree(hdocs, sizeof(struct heredocs)); hdocs = next; } } if (tok != NEWLIN) isnewlin = 0; else isnewlin = (inbufct) ? -1 : 1; if (tok == SEMI || (tok == NEWLIN && !(lexflags & LEXFLAGS_NEWLINE))) tok = SEPER; } /**/ mod_export void ctxtlex(void) { static int oldpos; zshlex(); switch (tok) { case SEPER: case NEWLIN: case SEMI: case DSEMI: case SEMIAMP: case SEMIBAR: case AMPER: case AMPERBANG: case INPAR: case INBRACE: case DBAR: case DAMPER: case BAR: case BARAMP: case INOUTPAR: case DOLOOP: case THEN: case ELIF: case ELSE: case DOUTBRACK: incmdpos = 1; break; case STRING: /* case ENVSTRING: */ case ENVARRAY: case OUTPAR: case CASE: case DINBRACK: incmdpos = 0; break; default: /* nothing to do, keep compiler happy */ break; } if (tok != DINPAR) infor = tok == FOR ? 
2 : 0; if (IS_REDIROP(tok) || tok == FOR || tok == FOREACH || tok == SELECT) { inredir = 1; oldpos = incmdpos; incmdpos = 0; } else if (inredir) { incmdpos = oldpos; inredir = 0; } } #define LX1_BKSLASH 0 #define LX1_COMMENT 1 #define LX1_NEWLIN 2 #define LX1_SEMI 3 #define LX1_AMPER 5 #define LX1_BAR 6 #define LX1_INPAR 7 #define LX1_OUTPAR 8 #define LX1_INANG 13 #define LX1_OUTANG 14 #define LX1_OTHER 15 #define LX2_BREAK 0 #define LX2_OUTPAR 1 #define LX2_BAR 2 #define LX2_STRING 3 #define LX2_INBRACK 4 #define LX2_OUTBRACK 5 #define LX2_TILDE 6 #define LX2_INPAR 7 #define LX2_INBRACE 8 #define LX2_OUTBRACE 9 #define LX2_OUTANG 10 #define LX2_INANG 11 #define LX2_EQUALS 12 #define LX2_BKSLASH 13 #define LX2_QUOTE 14 #define LX2_DQUOTE 15 #define LX2_BQUOTE 16 #define LX2_COMMA 17 #define LX2_OTHER 18 #define LX2_META 19 static unsigned char lexact1[256], lexact2[256], lextok2[256]; /**/ void initlextabs(void) { int t0; static char *lx1 = "\\q\n;!&|(){}[]<>"; static char *lx2 = ";)|$[]~({}><=\\\'\"`,"; for (t0 = 0; t0 != 256; t0++) { lexact1[t0] = LX1_OTHER; lexact2[t0] = LX2_OTHER; lextok2[t0] = t0; } for (t0 = 0; lx1[t0]; t0++) lexact1[(int)lx1[t0]] = t0; for (t0 = 0; lx2[t0]; t0++) lexact2[(int)lx2[t0]] = t0; lexact2['&'] = LX2_BREAK; lexact2[STOUC(Meta)] = LX2_META; lextok2['*'] = Star; lextok2['?'] = Quest; lextok2['{'] = Inbrace; lextok2['['] = Inbrack; lextok2['$'] = String; lextok2['~'] = Tilde; lextok2['#'] = Pound; lextok2['^'] = Hat; } /* initialize lexical state */ /**/ void lexinit(void) { nocorrect = dbparens = lexstop = 0; tok = ENDINPUT; } /* add a char to the string buffer */ /**/ void add(int c) { *lexbuf.ptr++ = c; if (lexbuf.siz == ++lexbuf.len) { int newbsiz = lexbuf.siz * 2; if (newbsiz > inbufct && inbufct > lexbuf.siz) newbsiz = inbufct; tokstr = (char *)hrealloc(tokstr, lexbuf.siz, newbsiz); lexbuf.ptr = tokstr + lexbuf.len; /* len == bsiz, so bptr is at the start of newly allocated memory */ memset(lexbuf.ptr, 0, newbsiz - lexbuf.siz); 
	lexbuf.siz = newbsiz;
    }
}

/*
 * When lexing for ZLE (and not inside alias text), record in parbegin
 * the current value of inbufct if the cursor test
 * zlemetacs >= zlemetall+1-inbufct holds.
 * Expands to a complete braced block, so it is used without a
 * trailing semicolon.
 */
#define SETPARBEGIN { \
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
	    zlemetacs >= zlemetall+1-inbufct) \
	    parbegin = inbufct; \
    }
/*
 * Counterpart of SETPARBEGIN: if parbegin has been set and parend has
 * not, either forget parbegin (same cursor test still true) or record
 * the current inbufct in parend.
 */
#define SETPAREND { \
	if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS) && \
	    parbegin != -1 && parend == -1) { \
	    if (zlemetacs >= zlemetall + 1 - inbufct) \
		parbegin = -1; \
	    else \
		parend = inbufct; \
	} \
    }

/* Result codes of cmd_or_math() and cmd_or_math_sub() */
enum {
    CMD_OR_MATH_CMD,
    CMD_OR_MATH_MATH,
    CMD_OR_MATH_ERR
};

/*
 * Return one of the above.  If it couldn't be
 * parsed as math, but there was no gross error, it's a command.
 *
 * cs_type is pushed on the command stack for the duration of the parse.
 * The text is read with dquote_parse() up to a closing ')'; it counts
 * as math only if that is immediately followed by a second ')'.
 * Otherwise everything added to the token buffer since entry is pushed
 * back onto the input, followed by a '(', so that the caller can
 * re-read it as a command.
 */
static int
cmd_or_math(int cs_type)
{
    int oldlen = lexbuf.len;
    int c;
    int oinflags = inbufflags;

    cmdpush(cs_type);
    inbufflags |= INP_APPEND;
    c = dquote_parse(')', 0);
    /* only clear INP_APPEND if it wasn't already set on entry */
    if (!(oinflags & INP_APPEND))
	inbufflags &= ~INP_APPEND;
    cmdpop();
    *lexbuf.ptr = '\0';
    if (!c) {
	/* Successfully parsed, see if it was math */
	c = hgetc();
	if (c == ')')
	    return CMD_OR_MATH_MATH; /* yes */
	hungetc(c);
	lexstop = 0;
	c = ')';
    } else if (lexstop) {
	/* we haven't got anything to unget */
	return CMD_OR_MATH_ERR;
    }
    /* else unsuccessful: unget the whole thing */
    hungetc(c);
    lexstop = 0;
    /* push back in reverse order, turning token bytes back into literals */
    while (lexbuf.len > oldlen && !(errflag & ERRFLAG_ERROR)) {
	lexbuf.len--;
	hungetc(itok(*--lexbuf.ptr) ?
		ztokens[*lexbuf.ptr - Pound] : *lexbuf.ptr);
    }
    if (errflag)
	return CMD_OR_MATH_ERR;
    hungetc('(');
    return errflag ? CMD_OR_MATH_ERR : CMD_OR_MATH_CMD;
}


/*
 * Parse either a $(( ... )) or a $(...)
 * Return the same as cmd_or_math().
 *
 * Called after "$(" has been read.  If the next character is another
 * '(', math is tried first; on success the Inpar just added is
 * replaced by Inparmath.  If it turns out to be a command, the two
 * characters added here are removed from the buffer again and the
 * text is handled by skipcomm().
 */
static int
cmd_or_math_sub(void)
{
    int c = hgetc(), ret;

    if (c == '(') {
	int lexpos = (int)(lexbuf.ptr - tokstr);
	add(Inpar);
	add('(');
	if ((ret = cmd_or_math(CS_MATHSUBST)) == CMD_OR_MATH_MATH) {
	    tokstr[lexpos] = Inparmath;
	    add(')');
	    return CMD_OR_MATH_MATH;
	}
	if (ret == CMD_OR_MATH_ERR)
	    return CMD_OR_MATH_ERR;
	/* not math: drop the Inpar and '(' added above */
	lexbuf.ptr -= 2;
	lexbuf.len -= 2;
    } else {
	hungetc(c);
	lexstop = 0;
    }
    return skipcomm() ?
CMD_OR_MATH_ERR : CMD_OR_MATH_CMD; } /* Check whether we're looking at valid numeric globbing syntax * * (/\<[0-9]*-[0-9]*\>/). Call pointing just after the opening "<". * * Leaves the input in the same place, returning 0 or 1. */ /**/ static int isnumglob(void) { int c, ec = '-', ret = 0; int tbs = 256, n = 0; char *tbuf = (char *)zalloc(tbs); while(1) { c = hgetc(); if(lexstop) { lexstop = 0; break; } tbuf[n++] = c; if(!idigit(c)) { if(c != ec) break; if(ec == '>') { ret = 1; break; } ec = '>'; } if(n == tbs) tbuf = (char *)realloc(tbuf, tbs *= 2); } while(n--) hungetc(tbuf[n]); zfree(tbuf, tbs); return ret; } /**/ static enum lextok gettok(void) { int c, d; int peekfd = -1; enum lextok peek; beginning: tokstr = NULL; while (iblank(c = hgetc()) && !lexstop); toklineno = lineno; if (lexstop) return (errflag) ? LEXERR : ENDINPUT; isfirstln = 0; if ((lexflags & LEXFLAGS_ZLE)) wordbeg = inbufct - (qbang && c == bangchar); hwbegin(-1-(qbang && c == bangchar)); /* word includes the last character read and possibly \ before ! */ if (dbparens) { lexbuf.len = 0; lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE); hungetc(c); cmdpush(CS_MATH); c = dquote_parse(infor ? ';' : ')', 0); cmdpop(); *lexbuf.ptr = '\0'; if (!c && infor) { infor--; return DINPAR; } if (c || (c = hgetc()) != ')') { hungetc(c); return LEXERR; } dbparens = 0; return DOUTPAR; } else if (idigit(c)) { /* handle 1< foo */ d = hgetc(); if(d == '&') { d = hgetc(); if(d == '>') { peekfd = c - '0'; hungetc('>'); c = '&'; } else { hungetc(d); lexstop = 0; hungetc('&'); } } else if (d == '>' || d == '<') { peekfd = c - '0'; c = d; } else { hungetc(d); lexstop = 0; } } /* chars in initial position in word */ /* * Handle comments. There are some special cases when this * is not normal command input: lexflags implies we are examining * a line lexically without it being used for normal command input. 
*/
    if (c == hashchar && !nocomments &&
	(isset(INTERACTIVECOMMENTS) ||
	 ((!lexflags || (lexflags & LEXFLAGS_COMMENTS)) && !expanding &&
	  (!interact || unset(SHINSTDIN) || strin)))) {
	/*
	 * History is handled here to prevent extra
	 * newlines being inserted into the history.
	 */

	if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
	    /* the comment text is going to be returned as a STRING */
	    lexbuf.len = 0;
	    lexbuf.ptr = tokstr =
		(char *)hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
	    add(c);
	}
	hwend();
	/* swallow the rest of the line */
	while ((c = ingetc()) != '\n' && !lexstop) {
	    hwaddc(c);
	    addtoline(c);
	    if (lexflags & LEXFLAGS_COMMENTS_KEEP)
		add(c);
	}

	if (errflag)
	    peek = LEXERR;
	else {
	    if (lexflags & LEXFLAGS_COMMENTS_KEEP) {
		*lexbuf.ptr = '\0';
		if (!lexstop)
		    hungetc(c);
		peek = STRING;
	    } else {
		hwend();
		hwbegin(0);
		hwaddc('\n');
		addtoline('\n');
		/*
		 * If splitting a line and removing comments,
		 * we don't want a newline token since it's
		 * treated specially.
		 */
		if ((lexflags & LEXFLAGS_COMMENTS_STRIP) && lexstop)
		    peek = ENDINPUT;
		else
		    peek = NEWLIN;
	    }
	}
	return peek;
    }
    /*
     * Dispatch on the first character.  A case that ends in "break"
     * falls out of the switch and the character is treated as the
     * start of an ordinary string.
     */
    switch (lexact1[STOUC(c)]) {
    case LX1_BKSLASH:
	d = hgetc();
	/* backslash-newline: line continuation, start over */
	if (d == '\n')
	    goto beginning;
	hungetc(d);
	lexstop = 0;
	break;
    case LX1_NEWLIN:
	return NEWLIN;
    case LX1_SEMI:
	d = hgetc();
	if (d == ';')
	    return DSEMI;
	else if (d == '&')
	    return SEMIAMP;
	else if (d == '|')
	    return SEMIBAR;
	hungetc(d);
	lexstop = 0;
	return SEMI;
    case LX1_AMPER:
	d = hgetc();
	if (d == '&')
	    return DAMPER;
	else if (d == '!' || d == '|')
	    return AMPERBANG;
	else if (d == '>') {
	    tokfd = peekfd;
	    d = hgetc();
	    if (d == '!' || d == '|')
		return OUTANGAMPBANG;
	    else if (d == '>') {
		d = hgetc();
		if (d == '!' || d == '|')
		    return DOUTANGAMPBANG;
		hungetc(d);
		lexstop = 0;
		return DOUTANGAMP;
	    }
	    hungetc(d);
	    lexstop = 0;
	    return AMPOUTANG;
	}
	hungetc(d);
	lexstop = 0;
	return AMPER;
    case LX1_BAR:
	d = hgetc();
	if (d == '|')
	    return DBAR;
	else if (d == '&')
	    return BARAMP;
	hungetc(d);
	lexstop = 0;
	return BAR;
    case LX1_INPAR:
	d = hgetc();
	if (d == '(') {
	    if (infor) {
		dbparens = 1;
		return DINPAR;
	    }
	    if (incmdpos || (isset(SHGLOB) && !isset(KSHGLOB))) {
		lexbuf.len = 0;
		lexbuf.ptr = tokstr = (char *)
		    hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
		switch (cmd_or_math(CS_MATH)) {
		case CMD_OR_MATH_MATH:
		    return DINPAR;

		case CMD_OR_MATH_CMD:
		    /*
		     * Not math, so we don't return the contents
		     * as a string in this case.
		     */
		    tokstr = NULL;
		    return INPAR;

		default:
		    return LEXERR;
		}
	    }
	} else if (d == ')')
	    return INOUTPAR;
	hungetc(d);
	lexstop = 0;
	if (!(incond == 1 || incmdpos))
	    break;
	return INPAR;
    case LX1_OUTPAR:
	return OUTPAR;
    case LX1_INANG:
	d = hgetc();
	if (d == '(') {
	    hungetc(d);
	    lexstop = 0;
	  unpeekfd:
	    /*
	     * Not a redirection after all: if a leading digit was
	     * taken as a file descriptor, make it the first
	     * character of the word again.
	     */
	    if (peekfd != -1) {
		hungetc(c);
		c = '0' + peekfd;
	    }
	    break;
	}
	if (d == '>') {
	    peek = INOUTANG;
	} else if (d == '<') {
	    int e = hgetc();

	    if (e == '(') {
		hungetc(e);
		hungetc(d);
		peek = INANG;
	    } else if (e == '<')
		peek = TRINANG;
	    else if (e == '-')
		peek = DINANGDASH;
	    else {
		hungetc(e);
		lexstop = 0;
		peek = DINANG;
	    }
	} else if (d == '&') {
	    peek = INANGAMP;
	} else {
	    hungetc(d);
	    if (isnumglob())
		goto unpeekfd;
	    peek = INANG;
	}
	tokfd = peekfd;
	return peek;
    case LX1_OUTANG:
	d = hgetc();
	if (d == '(') {
	    hungetc(d);
	    goto unpeekfd;
	} else if (d == '&') {
	    d = hgetc();
	    if (d == '!' || d == '|')
		peek = OUTANGAMPBANG;
	    else {
		hungetc(d);
		lexstop = 0;
		peek = OUTANGAMP;
	    }
	} else if (d == '!' || d == '|')
	    peek = OUTANGBANG;
	else if (d == '>') {
	    d = hgetc();
	    if (d == '&') {
		d = hgetc();
		if (d == '!' || d == '|')
		    peek = DOUTANGAMPBANG;
		else {
		    hungetc(d);
		    lexstop = 0;
		    peek = DOUTANGAMP;
		}
	    } else if (d == '!' || d == '|')
		peek = DOUTANGBANG;
	    else if (d == '(') {
		hungetc(d);
		hungetc('>');
		peek = OUTANG;
	    } else {
		hungetc(d);
		lexstop = 0;
		peek = DOUTANG;
		if (isset(HISTALLOWCLOBBER))
		    hwaddc('|');
	    }
	} else {
	    hungetc(d);
	    lexstop = 0;
	    peek = OUTANG;
	    if (!incond && isset(HISTALLOWCLOBBER))
		hwaddc('|');
	}
	tokfd = peekfd;
	return peek;
    }

    /*
     * we've started a string, now get the
     * rest of it, performing tokenization
     */
    return gettokstr(c, 0);
}

/*
 * Get the remains of a token string.  This has two uses.
 * When called from gettok(), with sub = 0, we have already identified
 * any interesting initial character and want to get the rest of
 * what we now know is a string.  However, the string may still include
 * metacharacters and potentially substitutions.
 *
 * When called from parse_subst_string() with sub = 1, we are not
 * fully parsing a command line, merely tokenizing a string.
 * In this case we always add characters to the parsed string
 * unless there is a parse error.
 *
 * c is the first character of the string, already read.
 *
 * Local state:
 *   bct, pct, brct   nesting depth of braces, parentheses, brackets
 *   in_brace_param   value of bct at which a ${ was opened, else 0
 *   intpos           non-zero while at the initial position of the word
 *                    (set to 2 again just after an assignment "=")
 *   fdpar            set while a word-initial "(" has been followed by
 *                    nothing but blanks
 *   unmatched        quote character to report as unmatched, if any
 */

/**/
static enum lextok
gettokstr(int c, int sub)
{
    int bct = 0, pct = 0, brct = 0, fdpar = 0;
    int intpos = 1, in_brace_param = 0;
    int inquote, unmatched = 0;
    enum lextok peek;
#ifdef DEBUG
    int ocmdsp = cmdsp;
#endif

    peek = STRING;
    if (!sub) {
	lexbuf.len = 0;
	lexbuf.ptr = tokstr = (char *) hcalloc(lexbuf.siz = LEX_HEAP_SIZE);
    }
    for (;;) {
	int act;
	int e;
	int inbl = inblank(c);

	if (fdpar && !inbl && c != ')')
	    fdpar = 0;

	/* a blank ends the word unless inside ${...} or parentheses */
	if (inbl && !in_brace_param && !pct)
	    act = LX2_BREAK;
	else {
	    act = lexact2[STOUC(c)];
	    c = lextok2[STOUC(c)];
	}
	switch (act) {
	case LX2_BREAK:
	    if (!in_brace_param && !sub)
		goto brk;
	    break;
	case LX2_META:
	    c = hgetc();
#ifdef DEBUG
	    if (lexstop) {
		fputs("BUG: input terminated by Meta\n", stderr);
		fflush(stderr);
		goto brk;
	    }
#endif
	    add(Meta);
	    break;
	case LX2_OUTPAR:
	    if (fdpar) {
		/* this is a single word `(   )', treat as INOUTPAR */
		add(c);
		*lexbuf.ptr = '\0';
		return INOUTPAR;
	    }
	    if ((sub || in_brace_param) && isset(SHGLOB))
		break;
	    if (!in_brace_param && !pct--) {
		if (sub) {
		    pct = 0;
		    break;
		} else
		    goto brk;
	    }
	    c = Outpar;
	    break;
	case LX2_BAR:
	    if (!pct && !in_brace_param) {
		if (sub)
		    break;
		else
		    goto brk;
	    }
	    if (unset(SHGLOB) || (!sub && !in_brace_param))
		c = Bar;
	    break;
	case LX2_STRING:
	    /* "$": look at what follows to pick the kind of substitution */
	    e = hgetc();
	    if (e == '[') {
		cmdpush(CS_MATHSUBST);
		add(String);
		add(Inbrack);
		c = dquote_parse(']', sub);
		cmdpop();
		if (c) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outbrack;
	    } else if (e == '(') {
		add(String);
		switch (cmd_or_math_sub()) {
		case CMD_OR_MATH_CMD:
		    c = Outpar;
		    break;

		case CMD_OR_MATH_MATH:
		    c = Outparmath;
		    break;

		default:
		    peek = LEXERR;
		    goto brk;
		}
	    } else {
		if (e == '{') {
		    add(c);
		    c = Inbrace;
		    ++bct;
		    cmdpush(CS_BRACEPAR);
		    /* remember the depth at which the outermost ${ opened */
		    if (!in_brace_param)
			in_brace_param = bct;
		} else {
		    hungetc(e);
		    lexstop = 0;
		}
	    }
	    break;
	case LX2_INBRACK:
	    if (!in_brace_param)
		brct++;
	    c = Inbrack;
	    break;
	case LX2_OUTBRACK:
	    if (!in_brace_param)
		brct--;
	    if (brct < 0)
		brct = 0;
	    c = Outbrack;
	    break;
	case LX2_INPAR:
	    if (isset(SHGLOB)) {
		if (sub || in_brace_param)
		    break;
		if (incasepat && !lexbuf.len)
		    return INPAR;
		if (!isset(KSHGLOB) && lexbuf.len)
		    goto brk;
	    }
	    if (!in_brace_param) {
		if (!sub) {
		    e = hgetc();
		    hungetc(e);
		    lexstop = 0;
		    /*
		     * For command words, parentheses are only
		     * special at the start.  But now we're tokenising
		     * the remaining string.  So I don't see what
		     * the old incmdpos test here is for.
		     *   pws 1999/6/8
		     *
		     * Oh, no.
		     *  func1(   )
		     * is a valid function definition in [k]sh.  The best
		     * thing we can do, without really nasty lookahead tricks,
		     * is break if we find a blank after a parenthesis.  At
		     * least this can't happen inside braces or brackets.  We
		     * only allow this with SHGLOB (set for both sh and ksh).
		     *
		     * Things like `print @( |foo)' should still
		     * work, because [k]sh don't allow multiple words
		     * in a function definition, so we only do this
		     * in command position.
		     *   pws 1999/6/14
		     */
		    if (e == ')' || (isset(SHGLOB) && inblank(e) && !bct &&
				     !brct && !intpos && incmdpos)) {
			/*
			 * Either a () token, or a command word with
			 * something suspiciously like a ksh function
			 * definition.
			 * The current word isn't spellcheckable.
			 */
			nocorrect |= 2;
			goto brk;
		    }
		}
		/*
		 * This also handles the [k]sh `foo( )' function definition.
		 * Maintain a variable fdpar, set as long as a single set of
		 * parentheses contains only space.  Then if we get to the
		 * closing parenthesis and it is still set, we can assume we
		 * have a function definition.  Only do this at the start of
		 * the word, since the (...) must be a separate token.
		 */
		if (!pct++ && isset(SHGLOB) && intpos && !bct && !brct)
		    fdpar = 1;
	    }
	    c = Inpar;
	    break;
	case LX2_INBRACE:
	    if (isset(IGNOREBRACES) || sub)
		c = '{';
	    else {
		/* a lone "{" in command position is returned as a word */
		if (!lexbuf.len && incmdpos) {
		    add('{');
		    *lexbuf.ptr = '\0';
		    return STRING;
		}
		if (in_brace_param) {
		    cmdpush(CS_BRACE);
		}
		bct++;
	    }
	    break;
	case LX2_OUTBRACE:
	    if ((isset(IGNOREBRACES) || sub) && !in_brace_param)
		break;
	    if (!bct)
		break;
	    if (in_brace_param) {
		cmdpop();
	    }
	    /* leaving the depth at which the ${ was opened ends it */
	    if (bct-- == in_brace_param)
		in_brace_param = 0;
	    c = Outbrace;
	    break;
	case LX2_COMMA:
	    if (unset(IGNOREBRACES) && !sub && bct > in_brace_param)
		c = Comma;
	    break;
	case LX2_OUTANG:
	    if (in_brace_param || sub)
		break;
	    e = hgetc();
	    if (e != '(') {
		hungetc(e);
		lexstop = 0;
		goto brk;
	    }
	    add(OutangProc);
	    if (skipcomm()) {
		peek = LEXERR;
		goto brk;
	    }
	    c = Outpar;
	    break;
	case LX2_INANG:
	    if (isset(SHGLOB) && sub)
		break;
	    e = hgetc();
	    if (!(in_brace_param || sub) && e == '(') {
		add(Inang);
		if (skipcomm()) {
		    peek = LEXERR;
		    goto brk;
		}
		c = Outpar;
		break;
	    }
	    hungetc(e);
	    if (isnumglob()) {
		/*
		 * isnumglob() has confirmed that a closing '>' follows,
		 * so copy everything up to it.
		 */
		add(Inang);
		while ((c = hgetc()) != '>')
		    add(c);
		c = Outang;
		break;
	    }
	    lexstop = 0;
	    if (in_brace_param || sub)
		break;
	    goto brk;
	case LX2_EQUALS:
	    if (!sub) {
		if (intpos) {
		    e = hgetc();
		    if (e != '(') {
			hungetc(e);
			lexstop = 0;
			c = Equals;
		    } else {
			add(Equals);
			if (skipcomm()) {
			    peek = LEXERR;
			    goto brk;
			}
			c = Outpar;
		    }
		} else if (peek != ENVSTRING &&
			   incmdpos && !bct && !brct) {
		    /*
		     * Possible assignment: check that everything
		     * collected so far is digits or an identifier
		     * (with optional subscript), optionally followed
		     * by '+'.
		     */
		    char *t = tokstr;
		    if (idigit(*t))
			while (++t < lexbuf.ptr && idigit(*t));
		    else {
			int sav = *lexbuf.ptr;
			*lexbuf.ptr = '\0';
			t = itype_end(t, IIDENT, 0);
			if (t < lexbuf.ptr) {
			    skipparens(Inbrack, Outbrack, &t);
			} else {
			    *lexbuf.ptr = sav;
			}
		    }
		    if (*t == '+')
			t++;
		    if (t == lexbuf.ptr) {
			e = hgetc();
			if (e == '(' && incmdpos) {
			    *lexbuf.ptr = '\0';
			    return ENVARRAY;
			}
			hungetc(e);
			lexstop = 0;
			peek = ENVSTRING;
			intpos = 2;
		    } else
			c = Equals;
		} else
		    c = Equals;
	    }
	    break;
	case LX2_BKSLASH:
	    c = hgetc();
	    if (c == '\n') {
		/* backslash-newline is dropped; carry on with next char */
		c = hgetc();
		if (!lexstop)
		    continue;
	    } else {
		add(Bnull);
		if (c == STOUC(Meta)) {
		    c = hgetc();
#ifdef DEBUG
		    if (lexstop) {
			fputs("BUG: input terminated by Meta\n", stderr);
			fflush(stderr);
			goto brk;
		    }
#endif
		    add(Meta);
		}
	    }
	    if (lexstop)
		goto brk;
	    break;
	case LX2_QUOTE: {
	    /* strquote is set for the $'...' form */
	    int strquote = (lexbuf.len && lexbuf.ptr[-1] == String);

	    add(Snull);
	    cmdpush(CS_QUOTE);
	    for (;;) {
		STOPHIST
		while ((c = hgetc()) != '\'' && !lexstop) {
		    if (strquote && c == '\\') {
			c = hgetc();
			if (lexstop)
			    break;
			/*
			 * Mostly we don't need to do anything special
			 * with escape backslashes or closing quotes
			 * inside $'...'; however in completion we
			 * need to be able to strip multiple backslashes
			 * neatly.
			 */
			if (c == '\\' || c == '\'')
			    add(Bnull);
			else
			    add('\\');
		    } else if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
			if (lexbuf.ptr[-1] == '\\')
			    lexbuf.ptr--, lexbuf.len--;
			else
			    break;
		    }
		    add(c);
		}
		ALLOWHIST
		if (c != '\'') {
		    unmatched = '\'';
		    peek = LEXERR;
		    cmdpop();
		    goto brk;
		}
		/* with RCQUOTES, '' inside '...' stands for one quote */
		e = hgetc();
		if (e != '\'' || unset(RCQUOTES) || strquote)
		    break;
		add(c);
	    }
	    cmdpop();
	    hungetc(e);
	    lexstop = 0;
	    c = Snull;
	    break;
	}
	case LX2_DQUOTE:
	    add(Dnull);
	    cmdpush(CS_DQUOTE);
	    c = dquote_parse('"', sub);
	    cmdpop();
	    if (c) {
		unmatched = '"';
		peek = LEXERR;
		goto brk;
	    }
	    c = Dnull;
	    break;
	case LX2_BQUOTE:
	    add(Tick);
	    cmdpush(CS_BQUOTE);
	    SETPARBEGIN
	    inquote = 0;
	    while ((c = hgetc()) != '`' && !lexstop) {
		if (c == '\\') {
		    c = hgetc();
		    if (c != '\n') {
			add(c == '`' || c == '\\' || c == '$' ?
			    Bnull : '\\');
			add(c);
		    } else if (!sub && isset(CSHJUNKIEQUOTES))
			add(c);
		} else {
		    if (!sub && isset(CSHJUNKIEQUOTES) && c == '\n') {
			break;
		    }
		    add(c);
		    /* history is stopped while inside single quotes */
		    if (c == '\'') {
			if ((inquote = !inquote))
			    STOPHIST
			else
			    ALLOWHIST
		    }
		}
	    }
	    if (inquote)
		ALLOWHIST
	    cmdpop();
	    if (c != '`') {
		unmatched = '`';
		peek = LEXERR;
		goto brk;
	    }
	    c = Tick;
	    SETPAREND
	    break;
	}
	/* store the (possibly tokenized) character and fetch the next */
	add(c);
	c = hgetc();
	if (intpos)
	    intpos--;
	if (lexstop)
	    break;
    }
  brk:
    /* the character that ended the word goes back on the input */
    hungetc(c);
    if (unmatched)
	zerr("unmatched %c", unmatched);
    if (in_brace_param) {
	while (bct-- >= in_brace_param)
	    cmdpop();
	zerr("closing brace expected");
    } else if (unset(IGNOREBRACES) && !sub && lexbuf.len > 1 &&
	       peek == STRING && lexbuf.ptr[-1] == '}' &&
	       lexbuf.ptr[-2] != Bnull) {
	/* hack to get {foo} command syntax to work */
	lexbuf.ptr--;
	lexbuf.len--;
	lexstop = 0;
	hungetc('}');
    }
    *lexbuf.ptr = '\0';
    DPUTS(cmdsp != ocmdsp, "BUG: gettok: cmdstack changed.");
    return peek;
}


/*
 * Parse input as if in double quotes.
 * endchar is the end character to expect.
 * sub has got something to do with whether we are doing quoted substitution.
* Return non-zero for error (character to unget), else zero
 *
 * As far as this function itself is concerned, a non-zero sub has
 * three effects: backslash-newline is always removed; an embedded
 * newline is never an error under CSHJUNKIEQUOTES; and when endchar
 * is ']' a backslash before '"' is turned into Bnull.
 * sub is also passed on unchanged to nested calls.
 *
 * If endchar is ')' or ']' the text is treated as math: parentheses
 * and brackets must then balance before endchar can end the parse.
 */

/**/
static int
dquote_parse(char endchar, int sub)
{
    /*
     * pct, brct, bct: open parentheses, brackets, ${ braces.
     * intick: 0 outside backquotes, 1 inside, 2 inside single quotes
     * within backquotes.
     */
    int pct = 0, brct = 0, bct = 0, intick = 0, err = 0;
    int c;
    int math = endchar == ')' || endchar == ']';
    int zlemath = math && zlemetacs > zlemetall + addedx - inbufct;

    while (((c = hgetc()) != endchar || bct ||
	    (math && ((pct > 0) || (brct > 0))) ||
	    intick) && !lexstop) {
      cont:
	switch (c) {
	case '\\':
	    c = hgetc();
	    if (c != '\n') {
		if (c == '$' || c == '\\' || (c == '}' && !intick && bct) ||
		    c == endchar || c == '`' ||
		    (endchar == ']' && (c == '[' || c == ']' ||
					c == '(' || c == ')' ||
					c == '{' || c == '}' ||
					(c == '"' && sub))))
		    add(Bnull);
		else {
		    /* lexstop is implicitly handled here */
		    add('\\');
		    goto cont;
		}
	    } else if (sub || unset(CSHJUNKIEQUOTES) || endchar != '"')
		continue;
	    break;
	case '\n':
	    err = !sub && isset(CSHJUNKIEQUOTES) && endchar == '"';
	    break;
	case '$':
	    if (intick)
		break;
	    c = hgetc();
	    if (c == '(') {
		add(Qstring);
		switch (cmd_or_math_sub()) {
		case CMD_OR_MATH_CMD:
		    c = Outpar;
		    break;

		case CMD_OR_MATH_MATH:
		    c = Outparmath;
		    break;

		default:
		    err = 1;
		    break;
		}
	    } else if (c == '[') {
		add(String);
		add(Inbrack);
		cmdpush(CS_MATHSUBST);
		err = dquote_parse(']', sub);
		cmdpop();
		c = Outbrack;
	    } else if (c == '{') {
		add(Qstring);
		c = Inbrace;
		cmdpush(CS_BRACEPAR);
		bct++;
	    } else if (c == '$')
		add(Qstring);
	    else {
		hungetc(c);
		lexstop = 0;
		c = Qstring;
	    }
	    break;
	case '}':
	    if (intick || !bct)
		break;
	    c = Outbrace;
	    bct--;
	    cmdpop();
	    break;
	case '`':
	    c = Qtick;
	    if (intick == 2)
		ALLOWHIST
	    if ((intick = !intick)) {
		SETPARBEGIN
		cmdpush(CS_BQUOTE);
	    } else {
		SETPAREND
		cmdpop();
	    }
	    break;
	case '\'':
	    /* single quotes only matter inside backquotes */
	    if (!intick)
		break;
	    if (intick == 1)
		intick = 2, STOPHIST
	    else
		intick = 1, ALLOWHIST
	    break;
	case '(':
	    if (!math || !bct)
		pct++;
	    break;
	case ')':
	    /* in math, a ')' with nothing open is an error */
	    if (!math || !bct)
		err = (!pct-- && math);
	    break;
	case '[':
	    if (!math || !bct)
		brct++;
	    break;
	case ']':
	    if (!math || !bct)
		err = (!brct-- && math);
	    break;
	case '"':
	    if (intick || (endchar != '"' && !bct))
		break;
	    if (bct) {
		/* nested double quotes inside ${...} */
		add(Dnull);
		cmdpush(CS_DQUOTE);
		err = dquote_parse('"', sub);
		cmdpop();
		c = Dnull;
	    } else
		err = 1;
	    break;
	}
	if (err || lexstop)
	    break;
	add(c);
    }
    /* undo whatever history and command stack state is still open */
    if (intick == 2)
	ALLOWHIST
    if (intick) {
	cmdpop();
    }
    while (bct--)
	cmdpop();
    if (lexstop)
	err = intick || endchar || err;
    else if (err == 1) {
	/*
	 * TODO: as far as I can see, this hack is used in gettokstr()
	 * to hungetc() a character on an error.  However, I don't
	 * understand what that actually gets us, and we can't guarantee
	 * it's a character anyway, because of the previous test.
	 *
	 * We use the same feature in cmd_or_math where we actually do
	 * need to unget if we decide it's really a command substitution.
	 * We try to handle the other case by testing for lexstop.
	 */
	err = c;
    }
    if (zlemath && zlemetacs <= zlemetall + 1 - inbufct)
	inwhat = IN_MATH;
    return err;
}

/*
 * Tokenize a string given in s. Parsing is done as in double
 * quotes.  This is usually called before singsub().
 *
 * parsestr() is noisier, reporting an error if the parse failed.
 *
 * On entry, *s must point to a string allocated from the stack of
 * exactly the right length, i.e. strlen(*s) + 1, as the string
 * is used as the lexical token string whose memory management
 * demands this.  Usually the input string will therefore be
 * the result of an immediately preceding dupstring().
 *
 * Returns the result of parsestrnoerr(): zero on success.  On
 * failure *s is untokenized again before the error is reported.
 */

/**/
mod_export int
parsestr(char **s)
{
    int err;

    if ((err = parsestrnoerr(s))) {
	untokenize(*s);
	/* only name the offending character if it is printable ASCII */
	if (err > 32 && err < 127)
	    zerr("parse error near `%c'", err);
	else
	    zerr("parse error");
    }
    return err;
}

/*
 * As parsestr(), but without reporting an error.
 * A copy of the string is pushed as input and tokenized back into
 * the original buffer by dquote_parse(), whose result is returned.
 * *s is updated in case the token buffer was reallocated.
 */

/**/
mod_export int
parsestrnoerr(char **s)
{
    int l = strlen(*s), err;

    zcontext_save();
    untokenize(*s);
    inpush(dupstring(*s), 0, NULL);
    strinbeg(0);
    lexbuf.len = 0;
    lexbuf.ptr = tokstr = *s;
    lexbuf.siz = l + 1;
    err = dquote_parse('\0', 1);
    if (tokstr)
	*s = tokstr;
    *lexbuf.ptr = '\0';
    strinend();
    inpop();
    DPUTS(cmdsp, "BUG: parsestr: cmdstack not empty.");
    zcontext_restore();
    return err;
}

/*
 * Parse a subscript in string s.
 * sub is passed down to dquote_parse().
* endchar is the final character.
 * Return the next character, or NULL.
 */

/**/
mod_export char *
parse_subscript(char *s, int sub, int endchar)
{
    int l = strlen(s), err;
    char *t;

    /* Nothing to parse: s is empty or already starts with endchar. */
    if (!*s || *s == endchar)
        return 0;
    zcontext_save();
    /* The lexer input is an untokenized copy of s; s itself is untouched here. */
    untokenize(t = dupstring(s));
    inpush(t, 0, NULL);
    strinbeg(0);
    /*
     * Use s itself as the token buffer (size: its length plus the NUL),
     * presumably so that dquote_parse() writes its output in place.
     */
    lexbuf.len = 0;
    lexbuf.ptr = tokstr = s;
    lexbuf.siz = l + 1;
    err = dquote_parse(endchar, sub);
    if (err) {
        /*
         * Failure: untokenize only the part of s before lexbuf.ptr.
         * err is reused to hold the character that is temporarily
         * replaced by a NUL, and that character is put back afterwards.
         */
        err = *lexbuf.ptr;
        *lexbuf.ptr = '\0';
        untokenize(s);
        *lexbuf.ptr = err;
        s = NULL;
    } else {
        /* Success: return the position the token buffer pointer reached. */
        s = lexbuf.ptr;
    }
    strinend();
    inpop();
    DPUTS(cmdsp, "BUG: parse_subscript: cmdstack not empty.");
    zcontext_restore();
    return s;
}

/*
 * Tokenize a string given in s. Parsing is done as if s were a normal
 * command-line argument but it may contain separators. This is used
 * to parse the right-hand side of ${...%...} substitutions.
 *
 * s is modified in place. Returns 0 on success, and also when s is
 * empty or equal to nulstring (in which case nothing is done).
 * Returns 1 if the lexer reported LEXERR, if the DEBUG sanity check
 * fails, or if a $'...' expansion would be longer than the text it
 * replaces.
 */

/**/
mod_export int
parse_subst_string(char *s)
{
    int c, l = strlen(s), err;
    char *ptr;
    enum lextok ctok;

    if (!*s || !strcmp(s, nulstring))
        return 0;
    zcontext_save();
    untokenize(s);
    /* Read from a copy of s while s itself serves as the token buffer. */
    inpush(dupstring(s), 0, NULL);
    strinbeg(0);
    lexbuf.len = 0;
    lexbuf.ptr = tokstr = s;
    lexbuf.siz = l + 1;
    c = hgetc();
    ctok = gettokstr(c, 1);
    /* Remember errflag as it stood straight after lexing. */
    err = errflag;
    strinend();
    inpop();
    DPUTS(cmdsp, "BUG: parse_subst_string: cmdstack not empty.");
    zcontext_restore();
    /* Keep any interrupt error status */
    errflag = err | (errflag & ERRFLAG_INT);
    if (ctok == LEXERR) {
        /* Leave s in untokenized form on failure. */
        untokenize(s);
        return 1;
    }
#ifdef DEBUG
    /*
     * Historical note: we used to check here for olen (the value of lexbuf.len
     * before zcontext_restore()) == l, but that's not necessarily the case if
     * we stripped an RCQUOTE.
     */
    if (ctok != STRING || (errflag && !noerrs)) {
        fprintf(stderr, "Oops. Bug in parse_subst_string: %s\n",
                errflag ? "errflag" : "ctok != STRING");
        fflush(stderr);
        untokenize(s);
        return 1;
    }
#endif
    /* Check for $'...' quoting. This needs special handling. */
    for (ptr = s; *ptr; ) {
        if (*ptr == String && ptr[1] == Snull) {
            char *t;
            int len, tlen, diff;

            t = getkeystring(ptr + 2, &len, GETKEYS_DOLLARS_QUOTE, NULL);
            /*
             * len is presumably the number of source characters
             * getkeystring() consumed; the extra 2 covers the
             * String/Snull pair. tlen is the length of the replacement
             * and diff is how much shorter it is than the original.
             */
            len += 2;
            tlen = strlen(t);
            diff = len - tlen;
            /*
             * Yuk.
             * parse_subst_string() currently handles strings in-place.
             * That's not so easy to fix without knowing whether
             * additional memory should come off the heap or
             * otherwise. So we cheat by copying the unquoted string
             * into place, unless it's too long. That's not the
             * normal case, but I'm worried there are pathological
             * cases with converting metafied multibyte strings.
             * If someone can prove there aren't I will be very happy.
             */
            if (diff < 0) {
                DPUTS(1, "$'...' subst too long: fix get_parse_string()");
                return 1;
            }
            memcpy(ptr, t, tlen);
            ptr += tlen;
            if (diff > 0) {
                /*
                 * Close the gap: move the rest of the string, including
                 * the terminating NUL, down by diff bytes.
                 */
                char *dptr = ptr;
                char *sptr = ptr + diff;

                while ((*dptr++ = *sptr++))
                    ;
            }
        } else
            ptr++;
    }
    return 0;
}

/*
 * Called below to report word positions.
 *
 * Sets we (word end) from zlemetall, inbufct and addedx. If the cursor
 * position zlemetacs is not beyond we, also sets wb (word beginning)
 * and clears lexflags; exalias() detects that by comparing lexflags
 * before and after the call.
 */

/**/
static void
gotword(void)
{
    we = zlemetall + 1 - inbufct + (addedx == 2 ? 1 : 0);
    if (zlemetacs <= we) {
        wb = zlemetall - wordbeg + addedx;
        lexflags = 0;
    }
}

/*
 * Check if current lex text matches an alias: 1 if so, else 0
 *
 * On a match the replacement text is pushed onto the input with
 * inpush() and lexstop is cleared. Returns 0 without doing anything
 * if zshlextext is NULL.
 */

static int
checkalias(void)
{
    Alias an;

    if (!zshlextext)
        return 0;

    /*
     * Aliases must be enabled (noaliases clear, ALIASESOPT set). With
     * POSIXALIASES set, only STRING tokens that are not in the reserved
     * word table are considered.
     */
    if (!noaliases && isset(ALIASESOPT) &&
        (!isset(POSIXALIASES) ||
         (tok == STRING && !reswdtab->getnode(reswdtab, zshlextext)))) {
        char *suf;

        /*
         * Ordinary alias: it must not already be in use, and must be
         * global, or a STRING in command position, or inalmore must
         * be set.
         */
        an = (Alias) aliastab->getnode(aliastab, zshlextext);
        if (an && !an->inuse &&
            ((an->node.flags & ALIAS_GLOBAL) ||
             (incmdpos && tok == STRING) || inalmore)) {
            if (!lexstop) {
                /*
                 * Tokens that don't require a space after, get one,
                 * because they are treated as if preceded by one.
                 */
                /* Peek at the next input character and put it back. */
                int c = hgetc();

                hungetc(c);
                if (!iblank(c))
                    inpush(" ", INP_ALIAS, 0);
            }
            inpush(an->text, INP_ALIAS, an);
            /* Note a non-global alias whose text starts with a space. */
            if (an->text[0] == ' ' && !(an->node.flags & ALIAS_GLOBAL))
                aliasspaceflag = 1;
            lexstop = 0;
            return 1;
        }
        /*
         * Suffix alias: use the text after the last '.', which must not
         * be the first or last character of the word. The suf[-1] != Meta
         * test presumably rejects a '.' that is the second byte of a
         * metafied character. Only applies in command position.
         */
        if ((suf = strrchr(zshlextext, '.')) && suf[1] &&
            suf > zshlextext && suf[-1] != Meta &&
            (an = (Alias)sufaliastab->getnode(sufaliastab, suf+1)) &&
            !an->inuse && incmdpos) {
            /*
             * Pushed in the order: word, space, alias text. Assuming
             * inpush() stacks inputs so that the latest is read first,
             * the lexer then sees the alias text, a space and the word.
             */
            inpush(dupstring(zshlextext), INP_ALIAS, NULL);
            inpush(" ", INP_ALIAS, NULL);
            inpush(an->text, INP_ALIAS, an);
            lexstop = 0;
            return 1;
        }
    }

    return 0;
}

/*
 * expand aliases and reserved words
 *
 * Returns 1 if checkalias() pushed an alias expansion onto the input,
 * otherwise 0. May change tok to a reserved word token, DOUTBRACK or
 * BANG, and updates incond and inalmore accordingly.
 */

/**/
int
exalias(void)
{
    Reswd rw;

    hwend();
    /*
     * Spelling correction: interactive shell reading from stdin, not
     * parsing a string or a case pattern, a STRING token that is not
     * from an alias expansion, correction not disabled, and either
     * CORRECTALL is set or CORRECT is set and we are in command position.
     */
    if (interact && isset(SHINSTDIN) && !strin && !incasepat &&
        tok == STRING && !nocorrect && !(inbufflags & INP_ALIAS) &&
        (isset(CORRECTALL) || (isset(CORRECT) && incmdpos)))
        spckword(&tokstr, 1, incmdpos, 1);

    if (!tokstr) {
        /* No token string: use the fixed text for this token type. */
        zshlextext = tokstrings[tok];

        if (tok == NEWLIN)
            return 0;
        return checkalias();
    } else {
        /* Scratch buffer, local to this block, for an untokenized copy. */
        VARARR(char, copy, (strlen(tokstr) + 1));

        if (has_token(tokstr)) {
            char *p, *t;

            /*
             * Copy tokstr, replacing each token character by its
             * entry in ztokens (indexed relative to Pound). The loop
             * ends once the terminating NUL has been copied.
             */
            zshlextext = p = copy;
            for (t = tokstr;
                 (*p++ = itok(*t) ? ztokens[*t++ - Pound] : *t++););
        } else
            zshlextext = tokstr;

        if ((lexflags & LEXFLAGS_ZLE) && !(inbufflags & INP_ALIAS)) {
            int zp = lexflags;

            gotword();
            /* gotword() cleared lexflags: stop here. */
            if ((zp & LEXFLAGS_ZLE) && !lexflags) {
                /* Never leave zshlextext pointing at the local copy. */
                if (zshlextext == copy)
                    zshlextext = tokstr;
                return 0;
            }
        }

        if (tok == STRING) {
            /* Check for an alias */
            /*
             * With POSIXALIASES set, a word that contained tokens
             * (so that zshlextext is the copy) is not alias-expanded.
             */
            if ((zshlextext != copy || !isset(POSIXALIASES)) &&
                checkalias()) {
                if (zshlextext == copy)
                    zshlextext = tokstr;
                return 1;
            }

            /* Then check for a reserved word */
            /*
             * Reserved words are looked up in command position; a lone
             * "}" is also looked up elsewhere when neither IGNOREBRACES
             * nor IGNORECLOSEBRACES is set.
             */
            if ((incmdpos ||
                 (unset(IGNOREBRACES) && unset(IGNORECLOSEBRACES) &&
                  zshlextext[0] == '}' && !zshlextext[1])) &&
                (rw = (Reswd) reswdtab->getnode(reswdtab, zshlextext))) {
                tok = rw->token;
                if (tok == DINBRACK)
                    incond = 1;
            } else if (incond && !strcmp(zshlextext, "]]")) {
                tok = DOUTBRACK;
                incond = 0;
            } else if (incond == 1 && zshlextext[0] == '!'
                       && !zshlextext[1])
                tok = BANG;
        }
        inalmore = 0;
        if (zshlextext == copy)
            zshlextext = tokstr;
    }
    return 0;
}

/*
 * Append character c to the raw token buffer (tokstr_raw / lexbuf_raw).
 * Does nothing unless lex_add_raw is non-zero.
 */

/**/
void
zshlex_raw_add(int c)
{
    if (!lex_add_raw)
        return;

    *lexbuf_raw.ptr++ = c;
    if (lexbuf_raw.siz == ++lexbuf_raw.len) {
        /*
         * Buffer is now full: double it, reposition ptr in the
         * reallocated buffer and zero the newly added half.
         */
        int newbsiz = lexbuf_raw.siz * 2;

        tokstr_raw = (char *)hrealloc(tokstr_raw, lexbuf_raw.siz, newbsiz);
        lexbuf_raw.ptr = tokstr_raw + lexbuf_raw.len;
        memset(lexbuf_raw.ptr, 0, newbsiz - lexbuf_raw.siz);
        lexbuf_raw.siz = newbsiz;
    }
}

/*
 * Step the raw buffer back by one character.
 * Does nothing unless lex_add_raw is non-zero.
 */

/**/
void
zshlex_raw_back(void)
{
    if (!lex_add_raw)
        return;
    lexbuf_raw.ptr--;
    lexbuf_raw.len--;
}

/*
 * Return the current raw buffer length plus offset, for later use
 * with zshlex_raw_back_to_mark(). Returns 0 if lex_add_raw is zero.
 */

/**/
int
zshlex_raw_mark(int offset)
{
    if (!lex_add_raw)
        return 0;
    return lexbuf_raw.len + offset;
}

/*
 * Reset the raw buffer pointer and length to the position mark.
 * Does nothing unless lex_add_raw is non-zero.
 */

/**/
void
zshlex_raw_back_to_mark(int mark)
{
    if (!lex_add_raw)
        return;
    lexbuf_raw.ptr = tokstr_raw + mark;
    lexbuf_raw.len = mark;
}

/*
 * Skip (...) for command-style substitutions: $(...), <(...), >(...)
 *
 * In order to ensure we don't stop at closing parentheses with
 * some other syntactic significance, we'll parse the input until
 * we find an unmatched closing parenthesis. However, we'll throw
 * away the result of the parsing and just keep the string we've built
 * up on the way.
 *
 * Returns the value of lexstop on exit. In the default (parsing)
 * variant lexstop is set to 1 if parse_event(OUTPAR) fails or does
 * not end on an OUTPAR token.
 */

/**/
static int
skipcomm(void)
{
#ifdef ZSH_OLD_SKIPCOMM
    /*
     * Old variant: scan characters while counting parentheses.
     * pct is the nesting depth; start is non-zero when a '#' at this
     * point would begin a comment (initially, and after a blank).
     */
    int pct = 1, c, start = 1;

    cmdpush(CS_CMDSUBST);
    SETPARBEGIN
    c = Inpar;
    do {
        int iswhite;

        add(c);
        c = hgetc();
        if (itok(c) || lexstop)
            break;
        iswhite = inblank(c);
        switch (c) {
        case '(':
            pct++;
            break;
        case ')':
            pct--;
            break;
        case '\\':
            /* Add the backslash now; the next character is added at the loop top. */
            add(c);
            c = hgetc();
            break;
        case '\'': {
            /* A quote preceded by '$' is $'...', where backslash escapes apply. */
            int strquote = lexbuf.ptr[-1] == '$';

            add(c);
            STOPHIST
            while ((c = hgetc()) != '\'' && !lexstop) {
                if (c == '\\' && strquote) {
                    add(c);
                    c = hgetc();
                }
                add(c);
            }
            ALLOWHIST
            break;
        }
        case '\"':
            add(c);
            while ((c = hgetc()) != '\"' && !lexstop)
                if (c == '\\') {
                    add(c);
                    add(hgetc());
                } else
                    add(c);
            break;
        case '`':
            add(c);
            while ((c = hgetc()) != '`' && !lexstop)
                if (c == '\\')
                    add(c), add(hgetc());
                else
                    add(c);
            break;
        case '#':
            /* Comment: copy up to, but not including, the newline. */
            if (start) {
                add(c);
                while ((c = hgetc()) != '\n' && !lexstop)
                    add(c);
                iswhite = 1;
            }
            break;
        }
        start = iswhite;
    } while (pct);
    if (!lexstop)
        SETPAREND
    cmdpop();
    return lexstop;
#else
    char *new_tokstr;
    int new_lexstop, new_lex_add_raw;
    struct lexbufstate new_lexbuf;

    cmdpush(CS_CMDSUBST);
    SETPARBEGIN
    add(Inpar);

    /* One more level of raw-input collection than the caller had. */
    new_lex_add_raw = lex_add_raw + 1;
    if (!lex_add_raw) {
        /*
         * We'll combine the string so far with the input
         * read in for the command substitution. To do this
         * we'll just propagate the current tokstr etc. as the
         * variables used for adding raw input, and
         * ensure we swap those for the real tokstr etc. at the end.
         *
         * However, we need to save and restore the rest of the
         * lexical and parse state as we're effectively parsing
         * an internal string. Because we're still parsing it from
         * the original input source (we have to --- we don't know
         * when to stop inputting it otherwise and can't rely on
         * the input being recoverable until we've read it) we need
         * to keep the same history context.
         */
        new_tokstr = tokstr;
        new_lexbuf = lexbuf;

        zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
        hist_in_word(1);
    } else {
        /*
         * Set up for nested command substitution, however
         * we don't actually need the string until we get
         * back to the top level and recover the lot.
         * The $() body just appears empty.
         *
         * We do need to propagate the raw variables which would
         * otherwise be cleared, though.
         */
        new_tokstr = tokstr_raw;
        new_lexbuf = lexbuf_raw;

        zcontext_save_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);
    }
    /* Install the raw-input variables after the context save above. */
    tokstr_raw = new_tokstr;
    lexbuf_raw = new_lexbuf;
    lex_add_raw = new_lex_add_raw;
    /*
     * Don't do any ZLE specials down here: they're only needed
     * when we return the string from the recursive parse.
     * (TBD: this probably means we should be initialising lexflags
     * more consistently.)
     *
     * Note that in that case we're still using the ZLE line reading
     * function at the history layer --- this is consistent with the
     * intention of maintaining the history and input layers across
     * the recursive parsing.
     */
    lexflags &= ~LEXFLAGS_ZLE;

    /* Treat a failed parse, or one not ending on OUTPAR, as a lexical stop. */
    if (!parse_event(OUTPAR) || tok != OUTPAR)
        lexstop = 1;
    /* Outpar lexical token gets added in caller if present */

    /*
     * We're going to keep the full raw input string
     * as the current token string after popping the stack.
     */
    new_tokstr = tokstr_raw;
    new_lexbuf = lexbuf_raw;
    /*
     * We're also going to propagate the lexical state:
     * if we couldn't parse the command substitution we
     * can't continue.
     */
    new_lexstop = lexstop;

    zcontext_restore_partial(ZCONTEXT_LEX|ZCONTEXT_PARSE);

    /* lex_add_raw is tested here after the restore, i.e. the caller's level. */
    if (lex_add_raw) {
        /*
         * Keep going, so retain the raw variables.
         */
        tokstr_raw = new_tokstr;
        lexbuf_raw = new_lexbuf;
    } else {
        if (!new_lexstop) {
            /* Ignore the ')' added on input */
            new_lexbuf.len--;
            *--new_lexbuf.ptr = '\0';
        }

        /*
         * Convince the rest of lex.c we were examining a string
         * all along.
         */
        tokstr = new_tokstr;
        lexbuf = new_lexbuf;
        lexstop = new_lexstop;
        hist_in_word(0);
    }

    if (!lexstop)
        SETPAREND
    cmdpop();

    return lexstop;
#endif
}