diff options
-rw-r--r-- | NEWS.md | 5 | ||||
-rw-r--r-- | README.md | 3 | ||||
-rw-r--r-- | lr.1 | 8 | ||||
-rw-r--r-- | lr.c | 109 |
4 files changed, 106 insertions, 19 deletions
diff --git a/NEWS.md b/NEWS.md
index 103acbc..8a43628 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,8 @@
+## HEAD
+
+* Feature: new option `-P` to quote filenames with `$'...'` syntax.
+* Feature: invalid UTF-8 filenames are quoted now.
+
 ## 1.2 (2017-11-17)
 
 * Feature: new option `-B` for breadth first traversal.
diff --git a/README.md b/README.md
index 643e9bf..2e8bcda 100644
--- a/README.md
+++ b/README.md
@@ -47,7 +47,7 @@ Over ls:
 
 ## Usage:
 
-	lr [-0|-F|-l [-TA|-TC|-TM]|-S|-f FMT] [-B|-D] [-H|-L] [-1AGQXdhsx] [-U|-o ORD] [-e REGEX]* [-t TEST]* PATH...
+	lr [-0|-F|-l [-TA|-TC|-TM]|-S|-f FMT] [-B|-D] [-H|-L] [-1AGPQXdhsx] [-U|-o ORD] [-e REGEX]* [-t TEST]* PATH...
 
 The special path argument `-` makes `lr` read file names from standard
 input, instead of traversing path.
@@ -70,6 +70,7 @@ input, instead of traversing path.
 * `-A`: don't list files starting with a dot.
 * `-G`: colorize output to tty. Use twice to force colorize.
 * `-X`: print OSC 8 hyperlinks to tty. Use twice to force.
+* `-P`: quote file names using `$'...'` syntax.
 * `-Q`: shell quote file names (default for output to TTY).
 * `-d`: don't enter directories.
 * `-h`: print human readable size for `-l` (also `%s`).
diff --git a/lr.1 b/lr.1
index a79fb39..2e29e7f 100644
--- a/lr.1
+++ b/lr.1
@@ -10,7 +10,7 @@
 .br
 .Op Fl B | Fl D
 .Op Fl H | Fl L
-.Op Fl 1AGQXdhsx
+.Op Fl 1AGPQXdhsx
 .Op Fl U | Fl o Ar ord
 .br
 .Op Fl e Ar regex
@@ -102,6 +102,12 @@ Follow all symlinks.
 Don't go below one level of directories.
 .It Fl A
 Don't list files starting with a dot.
+.It Fl P
+Quote file names using
+Austin Group #249 syntax
+.Li \&$' Ns No ... Ns Li \&'
+(implies
+.Fl Q ) .
 .It Fl Q
 Quote file names (default for output to TTY).
.It Fl d
diff --git a/lr.c b/lr.c
index 9328f55..7f446f9 100644
--- a/lr.c
+++ b/lr.c
@@ -88,6 +88,7 @@ static int Dflag;
 static int Hflag;
 static int Lflag;
 static int Qflag;
+static int Pflag;
 static int Uflag;
 static int Xflag;
 static int hflag;
@@ -1055,7 +1056,7 @@ username(uid_t uid)
 
 	if (name)
 		return name;
-	
+
 	struct passwd *p = getpwuid(uid);
 	if (p) {
 		if ((int)strlen(p->pw_name) > uwid)
@@ -1064,7 +1065,7 @@ username(uid_t uid)
 		users = idtree_insert(users, uid, name);
 		return name;
 	}
-	
+
 	return strid(uid);
 }
 
@@ -1653,25 +1654,98 @@ print_human(intmax_t i)
 }
 
 
+// Decode one UTF-8 sequence at cs into *cp; return its length in bytes
+// (0 at the terminating NUL).  On invalid UTF-8, return -1; *cp may then
+// hold a partial value.  Overlong forms, surrogates and values above
+// U+10FFFF are rejected.  Lookahead relies on cs being NUL-terminated.
+// This code is meant to be inlined, if cp is unused it can be optimized away.
+static int
+u8decode(const char *cs, uint32_t *cp)
+{
+	const uint8_t *s = (const uint8_t *)cs;
+
+	if (*s == 0)   { *cp = 0; return 0; }
+	if (*s < 0x80) { *cp = *s; return 1; }
+	if (*s < 0xc2) { return -1; }  // stray continuation or overlong lead (0xc0, 0xc1)
+	if (*s < 0xe0) { *cp = *s & 0x1f; goto u2; }
+	if (*s < 0xf0) {
+		if (*s == 0xe0 && (s[1] & 0xe0) == 0x80) return -1; // overlong
+		if (*s == 0xed && (s[1] & 0xe0) == 0xa0) return -1; // surrogate
+		*cp = *s & 0x0f; goto u3;
+	}
+	if (*s < 0xf5) {
+		if (*s == 0xf0 && (s[1] & 0xf0) == 0x80) return -1; // overlong
+		if (*s == 0xf4 && (s[1] > 0x8f)) return -1; // above U+10FFFF
+		*cp = *s & 0x07; goto u4;
+	}
+	return -1;
+
+u4:	if ((*++s & 0xc0) != 0x80) return -1;  *cp = (*cp << 6) | (*s & 0x3f);
+u3:	if ((*++s & 0xc0) != 0x80) return -1;  *cp = (*cp << 6) | (*s & 0x3f);
+u2:	if ((*++s & 0xc0) != 0x80) return -1;  *cp = (*cp << 6) | (*s & 0x3f);
+	return s - (const uint8_t *)cs + 1;
+}
+
 static void
 print_shquoted(const char *s)
 {
-	if (!Qflag || !strpbrk(s, "\001\002\003\004\005\006\007\010"
-	    "\011\012\013\014\015\016\017\020"
-	    "\021\022\023\024\025\026\027\030"
-	    "\031\032\033\034\035\036\037\040"
-	    "`^#*[]=|\\?${}()'\"<>&;\177")) {
+	uint32_t ignored;	// codepoint from u8decode, not needed here
+	int l;			// sequence length from u8decode
+	// Quote only if Qflag is set and s has special bytes or invalid UTF-8.
+	const char *t;
+	int esc = 0;
+	for (t = s; *t; ) {
+		if ((unsigned char)*t <= 32 ||
+		    strchr("`^#*[]=|\\?${}()'\"<>&;\177", *t)) {
+			esc = 1;
+			break;
+		} else {
+			if ((l = u8decode(t, &ignored)) < 0) {
+				esc = 1;
+				break;
+			}
+			t += l;
+		}
+	}
+
+	if (!Qflag || !esc) {	// quoting disabled (e.g. by -0) or nothing to quote
 		printf("%s", s);
 		return;
 	}
 
-	putchar('\'');
-	for (; *s; s++)
-		if (*s == '\'')
-			printf("'\\''");
-		else
-			putchar(*s);
-	putchar('\'');
+	if (Pflag) {
+		printf("$'");
+		for (; *s; s++)
+			switch (*s) {
+			case '\a': printf("\\a"); break;
+			case '\b': printf("\\b"); break;
+			case '\033': printf("\\e"); break;  // ESC; '\e' is not ISO C
+			case '\f': printf("\\f"); break;
+			case '\n': printf("\\n"); break;
+			case '\r': printf("\\r"); break;
+			case '\t': printf("\\t"); break;
+			case '\v': printf("\\v"); break;
+			case '\\': printf("\\\\"); break;
+			case '\'': printf("\\\'"); break;
+			default:
+				if ((unsigned char)*s < 32 ||
+				    (l = u8decode(s, &ignored)) < 0) {
+					printf("\\%03o", (unsigned char)*s);  // octal escape
+				} else {
+					printf("%.*s", l, s);
+					s += l-1;  // the loop's s++ skips the last byte
+				}
+			}
+		putchar('\'');
+	} else {
+		putchar('\'');
+		for (; *s; s++)
+			if (*s == '\'')
+				printf("'\\''");
+			else
+				putchar(*s);
+		putchar('\'');
+	}
 }
 
 void
@@ -2319,9 +2393,9 @@ main(int argc, char *argv[])
 
 	setlocale(LC_ALL, "");
 
-	while ((c = getopt(argc, argv, "01ABC:DFGHLQST:UXde:f:lho:st:x")) != -1)
+	while ((c = getopt(argc, argv, "01ABC:DFGHLQPST:UXde:f:lho:st:x")) != -1)
 		switch (c) {
-		case '0': format = zero_format; input_delim = 0; Qflag = 0; break;
+		case '0': format = zero_format; input_delim = 0; Qflag = Pflag = 0; break;
 		case '1': expr = chain(parse_expr("depth > 0 ? prune : print"), EXPR_AND, expr); break;
 		case 'A': expr = chain(expr, EXPR_AND, parse_expr("name =~ \"^\\.\" && path != \".\" ? prune : print")); break;
 		case 'B': Bflag++; Dflag = 0; Uflag = 0; need_stat++; break;
@@ -2341,6 +2415,7 @@ main(int argc, char *argv[])
 		case 'H': Hflag++; break;
 		case 'L': Lflag++; break;
 		case 'Q': Qflag++; break;
+		case 'P': Pflag++; Qflag++; break;
 		case 'S': Qflag++; format = stat_format; break;
 		case 'T': Tflag = timeflag(optarg); break;
 		case 'U': Uflag++; Bflag = 0; break;
@@ -2359,7 +2434,7 @@ main(int argc, char *argv[])
 		case 'x': xflag++; break;
 		default:
 			fprintf(stderr,
-"Usage: %s [-0|-F|-l [-TA|-TC|-TM]|-S|-f FMT] [-B|-D] [-H|-L] [-1AGQdhsx]\n"
+"Usage: %s [-0|-F|-l [-TA|-TC|-TM]|-S|-f FMT] [-B|-D] [-H|-L] [-1AGPQXdhsx]\n"
 " [-U|-o ORD] [-e REGEX]* [-t TEST]* [-C [COLOR:]PATH]* PATH...\n", argv0);
 			exit(2);
 		}