View Issue Details
ID | Project | Category | View Status | Date Submitted | Last Update |
---|---|---|---|---|---|
0000214 | tcsh | General | public | 2020-12-05 00:40 | 2021-02-27 01:02 |
Reporter | andrew@ugh.net.au | Assigned To | christos | ||
Priority | normal | Severity | minor | Reproducibility | always |
Status | assigned | Resolution | open | ||
Product Version | 6.22.03 | ||||
Summary | 0000214: Can't escape delimiters in :s modifier | ||||
Description | The man page, under "History substitution", says of the s modifier: > Any character may be used as the delimiter in place of `/'; a `\' can be used to quote the delimiter inside l and r. However, `\' does not currently quote the delimiter. I didn't go back to check whether it used to work. | ||||
Steps To Reproduce | ``` >set a='a/b' >echo $a a/b >echo $a:s/\//#/ a/b ``` The last command printed `a/b`; its output should have been `a#b`. | ||||
Additional Information | Patch attached | ||||
Tags | patch | ||||
|
sh.dol.c.patch (2,458 bytes)
diff --git a/sh.dol.c b/sh.dol.c index 22d09e4..0ae232c 100644 --- a/sh.dol.c +++ b/sh.dol.c @@ -791,6 +791,50 @@ all_dolmcnts_are_0() return 1; } +/* Unescape the sub of an :s modifier + * + * start = start of string + * delim = delim character + * consumed = incremented by the number of characters consumed before finding + * delim + * + * Returns the length of the string after unescaping + * + * Note the characters that start points at will be changed if an escape is + * found - those after the escape character will all be shifted to the left. + * + * \ is always used as the escape character + * + * e.g. unescnhs("abc/def/", '/', &i) returns 3 and increments i by 3 + * unescnhs("a\/bc/def/", '/', &i) returns 4, increments i by 5 and makes + * start[0..4] = "a/bc" + * unescnhs("a\\bc/def/", '/', &i) returns 4, increments i by 5 and makes + * start[0..4] = "a\bc" + */ +static size_t +unescnhs(Char *nhs, Char delim, size_t *consumed) { + /* how far through the string we have got to */ + size_t i; + /* offset is how far behind the output string is compared to the number of + * chars we have consumed. 
every escape makes us one more behind */ + size_t offset; + + for (i = 0, offset = 0; ; i++) { + if (nhs[i] == '\\') { + offset++; + i++; + } else if (nhs[i] == delim) { + /* this is the end so we can return */ + *consumed += i; + return i - offset; + } + + /* place the current char in its right place in the array, allowing + * for any removed escape characters */ + nhs[i - offset] = nhs[i]; + } +} + static void setDolp(Char *cp) { @@ -813,6 +857,8 @@ setDolp(Char *cp) size_t lhlen = 0, rhlen = 0; /* keep track of where the last :a match hit */ ptrdiff_t last_match = 0; + /* if there is an escaped delim then we need to strip the escape + * and that means shuffling later characters to the left */ delim = dolmod.s[++i]; if (!delim || letter(delim) @@ -821,15 +867,11 @@ setDolp(Char *cp) break; } lhsub = &dolmod.s[++i]; - while(dolmod.s[i] != delim && dolmod.s[++i]) { - lhlen++; - } - dolmod.s[i] = 0; + lhlen = unescnhs(lhsub, delim, &i); + lhsub[lhlen] = 0; rhsub = &dolmod.s[++i]; - while(dolmod.s[i] != delim && dolmod.s[++i]) { - rhlen++; - } - dolmod.s[i] = 0; + rhlen = unescnhs(rhsub, delim, &i); + rhsub[rhlen] = 0; strip(lhsub); strip(rhsub); |
|
This replaces the previous patch (sh.dol.c.patch), which somehow had some misplaced comments in it. sh.dol.c-2.patch (2,097 bytes)
diff --git a/sh.dol.c b/sh.dol.c index 22d09e4..68ee959 100644 --- a/sh.dol.c +++ b/sh.dol.c @@ -791,6 +791,50 @@ all_dolmcnts_are_0() return 1; } +/* Unescape the sub of an :s modifier + * + * start = start of string + * delim = delim character + * consumed = incremented by the number of characters consumed before finding + * delim + * + * Returns the length of the string after unescaping + * + * Note the characters that start points at will be changed if an escape is + * found - those after the escape character will all be shifted to the left. + * + * \ is always used as the escape character + * + * e.g. unescnhs("abc/def/", '/', &i) returns 3 and increments i by 3 + * unescnhs("a\/bc/def/", '/', &i) returns 4, increments i by 5 and makes + * start[0..4] = "a/bc" + * unescnhs("a\\bc/def/", '/', &i) returns 4, increments i by 5 and makes + * start[0..4] = "a\bc" + */ +static size_t +unescnhs(Char *nhs, Char delim, size_t *consumed) { + /* how far through the string we have got to */ + size_t i; + /* offset is how far behind the output string is compared to the number of + * chars we have consumed. every escape makes us one more behind */ + size_t offset; + + for (i = 0, offset = 0; ; i++) { + if (nhs[i] == '\\') { + offset++; + i++; + } else if (nhs[i] == delim) { + /* this is the end so we can return */ + *consumed += i; + return i - offset; + } + + /* place the current char in its right place in the array, allowing + * for any removed escape characters */ + nhs[i - offset] = nhs[i]; + } +} + static void setDolp(Char *cp) { @@ -821,15 +865,11 @@ setDolp(Char *cp) break; } lhsub = &dolmod.s[++i]; - while(dolmod.s[i] != delim && dolmod.s[++i]) { - lhlen++; - } - dolmod.s[i] = 0; + lhlen = unescnhs(lhsub, delim, &i); + lhsub[lhlen] = 0; rhsub = &dolmod.s[++i]; - while(dolmod.s[i] != delim && dolmod.s[++i]) { - rhlen++; - } - dolmod.s[i] = 0; + rhlen = unescnhs(rhsub, delim, &i); + rhsub[rhlen] = 0; strip(lhsub); strip(rhsub); |
|
I am wondering if that ever worked and we broke it, or if it never worked and this patch is needed. I need to take a more careful look. |
|
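I think we need to handle the escaped delimiter in both places where the :s modifier is parsed: at the lexical level (getdol in sh.lex.c) and at dollar evaluation (fixDolMod in sh.dol.c), as in the patch below. delim.diff (1,412 bytes)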
diff --git a/sh.dol.c b/sh.dol.c index 28e8639..362ec55 100644 --- a/sh.dol.c +++ b/sh.dol.c @@ -746,6 +746,7 @@ fixDolMod(void) if (c == 's') { /* [eichin:19910926.0755EST] */ int delimcnt = 2; + int esc = 0; eChar delim = DgetC(0); Strbuf_append1(&dolmod, (Char) c); Strbuf_append1(&dolmod, (Char) delim); @@ -756,9 +757,14 @@ fixDolMod(void) break; } while ((c = DgetC(0)) != DEOF) { + if (esc == 0 && c == '\\') { + esc = 1; + continue; + } Strbuf_append1(&dolmod, (Char) c); - if (c == delim) delimcnt--; + if (!esc && c == delim) delimcnt--; if (!delimcnt) break; + esc = 0; } if (delimcnt) { seterror(ERR_BADSUBST); diff --git a/sh.lex.c b/sh.lex.c index 46cc96d..4277912 100644 --- a/sh.lex.c +++ b/sh.lex.c @@ -618,6 +618,7 @@ getdol(void) /* scan s// [eichin:19910926.0512EST] */ if (c == 's') { int delimcnt = 2; + int esc = 0; eChar delim = getC(0); Strbuf_append1(&name, delim); @@ -627,9 +628,15 @@ getdol(void) break; } while ((c = getC(0)) != CHAR_ERR) { + if (esc == 0 && c == '\\') { + esc = 1; + Strbuf_append1(&name, c); + continue; + } Strbuf_append1(&name, c); - if (c == delim) delimcnt--; + if (!esc && c == delim) delimcnt--; if (!delimcnt) break; + esc = 0; } if (delimcnt) { seterror(ERR_BADSUBST); |
Date Modified | Username | Field | Change |
---|---|---|---|
2020-12-05 00:40 | andrew@ugh.net.au | New Issue | |
2020-12-05 00:40 | andrew@ugh.net.au | File Added: sh.dol.c.patch | |
2020-12-05 00:43 | andrew@ugh.net.au | File Added: sh.dol.c-2.patch | |
2020-12-05 00:43 | andrew@ugh.net.au | Note Added: 0003499 | |
2021-01-13 12:13 | andrew@ugh.net.au | Tag Attached: patch | |
2021-02-26 14:33 | christos | Note Added: 0003563 | |
2021-02-27 01:01 | christos | Assigned To | => christos |
2021-02-27 01:01 | christos | Status | new => assigned |
2021-02-27 01:02 | christos | File Added: delim.diff | |
2021-02-27 01:02 | christos | Note Added: 0003564 |