View Issue Details
| ID | Project | Category | View Status | Date Submitted | Last Update |
|---|---|---|---|---|---|
| 0000214 | tcsh | General | public | 2020-12-05 00:40 | 2021-02-27 01:02 |
| Reporter | andrew@ugh.net.au | Assigned To | christos | ||
| Priority | normal | Severity | minor | Reproducibility | always |
| Status | assigned | Resolution | open | ||
| Product Version | 6.22.03 | ||||
| Summary | 0000214: Can't escape delimiters in :s modifier | ||||
| Description | the man page, under "History substitution" for the s modifier says: > Any character may be used as the delimiter in place of `/'; a `\' can be used to quote the delimiter inside l and r. \ does not quote the delimiter currently. I didn't go back to see if it used to work. | ||||
| Steps To Reproduce | ``` >set a='a/b' >echo $a a/b >echo $a:s/\//#/ a/b ``` The output should have been `a#b` | ||||
| Additional Information | Patch attached | ||||
| Tags | patch | ||||
|
|
sh.dol.c.patch (2,458 bytes)
diff --git a/sh.dol.c b/sh.dol.c
index 22d09e4..0ae232c 100644
--- a/sh.dol.c
+++ b/sh.dol.c
@@ -791,6 +791,50 @@ all_dolmcnts_are_0()
return 1;
}
+/* Unescape the sub of an :s modifier
+ *
+ * start = start of string
+ * delim = delim character
+ * consumed = incremented by the number of characters consumed before finding
+ * delim
+ *
+ * Returns the length of the string after unescaping
+ *
+ * Note the characters that start points at will be changed if an escape is
+ * found - those after the escape character will all be shifted to the left.
+ *
+ * \ is always used as the escape character
+ *
+ * e.g. unescnhs("abc/def/", '/', &i) returns 3 and increments i by 3
+ * unescnhs("a\/bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * start[0..4] = "a/bc"
+ * unescnhs("a\\bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * start[0..4] = "a\bc"
+ */
+static size_t
+unescnhs(Char *nhs, Char delim, size_t *consumed) {
+ /* how far through the string we have got to */
+ size_t i;
+ /* offset is how far behind the output string is compared to the number of
+ * chars we have consumed. every escape makes us one more behind */
+ size_t offset;
+
+ for (i = 0, offset = 0; ; i++) {
+ if (nhs[i] == '\\') {
+ offset++;
+ i++;
+ } else if (nhs[i] == delim) {
+ /* this is the end so we can return */
+ *consumed += i;
+ return i - offset;
+ }
+
+ /* place the current char in its right place in the array, allowing
+ * for any removed escape characters */
+ nhs[i - offset] = nhs[i];
+ }
+}
+
static void
setDolp(Char *cp)
{
@@ -813,6 +857,8 @@ setDolp(Char *cp)
size_t lhlen = 0, rhlen = 0;
/* keep track of where the last :a match hit */
ptrdiff_t last_match = 0;
+ /* if there is an escaped delim then we need to strip the escape
+ * and that means shuffling later characters to the left */
delim = dolmod.s[++i];
if (!delim || letter(delim)
@@ -821,15 +867,11 @@ setDolp(Char *cp)
break;
}
lhsub = &dolmod.s[++i];
- while(dolmod.s[i] != delim && dolmod.s[++i]) {
- lhlen++;
- }
- dolmod.s[i] = 0;
+ lhlen = unescnhs(lhsub, delim, &i);
+ lhsub[lhlen] = 0;
rhsub = &dolmod.s[++i];
- while(dolmod.s[i] != delim && dolmod.s[++i]) {
- rhlen++;
- }
- dolmod.s[i] = 0;
+ rhlen = unescnhs(rhsub, delim, &i);
+ rhsub[rhlen] = 0;
strip(lhsub);
strip(rhsub);
|
|
|
This replaces the previous patch which somehow had some misplaced comments in it. sh.dol.c-2.patch (2,097 bytes)
diff --git a/sh.dol.c b/sh.dol.c
index 22d09e4..68ee959 100644
--- a/sh.dol.c
+++ b/sh.dol.c
@@ -791,6 +791,50 @@ all_dolmcnts_are_0()
return 1;
}
+/* Unescape the sub of an :s modifier
+ *
+ * start = start of string
+ * delim = delim character
+ * consumed = incremented by the number of characters consumed before finding
+ * delim
+ *
+ * Returns the length of the string after unescaping
+ *
+ * Note the characters that start points at will be changed if an escape is
+ * found - those after the escape character will all be shifted to the left.
+ *
+ * \ is always used as the escape character
+ *
+ * e.g. unescnhs("abc/def/", '/', &i) returns 3 and increments i by 3
+ * unescnhs("a\/bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * start[0..4] = "a/bc"
+ * unescnhs("a\\bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * start[0..4] = "a\bc"
+ */
+static size_t
+unescnhs(Char *nhs, Char delim, size_t *consumed) {
+ /* how far through the string we have got to */
+ size_t i;
+ /* offset is how far behind the output string is compared to the number of
+ * chars we have consumed. every escape makes us one more behind */
+ size_t offset;
+
+ for (i = 0, offset = 0; ; i++) {
+ if (nhs[i] == '\\') {
+ offset++;
+ i++;
+ } else if (nhs[i] == delim) {
+ /* this is the end so we can return */
+ *consumed += i;
+ return i - offset;
+ }
+
+ /* place the current char in its right place in the array, allowing
+ * for any removed escape characters */
+ nhs[i - offset] = nhs[i];
+ }
+}
+
static void
setDolp(Char *cp)
{
@@ -821,15 +865,11 @@ setDolp(Char *cp)
break;
}
lhsub = &dolmod.s[++i];
- while(dolmod.s[i] != delim && dolmod.s[++i]) {
- lhlen++;
- }
- dolmod.s[i] = 0;
+ lhlen = unescnhs(lhsub, delim, &i);
+ lhsub[lhlen] = 0;
rhsub = &dolmod.s[++i];
- while(dolmod.s[i] != delim && dolmod.s[++i]) {
- rhlen++;
- }
- dolmod.s[i] = 0;
+ rhlen = unescnhs(rhsub, delim, &i);
+ rhsub[rhlen] = 0;
strip(lhsub);
strip(rhsub);
|
|
|
I am wondering if that ever worked and we broke it, or if it never worked and this patch is needed. I need to take a more careful look. |
|
|
I think we need to parse both at the lexical level and at dollar evaluation like below. delim.diff (1,412 bytes)
diff --git a/sh.dol.c b/sh.dol.c
index 28e8639..362ec55 100644
--- a/sh.dol.c
+++ b/sh.dol.c
@@ -746,6 +746,7 @@ fixDolMod(void)
if (c == 's') { /* [eichin:19910926.0755EST] */
int delimcnt = 2;
+ int esc = 0;
eChar delim = DgetC(0);
Strbuf_append1(&dolmod, (Char) c);
Strbuf_append1(&dolmod, (Char) delim);
@@ -756,9 +757,14 @@ fixDolMod(void)
break;
}
while ((c = DgetC(0)) != DEOF) {
+ if (esc == 0 && c == '\\') {
+ esc = 1;
+ continue;
+ }
Strbuf_append1(&dolmod, (Char) c);
- if (c == delim) delimcnt--;
+ if (!esc && c == delim) delimcnt--;
if (!delimcnt) break;
+ esc = 0;
}
if (delimcnt) {
seterror(ERR_BADSUBST);
diff --git a/sh.lex.c b/sh.lex.c
index 46cc96d..4277912 100644
--- a/sh.lex.c
+++ b/sh.lex.c
@@ -618,6 +618,7 @@ getdol(void)
/* scan s// [eichin:19910926.0512EST] */
if (c == 's') {
int delimcnt = 2;
+ int esc = 0;
eChar delim = getC(0);
Strbuf_append1(&name, delim);
@@ -627,9 +628,15 @@ getdol(void)
break;
}
while ((c = getC(0)) != CHAR_ERR) {
+ if (esc == 0 && c == '\\') {
+ esc = 1;
+ Strbuf_append1(&name, c);
+ continue;
+ }
Strbuf_append1(&name, c);
- if (c == delim) delimcnt--;
+ if (!esc && c == delim) delimcnt--;
if (!delimcnt) break;
+ esc = 0;
}
if (delimcnt) {
seterror(ERR_BADSUBST);
|
| Date Modified | Username | Field | Change |
|---|---|---|---|
| 2020-12-05 00:40 | andrew@ugh.net.au | New Issue | |
| 2020-12-05 00:40 | andrew@ugh.net.au | File Added: sh.dol.c.patch | |
| 2020-12-05 00:43 | andrew@ugh.net.au | File Added: sh.dol.c-2.patch | |
| 2020-12-05 00:43 | andrew@ugh.net.au | Note Added: 0003499 | |
| 2021-01-13 12:13 | andrew@ugh.net.au | Tag Attached: patch | |
| 2021-02-26 14:33 | christos | Note Added: 0003563 | |
| 2021-02-27 01:01 | christos | Assigned To | => christos |
| 2021-02-27 01:01 | christos | Status | new => assigned |
| 2021-02-27 01:02 | christos | File Added: delim.diff | |
| 2021-02-27 01:02 | christos | Note Added: 0003564 |