View Issue Details

IDProjectCategoryView StatusLast Update
0000214tcshGeneralpublic2021-02-27 01:02
Reporterandrew@ugh.net.au Assigned Tochristos  
PrioritynormalSeverityminorReproducibilityalways
Status assignedResolutionopen 
Product Version6.22.03 
Summary0000214: Can't escape delimiters in :s modifier
Descriptionthe man page, under "History substitution" for the s modifier says:

> Any character may be used as the delimiter in place of `/'; a `\' can be used to quote the delimiter inside l and r.

\ does not quote the delimiter currently. I didn't go back to see if it used to work.
Steps To Reproduce```
>set a='a/b'
>echo $a
a/b
>echo $a:s/\//#/
a/b
```

The output should have been `a#b`
Additional InformationPatch attached
Tagspatch

Activities

andrew@ugh.net.au

2020-12-05 00:40

reporter  

sh.dol.c.patch (2,458 bytes)   
diff --git a/sh.dol.c b/sh.dol.c
index 22d09e4..0ae232c 100644
--- a/sh.dol.c
+++ b/sh.dol.c
@@ -791,6 +791,50 @@ all_dolmcnts_are_0()
     return 1;
 }
 
+/* Unescape the sub of an :s modifier
+ *
+ * start = start of string
+ * delim = delim character
+ * consumed = incremented by the number of characters consumed before finding
+ * 	delim
+ *
+ * Returns the length of the string after unescaping
+ *
+ * Note the characters that start points at will be changed if an escape is
+ * found - those after the escape character will all be shifted to the left.
+ *
+ * \ is always used as the escape character
+ *
+ * e.g. unescnhs("abc/def/", '/', &i) returns 3 and increments i by 3
+ * 	unescnhs("a\/bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * 		start[0..4] = "a/bc"
+ * 	unescnhs("a\\bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * 		start[0..4] = "a\bc"
+ */
+static size_t
+unescnhs(Char *nhs, Char delim, size_t *consumed) {
+    /* how far through the string we have got to */
+    size_t i;
+    /* offset is how far behind the output string is compared to the number of
+     * chars we have consumed. every escape makes us one more behind */
+    size_t offset;
+
+    for (i = 0, offset = 0; ; i++) {
+	if (nhs[i] == '\\') {
+	    offset++;
+	    i++;
+	} else if (nhs[i] == delim) {
+	    /* this is the end so we can return */
+	    *consumed += i;
+	    return i - offset;
+	}
+
+	/* place the current char in its right place in the array, allowing
+	 * for any removed escape characters */
+	nhs[i - offset] = nhs[i];
+    }
+}
+
 static void
 setDolp(Char *cp)
 {
@@ -813,6 +857,8 @@ setDolp(Char *cp)
 	    size_t lhlen = 0, rhlen = 0;
 	    /* keep track of where the last :a match hit */
 	    ptrdiff_t last_match = 0;
+	    /* if there is an escaped delim then we need to strip the escape
+	     * and that means shuffling later characters to the left */
 
 	    delim = dolmod.s[++i];
 	    if (!delim || letter(delim)
@@ -821,15 +867,11 @@ setDolp(Char *cp)
 		break;
 	    }
 	    lhsub = &dolmod.s[++i];
-	    while(dolmod.s[i] != delim && dolmod.s[++i]) {
-		lhlen++;
-	    }
-	    dolmod.s[i] = 0;
+	    lhlen = unescnhs(lhsub, delim, &i);
+	    lhsub[lhlen] = 0;
 	    rhsub = &dolmod.s[++i];
-	    while(dolmod.s[i] != delim && dolmod.s[++i]) {
-		rhlen++;
-	    }
-	    dolmod.s[i] = 0;
+	    rhlen = unescnhs(rhsub, delim, &i);
+	    rhsub[rhlen] = 0;
 
 	    strip(lhsub);
 	    strip(rhsub);
sh.dol.c.patch (2,458 bytes)   

andrew@ugh.net.au

2020-12-05 00:43

reporter   ~0003499

This replaces the previous patch which somehow had some misplaced comments in it.
sh.dol.c-2.patch (2,097 bytes)   
diff --git a/sh.dol.c b/sh.dol.c
index 22d09e4..68ee959 100644
--- a/sh.dol.c
+++ b/sh.dol.c
@@ -791,6 +791,50 @@ all_dolmcnts_are_0()
     return 1;
 }
 
+/* Unescape the sub of an :s modifier
+ *
+ * start = start of string
+ * delim = delim character
+ * consumed = incremented by the number of characters consumed before finding
+ * 	delim
+ *
+ * Returns the length of the string after unescaping
+ *
+ * Note the characters that start points at will be changed if an escape is
+ * found - those after the escape character will all be shifted to the left.
+ *
+ * \ is always used as the escape character
+ *
+ * e.g. unescnhs("abc/def/", '/', &i) returns 3 and increments i by 3
+ * 	unescnhs("a\/bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * 		start[0..4] = "a/bc"
+ * 	unescnhs("a\\bc/def/", '/', &i) returns 4, increments i by 5 and makes
+ * 		start[0..4] = "a\bc"
+ */
+static size_t
+unescnhs(Char *nhs, Char delim, size_t *consumed) {
+    /* how far through the string we have got to */
+    size_t i;
+    /* offset is how far behind the output string is compared to the number of
+     * chars we have consumed. every escape makes us one more behind */
+    size_t offset;
+
+    for (i = 0, offset = 0; ; i++) {
+	if (nhs[i] == '\\') {
+	    offset++;
+	    i++;
+	} else if (nhs[i] == delim) {
+	    /* this is the end so we can return */
+	    *consumed += i;
+	    return i - offset;
+	}
+
+	/* place the current char in its right place in the array, allowing
+	 * for any removed escape characters */
+	nhs[i - offset] = nhs[i];
+    }
+}
+
 static void
 setDolp(Char *cp)
 {
@@ -821,15 +865,11 @@ setDolp(Char *cp)
 		break;
 	    }
 	    lhsub = &dolmod.s[++i];
-	    while(dolmod.s[i] != delim && dolmod.s[++i]) {
-		lhlen++;
-	    }
-	    dolmod.s[i] = 0;
+	    lhlen = unescnhs(lhsub, delim, &i);
+	    lhsub[lhlen] = 0;
 	    rhsub = &dolmod.s[++i];
-	    while(dolmod.s[i] != delim && dolmod.s[++i]) {
-		rhlen++;
-	    }
-	    dolmod.s[i] = 0;
+	    rhlen = unescnhs(rhsub, delim, &i);
+	    rhsub[rhlen] = 0;
 
 	    strip(lhsub);
 	    strip(rhsub);
sh.dol.c-2.patch (2,097 bytes)   

christos

2021-02-26 14:33

manager   ~0003563

I am wondering if that ever worked and we broke it, or if it never worked and this patch is needed. I need to take a more careful look.

christos

2021-02-27 01:02

manager   ~0003564

I think we need to parse both at the lexical level and at dollar evaluation like below.
delim.diff (1,412 bytes)   
diff --git a/sh.dol.c b/sh.dol.c
index 28e8639..362ec55 100644
--- a/sh.dol.c
+++ b/sh.dol.c
@@ -746,6 +746,7 @@ fixDolMod(void)
 
 	    if (c == 's') {	/* [eichin:19910926.0755EST] */
 		int delimcnt = 2;
+		int esc = 0;
 		eChar delim = DgetC(0);
 		Strbuf_append1(&dolmod, (Char) c);
 		Strbuf_append1(&dolmod, (Char) delim);
@@ -756,9 +757,14 @@ fixDolMod(void)
 		    break;
 		}	
 		while ((c = DgetC(0)) != DEOF) {
+		    if (esc == 0 && c == '\\') {
+			esc = 1;
+			continue;
+		    }
 		    Strbuf_append1(&dolmod, (Char) c);
-		    if (c == delim) delimcnt--;
+		    if (!esc && c == delim) delimcnt--;
 		    if (!delimcnt) break;
+		    esc = 0;
 		}
 		if (delimcnt) {
 		    seterror(ERR_BADSUBST);
diff --git a/sh.lex.c b/sh.lex.c
index 46cc96d..4277912 100644
--- a/sh.lex.c
+++ b/sh.lex.c
@@ -618,6 +618,7 @@ getdol(void)
 	    /* scan s// [eichin:19910926.0512EST] */
 	    if (c == 's') {
 		int delimcnt = 2;
+		int esc = 0;
 		eChar delim = getC(0);
 
 		Strbuf_append1(&name, delim);
@@ -627,9 +628,15 @@ getdol(void)
 		    break;
 		}
 		while ((c = getC(0)) != CHAR_ERR) {
+		    if (esc == 0 && c == '\\') {
+			esc = 1;
+			Strbuf_append1(&name, c);
+			continue;
+		    }
 		    Strbuf_append1(&name, c);
-		    if (c == delim) delimcnt--;
+		    if (!esc && c == delim) delimcnt--;
 		    if (!delimcnt) break;
+		    esc = 0;
 		}
 		if (delimcnt) {
 		    seterror(ERR_BADSUBST);
delim.diff (1,412 bytes)   

Issue History

Date Modified Username Field Change
2020-12-05 00:40 andrew@ugh.net.au New Issue
2020-12-05 00:40 andrew@ugh.net.au File Added: sh.dol.c.patch
2020-12-05 00:43 andrew@ugh.net.au File Added: sh.dol.c-2.patch
2020-12-05 00:43 andrew@ugh.net.au Note Added: 0003499
2021-01-13 12:13 andrew@ugh.net.au Tag Attached: patch
2021-02-26 14:33 christos Note Added: 0003563
2021-02-27 01:01 christos Assigned To => christos
2021-02-27 01:01 christos Status new => assigned
2021-02-27 01:02 christos File Added: delim.diff
2021-02-27 01:02 christos Note Added: 0003564