🐐 SVBSTITVE
This commit is contained in:
@@ -354,6 +354,11 @@ Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as
|
|||||||
|
|
||||||
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
||||||
|
|
||||||
|
### SVBSTITVE
|
||||||
|
`SVBSTITVE(pattern, replacement, string)`
|
||||||
|
|
||||||
|
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement` and returns the resulting string. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups; a backreference to a group that did not take part in the match expands to nothing. Raises an error if the pattern is invalid.
|
||||||
|
|
||||||
## Modules
|
## Modules
|
||||||
Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having
|
Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having
|
||||||
|
|
||||||
|
|||||||
@@ -1290,6 +1290,17 @@ class BuiltIn(Node):
|
|||||||
except re.error as e:
|
except re.error as e:
|
||||||
raise CentvrionError(f"Invalid regex: {e}")
|
raise CentvrionError(f"Invalid regex: {e}")
|
||||||
return vtable, ValList(matches)
|
return vtable, ValList(matches)
|
||||||
|
case "SVBSTITVE":
|
||||||
|
pattern = params[0]
|
||||||
|
replacement = params[1]
|
||||||
|
text = params[2]
|
||||||
|
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
|
||||||
|
raise CentvrionError("SVBSTITVE requires three strings")
|
||||||
|
try:
|
||||||
|
result = re.sub(pattern.value(), replacement.value(), text.value())
|
||||||
|
except re.error as e:
|
||||||
|
raise CentvrionError(f"Invalid regex: {e}")
|
||||||
|
return vtable, ValStr(result)
|
||||||
case _:
|
case _:
|
||||||
raise NotImplementedError(self.builtin)
|
raise NotImplementedError(self.builtin)
|
||||||
|
|
||||||
|
|||||||
@@ -300,6 +300,9 @@ def _emit_builtin(node, ctx):
|
|||||||
case "QVAERE":
|
case "QVAERE":
|
||||||
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
|
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
|
||||||
|
|
||||||
|
case "SVBSTITVE":
|
||||||
|
lines.append(f"CentValue {tmp} = cent_svbstitve({param_vars[0]}, {param_vars[1]}, {param_vars[2]});")
|
||||||
|
|
||||||
case _:
|
case _:
|
||||||
raise NotImplementedError(node.builtin)
|
raise NotImplementedError(node.builtin)
|
||||||
|
|
||||||
|
|||||||
@@ -902,6 +902,99 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Expand replacement string, substituting \1..\9 with captured groups */
|
||||||
|
static void _expand_replacement(const char *repl, const char *subject,
|
||||||
|
regmatch_t *matches, int ngroups,
|
||||||
|
char **out, size_t *opos, size_t *ocap) {
|
||||||
|
for (const char *r = repl; *r; r++) {
|
||||||
|
if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
|
||||||
|
int g = r[1] - '0';
|
||||||
|
r++;
|
||||||
|
if (g < ngroups && matches[g].rm_so != -1) {
|
||||||
|
size_t glen = matches[g].rm_eo - matches[g].rm_so;
|
||||||
|
while (*opos + glen + 1 > *ocap) {
|
||||||
|
*ocap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||||
|
memcpy(newbuf, *out, *opos);
|
||||||
|
*out = newbuf;
|
||||||
|
}
|
||||||
|
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
|
||||||
|
*opos += glen;
|
||||||
|
}
|
||||||
|
} else if (*r == '\\' && r[1] == '\\') {
|
||||||
|
/* escaped backslash → literal \ */
|
||||||
|
if (*opos + 2 > *ocap) {
|
||||||
|
*ocap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||||
|
memcpy(newbuf, *out, *opos);
|
||||||
|
*out = newbuf;
|
||||||
|
}
|
||||||
|
(*out)[(*opos)++] = '\\';
|
||||||
|
r++;
|
||||||
|
} else {
|
||||||
|
if (*opos + 2 > *ocap) {
|
||||||
|
*ocap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||||
|
memcpy(newbuf, *out, *opos);
|
||||||
|
*out = newbuf;
|
||||||
|
}
|
||||||
|
(*out)[(*opos)++] = *r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text) {
|
||||||
|
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
|
||||||
|
cent_type_error("'SVBSTITVE' requires three strings");
|
||||||
|
regex_t re;
|
||||||
|
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
|
||||||
|
if (rc != 0) {
|
||||||
|
char errbuf[256];
|
||||||
|
regerror(rc, &re, errbuf, sizeof(errbuf));
|
||||||
|
regfree(&re);
|
||||||
|
cent_runtime_error(errbuf);
|
||||||
|
}
|
||||||
|
size_t text_len = strlen(text.sval);
|
||||||
|
size_t repl_len = strlen(replacement.sval);
|
||||||
|
size_t cap = text_len + repl_len * 4 + 1;
|
||||||
|
char *result = cent_arena_alloc(cent_arena, cap);
|
||||||
|
size_t rpos = 0;
|
||||||
|
const char *cursor = text.sval;
|
||||||
|
int ngroups = (int)re.re_nsub + 1;
|
||||||
|
if (ngroups > 10) ngroups = 10;
|
||||||
|
regmatch_t matches[10];
|
||||||
|
while (*cursor && regexec(&re, cursor, ngroups, matches, 0) == 0) {
|
||||||
|
/* copy text before match */
|
||||||
|
size_t prefix_len = matches[0].rm_so;
|
||||||
|
while (rpos + prefix_len + 1 > cap) {
|
||||||
|
cap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, cap);
|
||||||
|
memcpy(newbuf, result, rpos);
|
||||||
|
result = newbuf;
|
||||||
|
}
|
||||||
|
memcpy(result + rpos, cursor, prefix_len);
|
||||||
|
rpos += prefix_len;
|
||||||
|
/* expand replacement with backreferences */
|
||||||
|
_expand_replacement(replacement.sval, cursor, matches, ngroups,
|
||||||
|
&result, &rpos, &cap);
|
||||||
|
cursor += matches[0].rm_eo;
|
||||||
|
if (matches[0].rm_eo == 0) cursor++;
|
||||||
|
}
|
||||||
|
/* copy remaining text */
|
||||||
|
size_t tail_len = strlen(cursor);
|
||||||
|
while (rpos + tail_len + 1 > cap) {
|
||||||
|
cap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, cap);
|
||||||
|
memcpy(newbuf, result, rpos);
|
||||||
|
result = newbuf;
|
||||||
|
}
|
||||||
|
memcpy(result + rpos, cursor, tail_len);
|
||||||
|
rpos += tail_len;
|
||||||
|
result[rpos] = '\0';
|
||||||
|
regfree(&re);
|
||||||
|
return cent_str(result);
|
||||||
|
}
|
||||||
|
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
/* Initialisation */
|
/* Initialisation */
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
|
|||||||
@@ -233,6 +233,7 @@ CentValue cent_lege(CentValue path); /* LEGE */
|
|||||||
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
|
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
|
||||||
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
|
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
|
||||||
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
|
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
|
||||||
|
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text); /* SVBSTITVE */
|
||||||
|
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
/* Array helpers */
|
/* Array helpers */
|
||||||
|
|||||||
@@ -58,7 +58,8 @@ builtin_tokens = [("BUILTIN", i) for i in [
|
|||||||
"LEGE",
|
"LEGE",
|
||||||
"SCRIBE",
|
"SCRIBE",
|
||||||
"ADIVNGE",
|
"ADIVNGE",
|
||||||
"QVAERE"
|
"QVAERE",
|
||||||
|
"SVBSTITVE"
|
||||||
]]
|
]]
|
||||||
|
|
||||||
data_tokens = [
|
data_tokens = [
|
||||||
|
|||||||
@@ -70,7 +70,7 @@ contexts:
|
|||||||
scope: constant.language.centvrion
|
scope: constant.language.centvrion
|
||||||
|
|
||||||
builtins:
|
builtins:
|
||||||
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|SCRIBE|SEMEN|SENATVS)\b'
|
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|QVAERE|SCRIBE|SEMEN|SENATVS|SVBSTITVE)\b'
|
||||||
scope: support.function.builtin.centvrion
|
scope: support.function.builtin.centvrion
|
||||||
|
|
||||||
modules:
|
modules:
|
||||||
|
|||||||
20
tests.py
20
tests.py
@@ -620,6 +620,22 @@ builtin_tests = [
|
|||||||
('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])),
|
('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])),
|
||||||
# QVAERE: dot matches any character
|
# QVAERE: dot matches any character
|
||||||
('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])),
|
('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])),
|
||||||
|
# SVBSTITVE: basic literal replacement
|
||||||
|
('SVBSTITVE("a", "b", "aaa")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("aaa")]))]), ValStr("bbb")),
|
||||||
|
# SVBSTITVE: regex character class
|
||||||
|
('SVBSTITVE("[0-9]+", "N", "abc123def456")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("[0-9]+"), String("N"), String("abc123def456")]))]), ValStr("abcNdefN")),
|
||||||
|
# SVBSTITVE: no match → string unchanged
|
||||||
|
('SVBSTITVE("x", "y", "abc")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("x"), String("y"), String("abc")]))]), ValStr("abc")),
|
||||||
|
# SVBSTITVE: empty replacement (deletion)
|
||||||
|
('SVBSTITVE("a", "", "banana")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String(""), String("banana")]))]), ValStr("bnn")),
|
||||||
|
# SVBSTITVE: empty text → empty string
|
||||||
|
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
|
||||||
|
# SVBSTITVE: dot matches any character
|
||||||
|
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
|
||||||
|
# SVBSTITVE: backreference swaps two groups
|
||||||
|
('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")),
|
||||||
|
# SVBSTITVE: backreference with unmatched group (ignored)
|
||||||
|
('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")),
|
||||||
]
|
]
|
||||||
|
|
||||||
class TestBuiltins(unittest.TestCase):
|
class TestBuiltins(unittest.TestCase):
|
||||||
@@ -704,6 +720,10 @@ error_tests = [
|
|||||||
('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int
|
('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int
|
||||||
('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int
|
('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int
|
||||||
('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex
|
('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex
|
||||||
|
('SVBSTITVE(I, "b", "c")', CentvrionError), # SVBSTITVE requires strings, not int pattern
|
||||||
|
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
|
||||||
|
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
|
||||||
|
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
|
||||||
]
|
]
|
||||||
|
|
||||||
class TestErrors(unittest.TestCase):
|
class TestErrors(unittest.TestCase):
|
||||||
|
|||||||
Reference in New Issue
Block a user