🐐 SVBSTITVE
This commit is contained in:
@@ -354,6 +354,11 @@ Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as
|
||||
|
||||
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
||||
|
||||
### SVBSTITVE
|
||||
`SVBSTITVE(pattern, replacement, string)`
|
||||
|
||||
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups; a backreference to a group that did not take part in the match expands to nothing. Returns the resulting string. Raises an error if the pattern is invalid.
|
||||
|
||||
## Modules
|
||||
Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having
|
||||
|
||||
|
||||
@@ -1290,6 +1290,17 @@ class BuiltIn(Node):
|
||||
except re.error as e:
|
||||
raise CentvrionError(f"Invalid regex: {e}")
|
||||
return vtable, ValList(matches)
|
||||
case "SVBSTITVE":
|
||||
pattern = params[0]
|
||||
replacement = params[1]
|
||||
text = params[2]
|
||||
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
|
||||
raise CentvrionError("SVBSTITVE requires three strings")
|
||||
try:
|
||||
result = re.sub(pattern.value(), replacement.value(), text.value())
|
||||
except re.error as e:
|
||||
raise CentvrionError(f"Invalid regex: {e}")
|
||||
return vtable, ValStr(result)
|
||||
case _:
|
||||
raise NotImplementedError(self.builtin)
|
||||
|
||||
|
||||
@@ -300,6 +300,9 @@ def _emit_builtin(node, ctx):
|
||||
case "QVAERE":
|
||||
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
|
||||
|
||||
case "SVBSTITVE":
|
||||
lines.append(f"CentValue {tmp} = cent_svbstitve({param_vars[0]}, {param_vars[1]}, {param_vars[2]});")
|
||||
|
||||
case _:
|
||||
raise NotImplementedError(node.builtin)
|
||||
|
||||
|
||||
@@ -902,6 +902,99 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Expand replacement string, substituting \1..\9 with captured groups */
|
||||
static void _expand_replacement(const char *repl, const char *subject,
|
||||
regmatch_t *matches, int ngroups,
|
||||
char **out, size_t *opos, size_t *ocap) {
|
||||
for (const char *r = repl; *r; r++) {
|
||||
if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
|
||||
int g = r[1] - '0';
|
||||
r++;
|
||||
if (g < ngroups && matches[g].rm_so != -1) {
|
||||
size_t glen = matches[g].rm_eo - matches[g].rm_so;
|
||||
while (*opos + glen + 1 > *ocap) {
|
||||
*ocap *= 2;
|
||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||
memcpy(newbuf, *out, *opos);
|
||||
*out = newbuf;
|
||||
}
|
||||
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
|
||||
*opos += glen;
|
||||
}
|
||||
} else if (*r == '\\' && r[1] == '\\') {
|
||||
/* escaped backslash → literal \ */
|
||||
if (*opos + 2 > *ocap) {
|
||||
*ocap *= 2;
|
||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||
memcpy(newbuf, *out, *opos);
|
||||
*out = newbuf;
|
||||
}
|
||||
(*out)[(*opos)++] = '\\';
|
||||
r++;
|
||||
} else {
|
||||
if (*opos + 2 > *ocap) {
|
||||
*ocap *= 2;
|
||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||
memcpy(newbuf, *out, *opos);
|
||||
*out = newbuf;
|
||||
}
|
||||
(*out)[(*opos)++] = *r;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text) {
|
||||
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
|
||||
cent_type_error("'SVBSTITVE' requires three strings");
|
||||
regex_t re;
|
||||
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
|
||||
if (rc != 0) {
|
||||
char errbuf[256];
|
||||
regerror(rc, &re, errbuf, sizeof(errbuf));
|
||||
regfree(&re);
|
||||
cent_runtime_error(errbuf);
|
||||
}
|
||||
size_t text_len = strlen(text.sval);
|
||||
size_t repl_len = strlen(replacement.sval);
|
||||
size_t cap = text_len + repl_len * 4 + 1;
|
||||
char *result = cent_arena_alloc(cent_arena, cap);
|
||||
size_t rpos = 0;
|
||||
const char *cursor = text.sval;
|
||||
int ngroups = (int)re.re_nsub + 1;
|
||||
if (ngroups > 10) ngroups = 10;
|
||||
regmatch_t matches[10];
|
||||
while (*cursor && regexec(&re, cursor, ngroups, matches, 0) == 0) {
|
||||
/* copy text before match */
|
||||
size_t prefix_len = matches[0].rm_so;
|
||||
while (rpos + prefix_len + 1 > cap) {
|
||||
cap *= 2;
|
||||
char *newbuf = cent_arena_alloc(cent_arena, cap);
|
||||
memcpy(newbuf, result, rpos);
|
||||
result = newbuf;
|
||||
}
|
||||
memcpy(result + rpos, cursor, prefix_len);
|
||||
rpos += prefix_len;
|
||||
/* expand replacement with backreferences */
|
||||
_expand_replacement(replacement.sval, cursor, matches, ngroups,
|
||||
&result, &rpos, &cap);
|
||||
cursor += matches[0].rm_eo;
|
||||
if (matches[0].rm_eo == 0) cursor++;
|
||||
}
|
||||
/* copy remaining text */
|
||||
size_t tail_len = strlen(cursor);
|
||||
while (rpos + tail_len + 1 > cap) {
|
||||
cap *= 2;
|
||||
char *newbuf = cent_arena_alloc(cent_arena, cap);
|
||||
memcpy(newbuf, result, rpos);
|
||||
result = newbuf;
|
||||
}
|
||||
memcpy(result + rpos, cursor, tail_len);
|
||||
rpos += tail_len;
|
||||
result[rpos] = '\0';
|
||||
regfree(&re);
|
||||
return cent_str(result);
|
||||
}
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Initialisation */
|
||||
/* ------------------------------------------------------------------ */
|
||||
|
||||
@@ -233,6 +233,7 @@ CentValue cent_lege(CentValue path); /* LEGE */
|
||||
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
|
||||
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
|
||||
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
|
||||
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text); /* SVBSTITVE */
|
||||
|
||||
/* ------------------------------------------------------------------ */
|
||||
/* Array helpers */
|
||||
|
||||
@@ -58,7 +58,8 @@ builtin_tokens = [("BUILTIN", i) for i in [
|
||||
"LEGE",
|
||||
"SCRIBE",
|
||||
"ADIVNGE",
|
||||
"QVAERE"
|
||||
"QVAERE",
|
||||
"SVBSTITVE"
|
||||
]]
|
||||
|
||||
data_tokens = [
|
||||
|
||||
@@ -70,7 +70,7 @@ contexts:
|
||||
scope: constant.language.centvrion
|
||||
|
||||
builtins:
|
||||
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|SCRIBE|SEMEN|SENATVS)\b'
|
||||
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|QVAERE|SCRIBE|SEMEN|SENATVS|SVBSTITVE)\b'
|
||||
scope: support.function.builtin.centvrion
|
||||
|
||||
modules:
|
||||
|
||||
20
tests.py
20
tests.py
@@ -620,6 +620,22 @@ builtin_tests = [
|
||||
('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])),
|
||||
# QVAERE: dot matches any character
|
||||
('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])),
|
||||
# SVBSTITVE: basic literal replacement
|
||||
('SVBSTITVE("a", "b", "aaa")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("aaa")]))]), ValStr("bbb")),
|
||||
# SVBSTITVE: regex character class
|
||||
('SVBSTITVE("[0-9]+", "N", "abc123def456")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("[0-9]+"), String("N"), String("abc123def456")]))]), ValStr("abcNdefN")),
|
||||
# SVBSTITVE: no match → string unchanged
|
||||
('SVBSTITVE("x", "y", "abc")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("x"), String("y"), String("abc")]))]), ValStr("abc")),
|
||||
# SVBSTITVE: empty replacement (deletion)
|
||||
('SVBSTITVE("a", "", "banana")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String(""), String("banana")]))]), ValStr("bnn")),
|
||||
# SVBSTITVE: empty text → empty string
|
||||
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
|
||||
# SVBSTITVE: dot matches any character
|
||||
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
|
||||
# SVBSTITVE: backreference swaps two groups
|
||||
('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")),
|
||||
# SVBSTITVE: backreference with unmatched group (ignored)
|
||||
('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")),
|
||||
]
|
||||
|
||||
class TestBuiltins(unittest.TestCase):
|
||||
@@ -704,6 +720,10 @@ error_tests = [
|
||||
('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int
|
||||
('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int
|
||||
('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex
|
||||
('SVBSTITVE(I, "b", "c")', CentvrionError), # SVBSTITVE requires strings, not int pattern
|
||||
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
|
||||
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
|
||||
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
|
||||
]
|
||||
|
||||
class TestErrors(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user