🐐 SVBSTITVE

This commit is contained in:
2026-04-22 09:30:58 +02:00
parent b9a1ed1bcc
commit 39218485c7
8 changed files with 136 additions and 2 deletions

View File

@@ -354,6 +354,11 @@ Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
### SVBSTITVE
`SVBSTITVE(pattern, replacement, string)`
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid.
## Modules
Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having

View File

@@ -1290,6 +1290,17 @@ class BuiltIn(Node):
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValList(matches)
case "SVBSTITVE":
pattern = params[0]
replacement = params[1]
text = params[2]
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
raise CentvrionError("SVBSTITVE requires three strings")
try:
result = re.sub(pattern.value(), replacement.value(), text.value())
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValStr(result)
case _:
raise NotImplementedError(self.builtin)

View File

@@ -300,6 +300,9 @@ def _emit_builtin(node, ctx):
case "QVAERE":
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
case "SVBSTITVE":
lines.append(f"CentValue {tmp} = cent_svbstitve({param_vars[0]}, {param_vars[1]}, {param_vars[2]});")
case _:
raise NotImplementedError(node.builtin)

View File

@@ -902,6 +902,99 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
return result;
}
/* Ensure the arena-backed buffer *out can hold `extra` more bytes after
 * position `opos`, plus one byte reserved for a trailing NUL.  The capacity
 * is doubled until it fits; the first `opos` bytes are carried over into the
 * new allocation.  The old buffer is simply abandoned (no free is issued). */
static void repl_reserve(char **out, size_t opos, size_t *ocap, size_t extra) {
    if (opos + extra + 1 <= *ocap)
        return;

    size_t newcap = *ocap;
    while (opos + extra + 1 > newcap)
        newcap *= 2;

    char *newbuf = cent_arena_alloc(cent_arena, newcap);
    memcpy(newbuf, *out, opos);
    *out = newbuf;
    *ocap = newcap;
}

/*
 * Expand a replacement string into the output buffer.
 *
 *   repl     NUL-terminated replacement template.
 *   subject  string that the offsets in `matches` are relative to.
 *   matches  regexec() result array; entry g describes capture group g.
 *   ngroups  number of valid entries in `matches`.
 *   out, opos, ocap
 *            output buffer, current write position and capacity; all three
 *            are updated in place when bytes are appended or the buffer grows.
 *
 * Template syntax:
 *   \1 .. \9  text of the corresponding capture group.  A reference to a
 *             group that does not exist (g >= ngroups) or did not take part
 *             in the match (rm_so == -1) expands to nothing.
 *   \\        a single literal backslash.
 *   other     copied verbatim.  A backslash followed by any other character
 *             (or at the end of the template) is copied as-is.
 *
 * The output is NOT NUL-terminated here, but room for the terminator is
 * always kept so the caller can write it.
 */
static void _expand_replacement(const char *repl, const char *subject,
                                regmatch_t *matches, int ngroups,
                                char **out, size_t *opos, size_t *ocap) {
    for (const char *r = repl; *r; r++) {
        if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
            int g = r[1] - '0';
            r++;  /* consume the digit as well */
            if (g < ngroups && matches[g].rm_so != -1) {
                size_t glen = (size_t)(matches[g].rm_eo - matches[g].rm_so);
                repl_reserve(out, *opos, ocap, glen);
                memcpy(*out + *opos, subject + matches[g].rm_so, glen);
                *opos += glen;
            }
        } else if (*r == '\\' && r[1] == '\\') {
            /* escaped backslash -> literal \ */
            repl_reserve(out, *opos, ocap, 1);
            (*out)[(*opos)++] = '\\';
            r++;  /* consume the second backslash */
        } else {
            repl_reserve(out, *opos, ocap, 1);
            (*out)[(*opos)++] = *r;
        }
    }
}
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text) {
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
cent_type_error("'SVBSTITVE' requires three strings");
regex_t re;
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
if (rc != 0) {
char errbuf[256];
regerror(rc, &re, errbuf, sizeof(errbuf));
regfree(&re);
cent_runtime_error(errbuf);
}
size_t text_len = strlen(text.sval);
size_t repl_len = strlen(replacement.sval);
size_t cap = text_len + repl_len * 4 + 1;
char *result = cent_arena_alloc(cent_arena, cap);
size_t rpos = 0;
const char *cursor = text.sval;
int ngroups = (int)re.re_nsub + 1;
if (ngroups > 10) ngroups = 10;
regmatch_t matches[10];
while (*cursor && regexec(&re, cursor, ngroups, matches, 0) == 0) {
/* copy text before match */
size_t prefix_len = matches[0].rm_so;
while (rpos + prefix_len + 1 > cap) {
cap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, cap);
memcpy(newbuf, result, rpos);
result = newbuf;
}
memcpy(result + rpos, cursor, prefix_len);
rpos += prefix_len;
/* expand replacement with backreferences */
_expand_replacement(replacement.sval, cursor, matches, ngroups,
&result, &rpos, &cap);
cursor += matches[0].rm_eo;
if (matches[0].rm_eo == 0) cursor++;
}
/* copy remaining text */
size_t tail_len = strlen(cursor);
while (rpos + tail_len + 1 > cap) {
cap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, cap);
memcpy(newbuf, result, rpos);
result = newbuf;
}
memcpy(result + rpos, cursor, tail_len);
rpos += tail_len;
result[rpos] = '\0';
regfree(&re);
return cent_str(result);
}
/* ------------------------------------------------------------------ */
/* Initialisation */
/* ------------------------------------------------------------------ */

View File

@@ -233,6 +233,7 @@ CentValue cent_lege(CentValue path); /* LEGE */
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text); /* SVBSTITVE */
/* ------------------------------------------------------------------ */
/* Array helpers */

View File

@@ -58,7 +58,8 @@ builtin_tokens = [("BUILTIN", i) for i in [
"LEGE",
"SCRIBE",
"ADIVNGE",
"QVAERE"
"QVAERE",
"SVBSTITVE"
]]
data_tokens = [

View File

@@ -70,7 +70,7 @@ contexts:
scope: constant.language.centvrion
builtins:
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|SCRIBE|SEMEN|SENATVS)\b'
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|QVAERE|SCRIBE|SEMEN|SENATVS|SVBSTITVE)\b'
scope: support.function.builtin.centvrion
modules:

View File

@@ -620,6 +620,22 @@ builtin_tests = [
('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])),
# QVAERE: dot matches any character
('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])),
# SVBSTITVE: basic literal replacement
('SVBSTITVE("a", "b", "aaa")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("aaa")]))]), ValStr("bbb")),
# SVBSTITVE: regex character class
('SVBSTITVE("[0-9]+", "N", "abc123def456")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("[0-9]+"), String("N"), String("abc123def456")]))]), ValStr("abcNdefN")),
# SVBSTITVE: no match → string unchanged
('SVBSTITVE("x", "y", "abc")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("x"), String("y"), String("abc")]))]), ValStr("abc")),
# SVBSTITVE: empty replacement (deletion)
('SVBSTITVE("a", "", "banana")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String(""), String("banana")]))]), ValStr("bnn")),
# SVBSTITVE: empty text → empty string
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
# SVBSTITVE: dot matches any character
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
# SVBSTITVE: backreference swaps two groups
('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")),
# SVBSTITVE: backreference with unmatched group (ignored)
('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")),
]
class TestBuiltins(unittest.TestCase):
@@ -704,6 +720,10 @@ error_tests = [
('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int
('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int
('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex
('SVBSTITVE(I, "b", "c")', CentvrionError), # SVBSTITVE requires strings, not int pattern
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
]
class TestErrors(unittest.TestCase):