🐐 SVBSTITVE

This commit is contained in:
2026-04-22 09:30:58 +02:00
parent b9a1ed1bcc
commit 39218485c7
8 changed files with 136 additions and 2 deletions

View File

@@ -354,6 +354,11 @@ Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid. Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
### SVBSTITVE
`SVBSTITVE(pattern, replacement, string)`
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid.
## Modules ## Modules
Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having

View File

@@ -1290,6 +1290,17 @@ class BuiltIn(Node):
except re.error as e: except re.error as e:
raise CentvrionError(f"Invalid regex: {e}") raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValList(matches) return vtable, ValList(matches)
case "SVBSTITVE":
pattern = params[0]
replacement = params[1]
text = params[2]
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
raise CentvrionError("SVBSTITVE requires three strings")
try:
result = re.sub(pattern.value(), replacement.value(), text.value())
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValStr(result)
case _: case _:
raise NotImplementedError(self.builtin) raise NotImplementedError(self.builtin)

View File

@@ -300,6 +300,9 @@ def _emit_builtin(node, ctx):
case "QVAERE": case "QVAERE":
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});") lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
case "SVBSTITVE":
lines.append(f"CentValue {tmp} = cent_svbstitve({param_vars[0]}, {param_vars[1]}, {param_vars[2]});")
case _: case _:
raise NotImplementedError(node.builtin) raise NotImplementedError(node.builtin)

View File

@@ -902,6 +902,99 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
return result; return result;
} }
/* Guarantee that the output buffer can take `extra` more bytes plus a
 * trailing NUL.  The capacity is doubled until the request fits; each
 * doubling takes a fresh block from the arena and carries over the `used`
 * bytes written so far (the previous block is simply abandoned). */
static void _repl_reserve(char **out, size_t used, size_t *ocap, size_t extra) {
    while (used + extra + 1 > *ocap) {
        *ocap *= 2;
        char *grown = cent_arena_alloc(cent_arena, *ocap);
        memcpy(grown, *out, used);
        *out = grown;
    }
}

/* Append the expansion of the replacement template `repl` to the growable
 * buffer described by (*out, *opos, *ocap).
 *
 *   \1 .. \9  -> text of that capture group, taken from `subject` using the
 *                offsets in `matches`; a group that is out of range
 *                (>= ngroups) or did not participate (rm_so == -1) expands
 *                to nothing
 *   \\        -> a single literal backslash
 *   otherwise -> the character itself (a backslash that starts no known
 *                escape is copied verbatim)
 *
 * `matches` offsets are interpreted relative to `subject`.  No NUL is
 * written here; the buffer only keeps room for one. */
static void _expand_replacement(const char *repl, const char *subject,
                                regmatch_t *matches, int ngroups,
                                char **out, size_t *opos, size_t *ocap) {
    const char *p = repl;
    while (*p) {
        /* Safe to peek: *p is not NUL, so p[1] is at worst the terminator. */
        char next = p[1];
        int is_group_ref = (next >= '1' && next <= '9');

        if (*p != '\\' || !(is_group_ref || next == '\\')) {
            /* ordinary character */
            _repl_reserve(out, *opos, ocap, 1);
            (*out)[(*opos)++] = *p;
            p++;
            continue;
        }

        /* From here on we consume the backslash and the byte after it. */
        p += 2;

        if (next == '\\') {
            /* escaped backslash -> literal \ */
            _repl_reserve(out, *opos, ocap, 1);
            (*out)[(*opos)++] = '\\';
            continue;
        }

        /* backreference \1..\9 */
        int group = next - '0';
        if (group >= ngroups || matches[group].rm_so == -1)
            continue;

        size_t span = matches[group].rm_eo - matches[group].rm_so;
        _repl_reserve(out, *opos, ocap, span);
        memcpy(*out + *opos, subject + matches[group].rm_so, span);
        *opos += span;
    }
}
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text) {
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
cent_type_error("'SVBSTITVE' requires three strings");
regex_t re;
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
if (rc != 0) {
char errbuf[256];
regerror(rc, &re, errbuf, sizeof(errbuf));
regfree(&re);
cent_runtime_error(errbuf);
}
size_t text_len = strlen(text.sval);
size_t repl_len = strlen(replacement.sval);
size_t cap = text_len + repl_len * 4 + 1;
char *result = cent_arena_alloc(cent_arena, cap);
size_t rpos = 0;
const char *cursor = text.sval;
int ngroups = (int)re.re_nsub + 1;
if (ngroups > 10) ngroups = 10;
regmatch_t matches[10];
while (*cursor && regexec(&re, cursor, ngroups, matches, 0) == 0) {
/* copy text before match */
size_t prefix_len = matches[0].rm_so;
while (rpos + prefix_len + 1 > cap) {
cap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, cap);
memcpy(newbuf, result, rpos);
result = newbuf;
}
memcpy(result + rpos, cursor, prefix_len);
rpos += prefix_len;
/* expand replacement with backreferences */
_expand_replacement(replacement.sval, cursor, matches, ngroups,
&result, &rpos, &cap);
cursor += matches[0].rm_eo;
if (matches[0].rm_eo == 0) cursor++;
}
/* copy remaining text */
size_t tail_len = strlen(cursor);
while (rpos + tail_len + 1 > cap) {
cap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, cap);
memcpy(newbuf, result, rpos);
result = newbuf;
}
memcpy(result + rpos, cursor, tail_len);
rpos += tail_len;
result[rpos] = '\0';
regfree(&re);
return cent_str(result);
}
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/* Initialisation */ /* Initialisation */
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */

View File

@@ -233,6 +233,7 @@ CentValue cent_lege(CentValue path); /* LEGE */
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */ void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */ void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */ CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text); /* SVBSTITVE */
/* ------------------------------------------------------------------ */ /* ------------------------------------------------------------------ */
/* Array helpers */ /* Array helpers */

View File

@@ -58,7 +58,8 @@ builtin_tokens = [("BUILTIN", i) for i in [
"LEGE", "LEGE",
"SCRIBE", "SCRIBE",
"ADIVNGE", "ADIVNGE",
"QVAERE" "QVAERE",
"SVBSTITVE"
]] ]]
data_tokens = [ data_tokens = [

View File

@@ -70,7 +70,7 @@ contexts:
scope: constant.language.centvrion scope: constant.language.centvrion
builtins: builtins:
- match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|SCRIBE|SEMEN|SENATVS)\b' - match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|CLAVES|DECIMATIO|DIC|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|QVAERE|SCRIBE|SEMEN|SENATVS|SVBSTITVE)\b'
scope: support.function.builtin.centvrion scope: support.function.builtin.centvrion
modules: modules:

View File

@@ -620,6 +620,22 @@ builtin_tests = [
('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])), ('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])),
# QVAERE: dot matches any character # QVAERE: dot matches any character
('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])), ('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])),
# SVBSTITVE: basic literal replacement
('SVBSTITVE("a", "b", "aaa")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("aaa")]))]), ValStr("bbb")),
# SVBSTITVE: regex character class
('SVBSTITVE("[0-9]+", "N", "abc123def456")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("[0-9]+"), String("N"), String("abc123def456")]))]), ValStr("abcNdefN")),
# SVBSTITVE: no match → string unchanged
('SVBSTITVE("x", "y", "abc")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("x"), String("y"), String("abc")]))]), ValStr("abc")),
# SVBSTITVE: empty replacement (deletion)
('SVBSTITVE("a", "", "banana")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String(""), String("banana")]))]), ValStr("bnn")),
# SVBSTITVE: empty text → empty string
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
# SVBSTITVE: dot matches any character
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
# SVBSTITVE: backreference swaps two groups
('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")),
# SVBSTITVE: backreference with unmatched group (ignored)
('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")),
] ]
class TestBuiltins(unittest.TestCase): class TestBuiltins(unittest.TestCase):
@@ -704,6 +720,10 @@ error_tests = [
('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int ('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int
('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int ('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int
('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex ('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex
('SVBSTITVE(I, "b", "c")', CentvrionError), # SVBSTITVE requires strings, not int pattern
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
] ]
class TestErrors(unittest.TestCase): class TestErrors(unittest.TestCase):