🐐 SVBSTITVE

This commit is contained in:
2026-04-22 09:30:58 +02:00
parent b9a1ed1bcc
commit 39218485c7
8 changed files with 136 additions and 2 deletions

View File

@@ -1290,6 +1290,17 @@ class BuiltIn(Node):
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValList(matches)
case "SVBSTITVE":
pattern = params[0]
replacement = params[1]
text = params[2]
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
raise CentvrionError("SVBSTITVE requires three strings")
try:
result = re.sub(pattern.value(), replacement.value(), text.value())
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValStr(result)
case _:
raise NotImplementedError(self.builtin)

View File

@@ -300,6 +300,9 @@ def _emit_builtin(node, ctx):
case "QVAERE":
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
case "SVBSTITVE":
lines.append(f"CentValue {tmp} = cent_svbstitve({param_vars[0]}, {param_vars[1]}, {param_vars[2]});")
case _:
raise NotImplementedError(node.builtin)

View File

@@ -902,6 +902,99 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
return result;
}
/*
 * Ensure the arena-backed buffer *out (opos bytes in use, *ocap bytes of
 * capacity) can take `extra` more bytes and still keep one spare byte for a
 * terminating NUL.  When it cannot, the capacity is doubled until it fits and
 * the used bytes are copied into one freshly allocated buffer; the old buffer
 * is simply left behind, as nothing here releases it.
 */
static void repl_reserve(char **out, size_t opos, size_t *ocap, size_t extra) {
    if (opos + extra + 1 <= *ocap)
        return;

    /* Start from at least 1 so the doubling below always makes progress. */
    size_t newcap = *ocap ? *ocap : 1;
    while (opos + extra + 1 > newcap)
        newcap *= 2;

    char *grown = cent_arena_alloc(cent_arena, newcap);
    memcpy(grown, *out, opos);
    *out = grown;
    *ocap = newcap;
}

/*
 * Append the expansion of the replacement template `repl` to the output
 * buffer.
 *
 *   \1 .. \9   the text captured by that group; a group number >= ngroups,
 *              or a group that did not take part in the match
 *              (rm_so == -1), expands to nothing
 *   \\         a single literal backslash
 *   other      copied through unchanged (including a backslash followed by
 *              any other character, or a trailing backslash)
 *
 * repl     NUL-terminated replacement template
 * subject  string the offsets in `matches` are relative to
 * matches  regexec() result array; only indices below `ngroups` are read
 * ngroups  number of usable entries in `matches`
 * out      in/out: output buffer, replaced when it has to grow
 * opos     in/out: number of bytes used in *out
 * ocap     in/out: capacity of *out
 *
 * No NUL terminator is written; one spare byte is always kept for it.
 */
static void _expand_replacement(const char *repl, const char *subject,
                                regmatch_t *matches, int ngroups,
                                char **out, size_t *opos, size_t *ocap) {
    for (const char *r = repl; *r; r++) {
        /* By default the current character is copied through as-is. */
        const char *src = r;
        size_t len = 1;

        if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
            int g = r[1] - '0';
            r++; /* consume the digit */
            if (g >= ngroups || matches[g].rm_so == -1)
                continue; /* nothing captured: expands to the empty string */
            src = subject + matches[g].rm_so;
            len = (size_t)(matches[g].rm_eo - matches[g].rm_so);
        } else if (*r == '\\' && r[1] == '\\') {
            /* src already points at the first backslash; emit just that one
             * and skip the second. */
            r++;
        }

        repl_reserve(out, *opos, ocap, len);
        memcpy(*out + *opos, src, len);
        *opos += len;
    }
}
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text) {
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
cent_type_error("'SVBSTITVE' requires three strings");
regex_t re;
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
if (rc != 0) {
char errbuf[256];
regerror(rc, &re, errbuf, sizeof(errbuf));
regfree(&re);
cent_runtime_error(errbuf);
}
size_t text_len = strlen(text.sval);
size_t repl_len = strlen(replacement.sval);
size_t cap = text_len + repl_len * 4 + 1;
char *result = cent_arena_alloc(cent_arena, cap);
size_t rpos = 0;
const char *cursor = text.sval;
int ngroups = (int)re.re_nsub + 1;
if (ngroups > 10) ngroups = 10;
regmatch_t matches[10];
while (*cursor && regexec(&re, cursor, ngroups, matches, 0) == 0) {
/* copy text before match */
size_t prefix_len = matches[0].rm_so;
while (rpos + prefix_len + 1 > cap) {
cap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, cap);
memcpy(newbuf, result, rpos);
result = newbuf;
}
memcpy(result + rpos, cursor, prefix_len);
rpos += prefix_len;
/* expand replacement with backreferences */
_expand_replacement(replacement.sval, cursor, matches, ngroups,
&result, &rpos, &cap);
cursor += matches[0].rm_eo;
if (matches[0].rm_eo == 0) cursor++;
}
/* copy remaining text */
size_t tail_len = strlen(cursor);
while (rpos + tail_len + 1 > cap) {
cap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, cap);
memcpy(newbuf, result, rpos);
result = newbuf;
}
memcpy(result + rpos, cursor, tail_len);
rpos += tail_len;
result[rpos] = '\0';
regfree(&re);
return cent_str(result);
}
/* ------------------------------------------------------------------ */
/* Initialisation */
/* ------------------------------------------------------------------ */

View File

@@ -233,6 +233,7 @@ CentValue cent_lege(CentValue path); /* LEGE */
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text); /* SVBSTITVE */
/* ------------------------------------------------------------------ */
/* Array helpers */

View File

@@ -58,7 +58,8 @@ builtin_tokens = [("BUILTIN", i) for i in [
"LEGE",
"SCRIBE",
"ADIVNGE",
"QVAERE"
"QVAERE",
"SVBSTITVE"
]]
data_tokens = [