🐐 Fixes

This commit is contained in:
2026-04-22 13:45:55 +02:00
parent f5b8986681
commit 5e2ebcdc9d
4 changed files with 295 additions and 32 deletions

View File

@@ -175,6 +175,89 @@ def make_string(val, magnvm=False, svbnvlla=False) -> str:
else:
raise CentvrionError(f"Cannot display {val!r}")
def _roman_backref(m):
try:
n = num_to_int(m.group(1), False)
except CentvrionError:
return m.group(0)
return f"\\{n}"
def _check_arabic_backref(s):
for i in range(len(s) - 1):
if s[i] == '\\' and s[i+1].isdigit():
raise CentvrionError(f"Invalid escape sequence '\\{s[i+1]}' — use Roman numerals for backreferences")
def _romanize_replacement(s):
_check_arabic_backref(s)
return re.sub(r'\\([IVXLCDM]+)', _roman_backref, s)
def _convert_quantifier(inner):
parts = inner.split(',')
converted = []
for p in parts:
p = p.strip()
if p == '':
converted.append('')
else:
try:
converted.append(str(num_to_int(p, False)))
except CentvrionError:
return None
return '{' + ','.join(converted) + '}'
def _romanize_pattern(s):
result = []
i = 0
while i < len(s):
if s[i] == '\\' and i + 1 < len(s) and s[i+1] in 'IVXLCDM':
# backref: collect Roman numeral chars and convert
j = i + 1
while j < len(s) and s[j] in 'IVXLCDM':
j += 1
try:
n = num_to_int(s[i+1:j], False)
result.append(f'\\{n}')
except CentvrionError:
result.append(s[i:j])
i = j
elif s[i] == '\\' and i + 1 < len(s) and s[i+1].isdigit():
raise CentvrionError(f"Invalid escape sequence '\\{s[i+1]}' — use Roman numerals for backreferences")
elif s[i] == '\\' and i + 1 < len(s):
result.append(s[i:i+2])
i += 2
elif s[i] == '[':
# skip character class
j = i + 1
if j < len(s) and s[j] == '^':
j += 1
if j < len(s) and s[j] == ']':
j += 1
while j < len(s) and s[j] != ']':
if s[j] == '\\' and j + 1 < len(s):
j += 1
j += 1
result.append(s[i:j+1])
i = j + 1
elif s[i] == '{':
j = s.find('}', i)
if j == -1:
result.append(s[i])
i += 1
else:
inner = s[i+1:j]
if re.match(r'^[\d,\s]+$', inner) and re.search(r'\d', inner):
raise CentvrionError(f"Invalid quantifier '{{{inner}}}' — use Roman numerals")
converted = _convert_quantifier(inner)
if converted is not None:
result.append(converted)
else:
result.append(s[i:j+1])
i = j + 1
else:
result.append(s[i])
i += 1
return ''.join(result)
# Fraction glyphs, each paired with an integer weight: "S" -> 6, ":" -> 2, "." -> 1,
# listed from largest weight to smallest.
# NOTE(review): the weights look like twelfths (S = 6/12 = one half) — confirm
# against frac_to_fraction before relying on that reading.
FRAC_SYMBOLS = [("S", 6), (":", 2), (".", 1)]
def frac_to_fraction(s, magnvm=False, svbnvlla=False):
@@ -1328,7 +1411,7 @@ class BuiltIn(Node):
try:
matches = [
ValStr(m.group(0))
for m in re.finditer(pattern.value(), text.value())
for m in re.finditer(_romanize_pattern(pattern.value()), text.value())
]
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
@@ -1340,7 +1423,11 @@ class BuiltIn(Node):
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
raise CentvrionError("SVBSTITVE requires three strings")
try:
result = re.sub(pattern.value(), replacement.value(), text.value())
result = re.sub(
_romanize_pattern(pattern.value()),
_romanize_replacement(replacement.value()),
text.value()
)
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValStr(result)

View File

@@ -511,6 +511,14 @@ CentValue cent_eq(CentValue a, CentValue b) {
case CENT_BOOL: return cent_bool(a.bval == b.bval);
case CENT_FUNC: return cent_bool(a.fnval.fn == b.fnval.fn);
case CENT_NULL: return cent_bool(1);
case CENT_LIST: {
if (a.lval.len != b.lval.len) return cent_bool(0);
for (int i = 0; i < a.lval.len; i++) {
CentValue r = cent_eq(a.lval.items[i], b.lval.items[i]);
if (!r.bval) return cent_bool(0);
}
return cent_bool(1);
}
default:
cent_type_error("'EST' not supported for this type");
return cent_null();
@@ -920,11 +928,160 @@ CentValue cent_dict_keys(CentValue dict) {
/* Regex */
/* ------------------------------------------------------------------ */
/* Return 1 when c is one of the seven upper-case Roman numeral letters
 * (I, V, X, L, C, D, M) and 0 for every other character. */
static int _is_roman_char(char c) {
    switch (c) {
    case 'I':
    case 'V':
    case 'X':
    case 'L':
    case 'C':
    case 'D':
    case 'M':
        return 1;
    default:
        return 0;
    }
}
/*
 * Make sure the buffer *out can take `need` more bytes at offset *opos and
 * still have one byte left for a NUL terminator.
 *
 * When the current capacity *ocap is too small it is doubled until it fits,
 * a buffer of that size is obtained from cent_arena_alloc(), the first *opos
 * bytes are copied over, and *out / *ocap are updated.  The previous buffer
 * is not released here.
 *
 * The target capacity is computed before allocating so that a large request
 * costs one allocation and one copy instead of one per doubling, and a
 * starting capacity of 0 cannot make the doubling loop spin forever.
 */
static void _ensure_cap(char **out, size_t *opos, size_t *ocap, size_t need) {
    size_t required = *opos + need + 1;   /* payload plus NUL terminator */
    if (required <= *ocap) {
        return;
    }

    size_t newcap = *ocap ? *ocap : 1;
    while (newcap < required) {
        if (newcap > (size_t)-1 / 2) {
            /* doubling would wrap around; settle for the exact size */
            newcap = required;
            break;
        }
        newcap *= 2;
    }

    char *newbuf = cent_arena_alloc(cent_arena, newcap);
    if (*opos > 0) {
        memcpy(newbuf, *out, *opos);
    }
    *out = newbuf;
    *ocap = newcap;
}
/*
 * Translate Roman-numeral regex syntax in pattern `s` into the numeric form
 * handed to regcomp():
 *
 *   backreferences   \I      -> \1        \XII   -> \12
 *   quantifiers      {III}   -> {3}       {II,V} -> {2,5}     {,V} -> {,5}
 *
 * Other escapes are copied as two-character units and bracket expressions
 * ([...]) are copied verbatim.  A backslash followed by an Arabic digit, or
 * a brace group made only of digits, commas and spaces, is reported through
 * cent_runtime_error().  A brace group that is not purely Roman numerals,
 * commas and spaces (or that is too long for the local buffers) is copied
 * through unchanged.  An unclosed '{' is copied as an ordinary character.
 *
 * Returns a NUL-terminated string obtained from cent_arena_alloc(); `s` is
 * not modified.
 *
 * All output growth goes through _ensure_cap(), which keeps room for the
 * requested bytes plus the final NUL, so no write below needs its own check.
 */
static char *_romanize_pattern(const char *s) {
    size_t slen = strlen(s);
    size_t cap = slen * 2 + 1;
    char *result = cent_arena_alloc(cent_arena, cap);
    size_t rpos = 0;

    for (size_t i = 0; i < slen; ) {
        char c = s[i];

        if (c == '\\' && i + 1 < slen) {
            char next = s[i + 1];

            if (_is_roman_char(next)) {
                /* backref: collect Roman numeral chars and convert */
                size_t j = i + 1;
                while (j < slen && _is_roman_char(s[j])) j++;
                char buf[64];
                size_t len = j - i - 1;
                /* an over-long run is cut to the buffer size before conversion */
                if (len >= sizeof(buf)) len = sizeof(buf) - 1;
                memcpy(buf, s + i + 1, len);
                buf[len] = '\0';
                long val = cent_roman_to_int(buf);
                char numbuf[32];
                snprintf(numbuf, sizeof(numbuf), "\\%ld", val);
                size_t nlen = strlen(numbuf);
                _ensure_cap(&result, &rpos, &cap, nlen);
                memcpy(result + rpos, numbuf, nlen);
                rpos += nlen;
                i = j;
                continue;
            }

            if (next >= '0' && next <= '9') {
                char msg[128];
                snprintf(msg, sizeof(msg),
                         "Invalid escape sequence '\\%c' — use Roman numerals for backreferences", next);
                cent_runtime_error(msg);
                /* NOTE(review): cent_runtime_error() presumably does not
                 * return; if it ever does, the escape is copied below so the
                 * scan still advances. */
            }

            /* any other escape: keep both characters together */
            _ensure_cap(&result, &rpos, &cap, 2);
            result[rpos++] = s[i++];
            result[rpos++] = s[i++];
            continue;
        }

        if (c == '[') {
            /* copy character class verbatim: find its end, then copy once */
            size_t j = i + 1;
            if (j < slen && s[j] == '^') j++;
            if (j < slen && s[j] == ']') j++;   /* leading ']' is a member */
            while (j < slen && s[j] != ']') {
                if (s[j] == '\\' && j + 1 < slen) j++;
                j++;
            }
            if (j < slen) j++;                  /* include the closing ']' */
            size_t chunk = j - i;
            _ensure_cap(&result, &rpos, &cap, chunk);
            memcpy(result + rpos, s + i, chunk);
            rpos += chunk;
            i = j;
            continue;
        }

        if (c == '{') {
            /* find closing brace */
            size_t j = i + 1;
            while (j < slen && s[j] != '}') j++;
            if (j >= slen) {
                _ensure_cap(&result, &rpos, &cap, 1);
                result[rpos++] = s[i++];
                continue;
            }

            const char *body = s + i + 1;
            size_t inner_len = j - i - 1;

            /* reject Arabic digit quantifiers (checked on the full text) */
            int has_digit = 0, all_digit_comma_space = 1;
            for (size_t k = 0; k < inner_len; k++) {
                if (body[k] >= '0' && body[k] <= '9') has_digit = 1;
                else if (body[k] != ',' && body[k] != ' ') all_digit_comma_space = 0;
            }
            if (has_digit && all_digit_comma_space) {
                char msg[192];
                int shown = inner_len < 128 ? (int)inner_len : 127;
                snprintf(msg, sizeof(msg), "Invalid quantifier '{%.*s}' — use Roman numerals", shown, body);
                cent_runtime_error(msg);
            }

            /* convert comma-separated Roman parts */
            char inner[128];
            char converted[128];
            size_t cpos = 0;
            /* a group that does not fit is left untouched rather than being
             * converted from a truncated copy */
            int ok = inner_len < sizeof(inner);
            if (ok) {
                memcpy(inner, body, inner_len);
                inner[inner_len] = '\0';
                char *part = inner;
                for (;;) {
                    char *comma = strchr(part, ',');
                    if (comma) *comma = '\0';
                    /* trim spaces */
                    while (*part == ' ') part++;
                    char *pend = part + strlen(part);
                    while (pend > part && pend[-1] == ' ') *--pend = '\0';
                    if (*part != '\0') {
                        /* check all chars are Roman */
                        int all_roman = 1;
                        for (const char *q = part; *q; q++) {
                            if (!_is_roman_char(*q)) { all_roman = 0; break; }
                        }
                        if (!all_roman) { ok = 0; break; }
                        long val = cent_roman_to_int(part);
                        char numbuf[32];
                        snprintf(numbuf, sizeof(numbuf), "%ld", val);
                        size_t nlen = strlen(numbuf);
                        if (cpos + nlen >= sizeof(converted)) { ok = 0; break; }
                        memcpy(converted + cpos, numbuf, nlen);
                        cpos += nlen;
                    }
                    /* an empty part (e.g. {,V}) contributes nothing */
                    if (!comma) break;
                    if (cpos + 1 >= sizeof(converted)) { ok = 0; break; }
                    converted[cpos++] = ',';
                    part = comma + 1;
                }
            }

            if (ok) {
                _ensure_cap(&result, &rpos, &cap, cpos + 2);
                result[rpos++] = '{';
                memcpy(result + rpos, converted, cpos);
                rpos += cpos;
                result[rpos++] = '}';
            } else {
                /* not valid Roman — copy verbatim */
                size_t chunk = j - i + 1;
                _ensure_cap(&result, &rpos, &cap, chunk);
                memcpy(result + rpos, s + i, chunk);
                rpos += chunk;
            }
            i = j + 1;
            continue;
        }

        _ensure_cap(&result, &rpos, &cap, 1);
        result[rpos++] = s[i++];
    }

    result[rpos] = '\0';
    return result;
}
CentValue cent_qvaere(CentValue pattern, CentValue text) {
if (pattern.type != CENT_STR || text.type != CENT_STR)
cent_type_error("'QVAERE' requires two strings");
regex_t re;
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
int rc = regcomp(&re, _romanize_pattern(pattern.sval), REG_EXTENDED);
if (rc != 0) {
char errbuf[256];
regerror(rc, &re, errbuf, sizeof(errbuf));
@@ -947,42 +1104,39 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
return result;
}
/* Expand replacement string, substituting \1..\9 with captured groups */
/* Expand replacement string, substituting \I..\IX with captured groups */
static void _expand_replacement(const char *repl, const char *subject,
regmatch_t *matches, int ngroups,
char **out, size_t *opos, size_t *ocap) {
for (const char *r = repl; *r; r++) {
if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
int g = r[1] - '0';
r++;
if (*r == '\\' && _is_roman_char(r[1])) {
const char *start = r + 1;
const char *end = start;
while (_is_roman_char(*end)) end++;
char buf[64];
size_t len = (size_t)(end - start);
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
memcpy(buf, start, len);
buf[len] = '\0';
int g = (int)cent_roman_to_int(buf);
r = end - 1;
if (g < ngroups && matches[g].rm_so != -1) {
size_t glen = matches[g].rm_eo - matches[g].rm_so;
while (*opos + glen + 1 > *ocap) {
*ocap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
memcpy(newbuf, *out, *opos);
*out = newbuf;
}
_ensure_cap(out, opos, ocap, glen);
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
*opos += glen;
}
} else if (*r == '\\' && r[1] >= '0' && r[1] <= '9') {
char msg[128];
snprintf(msg, sizeof(msg),
"Invalid escape sequence '\\%c' — use Roman numerals for backreferences", r[1]);
cent_runtime_error(msg);
} else if (*r == '\\' && r[1] == '\\') {
/* escaped backslash → literal \ */
if (*opos + 2 > *ocap) {
*ocap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
memcpy(newbuf, *out, *opos);
*out = newbuf;
}
_ensure_cap(out, opos, ocap, 1);
(*out)[(*opos)++] = '\\';
r++;
} else {
if (*opos + 2 > *ocap) {
*ocap *= 2;
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
memcpy(newbuf, *out, *opos);
*out = newbuf;
}
_ensure_cap(out, opos, ocap, 1);
(*out)[(*opos)++] = *r;
}
}
@@ -992,7 +1146,7 @@ CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue tex
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
cent_type_error("'SVBSTITVE' requires three strings");
regex_t re;
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
int rc = regcomp(&re, _romanize_pattern(pattern.sval), REG_EXTENDED);
if (rc != 0) {
char errbuf[256];
regerror(rc, &re, errbuf, sizeof(errbuf));