🐐 Fixes
This commit is contained in:
@@ -357,12 +357,12 @@ Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as
|
|||||||
### QVAERE
|
### QVAERE
|
||||||
`QVAERE(pattern, string)`
|
`QVAERE(pattern, string)`
|
||||||
|
|
||||||
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax with Roman numeral quantifiers (`{III}` for exactly 3, `{II,V}` for 2–5, `{III,}` for 3 or more) and Roman numeral backreferences (`\I`, `\II`, etc.); Arabic-digit quantifiers (`{3}`) and backreferences (`\1`) raise an error. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
||||||
|
|
||||||
### SVBSTITVE
|
### SVBSTITVE
|
||||||
`SVBSTITVE(pattern, replacement, string)`
|
`SVBSTITVE(pattern, replacement, string)`
|
||||||
|
|
||||||
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid.
|
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\I`, `\II`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid.
|
||||||
|
|
||||||
### SCINDE
|
### SCINDE
|
||||||
`SCINDE(string, delimiter)`
|
`SCINDE(string, delimiter)`
|
||||||
|
|||||||
@@ -175,6 +175,89 @@ def make_string(val, magnvm=False, svbnvlla=False) -> str:
|
|||||||
else:
|
else:
|
||||||
raise CentvrionError(f"Cannot display {val!r}")
|
raise CentvrionError(f"Cannot display {val!r}")
|
||||||
|
|
||||||
|
def _roman_backref(m):
    r"""Turn one matched Roman-numeral backreference into its Arabic form.

    ``m`` is a regex match whose group 1 holds the numeral text (the part
    after the backslash).  When ``num_to_int`` accepts the numeral, the
    result is a backslash followed by the integer; when it raises
    ``CentvrionError`` the matched text is returned unchanged.
    """
    numeral = m.group(1)
    try:
        index = num_to_int(numeral, False)
    except CentvrionError:
        # Not a valid numeral: leave the escape exactly as written.
        return m.group(0)
    else:
        return "\\{}".format(index)
|
||||||
|
|
||||||
|
def _check_arabic_backref(s):
|
||||||
|
for i in range(len(s) - 1):
|
||||||
|
if s[i] == '\\' and s[i+1].isdigit():
|
||||||
|
raise CentvrionError(f"Invalid escape sequence '\\{s[i+1]}' — use Roman numerals for backreferences")
|
||||||
|
|
||||||
|
def _romanize_replacement(s):
    r"""Translate Roman-numeral backreferences in a replacement template.

    ``\I``, ``\II``, ... are rewritten (via ``_roman_backref``) to the Arabic
    form expected by ``re.sub``.  Arabic backreferences are rejected up front
    by ``_check_arabic_backref``.

    Escaped backslashes are consumed as a unit, so ``\\I`` (a literal
    backslash followed by the letter "I") is left untouched instead of being
    misread as a backreference that starts at the second backslash.

    Raises:
        CentvrionError: propagated from ``_check_arabic_backref``.
    """
    _check_arabic_backref(s)

    def convert(m):
        # Group 1 is only set for a Roman-numeral escape; an escaped
        # backslash matches the other alternative and is passed through.
        if m.group(1) is None:
            return m.group(0)
        return _roman_backref(m)

    return re.sub(r'\\(?:\\|([IVXLCDM]+))', convert, s)
|
||||||
|
|
||||||
|
def _convert_quantifier(inner):
|
||||||
|
parts = inner.split(',')
|
||||||
|
converted = []
|
||||||
|
for p in parts:
|
||||||
|
p = p.strip()
|
||||||
|
if p == '':
|
||||||
|
converted.append('')
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
converted.append(str(num_to_int(p, False)))
|
||||||
|
except CentvrionError:
|
||||||
|
return None
|
||||||
|
return '{' + ','.join(converted) + '}'
|
||||||
|
|
||||||
|
def _romanize_pattern(s):
|
||||||
|
result = []
|
||||||
|
i = 0
|
||||||
|
while i < len(s):
|
||||||
|
if s[i] == '\\' and i + 1 < len(s) and s[i+1] in 'IVXLCDM':
|
||||||
|
# backref: collect Roman numeral chars and convert
|
||||||
|
j = i + 1
|
||||||
|
while j < len(s) and s[j] in 'IVXLCDM':
|
||||||
|
j += 1
|
||||||
|
try:
|
||||||
|
n = num_to_int(s[i+1:j], False)
|
||||||
|
result.append(f'\\{n}')
|
||||||
|
except CentvrionError:
|
||||||
|
result.append(s[i:j])
|
||||||
|
i = j
|
||||||
|
elif s[i] == '\\' and i + 1 < len(s) and s[i+1].isdigit():
|
||||||
|
raise CentvrionError(f"Invalid escape sequence '\\{s[i+1]}' — use Roman numerals for backreferences")
|
||||||
|
elif s[i] == '\\' and i + 1 < len(s):
|
||||||
|
result.append(s[i:i+2])
|
||||||
|
i += 2
|
||||||
|
elif s[i] == '[':
|
||||||
|
# skip character class
|
||||||
|
j = i + 1
|
||||||
|
if j < len(s) and s[j] == '^':
|
||||||
|
j += 1
|
||||||
|
if j < len(s) and s[j] == ']':
|
||||||
|
j += 1
|
||||||
|
while j < len(s) and s[j] != ']':
|
||||||
|
if s[j] == '\\' and j + 1 < len(s):
|
||||||
|
j += 1
|
||||||
|
j += 1
|
||||||
|
result.append(s[i:j+1])
|
||||||
|
i = j + 1
|
||||||
|
elif s[i] == '{':
|
||||||
|
j = s.find('}', i)
|
||||||
|
if j == -1:
|
||||||
|
result.append(s[i])
|
||||||
|
i += 1
|
||||||
|
else:
|
||||||
|
inner = s[i+1:j]
|
||||||
|
if re.match(r'^[\d,\s]+$', inner) and re.search(r'\d', inner):
|
||||||
|
raise CentvrionError(f"Invalid quantifier '{{{inner}}}' — use Roman numerals")
|
||||||
|
converted = _convert_quantifier(inner)
|
||||||
|
if converted is not None:
|
||||||
|
result.append(converted)
|
||||||
|
else:
|
||||||
|
result.append(s[i:j+1])
|
||||||
|
i = j + 1
|
||||||
|
else:
|
||||||
|
result.append(s[i])
|
||||||
|
i += 1
|
||||||
|
return ''.join(result)
|
||||||
|
|
||||||
FRAC_SYMBOLS = [("S", 6), (":", 2), (".", 1)]
|
FRAC_SYMBOLS = [("S", 6), (":", 2), (".", 1)]
|
||||||
|
|
||||||
def frac_to_fraction(s, magnvm=False, svbnvlla=False):
|
def frac_to_fraction(s, magnvm=False, svbnvlla=False):
|
||||||
@@ -1328,7 +1411,7 @@ class BuiltIn(Node):
|
|||||||
try:
|
try:
|
||||||
matches = [
|
matches = [
|
||||||
ValStr(m.group(0))
|
ValStr(m.group(0))
|
||||||
for m in re.finditer(pattern.value(), text.value())
|
for m in re.finditer(_romanize_pattern(pattern.value()), text.value())
|
||||||
]
|
]
|
||||||
except re.error as e:
|
except re.error as e:
|
||||||
raise CentvrionError(f"Invalid regex: {e}")
|
raise CentvrionError(f"Invalid regex: {e}")
|
||||||
@@ -1340,7 +1423,11 @@ class BuiltIn(Node):
|
|||||||
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
|
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
|
||||||
raise CentvrionError("SVBSTITVE requires three strings")
|
raise CentvrionError("SVBSTITVE requires three strings")
|
||||||
try:
|
try:
|
||||||
result = re.sub(pattern.value(), replacement.value(), text.value())
|
result = re.sub(
|
||||||
|
_romanize_pattern(pattern.value()),
|
||||||
|
_romanize_replacement(replacement.value()),
|
||||||
|
text.value()
|
||||||
|
)
|
||||||
except re.error as e:
|
except re.error as e:
|
||||||
raise CentvrionError(f"Invalid regex: {e}")
|
raise CentvrionError(f"Invalid regex: {e}")
|
||||||
return vtable, ValStr(result)
|
return vtable, ValStr(result)
|
||||||
|
|||||||
@@ -511,6 +511,14 @@ CentValue cent_eq(CentValue a, CentValue b) {
|
|||||||
case CENT_BOOL: return cent_bool(a.bval == b.bval);
|
case CENT_BOOL: return cent_bool(a.bval == b.bval);
|
||||||
case CENT_FUNC: return cent_bool(a.fnval.fn == b.fnval.fn);
|
case CENT_FUNC: return cent_bool(a.fnval.fn == b.fnval.fn);
|
||||||
case CENT_NULL: return cent_bool(1);
|
case CENT_NULL: return cent_bool(1);
|
||||||
|
case CENT_LIST: {
|
||||||
|
if (a.lval.len != b.lval.len) return cent_bool(0);
|
||||||
|
for (int i = 0; i < a.lval.len; i++) {
|
||||||
|
CentValue r = cent_eq(a.lval.items[i], b.lval.items[i]);
|
||||||
|
if (!r.bval) return cent_bool(0);
|
||||||
|
}
|
||||||
|
return cent_bool(1);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
cent_type_error("'EST' not supported for this type");
|
cent_type_error("'EST' not supported for this type");
|
||||||
return cent_null();
|
return cent_null();
|
||||||
@@ -920,11 +928,160 @@ CentValue cent_dict_keys(CentValue dict) {
|
|||||||
/* Regex */
|
/* Regex */
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/* Return 1 when `c` is one of the upper-case Roman numeral letters
 * I, V, X, L, C, D or M, and 0 for every other character (including NUL). */
static int _is_roman_char(char c) {
    switch (c) {
    case 'I': case 'V': case 'X': case 'L':
    case 'C': case 'D': case 'M':
        return 1;
    default:
        return 0;
    }
}
|
||||||
|
|
||||||
|
static void _ensure_cap(char **out, size_t *opos, size_t *ocap, size_t need) {
|
||||||
|
while (*opos + need + 1 > *ocap) {
|
||||||
|
*ocap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||||
|
memcpy(newbuf, *out, *opos);
|
||||||
|
*out = newbuf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert Roman numeral quantifiers in pattern: {III} → {3}, {II,V} → {2,5} */
|
||||||
|
static char *_romanize_pattern(const char *s) {
|
||||||
|
size_t slen = strlen(s);
|
||||||
|
size_t cap = slen * 2 + 1;
|
||||||
|
char *result = cent_arena_alloc(cent_arena, cap);
|
||||||
|
size_t rpos = 0;
|
||||||
|
for (size_t i = 0; i < slen; ) {
|
||||||
|
if (s[i] == '\\' && i + 1 < slen && _is_roman_char(s[i + 1])) {
|
||||||
|
/* backref: collect Roman numeral chars and convert */
|
||||||
|
size_t j = i + 1;
|
||||||
|
while (j < slen && _is_roman_char(s[j])) j++;
|
||||||
|
char buf[64];
|
||||||
|
size_t len = j - i - 1;
|
||||||
|
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
|
||||||
|
memcpy(buf, s + i + 1, len);
|
||||||
|
buf[len] = '\0';
|
||||||
|
long val = cent_roman_to_int(buf);
|
||||||
|
char numbuf[32];
|
||||||
|
snprintf(numbuf, sizeof(numbuf), "\\%ld", val);
|
||||||
|
size_t nlen = strlen(numbuf);
|
||||||
|
while (rpos + nlen >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
memcpy(result + rpos, numbuf, nlen);
|
||||||
|
rpos += nlen;
|
||||||
|
i = j;
|
||||||
|
} else if (s[i] == '\\' && i + 1 < slen && s[i + 1] >= '0' && s[i + 1] <= '9') {
|
||||||
|
char msg[128];
|
||||||
|
snprintf(msg, sizeof(msg),
|
||||||
|
"Invalid escape sequence '\\%c' — use Roman numerals for backreferences", s[i + 1]);
|
||||||
|
cent_runtime_error(msg);
|
||||||
|
} else if (s[i] == '\\' && i + 1 < slen) {
|
||||||
|
if (rpos + 2 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
} else if (s[i] == '[') {
|
||||||
|
/* copy character class verbatim */
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
if (i < slen && s[i] == '^') { result[rpos++] = s[i++]; }
|
||||||
|
if (i < slen && s[i] == ']') { result[rpos++] = s[i++]; }
|
||||||
|
while (i < slen && s[i] != ']') {
|
||||||
|
if (s[i] == '\\' && i + 1 < slen) {
|
||||||
|
if (rpos + 2 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
}
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
}
|
||||||
|
if (i < slen) { if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; } result[rpos++] = s[i++]; }
|
||||||
|
} else if (s[i] == '{') {
|
||||||
|
/* find closing brace */
|
||||||
|
size_t j = i + 1;
|
||||||
|
while (j < slen && s[j] != '}') j++;
|
||||||
|
if (j >= slen) {
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
} else {
|
||||||
|
/* extract inner content and try to convert */
|
||||||
|
size_t inner_len = j - i - 1;
|
||||||
|
char inner[128];
|
||||||
|
if (inner_len >= sizeof(inner)) inner_len = sizeof(inner) - 1;
|
||||||
|
memcpy(inner, s + i + 1, inner_len);
|
||||||
|
inner[inner_len] = '\0';
|
||||||
|
/* reject Arabic digit quantifiers */
|
||||||
|
int has_digit = 0, all_digit_comma_space = 1;
|
||||||
|
for (size_t k = 0; k < inner_len; k++) {
|
||||||
|
if (inner[k] >= '0' && inner[k] <= '9') has_digit = 1;
|
||||||
|
else if (inner[k] != ',' && inner[k] != ' ') all_digit_comma_space = 0;
|
||||||
|
}
|
||||||
|
if (has_digit && all_digit_comma_space) {
|
||||||
|
char msg[192];
|
||||||
|
snprintf(msg, sizeof(msg), "Invalid quantifier '{%s}' — use Roman numerals", inner);
|
||||||
|
cent_runtime_error(msg);
|
||||||
|
}
|
||||||
|
/* convert comma-separated Roman parts */
|
||||||
|
char converted[128];
|
||||||
|
size_t cpos = 0;
|
||||||
|
converted[0] = '\0';
|
||||||
|
int ok = 1;
|
||||||
|
char *part = inner;
|
||||||
|
while (ok) {
|
||||||
|
char *comma = strchr(part, ',');
|
||||||
|
if (comma) *comma = '\0';
|
||||||
|
/* trim spaces */
|
||||||
|
while (*part == ' ') part++;
|
||||||
|
char *pend = part + strlen(part) - 1;
|
||||||
|
while (pend > part && *pend == ' ') *pend-- = '\0';
|
||||||
|
if (*part == '\0') {
|
||||||
|
/* empty part (e.g. {,V}) */
|
||||||
|
} else {
|
||||||
|
/* check all chars are Roman */
|
||||||
|
int all_roman = 1;
|
||||||
|
for (char *c = part; *c; c++) { if (!_is_roman_char(*c)) { all_roman = 0; break; } }
|
||||||
|
if (!all_roman) { ok = 0; break; }
|
||||||
|
long val = cent_roman_to_int(part);
|
||||||
|
char numbuf[32];
|
||||||
|
snprintf(numbuf, sizeof(numbuf), "%ld", val);
|
||||||
|
size_t nlen = strlen(numbuf);
|
||||||
|
if (cpos + nlen >= sizeof(converted)) { ok = 0; break; }
|
||||||
|
memcpy(converted + cpos, numbuf, nlen);
|
||||||
|
cpos += nlen;
|
||||||
|
}
|
||||||
|
if (comma) {
|
||||||
|
if (cpos + 1 >= sizeof(converted)) { ok = 0; break; }
|
||||||
|
converted[cpos++] = ',';
|
||||||
|
part = comma + 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
converted[cpos] = '\0';
|
||||||
|
if (ok) {
|
||||||
|
size_t need = cpos + 2;
|
||||||
|
while (rpos + need >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = '{';
|
||||||
|
memcpy(result + rpos, converted, cpos);
|
||||||
|
rpos += cpos;
|
||||||
|
result[rpos++] = '}';
|
||||||
|
} else {
|
||||||
|
/* not valid Roman — copy verbatim */
|
||||||
|
size_t chunk = j - i + 1;
|
||||||
|
while (rpos + chunk >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
memcpy(result + rpos, s + i, chunk);
|
||||||
|
rpos += chunk;
|
||||||
|
}
|
||||||
|
i = j + 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result[rpos] = '\0';
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
||||||
if (pattern.type != CENT_STR || text.type != CENT_STR)
|
if (pattern.type != CENT_STR || text.type != CENT_STR)
|
||||||
cent_type_error("'QVAERE' requires two strings");
|
cent_type_error("'QVAERE' requires two strings");
|
||||||
regex_t re;
|
regex_t re;
|
||||||
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
|
int rc = regcomp(&re, _romanize_pattern(pattern.sval), REG_EXTENDED);
|
||||||
if (rc != 0) {
|
if (rc != 0) {
|
||||||
char errbuf[256];
|
char errbuf[256];
|
||||||
regerror(rc, &re, errbuf, sizeof(errbuf));
|
regerror(rc, &re, errbuf, sizeof(errbuf));
|
||||||
@@ -947,42 +1104,39 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Expand replacement string, substituting \1..\9 with captured groups */
|
/* Expand replacement string, substituting \I..\IX with captured groups */
|
||||||
static void _expand_replacement(const char *repl, const char *subject,
|
static void _expand_replacement(const char *repl, const char *subject,
|
||||||
regmatch_t *matches, int ngroups,
|
regmatch_t *matches, int ngroups,
|
||||||
char **out, size_t *opos, size_t *ocap) {
|
char **out, size_t *opos, size_t *ocap) {
|
||||||
for (const char *r = repl; *r; r++) {
|
for (const char *r = repl; *r; r++) {
|
||||||
if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
|
if (*r == '\\' && _is_roman_char(r[1])) {
|
||||||
int g = r[1] - '0';
|
const char *start = r + 1;
|
||||||
r++;
|
const char *end = start;
|
||||||
|
while (_is_roman_char(*end)) end++;
|
||||||
|
char buf[64];
|
||||||
|
size_t len = (size_t)(end - start);
|
||||||
|
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
|
||||||
|
memcpy(buf, start, len);
|
||||||
|
buf[len] = '\0';
|
||||||
|
int g = (int)cent_roman_to_int(buf);
|
||||||
|
r = end - 1;
|
||||||
if (g < ngroups && matches[g].rm_so != -1) {
|
if (g < ngroups && matches[g].rm_so != -1) {
|
||||||
size_t glen = matches[g].rm_eo - matches[g].rm_so;
|
size_t glen = matches[g].rm_eo - matches[g].rm_so;
|
||||||
while (*opos + glen + 1 > *ocap) {
|
_ensure_cap(out, opos, ocap, glen);
|
||||||
*ocap *= 2;
|
|
||||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
|
||||||
memcpy(newbuf, *out, *opos);
|
|
||||||
*out = newbuf;
|
|
||||||
}
|
|
||||||
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
|
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
|
||||||
*opos += glen;
|
*opos += glen;
|
||||||
}
|
}
|
||||||
|
} else if (*r == '\\' && r[1] >= '0' && r[1] <= '9') {
|
||||||
|
char msg[128];
|
||||||
|
snprintf(msg, sizeof(msg),
|
||||||
|
"Invalid escape sequence '\\%c' — use Roman numerals for backreferences", r[1]);
|
||||||
|
cent_runtime_error(msg);
|
||||||
} else if (*r == '\\' && r[1] == '\\') {
|
} else if (*r == '\\' && r[1] == '\\') {
|
||||||
/* escaped backslash → literal \ */
|
_ensure_cap(out, opos, ocap, 1);
|
||||||
if (*opos + 2 > *ocap) {
|
|
||||||
*ocap *= 2;
|
|
||||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
|
||||||
memcpy(newbuf, *out, *opos);
|
|
||||||
*out = newbuf;
|
|
||||||
}
|
|
||||||
(*out)[(*opos)++] = '\\';
|
(*out)[(*opos)++] = '\\';
|
||||||
r++;
|
r++;
|
||||||
} else {
|
} else {
|
||||||
if (*opos + 2 > *ocap) {
|
_ensure_cap(out, opos, ocap, 1);
|
||||||
*ocap *= 2;
|
|
||||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
|
||||||
memcpy(newbuf, *out, *opos);
|
|
||||||
*out = newbuf;
|
|
||||||
}
|
|
||||||
(*out)[(*opos)++] = *r;
|
(*out)[(*opos)++] = *r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -992,7 +1146,7 @@ CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue tex
|
|||||||
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
|
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
|
||||||
cent_type_error("'SVBSTITVE' requires three strings");
|
cent_type_error("'SVBSTITVE' requires three strings");
|
||||||
regex_t re;
|
regex_t re;
|
||||||
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
|
int rc = regcomp(&re, _romanize_pattern(pattern.sval), REG_EXTENDED);
|
||||||
if (rc != 0) {
|
if (rc != 0) {
|
||||||
char errbuf[256];
|
char errbuf[256];
|
||||||
regerror(rc, &re, errbuf, sizeof(errbuf));
|
regerror(rc, &re, errbuf, sizeof(errbuf));
|
||||||
|
|||||||
28
tests.py
28
tests.py
@@ -634,10 +634,22 @@ builtin_tests = [
|
|||||||
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
|
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
|
||||||
# SVBSTITVE: dot matches any character
|
# SVBSTITVE: dot matches any character
|
||||||
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
|
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
|
||||||
# SVBSTITVE: backreference swaps two groups
|
# SVBSTITVE: backreference swaps two groups (Roman numerals)
|
||||||
('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")),
|
('SVBSTITVE("(a)(b)", "\\II\\I", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\II\\I"), String("ab")]))]), ValStr("ba")),
|
||||||
# SVBSTITVE: backreference with unmatched group (ignored)
|
# SVBSTITVE: backreference with unmatched group (ignored)
|
||||||
('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")),
|
('SVBSTITVE("(a)(b)?", "\\I\\II", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\I\\II"), String("a")]))]), ValStr("a")),
|
||||||
|
# SVBSTITVE: Roman numeral quantifier in pattern
|
||||||
|
("SVBSTITVE('a{III}', 'x', 'aaa')", Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a{III}"), String("x"), String("aaa")]))]), ValStr("x")),
|
||||||
|
# QVAERE: Roman numeral quantifier — exact repetition
|
||||||
|
("QVAERE('a{III}', 'aaaa')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a{III}"), String("aaaa")]))]), ValList([ValStr("aaa")])),
|
||||||
|
# QVAERE: Roman numeral quantifier — range
|
||||||
|
("QVAERE('a{II,III}', 'aaaaaa')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a{II,III}"), String("aaaaaa")]))]), ValList([ValStr("aaa"), ValStr("aaa")])),
|
||||||
|
# QVAERE: Roman numeral quantifier — at-least
|
||||||
|
("QVAERE('a{II,}', 'a aa aaa')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a{II,}"), String("a aa aaa")]))]), ValList([ValStr("aa"), ValStr("aaa")])),
|
||||||
|
# QVAERE: pattern backreference — repeated character
|
||||||
|
("QVAERE('(.)\\I', 'aabcdd')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("(.)\\I"), String("aabcdd")]))]), ValList([ValStr("aa"), ValStr("dd")])),
|
||||||
|
# QVAERE: pattern backreference — repeated group
|
||||||
|
("QVAERE('(..)\\I', 'ababcc')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("(..)\\I"), String("ababcc")]))]), ValList([ValStr("abab")])),
|
||||||
# NVMERVS: basic conversion
|
# NVMERVS: basic conversion
|
||||||
('NVMERVS("XIV")', Program([], [ExpressionStatement(BuiltIn("NVMERVS", [String("XIV")]))]), ValInt(14)),
|
('NVMERVS("XIV")', Program([], [ExpressionStatement(BuiltIn("NVMERVS", [String("XIV")]))]), ValInt(14)),
|
||||||
# NVMERVS: simple single numeral
|
# NVMERVS: simple single numeral
|
||||||
@@ -751,6 +763,9 @@ error_tests = [
|
|||||||
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
|
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
|
||||||
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
|
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
|
||||||
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
|
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
|
||||||
|
("SVBSTITVE('(a)', '\\1', 'a')", CentvrionError), # Arabic backref in replacement
|
||||||
|
("QVAERE('(.)\\1', 'aa')", CentvrionError), # Arabic backref in pattern
|
||||||
|
("QVAERE('a{3}', 'aaa')", CentvrionError), # Arabic quantifier in pattern
|
||||||
('SCINDE(I, ",")', CentvrionError), # SCINDE requires strings, not int
|
('SCINDE(I, ",")', CentvrionError), # SCINDE requires strings, not int
|
||||||
('SCINDE("a", I)', CentvrionError), # SCINDE requires strings, not int delimiter
|
('SCINDE("a", I)', CentvrionError), # SCINDE requires strings, not int delimiter
|
||||||
('PETE("http://example.com")', CentvrionError), # RETE required for PETE
|
('PETE("http://example.com")', CentvrionError), # RETE required for PETE
|
||||||
@@ -1222,6 +1237,13 @@ comparison_tests = [
|
|||||||
# non-zero integer does not equal NVLLVS
|
# non-zero integer does not equal NVLLVS
|
||||||
("I EST NVLLVS", Program([], [ExpressionStatement(BinOp(Numeral("I"), Nullus(), "KEYWORD_EST"))]), ValBool(False)),
|
("I EST NVLLVS", Program([], [ExpressionStatement(BinOp(Numeral("I"), Nullus(), "KEYWORD_EST"))]), ValBool(False)),
|
||||||
("NVLLVS DISPAR I", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("I"), "KEYWORD_DISPAR"))]), ValBool(True)),
|
("NVLLVS DISPAR I", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("I"), "KEYWORD_DISPAR"))]), ValBool(True)),
|
||||||
|
# EST / DISPAR on arrays
|
||||||
|
("[I, II] EST [I, II]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("II")]), "KEYWORD_EST"))]), ValBool(True)),
|
||||||
|
("[I, II] EST [I, III]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("III")]), "KEYWORD_EST"))]), ValBool(False)),
|
||||||
|
("[I, II] EST [I, II, III]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("II"), Numeral("III")]), "KEYWORD_EST"))]), ValBool(False)),
|
||||||
|
("[] EST []", Program([], [ExpressionStatement(BinOp(DataArray([]), DataArray([]), "KEYWORD_EST"))]), ValBool(True)),
|
||||||
|
("[I, II] DISPAR [I, III]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("III")]), "KEYWORD_DISPAR"))]), ValBool(True)),
|
||||||
|
("[I, II] DISPAR [I, II]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("II")]), "KEYWORD_DISPAR"))]), ValBool(False)),
|
||||||
]
|
]
|
||||||
|
|
||||||
class TestComparisons(unittest.TestCase):
|
class TestComparisons(unittest.TestCase):
|
||||||
|
|||||||
Reference in New Issue
Block a user