Compare commits
6 Commits
791ed2491e
...
60b45869fb
| Author | SHA1 | Date | |
|---|---|---|---|
| 60b45869fb | |||
| 461bfbbdc5 | |||
| 5e2ebcdc9d | |||
| f5b8986681 | |||
| 24b187c23c | |||
| c55a63f46c |
@@ -357,12 +357,12 @@ Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as
|
|||||||
### QVAERE
|
### QVAERE
|
||||||
`QVAERE(pattern, string)`
|
`QVAERE(pattern, string)`
|
||||||
|
|
||||||
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax with Roman numeral quantifiers (`{III}` for exactly 3, `{II,V}` for 2–5, `{III,}` for 3 or more). Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
|
||||||
|
|
||||||
### SVBSTITVE
|
### SVBSTITVE
|
||||||
`SVBSTITVE(pattern, replacement, string)`
|
`SVBSTITVE(pattern, replacement, string)`
|
||||||
|
|
||||||
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid.
|
Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\I`, `\II`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid.
|
||||||
|
|
||||||
### SCINDE
|
### SCINDE
|
||||||
`SCINDE(string, delimiter)`
|
`SCINDE(string, delimiter)`
|
||||||
|
|||||||
2
cent
@@ -42,6 +42,8 @@ def main():
|
|||||||
pos = e.source_pos
|
pos = e.source_pos
|
||||||
char = program_text[pos.idx] if pos.idx < len(program_text) else "?"
|
char = program_text[pos.idx] if pos.idx < len(program_text) else "?"
|
||||||
sys.exit(f"CENTVRION error: Invalid character {char!r} at line {pos.lineno}, column {pos.colno}")
|
sys.exit(f"CENTVRION error: Invalid character {char!r} at line {pos.lineno}, column {pos.colno}")
|
||||||
|
except SyntaxError as e:
|
||||||
|
sys.exit(f"CENTVRION error: {e}")
|
||||||
|
|
||||||
if isinstance(program, Program):
|
if isinstance(program, Program):
|
||||||
if args["-i"]:
|
if args["-i"]:
|
||||||
|
|||||||
@@ -133,8 +133,11 @@ def int_to_num(n, m, s=False) -> str:
|
|||||||
for i in thousands_chars
|
for i in thousands_chars
|
||||||
])
|
])
|
||||||
|
|
||||||
return thousands + int_to_num(n % 1000, m, s)
|
remainder = n % 1000
|
||||||
|
return thousands + (int_to_num(remainder, m, s) if remainder else "")
|
||||||
else:
|
else:
|
||||||
|
if n == 0:
|
||||||
|
return "NVLLVS"
|
||||||
nums = []
|
nums = []
|
||||||
while n > 0:
|
while n > 0:
|
||||||
for num, i in list(NUMERALS.items())[::-1]:
|
for num, i in list(NUMERALS.items())[::-1]:
|
||||||
@@ -172,6 +175,89 @@ def make_string(val, magnvm=False, svbnvlla=False) -> str:
|
|||||||
else:
|
else:
|
||||||
raise CentvrionError(f"Cannot display {val!r}")
|
raise CentvrionError(f"Cannot display {val!r}")
|
||||||
|
|
||||||
|
def _roman_backref(m):
    r"""Translate one Roman-numeral backreference into ``re`` template syntax.

    ``m`` is a regex match whose group 1 holds the Roman numeral text that
    followed the backslash; group 0 is the whole escape as written.

    Returns the explicit ``\g<n>`` form rather than ``\n`` so that a literal
    digit written directly after the escape (``\I0``) is not absorbed into the
    group number (which would turn it into a reference to group 10).

    If ``num_to_int`` rejects the numeral with ``CentvrionError`` the escape
    is returned unchanged.
    """
    try:
        n = num_to_int(m.group(1), False)
    except CentvrionError:
        # Not a valid numeral: leave the text exactly as the user wrote it.
        return m.group(0)
    return f"\\g<{n}>"
|
||||||
|
|
||||||
|
def _check_arabic_backref(s):
|
||||||
|
for i in range(len(s) - 1):
|
||||||
|
if s[i] == '\\' and s[i+1].isdigit():
|
||||||
|
raise CentvrionError(f"Invalid escape sequence '\\{s[i+1]}' — use Roman numerals for backreferences")
|
||||||
|
|
||||||
|
def _romanize_replacement(s):
    r"""Convert Roman-numeral backreferences in a replacement string.

    First rejects Arabic-digit backreferences via ``_check_arabic_backref``,
    then rewrites every ``\<Roman numeral>`` escape using ``_roman_backref``.

    Escaped backslashes are matched as a unit and passed through untouched.
    Without that, the second backslash of ``\\I`` (a literal backslash
    followed by a literal ``I``) would be taken as the start of a
    backreference and the ``I`` would be rewritten.

    Returns the converted replacement string.
    """
    _check_arabic_backref(s)

    def _convert(m):
        # Group 1 is either a single backslash (escaped backslash) or the
        # Roman numeral text of a backreference.
        if m.group(1) == '\\':
            return m.group(0)
        return _roman_backref(m)

    return re.sub(r'\\(\\|[IVXLCDM]+)', _convert, s)
|
||||||
|
|
||||||
|
def _convert_quantifier(inner):
|
||||||
|
parts = inner.split(',')
|
||||||
|
converted = []
|
||||||
|
for p in parts:
|
||||||
|
p = p.strip()
|
||||||
|
if p == '':
|
||||||
|
converted.append('')
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
converted.append(str(num_to_int(p, False)))
|
||||||
|
except CentvrionError:
|
||||||
|
return None
|
||||||
|
return '{' + ','.join(converted) + '}'
|
||||||
|
|
||||||
|
def _romanize_pattern(s):
|
||||||
|
result = []
|
||||||
|
i = 0
|
||||||
|
while i < len(s):
|
||||||
|
if s[i] == '\\' and i + 1 < len(s) and s[i+1] in 'IVXLCDM':
|
||||||
|
# backref: collect Roman numeral chars and convert
|
||||||
|
j = i + 1
|
||||||
|
while j < len(s) and s[j] in 'IVXLCDM':
|
||||||
|
j += 1
|
||||||
|
try:
|
||||||
|
n = num_to_int(s[i+1:j], False)
|
||||||
|
result.append(f'\\{n}')
|
||||||
|
except CentvrionError:
|
||||||
|
result.append(s[i:j])
|
||||||
|
i = j
|
||||||
|
elif s[i] == '\\' and i + 1 < len(s) and s[i+1].isdigit():
|
||||||
|
raise CentvrionError(f"Invalid escape sequence '\\{s[i+1]}' — use Roman numerals for backreferences")
|
||||||
|
elif s[i] == '\\' and i + 1 < len(s):
|
||||||
|
result.append(s[i:i+2])
|
||||||
|
i += 2
|
||||||
|
elif s[i] == '[':
|
||||||
|
# skip character class
|
||||||
|
j = i + 1
|
||||||
|
if j < len(s) and s[j] == '^':
|
||||||
|
j += 1
|
||||||
|
if j < len(s) and s[j] == ']':
|
||||||
|
j += 1
|
||||||
|
while j < len(s) and s[j] != ']':
|
||||||
|
if s[j] == '\\' and j + 1 < len(s):
|
||||||
|
j += 1
|
||||||
|
j += 1
|
||||||
|
result.append(s[i:j+1])
|
||||||
|
i = j + 1
|
||||||
|
elif s[i] == '{':
|
||||||
|
j = s.find('}', i)
|
||||||
|
if j == -1:
|
||||||
|
result.append(s[i])
|
||||||
|
i += 1
|
||||||
|
else:
|
||||||
|
inner = s[i+1:j]
|
||||||
|
if re.match(r'^[\d,\s]+$', inner) and re.search(r'\d', inner):
|
||||||
|
raise CentvrionError(f"Invalid quantifier '{{{inner}}}' — use Roman numerals")
|
||||||
|
converted = _convert_quantifier(inner)
|
||||||
|
if converted is not None:
|
||||||
|
result.append(converted)
|
||||||
|
else:
|
||||||
|
result.append(s[i:j+1])
|
||||||
|
i = j + 1
|
||||||
|
else:
|
||||||
|
result.append(s[i])
|
||||||
|
i += 1
|
||||||
|
return ''.join(result)
|
||||||
|
|
||||||
FRAC_SYMBOLS = [("S", 6), (":", 2), (".", 1)]
|
FRAC_SYMBOLS = [("S", 6), (":", 2), (".", 1)]
|
||||||
|
|
||||||
def frac_to_fraction(s, magnvm=False, svbnvlla=False):
|
def frac_to_fraction(s, magnvm=False, svbnvlla=False):
|
||||||
@@ -720,6 +806,20 @@ class BinOp(Node):
|
|||||||
return f"({self.left.print()} {OP_STR[self.op]} {self.right.print()})"
|
return f"({self.left.print()} {OP_STR[self.op]} {self.right.print()})"
|
||||||
|
|
||||||
def _eval(self, vtable):
|
def _eval(self, vtable):
|
||||||
|
# Short-circuit for logical operators
|
||||||
|
if self.op == "KEYWORD_AVT":
|
||||||
|
vtable, left = self.left.eval(vtable)
|
||||||
|
if bool(left):
|
||||||
|
return vtable, ValBool(True)
|
||||||
|
vtable, right = self.right.eval(vtable)
|
||||||
|
return vtable, ValBool(bool(right))
|
||||||
|
if self.op == "KEYWORD_ET":
|
||||||
|
vtable, left = self.left.eval(vtable)
|
||||||
|
if not bool(left):
|
||||||
|
return vtable, ValBool(False)
|
||||||
|
vtable, right = self.right.eval(vtable)
|
||||||
|
return vtable, ValBool(bool(right))
|
||||||
|
|
||||||
vtable, left = self.left.eval(vtable)
|
vtable, left = self.left.eval(vtable)
|
||||||
vtable, right = self.right.eval(vtable)
|
vtable, right = self.right.eval(vtable)
|
||||||
lv, rv = left.value(), right.value()
|
lv, rv = left.value(), right.value()
|
||||||
@@ -779,13 +879,15 @@ class BinOp(Node):
|
|||||||
raise CentvrionError("Cannot compare strings or arrays with PLVS")
|
raise CentvrionError("Cannot compare strings or arrays with PLVS")
|
||||||
return vtable, ValBool((lv or 0) > (rv or 0))
|
return vtable, ValBool((lv or 0) > (rv or 0))
|
||||||
case "KEYWORD_EST":
|
case "KEYWORD_EST":
|
||||||
|
if ((isinstance(left, ValInt) and lv == 0 and isinstance(right, ValNul)) or
|
||||||
|
(isinstance(left, ValNul) and isinstance(right, ValInt) and rv == 0)):
|
||||||
|
return vtable, ValBool(True)
|
||||||
return vtable, ValBool(lv == rv)
|
return vtable, ValBool(lv == rv)
|
||||||
case "KEYWORD_DISPAR":
|
case "KEYWORD_DISPAR":
|
||||||
|
if ((isinstance(left, ValInt) and lv == 0 and isinstance(right, ValNul)) or
|
||||||
|
(isinstance(left, ValNul) and isinstance(right, ValInt) and rv == 0)):
|
||||||
|
return vtable, ValBool(False)
|
||||||
return vtable, ValBool(lv != rv)
|
return vtable, ValBool(lv != rv)
|
||||||
case "KEYWORD_ET":
|
|
||||||
return vtable, ValBool(bool(left) and bool(right))
|
|
||||||
case "KEYWORD_AVT":
|
|
||||||
return vtable, ValBool(bool(left) or bool(right))
|
|
||||||
case _:
|
case _:
|
||||||
raise Exception(self.op)
|
raise Exception(self.op)
|
||||||
|
|
||||||
@@ -1319,7 +1421,7 @@ class BuiltIn(Node):
|
|||||||
try:
|
try:
|
||||||
matches = [
|
matches = [
|
||||||
ValStr(m.group(0))
|
ValStr(m.group(0))
|
||||||
for m in re.finditer(pattern.value(), text.value())
|
for m in re.finditer(_romanize_pattern(pattern.value()), text.value())
|
||||||
]
|
]
|
||||||
except re.error as e:
|
except re.error as e:
|
||||||
raise CentvrionError(f"Invalid regex: {e}")
|
raise CentvrionError(f"Invalid regex: {e}")
|
||||||
@@ -1331,7 +1433,11 @@ class BuiltIn(Node):
|
|||||||
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
|
if not isinstance(pattern, ValStr) or not isinstance(replacement, ValStr) or not isinstance(text, ValStr):
|
||||||
raise CentvrionError("SVBSTITVE requires three strings")
|
raise CentvrionError("SVBSTITVE requires three strings")
|
||||||
try:
|
try:
|
||||||
result = re.sub(pattern.value(), replacement.value(), text.value())
|
result = re.sub(
|
||||||
|
_romanize_pattern(pattern.value()),
|
||||||
|
_romanize_replacement(replacement.value()),
|
||||||
|
text.value()
|
||||||
|
)
|
||||||
except re.error as e:
|
except re.error as e:
|
||||||
raise CentvrionError(f"Invalid regex: {e}")
|
raise CentvrionError(f"Invalid regex: {e}")
|
||||||
return vtable, ValStr(result)
|
return vtable, ValStr(result)
|
||||||
|
|||||||
@@ -93,6 +93,25 @@ def emit_expr(node, ctx):
|
|||||||
return [f'CentValue {tmp} = cent_scope_get(&_scope, "{node.name}");'], tmp
|
return [f'CentValue {tmp} = cent_scope_get(&_scope, "{node.name}");'], tmp
|
||||||
|
|
||||||
if isinstance(node, BinOp):
|
if isinstance(node, BinOp):
|
||||||
|
# Short-circuit for logical operators
|
||||||
|
if node.op in ("KEYWORD_AVT", "KEYWORD_ET"):
|
||||||
|
l_lines, l_var = emit_expr(node.left, ctx)
|
||||||
|
r_lines, r_var = emit_expr(node.right, ctx)
|
||||||
|
tmp = ctx.fresh_tmp()
|
||||||
|
if node.op == "KEYWORD_AVT":
|
||||||
|
lines = l_lines + [f"CentValue {tmp};"]
|
||||||
|
lines += [f"if (cent_truthy({l_var})) {{ {tmp} = cent_bool(1); }} else {{"]
|
||||||
|
lines += [f" {l}" for l in r_lines]
|
||||||
|
lines += [f" {tmp} = cent_bool(cent_truthy({r_var}));"]
|
||||||
|
lines += ["}"]
|
||||||
|
else:
|
||||||
|
lines = l_lines + [f"CentValue {tmp};"]
|
||||||
|
lines += [f"if (!cent_truthy({l_var})) {{ {tmp} = cent_bool(0); }} else {{"]
|
||||||
|
lines += [f" {l}" for l in r_lines]
|
||||||
|
lines += [f" {tmp} = cent_bool(cent_truthy({r_var}));"]
|
||||||
|
lines += ["}"]
|
||||||
|
return lines, tmp
|
||||||
|
|
||||||
l_lines, l_var = emit_expr(node.left, ctx)
|
l_lines, l_var = emit_expr(node.left, ctx)
|
||||||
r_lines, r_var = emit_expr(node.right, ctx)
|
r_lines, r_var = emit_expr(node.right, ctx)
|
||||||
tmp = ctx.fresh_tmp()
|
tmp = ctx.fresh_tmp()
|
||||||
|
|||||||
@@ -177,8 +177,12 @@ static void transform_thousands(const char *src, char *dst, size_t dstsz) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void cent_int_to_roman(long n, char *buf, size_t bufsz) {
|
void cent_int_to_roman(long n, char *buf, size_t bufsz) {
|
||||||
if (n <= 0 || (n > 3999 && !cent_magnvm))
|
if (n == 0) {
|
||||||
cent_runtime_error("number out of range for Roman numerals (1-3999)");
|
if (bufsz > 6) { memcpy(buf, "NVLLVS", 6); buf[6] = '\0'; }
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (n < 0 || (n > 3999 && !cent_magnvm))
|
||||||
|
cent_runtime_error("number out of range for Roman numerals");
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
if (n > 3999) {
|
if (n > 3999) {
|
||||||
char base[64];
|
char base[64];
|
||||||
@@ -376,7 +380,9 @@ static long gcd(long a, long b) {
|
|||||||
static CentValue frac_reduce(long num, long den) {
|
static CentValue frac_reduce(long num, long den) {
|
||||||
if (den < 0) { num = -num; den = -den; }
|
if (den < 0) { num = -num; den = -den; }
|
||||||
long g = gcd(num < 0 ? -num : num, den);
|
long g = gcd(num < 0 ? -num : num, den);
|
||||||
return cent_frac(num / g, den / g);
|
num /= g; den /= g;
|
||||||
|
if (den == 1) return cent_int(num);
|
||||||
|
return cent_frac(num, den);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void to_frac(CentValue v, long *num, long *den) {
|
static void to_frac(CentValue v, long *num, long *den) {
|
||||||
@@ -441,11 +447,16 @@ CentValue cent_mul(CentValue a, CentValue b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CentValue cent_div(CentValue a, CentValue b) {
|
CentValue cent_div(CentValue a, CentValue b) {
|
||||||
|
if (a.type == CENT_NULL) a = cent_int(0);
|
||||||
|
if (b.type == CENT_NULL) b = cent_int(0);
|
||||||
if (a.type != CENT_INT || b.type != CENT_INT)
|
if (a.type != CENT_INT || b.type != CENT_INT)
|
||||||
cent_type_error("'/' requires two integers");
|
cent_type_error("'/' requires two integers");
|
||||||
if (b.ival == 0)
|
if (b.ival == 0)
|
||||||
cent_runtime_error("division by zero");
|
cent_runtime_error("division by zero");
|
||||||
return cent_int(a.ival / b.ival);
|
/* floored division (Python // semantics) */
|
||||||
|
long q = a.ival / b.ival;
|
||||||
|
if ((a.ival % b.ival != 0) && ((a.ival < 0) != (b.ival < 0))) q -= 1;
|
||||||
|
return cent_int(q);
|
||||||
}
|
}
|
||||||
|
|
||||||
CentValue cent_div_frac(CentValue a, CentValue b) {
|
CentValue cent_div_frac(CentValue a, CentValue b) {
|
||||||
@@ -456,11 +467,16 @@ CentValue cent_div_frac(CentValue a, CentValue b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CentValue cent_mod(CentValue a, CentValue b) {
|
CentValue cent_mod(CentValue a, CentValue b) {
|
||||||
|
if (a.type == CENT_NULL) a = cent_int(0);
|
||||||
|
if (b.type == CENT_NULL) b = cent_int(0);
|
||||||
if (a.type != CENT_INT || b.type != CENT_INT)
|
if (a.type != CENT_INT || b.type != CENT_INT)
|
||||||
cent_type_error("'RELIQVVM' requires two integers");
|
cent_type_error("'RELIQVVM' requires two integers");
|
||||||
if (b.ival == 0)
|
if (b.ival == 0)
|
||||||
cent_runtime_error("modulo by zero");
|
cent_runtime_error("modulo by zero");
|
||||||
return cent_int(a.ival % b.ival);
|
/* floored modulo (Python % semantics) */
|
||||||
|
long r = a.ival % b.ival;
|
||||||
|
if (r != 0 && ((a.ival < 0) != (b.ival < 0))) r += b.ival;
|
||||||
|
return cent_int(r);
|
||||||
}
|
}
|
||||||
|
|
||||||
CentValue cent_mod_frac(CentValue a, CentValue b) {
|
CentValue cent_mod_frac(CentValue a, CentValue b) {
|
||||||
@@ -480,6 +496,9 @@ CentValue cent_mod_frac(CentValue a, CentValue b) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
CentValue cent_eq(CentValue a, CentValue b) {
|
CentValue cent_eq(CentValue a, CentValue b) {
|
||||||
|
if ((a.type == CENT_INT && a.ival == 0 && b.type == CENT_NULL) ||
|
||||||
|
(a.type == CENT_NULL && b.type == CENT_INT && b.ival == 0))
|
||||||
|
return cent_bool(1);
|
||||||
if ((a.type == CENT_INT || a.type == CENT_FRAC) &&
|
if ((a.type == CENT_INT || a.type == CENT_FRAC) &&
|
||||||
(b.type == CENT_INT || b.type == CENT_FRAC)) {
|
(b.type == CENT_INT || b.type == CENT_FRAC)) {
|
||||||
long an, ad, bn, bd;
|
long an, ad, bn, bd;
|
||||||
@@ -492,6 +511,14 @@ CentValue cent_eq(CentValue a, CentValue b) {
|
|||||||
case CENT_BOOL: return cent_bool(a.bval == b.bval);
|
case CENT_BOOL: return cent_bool(a.bval == b.bval);
|
||||||
case CENT_FUNC: return cent_bool(a.fnval.fn == b.fnval.fn);
|
case CENT_FUNC: return cent_bool(a.fnval.fn == b.fnval.fn);
|
||||||
case CENT_NULL: return cent_bool(1);
|
case CENT_NULL: return cent_bool(1);
|
||||||
|
case CENT_LIST: {
|
||||||
|
if (a.lval.len != b.lval.len) return cent_bool(0);
|
||||||
|
for (int i = 0; i < a.lval.len; i++) {
|
||||||
|
CentValue r = cent_eq(a.lval.items[i], b.lval.items[i]);
|
||||||
|
if (!r.bval) return cent_bool(0);
|
||||||
|
}
|
||||||
|
return cent_bool(1);
|
||||||
|
}
|
||||||
default:
|
default:
|
||||||
cent_type_error("'EST' not supported for this type");
|
cent_type_error("'EST' not supported for this type");
|
||||||
return cent_null();
|
return cent_null();
|
||||||
@@ -901,11 +928,160 @@ CentValue cent_dict_keys(CentValue dict) {
|
|||||||
/* Regex */
|
/* Regex */
|
||||||
/* ------------------------------------------------------------------ */
|
/* ------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/* Return 1 when c is one of the Roman numeral letters I, V, X, L, C, D, M
 * (upper case only), and 0 for every other character. */
static int _is_roman_char(char c) {
    switch (c) {
        case 'I':
        case 'V':
        case 'X':
        case 'L':
        case 'C':
        case 'D':
        case 'M':
            return 1;
        default:
            return 0;
    }
}
|
||||||
|
|
||||||
|
static void _ensure_cap(char **out, size_t *opos, size_t *ocap, size_t need) {
|
||||||
|
while (*opos + need + 1 > *ocap) {
|
||||||
|
*ocap *= 2;
|
||||||
|
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
||||||
|
memcpy(newbuf, *out, *opos);
|
||||||
|
*out = newbuf;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert Roman numeral quantifiers in pattern: {III} → {3}, {II,V} → {2,5} */
|
||||||
|
static char *_romanize_pattern(const char *s) {
|
||||||
|
size_t slen = strlen(s);
|
||||||
|
size_t cap = slen * 2 + 1;
|
||||||
|
char *result = cent_arena_alloc(cent_arena, cap);
|
||||||
|
size_t rpos = 0;
|
||||||
|
for (size_t i = 0; i < slen; ) {
|
||||||
|
if (s[i] == '\\' && i + 1 < slen && _is_roman_char(s[i + 1])) {
|
||||||
|
/* backref: collect Roman numeral chars and convert */
|
||||||
|
size_t j = i + 1;
|
||||||
|
while (j < slen && _is_roman_char(s[j])) j++;
|
||||||
|
char buf[64];
|
||||||
|
size_t len = j - i - 1;
|
||||||
|
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
|
||||||
|
memcpy(buf, s + i + 1, len);
|
||||||
|
buf[len] = '\0';
|
||||||
|
long val = cent_roman_to_int(buf);
|
||||||
|
char numbuf[32];
|
||||||
|
snprintf(numbuf, sizeof(numbuf), "\\%ld", val);
|
||||||
|
size_t nlen = strlen(numbuf);
|
||||||
|
while (rpos + nlen >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
memcpy(result + rpos, numbuf, nlen);
|
||||||
|
rpos += nlen;
|
||||||
|
i = j;
|
||||||
|
} else if (s[i] == '\\' && i + 1 < slen && s[i + 1] >= '0' && s[i + 1] <= '9') {
|
||||||
|
char msg[128];
|
||||||
|
snprintf(msg, sizeof(msg),
|
||||||
|
"Invalid escape sequence '\\%c' — use Roman numerals for backreferences", s[i + 1]);
|
||||||
|
cent_runtime_error(msg);
|
||||||
|
} else if (s[i] == '\\' && i + 1 < slen) {
|
||||||
|
if (rpos + 2 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
} else if (s[i] == '[') {
|
||||||
|
/* copy character class verbatim */
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
if (i < slen && s[i] == '^') { result[rpos++] = s[i++]; }
|
||||||
|
if (i < slen && s[i] == ']') { result[rpos++] = s[i++]; }
|
||||||
|
while (i < slen && s[i] != ']') {
|
||||||
|
if (s[i] == '\\' && i + 1 < slen) {
|
||||||
|
if (rpos + 2 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
}
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
}
|
||||||
|
if (i < slen) { if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; } result[rpos++] = s[i++]; }
|
||||||
|
} else if (s[i] == '{') {
|
||||||
|
/* find closing brace */
|
||||||
|
size_t j = i + 1;
|
||||||
|
while (j < slen && s[j] != '}') j++;
|
||||||
|
if (j >= slen) {
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
} else {
|
||||||
|
/* extract inner content and try to convert */
|
||||||
|
size_t inner_len = j - i - 1;
|
||||||
|
char inner[128];
|
||||||
|
if (inner_len >= sizeof(inner)) inner_len = sizeof(inner) - 1;
|
||||||
|
memcpy(inner, s + i + 1, inner_len);
|
||||||
|
inner[inner_len] = '\0';
|
||||||
|
/* reject Arabic digit quantifiers */
|
||||||
|
int has_digit = 0, all_digit_comma_space = 1;
|
||||||
|
for (size_t k = 0; k < inner_len; k++) {
|
||||||
|
if (inner[k] >= '0' && inner[k] <= '9') has_digit = 1;
|
||||||
|
else if (inner[k] != ',' && inner[k] != ' ') all_digit_comma_space = 0;
|
||||||
|
}
|
||||||
|
if (has_digit && all_digit_comma_space) {
|
||||||
|
char msg[192];
|
||||||
|
snprintf(msg, sizeof(msg), "Invalid quantifier '{%s}' — use Roman numerals", inner);
|
||||||
|
cent_runtime_error(msg);
|
||||||
|
}
|
||||||
|
/* convert comma-separated Roman parts */
|
||||||
|
char converted[128];
|
||||||
|
size_t cpos = 0;
|
||||||
|
converted[0] = '\0';
|
||||||
|
int ok = 1;
|
||||||
|
char *part = inner;
|
||||||
|
while (ok) {
|
||||||
|
char *comma = strchr(part, ',');
|
||||||
|
if (comma) *comma = '\0';
|
||||||
|
/* trim spaces */
|
||||||
|
while (*part == ' ') part++;
|
||||||
|
char *pend = part + strlen(part) - 1;
|
||||||
|
while (pend > part && *pend == ' ') *pend-- = '\0';
|
||||||
|
if (*part == '\0') {
|
||||||
|
/* empty part (e.g. {,V}) */
|
||||||
|
} else {
|
||||||
|
/* check all chars are Roman */
|
||||||
|
int all_roman = 1;
|
||||||
|
for (char *c = part; *c; c++) { if (!_is_roman_char(*c)) { all_roman = 0; break; } }
|
||||||
|
if (!all_roman) { ok = 0; break; }
|
||||||
|
long val = cent_roman_to_int(part);
|
||||||
|
char numbuf[32];
|
||||||
|
snprintf(numbuf, sizeof(numbuf), "%ld", val);
|
||||||
|
size_t nlen = strlen(numbuf);
|
||||||
|
if (cpos + nlen >= sizeof(converted)) { ok = 0; break; }
|
||||||
|
memcpy(converted + cpos, numbuf, nlen);
|
||||||
|
cpos += nlen;
|
||||||
|
}
|
||||||
|
if (comma) {
|
||||||
|
if (cpos + 1 >= sizeof(converted)) { ok = 0; break; }
|
||||||
|
converted[cpos++] = ',';
|
||||||
|
part = comma + 1;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
converted[cpos] = '\0';
|
||||||
|
if (ok) {
|
||||||
|
size_t need = cpos + 2;
|
||||||
|
while (rpos + need >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = '{';
|
||||||
|
memcpy(result + rpos, converted, cpos);
|
||||||
|
rpos += cpos;
|
||||||
|
result[rpos++] = '}';
|
||||||
|
} else {
|
||||||
|
/* not valid Roman — copy verbatim */
|
||||||
|
size_t chunk = j - i + 1;
|
||||||
|
while (rpos + chunk >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
memcpy(result + rpos, s + i, chunk);
|
||||||
|
rpos += chunk;
|
||||||
|
}
|
||||||
|
i = j + 1;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (rpos + 1 >= cap) { cap *= 2; char *nb = cent_arena_alloc(cent_arena, cap); memcpy(nb, result, rpos); result = nb; }
|
||||||
|
result[rpos++] = s[i++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
result[rpos] = '\0';
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
||||||
if (pattern.type != CENT_STR || text.type != CENT_STR)
|
if (pattern.type != CENT_STR || text.type != CENT_STR)
|
||||||
cent_type_error("'QVAERE' requires two strings");
|
cent_type_error("'QVAERE' requires two strings");
|
||||||
regex_t re;
|
regex_t re;
|
||||||
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
|
int rc = regcomp(&re, _romanize_pattern(pattern.sval), REG_EXTENDED);
|
||||||
if (rc != 0) {
|
if (rc != 0) {
|
||||||
char errbuf[256];
|
char errbuf[256];
|
||||||
regerror(rc, &re, errbuf, sizeof(errbuf));
|
regerror(rc, &re, errbuf, sizeof(errbuf));
|
||||||
@@ -928,42 +1104,39 @@ CentValue cent_qvaere(CentValue pattern, CentValue text) {
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Expand replacement string, substituting \1..\9 with captured groups */
|
/* Expand replacement string, substituting \I..\IX with captured groups */
|
||||||
static void _expand_replacement(const char *repl, const char *subject,
|
static void _expand_replacement(const char *repl, const char *subject,
|
||||||
regmatch_t *matches, int ngroups,
|
regmatch_t *matches, int ngroups,
|
||||||
char **out, size_t *opos, size_t *ocap) {
|
char **out, size_t *opos, size_t *ocap) {
|
||||||
for (const char *r = repl; *r; r++) {
|
for (const char *r = repl; *r; r++) {
|
||||||
if (*r == '\\' && r[1] >= '1' && r[1] <= '9') {
|
if (*r == '\\' && _is_roman_char(r[1])) {
|
||||||
int g = r[1] - '0';
|
const char *start = r + 1;
|
||||||
r++;
|
const char *end = start;
|
||||||
|
while (_is_roman_char(*end)) end++;
|
||||||
|
char buf[64];
|
||||||
|
size_t len = (size_t)(end - start);
|
||||||
|
if (len >= sizeof(buf)) len = sizeof(buf) - 1;
|
||||||
|
memcpy(buf, start, len);
|
||||||
|
buf[len] = '\0';
|
||||||
|
int g = (int)cent_roman_to_int(buf);
|
||||||
|
r = end - 1;
|
||||||
if (g < ngroups && matches[g].rm_so != -1) {
|
if (g < ngroups && matches[g].rm_so != -1) {
|
||||||
size_t glen = matches[g].rm_eo - matches[g].rm_so;
|
size_t glen = matches[g].rm_eo - matches[g].rm_so;
|
||||||
while (*opos + glen + 1 > *ocap) {
|
_ensure_cap(out, opos, ocap, glen);
|
||||||
*ocap *= 2;
|
|
||||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
|
||||||
memcpy(newbuf, *out, *opos);
|
|
||||||
*out = newbuf;
|
|
||||||
}
|
|
||||||
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
|
memcpy(*out + *opos, subject + matches[g].rm_so, glen);
|
||||||
*opos += glen;
|
*opos += glen;
|
||||||
}
|
}
|
||||||
|
} else if (*r == '\\' && r[1] >= '0' && r[1] <= '9') {
|
||||||
|
char msg[128];
|
||||||
|
snprintf(msg, sizeof(msg),
|
||||||
|
"Invalid escape sequence '\\%c' — use Roman numerals for backreferences", r[1]);
|
||||||
|
cent_runtime_error(msg);
|
||||||
} else if (*r == '\\' && r[1] == '\\') {
|
} else if (*r == '\\' && r[1] == '\\') {
|
||||||
/* escaped backslash → literal \ */
|
_ensure_cap(out, opos, ocap, 1);
|
||||||
if (*opos + 2 > *ocap) {
|
|
||||||
*ocap *= 2;
|
|
||||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
|
||||||
memcpy(newbuf, *out, *opos);
|
|
||||||
*out = newbuf;
|
|
||||||
}
|
|
||||||
(*out)[(*opos)++] = '\\';
|
(*out)[(*opos)++] = '\\';
|
||||||
r++;
|
r++;
|
||||||
} else {
|
} else {
|
||||||
if (*opos + 2 > *ocap) {
|
_ensure_cap(out, opos, ocap, 1);
|
||||||
*ocap *= 2;
|
|
||||||
char *newbuf = cent_arena_alloc(cent_arena, *ocap);
|
|
||||||
memcpy(newbuf, *out, *opos);
|
|
||||||
*out = newbuf;
|
|
||||||
}
|
|
||||||
(*out)[(*opos)++] = *r;
|
(*out)[(*opos)++] = *r;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -973,7 +1146,7 @@ CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue tex
|
|||||||
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
|
if (pattern.type != CENT_STR || replacement.type != CENT_STR || text.type != CENT_STR)
|
||||||
cent_type_error("'SVBSTITVE' requires three strings");
|
cent_type_error("'SVBSTITVE' requires three strings");
|
||||||
regex_t re;
|
regex_t re;
|
||||||
int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
|
int rc = regcomp(&re, _romanize_pattern(pattern.sval), REG_EXTENDED);
|
||||||
if (rc != 0) {
|
if (rc != 0) {
|
||||||
char errbuf[256];
|
char errbuf[256];
|
||||||
regerror(rc, &re, errbuf, sizeof(errbuf));
|
regerror(rc, &re, errbuf, sizeof(errbuf));
|
||||||
|
|||||||
@@ -408,7 +408,13 @@ class Parser():
|
|||||||
|
|
||||||
@self.pg.error
|
@self.pg.error
|
||||||
def error_handle(token):
|
def error_handle(token):
|
||||||
raise SyntaxError(f"{token.name}, {token.value}, {token.source_pos}")
|
pos = token.source_pos
|
||||||
|
loc = f" at line {pos.lineno}, column {pos.colno}" if pos else ""
|
||||||
|
if token.name == "SYMBOL_LPARENS":
|
||||||
|
raise SyntaxError(
|
||||||
|
f"Unexpected '('{loc}. To call a function, use INVOCA: INVOCA func (args)"
|
||||||
|
)
|
||||||
|
raise SyntaxError(f"Unexpected token '{token.value}'{loc}")
|
||||||
|
|
||||||
parser = self.pg.build()
|
parser = self.pg.build()
|
||||||
return parser.parse(tokens_input) # type: ignore
|
return parser.parse(tokens_input) # type: ignore
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 27 KiB |
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 8.9 KiB After Width: | Height: | Size: 8.6 KiB |
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 10 KiB After Width: | Height: | Size: 9.9 KiB |
BIN
snippets/dic.png
|
Before Width: | Height: | Size: 9.6 KiB After Width: | Height: | Size: 8.9 KiB |
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 8.0 KiB After Width: | Height: | Size: 7.4 KiB |
|
Before Width: | Height: | Size: 27 KiB After Width: | Height: | Size: 26 KiB |
BIN
snippets/dvm.png
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 41 KiB After Width: | Height: | Size: 41 KiB |
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 87 KiB After Width: | Height: | Size: 87 KiB |
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 88 KiB After Width: | Height: | Size: 87 KiB |
BIN
snippets/per.png
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 30 KiB After Width: | Height: | Size: 29 KiB |
@@ -10,11 +10,11 @@ contexts:
|
|||||||
- include: comments
|
- include: comments
|
||||||
- include: strings
|
- include: strings
|
||||||
- include: keywords
|
- include: keywords
|
||||||
- include: fractions
|
|
||||||
- include: numerals
|
|
||||||
- include: constants
|
- include: constants
|
||||||
- include: builtins
|
- include: builtins
|
||||||
- include: modules
|
- include: modules
|
||||||
|
- include: fractions
|
||||||
|
- include: numerals
|
||||||
- include: operators
|
- include: operators
|
||||||
- include: identifiers
|
- include: identifiers
|
||||||
|
|
||||||
|
|||||||
89
tests.py
@@ -634,10 +634,22 @@ builtin_tests = [
|
|||||||
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
|
('SVBSTITVE("a", "b", "")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a"), String("b"), String("")]))]), ValStr("")),
|
||||||
# SVBSTITVE: dot matches any character
|
# SVBSTITVE: dot matches any character
|
||||||
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
|
('SVBSTITVE(".", "x", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("."), String("x"), String("ab")]))]), ValStr("xx")),
|
||||||
# SVBSTITVE: backreference swaps two groups
|
# SVBSTITVE: backreference swaps two groups (Roman numerals)
|
||||||
('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")),
|
('SVBSTITVE("(a)(b)", "\\II\\I", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\II\\I"), String("ab")]))]), ValStr("ba")),
|
||||||
# SVBSTITVE: backreference with unmatched group (ignored)
|
# SVBSTITVE: backreference with unmatched group (ignored)
|
||||||
('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")),
|
('SVBSTITVE("(a)(b)?", "\\I\\II", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\I\\II"), String("a")]))]), ValStr("a")),
|
||||||
|
# SVBSTITVE: Roman numeral quantifier in pattern
|
||||||
|
("SVBSTITVE('a{III}', 'x', 'aaa')", Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("a{III}"), String("x"), String("aaa")]))]), ValStr("x")),
|
||||||
|
# QVAERE: Roman numeral quantifier — exact repetition
|
||||||
|
("QVAERE('a{III}', 'aaaa')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a{III}"), String("aaaa")]))]), ValList([ValStr("aaa")])),
|
||||||
|
# QVAERE: Roman numeral quantifier — range
|
||||||
|
("QVAERE('a{II,III}', 'aaaaaa')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a{II,III}"), String("aaaaaa")]))]), ValList([ValStr("aaa"), ValStr("aaa")])),
|
||||||
|
# QVAERE: Roman numeral quantifier — at-least
|
||||||
|
("QVAERE('a{II,}', 'a aa aaa')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a{II,}"), String("a aa aaa")]))]), ValList([ValStr("aa"), ValStr("aaa")])),
|
||||||
|
# QVAERE: pattern backreference — repeated character
|
||||||
|
("QVAERE('(.)\\I', 'aabcdd')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("(.)\\I"), String("aabcdd")]))]), ValList([ValStr("aa"), ValStr("dd")])),
|
||||||
|
# QVAERE: pattern backreference — repeated group
|
||||||
|
("QVAERE('(..)\\I', 'ababcc')", Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("(..)\\I"), String("ababcc")]))]), ValList([ValStr("abab")])),
|
||||||
# NVMERVS: basic conversion
|
# NVMERVS: basic conversion
|
||||||
('NVMERVS("XIV")', Program([], [ExpressionStatement(BuiltIn("NVMERVS", [String("XIV")]))]), ValInt(14)),
|
('NVMERVS("XIV")', Program([], [ExpressionStatement(BuiltIn("NVMERVS", [String("XIV")]))]), ValInt(14)),
|
||||||
# NVMERVS: simple single numeral
|
# NVMERVS: simple single numeral
|
||||||
@@ -673,6 +685,8 @@ error_tests = [
|
|||||||
("INVOCA f ()", CentvrionError), # undefined function
|
("INVOCA f ()", CentvrionError), # undefined function
|
||||||
("DESIGNA VT III", SyntaxError), # parse error: missing id after DESIGNA
|
("DESIGNA VT III", SyntaxError), # parse error: missing id after DESIGNA
|
||||||
("DESIGNA x III", SyntaxError), # parse error: missing VT
|
("DESIGNA x III", SyntaxError), # parse error: missing VT
|
||||||
|
("DEFINI f () VT { REDI(I) }\nf()", SyntaxError), # function call without INVOCA (no args)
|
||||||
|
("DEFINI f (x) VT { REDI(x) }\nf(I)", SyntaxError), # function call without INVOCA (with args)
|
||||||
("DIC(M + M + M + M)", CentvrionError), # output > 3999 without MAGNVM
|
("DIC(M + M + M + M)", CentvrionError), # output > 3999 without MAGNVM
|
||||||
("IIII", CentvrionError), # invalid Roman numeral in source
|
("IIII", CentvrionError), # invalid Roman numeral in source
|
||||||
("FORTVITVS_NVMERVS(I, X)", CentvrionError), # requires FORS module
|
("FORTVITVS_NVMERVS(I, X)", CentvrionError), # requires FORS module
|
||||||
@@ -684,11 +698,16 @@ error_tests = [
|
|||||||
("DEFINI f () VT { REDI(I) }\nINVOCA f (I)", CentvrionError), # args to zero-param function
|
("DEFINI f () VT { REDI(I) }\nINVOCA f (I)", CentvrionError), # args to zero-param function
|
||||||
("SI NVLLVS TVNC { DESIGNA r VT I }", CentvrionError), # NVLLVS cannot be used as boolean
|
("SI NVLLVS TVNC { DESIGNA r VT I }", CentvrionError), # NVLLVS cannot be used as boolean
|
||||||
("NVLLVS AVT VERITAS", CentvrionError), # NVLLVS cannot be used as boolean in AVT
|
("NVLLVS AVT VERITAS", CentvrionError), # NVLLVS cannot be used as boolean in AVT
|
||||||
|
("FALSITAS AVT NVLLVS", CentvrionError), # no short-circuit: right side evaluated, NVLLVS not boolean
|
||||||
|
("VERITAS ET NVLLVS", CentvrionError), # no short-circuit: right side evaluated, NVLLVS not boolean
|
||||||
|
("NVLLVS ET VERITAS", CentvrionError), # NVLLVS cannot be used as boolean in ET
|
||||||
('"hello" + " world"', CentvrionError), # use & for string concatenation, not +
|
('"hello" + " world"', CentvrionError), # use & for string concatenation, not +
|
||||||
("[I, II][III]", CentvrionError), # index too high
|
("[I, II][III]", CentvrionError), # index too high
|
||||||
("CVM SVBNVLLA\n[I, II][-I]", CentvrionError), # negative index
|
("CVM SVBNVLLA\n[I, II][-I]", CentvrionError), # negative index
|
||||||
("[I, II][-I]", CentvrionError), # negative value
|
("[I, II][-I]", CentvrionError), # negative value
|
||||||
("I / NVLLVS", CentvrionError), # division by zero (NVLLVS coerces to 0)
|
("I / NVLLVS", CentvrionError), # division by zero (NVLLVS coerces to 0)
|
||||||
|
("V RELIQVVM NVLLVS", CentvrionError), # modulo by zero (NVLLVS coerces to 0)
|
||||||
|
("NVLLVS RELIQVVM NVLLVS", CentvrionError), # modulo by zero (both NVLLVS)
|
||||||
("I / [I, II]", CentvrionError), # division with array operand
|
("I / [I, II]", CentvrionError), # division with array operand
|
||||||
("I - \"hello\"", CentvrionError), # subtraction with string
|
("I - \"hello\"", CentvrionError), # subtraction with string
|
||||||
("I * \"hello\"", CentvrionError), # multiplication with string
|
("I * \"hello\"", CentvrionError), # multiplication with string
|
||||||
@@ -749,6 +768,9 @@ error_tests = [
|
|||||||
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
|
('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement
|
||||||
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
|
('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text
|
||||||
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
|
('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex
|
||||||
|
("SVBSTITVE('(a)', '\\1', 'a')", CentvrionError), # Arabic backref in replacement
|
||||||
|
("QVAERE('(.)\\1', 'aa')", CentvrionError), # Arabic backref in pattern
|
||||||
|
("QVAERE('a{3}', 'aaa')", CentvrionError), # Arabic quantifier in pattern
|
||||||
('SCINDE(I, ",")', CentvrionError), # SCINDE requires strings, not int
|
('SCINDE(I, ",")', CentvrionError), # SCINDE requires strings, not int
|
||||||
('SCINDE("a", I)', CentvrionError), # SCINDE requires strings, not int delimiter
|
('SCINDE("a", I)', CentvrionError), # SCINDE requires strings, not int delimiter
|
||||||
('PETE("http://example.com")', CentvrionError), # RETE required for PETE
|
('PETE("http://example.com")', CentvrionError), # RETE required for PETE
|
||||||
@@ -881,7 +903,7 @@ class TestNumerals(unittest.TestCase):
|
|||||||
|
|
||||||
# int_to_num: valid cases
|
# int_to_num: valid cases
|
||||||
def test_int_to_num(self):
|
def test_int_to_num(self):
|
||||||
for n, s in [(1,"I"),(4,"IV"),(9,"IX"),(40,"XL"),(42,"XLII"),(3999,"MMMCMXCIX")]:
|
for n, s in [(0,"NVLLVS"),(1,"I"),(4,"IV"),(9,"IX"),(40,"XL"),(42,"XLII"),(3999,"MMMCMXCIX")]:
|
||||||
self.assertEqual(int_to_num(n, False), s)
|
self.assertEqual(int_to_num(n, False), s)
|
||||||
|
|
||||||
def test_int_to_num_above_3999_raises(self):
|
def test_int_to_num_above_3999_raises(self):
|
||||||
@@ -907,6 +929,9 @@ class TestMakeString(unittest.TestCase):
|
|||||||
def test_int(self):
|
def test_int(self):
|
||||||
self.assertEqual(make_string(ValInt(3)), "III")
|
self.assertEqual(make_string(ValInt(3)), "III")
|
||||||
|
|
||||||
|
def test_int_zero(self):
|
||||||
|
self.assertEqual(make_string(ValInt(0)), "NVLLVS")
|
||||||
|
|
||||||
def test_bool_true(self):
|
def test_bool_true(self):
|
||||||
self.assertEqual(make_string(ValBool(True)), "VERITAS")
|
self.assertEqual(make_string(ValBool(True)), "VERITAS")
|
||||||
|
|
||||||
@@ -939,6 +964,8 @@ dic_type_tests = [
|
|||||||
('DIC("")', Program([], [ExpressionStatement(BuiltIn("DIC", [String("")]))]), ValStr(""), "\n"),
|
('DIC("")', Program([], [ExpressionStatement(BuiltIn("DIC", [String("")]))]), ValStr(""), "\n"),
|
||||||
# arithmetic result printed as numeral
|
# arithmetic result printed as numeral
|
||||||
("DIC(II + III)", Program([], [ExpressionStatement(BuiltIn("DIC", [BinOp(Numeral("II"), Numeral("III"), "SYMBOL_PLUS")]))]), ValStr("V"), "V\n"),
|
("DIC(II + III)", Program([], [ExpressionStatement(BuiltIn("DIC", [BinOp(Numeral("II"), Numeral("III"), "SYMBOL_PLUS")]))]), ValStr("V"), "V\n"),
|
||||||
|
# integer 0 prints as NVLLVS
|
||||||
|
("DIC(I - I)", Program([], [ExpressionStatement(BuiltIn("DIC", [BinOp(Numeral("I"), Numeral("I"), "SYMBOL_MINUS")]))]), ValStr("NVLLVS"), "NVLLVS\n"),
|
||||||
# multiple args of mixed types
|
# multiple args of mixed types
|
||||||
('DIC("x", VERITAS)', Program([], [ExpressionStatement(BuiltIn("DIC", [String("x"), Bool(True)]))]), ValStr("x VERITAS"), "x VERITAS\n"),
|
('DIC("x", VERITAS)', Program([], [ExpressionStatement(BuiltIn("DIC", [String("x"), Bool(True)]))]), ValStr("x VERITAS"), "x VERITAS\n"),
|
||||||
]
|
]
|
||||||
@@ -985,6 +1012,17 @@ arithmetic_edge_tests = [
|
|||||||
("NVLLVS + NVLLVS", Program([], [ExpressionStatement(BinOp(Nullus(), Nullus(), "SYMBOL_PLUS"))]), ValNul()),
|
("NVLLVS + NVLLVS", Program([], [ExpressionStatement(BinOp(Nullus(), Nullus(), "SYMBOL_PLUS"))]), ValNul()),
|
||||||
("NVLLVS - V", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("V"), "SYMBOL_MINUS"))]), ValInt(-5)),
|
("NVLLVS - V", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("V"), "SYMBOL_MINUS"))]), ValInt(-5)),
|
||||||
("V - NVLLVS", Program([], [ExpressionStatement(BinOp(Numeral("V"), Nullus(), "SYMBOL_MINUS"))]), ValInt(5)),
|
("V - NVLLVS", Program([], [ExpressionStatement(BinOp(Numeral("V"), Nullus(), "SYMBOL_MINUS"))]), ValInt(5)),
|
||||||
|
# NVLLVS coerces to 0 in modulo and division
|
||||||
|
("NVLLVS RELIQVVM V", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("V"), "KEYWORD_RELIQVVM"))]), ValInt(0)),
|
||||||
|
("NVLLVS / V", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("V"), "SYMBOL_DIVIDE"))]), ValInt(0)),
|
||||||
|
# floored division and modulo with negative operands (Python semantics)
|
||||||
|
("CVM SVBNVLLA\n- VII RELIQVVM III", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(UnaryMinus(Numeral("VII")), Numeral("III"), "KEYWORD_RELIQVVM"))]), ValInt(2)),
|
||||||
|
("CVM SVBNVLLA\nVII RELIQVVM - III", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(Numeral("VII"), UnaryMinus(Numeral("III")), "KEYWORD_RELIQVVM"))]), ValInt(-2)),
|
||||||
|
("CVM SVBNVLLA\n- VII RELIQVVM - III", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(UnaryMinus(Numeral("VII")), UnaryMinus(Numeral("III")), "KEYWORD_RELIQVVM"))]), ValInt(-1)),
|
||||||
|
("CVM SVBNVLLA\n- VII / III", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(UnaryMinus(Numeral("VII")), Numeral("III"), "SYMBOL_DIVIDE"))]), ValInt(-3)),
|
||||||
|
("CVM SVBNVLLA\nVII / - III", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(Numeral("VII"), UnaryMinus(Numeral("III")), "SYMBOL_DIVIDE"))]), ValInt(-3)),
|
||||||
|
("CVM SVBNVLLA\n- VII / - III", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(UnaryMinus(Numeral("VII")), UnaryMinus(Numeral("III")), "SYMBOL_DIVIDE"))]), ValInt(2)),
|
||||||
|
("CVM SVBNVLLA\n- L RELIQVVM C", Program([ModuleCall("SVBNVLLA")], [ExpressionStatement(BinOp(UnaryMinus(Numeral("L")), Numeral("C"), "KEYWORD_RELIQVVM"))]), ValInt(50)),
|
||||||
]
|
]
|
||||||
|
|
||||||
class TestArithmeticEdge(unittest.TestCase):
|
class TestArithmeticEdge(unittest.TestCase):
|
||||||
@@ -1062,6 +1100,8 @@ interpolation_tests = [
|
|||||||
Program([], [
|
Program([], [
|
||||||
ExpressionStatement(InterpolatedString([String("value: "), Nullus()]))
|
ExpressionStatement(InterpolatedString([String("value: "), Nullus()]))
|
||||||
]), ValStr("value: NVLLVS")),
|
]), ValStr("value: NVLLVS")),
|
||||||
|
# integer 0 interpolates as NVLLVS
|
||||||
|
('"value: {I - I}"', Program([], [ExpressionStatement(InterpolatedString([String("value: "), BinOp(Numeral("I"), Numeral("I"), "SYMBOL_MINUS")]))]), ValStr("value: NVLLVS")),
|
||||||
# expression-only string (no literal parts around it)
|
# expression-only string (no literal parts around it)
|
||||||
('DESIGNA x VT "hi"\n"{x}"',
|
('DESIGNA x VT "hi"\n"{x}"',
|
||||||
Program([], [
|
Program([], [
|
||||||
@@ -1194,6 +1234,21 @@ comparison_tests = [
|
|||||||
("NVLLVS DISPAR NVLLVS", Program([], [ExpressionStatement(BinOp(Nullus(), Nullus(), "KEYWORD_DISPAR"))]), ValBool(False)),
|
("NVLLVS DISPAR NVLLVS", Program([], [ExpressionStatement(BinOp(Nullus(), Nullus(), "KEYWORD_DISPAR"))]), ValBool(False)),
|
||||||
# cross-type: an int and a string are never equal
|
# cross-type: an int and a string are never equal
|
||||||
('I DISPAR "I"', Program([], [ExpressionStatement(BinOp(Numeral("I"), String("I"), "KEYWORD_DISPAR"))]), ValBool(True)),
|
('I DISPAR "I"', Program([], [ExpressionStatement(BinOp(Numeral("I"), String("I"), "KEYWORD_DISPAR"))]), ValBool(True)),
|
||||||
|
# integer 0 equals NVLLVS
|
||||||
|
("(I - I) EST NVLLVS", Program([], [ExpressionStatement(BinOp(BinOp(Numeral("I"), Numeral("I"), "SYMBOL_MINUS"), Nullus(), "KEYWORD_EST"))]), ValBool(True)),
|
||||||
|
("NVLLVS EST (I - I)", Program([], [ExpressionStatement(BinOp(Nullus(), BinOp(Numeral("I"), Numeral("I"), "SYMBOL_MINUS"), "KEYWORD_EST"))]), ValBool(True)),
|
||||||
|
("(I - I) DISPAR NVLLVS", Program([], [ExpressionStatement(BinOp(BinOp(Numeral("I"), Numeral("I"), "SYMBOL_MINUS"), Nullus(), "KEYWORD_DISPAR"))]), ValBool(False)),
|
||||||
|
("NVLLVS DISPAR (I - I)", Program([], [ExpressionStatement(BinOp(Nullus(), BinOp(Numeral("I"), Numeral("I"), "SYMBOL_MINUS"), "KEYWORD_DISPAR"))]), ValBool(False)),
|
||||||
|
# non-zero integer does not equal NVLLVS
|
||||||
|
("I EST NVLLVS", Program([], [ExpressionStatement(BinOp(Numeral("I"), Nullus(), "KEYWORD_EST"))]), ValBool(False)),
|
||||||
|
("NVLLVS DISPAR I", Program([], [ExpressionStatement(BinOp(Nullus(), Numeral("I"), "KEYWORD_DISPAR"))]), ValBool(True)),
|
||||||
|
# EST / DISPAR on arrays
|
||||||
|
("[I, II] EST [I, II]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("II")]), "KEYWORD_EST"))]), ValBool(True)),
|
||||||
|
("[I, II] EST [I, III]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("III")]), "KEYWORD_EST"))]), ValBool(False)),
|
||||||
|
("[I, II] EST [I, II, III]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("II"), Numeral("III")]), "KEYWORD_EST"))]), ValBool(False)),
|
||||||
|
("[] EST []", Program([], [ExpressionStatement(BinOp(DataArray([]), DataArray([]), "KEYWORD_EST"))]), ValBool(True)),
|
||||||
|
("[I, II] DISPAR [I, III]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("III")]), "KEYWORD_DISPAR"))]), ValBool(True)),
|
||||||
|
("[I, II] DISPAR [I, II]", Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("I"), Numeral("II")]), "KEYWORD_DISPAR"))]), ValBool(False)),
|
||||||
]
|
]
|
||||||
|
|
||||||
class TestComparisons(unittest.TestCase):
|
class TestComparisons(unittest.TestCase):
|
||||||
@@ -1560,6 +1615,32 @@ et_avt_tests = [
|
|||||||
("SI FALSITAS AVT FALSITAS TVNC { DESIGNA r VT I } ALIVD { DESIGNA r VT II }\nr",
|
("SI FALSITAS AVT FALSITAS TVNC { DESIGNA r VT I } ALIVD { DESIGNA r VT II }\nr",
|
||||||
Program([], [SiStatement(BinOp(Bool(False), Bool(False), "KEYWORD_AVT"), [Designa(ID("r"), Numeral("I"))], [Designa(ID("r"), Numeral("II"))]), ExpressionStatement(ID("r"))]),
|
Program([], [SiStatement(BinOp(Bool(False), Bool(False), "KEYWORD_AVT"), [Designa(ID("r"), Numeral("I"))], [Designa(ID("r"), Numeral("II"))]), ExpressionStatement(ID("r"))]),
|
||||||
ValInt(2)),
|
ValInt(2)),
|
||||||
|
# short-circuit: right side not evaluated when result is determined
|
||||||
|
("VERITAS AVT NVLLVS",
|
||||||
|
Program([], [ExpressionStatement(BinOp(Bool(True), Nullus(), "KEYWORD_AVT"))]),
|
||||||
|
ValBool(True)),
|
||||||
|
("FALSITAS ET NVLLVS",
|
||||||
|
Program([], [ExpressionStatement(BinOp(Bool(False), Nullus(), "KEYWORD_ET"))]),
|
||||||
|
ValBool(False)),
|
||||||
|
# short-circuit with side-effect-prone expressions
|
||||||
|
("DESIGNA x VT NVLLVS\nSI x EST NVLLVS AVT [I, II][x] EST I TVNC { DESIGNA r VT I } ALIVD { DESIGNA r VT II }\nr",
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("x"), Nullus()),
|
||||||
|
SiStatement(
|
||||||
|
BinOp(BinOp(ID("x"), Nullus(), "KEYWORD_EST"), BinOp(ArrayIndex(DataArray([Numeral("I"), Numeral("II")]), ID("x")), Numeral("I"), "KEYWORD_EST"), "KEYWORD_AVT"),
|
||||||
|
[Designa(ID("r"), Numeral("I"))],
|
||||||
|
[Designa(ID("r"), Numeral("II"))]),
|
||||||
|
ExpressionStatement(ID("r"))]),
|
||||||
|
ValInt(1)),
|
||||||
|
("DESIGNA x VT NVLLVS\nSI x DISPAR NVLLVS ET [I, II][x] EST I TVNC { DESIGNA r VT I } ALIVD { DESIGNA r VT II }\nr",
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("x"), Nullus()),
|
||||||
|
SiStatement(
|
||||||
|
BinOp(BinOp(ID("x"), Nullus(), "KEYWORD_DISPAR"), BinOp(ArrayIndex(DataArray([Numeral("I"), Numeral("II")]), ID("x")), Numeral("I"), "KEYWORD_EST"), "KEYWORD_ET"),
|
||||||
|
[Designa(ID("r"), Numeral("I"))],
|
||||||
|
[Designa(ID("r"), Numeral("II"))]),
|
||||||
|
ExpressionStatement(ID("r"))]),
|
||||||
|
ValInt(2)),
|
||||||
]
|
]
|
||||||
|
|
||||||
class TestEtAvt(unittest.TestCase):
|
class TestEtAvt(unittest.TestCase):
|
||||||
|
|||||||