🐐 String indexing and slicing

This commit is contained in:
2026-04-21 23:07:40 +02:00
parent 378c28102c
commit 9003d49b20
5 changed files with 153 additions and 1 deletions

View File

@@ -70,6 +70,21 @@ Any expression can appear inside `{}`. Values are coerced to strings the same wa
Single-quoted strings do **not** interpolate — `'{nomen}'` is the literal text `{nomen}`. Use `{{` and `}}` for literal braces in double-quoted strings: `"use {{braces}}"` → `use {braces}`.
#### String Indexing and Slicing
Strings support the same indexing and slicing syntax as arrays. Indexing is 1-based and returns a single-character string:
```
"SALVTE"[I] @> "S"
"SALVTE"[III] @> "L"
```
Slicing uses `VSQVE` with inclusive bounds, returning a substring:
```
"SALVTE"[II VSQVE IV] @> "ALV"
```
Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2).
### Integers

View File

@@ -833,6 +833,17 @@ class ArrayIndex(Node):
if k not in d:
raise CentvrionError(f"Key not found in dict")
return vtable, d[k]
if isinstance(array, ValStr):
if isinstance(index, ValInt):
i = index.value()
elif isinstance(index, ValFrac) and index.value().denominator == 1:
i = index.value().numerator
else:
raise CentvrionError("String index must be a number")
s = array.value()
if i < 1 or i > len(s):
raise CentvrionError(f"Index {i} out of range for string of length {len(s)}")
return vtable, ValStr(s[i - 1])
if not isinstance(array, ValList):
raise CentvrionError("Cannot index a non-array value")
if isinstance(index, ValInt):
@@ -877,6 +888,16 @@ class ArraySlice(Node):
vtable, array = self.array.eval(vtable)
vtable, from_val = self.from_index.eval(vtable)
vtable, to_val = self.to_index.eval(vtable)
if isinstance(array, ValStr):
from_int = _to_index_int(from_val)
to_int = _to_index_int(to_val)
s = array.value()
if from_int < 1 or to_int > len(s) or from_int > to_int:
raise CentvrionError(
f"Slice [{from_int} VSQVE {to_int}] out of range"
f" for string of length {len(s)}"
)
return vtable, ValStr(s[from_int - 1 : to_int])
if not isinstance(array, ValList):
raise CentvrionError("Cannot slice a non-array value")
from_int = _to_index_int(from_val)

View File

@@ -721,6 +721,22 @@ void cent_list_push(CentValue *lst, CentValue v) {
CentValue cent_list_index(CentValue lst, CentValue idx) {
if (lst.type == CENT_DICT)
return cent_dict_get(lst, idx);
if (lst.type == CENT_STR) {
long i;
if (idx.type == CENT_INT)
i = idx.ival;
else if (idx.type == CENT_FRAC && idx.fval.den == 1)
i = idx.fval.num;
else
cent_type_error("string index must be an integer");
long slen = (long)strlen(lst.sval);
if (i < 1 || i > slen)
cent_runtime_error("string index out of range");
char *ch = cent_arena_alloc(cent_arena, 2);
ch[0] = lst.sval[i - 1];
ch[1] = '\0';
return cent_str(ch);
}
if (lst.type != CENT_LIST)
cent_type_error("index requires a list or dict");
long i;
@@ -736,6 +752,20 @@ CentValue cent_list_index(CentValue lst, CentValue idx) {
}
CentValue cent_list_slice(CentValue lst, CentValue lo, CentValue hi) {
if (lst.type == CENT_STR) {
if (lo.type != CENT_INT || hi.type != CENT_INT)
cent_type_error("slice indices must be integers");
long from = lo.ival;
long to = hi.ival;
long slen = (long)strlen(lst.sval);
if (from < 1 || to > slen || from > to)
cent_runtime_error("string slice out of range");
int len = (int)(to - from + 1);
char *buf = cent_arena_alloc(cent_arena, len + 1);
memcpy(buf, lst.sval + from - 1, len);
buf[len] = '\0';
return cent_str(buf);
}
if (lst.type != CENT_LIST)
cent_type_error("slice requires a list");
if (lo.type != CENT_INT || hi.type != CENT_INT)

View File

@@ -98,7 +98,7 @@
\item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. Available modules: \texttt{FORS} (randomness), \texttt{FRACTIO} (fractions), \texttt{MAGNVM} (large integers), \texttt{SCRIPTA} (file I/O: \texttt{LEGE}, \texttt{SCRIBE}, \texttt{ADIVNGE}), \texttt{SVBNVLLA} (negative literals).
\item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w.
\item \textbf{builtin}: \\ Builtin functions are uppercase latin words.
\item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal.
\item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal. Strings support 1-based indexing (\texttt{string[I]}) and inclusive slicing (\texttt{string[I VSQVE III]}), returning single-character strings and substrings respectively.
\item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces.
\item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscores when the MAGNVM module is enabled.
\item \textbf{bool}: \\ VERITAS or FALSITAS.

View File

@@ -639,6 +639,10 @@ error_tests = [
("I * \"hello\"", CentvrionError), # multiplication with string
("\"hello\" MINVS \"world\"", CentvrionError), # comparison with strings
("I[I]", CentvrionError), # indexing a non-array
('"SALVTE"[VII]', CentvrionError), # string index out of range
('"SALVTE"[NVLLVS]', CentvrionError), # string index with non-integer
('"SALVTE"[II VSQVE VII]', CentvrionError), # string slice out of range
('"SALVTE"[III VSQVE II]', CentvrionError), # string slice from > to
("DESIGNA x VT I\nDESIGNA x[I] VT II", CentvrionError), # index-assign to non-array
("SEMEN(I)", CentvrionError), # requires FORS module
('CVM FORS\nSEMEN("abc")', CentvrionError), # SEMEN requires integer seed
@@ -1583,6 +1587,88 @@ class TestArraySlice(unittest.TestCase):
run_test(self, source, nodes, value)
# --- String indexing ---
# Parameter rows for TestStringIndex. Each tuple is unpacked by the test
# method as (source, nodes, value) and forwarded to run_test.
# Judging by the entries, `source` is the program text, `nodes` the Program
# tree expected for it, and `value` the expected result -- TODO confirm
# against run_test, which is defined elsewhere.
# The rows exercise 1-based indexing: "SALVTE"[I] is expected to give "S"
# and "SALVTE"[VI] the final "E".
string_index_tests = [
# first character
('"SALVTE"[I]',
Program([], [ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("I")))]),
ValStr("S")),
# last character
('"SALVTE"[VI]',
Program([], [ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("VI")))]),
ValStr("E")),
# middle character
('"SALVTE"[III]',
Program([], [ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("III")))]),
ValStr("L")),
# string index via variable
('DESIGNA s VT "SALVTE"\ns[II]',
Program([], [
Designa(ID("s"), String("SALVTE")),
ExpressionStatement(ArrayIndex(ID("s"), Numeral("II"))),
]),
ValStr("A")),
# expression as index
('"SALVTE"[I + II]',
Program([], [ExpressionStatement(ArrayIndex(
String("SALVTE"),
BinOp(Numeral("I"), Numeral("II"), "SYMBOL_PLUS")))]),
ValStr("L")),
]
# Parameterized test case: parameterized.expand generates one test per row of
# string_index_tests, and each generated test hands its (source, nodes, value)
# row straight to run_test.
class TestStringIndex(unittest.TestCase):
@parameterized.expand(string_index_tests)
def test_string_index(self, source, nodes, value):
run_test(self, source, nodes, value)
# --- String slicing ---
# Parameter rows for TestStringSlice. Each tuple is unpacked by the test
# method as (source, nodes, value) and forwarded to run_test.
# Judging by the entries, `source` is the program text, `nodes` the Program
# tree expected for it, and `value` the expected result -- TODO confirm
# against run_test, which is defined elsewhere.
# The rows exercise inclusive 1-based bounds: "SALVTE"[II VSQVE IV] is
# expected to give "ALV", and [III VSQVE III] the single character "L".
string_slice_tests = [
# substring from middle
('"SALVTE"[II VSQVE IV]',
Program([], [ExpressionStatement(ArraySlice(
String("SALVTE"), Numeral("II"), Numeral("IV")))]),
ValStr("ALV")),
# full string slice
('"SALVTE"[I VSQVE VI]',
Program([], [ExpressionStatement(ArraySlice(
String("SALVTE"), Numeral("I"), Numeral("VI")))]),
ValStr("SALVTE")),
# single-char slice
('"SALVTE"[III VSQVE III]',
Program([], [ExpressionStatement(ArraySlice(
String("SALVTE"), Numeral("III"), Numeral("III")))]),
ValStr("L")),
# slice on variable
('DESIGNA s VT "SALVTE"\ns[II VSQVE IV]',
Program([], [
Designa(ID("s"), String("SALVTE")),
ExpressionStatement(ArraySlice(ID("s"), Numeral("II"), Numeral("IV"))),
]),
ValStr("ALV")),
# chaining: slice then index
('"SALVTE"[I VSQVE III][II]',
Program([], [ExpressionStatement(ArrayIndex(
ArraySlice(String("SALVTE"), Numeral("I"), Numeral("III")),
Numeral("II")))]),
ValStr("A")),
# expression as slice bounds
('"SALVTE"[I + I VSQVE II + II]',
Program([], [ExpressionStatement(ArraySlice(
String("SALVTE"),
BinOp(Numeral("I"), Numeral("I"), "SYMBOL_PLUS"),
BinOp(Numeral("II"), Numeral("II"), "SYMBOL_PLUS")))]),
ValStr("ALV")),
]
# Parameterized test case: parameterized.expand generates one test per row of
# string_slice_tests, and each generated test hands its (source, nodes, value)
# row straight to run_test.
class TestStringSlice(unittest.TestCase):
@parameterized.expand(string_slice_tests)
def test_string_slice(self, source, nodes, value):
run_test(self, source, nodes, value)
# --- Comments ---
comment_tests = [