🐐 String indexing and slicing
This commit is contained in:
15
README.md
15
README.md
@@ -70,6 +70,21 @@ Any expression can appear inside `{}`. Values are coerced to strings the same wa
|
||||
|
||||
Single-quoted strings do **not** interpolate — `'{nomen}'` is the literal text `{nomen}`. Use `{{` and `}}` for literal braces in double-quoted strings: `"use {{braces}}"` → `use {braces}`.
|
||||
|
||||
#### String Indexing and Slicing
|
||||
|
||||
Strings support the same indexing and slicing syntax as arrays. Indexing is 1-based and returns a single-character string:
|
||||
|
||||
```
|
||||
"SALVTE"[I] @> "S"
|
||||
"SALVTE"[III] @> "L"
|
||||
```
|
||||
|
||||
Slicing uses `VSQVE` with inclusive bounds, returning a substring:
|
||||
|
||||
```
|
||||
"SALVTE"[II VSQVE IV] @> "ALV"
|
||||
```
|
||||
|
||||
Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2).
|
||||
|
||||
### Integers
|
||||
|
||||
@@ -833,6 +833,17 @@ class ArrayIndex(Node):
|
||||
if k not in d:
|
||||
raise CentvrionError(f"Key not found in dict")
|
||||
return vtable, d[k]
|
||||
if isinstance(array, ValStr):
|
||||
if isinstance(index, ValInt):
|
||||
i = index.value()
|
||||
elif isinstance(index, ValFrac) and index.value().denominator == 1:
|
||||
i = index.value().numerator
|
||||
else:
|
||||
raise CentvrionError("String index must be a number")
|
||||
s = array.value()
|
||||
if i < 1 or i > len(s):
|
||||
raise CentvrionError(f"Index {i} out of range for string of length {len(s)}")
|
||||
return vtable, ValStr(s[i - 1])
|
||||
if not isinstance(array, ValList):
|
||||
raise CentvrionError("Cannot index a non-array value")
|
||||
if isinstance(index, ValInt):
|
||||
@@ -877,6 +888,16 @@ class ArraySlice(Node):
|
||||
vtable, array = self.array.eval(vtable)
|
||||
vtable, from_val = self.from_index.eval(vtable)
|
||||
vtable, to_val = self.to_index.eval(vtable)
|
||||
if isinstance(array, ValStr):
|
||||
from_int = _to_index_int(from_val)
|
||||
to_int = _to_index_int(to_val)
|
||||
s = array.value()
|
||||
if from_int < 1 or to_int > len(s) or from_int > to_int:
|
||||
raise CentvrionError(
|
||||
f"Slice [{from_int} VSQVE {to_int}] out of range"
|
||||
f" for string of length {len(s)}"
|
||||
)
|
||||
return vtable, ValStr(s[from_int - 1 : to_int])
|
||||
if not isinstance(array, ValList):
|
||||
raise CentvrionError("Cannot slice a non-array value")
|
||||
from_int = _to_index_int(from_val)
|
||||
|
||||
@@ -721,6 +721,22 @@ void cent_list_push(CentValue *lst, CentValue v) {
|
||||
CentValue cent_list_index(CentValue lst, CentValue idx) {
|
||||
if (lst.type == CENT_DICT)
|
||||
return cent_dict_get(lst, idx);
|
||||
if (lst.type == CENT_STR) {
|
||||
long i;
|
||||
if (idx.type == CENT_INT)
|
||||
i = idx.ival;
|
||||
else if (idx.type == CENT_FRAC && idx.fval.den == 1)
|
||||
i = idx.fval.num;
|
||||
else
|
||||
cent_type_error("string index must be an integer");
|
||||
long slen = (long)strlen(lst.sval);
|
||||
if (i < 1 || i > slen)
|
||||
cent_runtime_error("string index out of range");
|
||||
char *ch = cent_arena_alloc(cent_arena, 2);
|
||||
ch[0] = lst.sval[i - 1];
|
||||
ch[1] = '\0';
|
||||
return cent_str(ch);
|
||||
}
|
||||
if (lst.type != CENT_LIST)
|
||||
cent_type_error("index requires a list or dict");
|
||||
long i;
|
||||
@@ -736,6 +752,20 @@ CentValue cent_list_index(CentValue lst, CentValue idx) {
|
||||
}
|
||||
|
||||
CentValue cent_list_slice(CentValue lst, CentValue lo, CentValue hi) {
|
||||
if (lst.type == CENT_STR) {
|
||||
if (lo.type != CENT_INT || hi.type != CENT_INT)
|
||||
cent_type_error("slice indices must be integers");
|
||||
long from = lo.ival;
|
||||
long to = hi.ival;
|
||||
long slen = (long)strlen(lst.sval);
|
||||
if (from < 1 || to > slen || from > to)
|
||||
cent_runtime_error("string slice out of range");
|
||||
int len = (int)(to - from + 1);
|
||||
char *buf = cent_arena_alloc(cent_arena, len + 1);
|
||||
memcpy(buf, lst.sval + from - 1, len);
|
||||
buf[len] = '\0';
|
||||
return cent_str(buf);
|
||||
}
|
||||
if (lst.type != CENT_LIST)
|
||||
cent_type_error("slice requires a list");
|
||||
if (lo.type != CENT_INT || hi.type != CENT_INT)
|
||||
|
||||
@@ -98,7 +98,7 @@
|
||||
\item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. Available modules: \texttt{FORS} (randomness), \texttt{FRACTIO} (fractions), \texttt{MAGNVM} (large integers), \texttt{SCRIPTA} (file I/O: \texttt{LEGE}, \texttt{SCRIBE}, \texttt{ADIVNGE}), \texttt{SVBNVLLA} (negative literals).
|
||||
\item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w.
|
||||
\item \textbf{builtin}: \\ Builtin functions are uppercase Latin words.
|
||||
\item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal.
|
||||
\item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal. Strings support 1-based indexing (\texttt{string[I]}) and inclusive slicing (\texttt{string[I VSQVE III]}), returning single-character strings and substrings respectively.
|
||||
\item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces.
|
||||
\item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscores if the MAGNVM module is enabled.
|
||||
\item \textbf{bool}: \\ VERITAS or FALSITAS.
|
||||
|
||||
86
tests.py
86
tests.py
@@ -639,6 +639,10 @@ error_tests = [
|
||||
("I * \"hello\"", CentvrionError), # multiplication with string
|
||||
("\"hello\" MINVS \"world\"", CentvrionError), # comparison with strings
|
||||
("I[I]", CentvrionError), # indexing a non-array
|
||||
('"SALVTE"[VII]', CentvrionError), # string index out of range
|
||||
('"SALVTE"[NVLLVS]', CentvrionError), # string index with non-integer
|
||||
('"SALVTE"[II VSQVE VII]', CentvrionError), # string slice out of range
|
||||
('"SALVTE"[III VSQVE II]', CentvrionError), # string slice from > to
|
||||
("DESIGNA x VT I\nDESIGNA x[I] VT II", CentvrionError), # index-assign to non-array
|
||||
("SEMEN(I)", CentvrionError), # requires FORS module
|
||||
('CVM FORS\nSEMEN("abc")', CentvrionError), # SEMEN requires integer seed
|
||||
@@ -1583,6 +1587,88 @@ class TestArraySlice(unittest.TestCase):
|
||||
run_test(self, source, nodes, value)
|
||||
|
||||
|
||||
# --- String indexing ---
|
||||
|
||||
# Case table for string indexing. Each tuple supplies, in order, the
# `source`, `nodes` and `value` arguments of TestStringIndex.test_string_index.
string_index_tests = [
    # Index I selects the first character.
    (
        '"SALVTE"[I]',
        Program([], [
            ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("I"))),
        ]),
        ValStr("S"),
    ),
    # Index VI (the string's length) selects the last character.
    (
        '"SALVTE"[VI]',
        Program([], [
            ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("VI"))),
        ]),
        ValStr("E"),
    ),
    # An interior index.
    (
        '"SALVTE"[III]',
        Program([], [
            ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("III"))),
        ]),
        ValStr("L"),
    ),
    # Indexing a string that is held in a variable.
    (
        'DESIGNA s VT "SALVTE"\ns[II]',
        Program([], [
            Designa(ID("s"), String("SALVTE")),
            ExpressionStatement(ArrayIndex(ID("s"), Numeral("II"))),
        ]),
        ValStr("A"),
    ),
    # The index may itself be an arbitrary expression (I + II).
    (
        '"SALVTE"[I + II]',
        Program([], [
            ExpressionStatement(
                ArrayIndex(
                    String("SALVTE"),
                    BinOp(Numeral("I"), Numeral("II"), "SYMBOL_PLUS"),
                )
            ),
        ]),
        ValStr("L"),
    ),
]
|
||||
|
||||
class TestStringIndex(unittest.TestCase):
    """Feeds every entry of ``string_index_tests`` to ``run_test``."""

    @parameterized.expand(string_index_tests)
    def test_string_index(self, source, nodes, value):
        # The three tuple fields of a case are forwarded unchanged.
        run_test(self, source, nodes, value)
|
||||
|
||||
|
||||
# --- String slicing ---
|
||||
|
||||
# Case table for string slicing. Each tuple supplies, in order, the
# `source`, `nodes` and `value` arguments of TestStringSlice.test_string_slice.
string_slice_tests = [
    # A slice taken from the interior of the string.
    (
        '"SALVTE"[II VSQVE IV]',
        Program([], [
            ExpressionStatement(
                ArraySlice(String("SALVTE"), Numeral("II"), Numeral("IV"))
            ),
        ]),
        ValStr("ALV"),
    ),
    # Bounds I..VI cover the whole string.
    (
        '"SALVTE"[I VSQVE VI]',
        Program([], [
            ExpressionStatement(
                ArraySlice(String("SALVTE"), Numeral("I"), Numeral("VI"))
            ),
        ]),
        ValStr("SALVTE"),
    ),
    # Equal bounds produce a one-character string.
    (
        '"SALVTE"[III VSQVE III]',
        Program([], [
            ExpressionStatement(
                ArraySlice(String("SALVTE"), Numeral("III"), Numeral("III"))
            ),
        ]),
        ValStr("L"),
    ),
    # Slicing a string that is held in a variable.
    (
        'DESIGNA s VT "SALVTE"\ns[II VSQVE IV]',
        Program([], [
            Designa(ID("s"), String("SALVTE")),
            ExpressionStatement(
                ArraySlice(ID("s"), Numeral("II"), Numeral("IV"))
            ),
        ]),
        ValStr("ALV"),
    ),
    # Chaining: the result of a slice is indexed again.
    (
        '"SALVTE"[I VSQVE III][II]',
        Program([], [
            ExpressionStatement(
                ArrayIndex(
                    ArraySlice(String("SALVTE"), Numeral("I"), Numeral("III")),
                    Numeral("II"),
                )
            ),
        ]),
        ValStr("A"),
    ),
    # Both bounds may be arbitrary expressions (I + I and II + II).
    (
        '"SALVTE"[I + I VSQVE II + II]',
        Program([], [
            ExpressionStatement(
                ArraySlice(
                    String("SALVTE"),
                    BinOp(Numeral("I"), Numeral("I"), "SYMBOL_PLUS"),
                    BinOp(Numeral("II"), Numeral("II"), "SYMBOL_PLUS"),
                )
            ),
        ]),
        ValStr("ALV"),
    ),
]
|
||||
|
||||
class TestStringSlice(unittest.TestCase):
    """Feeds every entry of ``string_slice_tests`` to ``run_test``."""

    @parameterized.expand(string_slice_tests)
    def test_string_slice(self, source, nodes, value):
        # The three tuple fields of a case are forwarded unchanged.
        run_test(self, source, nodes, value)
|
||||
|
||||
|
||||
# --- Comments ---
|
||||
|
||||
comment_tests = [
|
||||
|
||||
Reference in New Issue
Block a user