diff --git a/README.md b/README.md index 20d4c2b..8033b96 100644 --- a/README.md +++ b/README.md @@ -70,6 +70,21 @@ Any expression can appear inside `{}`. Values are coerced to strings the same wa Single-quoted strings do **not** interpolate — `'{nomen}'` is the literal text `{nomen}`. Use `{{` and `}}` for literal braces in double-quoted strings: `"use {{braces}}"` → `use {braces}`. +#### String Indexing and Slicing + +Strings support the same indexing and slicing syntax as arrays. Indexing is 1-based and returns a single-character string: + +``` +"SALVTE"[I] @> "S" +"SALVTE"[III] @> "L" +``` + +Slicing uses `VSQVE` with inclusive bounds, returning a substring: + +``` +"SALVTE"[II VSQVE IV] @> "ALV" +``` + Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2). ### Integers diff --git a/centvrion/ast_nodes.py b/centvrion/ast_nodes.py index 1168909..85b59a6 100644 --- a/centvrion/ast_nodes.py +++ b/centvrion/ast_nodes.py @@ -833,6 +833,17 @@ class ArrayIndex(Node): if k not in d: raise CentvrionError(f"Key not found in dict") return vtable, d[k] + if isinstance(array, ValStr): + if isinstance(index, ValInt): + i = index.value() + elif isinstance(index, ValFrac) and index.value().denominator == 1: + i = index.value().numerator + else: + raise CentvrionError("String index must be a number") + s = array.value() + if i < 1 or i > len(s): + raise CentvrionError(f"Index {i} out of range for string of length {len(s)}") + return vtable, ValStr(s[i - 1]) if not isinstance(array, ValList): raise CentvrionError("Cannot index a non-array value") if isinstance(index, ValInt): @@ -877,6 +888,16 @@ class ArraySlice(Node): vtable, array = self.array.eval(vtable) vtable, from_val = self.from_index.eval(vtable) vtable, to_val = self.to_index.eval(vtable) + if isinstance(array, ValStr): + from_int = _to_index_int(from_val) + to_int = _to_index_int(to_val) + s = array.value() + if from_int < 1 or to_int > len(s) or from_int > to_int: + raise CentvrionError( + f"Slice [{from_int} VSQVE {to_int}] out of range" + f" for string of length {len(s)}" + ) + return vtable, ValStr(s[from_int - 1 : to_int]) if not isinstance(array, ValList): raise CentvrionError("Cannot slice a non-array value") from_int = _to_index_int(from_val) diff --git a/centvrion/compiler/runtime/cent_runtime.c b/centvrion/compiler/runtime/cent_runtime.c index c5d014e..0fa1b51 100644 --- a/centvrion/compiler/runtime/cent_runtime.c +++ b/centvrion/compiler/runtime/cent_runtime.c @@ -721,6 +721,22 @@ void cent_list_push(CentValue *lst, CentValue v) { CentValue cent_list_index(CentValue lst, CentValue idx) { if (lst.type == CENT_DICT) return cent_dict_get(lst, idx); + if (lst.type == CENT_STR) { + long i; + if (idx.type == CENT_INT) + i = idx.ival; + else if (idx.type == CENT_FRAC && idx.fval.den == 1) + i = idx.fval.num; + else + cent_type_error("string index must be an integer"); + long slen = (long)strlen(lst.sval); + if (i < 1 || i > slen) + cent_runtime_error("string index out of range"); + char *ch = cent_arena_alloc(cent_arena, 2); + ch[0] = lst.sval[i - 1]; + ch[1] = '\0'; + return cent_str(ch); + } if (lst.type != CENT_LIST) cent_type_error("index requires a list or dict"); long i; @@ -736,6 +752,20 @@ CentValue cent_list_index(CentValue lst, CentValue idx) { } CentValue cent_list_slice(CentValue lst, CentValue lo, CentValue hi) { + if (lst.type == CENT_STR) { + if (lo.type != CENT_INT || hi.type != CENT_INT) + cent_type_error("slice indices must be integers"); + long from = lo.ival; + long to = hi.ival; + long slen = (long)strlen(lst.sval); + if (from < 1 || to > slen || from > to) + cent_runtime_error("string slice out of range"); + int len = (int)(to - from + 1); + char *buf = cent_arena_alloc(cent_arena, len + 1); + memcpy(buf, lst.sval + from - 1, len); + buf[len] = '\0'; + return cent_str(buf); + } if (lst.type != CENT_LIST) cent_type_error("slice requires a list"); if (lo.type != CENT_INT || hi.type != CENT_INT) diff --git a/language/main.tex b/language/main.tex index b64a1d2..b58bb9f 100644 --- a/language/main.tex +++ b/language/main.tex @@ -98,7 +98,7 @@ \item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. Available modules: \texttt{FORS} (randomness), \texttt{FRACTIO} (fractions), \texttt{MAGNVM} (large integers), \texttt{SCRIPTA} (file I/O: \texttt{LEGE}, \texttt{SCRIBE}, \texttt{ADIVNGE}), \texttt{SVBNVLLA} (negative literals). \item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w. \item \textbf{builtin}: \\ Builtin functions are uppercase latin words. - \item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal. + \item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal. Strings support 1-based indexing (\texttt{string[I]}) and inclusive slicing (\texttt{string[I VSQVE III]}), returning single-character strings and substrings respectively. \item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces. \item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM. \item \textbf{bool}: \\ VERITAS or FALSITAS. diff --git a/tests.py b/tests.py index fbd20cc..9fee0f3 100644 --- a/tests.py +++ b/tests.py @@ -639,6 +639,10 @@ error_tests = [ ("I * \"hello\"", CentvrionError), # multiplication with string ("\"hello\" MINVS \"world\"", CentvrionError), # comparison with strings ("I[I]", CentvrionError), # indexing a non-array + ('"SALVTE"[VII]', CentvrionError), # string index out of range + ('"SALVTE"[NVLLVS]', CentvrionError), # string index with non-integer + ('"SALVTE"[II VSQVE VII]', CentvrionError), # string slice out of range + ('"SALVTE"[III VSQVE II]', CentvrionError), # string slice from > to ("DESIGNA x VT I\nDESIGNA x[I] VT II", CentvrionError), # index-assign to non-array ("SEMEN(I)", CentvrionError), # requires FORS module ('CVM FORS\nSEMEN("abc")', CentvrionError), # SEMEN requires integer seed @@ -1583,6 +1587,88 @@ class TestArraySlice(unittest.TestCase): run_test(self, source, nodes, value) +# --- String indexing --- + +string_index_tests = [ + # first character + ('"SALVTE"[I]', + Program([], [ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("I")))]), + ValStr("S")), + # last character + ('"SALVTE"[VI]', + Program([], [ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("VI")))]), + ValStr("E")), + # middle character + ('"SALVTE"[III]', + Program([], [ExpressionStatement(ArrayIndex(String("SALVTE"), Numeral("III")))]), + ValStr("L")), + # string index via variable + ('DESIGNA s VT "SALVTE"\ns[II]', + Program([], [ + Designa(ID("s"), String("SALVTE")), + ExpressionStatement(ArrayIndex(ID("s"), Numeral("II"))), + ]), + ValStr("A")), + # expression as index + ('"SALVTE"[I + II]', + Program([], [ExpressionStatement(ArrayIndex( + String("SALVTE"), + BinOp(Numeral("I"), Numeral("II"), "SYMBOL_PLUS")))]), + ValStr("L")), +] + +class TestStringIndex(unittest.TestCase): + @parameterized.expand(string_index_tests) + def test_string_index(self, source, nodes, value): + run_test(self, source, nodes, value) + + +# --- String slicing --- + +string_slice_tests = [ + # substring from middle + ('"SALVTE"[II VSQVE IV]', + Program([], [ExpressionStatement(ArraySlice( + String("SALVTE"), Numeral("II"), Numeral("IV")))]), + ValStr("ALV")), + # full string slice + ('"SALVTE"[I VSQVE VI]', + Program([], [ExpressionStatement(ArraySlice( + String("SALVTE"), Numeral("I"), Numeral("VI")))]), + ValStr("SALVTE")), + # single-char slice + ('"SALVTE"[III VSQVE III]', + Program([], [ExpressionStatement(ArraySlice( + String("SALVTE"), Numeral("III"), Numeral("III")))]), + ValStr("L")), + # slice on variable + ('DESIGNA s VT "SALVTE"\ns[II VSQVE IV]', + Program([], [ + Designa(ID("s"), String("SALVTE")), + ExpressionStatement(ArraySlice(ID("s"), Numeral("II"), Numeral("IV"))), + ]), + ValStr("ALV")), + # chaining: slice then index + ('"SALVTE"[I VSQVE III][II]', + Program([], [ExpressionStatement(ArrayIndex( + ArraySlice(String("SALVTE"), Numeral("I"), Numeral("III")), + Numeral("II")))]), + ValStr("A")), + # expression as slice bounds + ('"SALVTE"[I + I VSQVE II + II]', + Program([], [ExpressionStatement(ArraySlice( + String("SALVTE"), + BinOp(Numeral("I"), Numeral("I"), "SYMBOL_PLUS"), + BinOp(Numeral("II"), Numeral("II"), "SYMBOL_PLUS")))]), + ValStr("ALV")), +] + +class TestStringSlice(unittest.TestCase): + @parameterized.expand(string_slice_tests) + def test_string_slice(self, source, nodes, value): + run_test(self, source, nodes, value) + + # --- Comments --- comment_tests = [