From 60fe69173162954064c462ddd3e787f92721c1d9 Mon Sep 17 00:00:00 2001 From: NikolajDanger Date: Wed, 22 Apr 2026 15:35:51 +0200 Subject: [PATCH] :goat: Array concat --- README.md | 13 +++++++ centvrion/ast_nodes.py | 6 ++- centvrion/compiler/emit_expr.py | 1 + centvrion/compiler/runtime/cent_runtime.c | 12 ++++++ centvrion/compiler/runtime/cent_runtime.h | 1 + centvrion/lexer.py | 1 + centvrion/parser.py | 3 +- language/main.tex | 2 +- snippets/syntaxes/centvrion.sublime-syntax | 2 +- tests.py | 37 +++++++++++++++++++ .../syntaxes/cent.tmLanguage.json | 2 +- 11 files changed, 75 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 1415d9a..e40bb48 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,19 @@ Individual elements can be accessed by index using square brackets. Indexing is > I ``` +Arrays are concatenated with `@`: + +``` +DESIGNA x VT [I, II, III] @ [IV, V] +DIC x +``` + +``` +> [I, II, III, IV, V] +``` + +Both operands must be arrays — using `@` on non-arrays raises an error. + A sub-array can be extracted with `VSQVE` inside the index brackets. Both bounds are inclusive and 1-based: ![Array slicing](snippets/array_slice.png) diff --git a/centvrion/ast_nodes.py b/centvrion/ast_nodes.py index edfdd2e..59382e4 100644 --- a/centvrion/ast_nodes.py +++ b/centvrion/ast_nodes.py @@ -59,7 +59,7 @@ def rep_join(l): OP_STR = { "SYMBOL_PLUS": "+", "SYMBOL_MINUS": "-", "SYMBOL_TIMES": "*", "SYMBOL_DIVIDE": "/", - "SYMBOL_AMPERSAND": "&", + "SYMBOL_AMPERSAND": "&", "SYMBOL_AT": "@", "KEYWORD_RELIQVVM": "RELIQVVM", "KEYWORD_EST": "EST", "KEYWORD_DISPAR": "DISPAR", "KEYWORD_MINVS": "MINVS", @@ -833,6 +833,10 @@ class BinOp(Node): return vtable, ValNul() result = (lv or 0) + (rv or 0) return vtable, ValFrac(result) if isinstance(result, Fraction) else ValInt(result) + case "SYMBOL_AT": + if not isinstance(left, ValList) or not isinstance(right, ValList): + raise CentvrionError("@ requires two arrays") + return vtable, ValList(list(lv) + list(rv)) case "SYMBOL_AMPERSAND": magnvm = "MAGNVM" in vtable["#modules"] svbnvlla = "SVBNVLLA" in vtable["#modules"] diff --git a/centvrion/compiler/emit_expr.py b/centvrion/compiler/emit_expr.py index 5ef2ef2..0b2317a 100644 --- a/centvrion/compiler/emit_expr.py +++ b/centvrion/compiler/emit_expr.py @@ -13,6 +13,7 @@ _BINOP_FN = { "SYMBOL_TIMES": "cent_mul", "SYMBOL_DIVIDE": "cent_div", "SYMBOL_AMPERSAND": "cent_concat", + "SYMBOL_AT": "cent_array_concat", "KEYWORD_RELIQVVM": "cent_mod", "KEYWORD_EST": "cent_eq", "KEYWORD_DISPAR": "cent_neq", diff --git a/centvrion/compiler/runtime/cent_runtime.c b/centvrion/compiler/runtime/cent_runtime.c index 50ea227..3442691 100644 --- a/centvrion/compiler/runtime/cent_runtime.c +++ b/centvrion/compiler/runtime/cent_runtime.c @@ -409,6 +409,18 @@ CentValue cent_add(CentValue a, CentValue b) { return cent_null(); } +CentValue cent_array_concat(CentValue a, CentValue b) { + if (a.type != CENT_LIST || b.type != CENT_LIST) + cent_type_error("'@' requires two arrays"); + int total = a.lval.len + b.lval.len; + CentValue result = cent_list_new(total); + for (int i = 0; i < a.lval.len; i++) + cent_list_push(&result, a.lval.items[i]); + for (int i = 0; i < b.lval.len; i++) + cent_list_push(&result, b.lval.items[i]); + return result; +} + CentValue cent_concat(CentValue a, CentValue b) { const char *sa = (a.type == CENT_NULL) ? "" : cent_make_string(a); const char *sb = (b.type == CENT_NULL) ? "" : cent_make_string(b); diff --git a/centvrion/compiler/runtime/cent_runtime.h b/centvrion/compiler/runtime/cent_runtime.h index 15b7b90..237f509 100644 --- a/centvrion/compiler/runtime/cent_runtime.h +++ b/centvrion/compiler/runtime/cent_runtime.h @@ -198,6 +198,7 @@ char *cent_make_string(CentValue v); /* ------------------------------------------------------------------ */ CentValue cent_add(CentValue a, CentValue b); /* INT+INT or FRAC+FRAC/INT */ +CentValue cent_array_concat(CentValue a, CentValue b); /* @ operator: concatenate two arrays */ CentValue cent_concat(CentValue a, CentValue b); /* & operator: coerce all types to str */ CentValue cent_sub(CentValue a, CentValue b); /* INT-INT or FRAC-FRAC/INT */ CentValue cent_mul(CentValue a, CentValue b); /* INT*INT or FRAC*FRAC/INT */ diff --git a/centvrion/lexer.py b/centvrion/lexer.py index ff0ea41..48dcc77 100644 --- a/centvrion/lexer.py +++ b/centvrion/lexer.py @@ -94,6 +94,7 @@ symbol_tokens = [ ("SYMBOL_TIMES", r"\*"), ("SYMBOL_DIVIDE", r"\/"), ("SYMBOL_AMPERSAND", r"&"), + ("SYMBOL_AT", r"@"), ("SYMBOL_COMMA", r",") ] diff --git a/centvrion/parser.py b/centvrion/parser.py index e041229..cfa20ce 100644 --- a/centvrion/parser.py +++ b/centvrion/parser.py @@ -116,7 +116,7 @@ class Parser(): ('left', ["KEYWORD_AVT"]), ('left', ["KEYWORD_ET"]), ('left', ["KEYWORD_PLVS", "KEYWORD_MINVS", "KEYWORD_EST", "KEYWORD_DISPAR"]), - ('left', ["SYMBOL_AMPERSAND", "SYMBOL_PLUS", "SYMBOL_MINUS"]), + ('left', ["SYMBOL_AMPERSAND", "SYMBOL_AT", "SYMBOL_PLUS", "SYMBOL_MINUS"]), ('left', ["SYMBOL_TIMES", "SYMBOL_DIVIDE", "KEYWORD_RELIQVVM"]), ('right', ["UMINUS", "UNOT"]), ('left', ["SYMBOL_LBRACKET", "INDEX"]), @@ -311,6 +311,7 @@ class Parser(): def expression_nullus(_): return ast_nodes.Nullus() + @self.pg.production('expression : expression SYMBOL_AT expression') @self.pg.production('expression : expression SYMBOL_AMPERSAND expression') @self.pg.production('expression : expression SYMBOL_MINUS expression') @self.pg.production('expression : expression SYMBOL_PLUS expression') diff --git a/language/main.tex b/language/main.tex index d2bbe35..b73065a 100644 --- a/language/main.tex +++ b/language/main.tex @@ -105,7 +105,7 @@ \item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces. \item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM. \item \textbf{bool}: \\ VERITAS or FALSITAS. - \item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation). + \item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation), \texttt{@} (array concatenation). \item \textbf{unop}: \\ Unary operators: \texttt{-} (negation), \texttt{NON} (boolean not). \end{itemize} diff --git a/snippets/syntaxes/centvrion.sublime-syntax b/snippets/syntaxes/centvrion.sublime-syntax index 91fac95..56d1be0 100644 --- a/snippets/syntaxes/centvrion.sublime-syntax +++ b/snippets/syntaxes/centvrion.sublime-syntax @@ -82,7 +82,7 @@ contexts: scope: keyword.control.centvrion operators: - - match: '[+\-*/&]' + - match: '[+\-*/&@]' scope: keyword.operator.centvrion identifiers: diff --git a/tests.py b/tests.py index d38b5a3..3ecbce0 100644 --- a/tests.py +++ b/tests.py @@ -701,6 +701,9 @@ error_tests = [ ("FALSITAS AVT NVLLVS", CentvrionError), # no short-circuit: right side evaluated, NVLLVS not boolean ("VERITAS ET NVLLVS", CentvrionError), # no short-circuit: right side evaluated, NVLLVS not boolean ("NVLLVS ET VERITAS", CentvrionError), # NVLLVS cannot be used as boolean in ET + ('I @ [II]', CentvrionError), # @ requires two arrays (int @ array) + ('[I] @ "hello"', CentvrionError), # @ requires two arrays (array @ string) + ('"a" @ "b"', CentvrionError), # @ requires two arrays (string @ string) ('"hello" + " world"', CentvrionError), # use & for string concatenation, not + ("[I, II][III]", CentvrionError), # index too high ("CVM SVBNVLLA\n[I, II][-I]", CentvrionError), # negative index @@ -1031,6 +1034,40 @@ class TestArithmeticEdge(unittest.TestCase): run_test(self, source, nodes, value) +# --- Array concatenation --- + +array_concat_tests = [ + ("[I, II] @ [III, IV]", + Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I"), Numeral("II")]), DataArray([Numeral("III"), Numeral("IV")]), "SYMBOL_AT"))]), + ValList([ValInt(1), ValInt(2), ValInt(3), ValInt(4)])), + ("[] @ [I]", + Program([], [ExpressionStatement(BinOp(DataArray([]), DataArray([Numeral("I")]), "SYMBOL_AT"))]), + ValList([ValInt(1)])), + ("[I] @ []", + Program([], [ExpressionStatement(BinOp(DataArray([Numeral("I")]), DataArray([]), "SYMBOL_AT"))]), + ValList([ValInt(1)])), + ("[] @ []", + Program([], [ExpressionStatement(BinOp(DataArray([]), DataArray([]), "SYMBOL_AT"))]), + ValList([])), + ('["a"] @ [I]', + Program([], [ExpressionStatement(BinOp(DataArray([String("a")]), DataArray([Numeral("I")]), "SYMBOL_AT"))]), + ValList([ValStr("a"), ValInt(1)])), + # left-associative chaining + ("[I] @ [II] @ [III]", + Program([], [ExpressionStatement(BinOp(BinOp(DataArray([Numeral("I")]), DataArray([Numeral("II")]), "SYMBOL_AT"), DataArray([Numeral("III")]), "SYMBOL_AT"))]), + ValList([ValInt(1), ValInt(2), ValInt(3)])), + # concat with variable + ("DESIGNA a VT [I, II]\nDESIGNA b VT [III]\na @ b", + Program([], [Designa(ID("a"), DataArray([Numeral("I"), Numeral("II")])), Designa(ID("b"), DataArray([Numeral("III")])), ExpressionStatement(BinOp(ID("a"), ID("b"), "SYMBOL_AT"))]), + ValList([ValInt(1), ValInt(2), ValInt(3)])), +] + +class TestArrayConcat(unittest.TestCase): + @parameterized.expand(array_concat_tests) + def test_array_concat(self, source, nodes, value): + run_test(self, source, nodes, value) + + # --- String concatenation --- string_concat_tests = [ diff --git a/vscode-extension/syntaxes/cent.tmLanguage.json b/vscode-extension/syntaxes/cent.tmLanguage.json index e9d2634..75d07f7 100644 --- a/vscode-extension/syntaxes/cent.tmLanguage.json +++ b/vscode-extension/syntaxes/cent.tmLanguage.json @@ -57,7 +57,7 @@ }, { "name": "keyword.operator.arithmetic.cent", - "match": "(\\*|\\+|-|/|&)" + "match": "(\\*|\\+|-|/|&|@)" } ] },