From 25e88a636262ca8ab8a0cb5e6e53df74c70a590e Mon Sep 17 00:00:00 2001 From: NikolajDanger Date: Wed, 22 Apr 2026 11:48:54 +0200 Subject: [PATCH] :goat: SCINDE --- README.md | 5 +++ centvrion/ast_nodes.py | 12 +++++++ centvrion/compiler/emit_expr.py | 3 ++ centvrion/compiler/runtime/cent_runtime.c | 34 +++++++++++++++++++ centvrion/compiler/runtime/cent_runtime.h | 1 + centvrion/lexer.py | 1 + snippets/syntaxes/centvrion.sublime-syntax | 2 +- tests.py | 14 ++++++++ .../syntaxes/cent.tmLanguage.json | 2 +- 9 files changed, 72 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 493fb79..9d21987 100644 --- a/README.md +++ b/README.md @@ -359,6 +359,11 @@ Returns an array of all non-overlapping matches of the regex `pattern` in `strin Replaces all non-overlapping matches of the regex `pattern` in `string` with `replacement`. All three arguments must be strings. The replacement string supports backreferences (`\1`, `\2`, etc.) to captured groups. Returns the resulting string. Raises an error if the pattern is invalid. +### SCINDE +`SCINDE(string, delimiter)` + +Splits `string` by `delimiter` and returns an array of substrings. Both arguments must be strings. If the delimiter is not found, returns a single-element array containing the original string. If the delimiter is an empty string, splits into individual characters. + ## Modules Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having diff --git a/centvrion/ast_nodes.py b/centvrion/ast_nodes.py index 4293a34..4a44f80 100644 --- a/centvrion/ast_nodes.py +++ b/centvrion/ast_nodes.py @@ -1328,6 +1328,18 @@ class BuiltIn(Node): except re.error as e: raise CentvrionError(f"Invalid regex: {e}") return vtable, ValStr(result) + case "SCINDE": + string = params[0] + delimiter = params[1] + if not isinstance(string, ValStr) or not isinstance(delimiter, ValStr): + raise CentvrionError("SCINDE requires two strings") + s = string.value() + d = delimiter.value() + if d == "": + parts = [ValStr(c) for c in s] + else: + parts = [ValStr(p) for p in s.split(d)] + return vtable, ValList(parts) case "PETE": if "RETE" not in vtable["#modules"]: raise CentvrionError("Cannot use 'PETE' without module 'RETE'") diff --git a/centvrion/compiler/emit_expr.py b/centvrion/compiler/emit_expr.py index 5872adb..38ae4b9 100644 --- a/centvrion/compiler/emit_expr.py +++ b/centvrion/compiler/emit_expr.py @@ -303,6 +303,9 @@ def _emit_builtin(node, ctx): case "SVBSTITVE": lines.append(f"CentValue {tmp} = cent_svbstitve({param_vars[0]}, {param_vars[1]}, {param_vars[2]});") + case "SCINDE": + lines.append(f"CentValue {tmp} = cent_scinde({param_vars[0]}, {param_vars[1]});") + case "PETE": if not ctx.has_module("RETE"): lines.append('cent_runtime_error("RETE module required for PETE");') diff --git a/centvrion/compiler/runtime/cent_runtime.c b/centvrion/compiler/runtime/cent_runtime.c index cef3f14..bbdb609 100644 --- a/centvrion/compiler/runtime/cent_runtime.c +++ b/centvrion/compiler/runtime/cent_runtime.c @@ -1015,6 +1015,40 @@ CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue tex return cent_str(result); } +CentValue cent_scinde(CentValue str, CentValue delim) { + if (str.type != CENT_STR || delim.type != CENT_STR) + cent_type_error("'SCINDE' requires two strings"); + const char *s = str.sval; + const char *d = delim.sval; + size_t dlen = strlen(d); + CentValue result = cent_list_new(8); + if (dlen == 0) { + /* empty delimiter: split into individual characters */ + for (const char *p = s; *p; p++) { + char *buf = cent_arena_alloc(cent_arena, 2); + buf[0] = *p; + buf[1] = '\0'; + cent_list_push(&result, cent_str(buf)); + } + return result; + } + const char *cursor = s; + for (;;) { + const char *found = strstr(cursor, d); + if (!found) { + cent_list_push(&result, cent_str(cursor)); + break; + } + size_t len = found - cursor; + char *buf = cent_arena_alloc(cent_arena, len + 1); + memcpy(buf, cursor, len); + buf[len] = '\0'; + cent_list_push(&result, cent_str(buf)); + cursor = found + dlen; + } + return result; +} + /* ------------------------------------------------------------------ */ /* Networking (RETE) */ /* ------------------------------------------------------------------ */ diff --git a/centvrion/compiler/runtime/cent_runtime.h b/centvrion/compiler/runtime/cent_runtime.h index f44b97c..876c123 100644 --- a/centvrion/compiler/runtime/cent_runtime.h +++ b/centvrion/compiler/runtime/cent_runtime.h @@ -234,6 +234,7 @@ void cent_scribe(CentValue path, CentValue content); /* SCRIBE */ void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */ CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */ CentValue cent_svbstitve(CentValue pattern, CentValue replacement, CentValue text); /* SVBSTITVE */ +CentValue cent_scinde(CentValue str, CentValue delim); /* SCINDE */ CentValue cent_pete(CentValue url); /* PETE */ void cent_petitvr(CentValue path, CentValue handler, CentScope scope); /* PETITVR */ void cent_avscvlta(CentValue port); /* AVSCVLTA */ diff --git a/centvrion/lexer.py b/centvrion/lexer.py index 42984d4..250fbc4 100644 --- a/centvrion/lexer.py +++ b/centvrion/lexer.py @@ -60,6 +60,7 @@ builtin_tokens = [("BUILTIN", i) for i in [ "ADIVNGE", "QVAERE", "SVBSTITVE", + "SCINDE", "PETE", "PETITVR", "AVSCVLTA" diff --git a/snippets/syntaxes/centvrion.sublime-syntax b/snippets/syntaxes/centvrion.sublime-syntax index 328892a..d17bac2 100644 --- a/snippets/syntaxes/centvrion.sublime-syntax +++ b/snippets/syntaxes/centvrion.sublime-syntax @@ -70,7 +70,7 @@ contexts: scope: constant.language.centvrion builtins: - - match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|PETE|PETITVR|QVAERE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\b' + - match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\b' scope: support.function.builtin.centvrion modules: diff --git a/tests.py b/tests.py index 263a807..35dd624 100644 --- a/tests.py +++ b/tests.py @@ -638,6 +638,18 @@ builtin_tests = [ ('SVBSTITVE("(a)(b)", "\\2\\1", "ab")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)"), String("\\2\\1"), String("ab")]))]), ValStr("ba")), # SVBSTITVE: backreference with unmatched group (ignored) ('SVBSTITVE("(a)(b)?", "\\1\\2", "a")', Program([], [ExpressionStatement(BuiltIn("SVBSTITVE", [String("(a)(b)?"), String("\\1\\2"), String("a")]))]), ValStr("a")), + # SCINDE: basic split + ('SCINDE("a,b,c", ",")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String("a,b,c"), String(",")]))]), ValList([ValStr("a"), ValStr("b"), ValStr("c")])), + # SCINDE: no match (delimiter not found) + ('SCINDE("abc", ",")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String("abc"), String(",")]))]), ValList([ValStr("abc")])), + # SCINDE: empty string + ('SCINDE("", ",")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String(""), String(",")]))]), ValList([ValStr("")])), + # SCINDE: multi-char delimiter + ('SCINDE("a::b::c", "::")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String("a::b::c"), String("::")]))]), ValList([ValStr("a"), ValStr("b"), ValStr("c")])), + # SCINDE: delimiter at edges + ('SCINDE(",a,", ",")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String(",a,"), String(",")]))]), ValList([ValStr(""), ValStr("a"), ValStr("")])), + # SCINDE: empty delimiter (split into chars) + ('SCINDE("abc", "")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String("abc"), String("")]))]), ValList([ValStr("a"), ValStr("b"), ValStr("c")])), ] class TestBuiltins(unittest.TestCase): @@ -726,6 +738,8 @@ error_tests = [ ('SVBSTITVE("a", I, "c")', CentvrionError), # SVBSTITVE requires strings, not int replacement ('SVBSTITVE("a", "b", I)', CentvrionError), # SVBSTITVE requires strings, not int text ('SVBSTITVE("[", "b", "c")', CentvrionError), # SVBSTITVE invalid regex + ('SCINDE(I, ",")', CentvrionError), # SCINDE requires strings, not int + ('SCINDE("a", I)', CentvrionError), # SCINDE requires strings, not int delimiter ('PETE("http://example.com")', CentvrionError), # RETE required for PETE ('CVM RETE\nPETE(I)', CentvrionError), # PETE requires a string URL ('PETITVR("/", FVNCTIO (r) VT {\nREDI("hi")\n})', CentvrionError), # RETE required for PETITVR diff --git a/vscode-extension/syntaxes/cent.tmLanguage.json b/vscode-extension/syntaxes/cent.tmLanguage.json index e68e2dc..3778311 100644 --- a/vscode-extension/syntaxes/cent.tmLanguage.json +++ b/vscode-extension/syntaxes/cent.tmLanguage.json @@ -65,7 +65,7 @@ "patterns": [ { "name": "support.function.builtin.cent", - "match": "\\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|PETE|PETITVR|QVAERE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\\b" + "match": "\\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LONGITVDO|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\\b" } ] },