From dbaf01b6a38796260e592ae7a087f7e069fccf2d Mon Sep 17 00:00:00 2001 From: NikolajDanger Date: Fri, 24 Apr 2026 18:10:50 +0200 Subject: [PATCH] :goat: String uppercase/lowercase functions --- README.md | 10 ++++++ centvrion/ast_nodes.py | 16 +++++++++ centvrion/compiler/emit_expr.py | 14 ++++++++ centvrion/compiler/runtime/cent_runtime.c | 24 +++++++++++++ centvrion/compiler/runtime/cent_runtime.h | 2 ++ centvrion/lexer.py | 4 ++- snippets/syntaxes/centvrion.sublime-syntax | 2 +- tests.py | 36 +++++++++++++++++++ vscode-extension/snippets/cent.json | 2 ++ .../syntaxes/cent.tmLanguage.json | 2 +- 10 files changed, 109 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 932a7d2..e9a76b3 100644 --- a/README.md +++ b/README.md @@ -391,6 +391,16 @@ Replaces all non-overlapping matches of the regex `pattern` in `string` with `re Splits `string` by `delimiter` and returns an array of substrings. Both arguments must be strings. If the delimiter is not found, returns a single-element array containing the original string. If the delimiter is an empty string, splits into individual characters. +### MAIVSCVLA +`MAIVSCVLA(string)` + +Returns a new string with every ASCII letter `a`–`z` replaced by its uppercase counterpart `A`–`Z`. All other bytes (digits, punctuation, non-ASCII) pass through unchanged. + +### MINVSCVLA +`MINVSCVLA(string)` + +Returns a new string with every ASCII letter `A`–`Z` replaced by its lowercase counterpart `a`–`z`. All other bytes (digits, punctuation, non-ASCII) pass through unchanged. + ## Modules Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having diff --git a/centvrion/ast_nodes.py b/centvrion/ast_nodes.py index 38e9d81..76a1515 100644 --- a/centvrion/ast_nodes.py +++ b/centvrion/ast_nodes.py @@ -1445,6 +1445,22 @@ class BuiltIn(Node): if len(params) != 1: raise CentvrionError("LITTERA takes exactly I argument") return vtable, ValStr(make_string(params[0], magnvm, svbnvlla)) + case "MAIVSCVLA": + if len(params) != 1: + raise CentvrionError("MAIVSCVLA takes exactly I argument") + val = params[0] + if not isinstance(val, ValStr): + raise CentvrionError(f"MAIVSCVLA expects a string, got {type(val).__name__}") + s = val.value() + return vtable, ValStr("".join(chr(ord(c) - 32) if "a" <= c <= "z" else c for c in s)) + case "MINVSCVLA": + if len(params) != 1: + raise CentvrionError("MINVSCVLA takes exactly I argument") + val = params[0] + if not isinstance(val, ValStr): + raise CentvrionError(f"MINVSCVLA expects a string, got {type(val).__name__}") + s = val.value() + return vtable, ValStr("".join(chr(ord(c) + 32) if "A" <= c <= "Z" else c for c in s)) case "CLAVES": if not isinstance(params[0], ValDict): raise CentvrionError("CLAVES requires a dict") diff --git a/centvrion/compiler/emit_expr.py b/centvrion/compiler/emit_expr.py index 751c372..44ee431 100644 --- a/centvrion/compiler/emit_expr.py +++ b/centvrion/compiler/emit_expr.py @@ -240,6 +240,20 @@ def _emit_builtin(node, ctx): case "LITTERA": lines.append(f"CentValue {tmp} = cent_littera({param_vars[0]});") + case "MAIVSCVLA": + if len(param_vars) != 1: + lines.append(f'cent_runtime_error("MAIVSCVLA takes exactly I argument");') + lines.append(f"CentValue {tmp} = cent_null();") + else: + lines.append(f"CentValue {tmp} = cent_maivscvla({param_vars[0]});") + + case "MINVSCVLA": + if len(param_vars) != 1: + lines.append(f'cent_runtime_error("MINVSCVLA takes exactly I argument");') + lines.append(f"CentValue {tmp} = cent_null();") + else: + lines.append(f"CentValue {tmp} = cent_minvscvla({param_vars[0]});") + case "FORTVITVS_NVMERVS": if not ctx.has_module("FORS"): lines.append('cent_runtime_error("FORS module required for FORTVITVS_NVMERVS");') diff --git a/centvrion/compiler/runtime/cent_runtime.c b/centvrion/compiler/runtime/cent_runtime.c index 10c6232..254da4b 100644 --- a/centvrion/compiler/runtime/cent_runtime.c +++ b/centvrion/compiler/runtime/cent_runtime.c @@ -671,6 +671,30 @@ CentValue cent_littera(CentValue v) { return cent_str(cent_make_string(v)); } +CentValue cent_maivscvla(CentValue v) { + if (v.type != CENT_STR) cent_type_error("'MAIVSCVLA' requires a string"); + size_t len = strlen(v.sval); + char *out = cent_arena_alloc(cent_arena, len + 1); + for (size_t i = 0; i < len; i++) { + unsigned char c = (unsigned char)v.sval[i]; + out[i] = (c >= 'a' && c <= 'z') ? (char)(c - ('a' - 'A')) : (char)c; + } + out[len] = '\0'; + return cent_str(out); +} + +CentValue cent_minvscvla(CentValue v) { + if (v.type != CENT_STR) cent_type_error("'MINVSCVLA' requires a string"); + size_t len = strlen(v.sval); + char *out = cent_arena_alloc(cent_arena, len + 1); + for (size_t i = 0; i < len; i++) { + unsigned char c = (unsigned char)v.sval[i]; + out[i] = (c >= 'A' && c <= 'Z') ? (char)(c + ('a' - 'A')) : (char)c; + } + out[len] = '\0'; + return cent_str(out); +} + CentValue cent_typvs(CentValue v) { switch (v.type) { case CENT_INT: return cent_str("NVMERVS"); diff --git a/centvrion/compiler/runtime/cent_runtime.h b/centvrion/compiler/runtime/cent_runtime.h index 22ef77d..d97cc27 100644 --- a/centvrion/compiler/runtime/cent_runtime.h +++ b/centvrion/compiler/runtime/cent_runtime.h @@ -228,6 +228,8 @@ CentValue cent_avdi(void); /* AVDI */ CentValue cent_avdi_numerus(void); /* AVDI_NVMERVS */ CentValue cent_longitudo(CentValue v); /* LONGITVDO */ CentValue cent_littera(CentValue v); /* LITTERA */ +CentValue cent_maivscvla(CentValue v); /* MAIVSCVLA */ +CentValue cent_minvscvla(CentValue v); /* MINVSCVLA */ CentValue cent_fortuitus_numerus(CentValue lo, CentValue hi); /* FORTVITVS_NVMERVS */ CentValue cent_fortuita_electionis(CentValue lst); /* FORTVITA_ELECTIO */ CentValue cent_decimatio(CentValue lst); /* DECIMATIO */ diff --git a/centvrion/lexer.py b/centvrion/lexer.py index 09b1523..1370f44 100644 --- a/centvrion/lexer.py +++ b/centvrion/lexer.py @@ -56,6 +56,8 @@ builtin_tokens = [("BUILTIN", i) for i in [ "FORTVITA_ELECTIO", "LITTERA", "LONGITVDO", + "MAIVSCVLA", + "MINVSCVLA", "NVMERVS", "ORDINA", "SEMEN", @@ -108,8 +110,8 @@ whitespace_tokens = [ ] all_tokens = ( - keyword_tokens + builtin_tokens + + keyword_tokens + module_tokens + data_tokens + symbol_tokens + diff --git a/snippets/syntaxes/centvrion.sublime-syntax b/snippets/syntaxes/centvrion.sublime-syntax index 2bcd36a..eafe5e0 100644 --- a/snippets/syntaxes/centvrion.sublime-syntax +++ b/snippets/syntaxes/centvrion.sublime-syntax @@ -70,7 +70,7 @@ contexts: scope: constant.language.centvrion builtins: - - match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LITTERA|LONGITVDO|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\b' + - match: '\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LITTERA|LONGITVDO|MAIVSCVLA|MINVSCVLA|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\b' scope: support.function.builtin.centvrion modules: diff --git a/tests.py b/tests.py index 9143e4e..bcc02d0 100644 --- a/tests.py +++ b/tests.py @@ -740,6 +740,36 @@ builtin_tests = [ ('SCINDE(",a,", ",")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String(",a,"), String(",")]))]), ValList([ValStr(""), ValStr("a"), ValStr("")])), # SCINDE: empty delimiter (split into chars) ('SCINDE("abc", "")', Program([], [ExpressionStatement(BuiltIn("SCINDE", [String("abc"), String("")]))]), ValList([ValStr("a"), ValStr("b"), ValStr("c")])), + # MAIVSCVLA: basic lowercase→uppercase + ('MAIVSCVLA("hello")', Program([], [ExpressionStatement(BuiltIn("MAIVSCVLA", [String("hello")]))]), ValStr("HELLO")), + # MAIVSCVLA: mixed case + ('MAIVSCVLA("HeLLo")', Program([], [ExpressionStatement(BuiltIn("MAIVSCVLA", [String("HeLLo")]))]), ValStr("HELLO")), + # MAIVSCVLA: already uppercase (idempotence) + ('MAIVSCVLA("HELLO")', Program([], [ExpressionStatement(BuiltIn("MAIVSCVLA", [String("HELLO")]))]), ValStr("HELLO")), + # MAIVSCVLA: empty string + ('MAIVSCVLA("")', Program([], [ExpressionStatement(BuiltIn("MAIVSCVLA", [String("")]))]), ValStr("")), + # MAIVSCVLA: Roman-numeral-shaped ASCII (case-only, not numeral-aware) + ('MAIVSCVLA("xii")', Program([], [ExpressionStatement(BuiltIn("MAIVSCVLA", [String("xii")]))]), ValStr("XII")), + # MAIVSCVLA: non-alphabetic chars unchanged + ('MAIVSCVLA("a,b!1")', Program([], [ExpressionStatement(BuiltIn("MAIVSCVLA", [String("a,b!1")]))]), ValStr("A,B!1")), + # MAIVSCVLA: via variable + ('DESIGNA s VT "foo"\nDIC(MAIVSCVLA(s))', Program([], [Designa(ID("s"), String("foo")), ExpressionStatement(BuiltIn("DIC", [BuiltIn("MAIVSCVLA", [ID("s")])]))]), ValStr("FOO"), "FOO\n"), + # MAIVSCVLA: concatenated with & + ('MAIVSCVLA("hi") & "!"', Program([], [ExpressionStatement(BinOp(BuiltIn("MAIVSCVLA", [String("hi")]), String("!"), "SYMBOL_AMPERSAND"))]), ValStr("HI!")), + # MINVSCVLA: basic uppercase→lowercase + ('MINVSCVLA("HELLO")', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [String("HELLO")]))]), ValStr("hello")), + # MINVSCVLA: mixed case + ('MINVSCVLA("HeLLo")', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [String("HeLLo")]))]), ValStr("hello")), + # MINVSCVLA: already lowercase (idempotence) + ('MINVSCVLA("hello")', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [String("hello")]))]), ValStr("hello")), + # MINVSCVLA: empty string + ('MINVSCVLA("")', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [String("")]))]), ValStr("")), + # MINVSCVLA: Roman-numeral-shaped ASCII (case-only, not numeral-aware) + ('MINVSCVLA("XII")', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [String("XII")]))]), ValStr("xii")), + # MINVSCVLA: non-alphabetic chars unchanged + ('MINVSCVLA("A,B!1")', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [String("A,B!1")]))]), ValStr("a,b!1")), + # MINVSCVLA round-trips MAIVSCVLA on lowercase input + ('MINVSCVLA(MAIVSCVLA("hi"))', Program([], [ExpressionStatement(BuiltIn("MINVSCVLA", [BuiltIn("MAIVSCVLA", [String("hi")])]))]), ValStr("hi")), ] class TestBuiltins(unittest.TestCase): @@ -850,6 +880,12 @@ error_tests = [ ("QVAERE('a{3}', 'aaa')", CentvrionError), # Arabic quantifier in pattern ('SCINDE(I, ",")', CentvrionError), # SCINDE requires strings, not int ('SCINDE("a", I)', CentvrionError), # SCINDE requires strings, not int delimiter + ('MAIVSCVLA(I)', CentvrionError), # MAIVSCVLA requires a string, not int + ('MAIVSCVLA()', CentvrionError), # MAIVSCVLA requires exactly 1 arg + ('MAIVSCVLA("a", "b")', CentvrionError), # MAIVSCVLA too many args + ('MINVSCVLA(I)', CentvrionError), # MINVSCVLA requires a string, not int + ('MINVSCVLA()', CentvrionError), # MINVSCVLA requires exactly 1 arg + ('MINVSCVLA("a", "b")', CentvrionError), # MINVSCVLA too many args ('PETE("http://example.com")', CentvrionError), # RETE required for PETE ('CVM RETE\nPETE(I)', CentvrionError), # PETE requires a string URL ('PETITVR("/", FVNCTIO (r) VT {\nREDI("hi")\n})', CentvrionError), # RETE required for PETITVR diff --git a/vscode-extension/snippets/cent.json b/vscode-extension/snippets/cent.json index 9140b13..a16cdb4 100644 --- a/vscode-extension/snippets/cent.json +++ b/vscode-extension/snippets/cent.json @@ -74,6 +74,8 @@ "LEGE": { "prefix": "LEGE", "body": "LEGE", "description": "read file contents (SCRIPTA module)" }, "LITTERA": { "prefix": "LITTERA", "body": "LITTERA", "description": "coerce any value to its display string" }, "LONGITVDO": { "prefix": "LONGITVDO", "body": "LONGITVDO", "description": "length of array, string, or dict" }, + "MAIVSCVLA": { "prefix": "MAIVSCVLA", "body": "MAIVSCVLA", "description": "uppercase a string (ASCII a-z → A-Z)" }, + "MINVSCVLA": { "prefix": "MINVSCVLA", "body": "MINVSCVLA", "description": "lowercase a string (ASCII A-Z → a-z)" }, "NVMERVS": { "prefix": "NVMERVS", "body": "NVMERVS", "description": "parse a Roman numeral string to an integer" }, "ORDINA": { "prefix": "ORDINA", "body": "ORDINA", "description": "sort an array in ascending order" }, "PETE": { "prefix": "PETE", "body": "PETE", "description": "HTTP GET request (RETE module)" }, diff --git a/vscode-extension/syntaxes/cent.tmLanguage.json b/vscode-extension/syntaxes/cent.tmLanguage.json index aac2132..0a4bdac 100644 --- a/vscode-extension/syntaxes/cent.tmLanguage.json +++ b/vscode-extension/syntaxes/cent.tmLanguage.json @@ -65,7 +65,7 @@ "patterns": [ { "name": "support.function.builtin.cent", - "match": "\\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LITTERA|LONGITVDO|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\\b" + "match": "\\b(ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|LEGE|LITTERA|LONGITVDO|MAIVSCVLA|MINVSCVLA|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TYPVS)\\b" } ] },