diff --git a/README.md b/README.md index 63c350e..6d643aa 100644 --- a/README.md +++ b/README.md @@ -65,6 +65,21 @@ Strings are concatenated with `&`: `NVLLVS` coerces to an empty string when used with `&`. Note: `+` is for arithmetic only — using it on strings raises an error. +#### String Interpolation + +Double-quoted strings support interpolation with `{expression}`: + +``` +DESIGNA nomen VT "Marcus" +DICE("Salve, {nomen}!") // → Salve, Marcus! +DICE("Sum: {III + IV}") // → Sum: VII +DICE("{nomen} has {V} cats") // → Marcus has V cats +``` + +Any expression can appear inside `{}`. Values are coerced to strings the same way as with `&` (integers become Roman numerals, booleans become `VERITAS`/`FALSITAS`, etc.). + +Single-quoted strings do **not** interpolate — `'{nomen}'` is the literal text `{nomen}`. Use `{{` and `}}` for literal braces in double-quoted strings: `"use {{braces}}"` → `use {braces}`. + Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2). 
def _flip_quotes(node, quote):
    """Recursively set the quote style on every String node in an expression tree.

    Walks the instance attributes of ``node``: attributes that are ``Node``
    objects are visited, as are ``Node`` items stored directly in list
    attributes. Nodes are modified in place.
    """
    if isinstance(node, String):
        node.quote = quote
    for attr in vars(node).values():
        if isinstance(attr, Node):
            _flip_quotes(attr, quote)
        elif isinstance(attr, list):
            for item in attr:
                if isinstance(item, Node):
                    _flip_quotes(item, quote)


def _string_quotes(node, found=None):
    """Collect ``(String node, current quote)`` pairs with the same walk as ``_flip_quotes``."""
    if found is None:
        found = []
    if isinstance(node, String):
        # Fall back to the double quote when a node carries no quote attribute.
        found.append((node, getattr(node, "quote", '"')))
    for attr in vars(node).values():
        if isinstance(attr, Node):
            _string_quotes(attr, found)
        elif isinstance(attr, list):
            for item in attr:
                if isinstance(item, Node):
                    _string_quotes(item, found)
    return found


class InterpolatedString(Node):
    """Double-quoted string literal containing ``{expression}`` segments.

    ``parts`` is the ordered list of child nodes. ``String`` items are literal
    text; every other node is an embedded expression whose value is converted
    with ``make_string`` when the node is evaluated.
    """

    def __init__(self, parts) -> None:
        self.parts = parts

    def __eq__(self, other):
        return type(self) == type(other) and self.parts == other.parts

    def __repr__(self):
        return f"InterpolatedString([{rep_join(self.parts)}])"

    def print(self):
        """Return source text for this literal, wrapped in double quotes.

        Literal text has its braces doubled. String literals inside an
        embedded expression are printed with single quotes so they cannot
        terminate the surrounding double-quoted string.
        """
        chunks = ['"']
        for part in self.parts:
            if isinstance(part, String):
                chunks.append(part.value.replace('{', '{{').replace('}', '}}'))
                continue
            # Remember each nested quote style so printing leaves the tree
            # exactly as it was, even when a nested print() raises.
            saved = _string_quotes(part)
            _flip_quotes(part, "'")
            try:
                chunks.append('{' + part.print() + '}')
            finally:
                for string_node, original in saved:
                    string_node.quote = original
        chunks.append('"')
        return ''.join(chunks)

    def _eval(self, vtable):
        """Evaluate the parts left to right and join their string forms.

        Returns the (possibly updated) vtable and a ``ValStr``.
        """
        magnvm = "MAGNVM" in vtable["#modules"]
        svbnvlla = "SVBNVLLA" in vtable["#modules"]
        pieces = []
        for part in self.parts:
            # Thread the vtable through: evaluating a part may change it.
            vtable, val = part.eval(vtable)
            pieces.append(make_string(val, magnvm, svbnvlla))
        return vtable, ValStr(''.join(pieces))
a/centvrion/compiler/emit_expr.py b/centvrion/compiler/emit_expr.py index 4c6c03a..af1a7d0 100644 --- a/centvrion/compiler/emit_expr.py +++ b/centvrion/compiler/emit_expr.py @@ -1,6 +1,6 @@ from centvrion.errors import CentvrionError from centvrion.ast_nodes import ( - String, Numeral, Fractio, Bool, Nullus, ID, + String, InterpolatedString, Numeral, Fractio, Bool, Nullus, ID, BinOp, UnaryMinus, UnaryNot, ArrayIndex, DataArray, DataRangeArray, BuiltIn, Invoca, @@ -51,6 +51,25 @@ def emit_expr(node, ctx): tmp = ctx.fresh_tmp() return [f'CentValue {tmp} = cent_str("{_escape(node.value)}");'], tmp + if isinstance(node, InterpolatedString): + if len(node.parts) == 0: + tmp = ctx.fresh_tmp() + return [f'CentValue {tmp} = cent_str("");'], tmp + if len(node.parts) == 1: + return emit_expr(node.parts[0], ctx) + l_lines, l_var = emit_expr(node.parts[0], ctx) + r_lines, r_var = emit_expr(node.parts[1], ctx) + lines = l_lines + r_lines + acc = ctx.fresh_tmp() + lines.append(f"CentValue {acc} = cent_concat({l_var}, {r_var});") + for part in node.parts[2:]: + p_lines, p_var = emit_expr(part, ctx) + lines.extend(p_lines) + new_acc = ctx.fresh_tmp() + lines.append(f"CentValue {new_acc} = cent_concat({acc}, {p_var});") + acc = new_acc + return lines, acc + if isinstance(node, Bool): tmp = ctx.fresh_tmp() v = "1" if node.value else "0" diff --git a/centvrion/parser.py b/centvrion/parser.py index 6c34d48..7ce4453 100644 --- a/centvrion/parser.py +++ b/centvrion/parser.py @@ -1,10 +1,71 @@ from rply import ParserGenerator -from centvrion.lexer import all_tokens +from centvrion.errors import CentvrionError +from centvrion.lexer import Lexer, all_tokens from . 
def _find_interpolation_end(text, start):
    """Return the index just past the ``}`` that closes the ``{`` at ``text[start]``.

    Nested braces are balanced. Braces inside a single-quoted string literal
    are ignored, so an expression such as ``x & '}'`` is not cut short.

    Raises:
        CentvrionError: if the opening brace is never closed.
    """
    depth = 1
    pos = start + 1
    size = len(text)
    while pos < size:
        ch = text[pos]
        if ch == "'":
            closing = text.find("'", pos + 1)
            if closing != -1:
                # Jump over the whole quoted literal, braces included.
                pos = closing + 1
                continue
            # Unterminated quote: treat it as an ordinary character and let
            # the lexer report the problem in the extracted expression.
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return pos + 1
        pos += 1
    raise CentvrionError("Unclosed '{' in interpolated string")


def _parse_interpolation_expr(expr_src):
    """Lex and parse the text between ``{`` and ``}``; return its expression node.

    Raises:
        CentvrionError: if the text is not exactly one expression statement.
    """
    tokens = Lexer().get_lexer().lex(expr_src + "\n")
    program = Parser().parse(tokens)
    if len(program.statements) != 1:
        raise CentvrionError("Interpolation must contain exactly one expression")
    stmt = program.statements[0]
    if not isinstance(stmt, ast_nodes.ExpressionStatement):
        raise CentvrionError("Interpolation must contain an expression, not a statement")
    return stmt.expression


def _parse_interpolated(raw_value):
    """Turn a raw string token (quotes included) into an AST node.

    Single-quoted and empty strings become a plain ``String``. In a
    double-quoted string ``{{`` and ``}}`` are literal braces and each
    ``{expression}`` is parsed into a child node.

    Returns:
        ``ast_nodes.String`` when the result is a single literal part,
        otherwise ``ast_nodes.InterpolatedString``.

    Raises:
        CentvrionError: on an unclosed ``{``, an unmatched ``}``, or an
            interpolation that is not exactly one expression.
    """
    quote_char = raw_value[0]
    inner = raw_value[1:-1]

    if quote_char == "'" or not inner:
        return ast_nodes.String(inner)

    parts = []
    literal = []  # characters of the literal run currently being collected
    pos = 0
    size = len(inner)

    while pos < size:
        ch = inner[pos]
        if inner[pos:pos + 2] in ('{{', '}}'):
            # A doubled brace is an escaped literal brace.
            literal.append(ch)
            pos += 2
        elif ch == '{':
            if literal:
                parts.append(ast_nodes.String(''.join(literal)))
                literal = []
            end = _find_interpolation_end(inner, pos)
            parts.append(_parse_interpolation_expr(inner[pos + 1:end - 1]))
            pos = end
        elif ch == '}':
            raise CentvrionError("Unmatched '}' in string (use '}}' for literal '}')")
        else:
            literal.append(ch)
            pos += 1

    if literal:
        parts.append(ast_nodes.String(''.join(literal)))

    # A string with no interpolation collapses back to a plain String node.
    if len(parts) == 1 and isinstance(parts[0], ast_nodes.String):
        return parts[0]

    return ast_nodes.InterpolatedString(parts)
b/language/main.tex index 304b9ea..8bf01d3 100644 --- a/language/main.tex +++ b/language/main.tex @@ -61,6 +61,7 @@ \languageline{expression}{\textit{expression} \textbf{binop} \textit{expression}} \\ \languageline{expression}{\textbf{unop} \textit{expression}} \\ \hline \languageline{literal}{\textbf{string}} \\ + \languageline{literal}{\textbf{interpolated-string}} \\ \languageline{literal}{\textbf{numeral}} \\ \languageline{literal}{\textbf{bool}} \\ \languageline{literal}{\texttt{[} \textit{optional-expressions} \texttt{]}} \\ @@ -88,7 +89,8 @@ \item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. \item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w. \item \textbf{builtin}: \\ Builtin functions are uppercase latin words. - \item \textbf{string}: \\ Any text encased in " characters. + \item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal. + \item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces. \item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM. \item \textbf{bool}: \\ VERITAS or FALSITAS. \item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation). 
diff --git a/snippets/syntaxes/centvrion.sublime-syntax b/snippets/syntaxes/centvrion.sublime-syntax index 64934c4..aaf5f92 100644 --- a/snippets/syntaxes/centvrion.sublime-syntax +++ b/snippets/syntaxes/centvrion.sublime-syntax @@ -33,6 +33,19 @@ contexts: scope: string.quoted.double.centvrion push: - meta_scope: string.quoted.double.centvrion + - match: '\{\{' + scope: constant.character.escape.centvrion + - match: '\}\}' + scope: constant.character.escape.centvrion + - match: '\{' + scope: punctuation.section.interpolation.begin.centvrion + push: + - clear_scopes: 1 + - meta_scope: meta.interpolation.centvrion + - match: '\}' + scope: punctuation.section.interpolation.end.centvrion + pop: true + - include: main - match: '"' pop: true - match: "'" diff --git a/tests.py b/tests.py index 1068868..22c3747 100644 --- a/tests.py +++ b/tests.py @@ -12,9 +12,9 @@ from fractions import Fraction from centvrion.ast_nodes import ( ArrayIndex, Bool, BinOp, BuiltIn, DataArray, DataRangeArray, Defini, Continva, Designa, DesignaDestructure, DesignaIndex, DumStatement, Erumpe, - ExpressionStatement, ID, Invoca, ModuleCall, Nullus, Numeral, PerStatement, - Program, Redi, SiStatement, String, UnaryMinus, UnaryNot, - Fractio, frac_to_fraction, fraction_to_frac, + ExpressionStatement, ID, InterpolatedString, Invoca, ModuleCall, Nullus, + Numeral, PerStatement, Program, Redi, SiStatement, String, UnaryMinus, + UnaryNot, Fractio, frac_to_fraction, fraction_to_frac, num_to_int, int_to_num, make_string, ) from centvrion.compiler.emitter import compile_program @@ -881,6 +881,99 @@ class TestStringConcat(unittest.TestCase): run_test(self, source, nodes, value) +# --- String interpolation --- + +interpolation_tests = [ + # basic variable interpolation + ('DESIGNA nomen VT "Marcus"\n"Salve, {nomen}!"', + Program([], [ + Designa(ID("nomen"), String("Marcus")), + ExpressionStatement(InterpolatedString([String("Salve, "), ID("nomen"), String("!")])) + ]), ValStr("Salve, Marcus!")), + # 
arithmetic expression inside interpolation + ('DESIGNA x VT III\n"Sum: {x + II}"', + Program([], [ + Designa(ID("x"), Numeral("III")), + ExpressionStatement(InterpolatedString([String("Sum: "), BinOp(ID("x"), Numeral("II"), "SYMBOL_PLUS")])) + ]), ValStr("Sum: V")), + # multiple interpolations + ('DESIGNA a VT I\nDESIGNA b VT II\n"{a} + {b} = {a + b}"', + Program([], [ + Designa(ID("a"), Numeral("I")), + Designa(ID("b"), Numeral("II")), + ExpressionStatement(InterpolatedString([ + ID("a"), String(" + "), ID("b"), String(" = "), + BinOp(ID("a"), ID("b"), "SYMBOL_PLUS"), + ])) + ]), ValStr("I + II = III")), + # escaped braces become literal + ('"use {{braces}}"', + Program([], [ExpressionStatement(String("use {braces}"))]), + ValStr("use {braces}")), + # single-quoted strings ignore braces + ("'hello {world}'", + Program([], [ExpressionStatement(String("hello {world}"))]), + ValStr("hello {world}")), + # integer coercion + ('DESIGNA n VT V\n"n is {n}"', + Program([], [ + Designa(ID("n"), Numeral("V")), + ExpressionStatement(InterpolatedString([String("n is "), ID("n")])) + ]), ValStr("n is V")), + # boolean coercion + ('DESIGNA b VT VERITAS\n"value: {b}"', + Program([], [ + Designa(ID("b"), Bool(True)), + ExpressionStatement(InterpolatedString([String("value: "), ID("b")])) + ]), ValStr("value: VERITAS")), + # NVLLVS coercion + ('"value: {NVLLVS}"', + Program([], [ + ExpressionStatement(InterpolatedString([String("value: "), Nullus()])) + ]), ValStr("value: NVLLVS")), + # expression-only string (no literal parts around it) + ('DESIGNA x VT "hi"\n"{x}"', + Program([], [ + Designa(ID("x"), String("hi")), + ExpressionStatement(InterpolatedString([ID("x")])) + ]), ValStr("hi")), + # adjacent interpolations + ('DESIGNA a VT "x"\nDESIGNA b VT "y"\n"{a}{b}"', + Program([], [ + Designa(ID("a"), String("x")), + Designa(ID("b"), String("y")), + ExpressionStatement(InterpolatedString([ID("a"), ID("b")])) + ]), ValStr("xy")), + # function call inside interpolation + ("DEFINI f 
() VT {\nREDI (V)\n}\n\"result: {INVOCA f()}\"", + Program([], [ + Defini(ID("f"), [], [Redi([Numeral("V")])]), + ExpressionStatement(InterpolatedString([String("result: "), Invoca(ID("f"), [])])) + ]), ValStr("result: V")), + # single-quoted string inside interpolation + ("DESIGNA x VT 'hello'\n\"{x & '!'}\"", + Program([], [ + Designa(ID("x"), String("hello")), + ExpressionStatement(InterpolatedString([BinOp(ID("x"), String("!"), "SYMBOL_AMPERSAND")])) + ]), ValStr("hello!")), + # plain double-quoted string (no braces) still works + ('"hello world"', + Program([], [ExpressionStatement(String("hello world"))]), + ValStr("hello world")), + # interpolation in DICE output + ('DESIGNA name VT "Roma"\nDICE("Salve, {name}!")', + Program([], [ + Designa(ID("name"), String("Roma")), + ExpressionStatement(BuiltIn("DICE", [InterpolatedString([String("Salve, "), ID("name"), String("!")])])) + ]), ValStr("Salve, Roma!"), "Salve, Roma!\n"), +] + +class TestInterpolation(unittest.TestCase): + @parameterized.expand(interpolation_tests) + def test_interpolation(self, source, nodes, value, output=""): + run_test(self, source, nodes, value, output) + + # --- Comparison operators --- comparison_tests = [