🐐 String interpolation

This commit is contained in:
2026-04-21 16:29:40 +02:00
parent 0b8b7c086e
commit 264ea84dfc
7 changed files with 260 additions and 8 deletions

View File

@@ -65,6 +65,21 @@ Strings are concatenated with `&`:
`NVLLVS` coerces to an empty string when used with `&`. Note: `+` is for arithmetic only — using it on strings raises an error. `NVLLVS` coerces to an empty string when used with `&`. Note: `+` is for arithmetic only — using it on strings raises an error.
#### String Interpolation
Double-quoted strings support interpolation with `{expression}`:
```
DESIGNA nomen VT "Marcus"
DICE("Salve, {nomen}!") // → Salve, Marcus!
DICE("Sum: {III + IV}") // → Sum: VII
DICE("{nomen} has {V} cats") // → Marcus has V cats
```
Any expression can appear inside `{}`. Values are coerced to strings the same way as with `&` (integers become Roman numerals, booleans become `VERITAS`/`FALSITAS`, etc.).
Single-quoted strings do **not** interpolate — `'{nomen}'` is the literal text `{nomen}`. Use `{{` and `}}` for literal braces in double-quoted strings: `"use {{braces}}"` → `use {braces}`.
Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2). Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2).
### Integers ### Integers

View File

@@ -275,6 +275,7 @@ class DataRangeArray(Node):
class String(Node): class String(Node):
def __init__(self, value) -> None: def __init__(self, value) -> None:
self.value = value self.value = value
self.quote = '"'
def __eq__(self, other): def __eq__(self, other):
return type(self) == type(other) and self.value == other.value return type(self) == type(other) and self.value == other.value
@@ -283,12 +284,60 @@ class String(Node):
return f"String({self.value})" return f"String({self.value})"
def print(self): def print(self):
return f'"{self.value}"' if self.quote == "'":
return f"'{self.value}'"
escaped = self.value.replace('{', '{{').replace('}', '}}')
return f'"{escaped}"'
def _eval(self, vtable): def _eval(self, vtable):
return vtable, ValStr(self.value) return vtable, ValStr(self.value)
def _flip_quotes(node, quote):
    """Set ``quote`` on every String node reachable from ``node``.

    The walk follows instance attributes that are either a Node or a list
    (only the list's Node items are visited). ``node`` itself is updated
    when it is a String. Nodes are modified in place; nothing is returned.
    """
    if isinstance(node, String):
        node.quote = quote

    for field in vars(node).values():
        # Normalise each attribute to "the child nodes it may hold".
        if isinstance(field, Node):
            children = [field]
        elif isinstance(field, list):
            children = field
        else:
            continue
        for child in children:
            if isinstance(child, Node):
                _flip_quotes(child, quote)
class InterpolatedString(Node):
    """String literal made of literal text and embedded expressions.

    ``parts`` is a list of nodes in source order. ``String`` entries are
    treated as literal text; every other node is an embedded expression
    whose value is converted with ``make_string`` during evaluation.
    """

    def __init__(self, parts) -> None:
        self.parts = parts

    def __eq__(self, other):
        return type(self) == type(other) and self.parts == other.parts

    def __repr__(self):
        return f"InterpolatedString([{rep_join(self.parts)}])"

    def print(self):
        """Render the node as a double-quoted source string.

        Literal text has its braces doubled; each expression is wrapped in
        ``{...}``. While an expression is rendered, String nodes inside it
        are switched to single quotes so they do not terminate the
        enclosing double-quoted literal.
        """
        chunks = ['"']
        for part in self.parts:
            if isinstance(part, String):
                chunks.append(part.value.replace('{', '{{').replace('}', '}}'))
                continue
            _flip_quotes(part, "'")
            try:
                rendered = part.print()
            finally:
                # Always restore the quote style, even if print() raises,
                # so a failed render never leaves the AST mutated.
                _flip_quotes(part, '"')
            chunks.append('{' + rendered + '}')
        chunks.append('"')
        return ''.join(chunks)

    def _eval(self, vtable):
        """Evaluate each part in order and join the stringified results.

        Returns the (possibly updated) vtable and a ValStr. The MAGNVM and
        SVBNVLLA module flags are read once, before any part is evaluated.
        """
        magnvm = "MAGNVM" in vtable["#modules"]
        svbnvlla = "SVBNVLLA" in vtable["#modules"]
        pieces = []
        for part in self.parts:
            # Thread the vtable through: evaluating a part may change it.
            vtable, val = part.eval(vtable)
            pieces.append(make_string(val, magnvm, svbnvlla))
        return vtable, ValStr(''.join(pieces))
class Numeral(Node): class Numeral(Node):
def __init__(self, value: str) -> None: def __init__(self, value: str) -> None:
self.value = value self.value = value

View File

@@ -1,6 +1,6 @@
from centvrion.errors import CentvrionError from centvrion.errors import CentvrionError
from centvrion.ast_nodes import ( from centvrion.ast_nodes import (
String, Numeral, Fractio, Bool, Nullus, ID, String, InterpolatedString, Numeral, Fractio, Bool, Nullus, ID,
BinOp, UnaryMinus, UnaryNot, BinOp, UnaryMinus, UnaryNot,
ArrayIndex, DataArray, DataRangeArray, ArrayIndex, DataArray, DataRangeArray,
BuiltIn, Invoca, BuiltIn, Invoca,
@@ -51,6 +51,25 @@ def emit_expr(node, ctx):
tmp = ctx.fresh_tmp() tmp = ctx.fresh_tmp()
return [f'CentValue {tmp} = cent_str("{_escape(node.value)}");'], tmp return [f'CentValue {tmp} = cent_str("{_escape(node.value)}");'], tmp
if isinstance(node, InterpolatedString):
if len(node.parts) == 0:
tmp = ctx.fresh_tmp()
return [f'CentValue {tmp} = cent_str("");'], tmp
if len(node.parts) == 1:
return emit_expr(node.parts[0], ctx)
l_lines, l_var = emit_expr(node.parts[0], ctx)
r_lines, r_var = emit_expr(node.parts[1], ctx)
lines = l_lines + r_lines
acc = ctx.fresh_tmp()
lines.append(f"CentValue {acc} = cent_concat({l_var}, {r_var});")
for part in node.parts[2:]:
p_lines, p_var = emit_expr(part, ctx)
lines.extend(p_lines)
new_acc = ctx.fresh_tmp()
lines.append(f"CentValue {new_acc} = cent_concat({acc}, {p_var});")
acc = new_acc
return lines, acc
if isinstance(node, Bool): if isinstance(node, Bool):
tmp = ctx.fresh_tmp() tmp = ctx.fresh_tmp()
v = "1" if node.value else "0" v = "1" if node.value else "0"

View File

@@ -1,10 +1,71 @@
from rply import ParserGenerator from rply import ParserGenerator
from centvrion.lexer import all_tokens from centvrion.errors import CentvrionError
from centvrion.lexer import Lexer, all_tokens
from . import ast_nodes from . import ast_nodes
ALL_TOKENS = list(set([i[0] for i in all_tokens])) ALL_TOKENS = list(set([i[0] for i in all_tokens]))
def _find_interpolation_end(inner, start):
    """Return the index just past the '}' that closes the '{' at inner[start].

    Braces inside single-quoted string literals are ignored, so an
    expression such as ``{x & '}'}`` is delimited correctly.

    Raises CentvrionError when no matching '}' is found.
    """
    depth = 1
    in_single_quote = False
    pos = start + 1
    while pos < len(inner):
        ch = inner[pos]
        if in_single_quote:
            # NOTE(review): assumes single-quoted literals have no escape
            # sequence for "'" -- confirm against the lexer's string rule.
            if ch == "'":
                in_single_quote = False
        elif ch == "'":
            in_single_quote = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                return pos + 1
        pos += 1
    raise CentvrionError("Unclosed '{' in interpolated string")


def _parse_interpolation_expr(expr_src):
    """Lex and parse ``expr_src`` and return its single expression node.

    Raises CentvrionError unless the source is exactly one expression
    statement.
    """
    # A trailing newline is appended before lexing, as a full program
    # line would have.
    tokens = Lexer().get_lexer().lex(expr_src + "\n")
    program = Parser().parse(tokens)
    if len(program.statements) != 1:
        raise CentvrionError("Interpolation must contain exactly one expression")
    stmt = program.statements[0]
    if not isinstance(stmt, ast_nodes.ExpressionStatement):
        raise CentvrionError("Interpolation must contain an expression, not a statement")
    return stmt.expression


def _parse_interpolated(raw_value):
    """Turn a raw string token (quotes included) into an AST node.

    Single-quoted and empty strings become a plain ``String``. In a
    double-quoted string, ``{{`` and ``}}`` are literal braces and
    ``{expr}`` embeds an expression. The result is a plain ``String`` when
    no expression is present, otherwise an ``InterpolatedString`` whose
    parts alternate literal ``String`` nodes and expression nodes.

    Raises CentvrionError for an unclosed '{', a lone '}', or an embedded
    segment that is not exactly one expression.
    """
    quote_char = raw_value[0]
    inner = raw_value[1:-1]
    if quote_char == "'" or not inner:
        return ast_nodes.String(inner)

    parts = []
    literal = []  # characters of the literal run currently being collected
    i = 0
    while i < len(inner):
        ch = inner[i]
        doubled = i + 1 < len(inner) and inner[i + 1] == ch
        if ch == '{':
            if doubled:
                literal.append('{')
                i += 2
                continue
            if literal:
                parts.append(ast_nodes.String(''.join(literal)))
                literal = []
            end = _find_interpolation_end(inner, i)
            # Slice strips the opening '{' and the closing '}'.
            parts.append(_parse_interpolation_expr(inner[i + 1:end - 1]))
            i = end
        elif ch == '}':
            if doubled:
                literal.append('}')
                i += 2
                continue
            raise CentvrionError("Unmatched '}' in string (use '}}' for literal '}')")
        else:
            literal.append(ch)
            i += 1

    if literal:
        parts.append(ast_nodes.String(''.join(literal)))

    # A string with only literal text (e.g. just escaped braces) stays a String.
    if len(parts) == 1 and isinstance(parts[0], ast_nodes.String):
        return parts[0]
    return ast_nodes.InterpolatedString(parts)
class Parser(): class Parser():
def __init__(self): def __init__(self):
self.pg = ParserGenerator( self.pg = ParserGenerator(
@@ -184,7 +245,7 @@ class Parser():
@self.pg.production('expression : DATA_STRING') @self.pg.production('expression : DATA_STRING')
def expression_string(tokens): def expression_string(tokens):
return ast_nodes.String(tokens[0].value[1:-1]) return _parse_interpolated(tokens[0].value)
@self.pg.production('expression : DATA_NUMERAL') @self.pg.production('expression : DATA_NUMERAL')
def expression_numeral(tokens): def expression_numeral(tokens):

View File

@@ -61,6 +61,7 @@
\languageline{expression}{\textit{expression} \textbf{binop} \textit{expression}} \\ \languageline{expression}{\textit{expression} \textbf{binop} \textit{expression}} \\
\languageline{expression}{\textbf{unop} \textit{expression}} \\ \hline \languageline{expression}{\textbf{unop} \textit{expression}} \\ \hline
\languageline{literal}{\textbf{string}} \\ \languageline{literal}{\textbf{string}} \\
\languageline{literal}{\textbf{interpolated-string}} \\
\languageline{literal}{\textbf{numeral}} \\ \languageline{literal}{\textbf{numeral}} \\
\languageline{literal}{\textbf{bool}} \\ \languageline{literal}{\textbf{bool}} \\
\languageline{literal}{\texttt{[} \textit{optional-expressions} \texttt{]}} \\ \languageline{literal}{\texttt{[} \textit{optional-expressions} \texttt{]}} \\
@@ -88,7 +89,8 @@
\item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. \item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features.
\item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w. \item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w.
\item \textbf{builtin}: \\ Builtin functions are uppercase latin words. \item \textbf{builtin}: \\ Builtin functions are uppercase latin words.
\item \textbf{string}: \\ Any text encased in " characters. \item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal.
\item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces.
\item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM. \item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM.
\item \textbf{bool}: \\ VERITAS or FALSITAS. \item \textbf{bool}: \\ VERITAS or FALSITAS.
\item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation). \item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation).

View File

@@ -33,6 +33,19 @@ contexts:
scope: string.quoted.double.centvrion scope: string.quoted.double.centvrion
push: push:
- meta_scope: string.quoted.double.centvrion - meta_scope: string.quoted.double.centvrion
- match: '\{\{'
scope: constant.character.escape.centvrion
- match: '\}\}'
scope: constant.character.escape.centvrion
- match: '\{'
scope: punctuation.section.interpolation.begin.centvrion
push:
- clear_scopes: 1
- meta_scope: meta.interpolation.centvrion
- match: '\}'
scope: punctuation.section.interpolation.end.centvrion
pop: true
- include: main
- match: '"' - match: '"'
pop: true pop: true
- match: "'" - match: "'"

View File

@@ -12,9 +12,9 @@ from fractions import Fraction
from centvrion.ast_nodes import ( from centvrion.ast_nodes import (
ArrayIndex, Bool, BinOp, BuiltIn, DataArray, DataRangeArray, Defini, ArrayIndex, Bool, BinOp, BuiltIn, DataArray, DataRangeArray, Defini,
Continva, Designa, DesignaDestructure, DesignaIndex, DumStatement, Erumpe, Continva, Designa, DesignaDestructure, DesignaIndex, DumStatement, Erumpe,
ExpressionStatement, ID, Invoca, ModuleCall, Nullus, Numeral, PerStatement, ExpressionStatement, ID, InterpolatedString, Invoca, ModuleCall, Nullus,
Program, Redi, SiStatement, String, UnaryMinus, UnaryNot, Numeral, PerStatement, Program, Redi, SiStatement, String, UnaryMinus,
Fractio, frac_to_fraction, fraction_to_frac, UnaryNot, Fractio, frac_to_fraction, fraction_to_frac,
num_to_int, int_to_num, make_string, num_to_int, int_to_num, make_string,
) )
from centvrion.compiler.emitter import compile_program from centvrion.compiler.emitter import compile_program
@@ -881,6 +881,99 @@ class TestStringConcat(unittest.TestCase):
run_test(self, source, nodes, value) run_test(self, source, nodes, value)
# --- String interpolation ---
# Each case is a tuple (source, expected Program AST, expected value) with an
# optional fourth element, the expected printed output; the elements are
# passed positionally to TestInterpolation.test_interpolation.
interpolation_tests = [
    # basic variable interpolation
    ('DESIGNA nomen VT "Marcus"\n"Salve, {nomen}!"',
        Program([], [
            Designa(ID("nomen"), String("Marcus")),
            ExpressionStatement(InterpolatedString([String("Salve, "), ID("nomen"), String("!")]))
        ]), ValStr("Salve, Marcus!")),
    # arithmetic expression inside interpolation
    ('DESIGNA x VT III\n"Sum: {x + II}"',
        Program([], [
            Designa(ID("x"), Numeral("III")),
            ExpressionStatement(InterpolatedString([String("Sum: "), BinOp(ID("x"), Numeral("II"), "SYMBOL_PLUS")]))
        ]), ValStr("Sum: V")),
    # multiple interpolations
    ('DESIGNA a VT I\nDESIGNA b VT II\n"{a} + {b} = {a + b}"',
        Program([], [
            Designa(ID("a"), Numeral("I")),
            Designa(ID("b"), Numeral("II")),
            ExpressionStatement(InterpolatedString([
                ID("a"), String(" + "), ID("b"), String(" = "),
                BinOp(ID("a"), ID("b"), "SYMBOL_PLUS"),
            ]))
        ]), ValStr("I + II = III")),
    # escaped braces become literal (and the result is a plain String node)
    ('"use {{braces}}"',
        Program([], [ExpressionStatement(String("use {braces}"))]),
        ValStr("use {braces}")),
    # single-quoted strings ignore braces
    ("'hello {world}'",
        Program([], [ExpressionStatement(String("hello {world}"))]),
        ValStr("hello {world}")),
    # integer coercion
    ('DESIGNA n VT V\n"n is {n}"',
        Program([], [
            Designa(ID("n"), Numeral("V")),
            ExpressionStatement(InterpolatedString([String("n is "), ID("n")]))
        ]), ValStr("n is V")),
    # boolean coercion
    ('DESIGNA b VT VERITAS\n"value: {b}"',
        Program([], [
            Designa(ID("b"), Bool(True)),
            ExpressionStatement(InterpolatedString([String("value: "), ID("b")]))
        ]), ValStr("value: VERITAS")),
    # NVLLVS coercion
    # NOTE(review): this expects the text "NVLLVS", while the README says
    # NVLLVS coerces to an empty string with `&` and that interpolation
    # coerces "the same way as with `&`" -- confirm which is intended.
    ('"value: {NVLLVS}"',
        Program([], [
            ExpressionStatement(InterpolatedString([String("value: "), Nullus()]))
        ]), ValStr("value: NVLLVS")),
    # expression-only string (no literal parts around it)
    ('DESIGNA x VT "hi"\n"{x}"',
        Program([], [
            Designa(ID("x"), String("hi")),
            ExpressionStatement(InterpolatedString([ID("x")]))
        ]), ValStr("hi")),
    # adjacent interpolations
    ('DESIGNA a VT "x"\nDESIGNA b VT "y"\n"{a}{b}"',
        Program([], [
            Designa(ID("a"), String("x")),
            Designa(ID("b"), String("y")),
            ExpressionStatement(InterpolatedString([ID("a"), ID("b")]))
        ]), ValStr("xy")),
    # function call inside interpolation
    ("DEFINI f () VT {\nREDI (V)\n}\n\"result: {INVOCA f()}\"",
        Program([], [
            Defini(ID("f"), [], [Redi([Numeral("V")])]),
            ExpressionStatement(InterpolatedString([String("result: "), Invoca(ID("f"), [])]))
        ]), ValStr("result: V")),
    # single-quoted string inside interpolation
    ("DESIGNA x VT 'hello'\n\"{x & '!'}\"",
        Program([], [
            Designa(ID("x"), String("hello")),
            ExpressionStatement(InterpolatedString([BinOp(ID("x"), String("!"), "SYMBOL_AMPERSAND")]))
        ]), ValStr("hello!")),
    # plain double-quoted string (no braces) still works
    ('"hello world"',
        Program([], [ExpressionStatement(String("hello world"))]),
        ValStr("hello world")),
    # interpolation in DICE output (fourth element is the expected output)
    ('DESIGNA name VT "Roma"\nDICE("Salve, {name}!")',
        Program([], [
            Designa(ID("name"), String("Roma")),
            ExpressionStatement(BuiltIn("DICE", [InterpolatedString([String("Salve, "), ID("name"), String("!")])]))
        ]), ValStr("Salve, Roma!"), "Salve, Roma!\n"),
]
class TestInterpolation(unittest.TestCase):
    """Parameterised string-interpolation tests, one per interpolation_tests entry."""

    @parameterized.expand(interpolation_tests)
    def test_interpolation(self, source, nodes, value, output=""):
        # `output` defaults to "" for cases that give no fourth tuple element.
        run_test(self, source, nodes, value, output)
# --- Comparison operators --- # --- Comparison operators ---
comparison_tests = [ comparison_tests = [