🐐 String interpolation
This commit is contained in:
15
README.md
15
README.md
@@ -65,6 +65,21 @@ Strings are concatenated with `&`:
|
|||||||
|
|
||||||
`NVLLVS` coerces to an empty string when used with `&`. Note: `+` is for arithmetic only — using it on strings raises an error.
|
`NVLLVS` coerces to an empty string when used with `&`. Note: `+` is for arithmetic only — using it on strings raises an error.
|
||||||
|
|
||||||
|
#### String Interpolation
|
||||||
|
|
||||||
|
Double-quoted strings support interpolation with `{expression}`:
|
||||||
|
|
||||||
|
```
|
||||||
|
DESIGNA nomen VT "Marcus"
|
||||||
|
DICE("Salve, {nomen}!") // → Salve, Marcus!
|
||||||
|
DICE("Sum: {III + IV}") // → Sum: VII
|
||||||
|
DICE("{nomen} has {V} cats") // → Marcus has V cats
|
||||||
|
```
|
||||||
|
|
||||||
|
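Any expression can appear inside `{}`. Each value is converted to its string form (integers become Roman numerals, booleans become `VERITAS`/`FALSITAS`, etc.). Note that, unlike with `&`, an interpolated `NVLLVS` is rendered as the text `NVLLVS` rather than as an empty string.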
|
||||||
|
|
||||||
|
Single-quoted strings do **not** interpolate — `'{nomen}'` is the literal text `{nomen}`. Use `{{` and `}}` for literal braces in double-quoted strings: `"use {{braces}}"` → `use {braces}`.
|
||||||
|
|
||||||
Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2).
|
Integer modulo is `RELIQVVM`: `VII RELIQVVM III` evaluates to `I`. Under the `FRACTIO` module it returns a fraction, so `IIIS RELIQVVM IS` is `S` (i.e. 1/2).
|
||||||
|
|
||||||
### Integers
|
### Integers
|
||||||
|
|||||||
@@ -275,6 +275,7 @@ class DataRangeArray(Node):
|
|||||||
class String(Node):
|
class String(Node):
|
||||||
def __init__(self, value) -> None:
|
def __init__(self, value) -> None:
|
||||||
self.value = value
|
self.value = value
|
||||||
|
self.quote = '"'
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
return type(self) == type(other) and self.value == other.value
|
return type(self) == type(other) and self.value == other.value
|
||||||
@@ -283,12 +284,60 @@ class String(Node):
|
|||||||
return f"String({self.value})"
|
return f"String({self.value})"
|
||||||
|
|
||||||
def print(self):
|
def print(self):
|
||||||
return f'"{self.value}"'
|
if self.quote == "'":
|
||||||
|
return f"'{self.value}'"
|
||||||
|
escaped = self.value.replace('{', '{{').replace('}', '}}')
|
||||||
|
return f'"{escaped}"'
|
||||||
|
|
||||||
def _eval(self, vtable):
|
def _eval(self, vtable):
|
||||||
return vtable, ValStr(self.value)
|
return vtable, ValStr(self.value)
|
||||||
|
|
||||||
|
|
||||||
|
def _flip_quotes(node, quote):
    """Set ``quote`` on every ``String`` node reachable from ``node``.

    The walk follows instance attributes that are themselves ``Node``
    objects, and ``Node`` items stored directly inside list attributes.
    Nodes are modified in place; nothing is returned.
    """
    if isinstance(node, String):
        node.quote = quote

    for value in vars(node).values():
        # Normalise "single child" and "list of children" into one iterable.
        if isinstance(value, Node):
            children = (value,)
        elif isinstance(value, list):
            children = value
        else:
            continue

        for child in children:
            if isinstance(child, Node):
                _flip_quotes(child, quote)
|
||||||
|
|
||||||
|
|
||||||
|
class InterpolatedString(Node):
    """Double-quoted string literal containing ``{expression}`` segments.

    ``parts`` is an ordered list of AST nodes. ``String`` entries are treated
    as literal text; every other entry is treated as an embedded expression
    whose value is converted to a string when the node is evaluated.
    """

    def __init__(self, parts) -> None:
        self.parts = parts

    def __eq__(self, other):
        return type(self) == type(other) and self.parts == other.parts

    def __repr__(self):
        return f"InterpolatedString([{rep_join(self.parts)}])"

    def print(self):
        """Render this node back to source text as a double-quoted string."""
        chunks = ['"']
        for part in self.parts:
            if isinstance(part, String):
                # Literal text: double the braces so they are not read back
                # as the start/end of an interpolation.
                chunks.append(part.value.replace('{', '{{').replace('}', '}}'))
                continue

            # Strings nested inside the expression are printed single-quoted
            # so they cannot terminate the enclosing double-quoted literal.
            _flip_quotes(part, "'")
            try:
                chunks.append('{' + part.print() + '}')
            finally:
                # Always undo the temporary mutation, even if printing the
                # nested expression raised, so the AST is never left flipped.
                _flip_quotes(part, '"')
        chunks.append('"')
        return ''.join(chunks)

    def _eval(self, vtable):
        """Evaluate every part in order and join the results.

        Returns the (possibly updated) ``vtable`` together with a ``ValStr``
        holding the concatenation of each part's ``make_string`` rendering.
        """
        # Module flags are forwarded unchanged to make_string.
        magnvm = "MAGNVM" in vtable["#modules"]
        svbnvlla = "SVBNVLLA" in vtable["#modules"]
        pieces = []
        for part in self.parts:
            # vtable is threaded through: each part is evaluated against the
            # table returned by the previous one.
            vtable, val = part.eval(vtable)
            pieces.append(make_string(val, magnvm, svbnvlla))
        return vtable, ValStr(''.join(pieces))
|
||||||
|
|
||||||
|
|
||||||
class Numeral(Node):
|
class Numeral(Node):
|
||||||
def __init__(self, value: str) -> None:
|
def __init__(self, value: str) -> None:
|
||||||
self.value = value
|
self.value = value
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
from centvrion.errors import CentvrionError
|
from centvrion.errors import CentvrionError
|
||||||
from centvrion.ast_nodes import (
|
from centvrion.ast_nodes import (
|
||||||
String, Numeral, Fractio, Bool, Nullus, ID,
|
String, InterpolatedString, Numeral, Fractio, Bool, Nullus, ID,
|
||||||
BinOp, UnaryMinus, UnaryNot,
|
BinOp, UnaryMinus, UnaryNot,
|
||||||
ArrayIndex, DataArray, DataRangeArray,
|
ArrayIndex, DataArray, DataRangeArray,
|
||||||
BuiltIn, Invoca,
|
BuiltIn, Invoca,
|
||||||
@@ -51,6 +51,25 @@ def emit_expr(node, ctx):
|
|||||||
tmp = ctx.fresh_tmp()
|
tmp = ctx.fresh_tmp()
|
||||||
return [f'CentValue {tmp} = cent_str("{_escape(node.value)}");'], tmp
|
return [f'CentValue {tmp} = cent_str("{_escape(node.value)}");'], tmp
|
||||||
|
|
||||||
|
if isinstance(node, InterpolatedString):
|
||||||
|
if len(node.parts) == 0:
|
||||||
|
tmp = ctx.fresh_tmp()
|
||||||
|
return [f'CentValue {tmp} = cent_str("");'], tmp
|
||||||
|
if len(node.parts) == 1:
|
||||||
|
return emit_expr(node.parts[0], ctx)
|
||||||
|
l_lines, l_var = emit_expr(node.parts[0], ctx)
|
||||||
|
r_lines, r_var = emit_expr(node.parts[1], ctx)
|
||||||
|
lines = l_lines + r_lines
|
||||||
|
acc = ctx.fresh_tmp()
|
||||||
|
lines.append(f"CentValue {acc} = cent_concat({l_var}, {r_var});")
|
||||||
|
for part in node.parts[2:]:
|
||||||
|
p_lines, p_var = emit_expr(part, ctx)
|
||||||
|
lines.extend(p_lines)
|
||||||
|
new_acc = ctx.fresh_tmp()
|
||||||
|
lines.append(f"CentValue {new_acc} = cent_concat({acc}, {p_var});")
|
||||||
|
acc = new_acc
|
||||||
|
return lines, acc
|
||||||
|
|
||||||
if isinstance(node, Bool):
|
if isinstance(node, Bool):
|
||||||
tmp = ctx.fresh_tmp()
|
tmp = ctx.fresh_tmp()
|
||||||
v = "1" if node.value else "0"
|
v = "1" if node.value else "0"
|
||||||
|
|||||||
@@ -1,10 +1,71 @@
|
|||||||
from rply import ParserGenerator
|
from rply import ParserGenerator
|
||||||
|
|
||||||
from centvrion.lexer import all_tokens
|
from centvrion.errors import CentvrionError
|
||||||
|
from centvrion.lexer import Lexer, all_tokens
|
||||||
from . import ast_nodes
|
from . import ast_nodes
|
||||||
|
|
||||||
ALL_TOKENS = list(set([i[0] for i in all_tokens]))
|
ALL_TOKENS = list(set([i[0] for i in all_tokens]))
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_interpolated(raw_value):
    """Convert a raw string token into a ``String`` or ``InterpolatedString``.

    ``raw_value`` is the token text including its surrounding quote
    characters. Single-quoted strings and empty strings are returned as plain
    ``String`` nodes. In double-quoted strings ``{{`` and ``}}`` become
    literal braces, and each ``{...}`` segment is lexed and parsed as a single
    expression. A double-quoted string that ends up with only one literal
    part is returned as a plain ``String``.

    Raises:
        CentvrionError: on an unclosed ``{``, an unmatched ``}``, or a
            segment that is not exactly one expression.
    """
    quote_char = raw_value[0]
    inner = raw_value[1:-1]

    if quote_char == "'" or len(inner) == 0:
        return ast_nodes.String(inner)

    parts = []
    current = []  # characters of the literal run currently being collected
    i = 0
    end = len(inner)

    while i < end:
        ch = inner[i]
        if ch == '{':
            if i + 1 < end and inner[i + 1] == '{':
                current.append('{')
                i += 2
                continue
            if current:
                parts.append(ast_nodes.String(''.join(current)))
                current = []

            # Locate the matching '}'. Braces inside a single-quoted string
            # literal belong to that literal and must not affect the depth,
            # otherwise an expression such as  x & '}'  is cut off early.
            j = i + 1
            depth = 1
            in_single_quote = False
            while j < end and depth > 0:
                c = inner[j]
                if c == "'":
                    in_single_quote = not in_single_quote
                elif not in_single_quote:
                    if c == '{':
                        depth += 1
                    elif c == '}':
                        depth -= 1
                j += 1
            if depth != 0:
                raise CentvrionError("Unclosed '{' in interpolated string")

            # j is one past the closing brace, so the expression is (i, j-1).
            expr_src = inner[i + 1:j - 1]
            tokens = Lexer().get_lexer().lex(expr_src + "\n")
            program = Parser().parse(tokens)
            if len(program.statements) != 1:
                raise CentvrionError("Interpolation must contain exactly one expression")
            stmt = program.statements[0]
            if not isinstance(stmt, ast_nodes.ExpressionStatement):
                raise CentvrionError("Interpolation must contain an expression, not a statement")
            parts.append(stmt.expression)
            i = j
        elif ch == '}':
            if i + 1 < end and inner[i + 1] == '}':
                current.append('}')
                i += 2
                continue
            raise CentvrionError("Unmatched '}' in string (use '}}' for literal '}')")
        else:
            current.append(ch)
            i += 1

    if current:
        parts.append(ast_nodes.String(''.join(current)))

    # No interpolation happened (e.g. only escaped braces): keep a plain String.
    if len(parts) == 1 and isinstance(parts[0], ast_nodes.String):
        return parts[0]

    return ast_nodes.InterpolatedString(parts)
|
||||||
|
|
||||||
class Parser():
|
class Parser():
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.pg = ParserGenerator(
|
self.pg = ParserGenerator(
|
||||||
@@ -184,7 +245,7 @@ class Parser():
|
|||||||
|
|
||||||
@self.pg.production('expression : DATA_STRING')
|
@self.pg.production('expression : DATA_STRING')
|
||||||
def expression_string(tokens):
|
def expression_string(tokens):
|
||||||
return ast_nodes.String(tokens[0].value[1:-1])
|
return _parse_interpolated(tokens[0].value)
|
||||||
|
|
||||||
@self.pg.production('expression : DATA_NUMERAL')
|
@self.pg.production('expression : DATA_NUMERAL')
|
||||||
def expression_numeral(tokens):
|
def expression_numeral(tokens):
|
||||||
|
|||||||
@@ -61,6 +61,7 @@
|
|||||||
\languageline{expression}{\textit{expression} \textbf{binop} \textit{expression}} \\
|
\languageline{expression}{\textit{expression} \textbf{binop} \textit{expression}} \\
|
||||||
\languageline{expression}{\textbf{unop} \textit{expression}} \\ \hline
|
\languageline{expression}{\textbf{unop} \textit{expression}} \\ \hline
|
||||||
\languageline{literal}{\textbf{string}} \\
|
\languageline{literal}{\textbf{string}} \\
|
||||||
|
\languageline{literal}{\textbf{interpolated-string}} \\
|
||||||
\languageline{literal}{\textbf{numeral}} \\
|
\languageline{literal}{\textbf{numeral}} \\
|
||||||
\languageline{literal}{\textbf{bool}} \\
|
\languageline{literal}{\textbf{bool}} \\
|
||||||
\languageline{literal}{\texttt{[} \textit{optional-expressions} \texttt{]}} \\
|
\languageline{literal}{\texttt{[} \textit{optional-expressions} \texttt{]}} \\
|
||||||
@@ -88,7 +89,8 @@
|
|||||||
\item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features.
|
\item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features.
|
||||||
\item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w.
|
\item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w.
|
||||||
\item \textbf{builtin}: \\ Builtin functions are uppercase latin words.
|
\item \textbf{builtin}: \\ Builtin functions are uppercase latin words.
|
||||||
\item \textbf{string}: \\ Any text encased in " characters.
|
\item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal.
|
||||||
|
\item \textbf{interpolated-string}: \\ A double-quoted string containing \texttt{\{}\textit{expression}\texttt{\}} segments. Each expression is evaluated and coerced to a string. Use \texttt{\{\{} and \texttt{\}\}} for literal braces.
|
||||||
\item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM.
|
\item \textbf{numeral}: \\ Roman numerals consisting of the uppercase characters I, V, X, L, C, D, and M. Can also include underscore if the module MAGNVM.
|
||||||
\item \textbf{bool}: \\ VERITAS or FALSITAS.
|
\item \textbf{bool}: \\ VERITAS or FALSITAS.
|
||||||
\item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation).
|
\item \textbf{binop}: \\ Binary operators: \texttt{+}, \texttt{-}, \texttt{*}, \texttt{/}, \texttt{RELIQVVM} (modulo), \texttt{EST} (equality), \texttt{DISPAR} (not-equal), \texttt{MINVS} (<), \texttt{PLVS} (>), \texttt{ET} (and), \texttt{AVT} (or), \texttt{\&} (string concatenation).
|
||||||
|
|||||||
@@ -33,6 +33,19 @@ contexts:
|
|||||||
scope: string.quoted.double.centvrion
|
scope: string.quoted.double.centvrion
|
||||||
push:
|
push:
|
||||||
- meta_scope: string.quoted.double.centvrion
|
- meta_scope: string.quoted.double.centvrion
|
||||||
|
- match: '\{\{'
|
||||||
|
scope: constant.character.escape.centvrion
|
||||||
|
- match: '\}\}'
|
||||||
|
scope: constant.character.escape.centvrion
|
||||||
|
- match: '\{'
|
||||||
|
scope: punctuation.section.interpolation.begin.centvrion
|
||||||
|
push:
|
||||||
|
- clear_scopes: 1
|
||||||
|
- meta_scope: meta.interpolation.centvrion
|
||||||
|
- match: '\}'
|
||||||
|
scope: punctuation.section.interpolation.end.centvrion
|
||||||
|
pop: true
|
||||||
|
- include: main
|
||||||
- match: '"'
|
- match: '"'
|
||||||
pop: true
|
pop: true
|
||||||
- match: "'"
|
- match: "'"
|
||||||
|
|||||||
99
tests.py
99
tests.py
@@ -12,9 +12,9 @@ from fractions import Fraction
|
|||||||
from centvrion.ast_nodes import (
|
from centvrion.ast_nodes import (
|
||||||
ArrayIndex, Bool, BinOp, BuiltIn, DataArray, DataRangeArray, Defini,
|
ArrayIndex, Bool, BinOp, BuiltIn, DataArray, DataRangeArray, Defini,
|
||||||
Continva, Designa, DesignaDestructure, DesignaIndex, DumStatement, Erumpe,
|
Continva, Designa, DesignaDestructure, DesignaIndex, DumStatement, Erumpe,
|
||||||
ExpressionStatement, ID, Invoca, ModuleCall, Nullus, Numeral, PerStatement,
|
ExpressionStatement, ID, InterpolatedString, Invoca, ModuleCall, Nullus,
|
||||||
Program, Redi, SiStatement, String, UnaryMinus, UnaryNot,
|
Numeral, PerStatement, Program, Redi, SiStatement, String, UnaryMinus,
|
||||||
Fractio, frac_to_fraction, fraction_to_frac,
|
UnaryNot, Fractio, frac_to_fraction, fraction_to_frac,
|
||||||
num_to_int, int_to_num, make_string,
|
num_to_int, int_to_num, make_string,
|
||||||
)
|
)
|
||||||
from centvrion.compiler.emitter import compile_program
|
from centvrion.compiler.emitter import compile_program
|
||||||
@@ -881,6 +881,99 @@ class TestStringConcat(unittest.TestCase):
|
|||||||
run_test(self, source, nodes, value)
|
run_test(self, source, nodes, value)
|
||||||
|
|
||||||
|
|
||||||
|
# --- String interpolation ---
|
||||||
|
|
||||||
|
interpolation_tests = [
|
||||||
|
# basic variable interpolation
|
||||||
|
('DESIGNA nomen VT "Marcus"\n"Salve, {nomen}!"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("nomen"), String("Marcus")),
|
||||||
|
ExpressionStatement(InterpolatedString([String("Salve, "), ID("nomen"), String("!")]))
|
||||||
|
]), ValStr("Salve, Marcus!")),
|
||||||
|
# arithmetic expression inside interpolation
|
||||||
|
('DESIGNA x VT III\n"Sum: {x + II}"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("x"), Numeral("III")),
|
||||||
|
ExpressionStatement(InterpolatedString([String("Sum: "), BinOp(ID("x"), Numeral("II"), "SYMBOL_PLUS")]))
|
||||||
|
]), ValStr("Sum: V")),
|
||||||
|
# multiple interpolations
|
||||||
|
('DESIGNA a VT I\nDESIGNA b VT II\n"{a} + {b} = {a + b}"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("a"), Numeral("I")),
|
||||||
|
Designa(ID("b"), Numeral("II")),
|
||||||
|
ExpressionStatement(InterpolatedString([
|
||||||
|
ID("a"), String(" + "), ID("b"), String(" = "),
|
||||||
|
BinOp(ID("a"), ID("b"), "SYMBOL_PLUS"),
|
||||||
|
]))
|
||||||
|
]), ValStr("I + II = III")),
|
||||||
|
# escaped braces become literal
|
||||||
|
('"use {{braces}}"',
|
||||||
|
Program([], [ExpressionStatement(String("use {braces}"))]),
|
||||||
|
ValStr("use {braces}")),
|
||||||
|
# single-quoted strings ignore braces
|
||||||
|
("'hello {world}'",
|
||||||
|
Program([], [ExpressionStatement(String("hello {world}"))]),
|
||||||
|
ValStr("hello {world}")),
|
||||||
|
# integer coercion
|
||||||
|
('DESIGNA n VT V\n"n is {n}"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("n"), Numeral("V")),
|
||||||
|
ExpressionStatement(InterpolatedString([String("n is "), ID("n")]))
|
||||||
|
]), ValStr("n is V")),
|
||||||
|
# boolean coercion
|
||||||
|
('DESIGNA b VT VERITAS\n"value: {b}"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("b"), Bool(True)),
|
||||||
|
ExpressionStatement(InterpolatedString([String("value: "), ID("b")]))
|
||||||
|
]), ValStr("value: VERITAS")),
|
||||||
|
# NVLLVS coercion
|
||||||
|
('"value: {NVLLVS}"',
|
||||||
|
Program([], [
|
||||||
|
ExpressionStatement(InterpolatedString([String("value: "), Nullus()]))
|
||||||
|
]), ValStr("value: NVLLVS")),
|
||||||
|
# expression-only string (no literal parts around it)
|
||||||
|
('DESIGNA x VT "hi"\n"{x}"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("x"), String("hi")),
|
||||||
|
ExpressionStatement(InterpolatedString([ID("x")]))
|
||||||
|
]), ValStr("hi")),
|
||||||
|
# adjacent interpolations
|
||||||
|
('DESIGNA a VT "x"\nDESIGNA b VT "y"\n"{a}{b}"',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("a"), String("x")),
|
||||||
|
Designa(ID("b"), String("y")),
|
||||||
|
ExpressionStatement(InterpolatedString([ID("a"), ID("b")]))
|
||||||
|
]), ValStr("xy")),
|
||||||
|
# function call inside interpolation
|
||||||
|
("DEFINI f () VT {\nREDI (V)\n}\n\"result: {INVOCA f()}\"",
|
||||||
|
Program([], [
|
||||||
|
Defini(ID("f"), [], [Redi([Numeral("V")])]),
|
||||||
|
ExpressionStatement(InterpolatedString([String("result: "), Invoca(ID("f"), [])]))
|
||||||
|
]), ValStr("result: V")),
|
||||||
|
# single-quoted string inside interpolation
|
||||||
|
("DESIGNA x VT 'hello'\n\"{x & '!'}\"",
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("x"), String("hello")),
|
||||||
|
ExpressionStatement(InterpolatedString([BinOp(ID("x"), String("!"), "SYMBOL_AMPERSAND")]))
|
||||||
|
]), ValStr("hello!")),
|
||||||
|
# plain double-quoted string (no braces) still works
|
||||||
|
('"hello world"',
|
||||||
|
Program([], [ExpressionStatement(String("hello world"))]),
|
||||||
|
ValStr("hello world")),
|
||||||
|
# interpolation in DICE output
|
||||||
|
('DESIGNA name VT "Roma"\nDICE("Salve, {name}!")',
|
||||||
|
Program([], [
|
||||||
|
Designa(ID("name"), String("Roma")),
|
||||||
|
ExpressionStatement(BuiltIn("DICE", [InterpolatedString([String("Salve, "), ID("name"), String("!")])]))
|
||||||
|
]), ValStr("Salve, Roma!"), "Salve, Roma!\n"),
|
||||||
|
]
|
||||||
|
|
||||||
|
class TestInterpolation(unittest.TestCase):
|
||||||
|
@parameterized.expand(interpolation_tests)
|
||||||
|
def test_interpolation(self, source, nodes, value, output=""):
|
||||||
|
run_test(self, source, nodes, value, output)
|
||||||
|
|
||||||
|
|
||||||
# --- Comparison operators ---
|
# --- Comparison operators ---
|
||||||
|
|
||||||
comparison_tests = [
|
comparison_tests = [
|
||||||
|
|||||||
Reference in New Issue
Block a user