diff --git a/centvrion/ast_nodes.py b/centvrion/ast_nodes.py
index 937a3a5..4293a34 100644
--- a/centvrion/ast_nodes.py
+++ b/centvrion/ast_nodes.py
@@ -343,10 +343,21 @@ class String(Node):
         return f"String({self.value})"
 
     def print(self):
+        """Return the literal as source text, with special characters escaped."""
+        # Backslashes are escaped first so that the backslashes introduced by
+        # the later replacements are not doubled.
+        v = (self.value
+             .replace('\\', '\\\\')
+             .replace('\n', '\\n')
+             .replace('\t', '\\t')
+             .replace('\r', '\\r'))
         if self.quote == "'":
-            return f"'{self.value}'"
-        escaped = self.value.replace('{', '{{').replace('}', '}}')
-        return f'"{escaped}"'
+            # The delimiter must be escaped too, otherwise a value such as
+            # it's would print as 'it's' and terminate the literal early.
+            v = v.replace("'", "\\'")
+            return f"'{v}'"
+        v = v.replace('"', '\\"').replace('{', '{{').replace('}', '}}')
+        return f'"{v}"'
 
     def _eval(self, vtable):
         return vtable, ValStr(self.value)
diff --git a/centvrion/lexer.py b/centvrion/lexer.py
index d2d2301..42984d4 100644
--- a/centvrion/lexer.py
+++ b/centvrion/lexer.py
@@ -66,7 +66,9 @@ builtin_tokens = [("BUILTIN", i) for i in [
 ]]
 
 data_tokens = [
-    ("DATA_STRING", r"(\".*?\"|'.*?')"),
+    # A quoted literal is a run of ordinary characters and backslash pairs,
+    # so an escaped quote (\" or \') does not end the string.
+    ("DATA_STRING", r'("(?:[^"\\]|\\.)*"|' + r"'(?:[^'\\]|\\.)*')"),
     ("DATA_FRACTION", r"([IVXLCDM][IVXLCDM_]*)?([S][S:.|]*|:[S:.|]+|\.[S:.|]*)"),
     ("DATA_NUMERAL", r"[IVXLCDM][IVXLCDM_]*")
 ]
diff --git a/centvrion/parser.py b/centvrion/parser.py
index 6ff4123..0a3b8d2 100644
--- a/centvrion/parser.py
+++ b/centvrion/parser.py
@@ -7,19 +7,67 @@ from . import ast_nodes
 ALL_TOKENS = list(set([i[0] for i in all_tokens]))
 
 
+# Character following the backslash -> character the escape denotes.
+_ESCAPE_MAP = {
+    'n': '\n',
+    't': '\t',
+    'r': '\r',
+    '\\': '\\',
+    '"': '"',
+    "'": "'",
+}
+
+
+def _read_escape(s, i):
+    """Decode the backslash escape that starts at s[i] (the backslash).
+
+    Returns (text, new_i): the decoded text and the index just past the
+    escape. Raises CentvrionError if the backslash is the last character.
+    """
+    if i + 1 >= len(s):
+        raise CentvrionError("Trailing backslash in string")
+    nxt = s[i + 1]
+    if nxt in _ESCAPE_MAP:
+        return _ESCAPE_MAP[nxt], i + 2
+    # unknown escapes pass through literally (e.g. \1 for regex backrefs)
+    return '\\' + nxt, i + 2
+
+
+def _unescape(s):
+    """Process escape sequences in a string with no interpolation."""
+    out = []
+    i = 0
+    while i < len(s):
+        if s[i] == '\\':
+            ch, i = _read_escape(s, i)
+            out.append(ch)
+        else:
+            out.append(s[i])
+            i += 1
+    return ''.join(out)
+
+
 def _parse_interpolated(raw_value):
     quote_char = raw_value[0]
     inner = raw_value[1:-1]
 
-    if quote_char == "'" or len(inner) == 0:
+    if len(inner) == 0:
         return ast_nodes.String(inner)
 
+    if quote_char == "'":
+        return ast_nodes.String(_unescape(inner))
+
     parts = []
     i = 0
     current = []
 
     while i < len(inner):
         ch = inner[i]
+        # Escapes are consumed before the brace checks below run.
+        if ch == '\\':
+            c, i = _read_escape(inner, i)
+            current.append(c)
+            continue
         if ch == '{':
             if i + 1 < len(inner) and inner[i + 1] == '{':
                 current.append('{')
diff --git a/tests.py b/tests.py
index 34fe3e3..263a807 100644
--- a/tests.py
+++ b/tests.py
@@ -1080,6 +1080,63 @@ class TestInterpolation(unittest.TestCase):
         run_test(self, source, nodes, value, output)
 
 
+# --- Escape sequences ---
+
+escape_tests = [
+    # \n → newline
+    ('"hello\\nworld"',
+     Program([], [ExpressionStatement(String("hello\nworld"))]),
+     ValStr("hello\nworld")),
+    # \t → tab
+    ('"col\\tcol"',
+     Program([], [ExpressionStatement(String("col\tcol"))]),
+     ValStr("col\tcol")),
+    # \r → carriage return
+    ('"line\\rover"',
+     Program([], [ExpressionStatement(String("line\rover"))]),
+     ValStr("line\rover")),
+    # \\ → literal backslash
+    ('"back\\\\slash"',
+     Program([], [ExpressionStatement(String("back\\slash"))]),
+     ValStr("back\\slash")),
+    # \" → literal double quote
+    ('"say \\"salve\\""',
+     Program([], [ExpressionStatement(String('say "salve"'))]),
+     ValStr('say "salve"')),
+    # \' → literal single quote in single-quoted string
+    ("'it\\'s'",
+     Program([], [ExpressionStatement(String("it's"))]),
+     ValStr("it's")),
+    # \n in single-quoted string
+    ("'hello\\nworld'",
+     Program([], [ExpressionStatement(String("hello\nworld"))]),
+     ValStr("hello\nworld")),
+    # escape inside interpolated string
+    ('DESIGNA name VT "Roma"\n"salve\\n{name}"',
+     Program([], [
+         Designa(ID("name"), String("Roma")),
+         ExpressionStatement(InterpolatedString([String("salve\n"), ID("name")]))
+     ]), ValStr("salve\nRoma")),
+    # DIC with newline escape
+    ('DIC("hello\\nworld")',
+     Program([], [ExpressionStatement(BuiltIn("DIC", [String("hello\nworld")]))]),
+     ValStr("hello\nworld"), "hello\nworld\n"),
+    # multiple escapes in one string
+    ('"\\t\\n\\\\"',
+     Program([], [ExpressionStatement(String("\t\n\\"))]),
+     ValStr("\t\n\\")),
+    # unknown escapes pass through (regex backrefs)
+    ('"\\1\\2"',
+     Program([], [ExpressionStatement(String("\\1\\2"))]),
+     ValStr("\\1\\2")),
+]
+
+class TestEscapeSequences(unittest.TestCase):
+    @parameterized.expand(escape_tests)
+    def test_escape(self, source, nodes, value, output=""):
+        run_test(self, source, nodes, value, output)
+
+
 # --- Comparison operators ---
 
 comparison_tests = [