🐐 Escape sequences
This commit is contained in:
@@ -343,10 +343,15 @@ class String(Node):
|
|||||||
return f"String({self.value})"
|
return f"String({self.value})"
|
||||||
|
|
||||||
def print(self):
    """Render this string node as source text.

    Backslash, newline, tab and carriage return in ``self.value`` are
    written as two-character backslash escapes. The result is wrapped in
    single quotes when ``self.quote`` is ``'``; otherwise it is wrapped in
    double quotes, with embedded double quotes escaped and braces doubled.

    Returns:
        The quoted, escaped string literal.
    """
    # Backslashes must be doubled first so the escapes added below are not
    # themselves re-escaped.
    v = (self.value
         .replace('\\', '\\\\')
         .replace('\n', '\\n')
         .replace('\t', '\\t')
         .replace('\r', '\\r'))
    if self.quote == "'":
        # An embedded single quote would otherwise end the literal early.
        return "'" + v.replace("'", "\\'") + "'"
    # Double-quoted literals: a bare '"' would end the literal, and braces
    # are doubled so they are not read as interpolation delimiters.
    v = v.replace('"', '\\"').replace('{', '{{').replace('}', '}}')
    return f'"{v}"'
|
||||||
|
|
||||||
def _eval(self, vtable):
    """Evaluate this string node.

    Returns:
        A 2-tuple of the variable table, passed through unchanged, and
        ``self.value`` wrapped in ``ValStr``.
    """
    return vtable, ValStr(self.value)
|
||||||
|
|||||||
@@ -66,7 +66,7 @@ builtin_tokens = [("BUILTIN", i) for i in [
|
|||||||
]]
|
]]
|
||||||
|
|
||||||
data_tokens = [
    # A double- or single-quoted literal. Inside the quotes, either any
    # character other than the closing quote or a backslash, or a backslash
    # followed by one character, so an escaped quote does not end the token.
    ("DATA_STRING", r'("(?:[^"\\]|\\.)*"|' + r"'(?:[^'\\]|\\.)*')"),
    ("DATA_FRACTION", r"([IVXLCDM][IVXLCDM_]*)?([S][S:.|]*|:[S:.|]+|\.[S:.|]*)"),
    ("DATA_NUMERAL", r"[IVXLCDM][IVXLCDM_]*")
]
|
||||||
|
|||||||
@@ -7,19 +7,61 @@ from . import ast_nodes
|
|||||||
# Distinct token names taken from the first element of each all_tokens entry.
ALL_TOKENS = list({tok[0] for tok in all_tokens})
|
||||||
|
|
||||||
|
|
||||||
|
# Characters recognised after a backslash, mapped to the character they
# stand for.
_ESCAPE_MAP = {
    'n': '\n',
    't': '\t',
    'r': '\r',
    '\\': '\\',
    '"': '"',
    "'": "'",
}


def _read_escape(s, i):
    """Read a backslash escape at position i (the backslash).

    Args:
        s: The string being scanned.
        i: Index of the backslash within ``s``.

    Returns:
        A ``(text, new_i)`` tuple: the text the escape stands for, and the
        index just past the two characters consumed.

    Raises:
        CentvrionError: If the backslash is the last character of ``s``.
    """
    if i + 1 >= len(s):
        raise CentvrionError("Trailing backslash in string")
    nxt = s[i + 1]
    # unknown escapes pass through literally (e.g. \1 for regex backrefs)
    return _ESCAPE_MAP.get(nxt, '\\' + nxt), i + 2
|
||||||
|
|
||||||
|
|
||||||
|
def _unescape(s):
    """Process escape sequences in a string with no interpolation.

    Walks ``s`` left to right; each backslash is handed to ``_read_escape``,
    which supplies the replacement text and the index to resume from. Every
    other character is copied through unchanged.

    Returns:
        The string with its escapes replaced.
    """
    out = []
    i = 0
    end = len(s)
    while i < end:
        if s[i] == '\\':
            piece, i = _read_escape(s, i)
        else:
            piece = s[i]
            i += 1
        out.append(piece)
    return ''.join(out)
|
||||||
|
|
||||||
|
|
||||||
def _parse_interpolated(raw_value):
|
def _parse_interpolated(raw_value):
|
||||||
quote_char = raw_value[0]
|
quote_char = raw_value[0]
|
||||||
inner = raw_value[1:-1]
|
inner = raw_value[1:-1]
|
||||||
|
|
||||||
if quote_char == "'" or len(inner) == 0:
|
if len(inner) == 0:
|
||||||
return ast_nodes.String(inner)
|
return ast_nodes.String(inner)
|
||||||
|
|
||||||
|
if quote_char == "'":
|
||||||
|
return ast_nodes.String(_unescape(inner))
|
||||||
|
|
||||||
parts = []
|
parts = []
|
||||||
i = 0
|
i = 0
|
||||||
current = []
|
current = []
|
||||||
|
|
||||||
while i < len(inner):
|
while i < len(inner):
|
||||||
ch = inner[i]
|
ch = inner[i]
|
||||||
|
if ch == '\\':
|
||||||
|
c, i = _read_escape(inner, i)
|
||||||
|
current.append(c)
|
||||||
|
continue
|
||||||
if ch == '{':
|
if ch == '{':
|
||||||
if i + 1 < len(inner) and inner[i + 1] == '{':
|
if i + 1 < len(inner) and inner[i + 1] == '{':
|
||||||
current.append('{')
|
current.append('{')
|
||||||
|
|||||||
57
tests.py
57
tests.py
@@ -1080,6 +1080,63 @@ class TestInterpolation(unittest.TestCase):
|
|||||||
run_test(self, source, nodes, value, output)
|
run_test(self, source, nodes, value, output)
|
||||||
|
|
||||||
|
|
||||||
|
# --- Escape sequences ---
|
||||||
|
|
||||||
|
# Each entry is (source, expected AST, expected value[, expected output]).
escape_tests = [
    # \n → newline
    ('"hello\\nworld"',
     Program([], [ExpressionStatement(String("hello\nworld"))]),
     ValStr("hello\nworld")),
    # \t → tab
    ('"col\\tcol"',
     Program([], [ExpressionStatement(String("col\tcol"))]),
     ValStr("col\tcol")),
    # \r → carriage return
    ('"line\\rover"',
     Program([], [ExpressionStatement(String("line\rover"))]),
     ValStr("line\rover")),
    # \\ → literal backslash
    ('"back\\\\slash"',
     Program([], [ExpressionStatement(String("back\\slash"))]),
     ValStr("back\\slash")),
    # \" → literal double quote
    ('"say \\"salve\\""',
     Program([], [ExpressionStatement(String('say "salve"'))]),
     ValStr('say "salve"')),
    # \' → literal single quote in single-quoted string
    ("'it\\'s'",
     Program([], [ExpressionStatement(String("it's"))]),
     ValStr("it's")),
    # \n in single-quoted string
    ("'hello\\nworld'",
     Program([], [ExpressionStatement(String("hello\nworld"))]),
     ValStr("hello\nworld")),
    # escape inside interpolated string
    ('DESIGNA name VT "Roma"\n"salve\\n{name}"',
     Program([], [
         Designa(ID("name"), String("Roma")),
         ExpressionStatement(InterpolatedString([String("salve\n"), ID("name")]))
     ]), ValStr("salve\nRoma")),
    # DIC with newline escape
    ('DIC("hello\\nworld")',
     Program([], [ExpressionStatement(BuiltIn("DIC", [String("hello\nworld")]))]),
     ValStr("hello\nworld"), "hello\nworld\n"),
    # multiple escapes in one string
    ('"\\t\\n\\\\"',
     Program([], [ExpressionStatement(String("\t\n\\"))]),
     ValStr("\t\n\\")),
    # unknown escapes pass through (regex backrefs)
    ('"\\1\\2"',
     Program([], [ExpressionStatement(String("\\1\\2"))]),
     ValStr("\\1\\2")),
]
|
||||||
|
|
||||||
|
class TestEscapeSequences(unittest.TestCase):
    """Runs every entry of ``escape_tests`` through ``run_test``."""

    @parameterized.expand(escape_tests)
    def test_escape(self, source, nodes, value, output=""):
        # ``output`` defaults to "" for entries that omit a fourth element.
        run_test(self, source, nodes, value, output)
|
||||||
|
|
||||||
|
|
||||||
# --- Comparison operators ---
|
# --- Comparison operators ---
|
||||||
|
|
||||||
comparison_tests = [
|
comparison_tests = [
|
||||||
|
|||||||
Reference in New Issue
Block a user