460 lines
17 KiB
Python
460 lines
17 KiB
Python
from rply import ParserGenerator
|
|
|
|
from centvrion.errors import CentvrionError
|
|
from centvrion.lexer import Lexer, all_tokens
|
|
from . import ast_nodes
|
|
|
|
# Unique token names fed to the ParserGenerator.  Sorted so the token list
# (and anything derived from it, e.g. cached parser tables) is deterministic
# across runs regardless of string-hash randomization; plain list(set(...))
# yields a different order on every interpreter invocation.
ALL_TOKENS = sorted({i[0] for i in all_tokens})
|
|
|
|
|
|
_ESCAPE_MAP = {
|
|
'n': '\n',
|
|
't': '\t',
|
|
'r': '\r',
|
|
'\\': '\\',
|
|
'"': '"',
|
|
"'": "'",
|
|
}
|
|
|
|
|
|
def _read_escape(s, i):
|
|
"""Read a backslash escape at position i (the backslash). Returns (char, new_i)."""
|
|
if i + 1 >= len(s):
|
|
raise CentvrionError("Trailing backslash in string")
|
|
nxt = s[i + 1]
|
|
if nxt in _ESCAPE_MAP:
|
|
return _ESCAPE_MAP[nxt], i + 2
|
|
# unknown escapes pass through literally (e.g. \1 for regex backrefs)
|
|
return '\\' + nxt, i + 2
|
|
|
|
|
|
def _unescape(s):
|
|
"""Process escape sequences in a string with no interpolation."""
|
|
out = []
|
|
i = 0
|
|
while i < len(s):
|
|
if s[i] == '\\':
|
|
ch, i = _read_escape(s, i)
|
|
out.append(ch)
|
|
else:
|
|
out.append(s[i])
|
|
i += 1
|
|
return ''.join(out)
|
|
|
|
|
|
def _parse_interpolated(raw_value):
    """Turn a raw string token (quotes included) into an AST node.

    Single-quoted strings are plain: escapes are expanded, no interpolation.
    Double-quoted strings additionally support {expr} interpolation, with
    '{{' and '}}' as literal braces.  Returns a String node when no
    interpolation occurs, otherwise an InterpolatedString whose parts mix
    String nodes and expression nodes.

    Raises CentvrionError on unbalanced braces or when an interpolation does
    not contain exactly one expression.
    """
    quote_char = raw_value[0]
    inner = raw_value[1:-1]  # strip the surrounding quote characters

    if len(inner) == 0:
        return ast_nodes.String(inner)

    if quote_char == "'":
        # Single quotes: escape processing only, never interpolation.
        return ast_nodes.String(_unescape(inner))

    parts = []       # finished String / expression nodes, in source order
    i = 0
    current = []     # literal characters accumulated since the last part

    while i < len(inner):
        ch = inner[i]
        if ch == '\\':
            # Escapes are expanded here, so e.g. "\{" never starts an
            # interpolation.
            c, i = _read_escape(inner, i)
            current.append(c)
            continue
        if ch == '{':
            if i + 1 < len(inner) and inner[i + 1] == '{':
                # '{{' is an escaped literal '{'.
                current.append('{')
                i += 2
                continue
            # Flush pending literal text before the interpolated expression.
            if current:
                parts.append(ast_nodes.String(''.join(current)))
                current = []
            # Scan forward to the matching '}', tracking nesting depth.
            # NOTE(review): braces inside string literals within the
            # expression would confuse this count — presumably acceptable
            # for this language; confirm against the lexer's string rules.
            j = i + 1
            depth = 1
            while j < len(inner) and depth > 0:
                if inner[j] == '{':
                    depth += 1
                elif inner[j] == '}':
                    depth -= 1
                j += 1
            if depth != 0:
                raise CentvrionError("Unclosed '{' in interpolated string")
            expr_src = inner[i + 1:j - 1]
            # Recursively lex and parse the embedded expression source; the
            # trailing newline satisfies the statement grammar.
            tokens = Lexer().get_lexer().lex(expr_src + "\n")
            program = Parser().parse(tokens)
            if len(program.statements) != 1:
                raise CentvrionError("Interpolation must contain exactly one expression")
            stmt = program.statements[0]
            if not isinstance(stmt, ast_nodes.ExpressionStatement):
                raise CentvrionError("Interpolation must contain an expression, not a statement")
            parts.append(stmt.expression)
            i = j
        elif ch == '}':
            if i + 1 < len(inner) and inner[i + 1] == '}':
                # '}}' is an escaped literal '}'.
                current.append('}')
                i += 2
                continue
            raise CentvrionError("Unmatched '}' in string (use '}}' for literal '}')")
        else:
            current.append(ch)
            i += 1

    # Flush any trailing literal text.
    if current:
        parts.append(ast_nodes.String(''.join(current)))

    # A single literal part collapses back to a plain String node.
    if len(parts) == 1 and isinstance(parts[0], ast_nodes.String):
        return parts[0]

    return ast_nodes.InterpolatedString(parts)
|
|
|
|
class Parser():
    """LALR parser for the Centvrion language, built on rply.

    Grammar productions are registered inside ``parse`` (see the NOTE
    there); the generated parser turns a lexer token stream into an
    ``ast_nodes.Program``.
    """

    def __init__(self):
        # Precedence table, lowest-binding first (rply convention).
        # UMINUS / UNOT / INDEX are virtual precedence tokens referenced
        # by the unary and indexing productions below.
        self.pg = ParserGenerator(
            ALL_TOKENS,
            precedence=[
                ('left', ["KEYWORD_AVT"]),
                ('left', ["KEYWORD_ET"]),
                ('left', ["KEYWORD_PLVS", "KEYWORD_MINVS", "KEYWORD_EST", "KEYWORD_DISPAR",
                          "KEYWORD_HAVD_PLVS", "KEYWORD_HAVD_MINVS"]),
                ('left', ["SYMBOL_AMPERSAND", "SYMBOL_AT", "SYMBOL_PLUS", "SYMBOL_MINUS"]),
                ('left', ["SYMBOL_TIMES", "SYMBOL_DIVIDE", "KEYWORD_RELIQVVM"]),
                ('right', ["UMINUS", "UNOT"]),
                ('left', ["SYMBOL_LBRACKET", "INDEX"]),
            ]
        )

    def parse(self, tokens_input) -> ast_nodes.Program:
        """Register the grammar, build the parser, and parse *tokens_input*.

        NOTE(review): productions are registered on every call, so calling
        parse() twice on the same Parser instance re-registers all of them —
        confirm rply tolerates this, or move registration to __init__.
        """

        # Top-level program stuff
        @self.pg.production('program : opt_newline module_calls statement_list')
        def program(tokens):
            return ast_nodes.Program(tokens[1], tokens[2])

        # One or more NEWLINE tokens; value is discarded.
        @self.pg.production('newlines : NEWLINE')
        @self.pg.production('newlines : NEWLINE newlines')
        def newlines(_):
            return None

        # Zero or more NEWLINE tokens; value is discarded.
        @self.pg.production('opt_newline : ')
        @self.pg.production('opt_newline : newlines')
        def opt_newline(_):
            return None

        # Module calls
        @self.pg.production('module_calls : ')
        @self.pg.production('module_calls : module_call newlines module_calls')
        def module_calls(calls):
            if len(calls) == 0:
                return []
            elif len(calls) == 1:
                # NOTE(review): unreachable with the two productions above
                # (they match 0 or 3 symbols); kept as-is.
                return [calls[0]]
            else:
                return [calls[0]] + calls[2]

        # Module import: CVM module-name
        @self.pg.production('module_call : KEYWORD_CVM MODULE')
        def module_call(tokens):
            return ast_nodes.ModuleCall(tokens[1].value)

        # Statements
        @self.pg.production('statements : opt_newline statement_list')
        def statements(tokens):
            return tokens[1]

        # Newline-separated statements -> flat Python list.
        @self.pg.production('statement_list : statement opt_newline')
        @self.pg.production('statement_list : statement newlines statement_list')
        def statement_list(calls):
            if len(calls) == 2:
                return [calls[0]]
            else:
                return [calls[0]] + calls[2]

        # Assignment: DESIGNA name VT expr
        @self.pg.production('statement : KEYWORD_DESIGNA id KEYWORD_VT expression')
        def statement_designa(tokens):
            return ast_nodes.Designa(tokens[1], tokens[3])

        # One or more [expr] suffixes, e.g. a[0][1] -> [expr0, expr1].
        @self.pg.production('index_chain : SYMBOL_LBRACKET expression SYMBOL_RBRACKET')
        def index_chain_single(tokens):
            return [tokens[1]]

        @self.pg.production('index_chain : SYMBOL_LBRACKET expression SYMBOL_RBRACKET index_chain')
        def index_chain_multi(tokens):
            return [tokens[1]] + tokens[3]

        # Indexed assignment: DESIGNA name[i]... VT expr
        @self.pg.production('statement : KEYWORD_DESIGNA id index_chain KEYWORD_VT expression')
        def statement_designa_index(tokens):
            return ast_nodes.DesignaIndex(tokens[1], tokens[2], tokens[4])

        # Destructuring assignment: DESIGNA a, b, ... VT expr
        @self.pg.production('statement : KEYWORD_DESIGNA id SYMBOL_COMMA id_list_rest KEYWORD_VT expression')
        def statement_designa_destructure(tokens):
            return ast_nodes.DesignaDestructure([tokens[1]] + tokens[3], tokens[5])

        # Augmented assignments desugar to Designa(x, BinOp(x, expr, op)).
        @self.pg.production('statement : id KEYWORD_AVGE expression')
        def statement_avge(tokens):
            return ast_nodes.Designa(tokens[0], ast_nodes.BinOp(tokens[0], tokens[2], "SYMBOL_PLUS"))

        @self.pg.production('statement : id KEYWORD_MINVE expression')
        def statement_minve(tokens):
            return ast_nodes.Designa(tokens[0], ast_nodes.BinOp(tokens[0], tokens[2], "SYMBOL_MINUS"))

        @self.pg.production('statement : id KEYWORD_MVLTIPLICA expression')
        def statement_mvltiplica(tokens):
            return ast_nodes.Designa(tokens[0], ast_nodes.BinOp(tokens[0], tokens[2], "SYMBOL_TIMES"))

        @self.pg.production('statement : id KEYWORD_DIVIDE expression')
        def statement_divide(tokens):
            return ast_nodes.Designa(tokens[0], ast_nodes.BinOp(tokens[0], tokens[2], "SYMBOL_DIVIDE"))

        # A bare expression used as a statement.
        @self.pg.production('statement : expression')
        def statement_expression(tokens):
            return ast_nodes.ExpressionStatement(tokens[0])

        # Named function definition: DEFINI name (params) VT { body }
        @self.pg.production('statement : KEYWORD_DEFINI id ids KEYWORD_VT SYMBOL_LCURL statements SYMBOL_RCURL')
        def defini(tokens):
            return ast_nodes.Defini(tokens[1], tokens[2], tokens[5])

        # Return statement: REDI (exprs)
        @self.pg.production('statement : KEYWORD_REDI expressions')
        def redi(tokens):
            return ast_nodes.Redi(tokens[1])

        # Compound statements are built by their own productions below.
        @self.pg.production('statement : per_statement')
        @self.pg.production('statement : dum_statement')
        @self.pg.production('statement : donicum_statement')
        @self.pg.production('statement : si_statement')
        @self.pg.production('statement : tempta_statement')
        def nested_statements(tokens):
            return tokens[0]

        # Loop break.
        @self.pg.production('statement : KEYWORD_ERVMPE')
        def erumpe(_):
            return ast_nodes.Erumpe()

        # Loop continue.
        @self.pg.production('statement : KEYWORD_CONTINVA')
        def continva(_):
            return ast_nodes.Continva()

        # Conditional: SI cond TVNC { body } [ALIVD ...]; the else-part is
        # None when absent.
        @self.pg.production('si_statement : KEYWORD_SI expression KEYWORD_TVNC SYMBOL_LCURL statements SYMBOL_RCURL')
        @self.pg.production('si_statement : KEYWORD_SI expression KEYWORD_TVNC SYMBOL_LCURL statements SYMBOL_RCURL aluid_statement')
        def si_statement(tokens):
            if len(tokens) == 7:
                return ast_nodes.SiStatement(tokens[1], tokens[4], tokens[6])
            else:
                return ast_nodes.SiStatement(tokens[1], tokens[4], None)

        # else-if: the nested SiStatement is wrapped in a list so the
        # else-branch is always a statement list.
        @self.pg.production('aluid_statement : KEYWORD_ALIVD si_statement')
        def aluid_si(tokens):
            return [tokens[1]]

        # Plain else: ALIVD { body }
        @self.pg.production('aluid_statement : KEYWORD_ALIVD SYMBOL_LCURL statements SYMBOL_RCURL')
        def aluid(tokens):
            return tokens[2]

        # Loop: DVM cond FAC { body }
        @self.pg.production('dum_statement : KEYWORD_DVM expression KEYWORD_FAC SYMBOL_LCURL statements SYMBOL_RCURL')
        def dum(tokens):
            return ast_nodes.DumStatement(tokens[1], tokens[4])

        # AETERNVM is sugar for `DVM FALSITAS` — same AST, no observable difference.
        # NOTE(review): Bool(False) for an "eternal" loop looks inverted if
        # DumStatement runs while its condition is truthy — confirm the
        # interpreter's DumStatement semantics before changing anything here.
        @self.pg.production('dum_statement : KEYWORD_AETERNVM KEYWORD_FAC SYMBOL_LCURL statements SYMBOL_RCURL')
        def aeternvm(tokens):
            return ast_nodes.DumStatement(ast_nodes.Bool(False), tokens[3])

        # For-each with destructuring: PER a, b, ... IN expr FAC { body }
        @self.pg.production('per_statement : KEYWORD_PER id SYMBOL_COMMA id_list_rest KEYWORD_IN expression KEYWORD_FAC SYMBOL_LCURL statements SYMBOL_RCURL')
        def per_destructure(tokens):
            return ast_nodes.PerStatement(tokens[5], [tokens[1]] + tokens[3], tokens[8])

        # For-each: PER x IN expr FAC { body }
        @self.pg.production('per_statement : KEYWORD_PER id KEYWORD_IN expression KEYWORD_FAC SYMBOL_LCURL statements SYMBOL_RCURL')
        def per(tokens):
            return ast_nodes.PerStatement(tokens[3], tokens[1], tokens[6])

        # try/catch: TEMPTA { body } CAPE err { handler }
        @self.pg.production('tempta_statement : KEYWORD_TEMPTA SYMBOL_LCURL statements SYMBOL_RCURL KEYWORD_CAPE id SYMBOL_LCURL statements SYMBOL_RCURL')
        def tempta(tokens):
            return ast_nodes.TemptaStatement(tokens[2], tokens[5], tokens[7])

        # Counted loop desugars to a for-each over a range array.
        @self.pg.production('donicum_statement : KEYWORD_DONICVM id KEYWORD_VT expression KEYWORD_VSQVE expression KEYWORD_FAC SYMBOL_LCURL statements SYMBOL_RCURL')
        def donicum(tokens):
            range_array = ast_nodes.DataRangeArray(tokens[3], tokens[5])
            return ast_nodes.PerStatement(range_array, tokens[1], tokens[8])

        # Counted loop with an explicit step (GRADV).
        @self.pg.production('donicum_statement : KEYWORD_DONICVM id KEYWORD_VT expression KEYWORD_VSQVE expression KEYWORD_GRADV expression KEYWORD_FAC SYMBOL_LCURL statements SYMBOL_RCURL')
        def donicum_step(tokens):
            range_array = ast_nodes.DataRangeArray(tokens[3], tokens[5], tokens[7])
            return ast_nodes.PerStatement(range_array, tokens[1], tokens[10])

        # expressions
        # Parenthesized, comma-separated argument list -> Python list.
        @self.pg.production('expressions : SYMBOL_LPARENS expression_list')
        def expressions(tokens):
            return tokens[1]

        @self.pg.production('expression_list : SYMBOL_RPARENS')
        @self.pg.production('expression_list : expression SYMBOL_RPARENS')
        @self.pg.production('expression_list : expression SYMBOL_COMMA expression_list')
        def expression_list(calls):
            if len(calls) == 1:
                return []
            elif len(calls) == 2:
                return [calls[0]]
            else:
                return [calls[0]] + calls[2]

        # Array-literal items; newlines are allowed after commas.
        @self.pg.production('array_items : expression')
        @self.pg.production('array_items : expression SYMBOL_COMMA opt_newline array_items')
        def array_items(calls):
            if len(calls) == 1:
                return [calls[0]]
            else:
                return [calls[0]] + calls[3]

        @self.pg.production('expression : id')
        def expression_id(tokens):
            return tokens[0]

        # Built-in call: BUILTIN (args)
        @self.pg.production('expression : BUILTIN expressions')
        def expression_builtin(tokens):
            return ast_nodes.BuiltIn(tokens[0].value, tokens[1])

        # String literal; may expand into an InterpolatedString.
        @self.pg.production('expression : DATA_STRING')
        def expression_string(tokens):
            return _parse_interpolated(tokens[0].value)

        @self.pg.production('expression : DATA_NUMERAL')
        def expression_numeral(tokens):
            return ast_nodes.Numeral(tokens[0].value)

        @self.pg.production('expression : DATA_FRACTION')
        def expression_fraction(tokens):
            return ast_nodes.Fractio(tokens[0].value)

        # Boolean literals: VERITAS -> True, FALSITAS -> False.
        @self.pg.production('expression : KEYWORD_FALSITAS')
        @self.pg.production('expression : KEYWORD_VERITAS')
        def expression_bool(tokens):
            return ast_nodes.Bool(tokens[0].name == "KEYWORD_VERITAS")

        # Null literal.
        @self.pg.production('expression : KEYWORD_NVLLVS')
        def expression_nullus(_):
            return ast_nodes.Nullus()

        # All binary operators share one handler; the operator travels as
        # the token name.  Precedence comes from the table in __init__.
        @self.pg.production('expression : expression SYMBOL_AT expression')
        @self.pg.production('expression : expression SYMBOL_AMPERSAND expression')
        @self.pg.production('expression : expression SYMBOL_MINUS expression')
        @self.pg.production('expression : expression SYMBOL_PLUS expression')
        @self.pg.production('expression : expression SYMBOL_TIMES expression')
        @self.pg.production('expression : expression SYMBOL_DIVIDE expression')
        @self.pg.production('expression : expression KEYWORD_RELIQVVM expression')
        @self.pg.production('expression : expression KEYWORD_EST expression')
        @self.pg.production('expression : expression KEYWORD_DISPAR expression')
        @self.pg.production('expression : expression KEYWORD_MINVS expression')
        @self.pg.production('expression : expression KEYWORD_PLVS expression')
        @self.pg.production('expression : expression KEYWORD_HAVD_PLVS expression')
        @self.pg.production('expression : expression KEYWORD_HAVD_MINVS expression')
        @self.pg.production('expression : expression KEYWORD_ET expression')
        @self.pg.production('expression : expression KEYWORD_AVT expression')
        def binop(tokens):
            return ast_nodes.BinOp(tokens[0], tokens[2], tokens[1].name)

        # Unary negation; uses the UMINUS virtual precedence token.
        @self.pg.production('expression : SYMBOL_MINUS expression', precedence='UMINUS')
        def unary_minus(tokens):
            return ast_nodes.UnaryMinus(tokens[1])

        # Logical not; uses the UNOT virtual precedence token.
        @self.pg.production('expression : KEYWORD_NON expression', precedence='UNOT')
        def unary_not(tokens):
            return ast_nodes.UnaryNot(tokens[1])

        # Function call: INVOCA callee (args)
        @self.pg.production('expression : KEYWORD_INVOCA expression expressions')
        def invoca(tokens):
            return ast_nodes.Invoca(tokens[1], tokens[2])

        # Anonymous function: FVNCTIO (params) VT { body }
        @self.pg.production('expression : KEYWORD_FVNCTIO ids KEYWORD_VT SYMBOL_LCURL statements SYMBOL_RCURL')
        def fvnctio(tokens):
            return ast_nodes.Fvnctio(tokens[1], tokens[4])

        # Parenthesized expression grouping.
        @self.pg.production('expression : SYMBOL_LPARENS expression SYMBOL_RPARENS')
        def parens(tokens):
            return tokens[1]

        # Dict-literal items: key VT value [, ...] -> list of (k, v) pairs.
        @self.pg.production('dict_items : expression KEYWORD_VT expression')
        @self.pg.production('dict_items : expression KEYWORD_VT expression SYMBOL_COMMA opt_newline dict_items')
        def dict_items(calls):
            if len(calls) == 3:
                return [(calls[0], calls[2])]
            else:
                return [(calls[0], calls[2])] + calls[5]

        # Empty dict literal: TABVLA { }
        @self.pg.production('expression : KEYWORD_TABVLA SYMBOL_LCURL opt_newline SYMBOL_RCURL')
        def dict_literal_empty(tokens):
            return ast_nodes.DataDict([])

        @self.pg.production('expression : KEYWORD_TABVLA SYMBOL_LCURL opt_newline dict_items opt_newline SYMBOL_RCURL')
        def dict_literal(tokens):
            return ast_nodes.DataDict(tokens[3])

        # Array literals: empty, flat, and with a leading newline.
        @self.pg.production('expression : SYMBOL_LBRACKET SYMBOL_RBRACKET')
        @self.pg.production('expression : SYMBOL_LBRACKET newlines SYMBOL_RBRACKET')
        def array_empty(_):
            return ast_nodes.DataArray([])

        @self.pg.production('expression : SYMBOL_LBRACKET array_items opt_newline SYMBOL_RBRACKET')
        def array(tokens):
            return ast_nodes.DataArray(tokens[1])

        @self.pg.production('expression : SYMBOL_LBRACKET newlines array_items opt_newline SYMBOL_RBRACKET')
        def array_leading_newline(tokens):
            return ast_nodes.DataArray(tokens[2])

        # Range literal: [start VSQVE stop], with optional GRADV step.
        @self.pg.production('expression : SYMBOL_LBRACKET expression KEYWORD_VSQVE expression SYMBOL_RBRACKET')
        def range_array(tokens):
            return ast_nodes.DataRangeArray(tokens[1], tokens[3])

        @self.pg.production('expression : SYMBOL_LBRACKET expression KEYWORD_VSQVE expression KEYWORD_GRADV expression SYMBOL_RBRACKET')
        def range_array_step(tokens):
            return ast_nodes.DataRangeArray(tokens[1], tokens[3], tokens[5])

        # Indexing: expr[expr]
        @self.pg.production('expression : expression SYMBOL_LBRACKET expression SYMBOL_RBRACKET', precedence='INDEX')
        def array_index(tokens):
            return ast_nodes.ArrayIndex(tokens[0], tokens[2])

        # Slicing: expr[start VSQVE stop]
        @self.pg.production('expression : expression SYMBOL_LBRACKET expression KEYWORD_VSQVE expression SYMBOL_RBRACKET', precedence='INDEX')
        def array_slice(tokens):
            return ast_nodes.ArraySlice(tokens[0], tokens[2], tokens[4])

        # ids
        # Parenthesized, comma-separated parameter-name list.
        @self.pg.production('ids : SYMBOL_LPARENS id_list')
        def ids(tokens):
            return tokens[1]

        @self.pg.production('id_list : SYMBOL_RPARENS')
        @self.pg.production('id_list : id SYMBOL_RPARENS')
        @self.pg.production('id_list : id SYMBOL_COMMA id_list')
        def id_list(calls):
            if len(calls) == 1:
                return []
            elif len(calls) == 2:
                return [calls[0]]
            else:
                return [calls[0]] + calls[2]

        # Unparenthesized comma-separated id list (destructuring tails).
        @self.pg.production('id_list_rest : id')
        @self.pg.production('id_list_rest : id SYMBOL_COMMA id_list_rest')
        def id_list_rest(calls):
            if len(calls) == 1:
                return [calls[0]]
            else:
                return [calls[0]] + calls[2]

        @self.pg.production("id : ID")
        def id_expression(tokens):
            return ast_nodes.ID(tokens[0].value)

        # Syntax errors: report position when the token carries one.
        @self.pg.error
        def error_handle(token):
            pos = token.source_pos
            loc = f" at line {pos.lineno}, column {pos.colno}" if pos else ""
            if token.name == "SYMBOL_LPARENS":
                raise SyntaxError(
                    f"Unexpected '('{loc}. To call a function, use INVOCA: INVOCA func (args)"
                )
            raise SyntaxError(f"Unexpected token '{token.value}'{loc}")

        parser = self.pg.build()
        return parser.parse(tokens_input)  # type: ignore
|