Files
centvrion/centvrion/lexer.py
2026-04-24 18:33:48 +02:00

137 lines
2.2 KiB
Python

from rply import LexerGenerator
# Letters allowed in identifiers: the classical Latin alphabet (no J, U, W —
# V doubles as U) plus underscore, joined into a regex alternation "a|b|...".
# str.join iterates a string directly; the list() wrapper was redundant.
valid_characters = '|'.join("abcdefghiklmnopqrstvxyz_")
# Reserved words of the language. Order matters: rply tries rules in the
# order they are registered, so a keyword that is a prefix of another
# (e.g. "IN" vs "INVOCA", "ET" vs "EST") must appear after the longer one.
_KEYWORDS = (
    "AETERNVM",
    "ALIVD",
    "AVGE",
    "CAPE",
    "AVT",
    "DEFINI",
    "DESIGNA",
    "DISPAR",
    "DIVIDE",
    "DONICVM",
    "DVM",
    "CONTINVA",
    "ERVMPE",
    "EST",
    "ET",
    "FAC",
    "FALSITAS",
    "FVNCTIO",
    "GRADV",
    "HAVD_MINVS",
    "HAVD_PLVS",
    "INVOCA",
    "IN",
    "MINVE",
    "MINVS",
    "MVLTIPLICA",
    "NON",
    "NVLLVS",
    "PER",
    "PLVS",
    "REDI",
    "RELIQVVM",
    "SI",
    "TVNC",
    "TABVLA",
    "TEMPTA",
    "VSQVE",
    "VT",
    "VERITAS",
    "CVM",
)
# Each keyword becomes a (token name, literal pattern) pair, e.g.
# ("KEYWORD_SI", "SI").
keyword_tokens = [(f"KEYWORD_{word}", word) for word in _KEYWORDS]
# Built-in function names. All share the single token name "BUILTIN"; the
# parser distinguishes them by the matched text. Order matters for prefix
# pairs: "AVDI_NVMERVS" must be tried before "AVDI".
_BUILTINS = (
    "AVDI_NVMERVS",
    "AVDI",
    "CLAVES",
    "DECIMATIO",
    "DIC",
    "DORMI",
    "EVERRE",
    "FORTVITVS_NVMERVS",
    "FORTVITA_ELECTIO",
    "LITTERA",
    "LONGITVDO",
    "MAIVSCVLA",
    "MINVSCVLA",
    "NVMERVS",
    "ORDINA",
    "SEMEN",
    "SENATVS",
    "TYPVS",
    "LEGE",
    "SCRIBE",
    "ADIVNGE",
    "QVAERE",
    "SVBSTITVE",
    "SCINDE",
    "PETE",
    "PETITVR",
    "AVSCVLTA",
)
builtin_tokens = [("BUILTIN", fn) for fn in _BUILTINS]
# Literal/data token patterns. Order matters: DATA_FRACTION is tried before
# DATA_NUMERAL because a fraction may begin with the same Roman-numeral
# letters as a bare numeral.
data_tokens = [
# Quoted string: double- OR single-quoted, with backslash escape sequences
# allowed inside either quoting style.
("DATA_STRING", r'("(?:[^"\\]|\\.)*"|' + r"'(?:[^'\\]|\\.)*')"),
# Roman fraction: optional integer part in Roman numerals, then a fractional
# part built from S and the glyphs : . | — presumably semis/uncia notation;
# TODO confirm the glyph semantics against the parser's fraction handling.
("DATA_FRACTION", r"([IVXLCDM][IVXLCDM_]*)?([S][S:.|]*|:[S:.|]+|\.[S:.|]*)"),
# Roman numeral: numeral letters, optionally separated by underscores.
("DATA_NUMERAL", r"[IVXLCDM][IVXLCDM_]*")
]
# Importable module names; all share the single token name "MODULE" and are
# distinguished by matched text, like the builtins above.
_MODULES = (
    "FORS",
    "FRACTIO",
    "MAGNVM",
    "SCRIPTA",
    "SVBNVLLA",
    "RETE",
)
module_tokens = [("MODULE", mod) for mod in _MODULES]
# Punctuation/operator tokens, keyed by name suffix. Regex metacharacters
# are escaped in the patterns; insertion order is preserved by the dict.
_SYMBOLS = {
    "LPARENS": r"\(",
    "RPARENS": r"\)",
    "LBRACKET": r"\[",
    "RBRACKET": r"\]",
    "LCURL": r"\{",
    "RCURL": r"\}",
    "PLUS": r"\+",
    "MINUS": r"\-",
    "TIMES": r"\*",
    "DIVIDE": r"\/",
    "AMPERSAND": r"&",
    "AT": r"@",
    "COMMA": r",",
}
symbol_tokens = [(f"SYMBOL_{suffix}", pattern) for suffix, pattern in _SYMBOLS.items()]
# Runs of one or more newlines are emitted as a NEWLINE token rather than
# being ignored like other whitespace.
whitespace_tokens = [("NEWLINE", r"\n+")]

# Master rule table, in matching-precedence order: builtins and keywords
# must precede the data rules (several start with Roman-numeral letters such
# as I, V, M) and the catch-all lowercase ID rule comes last.
all_tokens = (
    builtin_tokens
    + keyword_tokens
    + module_tokens
    + data_tokens
    + symbol_tokens
    + whitespace_tokens
    + [("ID", f"({valid_characters})+")]
)
class Lexer():
    """Factory for the Centurion rply lexer.

    Wraps ``rply.LexerGenerator``: registers every rule from ``all_tokens``
    plus the ignore patterns, then builds the finished lexer.
    """

    def __init__(self):
        self.lexer = LexerGenerator()
        # Guards _add_tokens: without it, a second get_lexer() call would
        # register every rule and ignore pattern a second time on the same
        # generator, producing duplicate rules.
        self._rules_added = False

    def _add_tokens(self):
        """Register all token rules and ignore patterns, exactly once."""
        if self._rules_added:
            return
        self._rules_added = True
        for token in all_tokens:
            self.lexer.add(*token)
        # Skipped input: runs of spaces, // line comments, /* block */ comments.
        # NOTE(review): tabs are not ignored — a tab in source will be a lex
        # error; confirm that is intended.
        self.lexer.ignore(r" +")
        self.lexer.ignore(r'//[^\n]*')
        self.lexer.ignore(r'/\*[\s\S]*?\*/')

    def get_lexer(self):
        """Build and return the lexer; safe to call more than once."""
        self._add_tokens()
        return self.lexer.build()