🐐 Reviving this old project. Mainly adding tests and fixing bugs.

2026-03-31 18:25:20 +02:00
parent 88d7f0ed69
commit e845cb62c1
20 changed files with 1502 additions and 1090 deletions


@@ -3,88 +3,92 @@ from rply import LexerGenerator
 valid_characters = '|'.join(list("abcdefghiklmnopqrstvxyz_"))

 keyword_tokens = [("KEYWORD_"+i, i) for i in [
-    "ALVID",
-    "DEFINI",
-    "DESIGNA",
-    "DONICVM",
-    "DVM",
-    "ERVMPE",
-    "EST",
-    "FACE",
-    "FALSITAS",
-    "INVOCA",
-    "IN",
-    "MINVS",
-    "NVLLVS",
-    "PER",
-    "PLVS",
-    "REDI",
-    "SI",
-    "TVNC",
-    "VSQVE",
-    "VT",
-    "VERITAS",
-    "CVM"
+    "ALVID",
+    "AVT",
+    "DEFINI",
+    "DESIGNA",
+    "DONICVM",
+    "DVM",
+    "ERVMPE",
+    "EST",
+    "ET",
+    "FACE",
+    "FALSITAS",
+    "INVOCA",
+    "IN",
+    "MINVS",
+    "NVLLVS",
+    "PER",
+    "PLVS",
+    "REDI",
+    "SI",
+    "TVNC",
+    "VSQVE",
+    "VT",
+    "VERITAS",
+    "CVM"
 ]]

 builtin_tokens = [("BUILTIN", i) for i in [
-    "AVDI_NVMERVS",
-    "AVDI",
-    "DICE",
-    "FORTIS_NVMERVS",
-    "FORTIS_ELECTIONIS",
-    "LONGITVDO"
+    "AVDI_NVMERVS",
+    "AVDI",
+    "DICE",
+    "FORTIS_NVMERVS",
+    "FORTIS_ELECTIONIS",
+    "LONGITVDO"
 ]]

 data_tokens = [
-    ("DATA_STRING", r"\".*?\""),
-    ("DATA_NUMERAL", r"[IVXLCDM]+")
+    ("DATA_STRING", r"(\".*?\"|'.*?')"),
+    ("DATA_NUMERAL", r"[IVXLCDM][IVXLCDM_]*")
 ]

 module_tokens = [("MODULE", i) for i in [
-    "FORS",
-    "FRACTIO",
-    "MAGNVM",
-    "SVBNVLLA"
+    "FORS",
+    "FRACTIO",
+    "MAGNVM",
+    "SVBNVLLA"
 ]]

 symbol_tokens = [
-    ("SYMBOL_LPARENS", r"\("),
-    ("SYMBOL_RPARENS", r"\)"),
-    ("SYMBOL_LBRACKET", r"\["),
-    ("SYMBOL_RBRACKET", r"\]"),
-    ("SYMBOL_LCURL", r"\{"),
-    ("SYMBOL_RCURL", r"\}"),
-    ("SYMBOL_PLUS", r"\+"),
-    ("SYMBOL_MINUS", r"\-"),
-    ("SYMBOL_TIMES", r"\*"),
-    ("SYMBOL_DIVIDE", r"\/"),
-    ("SYMBOL_COMMA", r",")
+    ("SYMBOL_LPARENS", r"\("),
+    ("SYMBOL_RPARENS", r"\)"),
+    ("SYMBOL_LBRACKET", r"\["),
+    ("SYMBOL_RBRACKET", r"\]"),
+    ("SYMBOL_LCURL", r"\{"),
+    ("SYMBOL_RCURL", r"\}"),
+    ("SYMBOL_PLUS", r"\+"),
+    ("SYMBOL_MINUS", r"\-"),
+    ("SYMBOL_TIMES", r"\*"),
+    ("SYMBOL_DIVIDE", r"\/"),
+    ("SYMBOL_COMMA", r",")
 ]

 whitespace_tokens = [
-    ("NEWLINE", r"\n+")
+    ("NEWLINE", r"\n+")
 ]

 all_tokens = (
-    keyword_tokens +
-    builtin_tokens +
-    module_tokens +
-    symbol_tokens +
-    data_tokens +
-    whitespace_tokens +
-    [("ID", f"({valid_characters})+")]
+    keyword_tokens +
+    builtin_tokens +
+    module_tokens +
+    symbol_tokens +
+    data_tokens +
+    whitespace_tokens +
+    [("ID", f"({valid_characters})+")]
 )
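One thing worth keeping in mind while reviewing: rply tries rules in the order they were registered, so the concatenation order of all_tokens is load-bearing. Keywords such as DVM and CVM are also valid Roman-numeral strings, and they only lex as keywords because keyword_tokens precedes data_tokens (likewise, INVOCA is listed before its prefix IN). A self-contained sketch of that behaviour, using a hypothetical two-rule lexer rather than this file:

from rply import LexerGenerator

lg = LexerGenerator()
lg.add("KEYWORD_DVM", r"DVM")                    # registered first, so it wins
lg.add("DATA_NUMERAL", r"[IVXLCDM][IVXLCDM_]*")  # would otherwise consume "DVM"
lg.ignore(r" +")
lexer = lg.build()

print([t.gettokentype() for t in lexer.lex("DVM XII")])
# ['KEYWORD_DVM', 'DATA_NUMERAL']; swap the two add() calls and DVM lexes as a numeral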

 class Lexer():
-    def __init__(self):
-        self.lexer = LexerGenerator()
+    def __init__(self):
+        self.lexer = LexerGenerator()

-    def _add_tokens(self):
-        for token in all_tokens:
-            self.lexer.add(*token)
-        self.lexer.ignore(r" +")
+    def _add_tokens(self):
+        for token in all_tokens:
+            self.lexer.add(*token)
+        self.lexer.ignore(r" +")
+        self.lexer.ignore(r'//[^\n]*')
+        self.lexer.ignore(r'/\*[\s\S]*?\*/')

-    def get_lexer(self):
-        self._add_tokens()
-        return self.lexer.build()
+    def get_lexer(self):
+        self._add_tokens()
+        return self.lexer.build()
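For anyone else picking the project back up, a minimal usage sketch of the rebuilt lexer; the lexer module name and the sample source line are assumptions, not part of this commit:

from lexer import Lexer  # hypothetical import path

lexer = Lexer().get_lexer()
source = 'DICE "SALVE" // line comments are now stripped by the lexer\n'
for token in lexer.lex(source):
    print(token.gettokentype(), repr(token.getstr()))
# BUILTIN 'DICE'
# DATA_STRING '"SALVE"'
# NEWLINE '\n'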