Files
centvrion/lexer.py
NikolajDanger f4c608aaf2
2022-06-08 16:16:20 +02:00

90 lines
1.6 KiB
Python

from rply import LexerGenerator
# Classical Latin alphabet (23 letters: no J, U or W) plus underscore,
# joined into a regex alternation "a|b|...|_" that the ID token uses below.
# Fix: a string is already iterable, so the wrapping list() call was redundant.
valid_characters = '|'.join("abcdefghiklmnopqrstvxyz_")
# Reserved words of the language.  Each becomes a KEYWORD_<word> token whose
# pattern is the word itself.  Order matters to rply: a longer word sharing a
# prefix with a shorter one (INVOCA vs. IN) must be listed first.
_KEYWORDS = (
    "ALVID",
    "DEFINI",
    "DESIGNA",
    "DONICVM",
    "DVM",
    "ERVMPE",
    "EST",
    "FACE",
    "FALSITAS",
    "INVOCA",
    "IN",
    "MINVS",
    "NVLLVS",
    "PER",
    "PLVS",
    "REDI",
    "SI",
    "TVNC",
    "VSQVE",
    "VT",
    "VERITAS",
    "VOCA",
)
keyword_tokens = [(f"KEYWORD_{word}", word) for word in _KEYWORDS]
# Built-in procedure names; all share the single token name "BUILTIN".
# AVDI_NVMERVS is listed before AVDI so the longer name is tried first.
builtin_tokens = [
    ("BUILTIN", name)
    for name in (
        "AVDI_NVMERVS",
        "AVDI",
        "DICE",
        "FORTIS_NVMERVS",
        "FORTIS_ELECTIONIS",
        "LONGITVDO",
    )
]
# Literal value tokens: double-quoted strings (non-greedy, no escape
# sequences) and Roman-numeral integers.
data_tokens = [("DATA_STRING", r"\".*?\""), ("DATA_NUMERAL", r"[IVXLCDM]+")]
# Importable module names; all share the single token name "MODULE".
module_tokens = [
    ("MODULE", name)
    for name in ("FORS", "FRACTIO", "MAGNVM", "SVBNVLLA")
]
# Punctuation and operator tokens.  Every pattern is one regex-escaped
# character; the SYMBOL_ prefix is applied uniformly by the comprehension.
symbol_tokens = [
    (f"SYMBOL_{name}", pattern)
    for name, pattern in (
        ("LPARENS", r"\("),
        ("RPARENS", r"\)"),
        ("LBRACKET", r"\["),
        ("RBRACKET", r"\]"),
        ("LCURL", r"\{"),
        ("RCURL", r"\}"),
        ("PLUS", r"\+"),
        ("MINUS", r"\-"),
        ("TIMES", r"\*"),
        ("DIVIDE", r"\/"),
    )
]
# A run of newlines collapses into one NEWLINE token.  (Spaces are not
# tokenized at all; they are ignored when the lexer is built.)
whitespace_tokens = [("NEWLINE", r"\n+")]
# Master rule list in registration order.  Order matters: the specific
# uppercase word rules must be tried before the catch-all lowercase ID rule,
# which matches any run of valid identifier characters.
all_tokens = [
    *keyword_tokens,
    *builtin_tokens,
    *module_tokens,
    *symbol_tokens,
    *data_tokens,
    *whitespace_tokens,
    ("ID", f"({valid_characters})+"),
]
class Lexer():
    """Builds an rply lexer for the language from the module token tables."""

    def __init__(self):
        # Underlying generator; rules are registered lazily in get_lexer().
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every (name, pattern) rule and ignore runs of spaces."""
        for name, pattern in all_tokens:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r" +")

    def get_lexer(self):
        """Return a ready-to-use lexer with all token rules installed."""
        self._add_tokens()
        return self.lexer.build()