# Token definitions and lexer builder (rply) for a Latin-keyword language.
from rply import LexerGenerator
# Letters permitted in identifiers, joined into a regex alternation
# ("a|b|c|...|_") that the catch-all ID rule wraps in a group.
# NOTE(review): 'j', 'u' and 'w' are absent — consistent with the classical
# Latin alphabet, so presumably deliberate; confirm this is intended.
# (str.join accepts the string directly; no list() wrapper needed.)
valid_characters = "|".join("abcdefghiklmnopqrstvxyz_")
# Reserved words of the language.  Each word W yields the rule pair
# ("KEYWORD_W", "W"): the token name and the literal text rply matches.
_KEYWORD_WORDS = (
    "ALUID",
    "DEFINI",
    "DESIGNA",
    "DONICUM",
    "DUM",
    "ERUMPE",
    "EST",
    "FACE",
    "FALSITAS",
    "INVOCA",
    "MINUS",
    "NULLUS",
    "PER",
    "PLUS",
    "REDI",
    "SI",
    "TUNC",
    "USQUE",
    "UT",
    "VERITAS",
    "VOCA",
)
keyword_tokens = [(f"KEYWORD_{word}", word) for word in _KEYWORD_WORDS]
# Built-in procedure names.  They all share the token type "BUILTIN"; the
# matched text tells them apart.  "AUDI_NUMERUS" is listed before "AUDI"
# so the longer name is tried first when rules are registered in order.
builtin_tokens = [
    ("BUILTIN", name)
    for name in (
        "AUDI_NUMERUS",
        "AUDI",
        "DICE",
        "FORTIS_NUMERUS",
        "FORTIS_ELECTIONIS",
        "LONGITUDO",
    )
]
data_tokens = [
|
|
("DATA_STRING", r"\".*?\""),
|
|
("DATA_NUMERAL", r"[IVXLCDM]+")
|
|
]
|
|
|
|
# Standard-module names; like builtins, they share one token type and are
# distinguished by their matched text.
module_tokens = [
    ("MODULE", name)
    for name in ("FORS", "FRACTIO", "MAGNUM", "SUBNULLA")
]
# Punctuation and operator tokens.  Every regex metacharacter is escaped
# so each pattern matches one literal symbol.
_SYMBOL_PATTERNS = {
    "LPARENS": r"\(",
    "RPARENS": r"\)",
    "LBRACKET": r"\[",
    "RBRACKET": r"\]",
    "LCURL": r"\{",
    "RCURL": r"\}",
    "PLUS": r"\+",
    "MINUS": r"\-",
    "TIMES": r"\*",
    "DIVIDE": r"\/",
}
symbol_tokens = [
    (f"SYMBOL_{suffix}", pattern)
    for suffix, pattern in _SYMBOL_PATTERNS.items()
]
# Newline runs are significant (spaces are ignored by the lexer, newlines
# are not), so they get their own token.  Accept both LF and CRLF line
# endings: the old r"\n+" left a stray '\r' that raised a lexing error on
# Windows-edited sources.  Plain-LF input tokenizes exactly as before.
whitespace_tokens = [
    ("NEWLINE", r"(\r?\n)+")
]
# Master rule list in priority order.  rply tries rules in registration
# order, so keywords, builtins and modules must come before the generic
# identifier rule; the catch-all ID pattern is therefore appended last.
all_tokens = [
    *keyword_tokens,
    *builtin_tokens,
    *module_tokens,
    *symbol_tokens,
    *data_tokens,
    *whitespace_tokens,
    ("ID", f"({valid_characters})+"),
]
class Lexer():
    """Thin wrapper around rply's LexerGenerator.

    Registers every rule from ``all_tokens`` (in priority order) plus a
    skip rule for spaces, then builds the finished lexer.
    """

    def __init__(self):
        self.lexer = LexerGenerator()
        # Guard so repeated get_lexer() calls don't register every rule
        # twice on the same generator (the original re-added them all on
        # each call).
        self._tokens_added = False

    def _add_tokens(self):
        """Register all token rules exactly once; runs of spaces are skipped."""
        if self._tokens_added:
            return
        for name, pattern in all_tokens:
            self.lexer.add(name, pattern)
        self.lexer.ignore(r" +")
        self._tokens_added = True

    def get_lexer(self):
        """Return a built rply lexer ready to tokenize source text."""
        self._add_tokens()
        return self.lexer.build()