# centvrion/lexer.py
# Author: NikolajDanger (commit de697b121e, 2022-06-07 21:59:46 +02:00)
from rply import LexerGenerator
# Letters an identifier may contain, joined into a regex alternation
# ("a|b|c|..."). The set is the classical Latin alphabet (no j, u or w)
# plus underscore. str.join iterates a string directly, so the original
# list() wrapper was redundant.
valid_characters = '|'.join("abcdefghiklmnopqrstvxyz_")
# (token name, literal pattern) pairs for every reserved word of the
# language. The token name is the keyword prefixed with "KEYWORD_";
# the pattern is the keyword itself (all-alphabetic, so safe as a regex).
keyword_tokens = [
    (f"KEYWORD_{word}", word)
    for word in (
        "ALUID", "DEFINI", "DESIGNA", "DONICUM", "DUM",
        "ERUMPE", "EST", "FACE", "FALSITAS", "INVOCA",
        "MINUS", "NULLUS", "PER", "PLUS", "REDI",
        "SI", "TUNC", "USQUE", "UT", "VERITAS", "VOCA",
    )
]
# Built-in procedure names; all share the single token type "BUILTIN".
# Order is preserved from the original list — "AUDI_NUMERUS" comes
# before its prefix "AUDI", presumably so the longer rule wins.
builtin_tokens = [
    ("BUILTIN", builtin)
    for builtin in (
        "AUDI_NUMERUS",
        "AUDI",
        "DICE",
        "FORTIS_NUMERUS",
        "FORTIS_ELECTIONIS",
        "LONGITUDO",
    )
]
# Literal-value tokens.
data_tokens = [
    # Double-quoted string literal; ".*?" is non-greedy, so two strings
    # on the same line lex as two tokens instead of one.
    ("DATA_STRING", r"\".*?\""),
    # Run of Roman-numeral letters — the language's number literals.
    # NOTE(review): also accepts malformed numerals such as "IIII";
    # presumably validated later — confirm.
    ("DATA_NUMERAL", r"[IVXLCDM]+")
]
# Importable module names; all share the single token type "MODULE".
module_tokens = [
    ("MODULE", module)
    for module in ("FORS", "FRACTIO", "MAGNUM", "SUBNULLA")
]
# Single-character punctuation tokens. Each pattern matches exactly one
# symbol. "-" and "/" are not regex metacharacters outside a character
# class, so the original escapes (r"\-", r"\/") were redundant and have
# been dropped; the remaining escapes are required (or conventional,
# for the braces).
symbol_tokens = [
    ("SYMBOL_LPARENS", r"\("),
    ("SYMBOL_RPARENS", r"\)"),
    ("SYMBOL_LBRACKET", r"\["),
    ("SYMBOL_RBRACKET", r"\]"),
    ("SYMBOL_LCURL", r"\{"),
    ("SYMBOL_RCURL", r"\}"),
    ("SYMBOL_PLUS", r"\+"),
    ("SYMBOL_MINUS", r"-"),
    ("SYMBOL_TIMES", r"\*"),
    ("SYMBOL_DIVIDE", r"/")
]
# Whitespace that is tokenized rather than ignored: runs of newlines
# collapse into one NEWLINE token (plain spaces are dropped by the
# lexer's ignore rule instead).
whitespace_tokens = [
    ("NEWLINE", r"\n+")
]
# Every lexer rule in registration (i.e. matching-priority) order.
# The generic ID rule goes last so it only claims text no earlier,
# more specific rule matched.
all_tokens = [
    *keyword_tokens,
    *builtin_tokens,
    *module_tokens,
    *symbol_tokens,
    *data_tokens,
    *whitespace_tokens,
    ("ID", f"({valid_characters})+"),
]
class Lexer():
    """Thin wrapper around rply's LexerGenerator that registers every
    token rule from ``all_tokens`` and builds the finished lexer."""

    def __init__(self):
        self.lexer = LexerGenerator()
        # Guard so rules are registered at most once: in the original,
        # every call to get_lexer() re-ran _add_tokens(), appending a
        # duplicate copy of every rule (and the ignore pattern) to the
        # same generator.
        self._tokens_added = False

    def _add_tokens(self):
        """Register all token rules and the whitespace-ignore rule,
        exactly once."""
        if self._tokens_added:
            return
        for name, pattern in all_tokens:
            self.lexer.add(name, pattern)
        # Spaces separate tokens but never become tokens themselves.
        self.lexer.ignore(r" +")
        self._tokens_added = True

    def get_lexer(self):
        """Return the built rply lexer, registering the rules on first
        use. Safe to call more than once."""
        self._add_tokens()
        return self.lexer.build()