diff --git a/DOCS.md b/DOCS.md index 833f93e..ea2fd06 100644 --- a/DOCS.md +++ b/DOCS.md @@ -509,6 +509,34 @@ The symbol `|` can be used to denote that the following fraction symbols are 1 " A single "set" of fraction symbols can only represent up to 11/12, as 12/12 can be written as 1. +### IASON + +> ⚠ **Warning.** The `IASON` module enables your program to read and write non-Roman numerals. Numbers handled by `IASON_LEGE` and `IASON_SCRIBE` use the decimal digits `0`–`9` (e.g. `42`, `1789`, `30`), not Roman numerals. This goes against the design philosophy of CENTVRION and should not be used unless absolutely necessary. + +![CVM IASON](snippets/iason.png) + +The `IASON` module adds two builtins for converting between `CENTVRION` values and JSON strings. + +`IASON_LEGE(string)` parses a JSON string and returns the corresponding `CENTVRION` value. Mappings: JSON `null` → `NVLLVS`, `true`/`false` → `VERITAS`/`FALSITAS`, integer → numeral, string → string, array → array, object → `TABVLA` (string keys). + +JSON floats with no fractional part (e.g. `3.0`) come back as integers. Other floats depend on whether the `FRACTIO` module is also loaded: with `FRACTIO`, `0.1` parses to the exact fraction `I:|::|::|S:.|S.|:` (1/10); without it, the value is rounded down (floored) to an integer, so `3.7` becomes `3` and `-2.5` becomes `-3`. + +![IASON_LEGE example](snippets/iason_lege.png) + +``` +> Marcus +> XXX +> [gladius scutum] +``` + +`IASON_SCRIBE(value)` serializes a `CENTVRION` value to a JSON string. Integers and fractions become JSON numbers (fractions via shortest-round-trip float), strings become JSON strings (with the standard escapes), arrays become arrays, dicts become objects (insertion order preserved). Functions and dicts with non-string keys raise an error. 
+ +![IASON_SCRIBE example](snippets/iason_scribe.png) + +``` +> {"nomen": "Marcus", "anni": 30} +``` + ### MAGNVM ![CVM MAGNVM](snippets/magnvm.png) diff --git a/cent b/cent index 8371069..1d0a886 100755 --- a/cent +++ b/cent @@ -53,17 +53,19 @@ def main(): sys.exit(f"CENTVRION error: {e}") else: c_source = compile_program(program) - runtime_c = os.path.join( + runtime_dir = os.path.join( os.path.dirname(__file__), - "centvrion", "compiler", "runtime", "cent_runtime.c" + "centvrion", "compiler", "runtime" ) + runtime_c = os.path.join(runtime_dir, "cent_runtime.c") + iason_c = os.path.join(runtime_dir, "cent_iason.c") out_path = os.path.splitext(file_path)[0] if args["--keep-c"]: tmp_path = out_path + ".c" with open(tmp_path, "w") as f: f.write(c_source) subprocess.run( - ["gcc", "-O2", tmp_path, runtime_c, "-o", out_path, "-lcurl", "-lmicrohttpd"], + ["gcc", "-O2", tmp_path, runtime_c, iason_c, "-o", out_path, "-lcurl", "-lmicrohttpd", "-lm"], check=True, ) else: @@ -72,7 +74,7 @@ def main(): tmp_path = tmp.name try: subprocess.run( - ["gcc", "-O2", tmp_path, runtime_c, "-o", out_path, "-lcurl", "-lmicrohttpd"], + ["gcc", "-O2", tmp_path, runtime_c, iason_c, "-o", out_path, "-lcurl", "-lmicrohttpd", "-lm"], check=True, ) finally: diff --git a/centvrion/ast_nodes.py b/centvrion/ast_nodes.py index c4f3917..6d90aaa 100644 --- a/centvrion/ast_nodes.py +++ b/centvrion/ast_nodes.py @@ -1,5 +1,7 @@ import functools import http.server +import json +import math import re import time import urllib.parse @@ -290,6 +292,51 @@ def frac_to_fraction(s, magnvm=False, svbnvlla=False): return total +def _json_to_val(obj): + if obj is None: + return ValNul() + if isinstance(obj, bool): + return ValBool(obj) + if isinstance(obj, int): + return ValInt(obj) + if isinstance(obj, Fraction): + if obj.denominator == 1: + return ValInt(obj.numerator) + return ValFrac(obj) + if isinstance(obj, str): + return ValStr(obj) + if isinstance(obj, list): + return ValList([_json_to_val(x) for x in 
obj]) + if isinstance(obj, dict): + return ValDict({k: _json_to_val(v) for k, v in obj.items()}) + raise CentvrionError(f"IASON_LEGE: unsupported JSON value of type {type(obj).__name__}") + + +def _val_to_json(val): + if isinstance(val, ValNul): + return None + if isinstance(val, ValBool): + return val.value() + if isinstance(val, ValInt): + return val.value() + if isinstance(val, ValFrac): + return float(val.value()) + if isinstance(val, ValStr): + return val.value() + if isinstance(val, ValList): + return [_val_to_json(x) for x in val.value()] + if isinstance(val, ValDict): + out = {} + for k, v in val.value().items(): + if not isinstance(k, str): + raise CentvrionError("IASON_SCRIBE: dict keys must be strings to serialize as JSON") + out[k] = _val_to_json(v) + return out + if isinstance(val, ValFunc): + raise CentvrionError("IASON_SCRIBE: cannot serialize a function") + raise CentvrionError(f"IASON_SCRIBE: cannot serialize value of type {type(val).__name__}") + + def fraction_to_frac(f, magnvm=False, svbnvlla=False) -> str: if f < 0: if not svbnvlla: @@ -1782,6 +1829,30 @@ class BuiltIn(Node): server = http.server.HTTPServer(("0.0.0.0", port.value()), _CentHandler) server.serve_forever() return vtable, ValNul() + case "IASON_LEGE": + if "IASON" not in vtable["#modules"]: + raise CentvrionError("Cannot use 'IASON_LEGE' without module 'IASON'") + if len(params) != 1: + raise CentvrionError("IASON_LEGE takes exactly I argument") + s = params[0] + if not isinstance(s, ValStr): + raise CentvrionError("IASON_LEGE requires a string") + fractio = "FRACTIO" in vtable["#modules"] + try: + if fractio: + parsed = json.loads(s.value(), parse_float=Fraction) + else: + parsed = json.loads(s.value(), parse_float=lambda x: math.floor(float(x))) + except json.JSONDecodeError as e: + raise CentvrionError(f"IASON_LEGE: invalid JSON: {e.msg}") + return vtable, _json_to_val(parsed) + case "IASON_SCRIBE": + if "IASON" not in vtable["#modules"]: + raise CentvrionError("Cannot use 
'IASON_SCRIBE' without module 'IASON'") + if len(params) != 1: + raise CentvrionError("IASON_SCRIBE takes exactly I argument") + obj = _val_to_json(params[0]) + return vtable, ValStr(json.dumps(obj, ensure_ascii=False)) case _: raise NotImplementedError(self.builtin) diff --git a/centvrion/compiler/emit_expr.py b/centvrion/compiler/emit_expr.py index c95a185..328d978 100644 --- a/centvrion/compiler/emit_expr.py +++ b/centvrion/compiler/emit_expr.py @@ -437,6 +437,25 @@ def _emit_builtin(node, ctx): lines.append(f"cent_avscvlta({param_vars[0]});") lines.append(f"CentValue {tmp} = cent_null();") + case "IASON_LEGE": + if not ctx.has_module("IASON"): + lines.append('cent_runtime_error("IASON module required for IASON_LEGE");') + lines.append(f"CentValue {tmp} = cent_null();") + elif len(param_vars) != 1: + raise _err(node, "IASON_LEGE takes exactly I argument") + else: + fractio_flag = "1" if ctx.has_module("FRACTIO") else "0" + lines.append(f"CentValue {tmp} = cent_iason_lege({param_vars[0]}, {fractio_flag});") + + case "IASON_SCRIBE": + if not ctx.has_module("IASON"): + lines.append('cent_runtime_error("IASON module required for IASON_SCRIBE");') + lines.append(f"CentValue {tmp} = cent_null();") + elif len(param_vars) != 1: + raise _err(node, "IASON_SCRIBE takes exactly I argument") + else: + lines.append(f"CentValue {tmp} = cent_iason_scribe({param_vars[0]});") + case _: raise NotImplementedError(node.builtin) diff --git a/centvrion/compiler/emitter.py b/centvrion/compiler/emitter.py index 4ca6768..0bb4992 100644 --- a/centvrion/compiler/emitter.py +++ b/centvrion/compiler/emitter.py @@ -58,6 +58,7 @@ def compile_program(program): # Includes lines += [ f'#include "{_RUNTIME_DIR}/cent_runtime.h"', + f'#include "{_RUNTIME_DIR}/cent_iason.h"', "", ] diff --git a/centvrion/compiler/runtime/cent_iason.c b/centvrion/compiler/runtime/cent_iason.c new file mode 100644 index 0000000..54c16cf --- /dev/null +++ b/centvrion/compiler/runtime/cent_iason.c @@ -0,0 +1,426 @@ 
+#include "cent_iason.h" + +#include +#include +#include +#include +#include + +/* ---------- shared helpers ----------------------------------------- */ + +static long iason_gcd(long a, long b) { + if (a < 0) a = -a; + if (b < 0) b = -b; + while (b) { long t = b; b = a % b; a = t; } + return a ? a : 1; +} + +static CentValue iason_frac_reduce(long num, long den) { + if (den < 0) { num = -num; den = -den; } + long g = iason_gcd(num, den); + num /= g; den /= g; + if (den == 1) return cent_int(num); + return cent_frac(num, den); +} + +/* ---------- parser ------------------------------------------------- */ + +typedef struct { + const char *src; + size_t pos; + size_t len; + int fractio; +} IasonParser; + +static void iason_die(const char *msg) { + cent_runtime_error(msg); +} + +static void iason_skip_ws(IasonParser *p) { + while (p->pos < p->len) { + char c = p->src[p->pos]; + if (c == ' ' || c == '\t' || c == '\n' || c == '\r') p->pos++; + else break; + } +} + +static int iason_peek(IasonParser *p) { + return (p->pos < p->len) ? (unsigned char)p->src[p->pos] : -1; +} + +static void iason_expect(IasonParser *p, char c, const char *msg) { + if (p->pos >= p->len || p->src[p->pos] != c) + iason_die(msg); + p->pos++; +} + +static void iason_expect_word(IasonParser *p, const char *word) { + size_t n = strlen(word); + if (p->len - p->pos < n || memcmp(p->src + p->pos, word, n) != 0) + iason_die("IASON_LEGE: invalid JSON literal"); + p->pos += n; +} + +/* Encode a Unicode codepoint as UTF-8 into buf; returns bytes written. 
*/ +static int iason_utf8_encode(unsigned cp, char *buf) { + if (cp <= 0x7F) { buf[0] = (char)cp; return 1; } + if (cp <= 0x7FF) { buf[0] = (char)(0xC0 | (cp >> 6)); + buf[1] = (char)(0x80 | (cp & 0x3F)); return 2; } + if (cp <= 0xFFFF) { buf[0] = (char)(0xE0 | (cp >> 12)); + buf[1] = (char)(0x80 | ((cp >> 6) & 0x3F)); + buf[2] = (char)(0x80 | (cp & 0x3F)); return 3; } + if (cp <= 0x10FFFF) { buf[0] = (char)(0xF0 | (cp >> 18)); + buf[1] = (char)(0x80 | ((cp >> 12) & 0x3F)); + buf[2] = (char)(0x80 | ((cp >> 6) & 0x3F)); + buf[3] = (char)(0x80 | (cp & 0x3F)); return 4; } + iason_die("IASON_LEGE: codepoint out of range"); + return 0; +} + +static unsigned iason_read_hex4(IasonParser *p) { + if (p->len - p->pos < 4) iason_die("IASON_LEGE: truncated \\u escape"); + unsigned v = 0; + for (int i = 0; i < 4; i++) { + char c = p->src[p->pos++]; + v <<= 4; + if (c >= '0' && c <= '9') v |= c - '0'; + else if (c >= 'a' && c <= 'f') v |= c - 'a' + 10; + else if (c >= 'A' && c <= 'F') v |= c - 'A' + 10; + else iason_die("IASON_LEGE: invalid hex in \\u escape"); + } + return v; +} + +/* Parses a JSON string literal at p->pos (positioned at the opening "), + returns an arena-allocated NUL-terminated UTF-8 string. */ +static char *iason_parse_string(IasonParser *p) { + iason_expect(p, '"', "IASON_LEGE: expected string"); + /* upper bound on output: same as remaining input (escapes shrink). 
*/ + size_t cap = (p->len - p->pos) + 1; + char *buf = cent_arena_alloc(cent_arena, cap); + size_t out = 0; + while (p->pos < p->len) { + unsigned char c = (unsigned char)p->src[p->pos++]; + if (c == '"') { buf[out] = '\0'; return buf; } + if (c == '\\') { + if (p->pos >= p->len) iason_die("IASON_LEGE: trailing \\ in string"); + char esc = p->src[p->pos++]; + switch (esc) { + case '"': buf[out++] = '"'; break; + case '\\': buf[out++] = '\\'; break; + case '/': buf[out++] = '/'; break; + case 'b': buf[out++] = '\b'; break; + case 'f': buf[out++] = '\f'; break; + case 'n': buf[out++] = '\n'; break; + case 'r': buf[out++] = '\r'; break; + case 't': buf[out++] = '\t'; break; + case 'u': { + unsigned cp = iason_read_hex4(p); + if (cp >= 0xD800 && cp <= 0xDBFF) { + /* high surrogate; expect \uXXXX low surrogate */ + if (p->len - p->pos < 6 || p->src[p->pos] != '\\' || p->src[p->pos + 1] != 'u') + iason_die("IASON_LEGE: missing low surrogate after high surrogate"); + p->pos += 2; + unsigned lo = iason_read_hex4(p); + if (lo < 0xDC00 || lo > 0xDFFF) + iason_die("IASON_LEGE: invalid low surrogate"); + cp = 0x10000 + (((cp - 0xD800) << 10) | (lo - 0xDC00)); + } else if (cp >= 0xDC00 && cp <= 0xDFFF) { + iason_die("IASON_LEGE: stray low surrogate"); + } + out += iason_utf8_encode(cp, buf + out); + break; + } + default: iason_die("IASON_LEGE: invalid escape sequence"); + } + } else if (c < 0x20) { + iason_die("IASON_LEGE: unescaped control character in string"); + } else { + buf[out++] = (char)c; + } + } + iason_die("IASON_LEGE: unterminated string"); + return NULL; +} + +/* Cap on fractional digits parsed exactly; beyond this we truncate to + keep `long` arithmetic safe (10^18 fits in int64). */ +#define IASON_MAX_FRAC_DIGITS 18 + +static CentValue iason_parse_number(IasonParser *p) { + size_t start = p->pos; + int negative = 0; + if (p->src[p->pos] == '-') { negative = 1; p->pos++; } + + /* Integer part. 
*/ + if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos])) + iason_die("IASON_LEGE: invalid number"); + if (p->src[p->pos] == '0') { + p->pos++; + } else { + while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) p->pos++; + } + + int has_frac = 0, has_exp = 0; + size_t frac_start = 0, frac_end = 0; + if (p->pos < p->len && p->src[p->pos] == '.') { + has_frac = 1; + p->pos++; + frac_start = p->pos; + if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos])) + iason_die("IASON_LEGE: invalid number"); + while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) p->pos++; + frac_end = p->pos; + } + long exp = 0; + if (p->pos < p->len && (p->src[p->pos] == 'e' || p->src[p->pos] == 'E')) { + has_exp = 1; + p->pos++; + int esign = 1; + if (p->pos < p->len && (p->src[p->pos] == '+' || p->src[p->pos] == '-')) { + if (p->src[p->pos] == '-') esign = -1; + p->pos++; + } + if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos])) + iason_die("IASON_LEGE: invalid number"); + while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) { + exp = exp * 10 + (p->src[p->pos] - '0'); + p->pos++; + } + exp *= esign; + } + + size_t end = p->pos; + + if (!has_frac && !has_exp) { + /* Pure integer. Use strtol-style parse (we already validated digits). */ + long v = 0; + for (size_t i = (negative ? start + 1 : start); i < end; i++) { + v = v * 10 + (p->src[i] - '0'); + } + return cent_int(negative ? -v : v); + } + + if (!p->fractio) { + /* Floor to int. strtod handles the full grammar here. */ + char *tmp = cent_arena_alloc(cent_arena, end - start + 1); + memcpy(tmp, p->src + start, end - start); + tmp[end - start] = '\0'; + double d = strtod(tmp, NULL); + return cent_int((long)floor(d)); + } + + /* FRACTIO loaded: build an exact fraction from the decimal/exponent form. */ + long num = 0; + /* Integer-part digits */ + size_t int_start = negative ? start + 1 : start; + size_t int_end = has_frac ? (frac_start - 1) : (has_exp ? 
end - 0 : end); + /* If we have an exponent without a fraction part, find where digits end. */ + if (has_exp && !has_frac) { + int_end = int_start; + while (int_end < p->len && isdigit((unsigned char)p->src[int_end])) int_end++; + } + for (size_t i = int_start; i < int_end; i++) { + num = num * 10 + (p->src[i] - '0'); + } + /* Fractional digits, capped */ + long den = 1; + if (has_frac) { + size_t take = frac_end - frac_start; + if (take > IASON_MAX_FRAC_DIGITS) take = IASON_MAX_FRAC_DIGITS; + for (size_t i = 0; i < take; i++) { + num = num * 10 + (p->src[frac_start + i] - '0'); + den *= 10; + } + } + if (negative) num = -num; + /* Apply exponent: positive shifts num, negative shifts den. */ + while (exp > 0) { num *= 10; exp--; } + while (exp < 0) { den *= 10; exp++; } + return iason_frac_reduce(num, den); +} + +static CentValue iason_parse_value(IasonParser *p); + +static CentValue iason_parse_array(IasonParser *p) { + iason_expect(p, '[', "IASON_LEGE: expected ["); + iason_skip_ws(p); + CentValue lst = cent_list_new(4); + if (iason_peek(p) == ']') { p->pos++; return lst; } + for (;;) { + iason_skip_ws(p); + CentValue elem = iason_parse_value(p); + cent_list_push(&lst, elem); + iason_skip_ws(p); + int c = iason_peek(p); + if (c == ',') { p->pos++; continue; } + if (c == ']') { p->pos++; return lst; } + iason_die("IASON_LEGE: expected , or ] in array"); + } +} + +static CentValue iason_parse_object(IasonParser *p) { + iason_expect(p, '{', "IASON_LEGE: expected {"); + iason_skip_ws(p); + CentValue d = cent_dict_new(4); + if (iason_peek(p) == '}') { p->pos++; return d; } + for (;;) { + iason_skip_ws(p); + if (iason_peek(p) != '"') iason_die("IASON_LEGE: object key must be a string"); + char *key = iason_parse_string(p); + iason_skip_ws(p); + iason_expect(p, ':', "IASON_LEGE: expected : after object key"); + iason_skip_ws(p); + CentValue val = iason_parse_value(p); + cent_dict_set(&d, cent_str(key), val); + iason_skip_ws(p); + int c = iason_peek(p); + if (c == ',') { 
p->pos++; continue; } + if (c == '}') { p->pos++; return d; } + iason_die("IASON_LEGE: expected , or } in object"); + } +} + +static CentValue iason_parse_value(IasonParser *p) { + iason_skip_ws(p); + int c = iason_peek(p); + if (c < 0) iason_die("IASON_LEGE: unexpected end of input"); + if (c == '{') return iason_parse_object(p); + if (c == '[') return iason_parse_array(p); + if (c == '"') return cent_str(iason_parse_string(p)); + if (c == 't') { iason_expect_word(p, "true"); return cent_bool(1); } + if (c == 'f') { iason_expect_word(p, "false"); return cent_bool(0); } + if (c == 'n') { iason_expect_word(p, "null"); return cent_null(); } + if (c == '-' || (c >= '0' && c <= '9')) return iason_parse_number(p); + iason_die("IASON_LEGE: unexpected character"); + return cent_null(); +} + +CentValue cent_iason_lege(CentValue s, int fractio_loaded) { + if (s.type != CENT_STR) cent_type_error("IASON_LEGE requires a string"); + IasonParser p = { s.sval, 0, strlen(s.sval), fractio_loaded }; + CentValue v = iason_parse_value(&p); + iason_skip_ws(&p); + if (p.pos != p.len) iason_die("IASON_LEGE: trailing data after JSON value"); + return v; +} + +/* ---------- serializer --------------------------------------------- */ + +typedef struct { + char *buf; + size_t len; + size_t cap; +} IasonBuf; + +static void iason_buf_reserve(IasonBuf *b, size_t extra) { + if (b->len + extra <= b->cap) return; + size_t new_cap = b->cap ? 
b->cap * 2 : 64; + while (new_cap < b->len + extra) new_cap *= 2; + char *nb = cent_arena_alloc(cent_arena, new_cap); + if (b->len) memcpy(nb, b->buf, b->len); + b->buf = nb; + b->cap = new_cap; +} + +static void iason_buf_putc(IasonBuf *b, char c) { + iason_buf_reserve(b, 1); + b->buf[b->len++] = c; +} + +static void iason_buf_puts(IasonBuf *b, const char *s) { + size_t n = strlen(s); + iason_buf_reserve(b, n); + memcpy(b->buf + b->len, s, n); + b->len += n; +} + +static void iason_buf_putn(IasonBuf *b, const char *s, size_t n) { + iason_buf_reserve(b, n); + memcpy(b->buf + b->len, s, n); + b->len += n; +} + +static void iason_emit_string(IasonBuf *b, const char *s) { + iason_buf_putc(b, '"'); + for (const unsigned char *p = (const unsigned char *)s; *p; p++) { + unsigned char c = *p; + switch (c) { + case '"': iason_buf_puts(b, "\\\""); break; + case '\\': iason_buf_puts(b, "\\\\"); break; + case '\b': iason_buf_puts(b, "\\b"); break; + case '\f': iason_buf_puts(b, "\\f"); break; + case '\n': iason_buf_puts(b, "\\n"); break; + case '\r': iason_buf_puts(b, "\\r"); break; + case '\t': iason_buf_puts(b, "\\t"); break; + default: + if (c < 0x20) { + char tmp[8]; + snprintf(tmp, sizeof tmp, "\\u%04x", c); + iason_buf_puts(b, tmp); + } else { + iason_buf_putc(b, (char)c); + } + } + } + iason_buf_putc(b, '"'); +} + +static void iason_emit_value(IasonBuf *b, CentValue v) { + switch (v.type) { + case CENT_NULL: iason_buf_puts(b, "null"); return; + case CENT_BOOL: iason_buf_puts(b, v.bval ? "true" : "false"); return; + case CENT_INT: { + char tmp[32]; + int n = snprintf(tmp, sizeof tmp, "%ld", v.ival); + iason_buf_putn(b, tmp, (size_t)n); + return; + } + case CENT_FRAC: { + double d = (double)v.fval.num / (double)v.fval.den; + /* Shortest round-trippable representation, like Python's float repr. 
*/ + char tmp[64]; + int n = 0; + for (int prec = 15; prec <= 17; prec++) { + n = snprintf(tmp, sizeof tmp, "%.*g", prec, d); + if (strtod(tmp, NULL) == d) break; + } + iason_buf_putn(b, tmp, (size_t)n); + return; + } + case CENT_STR: iason_emit_string(b, v.sval); return; + case CENT_LIST: { + iason_buf_putc(b, '['); + for (int i = 0; i < v.lval.len; i++) { + if (i > 0) iason_buf_puts(b, ", "); + iason_emit_value(b, v.lval.items[i]); + } + iason_buf_putc(b, ']'); + return; + } + case CENT_DICT: { + iason_buf_putc(b, '{'); + for (int i = 0; i < v.dval.len; i++) { + if (i > 0) iason_buf_puts(b, ", "); + CentValue k = v.dval.keys[i]; + if (k.type != CENT_STR) + cent_runtime_error("IASON_SCRIBE: dict keys must be strings to serialize as JSON"); + iason_emit_string(b, k.sval); + iason_buf_puts(b, ": "); + iason_emit_value(b, v.dval.vals[i]); + } + iason_buf_putc(b, '}'); + return; + } + case CENT_FUNC: + cent_runtime_error("IASON_SCRIBE: cannot serialize a function"); + return; + } +} + +CentValue cent_iason_scribe(CentValue v) { + IasonBuf b = { NULL, 0, 0 }; + iason_emit_value(&b, v); + iason_buf_putc(&b, '\0'); + return cent_str(b.buf); +} diff --git a/centvrion/compiler/runtime/cent_iason.h b/centvrion/compiler/runtime/cent_iason.h new file mode 100644 index 0000000..7fc6123 --- /dev/null +++ b/centvrion/compiler/runtime/cent_iason.h @@ -0,0 +1,14 @@ +#ifndef CENT_IASON_H +#define CENT_IASON_H + +#include "cent_runtime.h" + +/* IASON_LEGE — parse a JSON string into a CENTVRION value tree. + When fractio_loaded != 0, JSON floats become exact fractions; otherwise + they are floored to ints. */ +CentValue cent_iason_lege(CentValue s, int fractio_loaded); + +/* IASON_SCRIBE — serialize a CENTVRION value to a JSON string. 
*/ +CentValue cent_iason_scribe(CentValue v); + +#endif /* CENT_IASON_H */ diff --git a/centvrion/lexer.py b/centvrion/lexer.py index 0e68727..914e438 100644 --- a/centvrion/lexer.py +++ b/centvrion/lexer.py @@ -80,7 +80,9 @@ builtin_tokens = [("BUILTIN", i) for i in [ "SCINDE", "PETE", "PETITVR", - "AVSCVLTA" + "AVSCVLTA", + "IASON_LEGE", + "IASON_SCRIBE" ]] data_tokens = [ @@ -92,6 +94,7 @@ data_tokens = [ module_tokens = [("MODULE", i) for i in [ "FORS", "FRACTIO", + "IASON", "MAGNVM", "SCRIPTA", "SVBNVLLA", diff --git a/language/main.tex b/language/main.tex index d2051ec..5231778 100644 --- a/language/main.tex +++ b/language/main.tex @@ -104,7 +104,7 @@ \newpage \begin{itemize} \item \textbf{newline}: \\ Newlines are combined, so a single newline is the same as multiple. - \item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. Available modules: \texttt{FORS} (randomness), \texttt{FRACTIO} (fractions), \texttt{MAGNVM} (large integers), \texttt{SCRIPTA} (file I/O: \texttt{LEGE}, \texttt{SCRIBE}, \texttt{ADIVNGE}), \texttt{SVBNVLLA} (negative literals), \texttt{RETE} (networking: \texttt{PETE}, \texttt{PETITVR}, \texttt{AVSCVLTA}). + \item \textbf{module-name}: \\ Modules are flags given to the interpreter/compiler, to let it know you want to be using certain rules, functions, or features. Available modules: \texttt{FORS} (randomness), \texttt{FRACTIO} (fractions), \texttt{IASON} (JSON I/O: \texttt{IASON\_LEGE}, \texttt{IASON\_SCRIBE}), \texttt{MAGNVM} (large integers), \texttt{SCRIPTA} (file I/O: \texttt{LEGE}, \texttt{SCRIBE}, \texttt{ADIVNGE}), \texttt{SVBNVLLA} (negative literals), \texttt{RETE} (networking: \texttt{PETE}, \texttt{PETITVR}, \texttt{AVSCVLTA}). \item \textbf{id}: \\ Variable. Can only consist of lowercase characters and underscores, but not the letters j, u, or w. \item \textbf{builtin}: \\ Builtin functions are uppercase latin words. 
\item \textbf{string}: \\ Any text encased in \texttt{"} or \texttt{'} characters. Single-quoted strings are always literal. Strings support 1-based indexing (\texttt{string[I]}) and inclusive slicing (\texttt{string[I VSQVE III]}), returning single-character strings and substrings respectively. diff --git a/snippets/iason.cent b/snippets/iason.cent new file mode 100644 index 0000000..b754f3d --- /dev/null +++ b/snippets/iason.cent @@ -0,0 +1 @@ +CVM IASON diff --git a/snippets/iason.png b/snippets/iason.png new file mode 100644 index 0000000..56ed2bb Binary files /dev/null and b/snippets/iason.png differ diff --git a/snippets/iason_lege.cent b/snippets/iason_lege.cent new file mode 100644 index 0000000..a712d13 --- /dev/null +++ b/snippets/iason_lege.cent @@ -0,0 +1,5 @@ +CVM IASON +DESIGNA data VT IASON_LEGE('{"nomen": "Marcus", "anni": 30, "armorum": ["gladius", "scutum"]}') +DIC(data["nomen"]) +DIC(data["anni"]) +DIC(data["armorum"]) diff --git a/snippets/iason_lege.png b/snippets/iason_lege.png new file mode 100644 index 0000000..2e4dcf0 Binary files /dev/null and b/snippets/iason_lege.png differ diff --git a/snippets/iason_scribe.cent b/snippets/iason_scribe.cent new file mode 100644 index 0000000..55ee2b8 --- /dev/null +++ b/snippets/iason_scribe.cent @@ -0,0 +1,3 @@ +CVM IASON +DESIGNA persona VT TABVLA {"nomen" VT "Marcus", "anni" VT XXX} +DIC(IASON_SCRIBE(persona)) diff --git a/snippets/iason_scribe.png b/snippets/iason_scribe.png new file mode 100644 index 0000000..f958a2b Binary files /dev/null and b/snippets/iason_scribe.png differ diff --git a/snippets/syntaxes/centvrion.sublime-syntax b/snippets/syntaxes/centvrion.sublime-syntax index d2cfe32..2fecc8d 100644 --- a/snippets/syntaxes/centvrion.sublime-syntax +++ b/snippets/syntaxes/centvrion.sublime-syntax @@ -70,11 +70,11 @@ contexts: scope: constant.language.centvrion builtins: - - match: 
'\b(ADDE|ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|CONFLA|CRIBRA|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|INSERE|IVNGE|LEGE|LITTERA|LONGITVDO|MAIVSCVLA|MINVSCVLA|MVTA|NECTE|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TOLLE|TYPVS)\b' + - match: '\b(ADDE|ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|CONFLA|CRIBRA|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|IASON_LEGE|IASON_SCRIBE|INSERE|IVNGE|LEGE|LITTERA|LONGITVDO|MAIVSCVLA|MINVSCVLA|MVTA|NECTE|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TOLLE|TYPVS)\b' scope: support.function.builtin.centvrion modules: - - match: '\b(FORS|FRACTIO|MAGNVM|RETE|SCRIPTA|SVBNVLLA)\b' + - match: '\b(FORS|FRACTIO|IASON|MAGNVM|RETE|SCRIPTA|SVBNVLLA)\b' scope: support.class.module.centvrion keywords: diff --git a/tests/12_test_failures.py b/tests/12_test_failures.py index 7d55b79..53bfba0 100644 --- a/tests/12_test_failures.py +++ b/tests/12_test_failures.py @@ -8,7 +8,7 @@ from tests._helpers import ( String, TemptaStatement, UnaryMinus, UnaryNot, Fractio, frac_to_fraction, fraction_to_frac, num_to_int, int_to_num, make_string, ValInt, ValStr, ValBool, ValList, ValDict, ValNul, ValFunc, ValFrac, - CentvrionError, _RUNTIME_C, _cent_rng, + CentvrionError, _RUNTIME_C, _IASON_C, _cent_rng, Lexer, Parser, compile_program, os, subprocess, tempfile, StringIO, patch, ) @@ -160,6 +160,24 @@ error_tests = [ ('IVNGE(["a"], II)', CentvrionError), # IVNGE second arg not a list ("IVNGE([VERITAS], [I])", CentvrionError), # IVNGE invalid key type (bool) ("IVNGE([[I]], [II])", CentvrionError), # IVNGE invalid key type (list) + ("IASON_LEGE('null')", CentvrionError), # IASON module required for IASON_LEGE + ("IASON_SCRIBE(NVLLVS)", CentvrionError), # IASON module required for IASON_SCRIBE + ("CVM IASON\nIASON_LEGE(I)", CentvrionError), # IASON_LEGE non-string arg + ("CVM IASON\nIASON_LEGE()", CentvrionError), # IASON_LEGE no args + ("CVM IASON\nIASON_LEGE('null', 
def _scribe(arg, modules=("IASON",)):
    """Build the expected AST for a program that evaluates ``IASON_SCRIBE(arg)``.

    ``arg`` is the already-built argument node; ``modules`` names the modules
    the program declares, in order.
    """
    module_calls = [ModuleCall(name) for name in modules]
    call = BuiltIn("IASON_SCRIBE", [arg])
    return Program(module_calls, [ExpressionStatement(call)])


def _lege(arg, modules=("IASON",)):
    """Build the expected AST for a program that evaluates ``IASON_LEGE`` on
    the string literal ``arg``, declaring ``modules`` in order."""
    module_calls = [ModuleCall(name) for name in modules]
    call = BuiltIn("IASON_LEGE", [String(arg)])
    return Program(module_calls, [ExpressionStatement(call)])


def _src_lege(arg, extra_modules=()):
    """Return source text that declares IASON (then any ``extra_modules``) and
    calls ``IASON_LEGE`` on ``arg`` wrapped in single quotes."""
    header = "".join(f"CVM {name}\n" for name in ("IASON", *extra_modules))
    return f"{header}IASON_LEGE('{arg}')"


def _src_scribe(arg_text, extra_modules=()):
    """Return source text that declares IASON (then any ``extra_modules``) and
    calls ``IASON_SCRIBE`` on the raw expression text ``arg_text``."""
    header = "".join(f"CVM {name}\n" for name in ("IASON", *extra_modules))
    return f"{header}IASON_SCRIBE({arg_text})"
+ (_src_lege('"\\u00e9"'), + _lege('"\\u00e9"'), + ValStr("é")), + + # ---- Parse: float without FRACTIO floors ---- + (_src_lege("3.7"), _lege("3.7"), ValInt(3)), + (_src_lege("-2.5"), _lege("-2.5"), ValInt(-3)), + (_src_lege("1e2"), _lege("1e2"), ValInt(100)), + + # ---- Parse: float with FRACTIO is exact ---- + (_src_lege("0.5", extra_modules=("FRACTIO",)), + _lege("0.5", modules=("IASON", "FRACTIO")), + ValFrac(Fraction(1, 2))), + (_src_lege("0.1", extra_modules=("FRACTIO",)), + _lege("0.1", modules=("IASON", "FRACTIO")), + ValFrac(Fraction(1, 10))), + (_src_lege("-0.25", extra_modules=("FRACTIO",)), + _lege("-0.25", modules=("IASON", "FRACTIO")), + ValFrac(Fraction(-1, 4))), + (_src_lege("5", extra_modules=("FRACTIO",)), + _lege("5", modules=("IASON", "FRACTIO")), + ValInt(5)), + (_src_lege("3.0", extra_modules=("FRACTIO",)), + _lege("3.0", modules=("IASON", "FRACTIO")), + ValInt(3)), + + # ---- Serialize: scalars ---- + (_src_scribe("NVLLVS"), _scribe(Nullus()), ValStr("null")), + (_src_scribe("VERITAS"), _scribe(Bool(True)), ValStr("true")), + (_src_scribe("FALSITAS"), _scribe(Bool(False)), ValStr("false")), + (_src_scribe("XLII"), _scribe(Numeral("XLII")), ValStr("42")), + (_src_scribe('"hello"'), _scribe(String("hello")), ValStr('"hello"')), + (_src_scribe("[]"), _scribe(DataArray([])), ValStr("[]")), + (_src_scribe("TABVLA {}"), _scribe(DataDict([])), ValStr("{}")), + + # ---- Serialize: nested ---- + (_src_scribe("[I, II, III]"), + _scribe(DataArray([Numeral("I"), Numeral("II"), Numeral("III")])), + ValStr("[1, 2, 3]")), + (_src_scribe('TABVLA {"a" VT I, "b" VT VERITAS}'), + _scribe(DataDict([(String("a"), Numeral("I")), (String("b"), Bool(True))])), + ValStr('{"a": 1, "b": true}')), + + # ---- Serialize: special chars ---- + (_src_scribe('"a\\nb"'), + _scribe(String("a\nb")), + ValStr('"a\\nb"')), + (_src_scribe('"a\\"b"'), + _scribe(String('a"b')), + ValStr('"a\\"b"')), + (_src_scribe('"a\\\\b"'), + _scribe(String("a\\b")), + ValStr('"a\\\\b"')), + + # 
---- Round-trip ----
+    ("CVM IASON\nDIC(IASON_LEGE('[1, 2, 3]'))",
+     Program([ModuleCall("IASON")], [ExpressionStatement(BuiltIn("DIC",
+         [BuiltIn("IASON_LEGE", [String("[1, 2, 3]")])]))]),
+     ValStr("[I II III]"), "[I II III]\n"),
+
+    ("CVM IASON\nDIC(IASON_SCRIBE(IASON_LEGE('{\"a\": [1, true, null]}')))",
+     Program([ModuleCall("IASON")], [ExpressionStatement(BuiltIn("DIC",
+         [BuiltIn("IASON_SCRIBE",
+             [BuiltIn("IASON_LEGE", [String('{"a": [1, true, null]}')])])]))]),
+     ValStr('{"a": [1, true, null]}'),
+     '{"a": [1, true, null]}\n'),
+
+    ("CVM IASON\nCVM FRACTIO\nDIC(IASON_SCRIBE(IASON_LEGE('0.5')))",
+     Program([ModuleCall("IASON"), ModuleCall("FRACTIO")],
+         [ExpressionStatement(BuiltIn("DIC",
+             [BuiltIn("IASON_SCRIBE",
+                 [BuiltIn("IASON_LEGE", [String("0.5")])])]))]),
+     ValStr("0.5"), "0.5\n"),
+
+    ("CVM IASON\nCVM FRACTIO\nDIC(IASON_SCRIBE(IASON_LEGE('0.1')))",
+     Program([ModuleCall("IASON"), ModuleCall("FRACTIO")],
+         [ExpressionStatement(BuiltIn("DIC",
+             [BuiltIn("IASON_SCRIBE",
+                 [BuiltIn("IASON_LEGE", [String("0.1")])])]))]),
+     ValStr("0.1"), "0.1\n"),
+
+    # ---- Serialize: insertion order preserved ----
+    (_src_scribe('TABVLA {"b" VT II, "a" VT I, "c" VT III}'),
+     _scribe(DataDict([
+         (String("b"), Numeral("II")),
+         (String("a"), Numeral("I")),
+         (String("c"), Numeral("III")),
+     ])),
+     ValStr('{"b": 2, "a": 1, "c": 3}')),
+
+    # ---- Whitespace-tolerant parse ----
+    (_src_lege(" [ 1 , 2 ] "),
+     _lege(" [ 1 , 2 ] "),
+     ValList([ValInt(1), ValInt(2)])),
+
+    # ---- Unicode passes through serialize (ensure_ascii=False) ----
+    ('CVM IASON\nDIC(IASON_SCRIBE("café"))',
+     Program([ModuleCall("IASON")], [ExpressionStatement(BuiltIn("DIC",
+         [BuiltIn("IASON_SCRIBE", [String("café")])]))]),
+     ValStr('"café"'), '"café"\n'),
+]
+
+
+class TestIason(unittest.TestCase):
+    """Runs every `iason_tests` case through the shared `run_test` helper."""
+
+    @parameterized.expand(iason_tests)
+    def test_iason(self, source, nodes, value, output="", input_lines=None):
+        """Check a single IASON case.
+
+        Each `iason_tests` tuple supplies the arguments positionally:
+        the program source, the expected AST, the expected value and,
+        optionally, the expected output and the lines fed as input.
+        """
+        # A None sentinel replaces the shared mutable `[]` default;
+        # run_test still receives a list, exactly as before.
+        if input_lines is None:
+            input_lines = []
+        run_test(self, source, nodes, value, output, input_lines)
diff --git a/tests/_helpers.py 
b/tests/_helpers.py index 417f696..23ba73d 100644 --- a/tests/_helpers.py +++ b/tests/_helpers.py @@ -24,10 +24,12 @@ from centvrion.lexer import Lexer from centvrion.parser import Parser from centvrion.values import ValInt, ValStr, ValBool, ValList, ValDict, ValNul, ValFunc, ValFrac -_RUNTIME_C = os.path.join( +_RUNTIME_DIR = os.path.join( os.path.dirname(__file__), "..", - "centvrion", "compiler", "runtime", "cent_runtime.c" + "centvrion", "compiler", "runtime" ) +_RUNTIME_C = os.path.join(_RUNTIME_DIR, "cent_runtime.c") +_IASON_C = os.path.join(_RUNTIME_DIR, "cent_iason.c") def run_test(self, source, target_nodes, target_value, target_output="", input_lines=[]): _cent_rng.seed(1) @@ -92,7 +94,7 @@ def run_test(self, source, target_nodes, target_value, target_output="", input_l tmp_bin_path = tmp_bin.name try: subprocess.run( - ["gcc", "-O2", tmp_c_path, _RUNTIME_C, "-o", tmp_bin_path, "-lcurl", "-lmicrohttpd"], + ["gcc", "-O2", tmp_c_path, _RUNTIME_C, _IASON_C, "-o", tmp_bin_path, "-lcurl", "-lmicrohttpd", "-lm"], check=True, capture_output=True, ) stdin_data = "".join(f"{l}\n" for l in input_lines) @@ -124,7 +126,7 @@ def run_compiler_error_test(self, source): tmp_bin_path = tmp_bin.name try: subprocess.run( - ["gcc", "-O2", tmp_c_path, _RUNTIME_C, "-o", tmp_bin_path, "-lcurl", "-lmicrohttpd"], + ["gcc", "-O2", tmp_c_path, _RUNTIME_C, _IASON_C, "-o", tmp_bin_path, "-lcurl", "-lmicrohttpd", "-lm"], check=True, capture_output=True, ) proc = subprocess.run([tmp_bin_path], capture_output=True, text=True) diff --git a/vscode-extension/syntaxes/cent.tmLanguage.json b/vscode-extension/syntaxes/cent.tmLanguage.json index eea8ede..ead9e4c 100644 --- a/vscode-extension/syntaxes/cent.tmLanguage.json +++ b/vscode-extension/syntaxes/cent.tmLanguage.json @@ -65,7 +65,7 @@ "patterns": [ { "name": "support.function.builtin.cent", - "match": 
"\\b(ADDE|ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|CONFLA|CRIBRA|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|INSERE|IVNGE|LEGE|LITTERA|LONGITVDO|MAIVSCVLA|MINVSCVLA|MVTA|NECTE|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TOLLE|TYPVS)\\b" + "match": "\\b(ADDE|ADIVNGE|AVDI_NVMERVS|AVDI|AVSCVLTA|CLAVES|CONFLA|CRIBRA|DECIMATIO|DIC|DORMI|EVERRE|FORTVITVS_NVMERVS|FORTVITA_ELECTIO|IASON_LEGE|IASON_SCRIBE|INSERE|IVNGE|LEGE|LITTERA|LONGITVDO|MAIVSCVLA|MINVSCVLA|MVTA|NECTE|NVMERVS|ORDINA|PETE|PETITVR|QVAERE|SCINDE|SCRIBE|SEMEN|SENATVS|SVBSTITVE|TOLLE|TYPVS)\\b" } ] }, @@ -73,7 +73,7 @@ "patterns": [ { "name": "support.class.module.cent", - "match": "\\b(FORS|FRACTIO|MAGNVM|RETE|SCRIPTA|SVBNVLLA)\\b" + "match": "\\b(FORS|FRACTIO|IASON|MAGNVM|RETE|SCRIPTA|SVBNVLLA)\\b" } ] },