Files
centvrion/centvrion/compiler/runtime/cent_iason.c
2026-04-25 22:03:30 +02:00

427 lines
14 KiB
C

#include "cent_iason.h"
#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* ---------- shared helpers ----------------------------------------- */
/* Greatest common divisor of |a| and |b| (Euclid's algorithm).
   Returns 1 instead of 0 when both inputs are zero, so the result is
   always safe to divide by. */
static long iason_gcd(long a, long b) {
    long x = (a < 0) ? -a : a;
    long y = (b < 0) ? -b : b;
    while (y != 0) {
        long rem = x % y;
        x = y;
        y = rem;
    }
    return (x != 0) ? x : 1;
}
/* Builds the value num/den in lowest terms.
   The sign is moved onto the numerator first; when the reduced
   denominator is 1 the result is produced with cent_int, otherwise with
   cent_frac. */
static CentValue iason_frac_reduce(long num, long den) {
    if (den < 0) {
        num = -num;
        den = -den;
    }
    const long divisor = iason_gcd(num, den);
    const long reduced_num = num / divisor;
    const long reduced_den = den / divisor;
    return (reduced_den == 1) ? cent_int(reduced_num)
                              : cent_frac(reduced_num, reduced_den);
}
/* ---------- parser ------------------------------------------------- */
/* Cursor state for one parse of a JSON text. */
typedef struct {
const char *src; /* input bytes being parsed */
size_t pos; /* index into src of the next unread byte */
size_t len; /* number of bytes in src */
int fractio; /* nonzero: numbers with a fraction/exponent become exact
                fractions; zero: they are floored to an integer */
} IasonParser;
/* Reports a parse failure by forwarding msg to cent_runtime_error.
   NOTE(review): several callers continue as if this never returns —
   confirm that cent_runtime_error terminates or unwinds. */
static void iason_die(const char *msg) {
cent_runtime_error(msg);
}
/* Advances p->pos past any run of JSON whitespace (space, tab, LF, CR),
   stopping at the first other byte or at end of input. */
static void iason_skip_ws(IasonParser *p) {
    for (; p->pos < p->len; p->pos++) {
        switch (p->src[p->pos]) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            continue; /* whitespace: let the for-loop step past it */
        default:
            return;
        }
    }
}
/* Returns the next unread byte (as an unsigned char value) without
   consuming it, or -1 when the input is exhausted. */
static int iason_peek(IasonParser *p) {
    if (p->pos >= p->len) {
        return -1;
    }
    return (unsigned char)p->src[p->pos];
}
/* Consumes one byte, which must equal c; otherwise reports msg through
   iason_die. The cursor is advanced in either case. */
static void iason_expect(IasonParser *p, char c, const char *msg) {
    int matches = (p->pos < p->len) && (p->src[p->pos] == c);
    if (!matches) {
        iason_die(msg);
    }
    p->pos += 1;
}
/* Consumes the exact byte sequence `word` (used for true/false/null) at
   the cursor; reports an error through iason_die if the input is too
   short or does not match. */
static void iason_expect_word(IasonParser *p, const char *word) {
    const size_t word_len = strlen(word);
    const size_t remaining = p->len - p->pos;
    int ok = (remaining >= word_len)
          && (memcmp(p->src + p->pos, word, word_len) == 0);
    if (!ok) {
        iason_die("IASON_LEGE: invalid JSON literal");
    }
    p->pos += word_len;
}
/* Writes the UTF-8 encoding of codepoint cp into buf and returns the
   number of bytes written (1 to 4). Codepoints above 0x10FFFF are
   reported through iason_die and yield 0 with nothing written. */
static int iason_utf8_encode(unsigned cp, char *buf) {
    int nbytes;
    if (cp <= 0x7F) {
        buf[0] = (char)cp;
        return 1;
    } else if (cp <= 0x7FF) {
        nbytes = 2;
        buf[0] = (char)(0xC0 | (cp >> 6));
    } else if (cp <= 0xFFFF) {
        nbytes = 3;
        buf[0] = (char)(0xE0 | (cp >> 12));
    } else if (cp <= 0x10FFFF) {
        nbytes = 4;
        buf[0] = (char)(0xF0 | (cp >> 18));
    } else {
        iason_die("IASON_LEGE: codepoint out of range");
        return 0;
    }
    /* Continuation bytes carry 6 payload bits each; fill them from the
       last byte backwards, consuming the low bits of cp as we go. */
    for (int i = nbytes - 1; i > 0; i--) {
        buf[i] = (char)(0x80 | (cp & 0x3F));
        cp >>= 6;
    }
    return nbytes;
}
/* Reads exactly four hexadecimal digits at the cursor (the payload of a
   \uXXXX escape), advances past them and returns their value.
   Truncated input or a non-hex byte is reported through iason_die. */
static unsigned iason_read_hex4(IasonParser *p) {
    if (p->len - p->pos < 4) {
        iason_die("IASON_LEGE: truncated \\u escape");
    }
    unsigned value = 0;
    const size_t stop = p->pos + 4;
    while (p->pos < stop) {
        char ch = p->src[p->pos];
        p->pos++;
        unsigned nibble = 0;
        if (ch >= '0' && ch <= '9') {
            nibble = (unsigned)(ch - '0');
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = (unsigned)(ch - 'a' + 10);
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = (unsigned)(ch - 'A' + 10);
        } else {
            iason_die("IASON_LEGE: invalid hex in \\u escape");
        }
        value = (value << 4) | nibble;
    }
    return value;
}
/* Parses a JSON string literal whose opening quote is at p->pos.
   Returns a NUL-terminated buffer obtained from cent_arena_alloc holding
   the decoded bytes; \uXXXX escapes (including surrogate pairs) are
   written as UTF-8. Malformed input is reported through iason_die.
   Note: a \u0000 escape decodes to a NUL byte, so the returned C string
   appears to end there. */
static char *iason_parse_string(IasonParser *p) {
    iason_expect(p, '"', "IASON_LEGE: expected string");
    /* The decoded text is never longer than the raw input that remains
       (every escape is at least as long as what it decodes to). */
    size_t capacity = (p->len - p->pos) + 1;
    char *dst = cent_arena_alloc(cent_arena, capacity);
    size_t n = 0;
    while (p->pos < p->len) {
        unsigned char ch = (unsigned char)p->src[p->pos++];
        if (ch == '"') {
            dst[n] = '\0';
            return dst;
        }
        if (ch != '\\') {
            /* Ordinary byte: copied through unless it is a raw control. */
            if (ch < 0x20) {
                iason_die("IASON_LEGE: unescaped control character in string");
            } else {
                dst[n++] = (char)ch;
            }
            continue;
        }
        if (p->pos >= p->len) {
            iason_die("IASON_LEGE: trailing \\ in string");
        }
        char esc = p->src[p->pos++];
        if (esc == 'u') {
            unsigned cp = iason_read_hex4(p);
            if (cp >= 0xD800 && cp <= 0xDBFF) {
                /* High surrogate: a \uXXXX low surrogate must follow. */
                if (p->len - p->pos < 6 || p->src[p->pos] != '\\' || p->src[p->pos + 1] != 'u') {
                    iason_die("IASON_LEGE: missing low surrogate after high surrogate");
                }
                p->pos += 2;
                unsigned low = iason_read_hex4(p);
                if (low < 0xDC00 || low > 0xDFFF) {
                    iason_die("IASON_LEGE: invalid low surrogate");
                }
                cp = 0x10000 + (((cp - 0xD800) << 10) | (low - 0xDC00));
            } else if (cp >= 0xDC00 && cp <= 0xDFFF) {
                iason_die("IASON_LEGE: stray low surrogate");
            }
            n += iason_utf8_encode(cp, dst + n);
            continue;
        }
        /* Single-character escapes map to exactly one output byte. */
        char decoded;
        switch (esc) {
        case '"':  decoded = '"';  break;
        case '\\': decoded = '\\'; break;
        case '/':  decoded = '/';  break;
        case 'b':  decoded = '\b'; break;
        case 'f':  decoded = '\f'; break;
        case 'n':  decoded = '\n'; break;
        case 'r':  decoded = '\r'; break;
        case 't':  decoded = '\t'; break;
        default:
            iason_die("IASON_LEGE: invalid escape sequence");
            continue;
        }
        dst[n++] = decoded;
    }
    iason_die("IASON_LEGE: unterminated string");
    return NULL;
}
/* Cap on fractional digits parsed exactly; digits beyond this are dropped
   (truncation) so the power-of-ten denominator stays small (10^18 fits
   in a 64-bit long). */
#define IASON_MAX_FRAC_DIGITS 18
/* Parsed exponent magnitudes stop growing once they reach this bound (the
   remaining exponent digits are still consumed). An exponent this large
   either pushes a nonzero mantissa out of range or truncates it to zero,
   so its exact value no longer matters. */
#define IASON_MAX_EXPONENT 100000L

/* Appends one decimal digit to a non-negative accumulator:
   *acc = *acc * 10 + digit. Returns 1 on success; returns 0 and leaves
   *acc unchanged when the result would exceed LONG_MAX. */
static int iason_push_digit(long *acc, int digit) {
    if (*acc > (LONG_MAX - digit) / 10) return 0;
    *acc = *acc * 10 + digit;
    return 1;
}

/* Reports a number that does not fit the runtime's long-based values.
   The returned value is only a placeholder for the case that iason_die
   returns. */
static CentValue iason_number_overflow(void) {
    iason_die("IASON_LEGE: number out of range");
    return cent_int(0);
}

/* Parses a JSON number starting at p->pos and advances past it.
   - Plain integers yield cent_int.
   - With a fraction and/or exponent and p->fractio == 0, the value is
     floored to an integer.
   - With p->fractio != 0, the value becomes an exact reduced fraction
     (or an integer when the denominator reduces to 1). Decimal places
     that cannot be represented (more than IASON_MAX_FRAC_DIGITS, or a
     denominator that would overflow) are truncated toward zero.
   Syntax errors are reported as "invalid number"; values whose magnitude
   does not fit in a long are reported as "number out of range" instead
   of overflowing (signed overflow is undefined behaviour in C). */
static CentValue iason_parse_number(IasonParser *p) {
    size_t start = p->pos;
    int negative = 0;
    if (p->pos < p->len && p->src[p->pos] == '-') { negative = 1; p->pos++; }

    /* Integer part: a lone 0, or a run of digits. */
    size_t int_start = p->pos;
    if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos]))
        iason_die("IASON_LEGE: invalid number");
    if (p->src[p->pos] == '0') {
        p->pos++;
    } else {
        while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) p->pos++;
    }
    size_t int_end = p->pos;

    /* Optional fraction: '.' followed by at least one digit. */
    int has_frac = 0, has_exp = 0;
    size_t frac_start = 0, frac_end = 0;
    if (p->pos < p->len && p->src[p->pos] == '.') {
        has_frac = 1;
        p->pos++;
        frac_start = p->pos;
        if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos]))
            iason_die("IASON_LEGE: invalid number");
        while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) p->pos++;
        frac_end = p->pos;
    }

    /* Optional exponent: e/E, optional sign, at least one digit. */
    long exponent = 0;
    if (p->pos < p->len && (p->src[p->pos] == 'e' || p->src[p->pos] == 'E')) {
        has_exp = 1;
        p->pos++;
        int esign = 1;
        if (p->pos < p->len && (p->src[p->pos] == '+' || p->src[p->pos] == '-')) {
            if (p->src[p->pos] == '-') esign = -1;
            p->pos++;
        }
        if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos]))
            iason_die("IASON_LEGE: invalid number");
        while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) {
            /* Saturate instead of overflowing on absurdly long exponents. */
            if (exponent < IASON_MAX_EXPONENT)
                exponent = exponent * 10 + (p->src[p->pos] - '0');
            p->pos++;
        }
        exponent *= esign;
    }
    size_t end = p->pos;

    if (!has_frac && !has_exp) {
        /* Pure integer. Negative literals are accumulated on the negative
           side so that LONG_MIN itself is accepted. */
        long v = 0;
        for (size_t i = int_start; i < int_end; i++) {
            int digit = p->src[i] - '0';
            if (negative) {
                /* LONG_MIN + digit is <= 0, so the division rounds toward
                   zero, i.e. up, giving the smallest v that still fits. */
                if (v < (LONG_MIN + digit) / 10) return iason_number_overflow();
                v = v * 10 - digit;
            } else if (!iason_push_digit(&v, digit)) {
                return iason_number_overflow();
            }
        }
        return cent_int(v);
    }

    if (!p->fractio) {
        /* Floor to int. strtod handles the full grammar here. */
        char *tmp = cent_arena_alloc(cent_arena, end - start + 1);
        memcpy(tmp, p->src + start, end - start);
        tmp[end - start] = '\0';
        double floored = floor(strtod(tmp, NULL));
        /* Converting an out-of-range double to long is undefined, so
           reject it first. -(double)LONG_MIN is exactly LONG_MAX + 1. */
        if (!(floored >= (double)LONG_MIN && floored < -(double)LONG_MIN))
            return iason_number_overflow();
        return cent_int((long)floored);
    }

    /* FRACTIO loaded: build an exact fraction num/den from the magnitude;
       the sign is applied at the end. */
    long num = 0;
    for (size_t i = int_start; i < int_end; i++) {
        if (!iason_push_digit(&num, p->src[i] - '0'))
            return iason_number_overflow();
    }

    /* Fractional digits, capped. Stop early (truncating, like the cap)
       once another digit would overflow num or den. */
    long den = 1;
    if (has_frac) {
        size_t take = frac_end - frac_start;
        if (take > IASON_MAX_FRAC_DIGITS) take = IASON_MAX_FRAC_DIGITS;
        for (size_t i = 0; i < take; i++) {
            if (den > LONG_MAX / 10) break;
            if (!iason_push_digit(&num, p->src[frac_start + i] - '0')) break;
            den *= 10;
        }
    }

    /* Positive exponent: cancel against the power-of-ten denominator
       first (exact), then scale the numerator with an overflow check. */
    while (exponent > 0 && den > 1) { den /= 10; exponent--; }
    while (exponent > 0 && num != 0) {
        if (num > LONG_MAX / 10) return iason_number_overflow();
        num *= 10;
        exponent--;
    }
    /* Negative exponent: strip trailing zeros from num when possible
       (exact), otherwise grow den; once den cannot grow any further the
       remaining decimal places are truncated toward zero. */
    while (exponent < 0 && num != 0) {
        if (num % 10 == 0) num /= 10;
        else if (den <= LONG_MAX / 10) den *= 10;
        else num /= 10;
        exponent++;
    }

    if (negative) num = -num;
    return iason_frac_reduce(num, den);
}
/* Forward declaration: containers hold values, and values may themselves
   be containers. */
static CentValue iason_parse_value(IasonParser *p);

/* Parses a JSON array whose '[' is at p->pos and returns it as a list
   value. Elements are parsed recursively with iason_parse_value; a
   missing ',' or ']' is reported through iason_die. */
static CentValue iason_parse_array(IasonParser *p) {
    iason_expect(p, '[', "IASON_LEGE: expected [");
    iason_skip_ws(p);
    CentValue items = cent_list_new(4);
    if (iason_peek(p) == ']') {
        /* Empty array. */
        p->pos++;
        return items;
    }
    while (1) {
        iason_skip_ws(p);
        CentValue item = iason_parse_value(p);
        cent_list_push(&items, item);
        iason_skip_ws(p);
        switch (iason_peek(p)) {
        case ',':
            p->pos++;
            break;
        case ']':
            p->pos++;
            return items;
        default:
            iason_die("IASON_LEGE: expected , or ] in array");
            break;
        }
    }
}
/* Parses a JSON object whose '{' is at p->pos and returns it as a dict
   value. Keys must be string literals; each key/value pair is stored
   with cent_dict_set. Structural errors are reported through iason_die. */
static CentValue iason_parse_object(IasonParser *p) {
    iason_expect(p, '{', "IASON_LEGE: expected {");
    iason_skip_ws(p);
    CentValue dict = cent_dict_new(4);
    if (iason_peek(p) == '}') {
        /* Empty object. */
        p->pos++;
        return dict;
    }
    while (1) {
        iason_skip_ws(p);
        if (iason_peek(p) != '"') {
            iason_die("IASON_LEGE: object key must be a string");
        }
        char *name = iason_parse_string(p);

        iason_skip_ws(p);
        iason_expect(p, ':', "IASON_LEGE: expected : after object key");
        iason_skip_ws(p);

        CentValue member = iason_parse_value(p);
        cent_dict_set(&dict, cent_str(name), member);

        iason_skip_ws(p);
        switch (iason_peek(p)) {
        case ',':
            p->pos++;
            break;
        case '}':
            p->pos++;
            return dict;
        default:
            iason_die("IASON_LEGE: expected , or } in object");
            break;
        }
    }
}
/* Parses one JSON value of any kind at the cursor (after skipping leading
   whitespace), dispatching on its first byte. End of input or an
   unrecognised first byte is reported through iason_die. */
static CentValue iason_parse_value(IasonParser *p) {
    iason_skip_ws(p);
    int ch = iason_peek(p);
    if (ch < 0) {
        iason_die("IASON_LEGE: unexpected end of input");
    }
    switch (ch) {
    case '{':
        return iason_parse_object(p);
    case '[':
        return iason_parse_array(p);
    case '"':
        return cent_str(iason_parse_string(p));
    case 't':
        iason_expect_word(p, "true");
        return cent_bool(1);
    case 'f':
        iason_expect_word(p, "false");
        return cent_bool(0);
    case 'n':
        iason_expect_word(p, "null");
        return cent_null();
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return iason_parse_number(p);
    default:
        break;
    }
    iason_die("IASON_LEGE: unexpected character");
    return cent_null();
}
/* IASON_LEGE entry point: parses the JSON text held in string value s and
   returns the corresponding value.
   fractio_loaded selects how non-integer numbers are handled (nonzero:
   exact fractions; zero: floored to integers).
   A non-string argument is reported with cent_type_error; anything other
   than whitespace after the top-level value is a parse error. */
CentValue cent_iason_lege(CentValue s, int fractio_loaded) {
    if (s.type != CENT_STR) {
        cent_type_error("IASON_LEGE requires a string");
    }
    IasonParser parser = {
        .src = s.sval,
        .pos = 0,
        .len = strlen(s.sval),
        .fractio = fractio_loaded,
    };
    CentValue result = iason_parse_value(&parser);
    iason_skip_ws(&parser);
    if (parser.pos != parser.len) {
        iason_die("IASON_LEGE: trailing data after JSON value");
    }
    return result;
}
/* ---------- serializer --------------------------------------------- */
/* Growable output buffer used while serializing a value to JSON text. */
typedef struct {
char *buf; /* storage; NULL until the first reservation */
size_t len; /* number of bytes written so far */
size_t cap; /* number of bytes available in buf */
} IasonBuf;
/* Ensures room for `extra` more bytes after the current contents.
   Capacity starts at 64 and doubles until it is large enough; the new
   storage comes from cent_arena_alloc and the existing bytes are copied
   over (the old storage is simply abandoned). */
static void iason_buf_reserve(IasonBuf *b, size_t extra) {
    const size_t needed = b->len + extra;
    if (needed <= b->cap) {
        return;
    }
    size_t grown = (b->cap != 0) ? b->cap * 2 : 64;
    while (grown < needed) {
        grown *= 2;
    }
    char *fresh = cent_arena_alloc(cent_arena, grown);
    if (b->len != 0) {
        memcpy(fresh, b->buf, b->len);
    }
    b->buf = fresh;
    b->cap = grown;
}
/* Appends the single byte c to the buffer, growing it if needed. */
static void iason_buf_putc(IasonBuf *b, char c) {
    iason_buf_reserve(b, 1);
    b->buf[b->len] = c;
    b->len += 1;
}
/* Appends the NUL-terminated string s (without its terminator) to the
   buffer, growing it if needed. */
static void iason_buf_puts(IasonBuf *b, const char *s) {
    const size_t count = strlen(s);
    iason_buf_reserve(b, count);
    char *dst = b->buf + b->len;
    memcpy(dst, s, count);
    b->len += count;
}
/* Appends exactly n bytes starting at s to the buffer, growing it if
   needed. */
static void iason_buf_putn(IasonBuf *b, const char *s, size_t n) {
    iason_buf_reserve(b, n);
    char *dst = b->buf + b->len;
    memcpy(dst, s, n);
    b->len += n;
}
/* Appends s as a quoted JSON string literal.
   Quote, backslash and the common control characters use their short
   escapes; any other byte below 0x20 is written as \u00XX; every other
   byte (including bytes >= 0x80) is copied through unchanged. */
static void iason_emit_string(IasonBuf *b, const char *s) {
    iason_buf_putc(b, '"');
    const unsigned char *cursor = (const unsigned char *)s;
    while (*cursor != '\0') {
        const unsigned char ch = *cursor++;
        const char *escaped = NULL;
        if (ch == '"')       escaped = "\\\"";
        else if (ch == '\\') escaped = "\\\\";
        else if (ch == '\b') escaped = "\\b";
        else if (ch == '\f') escaped = "\\f";
        else if (ch == '\n') escaped = "\\n";
        else if (ch == '\r') escaped = "\\r";
        else if (ch == '\t') escaped = "\\t";

        if (escaped != NULL) {
            iason_buf_puts(b, escaped);
        } else if (ch < 0x20) {
            /* "\uXXXX" is six characters plus the terminator. */
            char hex[8];
            snprintf(hex, sizeof hex, "\\u%04x", ch);
            iason_buf_puts(b, hex);
        } else {
            iason_buf_putc(b, (char)ch);
        }
    }
    iason_buf_putc(b, '"');
}
/* Appends d using the fewest significant digits (15, 16 or 17) whose
   text parses back to exactly d; 17 digits are used if none of the
   shorter forms round-trips. */
static void iason_emit_double(IasonBuf *b, double d) {
    char text[64];
    int written = 0;
    int prec = 15;
    while (prec <= 17) {
        written = snprintf(text, sizeof text, "%.*g", prec, d);
        if (strtod(text, NULL) == d) {
            break;
        }
        prec++;
    }
    iason_buf_putn(b, text, (size_t)written);
}

/* Appends the JSON text for v, recursing into lists and dicts.
   Fractions are written as floating-point numbers. Elements and members
   are separated by ", " and keys from values by ": ". Dict keys that are
   not strings, and function values, are reported with cent_runtime_error.
   A value whose type matches none of the cases emits nothing. */
static void iason_emit_value(IasonBuf *b, CentValue v) {
    switch (v.type) {
    case CENT_NULL:
        iason_buf_puts(b, "null");
        break;
    case CENT_BOOL:
        iason_buf_puts(b, v.bval ? "true" : "false");
        break;
    case CENT_INT: {
        char digits[32];
        int count = snprintf(digits, sizeof digits, "%ld", v.ival);
        iason_buf_putn(b, digits, (size_t)count);
        break;
    }
    case CENT_FRAC:
        iason_emit_double(b, (double)v.fval.num / (double)v.fval.den);
        break;
    case CENT_STR:
        iason_emit_string(b, v.sval);
        break;
    case CENT_LIST: {
        iason_buf_putc(b, '[');
        for (int idx = 0; idx < v.lval.len; idx++) {
            if (idx != 0) {
                iason_buf_puts(b, ", ");
            }
            iason_emit_value(b, v.lval.items[idx]);
        }
        iason_buf_putc(b, ']');
        break;
    }
    case CENT_DICT: {
        iason_buf_putc(b, '{');
        for (int idx = 0; idx < v.dval.len; idx++) {
            if (idx != 0) {
                iason_buf_puts(b, ", ");
            }
            CentValue key = v.dval.keys[idx];
            if (key.type != CENT_STR) {
                cent_runtime_error("IASON_SCRIBE: dict keys must be strings to serialize as JSON");
            }
            iason_emit_string(b, key.sval);
            iason_buf_puts(b, ": ");
            iason_emit_value(b, v.dval.vals[idx]);
        }
        iason_buf_putc(b, '}');
        break;
    }
    case CENT_FUNC:
        cent_runtime_error("IASON_SCRIBE: cannot serialize a function");
        break;
    }
}
/* IASON_SCRIBE entry point: serializes v to JSON text and returns it as a
   string value built with cent_str from the NUL-terminated output
   buffer. */
CentValue cent_iason_scribe(CentValue v) {
    IasonBuf out = { .buf = NULL, .len = 0, .cap = 0 };
    iason_emit_value(&out, v);
    iason_buf_putc(&out, '\0');
    return cent_str(out.buf);
}