427 lines
14 KiB
C
427 lines
14 KiB
C
#include "cent_iason.h"

#include <ctype.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
|
|
|
/* ---------- shared helpers ----------------------------------------- */
|
|
|
|
/* Greatest common divisor of |a| and |b| (Euclid's algorithm).
   Returns 1 rather than 0 when both inputs are zero, so the result is
   always usable as a divisor. */
static long iason_gcd(long a, long b) {
    long x = (a < 0) ? -a : a;
    long y = (b < 0) ? -b : b;
    while (y != 0) {
        long rem = x % y;
        x = y;
        y = rem;
    }
    return (x == 0) ? 1 : x;
}
|
|
|
|
/* Builds the value num/den in lowest terms.
   The sign is moved onto the numerator first; a result whose reduced
   denominator is 1 is returned via cent_int, anything else via cent_frac. */
static CentValue iason_frac_reduce(long num, long den) {
    if (den < 0) {
        den = -den;
        num = -num;
    }
    const long divisor = iason_gcd(num, den);
    const long top = num / divisor;
    const long bottom = den / divisor;
    if (bottom != 1) {
        return cent_frac(top, bottom);
    }
    return cent_int(top);
}
|
|
|
|
/* ---------- parser ------------------------------------------------- */
|
|
|
|
/* Cursor over the JSON text being parsed.
   Field order matters: cent_iason_lege initializes this positionally. */
typedef struct {
    const char *src;  /* input text */
    size_t pos;       /* index of the next unread byte in src */
    size_t len;       /* number of bytes in src */
    int fractio;      /* non-zero: non-integer numbers become exact fractions;
                         zero: they are floored to integers */
} IasonParser;
|
|
|
|
/* Reports a JSON parse failure by forwarding `msg` to cent_runtime_error.
   NOTE(review): callers are written as if this never returns -- confirm
   that cent_runtime_error does not return to its caller. */
static void iason_die(const char *msg)
{
    cent_runtime_error(msg);
}
|
|
|
|
/* Advances p->pos past any run of JSON whitespace (space, tab, LF, CR),
   stopping at the first other byte or at end of input. */
static void iason_skip_ws(IasonParser *p) {
    for (; p->pos < p->len; p->pos++) {
        switch (p->src[p->pos]) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            continue;   /* whitespace: keep scanning */
        default:
            return;     /* first significant byte */
        }
    }
}
|
|
|
|
/* Returns the next unread byte as a non-negative int without consuming it,
   or -1 when the input is exhausted. */
static int iason_peek(IasonParser *p) {
    if (p->pos >= p->len) {
        return -1;
    }
    return (unsigned char)p->src[p->pos];
}
|
|
|
|
/* Consumes one byte, which must equal `c`; otherwise reports `msg` through
   iason_die. */
static void iason_expect(IasonParser *p, char c, const char *msg) {
    const int matches = (p->pos < p->len) && (p->src[p->pos] == c);
    if (!matches) {
        iason_die(msg);
    }
    p->pos++;
}
|
|
|
|
/* Consumes the literal `word` (e.g. "true") at the cursor, or reports an
   invalid-literal error if the remaining input does not start with it. */
static void iason_expect_word(IasonParser *p, const char *word) {
    const size_t word_len = strlen(word);
    const size_t remaining = p->len - p->pos;
    /* Only compare bytes once we know enough input is left. */
    const int found = (remaining >= word_len) &&
                      (memcmp(p->src + p->pos, word, word_len) == 0);
    if (!found) {
        iason_die("IASON_LEGE: invalid JSON literal");
    }
    p->pos += word_len;
}
|
|
|
|
/* Writes the UTF-8 encoding of codepoint `cp` into `buf` (which must have
   room for up to 4 bytes) and returns the number of bytes written.
   Codepoints above 0x10FFFF are reported through iason_die; 0 is returned
   in that case. */
static int iason_utf8_encode(unsigned cp, char *buf) {
    if (cp < 0x80) {
        buf[0] = (char)cp;
        return 1;
    }

    int trailing;    /* number of 10xxxxxx continuation bytes */
    unsigned lead;   /* marker bits of the leading byte */
    if (cp < 0x800) {
        trailing = 1;
        lead = 0xC0;
    } else if (cp < 0x10000) {
        trailing = 2;
        lead = 0xE0;
    } else if (cp < 0x110000) {
        trailing = 3;
        lead = 0xF0;
    } else {
        iason_die("IASON_LEGE: codepoint out of range");
        return 0;
    }

    /* Leading byte carries the top bits; each continuation byte carries the
       next 6 bits, most significant group first. */
    buf[0] = (char)(lead | (cp >> (6 * trailing)));
    for (int i = 1; i <= trailing; i++) {
        buf[i] = (char)(0x80 | ((cp >> (6 * (trailing - i))) & 0x3F));
    }
    return trailing + 1;
}
|
|
|
|
/* Consumes exactly four hex digits (either case) at the cursor and returns
   their value, as used by \uXXXX escapes. Reports an error through
   iason_die when fewer than four bytes remain or a byte is not a hex digit. */
static unsigned iason_read_hex4(IasonParser *p) {
    if (p->len - p->pos < 4) {
        iason_die("IASON_LEGE: truncated \\u escape");
    }

    unsigned value = 0;
    const size_t stop = p->pos + 4;
    while (p->pos < stop) {
        const char ch = p->src[p->pos++];
        unsigned nibble = 0;
        if (ch >= '0' && ch <= '9') {
            nibble = (unsigned)(ch - '0');
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = (unsigned)(ch - 'a' + 10);
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = (unsigned)(ch - 'A' + 10);
        } else {
            iason_die("IASON_LEGE: invalid hex in \\u escape");
        }
        value = (value << 4) | nibble;
    }
    return value;
}
|
|
|
|
/* Parses a JSON string literal at p->pos (positioned at the opening "),
|
|
returns an arena-allocated NUL-terminated UTF-8 string. */
|
|
static char *iason_parse_string(IasonParser *p) {
|
|
iason_expect(p, '"', "IASON_LEGE: expected string");
|
|
/* upper bound on output: same as remaining input (escapes shrink). */
|
|
size_t cap = (p->len - p->pos) + 1;
|
|
char *buf = cent_arena_alloc(cent_arena, cap);
|
|
size_t out = 0;
|
|
while (p->pos < p->len) {
|
|
unsigned char c = (unsigned char)p->src[p->pos++];
|
|
if (c == '"') { buf[out] = '\0'; return buf; }
|
|
if (c == '\\') {
|
|
if (p->pos >= p->len) iason_die("IASON_LEGE: trailing \\ in string");
|
|
char esc = p->src[p->pos++];
|
|
switch (esc) {
|
|
case '"': buf[out++] = '"'; break;
|
|
case '\\': buf[out++] = '\\'; break;
|
|
case '/': buf[out++] = '/'; break;
|
|
case 'b': buf[out++] = '\b'; break;
|
|
case 'f': buf[out++] = '\f'; break;
|
|
case 'n': buf[out++] = '\n'; break;
|
|
case 'r': buf[out++] = '\r'; break;
|
|
case 't': buf[out++] = '\t'; break;
|
|
case 'u': {
|
|
unsigned cp = iason_read_hex4(p);
|
|
if (cp >= 0xD800 && cp <= 0xDBFF) {
|
|
/* high surrogate; expect \uXXXX low surrogate */
|
|
if (p->len - p->pos < 6 || p->src[p->pos] != '\\' || p->src[p->pos + 1] != 'u')
|
|
iason_die("IASON_LEGE: missing low surrogate after high surrogate");
|
|
p->pos += 2;
|
|
unsigned lo = iason_read_hex4(p);
|
|
if (lo < 0xDC00 || lo > 0xDFFF)
|
|
iason_die("IASON_LEGE: invalid low surrogate");
|
|
cp = 0x10000 + (((cp - 0xD800) << 10) | (lo - 0xDC00));
|
|
} else if (cp >= 0xDC00 && cp <= 0xDFFF) {
|
|
iason_die("IASON_LEGE: stray low surrogate");
|
|
}
|
|
out += iason_utf8_encode(cp, buf + out);
|
|
break;
|
|
}
|
|
default: iason_die("IASON_LEGE: invalid escape sequence");
|
|
}
|
|
} else if (c < 0x20) {
|
|
iason_die("IASON_LEGE: unescaped control character in string");
|
|
} else {
|
|
buf[out++] = (char)c;
|
|
}
|
|
}
|
|
iason_die("IASON_LEGE: unterminated string");
|
|
return NULL;
|
|
}
|
|
|
|
/* Cap on fractional digits parsed exactly; beyond this we truncate to
|
|
keep `long` arithmetic safe (10^18 fits in int64). */
|
|
#define IASON_MAX_FRAC_DIGITS 18
|
|
|
|
static CentValue iason_parse_number(IasonParser *p) {
|
|
size_t start = p->pos;
|
|
int negative = 0;
|
|
if (p->src[p->pos] == '-') { negative = 1; p->pos++; }
|
|
|
|
/* Integer part. */
|
|
if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos]))
|
|
iason_die("IASON_LEGE: invalid number");
|
|
if (p->src[p->pos] == '0') {
|
|
p->pos++;
|
|
} else {
|
|
while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) p->pos++;
|
|
}
|
|
|
|
int has_frac = 0, has_exp = 0;
|
|
size_t frac_start = 0, frac_end = 0;
|
|
if (p->pos < p->len && p->src[p->pos] == '.') {
|
|
has_frac = 1;
|
|
p->pos++;
|
|
frac_start = p->pos;
|
|
if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos]))
|
|
iason_die("IASON_LEGE: invalid number");
|
|
while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) p->pos++;
|
|
frac_end = p->pos;
|
|
}
|
|
long exp = 0;
|
|
if (p->pos < p->len && (p->src[p->pos] == 'e' || p->src[p->pos] == 'E')) {
|
|
has_exp = 1;
|
|
p->pos++;
|
|
int esign = 1;
|
|
if (p->pos < p->len && (p->src[p->pos] == '+' || p->src[p->pos] == '-')) {
|
|
if (p->src[p->pos] == '-') esign = -1;
|
|
p->pos++;
|
|
}
|
|
if (p->pos >= p->len || !isdigit((unsigned char)p->src[p->pos]))
|
|
iason_die("IASON_LEGE: invalid number");
|
|
while (p->pos < p->len && isdigit((unsigned char)p->src[p->pos])) {
|
|
exp = exp * 10 + (p->src[p->pos] - '0');
|
|
p->pos++;
|
|
}
|
|
exp *= esign;
|
|
}
|
|
|
|
size_t end = p->pos;
|
|
|
|
if (!has_frac && !has_exp) {
|
|
/* Pure integer. Use strtol-style parse (we already validated digits). */
|
|
long v = 0;
|
|
for (size_t i = (negative ? start + 1 : start); i < end; i++) {
|
|
v = v * 10 + (p->src[i] - '0');
|
|
}
|
|
return cent_int(negative ? -v : v);
|
|
}
|
|
|
|
if (!p->fractio) {
|
|
/* Floor to int. strtod handles the full grammar here. */
|
|
char *tmp = cent_arena_alloc(cent_arena, end - start + 1);
|
|
memcpy(tmp, p->src + start, end - start);
|
|
tmp[end - start] = '\0';
|
|
double d = strtod(tmp, NULL);
|
|
return cent_int((long)floor(d));
|
|
}
|
|
|
|
/* FRACTIO loaded: build an exact fraction from the decimal/exponent form. */
|
|
long num = 0;
|
|
/* Integer-part digits */
|
|
size_t int_start = negative ? start + 1 : start;
|
|
size_t int_end = has_frac ? (frac_start - 1) : (has_exp ? end - 0 : end);
|
|
/* If we have an exponent without a fraction part, find where digits end. */
|
|
if (has_exp && !has_frac) {
|
|
int_end = int_start;
|
|
while (int_end < p->len && isdigit((unsigned char)p->src[int_end])) int_end++;
|
|
}
|
|
for (size_t i = int_start; i < int_end; i++) {
|
|
num = num * 10 + (p->src[i] - '0');
|
|
}
|
|
/* Fractional digits, capped */
|
|
long den = 1;
|
|
if (has_frac) {
|
|
size_t take = frac_end - frac_start;
|
|
if (take > IASON_MAX_FRAC_DIGITS) take = IASON_MAX_FRAC_DIGITS;
|
|
for (size_t i = 0; i < take; i++) {
|
|
num = num * 10 + (p->src[frac_start + i] - '0');
|
|
den *= 10;
|
|
}
|
|
}
|
|
if (negative) num = -num;
|
|
/* Apply exponent: positive shifts num, negative shifts den. */
|
|
while (exp > 0) { num *= 10; exp--; }
|
|
while (exp < 0) { den *= 10; exp++; }
|
|
return iason_frac_reduce(num, den);
|
|
}
|
|
|
|
/* Defined below; arrays and objects recurse into it for their elements. */
static CentValue iason_parse_value(IasonParser *p);

/* Parses a JSON array at the cursor (positioned at '[') into a list value.
   Elements are separated by ',' and the array ends at ']'; anything else
   after an element is reported through iason_die. */
static CentValue iason_parse_array(IasonParser *p) {
    iason_expect(p, '[', "IASON_LEGE: expected [");
    iason_skip_ws(p);

    CentValue items = cent_list_new(4);
    if (iason_peek(p) == ']') {
        /* Empty array. */
        p->pos++;
        return items;
    }

    for (;;) {
        iason_skip_ws(p);
        CentValue item = iason_parse_value(p);
        cent_list_push(&items, item);
        iason_skip_ws(p);

        switch (iason_peek(p)) {
        case ',':
            p->pos++;
            break;              /* another element follows */
        case ']':
            p->pos++;
            return items;
        default:
            iason_die("IASON_LEGE: expected , or ] in array");
            break;
        }
    }
}
|
|
|
|
/* Parses a JSON object at the cursor (positioned at '{') into a dict value.
   Each member is a string key, ':', then a value; members are separated by
   ',' and the object ends at '}'. Violations are reported through
   iason_die. */
static CentValue iason_parse_object(IasonParser *p) {
    iason_expect(p, '{', "IASON_LEGE: expected {");
    iason_skip_ws(p);

    CentValue dict = cent_dict_new(4);
    if (iason_peek(p) == '}') {
        /* Empty object. */
        p->pos++;
        return dict;
    }

    for (;;) {
        iason_skip_ws(p);
        if (iason_peek(p) != '"') {
            iason_die("IASON_LEGE: object key must be a string");
        }
        char *name = iason_parse_string(p);

        iason_skip_ws(p);
        iason_expect(p, ':', "IASON_LEGE: expected : after object key");
        iason_skip_ws(p);

        CentValue member = iason_parse_value(p);
        cent_dict_set(&dict, cent_str(name), member);
        iason_skip_ws(p);

        switch (iason_peek(p)) {
        case ',':
            p->pos++;
            break;              /* another member follows */
        case '}':
            p->pos++;
            return dict;
        default:
            iason_die("IASON_LEGE: expected , or } in object");
            break;
        }
    }
}
|
|
|
|
/* Parses any JSON value at the cursor, dispatching on its first
   non-whitespace byte: object, array, string, true/false/null, or number.
   End of input and unrecognised bytes are reported through iason_die. */
static CentValue iason_parse_value(IasonParser *p) {
    iason_skip_ws(p);
    const int next = iason_peek(p);
    if (next < 0) {
        iason_die("IASON_LEGE: unexpected end of input");
    }

    switch (next) {
    case '{':
        return iason_parse_object(p);
    case '[':
        return iason_parse_array(p);
    case '"':
        return cent_str(iason_parse_string(p));
    case 't':
        iason_expect_word(p, "true");
        return cent_bool(1);
    case 'f':
        iason_expect_word(p, "false");
        return cent_bool(0);
    case 'n':
        iason_expect_word(p, "null");
        return cent_null();
    case '-':
    case '0': case '1': case '2': case '3': case '4':
    case '5': case '6': case '7': case '8': case '9':
        return iason_parse_number(p);
    default:
        break;
    }

    iason_die("IASON_LEGE: unexpected character");
    return cent_null();
}
|
|
|
|
/* IASON_LEGE entry point: parses the JSON text held in string value `s` and
   returns the decoded value.
   `fractio_loaded` selects how non-integer numbers are decoded (non-zero:
   exact fractions; zero: floored integers).
   A non-string argument raises a type error; anything other than
   whitespace after the top-level value is reported as trailing data. */
CentValue cent_iason_lege(CentValue s, int fractio_loaded) {
    if (s.type != CENT_STR) {
        cent_type_error("IASON_LEGE requires a string");
    }

    IasonParser parser = {
        .src = s.sval,
        .pos = 0,
        .len = strlen(s.sval),
        .fractio = fractio_loaded,
    };

    CentValue result = iason_parse_value(&parser);
    iason_skip_ws(&parser);
    if (parser.pos != parser.len) {
        iason_die("IASON_LEGE: trailing data after JSON value");
    }
    return result;
}
|
|
|
|
/* ---------- serializer --------------------------------------------- */
|
|
|
|
/* Growable output buffer used by the serializer. */
typedef struct {
    char *buf;   /* storage; NULL until the first reservation */
    size_t len;  /* bytes written so far */
    size_t cap;  /* bytes available in buf */
} IasonBuf;
|
|
|
|
/* Ensures `b` has room for `extra` more bytes. When it does not, capacity
   is doubled (starting from 64) until it suffices, a new block is taken
   from cent_arena_alloc, and the existing contents are copied across. */
static void iason_buf_reserve(IasonBuf *b, size_t extra) {
    const size_t needed = b->len + extra;
    if (needed <= b->cap) {
        return;
    }

    size_t grown = (b->cap != 0) ? b->cap * 2 : 64;
    while (grown < needed) {
        grown *= 2;
    }

    char *fresh = cent_arena_alloc(cent_arena, grown);
    if (b->len != 0) {
        memcpy(fresh, b->buf, b->len);
    }
    b->cap = grown;
    b->buf = fresh;
}
|
|
|
|
/* Appends the single byte `c` to `b`, growing the buffer if needed. */
static void iason_buf_putc(IasonBuf *b, char c) {
    iason_buf_reserve(b, 1);
    b->buf[b->len] = c;
    b->len += 1;
}
|
|
|
|
/* Appends the NUL-terminated string `s` to `b`, without its terminator. */
static void iason_buf_puts(IasonBuf *b, const char *s) {
    const size_t count = strlen(s);
    iason_buf_reserve(b, count);
    char *dst = b->buf + b->len;
    memcpy(dst, s, count);
    b->len += count;
}
|
|
|
|
/* Appends exactly `n` bytes starting at `s` to `b`. */
static void iason_buf_putn(IasonBuf *b, const char *s, size_t n) {
    iason_buf_reserve(b, n);
    char *dst = b->buf + b->len;
    memcpy(dst, s, n);
    b->len += n;
}
|
|
|
|
/* Appends `s` to `b` as a quoted JSON string. Quote, backslash and the
   common control characters use their two-character escapes; any other
   byte below 0x20 is written as \u00XX; every remaining byte is copied
   through unchanged. */
static void iason_emit_string(IasonBuf *b, const char *s) {
    iason_buf_putc(b, '"');

    const unsigned char *cur = (const unsigned char *)s;
    while (*cur != '\0') {
        const unsigned char ch = *cur++;

        /* Two-character escape for this byte, if it has one. */
        const char *short_esc = NULL;
        switch (ch) {
        case '"':  short_esc = "\\\""; break;
        case '\\': short_esc = "\\\\"; break;
        case '\b': short_esc = "\\b";  break;
        case '\f': short_esc = "\\f";  break;
        case '\n': short_esc = "\\n";  break;
        case '\r': short_esc = "\\r";  break;
        case '\t': short_esc = "\\t";  break;
        default:   break;
        }

        if (short_esc != NULL) {
            iason_buf_puts(b, short_esc);
        } else if (ch < 0x20) {
            char hex[8];
            snprintf(hex, sizeof hex, "\\u%04x", ch);
            iason_buf_puts(b, hex);
        } else {
            iason_buf_putc(b, (char)ch);
        }
    }

    iason_buf_putc(b, '"');
}
|
|
|
|
/* Appends the JSON text for `v` to `b`, recursing into lists and dicts.
   Fractions are written as decimal floating-point text. List and dict
   entries are separated by ", " and dict keys from values by ": ".
   Non-string dict keys and function values raise a runtime error. */
static void iason_emit_value(IasonBuf *b, CentValue v) {
    switch (v.type) {
    case CENT_NULL:
        iason_buf_puts(b, "null");
        return;

    case CENT_BOOL:
        if (v.bval) {
            iason_buf_puts(b, "true");
        } else {
            iason_buf_puts(b, "false");
        }
        return;

    case CENT_INT: {
        char digits[32];
        int written = snprintf(digits, sizeof digits, "%ld", v.ival);
        iason_buf_putn(b, digits, (size_t)written);
        return;
    }

    case CENT_FRAC: {
        const double approx = (double)v.fval.num / (double)v.fval.den;
        /* Try 15, 16, then 17 significant digits and keep the first text
           that parses back to the same double. */
        char text[64];
        int written = 0;
        int prec = 15;
        do {
            written = snprintf(text, sizeof text, "%.*g", prec, approx);
        } while (strtod(text, NULL) != approx && ++prec <= 17);
        iason_buf_putn(b, text, (size_t)written);
        return;
    }

    case CENT_STR:
        iason_emit_string(b, v.sval);
        return;

    case CENT_LIST: {
        iason_buf_putc(b, '[');
        for (int idx = 0; idx < v.lval.len; idx++) {
            if (idx != 0) {
                iason_buf_puts(b, ", ");
            }
            iason_emit_value(b, v.lval.items[idx]);
        }
        iason_buf_putc(b, ']');
        return;
    }

    case CENT_DICT: {
        iason_buf_putc(b, '{');
        for (int idx = 0; idx < v.dval.len; idx++) {
            if (idx != 0) {
                iason_buf_puts(b, ", ");
            }
            CentValue key = v.dval.keys[idx];
            if (key.type != CENT_STR) {
                cent_runtime_error("IASON_SCRIBE: dict keys must be strings to serialize as JSON");
            }
            iason_emit_string(b, key.sval);
            iason_buf_puts(b, ": ");
            iason_emit_value(b, v.dval.vals[idx]);
        }
        iason_buf_putc(b, '}');
        return;
    }

    case CENT_FUNC:
        cent_runtime_error("IASON_SCRIBE: cannot serialize a function");
        return;
    }
}
|
|
|
|
/* IASON_SCRIBE entry point: serializes `v` to JSON text and returns it as a
   string value. The text is built in an IasonBuf and NUL-terminated before
   being handed to cent_str. */
CentValue cent_iason_scribe(CentValue v) {
    IasonBuf out = { .buf = NULL, .len = 0, .cap = 0 };
    iason_emit_value(&out, v);
    iason_buf_putc(&out, '\0');
    return cent_str(out.buf);
}
|