🐐 QVAERE

This commit is contained in:
2026-04-22 09:08:36 +02:00
parent ebea9f942b
commit b9a1ed1bcc
12 changed files with 84 additions and 281 deletions

View File

@@ -349,6 +349,11 @@ Returns the type of `value` as a string: `NVMERVS` (integer), `LITTERA` (string)
Sleeps for `n` seconds, where `n` is an integer, fraction, or NVLLVS (treated as 0). Returns nothing meaningful.
### QVAERE
`QVAERE(pattern, string)`
Returns an array of all non-overlapping matches of the regex `pattern` in `string`. Both arguments must be strings. Patterns use extended regular expression syntax. Returns an empty array if there are no matches. Raises an error if the pattern is invalid.
## Modules
Modules are additions to the base `CENTVRION` syntax. They add or change certain features. Modules are included in your code by having
@@ -361,15 +366,15 @@ Vnlike many other programming languages with modules, the modules in `CENTVRION`
### FORS
![CVM FORS](snippets/fors.png)
The `FORS` module allows you to add randomness to your `CENTVRION` program. It adds 4 new built-in functions: `FORTVITVS_NVMERVS int int`, `FORTVITA_ELECTIO ['a]`, `DECIMATIO ['a]`, and `SEMEN int`.
The `FORS` module allows you to add randomness to your `CENTVRION` program. It adds 4 new built-in functions: `FORTVITVS_NVMERVS(int, int)`, `FORTVITA_ELECTIO(['a])`, `DECIMATIO(['a])`, and `SEMEN(int)`.
`FORTVITVS_NVMERVS int int` picks a random int in the (inclusive) range of the two given ints.
`FORTVITVS_NVMERVS(int, int)` picks a random int in the (inclusive) range of the two given ints.
`FORTVITA_ELECTIO ['a]` picks a random element from the given array. `FORTVITA_ELECTIO array` is identical to ```array[FORTVITVS_NVMERVS NVLLVS ((LONGITVDO array)-I)]```.
`FORTVITA_ELECTIO(['a])` picks a random element from the given array. `FORTVITA_ELECTIO(array)` is identical to ```array[FORTVITVS_NVMERVS(NVLLVS, LONGITVDO(array) - I)]```.
`DECIMATIO ['a]` returns a copy of the given array with a random tenth of its elements removed. Arrays with fewer than 10 elements are returned unchanged.
`DECIMATIO(['a])` returns a copy of the given array with a random tenth of its elements removed. Arrays with fewer than 10 elements are returned unchanged.
`SEMEN int` seeds the random number generator for reproducibility.
`SEMEN(int)` seeds the random number generator for reproducibility.
### FRACTIO
![CVM FRACTIO](snippets/fractio.png)
@@ -394,14 +399,15 @@ When `_` is added _after_ a numeric symbol, the symbol becomes 1.000 times large
All integer symbols except `I` may be given a `_`.
### SCRIPTA
![CVM SCRIPTA](snippets/scripta.png)
The `SCRIPTA` module adds file I/O to your `CENTVRION` program. It adds 3 new built-in functions: `LEGE string`, `SCRIBE string string`, and `ADIVNGE string string`.
The `SCRIPTA` module adds file I/O to your `CENTVRION` program. It adds 3 new built-in functions: `LEGE`, `SCRIBE`, and `ADIVNGE`.
`LEGE string` reads the contents of the file at the given path and returns them as a string.
`LEGE(string)` reads the contents of the file at the given path and returns them as a string.
`SCRIBE string string` writes the second argument to the file at the path given by the first argument, overwriting any existing content.
`SCRIBE(string, string)` writes the second argument to the file at the path given by the first argument, overwriting any existing content.
`ADIVNGE string string` appends the second argument to the file at the path given by the first argument.
`ADIVNGE(string, string)` appends the second argument to the file at the path given by the first argument.
### SVBNVLLA
![CVM SVBNVLLA](snippets/svbnvlla.png)

View File

@@ -1277,6 +1277,19 @@ class BuiltIn(Node):
with open(path, "a") as f:
f.write(content)
return vtable, ValNul()
case "QVAERE":
pattern = params[0]
text = params[1]
if not isinstance(pattern, ValStr) or not isinstance(text, ValStr):
raise CentvrionError("QVAERE requires two strings")
try:
matches = [
ValStr(m.group(0))
for m in re.finditer(pattern.value(), text.value())
]
except re.error as e:
raise CentvrionError(f"Invalid regex: {e}")
return vtable, ValList(matches)
case _:
raise NotImplementedError(self.builtin)

View File

@@ -297,6 +297,9 @@ def _emit_builtin(node, ctx):
lines.append(f"cent_adivnge({param_vars[0]}, {param_vars[1]});")
lines.append(f"CentValue {tmp} = cent_null();")
case "QVAERE":
lines.append(f"CentValue {tmp} = cent_qvaere({param_vars[0]}, {param_vars[1]});")
case _:
raise NotImplementedError(node.builtin)

View File

@@ -3,6 +3,7 @@
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <regex.h>
/* ------------------------------------------------------------------ */
/* Global arena */
@@ -870,6 +871,37 @@ CentValue cent_dict_keys(CentValue dict) {
return result;
}
/* ------------------------------------------------------------------ */
/* Regex */
/* ------------------------------------------------------------------ */
/*
 * QVAERE: return a list of every non-overlapping match of the POSIX
 * extended regex `pattern` found in `text`, in left-to-right order.
 *
 * Both arguments must be CENT_STR; otherwise a type error is reported.
 * An invalid pattern is reported as a runtime error carrying the
 * regerror() message. Each list element is the full matched text
 * (capture groups are not reported separately).
 *
 * Zero-length matches are reported too, including one at the very end
 * of `text`, so an empty pattern on "ab" yields three empty strings.
 */
CentValue cent_qvaere(CentValue pattern, CentValue text) {
    if (pattern.type != CENT_STR || text.type != CENT_STR)
        cent_type_error("'QVAERE' requires two strings");

    regex_t re;
    int rc = regcomp(&re, pattern.sval, REG_EXTENDED);
    if (rc != 0) {
        char errbuf[256];
        regerror(rc, &re, errbuf, sizeof(errbuf));
        /* No regfree() here: POSIX leaves `re` undefined after a failed
         * regcomp(), so there is nothing that may safely be released. */
        /* NOTE(review): assumes cent_runtime_error() does not return. */
        cent_runtime_error(errbuf);
    }

    CentValue result = cent_list_new(8);
    const char *start = text.sval;
    const char *cursor = start;
    regmatch_t match;

    for (;;) {
        /* Once we have advanced past the start, '^' must no longer match
         * at the restart position, or anchored patterns repeat. */
        int eflags = (cursor == start) ? 0 : REG_NOTBOL;
        if (regexec(&re, cursor, 1, &match, eflags) != 0)
            break;

        int len = match.rm_eo - match.rm_so;
        char *buf = cent_arena_alloc(cent_arena, len + 1);
        memcpy(buf, cursor + match.rm_so, len);
        buf[len] = '\0';
        cent_list_push(&result, cent_str(buf));

        cursor += match.rm_eo;
        if (len == 0) {
            /* A zero-length match makes no progress on its own. At the end
             * of the text we are done; stepping further would move past
             * the NUL terminator. Otherwise skip one byte to avoid looping
             * forever on the same position. */
            if (*cursor == '\0')
                break;
            cursor++;
        }
    }

    regfree(&re);
    return result;
}
/* ------------------------------------------------------------------ */
/* Initialisation */
/* ------------------------------------------------------------------ */

View File

@@ -232,6 +232,7 @@ CentValue cent_ordina(CentValue lst); /* ORDINA */
CentValue cent_lege(CentValue path); /* LEGE */
void cent_scribe(CentValue path, CentValue content); /* SCRIBE */
void cent_adivnge(CentValue path, CentValue content); /* ADIVNGE */
CentValue cent_qvaere(CentValue pattern, CentValue text); /* QVAERE */
/* ------------------------------------------------------------------ */
/* Array helpers */

View File

@@ -57,7 +57,8 @@ builtin_tokens = [("BUILTIN", i) for i in [
"TYPVS",
"LEGE",
"SCRIBE",
"ADIVNGE"
"ADIVNGE",
"QVAERE"
]]
data_tokens = [

View File

@@ -1,2 +0,0 @@
\relax
\gdef \@abspage@last{2}

View File

@@ -1,269 +0,0 @@
This is XeTeX, Version 3.141592653-2.6-0.999998 (TeX Live 2026/Arch Linux) (preloaded format=xelatex 2026.4.8) 21 APR 2026 22:51
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
**main.tex
(./main.tex
LaTeX2e <2025-11-01>
L3 programming layer <2026-01-19>
(/usr/share/texmf-dist/tex/latex/base/article.cls
Document Class: article 2025/01/22 v1.4n Standard LaTeX document class
(/usr/share/texmf-dist/tex/latex/base/size10.clo
File: size10.clo 2025/01/22 v1.4n Standard LaTeX file (size option)
)
\c@part=\count271
\c@section=\count272
\c@subsection=\count273
\c@subsubsection=\count274
\c@paragraph=\count275
\c@subparagraph=\count276
\c@figure=\count277
\c@table=\count278
\abovecaptionskip=\skip49
\belowcaptionskip=\skip50
\bibindent=\dimen148
)
(/usr/share/texmf-dist/tex/latex/geometry/geometry.sty
Package: geometry 2020/01/02 v5.9 Page Geometry
(/usr/share/texmf-dist/tex/latex/graphics/keyval.sty
Package: keyval 2022/05/29 v1.15 key=value parser (DPC)
\KV@toks@=\toks17
)
(/usr/share/texmf-dist/tex/generic/iftex/ifvtex.sty
Package: ifvtex 2019/10/25 v1.7 ifvtex legacy package. Use iftex instead.
(/usr/share/texmf-dist/tex/generic/iftex/iftex.sty
Package: iftex 2024/12/12 v1.0g TeX engine tests
))
\Gm@cnth=\count279
\Gm@cntv=\count280
\c@Gm@tempcnt=\count281
\Gm@bindingoffset=\dimen149
\Gm@wd@mp=\dimen150
\Gm@odd@mp=\dimen151
\Gm@even@mp=\dimen152
\Gm@layoutwidth=\dimen153
\Gm@layoutheight=\dimen154
\Gm@layouthoffset=\dimen155
\Gm@layoutvoffset=\dimen156
\Gm@dimlist=\toks18
)
(/usr/share/texmf-dist/tex/latex/fontspec/fontspec.sty
(/usr/share/texmf-dist/tex/latex/l3packages/xparse/xparse.sty
(/usr/share/texmf-dist/tex/latex/l3kernel/expl3.sty
Package: expl3 2026-01-19 L3 programming layer (loader)
(/usr/share/texmf-dist/tex/latex/l3backend/l3backend-xetex.def
File: l3backend-xetex.def 2025-10-09 L3 backend support: XeTeX
\g__graphics_track_int=\count282
\g__pdfannot_backend_int=\count283
\g__pdfannot_backend_link_int=\count284
))
Package: xparse 2025-10-09 L3 Experimental document command parser
)
Package: fontspec 2025/09/29 v2.9g Font selection for XeLaTeX and LuaLaTeX
(/usr/share/texmf-dist/tex/latex/fontspec/fontspec-xetex.sty
Package: fontspec-xetex 2025/09/29 v2.9g Font selection for XeLaTeX and LuaLaTe
X
\l__fontspec_script_int=\count285
\l__fontspec_language_int=\count286
\l__fontspec_strnum_int=\count287
\l__fontspec_tmp_int=\count288
\l__fontspec_tmpa_int=\count289
\l__fontspec_tmpb_int=\count290
\l__fontspec_tmpc_int=\count291
\l__fontspec_em_int=\count292
\l__fontspec_emdef_int=\count293
\l__fontspec_strong_int=\count294
\l__fontspec_strongdef_int=\count295
\l__fontspec_tmpa_dim=\dimen157
\l__fontspec_tmpb_dim=\dimen158
\l__fontspec_tmpc_dim=\dimen159
(/usr/share/texmf-dist/tex/latex/base/fontenc.sty
Package: fontenc 2025/07/18 v2.1d Standard LaTeX package
)
(/usr/share/texmf-dist/tex/latex/fontspec/fontspec.cfg)))
Package fontspec Info:
(fontspec) Hurmit Nerd Font Mono scale = 0.7.
Package fontspec Info:
(fontspec) Hurmit Nerd Font Mono scale = 0.7.
Package fontspec Info:
(fontspec) Hurmit Nerd Font Mono/B scale = 0.7.
Package fontspec Info:
(fontspec) Hurmit Nerd Font Mono/I scale = 0.7.
Package fontspec Info:
(fontspec) Hurmit Nerd Font Mono/BI scale = 0.7.
Package fontspec Info:
(fontspec) Font family 'HurmitNerdFontMono(0)' created for font
(fontspec) 'Hurmit Nerd Font Mono' with options
(fontspec) [WordSpace={1,0,0},HyphenChar=None,PunctuationSpace=Word
Space,Scale=0.7].
(fontspec)
(fontspec) This font family consists of the following NFSS
(fontspec) series/shapes:
(fontspec)
(fontspec) - 'normal' (m/n) with NFSS spec.: <->s*[0.7]"Hurmit
(fontspec) Nerd Font Mono/OT:script=DFLT;language=dflt;"
(fontspec) - 'bold' (b/n) with NFSS spec.: <->s*[0.7]"Hurmit Nerd
(fontspec) Font Mono/B/OT:script=DFLT;language=dflt;"
(fontspec) - 'italic' (m/it) with NFSS spec.: <->s*[0.7]"Hurmit
(fontspec) Nerd Font Mono/I/OT:script=DFLT;language=dflt;"
(fontspec) - 'bold italic' (b/it) with NFSS spec.:
(fontspec) <->s*[0.7]"Hurmit Nerd Font
(fontspec) Mono/BI/OT:script=DFLT;language=dflt;"
No file main.aux.
\openout1 = `main.aux'.
LaTeX Font Info: Checking defaults for OML/cmm/m/it on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for OMS/cmsy/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for OT1/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for T1/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for TS1/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for TU/lmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for OMX/cmex/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
LaTeX Font Info: Checking defaults for U/cmr/m/n on input line 11.
LaTeX Font Info: ... okay on input line 11.
*geometry* driver: auto-detecting
*geometry* detected driver: xetex
*geometry* verbose mode - [ preamble ] result:
* driver: xetex
* paper: a4paper
* layout: <same size as paper>
* layoutoffset:(h,v)=(0.0pt,0.0pt)
* modes:
* h-part:(L,W,R)=(72.26999pt, 452.9679pt, 72.26999pt)
* v-part:(T,H,B)=(72.26999pt, 700.50687pt, 72.26999pt)
* \paperwidth=597.50787pt
* \paperheight=845.04684pt
* \textwidth=452.9679pt
* \textheight=700.50687pt
* \oddsidemargin=0.0pt
* \evensidemargin=0.0pt
* \topmargin=-37.0pt
* \headheight=12.0pt
* \headsep=25.0pt
* \topskip=10.0pt
* \footskip=30.0pt
* \marginparwidth=57.0pt
* \marginparsep=11.0pt
* \columnsep=10.0pt
* \skip\footins=9.0pt plus 4.0pt minus 2.0pt
* \hoffset=0.0pt
* \voffset=0.0pt
* \mag=1000
* \@twocolumnfalse
* \@twosidefalse
* \@mparswitchfalse
* \@reversemarginfalse
* (1in=72.27pt=25.4mm, 1cm=28.453pt)
Package fontspec Info:
(fontspec) Adjusting the maths setup (use [no-math] to avoid
(fontspec) this).
\symlegacymaths=\mathgroup4
LaTeX Font Info: Overwriting symbol font `legacymaths' in version `bold'
(Font) OT1/cmr/m/n --> OT1/cmr/bx/n on input line 11.
LaTeX Font Info: Redeclaring math accent \acute on input line 11.
LaTeX Font Info: Redeclaring math accent \grave on input line 11.
LaTeX Font Info: Redeclaring math accent \ddot on input line 11.
LaTeX Font Info: Redeclaring math accent \tilde on input line 11.
LaTeX Font Info: Redeclaring math accent \bar on input line 11.
LaTeX Font Info: Redeclaring math accent \breve on input line 11.
LaTeX Font Info: Redeclaring math accent \check on input line 11.
LaTeX Font Info: Redeclaring math accent \hat on input line 11.
LaTeX Font Info: Redeclaring math accent \dot on input line 11.
LaTeX Font Info: Redeclaring math accent \mathring on input line 11.
LaTeX Font Info: Redeclaring math symbol \colon on input line 11.
LaTeX Font Info: Redeclaring math symbol \Gamma on input line 11.
LaTeX Font Info: Redeclaring math symbol \Delta on input line 11.
LaTeX Font Info: Redeclaring math symbol \Theta on input line 11.
LaTeX Font Info: Redeclaring math symbol \Lambda on input line 11.
LaTeX Font Info: Redeclaring math symbol \Xi on input line 11.
LaTeX Font Info: Redeclaring math symbol \Pi on input line 11.
LaTeX Font Info: Redeclaring math symbol \Sigma on input line 11.
LaTeX Font Info: Redeclaring math symbol \Upsilon on input line 11.
LaTeX Font Info: Redeclaring math symbol \Phi on input line 11.
LaTeX Font Info: Redeclaring math symbol \Psi on input line 11.
LaTeX Font Info: Redeclaring math symbol \Omega on input line 11.
LaTeX Font Info: Redeclaring math symbol \mathdollar on input line 11.
LaTeX Font Info: Redeclaring symbol font `operators' on input line 11.
LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font
(Font) `operators' in the math version `normal' on input line 11.
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
(Font) OT1/cmr/m/n --> TU/lmr/m/n on input line 11.
LaTeX Font Info: Encoding `OT1' has changed to `TU' for symbol font
(Font) `operators' in the math version `bold' on input line 11.
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
(Font) OT1/cmr/bx/n --> TU/lmr/m/n on input line 11.
LaTeX Font Info: Overwriting symbol font `operators' in version `normal'
(Font) TU/lmr/m/n --> TU/lmr/m/n on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `normal'
(Font) OT1/cmr/m/it --> TU/lmr/m/it on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathbf' in version `normal'
(Font) OT1/cmr/bx/n --> TU/lmr/b/n on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `normal'
(Font) OT1/cmss/m/n --> TU/lmss/m/n on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `normal'
(Font) OT1/cmtt/m/n --> TU/HurmitNerdFontMono(0)/m/n on input
line 11.
LaTeX Font Info: Overwriting symbol font `operators' in version `bold'
(Font) TU/lmr/m/n --> TU/lmr/b/n on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathit' in version `bold'
(Font) OT1/cmr/bx/it --> TU/lmr/b/it on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathsf' in version `bold'
(Font) OT1/cmss/bx/n --> TU/lmss/b/n on input line 11.
LaTeX Font Info: Overwriting math alphabet `\mathtt' in version `bold'
(Font) OT1/cmtt/m/n --> TU/HurmitNerdFontMono(0)/b/n on input
line 11.
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <7> on input line 14.
LaTeX Font Info: External font `cmex10' loaded for size
(Font) <5> on input line 14.
LaTeX Font Info: Font shape `TU/HurmitNerdFontMono(0)/m/n' will be
(Font) scaled to size 6.99997pt on input line 22.
LaTeX Warning: Float too large for page by 44.293pt on input line 93.
[1
] [2] (./main.aux)
***********
LaTeX2e <2025-11-01>
L3 programming layer <2026-01-19>
***********
)
Here is how much of TeX's memory you used:
3526 strings out of 470191
106539 string characters out of 5479698
562689 words of memory out of 5000000
32135 multiletter control sequences out of 15000+600000
627857 words of font info for 57 fonts, out of 8000000 for 9000
14 hyphenation exceptions out of 8191
73i,9n,93p,432b,328s stack positions out of 10000i,1000n,20000p,200000b,200000s
Output written on main.pdf (2 pages).

Binary file not shown.

1
snippets/scripta.cent Normal file
View File

@@ -0,0 +1 @@
CVM SCRIPTA

BIN
snippets/scripta.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.7 KiB

View File

@@ -606,6 +606,20 @@ builtin_tests = [
("TYPVS(FVNCTIO () VT { REDI(I) })", Program([], [ExpressionStatement(BuiltIn("TYPVS", [Fvnctio([], [Redi([Numeral("I")])])]))]), ValStr("FVNCTIO")),
# TYPVS: null
("TYPVS(NVLLVS)", Program([], [ExpressionStatement(BuiltIn("TYPVS", [Nullus()]))]), ValStr("NVLLVS")),
# QVAERE: basic literal match
('QVAERE("ab", "abcabc")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("ab"), String("abcabc")]))]), ValList([ValStr("ab"), ValStr("ab")])),
# QVAERE: no match → empty list
('QVAERE("xyz", "abc")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("xyz"), String("abc")]))]), ValList([])),
# QVAERE: regex character class
('QVAERE("[a-z]+", "abc123def")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("[a-z]+"), String("abc123def")]))]), ValList([ValStr("abc"), ValStr("def")])),
# QVAERE: empty text → empty list
('QVAERE("a", "")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("a"), String("")]))]), ValList([])),
# QVAERE: capture groups still return full match
('QVAERE("(a)(b)", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("(a)(b)"), String("ab")]))]), ValList([ValStr("ab")])),
# QVAERE: empty pattern matches between every character
('QVAERE("", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String(""), String("ab")]))]), ValList([ValStr(""), ValStr(""), ValStr("")])),
# QVAERE: dot matches any character
('QVAERE(".", "ab")', Program([], [ExpressionStatement(BuiltIn("QVAERE", [String("."), String("ab")]))]), ValList([ValStr("a"), ValStr("b")])),
]
class TestBuiltins(unittest.TestCase):
@@ -687,6 +701,9 @@ error_tests = [
("CVM FRACTIO\n[I, II, III][I VSQVE IIIS]", CentvrionError), # slice with fractional upper bound
("CVM FRACTIO\n[I, II, III][I / II VSQVE III]", CentvrionError), # slice with division-fraction lower bound
("TEMPTA {\nDESIGNA x VT I / NVLLVS\n} CAPE e {\nDESIGNA y VT I / NVLLVS\n}", CentvrionError), # uncaught error in catch block propagates
('QVAERE(I, "abc")', CentvrionError), # QVAERE requires strings, not int
('QVAERE("abc", I)', CentvrionError), # QVAERE requires strings, not int
('QVAERE("[", "abc")', CentvrionError), # QVAERE invalid regex
]
class TestErrors(unittest.TestCase):