bqn-safetensors/json.bqn
2024-11-15 20:58:49 +01:00

227 lines
8.4 KiB
BQN
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Part of bqn-libs: https://github.com/mlochbaum/bqn-libs
# 0-BSD License
# JSON: JavaScript Object Notation
# Names exported from this file. The list brackets and the export arrow
# were missing, which left two bare identifiers instead of an export statement.
⟨
  Parse   # JSON string to BQN
  Export  # BQN value to JSON (also Parse⁼)
⟩⇐
# JSON numbers, strings, and lists correspond directly to BQN
# Objects are represented as keys≍values
# true, false, null are represented as <"true", <"false", <"null"
# An empty list exports as "" if its fill is a space and [] otherwise
# Lookup helpers for the three JSON constants true/false/null.
# NOTE(review): this listing appears to have lost BQN's non-ASCII glyphs
# (assignment arrows, list brackets, many primitives) and its indentation.
# Code lines are kept verbatim below; restore them from a clean copy
# before running.
Consts, ExportConst {
# name: the constant spellings; val: built from name with <¨
val <¨ name "true""false""null"
# Consts: looks 𝕩 up among the names; 𝕎 is handed the error message
Consts {
i name 𝕩
"Unknown constant" 𝕎 i = name # 𝕎 formats errors
i val
}
# cm: assertion message used by ExportConst below
cm "Enclosed value must be JSON constant"' '¨"or "(¯1)name
# ExportConst: presumably the reverse mapping (value back to its name);
# asserts with cm first - TODO confirm once glyphs are restored
ExportConst {
i val< 𝕩
cm ! i<val
i name
}
}
# String escape handling: UnEscape is used when tokenizing, Escape when
# exporting strings.
# NOTE(review): non-ASCII BQN glyphs (arrows, brackets, strand ties, many
# primitives) appear to be missing from this listing; code lines are kept
# verbatim and need restoring from a clean copy before they will parse.
UnEscape, Escape {
# in: characters allowed after a backslash in a basic escape
in """\/bfrnt"
# out: presumably the decoded characters; the trailing digit run looks like
# the code points 8 12 13 10 9 with their separators lost - TODO confirm
out (3in)@+81213109
diff (out-in) 0
# Basic: handles the single-character escapes; errors on anything not in `in`
Basic {
i in 𝕩
"Unknown escape" ! ´𝕨i<in
𝕩 + 𝕨 × i diff
}
hc "0Aa"
hb hc+0˘1066 # Hex boundaries, start and after-end
ho 2/hc-01010 # Corresponding offsets
# Hex: handles \u escapes; requires 4 hex characters after each \u,
# then passes the values through Surrogate
Hex { u 𝕊 𝕩:
d 𝕩 /˜ m ` (40)» »u
t hb d
"String \u must be followed by 4 hex characters" ! ´1=2|t
# Now m can't run past the end or self-intersect,
# or it would have hit a closing quote or backslash
v 16×+˜˝ 4 d-tho
we Surrogate v
(w+@-'u')+(u/)𝕩, e(u/)m
}
sr 210 # Surrogate base/radix
sb sr×52+2+3 # Surrogate character boundaries
# Surrogate: validates and combines surrogate halves; returns the two
# values v and h (first-half mask)
Surrogate {
c (sb)|sb𝕩 # 0 for non-surrogate, 1 then 2 for surrogate
h 1=c # First half
"Unmatched surrogate pair" ! (0h) (2=c)0
r 𝕩 - c0sb # Numeric value of surrogates
v r + »h×sr×(26)+r
v, h
}
# UnEscape: 𝕨 (named e) and 𝕩 as passed by Tokenize; splits the work
# between Basic and Hex according to whether the escaped character is 'u'
UnEscape { e 𝕊 𝕩:
u e 𝕩='u'
(u<e)Basic u Hex(´u) 𝕩0
}
Hex32 { # Convert 𝕩<32 to two hex digits
ul 16(÷˜|)𝕩 # Upper and lower digits; ∧´u<2
'0'+u˘l-(10+-´"0A")×10l
}
# Escape: produces the escaped form of a string for export
Escape {
e (𝕩2in) 𝕩<@+32 # Quote, backslash, and control characters
j /e k ¬e # Their indices; characters to keep
c j 𝕩 # Characters to be escaped
m c - (outc)diff # Escape the basic ones
g 2 ˜ m<@+32 # Group 𝕨 to separate...
nu g m # Characters requiring basic, hex escapes
i /k26/¨gj # Target indices
i # Use them to reorder:
k/𝕩 # Kept characters
'\'˘n # Basic escapes
"\u00"˘ Hex32 u-@ # Hex escapes
}
}
# Format error with message 𝕨 at locations 𝕩 in 𝕗
# The message is followed by the first offending source line with carets
# under the error columns, then a note listing any further lines.
# NOTE(review): non-ASCII BQN glyphs appear to be missing from this listing;
# code lines are kept verbatim and need restoring before they will parse.
_fmtErr { msg src _𝕣 pos:
lf @+10
s 01+ b/src=lf # Line start and break positions
l (1s) pos # Error line numbers
c pos - ls # Error column numbers
d l # First line only
1lf¨
msg
(ds)(dbsrc)src # Display first line
" ^" ˜ / (d=l)/c # And carets under errors
# Trailing block: the "Also lines" note, produced only when 0<n
{
n a d/l # 1-indexed numbers of other lines
0<n ? ((-1=n)"Also lines")1(<", ")˘•Repr¨1+a
;
}
}
# JSON to ⟨tokens, constants, numbers, strings⟩
# Tokens are characters {}[],: and a constant, 0 number, " string
# Values correspond to a, 0, and " in order
# NOTE(review): non-ASCII BQN glyphs appear to be missing from this listing;
# code lines are kept verbatim and need restoring before they will parse.
# The final line also lists f (first-character mask) between tok and cns,
# which the summary above does not mention.
Tokenize {
# FE formats an error against the input text; _err raises with that message
FE 𝕩_fmtErr _err {(! 𝕗 FE /)(´)}
# Strings
e »eo <`'\'=𝕩
s `q e<'"'=𝕩
"Unclosed quote" ! ¬¯1s
"Backslash outside string"_err s<eo
"Un-escaped control character"_err s>𝕩@+32
xedr e UnEscape 𝕩 # Escaped 𝕩; characters to drop
sg ((1-˜(s>qeodr)×+`)+´)sq # Start at s∧q; exclude q, eo, dr
str sg xe # Strings
# Numbers and constants
b s 𝕩@+9101332 # Whitespace (blank)
l ¬ b 𝕩"""{}[],:" # Word characters
w »< l # Word starts
neg '-' = 𝕩
dig ('0'𝕩) 𝕩'9'
m l (+`w)0w/digneg # Numbers
n mw k m<w # Number and constant starts
"Leading zero"_err (n»nneg)('0'=𝕩)«dig
"Digits required on both sides of decimal"_err (m'.'=𝕩)>(»«)dig
# CE: error formatter handed to Consts as its 𝕎
CE {(! 𝕨 FE /(/k))(´) 𝕩}
cns ce Consts (1-˜(m<l)×+`k)𝕩 # Constants
num •ParseFloat¨ (1-˜m×+`n)𝕩 # Numbers
# Tokenize
f ¬bw<l # First characters of tokens
tok '0'¨((f/n)/) 'a'¨((f/k)/) f/𝕩
tok, f, cns, num, str
}
# Parse: JSON text to a BQN value. Tokenizes the input, validates bracket
# and separator structure with positioned error messages, then builds the
# result from the constant, number and string values.
# NOTE(review): non-ASCII BQN glyphs appear to be missing from this listing;
# code lines are kept verbatim and need restoring before they will parse.
Parse {
# First case forwards to Export; presumably this is the inverse case
# (the file header says Export is also Parse's inverse) - TODO confirm
𝕊𝕩: Export 𝕩 ;
"Empty input" ! 0<𝕩
ttfcnsnumstrk Tokenize 𝕩
"Empty input" ! 0<t
# Validate
# _tr records persistent transformations
# Temporary transformations are passed to _err_ as 𝔽
trs tf,/ _tr {trs𝕨𝔽𝕨𝔽𝕩}
FE 𝕩_fmtErr _err_ {𝕗{! 𝕗 FE 𝕩/𝔾{𝕎𝕩}´trs}𝔾(´)}
# q, c: masks of string tokens and colon tokens
q '"'=t
c ':'=t
"Object keys must be strings"_err_ (»q)<c
g +`(cot"[{")-cct"]}" # Bracket depth ordering indices
u g _tr t
r +` s u"[{"
o s/'{'=u # Container is object
uc (u"]}")/'}'=u
"Unmatched brackets" ! o=uc
"Mismatched brackets"_err_{s/𝕩} o uc
v («¬s) /_tr u # Remove empty lists/objects
vs v "[{,:" # Must alternate 0101...10
"Improper , or : usage"_err_(´) 1(=˜) vs
v /_tr˜ vs
# v should be composed of lists [,,, and objects {:,:,:
# Convert {: to { and ,: to : to get [,,, and {::
"Multiple keys for one value"_err_ »v=':'
v /_tr˜ ¬((»v='{') (v=',')«)v=':'
# Or, every , follows list-like [, and every : follows object-like {:
d v",:"
"Top-level , or :"_err_{1𝕩} 1d
"Bad object structure"_err_ d > »=v"{:"
# Keys
l (g) r # Container index
j +`× o # Object index (start at 1; 0 if list)
keys ((q/(«c)×l0j)1+´o) strk
str keys
# Purely numeric lists
l nm01(/˜)s(/)u"]}""a"
nn +´¬nm
nl num˜(1+´nm)˜0<×(1-nn)+(t='0')/l
jjjn 2(1nm)j
# Ob: pairs a container's values with its keys (objects only, judging by
# the 0< test) - TODO confirm once glyphs are restored
Ob keys(0<)
n jn Ob¨ 1nl
# Build collections
nv n -˜ valscns,nl,str,n # Initial set of values
f (l<nn)¬(«c)co','=t # Filter for just values a0"]}
vi (f/'0'=t)+(2×f/q)+3×f/cc # Value indices
i vi (nv) nv+-cc/»l # Adjust for collection ordering
jj {vals𝕨 Ob 𝕩vals@}¨ ((jj)˜1-˜f/l)i
# Result is taken from the end of vals
¯1vals
}
# Render the number 𝕩 as JSON number text.
# Errors on NaN and on infinities, which JSON has no syntax for.
# The definition arrows and the comparison against ∞ were missing: the
# infinity assertion was testing |𝕩 itself rather than a boolean.
ExportNumber ← {
  "NaN can't be represented in JSON" ! =˜𝕩          # 𝕩=𝕩 is expected to fail only for NaN
  "Infinities can't be represented in JSON" ! ∞≠|𝕩  # Magnitude must be finite
  r ← •Repr 𝕩
  # •Repr uses ¯ as the minus sign; shift exactly those characters to ASCII -
  r + (-´"-¯")×r='¯'
}
# 1 if list 𝕩 consists only of characters (•Type gives 2 for a character).
# An empty 𝕩 is judged by its fill: 1⊸↑ is applied only when 0=≠𝕩, so the
# single element examined is the fill. This matches the file header, where
# an empty list exports as "" if its fill is a space.
# The definition arrow, the ∧ reduction, and the take/length glyphs were missing.
IsString ← {∧´2=•Type¨ 1⊸↑⍟(0=≠) 𝕩}
# Export a rank-1 array: a character list becomes a JSON string, anything
# else a JSON array of its exported elements.
# The definition arrow and the join/ravel/drop glyphs were missing.
ExportList ← {
  IsString 𝕩 ? ExportString 𝕩 ;
  # Pair a "," with every exported element, flatten, drop the leading
  # separator, join everything, then wrap in brackets
  "["∾"]"∾˜ ∾1↓⥊(<",")≍˘Export¨ 𝕩
}
# Export a string: escape it, then surround the result with double quotes.
# In the train, 𝕨 (∾∾⊣) 𝕩 is 𝕨∾𝕩∾𝕨, with 𝕨 the quote character.
# The definition arrow and the body of the train were missing (empty parentheses).
ExportString ← '"' (∾∾⊣) Escape
# Export a keys≍values array as a JSON object.
# Asserts the two-row shape and that every key is a string, exports keys
# with ExportString and values with Export, then joins them inside braces.
# NOTE(review): non-ASCII BQN glyphs appear to be missing from this listing;
# code lines are kept verbatim and need restoring before they will parse.
ExportObject {
"Object must consist of keys≍values" ! 2=𝕩
[k,v] 𝕩
"Object keys must be strings" ! ´ IsString¨ k
kv (ExportString¨ k) ˘ Export¨ v
# Presumably interleaves ":" after keys and "," after values, dropping the
# final separator via the ¯1 - TODO confirm against a clean copy
"{""}"˜ ¯1 kv ˘ (kv) ":",","
}
# Export a BQN value as JSON text.
# Dispatches first on •Type clamped to 2 (0 array, 1 number, anything else
# rejected), then for arrays on rank clamped to 3.
# The Choose modifiers, list brackets, and the ∘ in the error branches were
# missing, so the dispatch table did not parse.
Export ← (2⌊•Type)◶⟨     # Check type
  (3⌊=)◶⟨                # Type 0, array: Check rank
    ExportConst          # 0 constant (enclosed string)
    ExportList           # 1 List or string
    ExportObject         # 2 Object
    !∘"Rank >2 cannot be exported to JSON"
  ⟩
  ExportNumber           # Type 1: number
  !∘"Only numbers and arrays can be exported to JSON"
⟩