Initial commit

This commit is contained in:
Dimitri Lozeve 2024-11-15 20:58:49 +01:00
commit 6c1d57f9cd
2 changed files with 330 additions and 0 deletions

227
json.bqn Normal file
View file

@ -0,0 +1,227 @@
# Part of bqn-libs: https://github.com/mlochbaum/bqn-libs
# 0-BSD License
# JSON: JavaScript Object Notation
Parse # JSON string to BQN
Export # BQN value to JSON (also Parse⁼)
# JSON numbers, strings, and lists correspond directly to BQN
# Objects are represented as keys≍values
# true, false, null are represented as <"true", <"false", <"null"
# An empty list exports as "" if its fill is a space and [] otherwise
# Conversion between the JSON constants true/false/null and their BQN form
# (enclosed strings, per the file header).
# Consts takes an error formatter as 𝕎 and reports "Unknown constant" through it.
# ExportConst fails with the message `cm` when given an enclosed value that is
# not one of the known constants.
# NOTE(review): this text appears to have lost BQN glyphs (no assignment arrow
# is visible between `val`/`name` and their values) — restore from the upstream
# file before running.
Consts, ExportConst {
val <¨ name "true""false""null"
Consts {
i name 𝕩
"Unknown constant" 𝕎 i = name # 𝕎 formats errors
i val
}
# Error text for ExportConst; built from a fixed prefix plus the constant names.
cm "Enclosed value must be JSON constant"' '¨"or "(¯1)name
ExportConst {
i val< 𝕩
cm ! i<val
i name
}
}
# String escape handling for JSON.
# UnEscape processes backslash escapes: single-character ones go through Basic,
# \u sequences go through Hex (which also combines surrogate pairs).
# Escape does the reverse for quote, backslash, and control characters.
# NOTE(review): several primitive glyphs appear to be missing from this text;
# the comments below describe only what the surviving code and messages show.
UnEscape, Escape {
# `in` lists the characters allowed after a backslash; `out` is derived from
# `in` and a list of character codes, and `diff` from their difference.
in """\/bfrnt"
out (3in)@+81213109
diff (out-in) 0
# Single-character escapes; fails with "Unknown escape" on anything not in `in`.
Basic {
i in 𝕩
"Unknown escape" ! ´𝕨i<in
𝕩 + 𝕨 × i diff
}
hc "0Aa"
hb hc+0˘1066 # Hex boundaries, start and after-end
ho 2/hc-01010 # Corresponding offsets
# \u escapes: 𝕨 (u) marks the 'u' positions, 𝕩 is the text.
Hex { u 𝕊 𝕩:
d 𝕩 /˜ m ` (40)» »u
t hb d
"String \u must be followed by 4 hex characters" ! ´1=2|t
# Now m can't run past the end or self-intersect,
# or it would have hit a closing quote or backslash
v 16×+˜˝ 4 d-tho
we Surrogate v
(w+@-'u')+(u/)𝕩, e(u/)m
}
sr 210 # Surrogate base/radix
sb sr×52+2+3 # Surrogate character boundaries
# Validates and combines UTF-16 surrogate halves among the decoded \u values.
Surrogate {
c (sb)|sb𝕩 # 0 for non-surrogate, 1 then 2 for surrogate
h 1=c # First half
"Unmatched surrogate pair" ! (0h) (2=c)0
r 𝕩 - c0sb # Numeric value of surrogates
v r + »h×sr×(26)+r
v, h
}
# 𝕨 (e) marks escaped positions in 𝕩; those equal to 'u' are routed to Hex,
# the remaining ones to Basic.
UnEscape { e 𝕊 𝕩:
u e 𝕩='u'
(u<e)Basic u Hex(´u) 𝕩0
}
Hex32 { # Convert 𝕩<32 to two hex digits
ul 16(÷˜|)𝕩 # Upper and lower digits; ∧´u<2
'0'+u˘l-(10+-´"0A")×10l
}
# Produces the escaped form of a string: kept characters, two-character
# backslash escapes, and "\u00XX" escapes are merged back in original order.
Escape {
e (𝕩2in) 𝕩<@+32 # Quote, backslash, and control characters
j /e k ¬e # Their indices; characters to keep
c j 𝕩 # Characters to be escaped
m c - (outc)diff # Escape the basic ones
g 2 ˜ m<@+32 # Group 𝕨 to separate...
nu g m # Characters requiring basic, hex escapes
i /k26/¨gj # Target indices
i # Use them to reorder:
k/𝕩 # Kept characters
'\'˘n # Basic escapes
"\u00"˘ Hex32 u-@ # Hex escapes
}
}
# Format error with message 𝕨 at locations 𝕩 in 𝕗
# Output consists of the message, the first source line containing an error,
# a line of carets under the error columns, and (when errors occur on other
# lines) an "Also line(s)" list of those line numbers.
# NOTE(review): primitive glyphs appear to be missing from this text — confirm
# details against the upstream file.
_fmtErr { msg src _𝕣 pos:
lf @+10
s 01+ b/src=lf # Line start and break positions
l (1s) pos # Error line numbers
c pos - ls # Error column numbers
d l # First line only
1lf¨
msg
(ds)(dbsrc)src # Display first line
" ^" ˜ / (d=l)/c # And carets under errors
{
n a d/l # 1-indexed numbers of other lines
0<n ? ((-1=n)"Also lines")1(<", ")˘•Repr¨1+a
;
}
}
# JSON to ⟨tokens, constants, numbers, strings⟩
# Tokens are characters {}[],: and a constant, 0 number, " string
# Values correspond to a, 0, and " in order
# Errors raised here: "Unclosed quote", "Backslash outside string",
# "Un-escaped control character", "Leading zero",
# "Digits required on both sides of decimal", plus whatever Consts reports.
# NOTE(review): the final line lists five names (tok, f, cns, num, str) while
# the header comment names four — `f` (first characters of tokens) is the
# extra one. Primitive glyphs appear to be missing from this text.
Tokenize {
# FE formats a message against the source 𝕩; _err asserts with that message.
FE 𝕩_fmtErr _err {(! 𝕗 FE /)(´)}
# Strings
e »eo <`'\'=𝕩
s `q e<'"'=𝕩
"Unclosed quote" ! ¬¯1s
"Backslash outside string"_err s<eo
"Un-escaped control character"_err s>𝕩@+32
xedr e UnEscape 𝕩 # Escaped 𝕩; characters to drop
sg ((1-˜(s>qeodr)×+`)+´)sq # Start at s∧q; exclude q, eo, dr
str sg xe # Strings
# Numbers and constants
b s 𝕩@+9101332 # Whitespace (blank)
l ¬ b 𝕩"""{}[],:" # Word characters
w »< l # Word starts
neg '-' = 𝕩
dig ('0'𝕩) 𝕩'9'
m l (+`w)0w/digneg # Numbers
n mw k m<w # Number and constant starts
"Leading zero"_err (n»nneg)('0'=𝕩)«dig
"Digits required on both sides of decimal"_err (m'.'=𝕩)>(»«)dig
# CE is the error formatter handed to Consts as its left argument.
CE {(! 𝕨 FE /(/k))(´) 𝕩}
cns ce Consts (1-˜(m<l)×+`k)𝕩 # Constants
num •ParseFloat¨ (1-˜m×+`n)𝕩 # Numbers
# Tokenize
f ¬bw<l # First characters of tokens
tok '0'¨((f/n)/) 'a'¨((f/k)/) f/𝕩
tok, f, cns, num, str
}
# Parse a JSON string into a BQN value (see the file header for the mapping).
# The first body line routes to Export; per the header comment, this is the
# inverse case (Parse⁼).
# Steps visible below: tokenize, validate bracket and separator structure,
# attach keys to objects, gather purely numeric lists, then build collections.
# Errors raised here: "Empty input", "Object keys must be strings",
# "Unmatched brackets", "Mismatched brackets", "Improper , or : usage",
# "Multiple keys for one value", "Top-level , or :", "Bad object structure".
# NOTE(review): primitive glyphs appear to be missing from this text — the
# statement order matters throughout, so restore from upstream rather than
# re-deriving.
Parse {
𝕊𝕩: Export 𝕩 ;
"Empty input" ! 0<𝕩
ttfcnsnumstrk Tokenize 𝕩
"Empty input" ! 0<t
# Validate
# _tr records persistent transformations
# Temporary transformations are passed to _err_ as 𝔽
trs tf,/ _tr {trs𝕨𝔽𝕨𝔽𝕩}
FE 𝕩_fmtErr _err_ {𝕗{! 𝕗 FE 𝕩/𝔾{𝕎𝕩}´trs}𝔾(´)}
q '"'=t
c ':'=t
"Object keys must be strings"_err_ (»q)<c
g +`(cot"[{")-cct"]}" # Bracket depth ordering indices
u g _tr t
r +` s u"[{"
o s/'{'=u # Container is object
uc (u"]}")/'}'=u
"Unmatched brackets" ! o=uc
"Mismatched brackets"_err_{s/𝕩} o uc
v («¬s) /_tr u # Remove empty lists/objects
vs v "[{,:" # Must alternate 0101...10
"Improper , or : usage"_err_(´) 1(=˜) vs
v /_tr˜ vs
# v should be composed of lists [,,, and objects {:,:,:
# Convert {: to { and ,: to : to get [,,, and {::
"Multiple keys for one value"_err_ »v=':'
v /_tr˜ ¬((»v='{') (v=',')«)v=':'
# Or, every , follows list-like [, and every : follows object-like {:
d v",:"
"Top-level , or :"_err_{1𝕩} 1d
"Bad object structure"_err_ d > »=v"{:"
# Keys
l (g) r # Container index
j +`× o # Object index (start at 1; 0 if list)
keys ((q/(«c)×l0j)1+´o) strk
str keys
# Purely numeric lists
l nm01(/˜)s(/)u"]}""a"
nn +´¬nm
nl num˜(1+´nm)˜0<×(1-nn)+(t='0')/l
jjjn 2(1nm)j
# Ob pairs a container's values with its keys when it is an object.
Ob keys(0<)
n jn Ob¨ 1nl
# Build collections
nv n -˜ valscns,nl,str,n # Initial set of values
f (l<nn)¬(«c)co','=t # Filter for just values a0"]}
vi (f/'0'=t)+(2×f/q)+3×f/cc # Value indices
i vi (nv) nv+-cc/»l # Adjust for collection ordering
jj {vals𝕨 Ob 𝕩vals@}¨ ((jj)˜1-˜f/l)i
¯1vals
}
# Export a BQN number as JSON text.
# Fails on NaN and infinities, which JSON cannot represent.
# The number is formatted with •Repr; the last line shifts every '¯' in the
# result by the character difference between '-' and '¯', turning BQN's
# high minus into an ASCII minus.
ExportNumber {
"NaN can't be represented in JSON" ! =˜𝕩
"Infinities can't be represented in JSON" ! |𝕩
r •Repr 𝕩
r + (-´"-¯")×r='¯'
}
# Test whether 𝕩 should be treated as a string, by comparing •Type of
# elements against 2 (the character type).
# NOTE(review): the file header says an empty list counts as a string only if
# its fill is a space; the glyphs implementing that appear to be missing
# here — confirm against upstream.
IsString {´2=•Type¨ 1(0=) 𝕩}
# Export a list: strings are delegated to ExportString; any other list has
# each element exported with Export, separated by "," and wrapped in [ ].
ExportList {
IsString 𝕩 ? ExportString 𝕩 ;
"[""]"˜ 1(<",")˘Export¨ 𝕩
}
# Export a string: the text is passed through Escape, with double quotes
# involved in wrapping the result.
# NOTE(review): the combining glyphs are missing from this line — confirm.
ExportString '"' () Escape
# Export an object given as keys≍values.
# Fails unless the argument has the expected two-part form and every key
# passes IsString. Keys go through ExportString and values through Export;
# the pairs are then interleaved with ":" and "," and wrapped in { }.
ExportObject {
"Object must consist of keys≍values" ! 2=𝕩
[k,v] 𝕩
"Object keys must be strings" ! ´ IsString¨ k
kv (ExportString¨ k) ˘ Export¨ v
"{""}"˜ ¯1 kv ˘ (kv) ":",","
}
# Export a BQN value as JSON text, dispatching first on •Type and, for
# arrays, on rank:
#   rank 0 → ExportConst, rank 1 → ExportList, rank 2 → ExportObject,
#   higher ranks fail; numbers → ExportNumber; all other types fail.
Export (2•Type) # Check type
(3=) # Type 0, array: Check rank
ExportConst # 0 constant (enclosed string)
ExportList # 1 List or string
ExportObject # 2 Object
!"Rank >2 cannot be exported to JSON"
ExportNumber # Type 1: number
!"Only numbers and arrays can be exported to JSON"

103
safetensors.bqn Normal file
View file

@ -0,0 +1,103 @@
ExtractMetadata,GetArrayNames,GetArray,SerializeArrays
Parse,Export•Import"json.bqn"
# Read the JSON header of a safetensors byte buffer.
# The leading 8 bytes are cast to bits and folded into the number n
# (presumably the header length — confirm against the format spec).
# The result pairs the Parse output with n+8; GetArray uses that second value
# to locate the data buffer.
# NOTE(review): take/drop glyphs appear to be missing from this text.
ExtractHeader{𝕊bytes:
n2×+˜´8'c',1'u'•bit._cast 8bytes
Parse n8bytes,n+8
}
# Look up key 𝕩 in parsed JSON object 𝕨 (keys≍values form, per json.bqn).
# NOTE(review): no handling for a missing key is visible — confirm what
# callers should expect in that case.
JsonGet{((𝕨)<𝕩)1𝕨}
# Return the "__metadata__" entry of the header parsed from `bytes`.
# The second component of ExtractHeader's result is ignored.
ExtractMetadata{𝕊bytes:
header·ExtractHeader bytes
header JsonGet "__metadata__"
}
# List the array names found in the header of `bytes`.
# Presumably returns the header keys with "__metadata__" filtered out — the
# filtering glyphs are missing from this text, confirm against upstream.
GetArrayNames{𝕊bytes:
header·ExtractHeader bytes
"__metadata__"¨/header
}
# Decode unsigned integers from raw bytes 𝕩.
# Valid for sizes 8, 16, and 32 (passed as 𝕨)
# The bytes are grouped using 𝕨÷8, cast to bits, and each group is folded
# into a number.
ParseUint{2×+˜´˘8'c',1'u'•bit._cast˘ (𝕨÷8)𝕩}
# Decode signed integers of bit width 𝕨 from raw bytes 𝕩, by shifting and
# wrapping the ParseUint result (presumably two's complement — the power
# glyphs are missing from this text, confirm).
ParseInt{(-2𝕨-1)+(2𝕨)|(2𝕨-1)+𝕨ParseUint𝕩}
# Decode unsigned 64-bit integers by combining pairs of 32-bit ParseUint
# results.
# NOTE(review): results are ordinary BQN numbers, so values too large for a
# float to hold exactly would presumably lose precision — confirm.
ParseUint64{(232)×+˜´˘ 232 ParseUInt 𝕩}
# Decode signed 64-bit integers by combining pairs of 32-bit ParseInt results.
# NOTE(review): both 32-bit halves appear to be decoded as signed. If so, a
# low half with its top bit set would make the result off by 2^32; the low
# half likely needs ParseUint — confirm and add a test.
ParseInt64{(232)×+˜´˘ 232 ParseInt 𝕩}
# Parse a floating point number
# e is the size of the exponent part
# `bytes` holds the raw bytes of one number. They are cast to bits, then the
# sign, exponent and significand fields are extracted and combined in the
# final line, using the bias (2^(e-1))-1.
# NOTE(review): the one closing formula is applied to every input. No separate
# case for an all-zero or all-one exponent field is visible, so zero,
# subnormals, Inf and NaN may not decode correctly — confirm.
ParseFloat{e𝕊bytes:
n8'c',1'u'•bit._cast bytes
s(n)-e+1
sign1+2×-n
exponent2×+˜´e1n
significand2×+˜´1e1n
sign×(2exponent-((2e-1)-1))×significand÷2s
}
# Names of the supported safetensors dtypes.
# GetArray finds a dtype's position in this list and uses the same position
# in typeConversions, so the two lists must stay in the same order.
dtypes
"BOOL", # Boolean type
"U8", # Unsigned byte
"I8", # Signed byte
"F8_E5M2", # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
"F8_E4M3", # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
"I16", # Signed integer (16-bit)
"U16", # Unsigned integer (16-bit)
"F16", # Half-precision floating point
"BF16", # Brain floating point
"I32", # Signed integer (32-bit)
"U32", # Unsigned integer (32-bit)
"F32", # Floating point (32-bit)
"F64", # Floating point (64-bit)
"I64", # Signed integer (64-bit)
"U64", # Unsigned integer (64-bit)
# Byte-to-value decoders, one per entry of dtypes and in the same order.
# The number passed to ParseFloat is the exponent size in bits.
typeConversions
8'c', 1'u'•bit._cast, # BOOL
8ParseUint, # U8
8'c', 8'i'•bit._cast, # I8
5ParseFloat˘1, # F8_E5M2
4ParseFloat˘1, # F8_E4M3
8'c',16'i'•bit._cast, # I16
16ParseUint, # U16
5ParseFloat˘2, # F16
8ParseFloat˘2, # BF16
8'c',32'i'•bit._cast, # I32
32ParseUint, # U32
8ParseFloat˘4, # F32
8'c',64'f'•bit._cast, # F64
ParseInt64, # I64
ParseUint64, # U64
# Extract the array called `name` from safetensors data `bytes`.
# Reads the header entry for `name`, takes its "data_offsets", "shape" and
# "dtype" fields, picks the decoder at the dtype's position in
# typeConversions, and applies it to the selected part of the data buffer.
# NOTE(review): no check for an unknown dtype or a missing name is visible —
# confirm the failure behaviour.
GetArray{bytes𝕊name:
headernExtractHeader bytes
byteBufnbytes
infoheader JsonGet name
seinfo JsonGet "data_offsets"
shapeinfo JsonGet "shape"
dtypeIdxdtypes<info JsonGet "dtype"
convdtypeIdxtypeConversions
shapeConv sebyteBuf
}
# Serialize one array into its dtype name, shape, and raw bytes.
# Only two encodings are produced: "F64" (64-bit float cast) and "I32"
# (32-bit integer cast), chosen by a test on the values of 𝕩.
# NOTE(review): the selection test's glyphs are missing from this text;
# presumably it checks whether every element is an integer. No range check
# against 32 bits is visible — confirm.
SerializeArray{
dtype(´=𝕩)"F64""I32"
shape𝕩
data(´=)64'f',8'c'•bit._cast,32'i',8'c'•bit._cast𝕩
dtypeshapedata
}
# Serialize the arrays `arrs` under the given `names` into safetensors bytes.
# Each array goes through SerializeArray; data offsets are accumulated from
# the data lengths; a header object with "dtype", "shape" and "data_offsets"
# per name is exported as JSON; and the output is the encoded header length
# followed by the header JSON and the data.
# The length is cast as the two 32-bit integers n and 0, giving 8 bytes.
# NOTE(review): this assumes the header length fits in 32 bits — confirm.
SerializeArrays{names𝕊arrs:
dtypesshapesdatas<˘>SerializeArray¨arrs
dataOffsets<˘20+`¨datas
blocks{𝕊namedtypeshapedataOffset:
["dtype""shape""data_offsets",dtypeshapedataOffset]
}¨<˘>namesdtypesshapesdataOffsets
header[names,blocks]
nheaderJsonExport header
nEncoded32'i',8'c'•bit._castn,0
nEncodedheaderJsondatas
}