Initial commit
This commit is contained in:
commit
6c1d57f9cd
2 changed files with 330 additions and 0 deletions
227
json.bqn
Normal file
227
json.bqn
Normal file
|
@ -0,0 +1,227 @@
|
||||||
|
# Part of bqn-libs: https://github.com/mlochbaum/bqn-libs
|
||||||
|
# 0-BSD License
|
||||||
|
|
||||||
|
# JSON: JavaScript Object Notation
|
||||||
|
⟨
|
||||||
|
Parse # JSON string to BQN
|
||||||
|
Export # BQN value to JSON (also Parse⁼)
|
||||||
|
⟩⇐
|
||||||
|
|
||||||
|
# JSON numbers, strings, and lists correspond directly to BQN
|
||||||
|
# Objects are represented as keys≍values
|
||||||
|
# true, false, null are represented as <"true", <"false", <"null"
|
||||||
|
|
||||||
|
# An empty list exports as "" if its fill is a space and [] otherwise
|
||||||
|
|
||||||
|
# Conversion between the JSON constants true/false/null and their BQN
# representation, the enclosed strings <"true", <"false", <"null".
⟨Consts, ExportConst⟩ ← {
|
||||||
|
# name: the constant spellings; val: each spelling enclosed
val ← <¨ name ← "true"‿"false"‿"null"
|
||||||
|
# Consts: 𝕩 is a list of words; result is the matching entries of val.
# 𝕎 is called with the message and a mask of the words not found in name.
Consts ⇐ {
|
||||||
|
i ← name ⊐ 𝕩
|
||||||
|
"Unknown constant" 𝕎 i = ≠name # 𝕎 formats errors
|
||||||
|
i ⊏ val
|
||||||
|
}
|
||||||
|
# Error text for ExportConst: the fixed prefix followed by " true false or null"
cm ← "Enclosed value must be JSON constant"∾∾' '⊸∾¨"or "⊸∾⌾(¯1⊸⊑)name
|
||||||
|
# ExportConst: look 𝕩 up in val and return the corresponding spelling;
# fails with cm when 𝕩 is not one of the three values
ExportConst ⇐ {
|
||||||
|
i ← val⊸⊐⌾< 𝕩
|
||||||
|
cm ! i<≠val
|
||||||
|
i ⊑ name
|
||||||
|
}
|
||||||
|
}
|
||||||
|
# String escape handling: UnEscape decodes backslash escapes in source text,
# Escape produces the escaped form of a string for export.
⟨UnEscape, Escape⟩ ← {
|
||||||
|
# Characters accepted after a backslash in a basic (single-letter) escape
in ← """\/bfrnt"
|
||||||
|
# Their meanings: the first three stand for themselves,
# and b f r n t are code points 8 12 13 10 9
out ← (3↑in)∾@+8‿12‿13‿10‿9
|
||||||
|
# Offset from escape letter to meaning; the trailing 0 is what ⊏ selects
# for a character that ⊐ did not find in in
diff ← (out-in) ∾ 0
|
||||||
|
# Basic: 𝕨 is a mask over 𝕩 selecting basic escape letters.
# Selected letters are replaced by their meaning, the rest are unchanged.
Basic ← {
|
||||||
|
i ← in ⊐ 𝕩
|
||||||
|
"Unknown escape" ! ∧´𝕨≤i<≠in
|
||||||
|
𝕩 + 𝕨 × i ⊏ diff
|
||||||
|
}
|
||||||
|
|
||||||
|
hc ← "0Aa"
|
||||||
|
hb ← ⥊hc+0≍˘10‿6‿6 # Hex boundaries, start and after-end
|
||||||
|
ho ← 2/hc-0‿10‿10 # Corresponding offsets
|
||||||
|
# Hex: u is a mask of the 'u' characters that begin \u escapes in 𝕩.
# Result is ⟨characters, mask⟩: the characters have each marked u replaced
# using the value Surrogate returns, and the mask is m (the four positions
# after each u) with the u positions set from Surrogate's first-half flags.
Hex ← { u 𝕊 𝕩:
|
||||||
|
d ← 𝕩 /˜ m ← ≠` (4⥊0)⊸»⊸≠ »u
|
||||||
|
t ← hb ⍋ d
|
||||||
|
"String \u must be followed by 4 hex characters" ! ∧´1=2|t
|
||||||
|
# Now m can't run past the end or self-intersect,
|
||||||
|
# or it would have hit a closing quote or backslash
|
||||||
|
v ← 16⊸×⊸+˜˝⌽ ⍉∘‿4⥊ d-t⊏ho
|
||||||
|
w‿e ← Surrogate v
|
||||||
|
⟨(w+@-'u')⊸+⌾(u⊸/)𝕩, e⌾(u⊸/)m⟩
|
||||||
|
}
|
||||||
|
sr ← 2⋆10 # Surrogate base/radix
|
||||||
|
sb ← sr×52+2+↕3 # Surrogate character boundaries
|
||||||
|
# Surrogate: 𝕩 is a list of 16-bit values from \u escapes.
# Result is ⟨v, h⟩ where h marks first halves of surrogate pairs and
# v holds the values with each first half's contribution added to the
# following element.
Surrogate ← {
|
||||||
|
c ← (≠sb)|sb⍋𝕩 # 0 for non-surrogate, 1 then 2 for surrogate
|
||||||
|
h ← 1=c # First half
|
||||||
|
"Unmatched surrogate pair" ! (0∾h) ≡ (2=c)∾0
|
||||||
|
r ← 𝕩 - c⊏0∾sb # Numeric value of surrogates
|
||||||
|
v ← r + »h×sr×(2⋆6)+r
|
||||||
|
⟨v, h⟩
|
||||||
|
}
|
||||||
|
|
||||||
|
# UnEscape: e is a mask of escaped characters in 𝕩 (supplied by the caller).
# \u escapes are handled by Hex when any are present; all other escaped
# characters go through Basic. Result is a pair whose first element is the
# converted characters; the second is Hex's mask, or 0 if Hex was not run.
UnEscape ⇐ { e 𝕊 𝕩:
|
||||||
|
u ← e ∧ 𝕩='u'
|
||||||
|
(u<e)⊸Basic⌾⊑ u Hex⟜⊑⍟(∨´u) 𝕩‿0
|
||||||
|
}
|
||||||
|
|
||||||
|
Hex32 ← { # Convert 𝕩<32 to two hex digits
|
||||||
|
u‿l ← 16(⌊∘÷˜⋈|)𝕩 # Upper and lower digits; ∧´u<2
|
||||||
|
'0'+u≍˘l-(10+-´"0A")×10≤l
|
||||||
|
}
|
||||||
|
# Escape: return string 𝕩 with quotes, backslashes and control characters
# replaced by two-character basic escapes or six-character \u00XX escapes
Escape ⇐ {
|
||||||
|
e ← (𝕩∊2↑in) ∨ 𝕩<@+32 # Quote, backslash, and control characters
|
||||||
|
j ← /e ⋄ k ← ¬e # Their indices; characters to keep
|
||||||
|
c ← j ⊏ 𝕩 # Characters to be escaped
|
||||||
|
m ← c - (out⊐c)⊏diff # Escape the basic ones
|
||||||
|
g ← 2 ∾˜ m<@+32 # Group 𝕨 to separate...
|
||||||
|
n‿u ← g ⊔ m # Characters requiring basic, hex escapes
|
||||||
|
i ← ∾⟨/k⟩∾2‿6/¨g⊔j # Target indices
|
||||||
|
i ⍋⊸⊏ ∾⟨ # Use them to reorder:
|
||||||
|
k/𝕩 # Kept characters
|
||||||
|
⥊'\'≍˘n # Basic escapes
|
||||||
|
⥊"\u00"⊸∾˘ Hex32 u-@ # Hex escapes
|
||||||
|
⟩
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# Format error with message 𝕨 at locations 𝕩 in 𝕗
|
||||||
|
# msg: error message; src (the operand): source text; pos: list of error positions in src.
# Result is one string with lines separated by line feeds: the message,
# the source line holding the first position, a caret line for the positions
# on that line, and, if other lines are affected, a line listing them.
_fmtErr ← { msg src _𝕣 pos:
|
||||||
|
lf ← @+10
|
||||||
|
s ← 0∾1+ b←/src=lf # Line start and break positions
|
||||||
|
l ← (1↓s) ⍋ pos # Error line numbers
|
||||||
|
c ← pos - l⊏s # Error column numbers
|
||||||
|
d ← ⊑l # First line only
|
||||||
|
# Prefix every line with lf, join, then drop the leading lf
1↓∾lf⊸∾¨ ⟨
|
||||||
|
msg
|
||||||
|
(d⊑s)↓(d⊑b∾≠src)↑src # Display first line
|
||||||
|
" ^" ⊏˜ /⁼ (d=l)/c # And carets under errors
|
||||||
|
⟩∾{
|
||||||
|
n ← ≠ a ← d⊸≠⊸/l # 1-indexed numbers of other lines
|
||||||
|
# "Also line" when there is exactly one other entry, "Also lines" otherwise
0<n ? ⋈((-1=n)↓"Also lines")∾1↓∾⥊(<", ")≍˘•Repr¨1+a
|
||||||
|
; ⟨⟩
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# JSON to ⟨tokens, token-start mask, constants, numbers, strings⟩
|
||||||
|
# Tokens are characters {}[],: and a constant, 0 number, " string
|
||||||
|
# Values correspond to a, 0, and " in order
|
||||||
|
# 𝕩 is the JSON source text. Result is ⟨tok, f, cns, num, str⟩:
# tok is one character per token, f is the mask over 𝕩 of token first
# characters, and cns, num, str are the constant, number and string values.
Tokenize ← {
|
||||||
|
# FE formats an error against this source; "msg"_err applied to a mask
# raises msg at the marked positions if any position is marked
FE ← 𝕩_fmtErr ⋄ _err ← {(! 𝕗 FE /)⍟(∨´)}
|
||||||
|
# Strings
|
||||||
|
# eo: backslashes that are not themselves escaped; e: the characters after them
e ← »eo ← <`'\'=𝕩
|
||||||
|
# q: unescaped quotes; s: running parity of q
s ← ≠`q ← e<'"'=𝕩
|
||||||
|
"Unclosed quote" ! ¬¯1⊑s
|
||||||
|
"Backslash outside string"_err s<eo
|
||||||
|
"Un-escaped control character"_err s>𝕩≥@+32
|
||||||
|
xe‿dr ← e UnEscape 𝕩 # Escaped 𝕩; characters to drop
|
||||||
|
sg ← ((1-˜(s>q∨eo∨dr)×+`)∾+´)s∧q # Start at s∧q; exclude q, eo, dr
|
||||||
|
str ← sg ⊔ xe # Strings
|
||||||
|
|
||||||
|
# Numbers and constants
|
||||||
|
b ← s ∨ 𝕩∊@+9‿10‿13‿32 # Whitespace (blank)
|
||||||
|
l ← ¬ b ∨ 𝕩∊"""{}[],:" # Word characters
|
||||||
|
w ← »⊸< l # Word starts
|
||||||
|
neg ← '-' = 𝕩
|
||||||
|
dig ← ('0'≤𝕩) ∧ 𝕩≤'9'
|
||||||
|
# A word is a number when its first character is a digit or '-'
m ← l ∧ (+`w)⊏0∾w/dig∨neg # Numbers
|
||||||
|
n ← m∧w ⋄ k ← m<w # Number and constant starts
|
||||||
|
"Leading zero"_err (n≠»⊸∨n∧neg)∧('0'=𝕩)∧«dig
|
||||||
|
"Digits required on both sides of decimal"_err (m∧'.'=𝕩)>(»∧«)dig
|
||||||
|
# CE is handed to Consts as its error formatter: 𝕩 is a mask over the
# constants, reported at the corresponding constant start positions /k
CE ← {(! 𝕨 FE /⟜(/k))⍟(∨´) 𝕩}
|
||||||
|
cns ← ce Consts (1-˜(m<l)×+`k)⊔𝕩 # Constants
|
||||||
|
num ← •ParseFloat¨ (1-˜m×+`n)⊔𝕩 # Numbers
|
||||||
|
|
||||||
|
# Tokenize
|
||||||
|
f ← ¬b∨w<l # First characters of tokens
|
||||||
|
# Numbers are tokenized as '0' and constants as 'a'; other tokens keep their first character
tok ← '0'¨⌾((f/n)⊸/) 'a'¨⌾((f/k)⊸/) f/𝕩
|
||||||
|
⟨tok, f, cns, num, str⟩
|
||||||
|
}
|
||||||
|
|
||||||
|
# Parse JSON text 𝕩 into a BQN value; the inverse 𝕊⁼ is Export.
# Fails with "Empty input" if the text has no characters or no tokens,
# and with a formatted message for each structural error checked below.
Parse ← {
|
||||||
|
𝕊⁼𝕩: Export 𝕩 ;
|
||||||
|
"Empty input" ! 0<≠𝕩
|
||||||
|
# t: token characters; tf: token-start mask over 𝕩;
# cns, num, strk: constant, number and string values
t‿tf‿cns‿num‿strk ← Tokenize 𝕩
|
||||||
|
"Empty input" ! 0<≠t
|
||||||
|
|
||||||
|
# Validate
|
||||||
|
# _tr records persistent transformations
|
||||||
|
# Temporary transformations are passed to _err_ as 𝔽
|
||||||
|
# trs accumulates the functions needed to turn a mask over the current
# token ordering into positions in the source text, for error reporting
trs ← ⟨tf,/⟩ ⋄ _tr ← {trs∾↩⟨𝕨⊸𝔽⟩⋄𝕨𝔽𝕩}
|
||||||
|
FE ← 𝕩_fmtErr ⋄ _err_ ← {𝕗{! 𝕗 FE 𝕩/𝔾{𝕎𝕩}´⌽trs}𝔾⍟(∨´)}
|
||||||
|
q ← '"'=t
|
||||||
|
c ← ':'=t
|
||||||
|
"Object keys must be strings"_err_⊢ (»q)<c
|
||||||
|
g ← ⍋+`(co←t∊"[{")-cc←t∊"]}" # Bracket depth ordering indices
|
||||||
|
u ← g ⊏_tr t
|
||||||
|
r ← +` s ← u∊"[{"
|
||||||
|
o ← s/'{'=u # Container is object
|
||||||
|
uc← (u∊"]}")/'}'=u
|
||||||
|
"Unmatched brackets" ! o=○≠uc
|
||||||
|
"Mismatched brackets"_err_{s/𝕩} o ≠ uc
|
||||||
|
v ← («⊸∨¬s) /_tr u # Remove empty lists/objects
|
||||||
|
vs← v ∊ "[{,:" # Must alternate 0101...10
|
||||||
|
"Improper , or : usage"_err_(⊢∾⊢´) 1(∾=∾˜) vs
|
||||||
|
v /_tr˜↩ vs
|
||||||
|
# v should be composed of lists [,,, and objects {:,:,:
|
||||||
|
# Convert {: to { and ,: to : to get [,,, and {::
|
||||||
|
"Multiple keys for one value"_err_⊢ »⊸∧v=':'
|
||||||
|
v /_tr˜↩ ¬((»v='{')⊸∧ ∨ (v=',')∧«)v=':'
|
||||||
|
# Or, every , follows list-like [, and every : follows object-like {:
|
||||||
|
d ← v∊",:"
|
||||||
|
"Top-level , or :"_err_{1↑𝕩} 1↑d
|
||||||
|
"Bad object structure"_err_⊢ d > »⊸=v∊"{:"
|
||||||
|
|
||||||
|
# Keys
|
||||||
|
l ← (⍋g) ⊏ r # Container index
|
||||||
|
j ← +`⊸× o # Object index (start at 1; 0 if list)
|
||||||
|
# Group the strings: a string followed by ':' goes to the group of its
# object's index; every other string goes to group 0
keys ← ((q/(«c)×l⊏0∾j)∾1+´o) ⊔ strk
|
||||||
|
str ← ⊑keys
|
||||||
|
|
||||||
|
# Purely numeric lists
|
||||||
|
l ⊏↩ ⍋⍋nm←0⌾⊑1(∾/∾˜)s(∨/⊣)u∊"]}""a"
|
||||||
|
nn ← +´¬nm
|
||||||
|
nl ← num⊔˜(1+´nm)∾˜0⊸<⊸×(1-nn)+(t='0')/l
|
||||||
|
jj‿jn ← 2↑(1↓nm)⊔j
|
||||||
|
# Ob: with a positive object index 𝕨, pair that object's keys with values 𝕩
# as keys≍values; with 𝕨 of 0 (a list), return 𝕩 unchanged
Ob ← ⊑⟜keys⊸≍⍟(0<⊣)
|
||||||
|
n ← jn Ob¨ 1↓nl
|
||||||
|
|
||||||
|
# Build collections
|
||||||
|
nv← n -˜○≠ vals←∾⟨cns,⊑nl,str,⌽n⟩ # Initial set of values
|
||||||
|
f ← (l<nn)∧¬(«⊸∨c)∨co∨','=t # Filter for just values a0"]}
|
||||||
|
vi← ⍋⍋(f/'0'=t)+(2×f/q)+3×f/cc # Value indices
|
||||||
|
i ← vi ⊏ (↕nv) ∾ nv+≠⊸-cc/»l # Adjust for collection ordering
|
||||||
|
# Each step builds one container from entries of vals and appends it,
# so the value appended last is the top-level result
jj {vals∾↩⟨𝕨 Ob 𝕩⊏vals⟩⋄@}¨○⌽ ((≠jj)∾˜1-˜f/l)⊔i
|
||||||
|
¯1⊑vals
|
||||||
|
}
|
||||||
|
|
||||||
|
# Format number 𝕩 as JSON text.
# Fails for NaN and for either infinity, which JSON cannot represent.
ExportNumber ← {
  "NaN can't be represented in JSON" ! 𝕩=𝕩
  "Infinities can't be represented in JSON" ! ∞≠|𝕩
  text ← •Repr 𝕩
  high ← text='¯'              # BQN's high-minus characters
  '-'¨⌾(high⊸/) text           # JSON uses an ASCII minus instead
}
|
||||||
|
# 1 if every element of list 𝕩 is a character, 0 otherwise.
# An empty list is judged by its fill element, obtained with 1↑.
IsString ← {
  0=≠𝕩 ? 2=•Type ⊑1↑𝕩 ;
  ∧´ 2=•Type¨ 𝕩
}
|
||||||
|
# Export list 𝕩: a list of characters becomes a JSON string,
# anything else a JSON array of its exported elements.
ExportList ← {
  IsString 𝕩 ? ExportString 𝕩 ;
  items ← Export¨ 𝕩
  sep ← 1↓⥊(<",")≍˘items       # Items with a comma between neighbours
  ∾ (<"[") ∾ sep ∾ <"]"
}
|
||||||
|
# Export string 𝕩 as a JSON string: the escaped text between double quotes
ExportString ← {'"'∾(Escape 𝕩)∾'"'}
|
||||||
|
# Export 𝕩, a two-row array keys≍values, as a JSON object.
# Fails unless 𝕩 has exactly two rows and every key is a string.
ExportObject ← {
  "Object must consist of keys≍values" ! 2=≠𝕩
  [keys,vals] ← 𝕩
  "Object keys must be strings" ! ∧´ IsString¨ keys
  parts ← ⥊ (ExportString¨ keys) ≍˘ Export¨ vals   # key, value, key, value, ...
  seps ← (≠parts) ⥊ ":"‿","                        # ":" after keys, "," after values
  inner ← ¯1↓⥊ parts ≍˘ seps                       # Drop the trailing comma
  ∾ (<"{") ∾ inner ∾ <"}"
}
|
||||||
|
# Export BQN value 𝕩 as JSON text.
# Arrays are dispatched on rank, numbers are formatted directly,
# and every other type is rejected.
Export ← {
  # Rank 0: constant (enclosed string); 1: list or string; 2: object
  ExportArray ← (3⌊=)◶⟨
    ExportConst
    ExportList
    ExportObject
    !∘"Rank >2 cannot be exported to JSON"
  ⟩
  # Type 0 is an array and type 1 a number
  (2⌊•Type 𝕩)◶⟨
    ExportArray
    ExportNumber
    !∘"Only numbers and arrays can be exported to JSON"
  ⟩ 𝕩
}
|
103
safetensors.bqn
Normal file
103
safetensors.bqn
Normal file
|
@ -0,0 +1,103 @@
|
||||||
|
⟨ExtractMetadata,GetArrayNames,GetArray,SerializeArrays⟩⇐
|
||||||
|
|
||||||
|
⟨Parse,Export⟩←•Import"json.bqn"
|
||||||
|
|
||||||
|
# Split the file contents into ⟨parsed JSON header, offset of the data section⟩.
# The first 8 bytes are read as a little-endian integer giving the header
# length; the header text follows them.
ExtractHeader←{𝕊bytes:
  FromBits←{𝕨+2×𝕩}´                           # Bit list, least significant first, to number
  len←FromBits ⟨8‿'c',1‿'u'⟩•bit._cast 8↑bytes
  json←len↑8↓bytes
  ⟨Parse json,len+8⟩
}
|
||||||
|
|
||||||
|
# Look up key 𝕩 in parsed JSON object 𝕨 (keys≍values) and return its value.
# A missing key produces an out-of-range index, so the lookup fails.
JsonGet←{obj𝕊key:
  vals←1⊏obj
  keys←⊏obj
  (⊑keys⊐<key)⊑vals
}
|
||||||
|
|
||||||
|
# Return the header's "__metadata__" entry, or ⟨⟩ if the lookup fails
ExtractMetadata←{𝕊bytes:
  hdr‿·←ExtractHeader bytes
  Lookup←JsonGet⎊⟨⟩
  hdr Lookup "__metadata__"
}
|
||||||
|
|
||||||
|
# List the header keys other than "__metadata__", i.e. the stored array names
GetArrayNames←{𝕊bytes:
  hdr‿·←ExtractHeader bytes
  keys←⊏hdr
  keep←"__metadata__"⊸≢¨keys
  keep/keys
}
|
||||||
|
|
||||||
|
# Valid for sizes 8, 16, and 32 (passed as 𝕨)
|
||||||
|
# Decode bytes 𝕩 as a list of little-endian unsigned integers of 𝕨 bits each.
# Valid for sizes 8, 16, and 32.
ParseUint←{bits𝕊bytes:
  rows←∘‿(bits÷8)⥊bytes                       # One row of bytes per number
  {𝕨+2×𝕩}´˘ ⟨8‿'c',1‿'u'⟩•bit._cast˘ rows     # Bits are least significant first
}
|
||||||
|
# Decode bytes 𝕩 as little-endian two's-complement integers of 𝕨 bits each
ParseInt←{bits𝕊bytes:
  half←2⋆bits-1
  u←bits ParseUint bytes
  u-(2×half)×u≥half          # Values with the top bit set are negative
}
|
||||||
|
|
||||||
|
# Decode bytes 𝕩 as little-endian unsigned 64-bit integers,
# each combined from its low and high 32-bit words
ParseUint64←{
  words←∘‿2⥊32 ParseUint 𝕩    # Rows of ⟨low, high⟩
  {𝕨+(2⋆32)×𝕩}´˘words
}
|
||||||
|
# Decode bytes 𝕩 as little-endian two's-complement 64-bit integers.
# Only the high 32-bit word carries the sign; the low word is unsigned.
# (Decoding both words as signed gives wrong results whenever the low word
# has its top bit set, for example for ¯1.)
ParseInt64←{
  lo‿hi←<˘⍉ ∘‿2⥊32 ParseUint 𝕩       # Unsigned low and high words
  lo+(2⋆32)×hi-(2⋆32)×hi≥2⋆31        # Reinterpret the high word as signed
}
|
||||||
|
|
||||||
|
# Parse a floating point number
|
||||||
|
# e is the size of the exponent part
|
||||||
|
# Parse one binary floating point number from its little-endian bytes.
# 𝕨 (e): number of exponent bits. The top bit is the sign, the next e bits
#        the biased exponent, and the remaining bits the fraction.
# 𝕩 (bytes): the characters holding the number, least significant byte first.
# An all-zero exponent field is decoded as zero or a subnormal (no implicit
# leading 1, exponent 1-bias), so a stored 0.0 comes back as 0.
# NOTE(review): an all-ones exponent field is decoded like any other, so
# infinities and NaNs are not produced — confirm whether callers need them.
ParseFloat←{e𝕊bytes:
  n←⌽⟨8‿'c',1‿'u'⟩•bit._cast bytes     # Bits, most significant first
  s←(≠n)-e+1                            # Number of fraction bits
  sign←1-2×⊑n
  exponent←2⊸×⊸+˜´⌽e↑1↓n
  lead←0<exponent                       # Implicit leading bit; 0 for zero/subnormal
  significand←2⊸×⊸+˜´⌽lead∾e↓1↓n
  bias←(2⋆e-1)-1
  sign×(2⋆(1⌈exponent)-bias)×significand÷2⋆s
}
|
||||||
|
|
||||||
|
# Supported dtype names; typeConversions lists the decoders in the same order
dtypes←⟨
  "BOOL"     # Boolean
  "U8"       # Unsigned 8-bit integer
  "I8"       # Signed 8-bit integer
  "F8_E5M2"  # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
  "F8_E4M3"  # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
  "I16"      # Signed 16-bit integer
  "U16"      # Unsigned 16-bit integer
  "F16"      # Half-precision floating point
  "BF16"     # Brain floating point
  "I32"      # Signed 32-bit integer
  "U32"      # Unsigned 32-bit integer
  "F32"      # 32-bit floating point
  "F64"      # 64-bit floating point
  "I64"      # Signed 64-bit integer
  "U64"      # Unsigned 64-bit integer
⟩
|
||||||
|
# Decoders from raw bytes to a flat list of values, in the same order as dtypes
typeConversions←⟨
  @⊸≠,                        # BOOL: one byte per element, nonzero is true
  8⊸ParseUint,                # U8
  ⟨8‿'c', 8‿'i'⟩•bit._cast,   # I8
  5⊸ParseFloat˘∘‿1⊸⥊,         # F8_E5M2
  4⊸ParseFloat˘∘‿1⊸⥊,         # F8_E4M3
  ⟨8‿'c',16‿'i'⟩•bit._cast,   # I16
  16⊸ParseUint,               # U16
  5⊸ParseFloat˘∘‿2⊸⥊,         # F16
  8⊸ParseFloat˘∘‿2⊸⥊,         # BF16
  ⟨8‿'c',32‿'i'⟩•bit._cast,   # I32
  32⊸ParseUint,               # U32
  8⊸ParseFloat˘∘‿4⊸⥊,         # F32
  ⟨8‿'c',64‿'f'⟩•bit._cast,   # F64
  ParseInt64,                 # I64
  ParseUint64,                # U64
⟩
|
||||||
|
|
||||||
|
# Read the array called name from file contents bytes.
# The header entry supplies "data_offsets" (start and end within the data
# section), "shape" and "dtype"; the selected bytes are decoded with the
# matching entry of typeConversions and reshaped.
# Fails with a descriptive message if name is not in the header or its
# dtype is not listed in dtypes.
GetArray←{bytes𝕊name:
  header‿n←ExtractHeader bytes
  ("Unknown array: "∾name) ! ⊑(<name)∊⊏header
  byteBuf←n↓bytes                       # Data section following the header
  info←header JsonGet name
  s‿e←info JsonGet "data_offsets"
  shape←info JsonGet "shape"
  dtype←info JsonGet "dtype"
  dtypeIdx←⊑dtypes⊐<dtype
  ("Unsupported dtype: "∾dtype) ! dtypeIdx<≠dtypes
  conv←dtypeIdx⊑typeConversions
  shape⥊Conv s↓e↑byteBuf
}
|
||||||
|
|
||||||
|
# Encode numeric array 𝕩 as ⟨dtype name, shape, raw bytes⟩.
# "I32" is used only when every element is an integer that fits in 32 signed
# bits; anything else (fractions, larger magnitudes, infinities) is stored
# as "F64" so the 32-bit cast is never given a value it cannot hold.
SerializeArray←{
  v←⥊𝕩
  isI32←∧´(v=⌊v)∧(v≥-2⋆31)∧v<2⋆31
  dtype←isI32⊑"F64"‿"I32"
  data←isI32◶⟨⟨64‿'f',8‿'c'⟩•bit._cast,⟨32‿'i',8‿'c'⟩•bit._cast⟩v
  ⟨dtype,≢𝕩,data⟩
}
|
||||||
|
|
||||||
|
# Build file contents from a list of names and the matching list of arrays:
# 8 bytes of header length, the JSON header, then every array's data in order.
SerializeArrays←{names𝕊arrs:
  kinds‿dims‿bufs←<˘⍉>SerializeArray¨arrs
  ends←+`≠¨bufs
  spans←(0∾¯1↓ends)⋈¨ends              # ⟨start, end⟩ of each array's data
  Entry←{𝕊name‿kind‿dim‿span:
    ["dtype"‿"shape"‿"data_offsets",kind‿dim‿span]
  }
  entries←Entry¨<˘⍉>names‿kinds‿dims‿spans
  json←Export [names,entries]
  lenBytes←⟨32‿'i',8‿'c'⟩•bit._cast⟨≠json,0⟩   # Header length as 8 bytes, high word 0
  lenBytes∾json∾∾bufs
}
|
Loading…
Add table
Add a link
Reference in a new issue