Initial commit
This commit is contained in:
commit
6c1d57f9cd
2 changed files with 330 additions and 0 deletions
227
json.bqn
Normal file
227
json.bqn
Normal file
|
@ -0,0 +1,227 @@
|
|||
# Part of bqn-libs: https://github.com/mlochbaum/bqn-libs
|
||||
# 0-BSD License
|
||||
|
||||
# JSON: JavaScript Object Notation
|
||||
⟨
|
||||
Parse # JSON string to BQN
|
||||
Export # BQN value to JSON (also Parse⁼)
|
||||
⟩⇐
|
||||
|
||||
# JSON numbers, strings, and lists correspond directly to BQN
|
||||
# Objects are represented as keys≍values
|
||||
# true, false, null are represented as <"true", <"false", <"null"
|
||||
|
||||
# An empty list exports as "" if its fill is a space and [] otherwise
|
||||
|
||||
# JSON constants: Consts converts parsed words to BQN values,
# and ExportConst converts a BQN value back to its JSON name
⟨Consts, ExportConst⟩ ← {
|
||||
# name holds the JSON spellings; val holds each spelling enclosed
val ← <¨ name ← "true"‿"false"‿"null"
|
||||
# Convert a list of words 𝕩 to the corresponding elements of val
# 𝕨 is called with a message and a mask of the words not found in name
Consts ⇐ {
|
||||
i ← name ⊐ 𝕩
|
||||
"Unknown constant" 𝕎 i = ≠name # 𝕎 formats errors
|
||||
i ⊏ val
|
||||
}
|
||||
# Error message for ExportConst: lists the names, with "or" before the last
cm ← "Enclosed value must be JSON constant"∾∾' '⊸∾¨"or "⊸∾⌾(¯1⊸⊑)name
|
||||
# Convert one value 𝕩 to its name; fails with cm if 𝕩 is not in val
ExportConst ⇐ {
|
||||
i ← val⊸⊐⌾< 𝕩
|
||||
cm ! i<≠val
|
||||
i ⊑ name
|
||||
}
|
||||
}
|
||||
# String escapes: UnEscape decodes them when parsing, Escape adds them for export
⟨UnEscape, Escape⟩ ← {
|
||||
# Characters allowed after a backslash (quote, backslash, slash, b f r n t)
in ← """\/bfrnt"
|
||||
# What each one stands for: the first three are themselves, the rest are control characters
out ← (3↑in)∾@+8‿12‿13‿10‿9
|
||||
# Amount to add to go from in to out; the trailing 0 is selected for characters not found
diff ← (out-in) ∾ 0
|
||||
# Replace the characters of 𝕩 selected by mask 𝕨 with the characters they stand for
# Fails if a selected character is not in in
Basic ← {
|
||||
i ← in ⊐ 𝕩
|
||||
"Unknown escape" ! ∧´𝕨≤i<≠in
|
||||
𝕩 + 𝕨 × i ⊏ diff
|
||||
}
|
||||
|
||||
hc ← "0Aa"
|
||||
hb ← ⥊hc+0≍˘10‿6‿6 # Hex boundaries, start and after-end
|
||||
ho ← 2/hc-0‿10‿10 # Corresponding offsets
|
||||
# Decode \u escapes in 𝕩, where u marks the 'u' characters
# Result is a pair: 𝕩 with each marked 'u' replaced by the decoded character,
# and m (the four characters after each 'u') with the entries at u set from
# Surrogate's first-half mask
Hex ← { u 𝕊 𝕩:
|
||||
d ← 𝕩 /˜ m ← ≠` (4⥊0)⊸»⊸≠ »u
|
||||
t ← hb ⍋ d
|
||||
"String \u must be followed by 4 hex characters" ! ∧´1=2|t
|
||||
# Now m can't run past the end or self-intersect,
|
||||
# or it would have hit a closing quote or backslash
|
||||
v ← 16⊸×⊸+˜˝⌽ ⍉∘‿4⥊ d-t⊏ho
|
||||
w‿e ← Surrogate v
|
||||
⟨(w+@-'u')⊸+⌾(u⊸/)𝕩, e⌾(u⊸/)m⟩
|
||||
}
|
||||
sr ← 2⋆10 # Surrogate base/radix
|
||||
sb ← sr×52+2+↕3 # Surrogate character boundaries
|
||||
# Combine surrogate pairs in the list of code points 𝕩
# Result is ⟨v, h⟩: adjusted values, and a mask of first halves
# Fails unless every first half is directly followed by a second half
Surrogate ← {
|
||||
c ← (≠sb)|sb⍋𝕩 # 0 for non-surrogate, 1 then 2 for surrogate
|
||||
h ← 1=c # First half
|
||||
"Unmatched surrogate pair" ! (0∾h) ≡ (2=c)∾0
|
||||
r ← 𝕩 - c⊏0∾sb # Numeric value of surrogates
|
||||
v ← r + »h×sr×(2⋆6)+r
|
||||
⟨v, h⟩
|
||||
}
|
||||
|
||||
# Decode escapes in 𝕩, where mask e selects the escaped characters
# Starts from the pair 𝕩‿0, applies Hex only if some escape is a 'u',
# then applies Basic to the first element for the remaining escapes
UnEscape ⇐ { e 𝕊 𝕩:
|
||||
u ← e ∧ 𝕩='u'
|
||||
(u<e)⊸Basic⌾⊑ u Hex⟜⊑⍟(∨´u) 𝕩‿0
|
||||
}
|
||||
|
||||
Hex32 ← { # Convert 𝕩<32 to two hex digits
|
||||
u‿l ← 16(⌊∘÷˜⋈|)𝕩 # Upper and lower digits; ∧´u<2
|
||||
'0'+u≍˘l-(10+-´"0A")×10≤l
|
||||
}
|
||||
# Add escapes to string 𝕩: a backslash form where out has one, \u00XX otherwise
Escape ⇐ {
|
||||
e ← (𝕩∊2↑in) ∨ 𝕩<@+32 # Quote, backslash, and control characters
|
||||
j ← /e ⋄ k ← ¬e # Their indices; characters to keep
|
||||
c ← j ⊏ 𝕩 # Characters to be escaped
|
||||
m ← c - (out⊐c)⊏diff # Escape the basic ones
|
||||
g ← 2 ∾˜ m<@+32 # Group 𝕨 to separate...
|
||||
n‿u ← g ⊔ m # Characters requiring basic, hex escapes
|
||||
i ← ∾⟨/k⟩∾2‿6/¨g⊔j # Target indices
|
||||
i ⍋⊸⊏ ∾⟨ # Use them to reorder:
|
||||
k/𝕩 # Kept characters
|
||||
⥊'\'≍˘n # Basic escapes
|
||||
⥊"\u00"⊸∾˘ Hex32 u-@ # Hex escapes
|
||||
⟩
|
||||
}
|
||||
}
|
||||
|
||||
# Format error with message 𝕨 at locations 𝕩 in 𝕗
|
||||
# Result is a single string with lines separated by line feeds:
# the message, the first source line with an error, a line of carets,
# and an optional note naming the other lines that contain errors
_fmtErr ← { msg src _𝕣 pos:
|
||||
lf ← @+10
|
||||
s ← 0∾1+ b←/src=lf # Line start and break positions
|
||||
l ← (1↓s) ⍋ pos # Error line numbers
|
||||
c ← pos - l⊏s # Error column numbers
|
||||
d ← ⊑l # First line only
|
||||
# Each part gets a leading line feed, and 1↓ removes the very first one
1↓∾lf⊸∾¨ ⟨
|
||||
msg
|
||||
(d⊑s)↓(d⊑b∾≠src)↑src # Display first line
|
||||
" ^" ⊏˜ /⁼ (d=l)/c # And carets under errors
|
||||
⟩∾{
|
||||
n ← ≠ a ← d⊸≠⊸/l # 1-indexed numbers of other lines
|
||||
# (-1=n)↓ drops the final "s" when exactly one other line is listed
0<n ? ⋈((-1=n)↓"Also lines")∾1↓∾⥊(<", ")≍˘•Repr¨1+a
|
||||
; ⟨⟩
|
||||
}
|
||||
}
|
||||
|
||||
# JSON to ⟨tokens, token start mask, constants, numbers, strings⟩
|
||||
# Tokens are characters {}[],: and a constant, 0 number, " string
|
||||
# Values correspond to a, 0, and " in order
|
||||
# 𝕩 is the JSON source as a list of characters (it must be non-empty,
# since the last element of the quote mask is inspected)
# Result is ⟨tok, f, cns, num, str⟩, where f marks the source position of each token
Tokenize ← {
|
||||
# _err fails with a formatted message if any position in the mask 𝕩 is set
FE ← 𝕩_fmtErr ⋄ _err ← {(! 𝕗 FE /)⍟(∨´)}
|
||||
# Strings
|
||||
# eo: backslashes that are not themselves escaped; e: the characters right after them
e ← »eo ← <`'\'=𝕩
|
||||
# q: quotes that are not escaped; s: 1 from an opening quote up to (excluding) its closing quote
s ← ≠`q ← e<'"'=𝕩
|
||||
"Unclosed quote" ! ¬¯1⊑s
|
||||
"Backslash outside string"_err s<eo
|
||||
"Un-escaped control character"_err s>𝕩≥@+32
|
||||
xe‿dr ← e UnEscape 𝕩 # Escaped 𝕩; characters to drop
|
||||
sg ← ((1-˜(s>q∨eo∨dr)×+`)∾+´)s∧q # Start at s∧q; exclude q, eo, dr
|
||||
str ← sg ⊔ xe # Strings
|
||||
|
||||
# Numbers and constants
|
||||
b ← s ∨ 𝕩∊@+9‿10‿13‿32 # Whitespace (blank)
|
||||
l ← ¬ b ∨ 𝕩∊"""{}[],:" # Word characters
|
||||
w ← »⊸< l # Word starts
|
||||
neg ← '-' = 𝕩
|
||||
dig ← ('0'≤𝕩) ∧ 𝕩≤'9'
|
||||
# A word is a number if it starts with a digit or minus sign
m ← l ∧ (+`w)⊏0∾w/dig∨neg # Numbers
|
||||
n ← m∧w ⋄ k ← m<w # Number and constant starts
|
||||
"Leading zero"_err (n≠»⊸∨n∧neg)∧('0'=𝕩)∧«dig
|
||||
"Digits required on both sides of decimal"_err (m∧'.'=𝕩)>(»∧«)dig
|
||||
# Error reporter passed to Consts: 𝕩 is a mask over constants, mapped to their start positions
CE ← {(! 𝕨 FE /⟜(/k))⍟(∨´) 𝕩}
|
||||
cns ← ce Consts (1-˜(m<l)×+`k)⊔𝕩 # Constants
|
||||
num ← •ParseFloat¨ (1-˜m×+`n)⊔𝕩 # Numbers
|
||||
|
||||
# Tokenize
|
||||
f ← ¬b∨w<l # First characters of tokens
|
||||
# Number tokens become '0' and constant tokens become 'a'
tok ← '0'¨⌾((f/n)⊸/) 'a'¨⌾((f/k)⊸/) f/𝕩
|
||||
⟨tok, f, cns, num, str⟩
|
||||
}
|
||||
|
||||
# Convert JSON source 𝕩 (a list of characters) to a BQN value
# Fails with a formatted message on empty or malformed input
Parse ← {
|
||||
# The inverse, Parse⁼, is Export
𝕊⁼𝕩: Export 𝕩 ;
|
||||
"Empty input" ! 0<≠𝕩
|
||||
# t: token characters; tf: token start mask; strk: all strings, object keys included
t‿tf‿cns‿num‿strk ← Tokenize 𝕩
|
||||
"Empty input" ! 0<≠t
|
||||
|
||||
# Validate
|
||||
# _tr records persistent transformations
|
||||
# Temporary transformations are passed to _err_ as 𝔽
|
||||
trs ← ⟨tf,/⟩ ⋄ _tr ← {trs∾↩⟨𝕨⊸𝔽⟩⋄𝕨𝔽𝕩}
|
||||
FE ← 𝕩_fmtErr ⋄ _err_ ← {𝕗{! 𝕗 FE 𝕩/𝔾{𝕎𝕩}´⌽trs}𝔾⍟(∨´)}
|
||||
q ← '"'=t
|
||||
c ← ':'=t
|
||||
"Object keys must be strings"_err_⊢ (»q)<c
|
||||
g ← ⍋+`(co←t∊"[{")-cc←t∊"]}" # Bracket depth ordering indices
|
||||
u ← g ⊏_tr t
|
||||
r ← +` s ← u∊"[{"
|
||||
o ← s/'{'=u # Container is object
|
||||
uc← (u∊"]}")/'}'=u
|
||||
"Unmatched brackets" ! o=○≠uc
|
||||
"Mismatched brackets"_err_{s/𝕩} o ≠ uc
|
||||
v ← («⊸∨¬s) /_tr u # Remove empty lists/objects
|
||||
vs← v ∊ "[{,:" # Must alternate 0101...10
|
||||
"Improper , or : usage"_err_(⊢∾⊢´) 1(∾=∾˜) vs
|
||||
v /_tr˜↩ vs
|
||||
# v should be composed of lists [,,, and objects {:,:,:
|
||||
# Convert {: to { and ,: to : to get [,,, and {::
|
||||
"Multiple keys for one value"_err_⊢ »⊸∧v=':'
|
||||
v /_tr˜↩ ¬((»v='{')⊸∧ ∨ (v=',')∧«)v=':'
|
||||
# Or, every , follows list-like [, and every : follows object-like {:
|
||||
d ← v∊",:"
|
||||
"Top-level , or :"_err_{1↑𝕩} 1↑d
|
||||
"Bad object structure"_err_⊢ d > »⊸=v∊"{:"
|
||||
|
||||
# Keys
|
||||
l ← (⍋g) ⊏ r # Container index
|
||||
j ← +`⊸× o # Object index (start at 1; 0 if list)
|
||||
# Group strings by the object they are a key of; group 0 holds strings that are not keys
keys ← ((q/(«c)×l⊏0∾j)∾1+´o) ⊔ strk
|
||||
str ← ⊑keys
|
||||
|
||||
# Purely numeric lists
|
||||
l ⊏↩ ⍋⍋nm←0⌾⊑1(∾/∾˜)s(∨/⊣)u∊"]}""a"
|
||||
nn ← +´¬nm
|
||||
nl ← num⊔˜(1+´nm)∾˜0⊸<⊸×(1-nn)+(t='0')/l
|
||||
jj‿jn ← 2↑(1↓nm)⊔j
|
||||
# With object index 𝕨: pair values 𝕩 with that object's keys if 𝕨>0, otherwise leave 𝕩 as a list
Ob ← ⊑⟜keys⊸≍⍟(0<⊣)
|
||||
n ← jn Ob¨ 1↓nl
|
||||
|
||||
# Build collections
|
||||
nv← n -˜○≠ vals←∾⟨cns,⊑nl,str,⌽n⟩ # Initial set of values
|
||||
f ← (l<nn)∧¬(«⊸∨c)∨co∨','=t # Filter for just values a0"]}
|
||||
vi← ⍋⍋(f/'0'=t)+(2×f/q)+3×f/cc # Value indices
|
||||
i ← vi ⊏ (↕nv) ∾ nv+≠⊸-cc/»l # Adjust for collection ordering
|
||||
# Each remaining collection is built from entries of vals and appended to it
jj {vals∾↩⟨𝕨 Ob 𝕩⊏vals⟩⋄@}¨○⌽ ((≠jj)∾˜1-˜f/l)⊔i
|
||||
# The result is the last entry of vals
¯1⊑vals
|
||||
}
|
||||
|
||||
# Format number 𝕩 as JSON text
# NaN and the infinities have no JSON form and are rejected
ExportNumber ← {
  "NaN can't be represented in JSON" ! =˜𝕩
  "Infinities can't be represented in JSON" ! ∞≠|𝕩
  text ← •Repr 𝕩
  # BQN writes negatives with ¯, JSON with -
  '-'¨⌾((text='¯')⊸/) text
}
|
||||
# 1 if every element of 𝕩 is a character
# An empty 𝕩 is judged by the element that 1↑ produces from it
IsString ← {
  probe ← (0=≠)◶⟨⊢, 1⊸↑⟩ 𝕩
  ∧´ 2 = •Type¨ probe
}
|
||||
# Export list 𝕩: as a JSON string if it is a string, otherwise as a JSON array
ExportList ← {
  IsString 𝕩 ? ExportString 𝕩 ;
  items ← Export¨ 𝕩
  # Put a comma before every item, then drop the leading one
  separated ← 1↓ ⥊ (<",") ≍˘ items
  ∾ ⟨"["⟩ ∾ separated ∾ ⟨"]"⟩
}
|
||||
# Export string 𝕩: escape it and wrap it in double quotes
ExportString ← {'"' ∾ (Escape 𝕩) ∾ '"'}
|
||||
# Export 𝕩, a two-row array of keys≍values, as a JSON object
ExportObject ← {
  "Object must consist of keys≍values" ! 2=≠𝕩
  keys ← ⊏𝕩 ⋄ vals ← 1⊏𝕩
  "Object keys must be strings" ! ∧´ IsString¨ keys
  # One "key":value string per entry
  pairs ← (ExportString¨ keys) {𝕨∾":"∾𝕩}¨ Export¨ vals
  # Put a comma before every entry, then drop the leading one
  ∾ ⟨"{"⟩ ∾ (1↓ ⥊ (<",") ≍˘ pairs) ∾ ⟨"}"⟩
}
|
||||
# Convert BQN value 𝕩 to JSON text, choosing a handler by type and then by rank
# 2⌊ and 3⌊ cap the selector so that every other case reaches the final error entry
Export ← (2⌊•Type)◶⟨ # Check type
|
||||
(3⌊=)◶⟨ # Type 0, array: Check rank
|
||||
ExportConst # 0 constant (enclosed string)
|
||||
ExportList # 1 List or string
|
||||
ExportObject # 2 Object
|
||||
# 3 or more: error
!∘"Rank >2 cannot be exported to JSON"
|
||||
⟩
|
||||
ExportNumber # Type 1: number
|
||||
# Any other type: error
!∘"Only numbers and arrays can be exported to JSON"
|
||||
⟩
|
103
safetensors.bqn
Normal file
103
safetensors.bqn
Normal file
|
@ -0,0 +1,103 @@
|
|||
⟨ExtractMetadata,GetArrayNames,GetArray,SerializeArrays⟩⇐
|
||||
|
||||
⟨Parse,Export⟩←•Import"json.bqn"
|
||||
|
||||
# Read the header of a safetensors file given as a list of byte characters
# The first 8 bytes hold the header length as a little-endian integer
# Result is ⟨parsed JSON header, offset at which the data section starts⟩
ExtractHeader←{𝕊bytes:
  FromBits←2⊸×⊸+˜´                       # Bits, least significant first, to number
  size←FromBits ⟨8‿'c',1‿'u'⟩•bit._cast 8↑bytes
  json←size↑8↓bytes
  ⟨Parse json, size+8⟩
}
|
||||
|
||||
# Look up key 𝕩 in object 𝕨 (keys≍values); an index error results if the key is absent
JsonGet←{
  names←⊏𝕨 ⋄ vals←1⊏𝕨
  (⊑names⊐<𝕩)⊑vals
}
|
||||
|
||||
# Return the "__metadata__" entry of the file's header, or ⟨⟩ if the lookup fails
ExtractMetadata←{𝕊bytes:
  hdr‿·←ExtractHeader bytes
  Lookup←JsonGet⎊⟨⟩
  hdr Lookup "__metadata__"
}
|
||||
|
||||
# List the header keys of a safetensors file, leaving out "__metadata__"
GetArrayNames←{𝕊bytes:
  hdr‿·←ExtractHeader bytes
  names←⊏hdr
  (names≢¨<"__metadata__")/names
}
|
||||
|
||||
# Parse little-endian unsigned integers from byte characters 𝕩
# Valid for sizes 8, 16, and 32 (passed as 𝕨)
ParseUint←{width𝕊bytes:
  rows←∘‿(width÷8)⥊bytes               # One row of bytes per integer
  bits←⟨8‿'c',1‿'u'⟩•bit._cast˘rows    # Bits of each row, least significant first
  2⊸×⊸+˜´˘bits
}
|
||||
# Parse little-endian two's complement integers of 𝕨 bits from byte characters 𝕩
ParseInt←{width𝕊bytes:
  half←2⋆width-1
  # Shift by half, wrap around modulo 2⋆width, and shift back
  (-half)+(2×half)|half+width ParseUint bytes
}
|
||||
|
||||
# Parse little-endian unsigned 64-bit integers from byte characters 𝕩
# Each value is assembled from its low and high 32-bit words
ParseUint64←{
  lo‿hi←<˘⍉∘‿2⥊32 ParseUint 𝕩
  lo+hi×2⋆32
}
|
||||
# Parse little-endian two's complement 64-bit integers from byte characters 𝕩
# Only the high 32-bit word carries the sign, so both words are read as
# unsigned and the high word alone is reinterpreted as signed
# Values that need more than 53 bits are rounded, as BQN numbers are floats
ParseInt64←{
  lo‿hi←<˘⍉∘‿2⥊32 ParseUint 𝕩
  lo+(2⋆32)×hi-(2⋆32)×hi≥2⋆31
}
|
||||
|
||||
# Parse one little-endian binary floating point number from byte characters
# e is the size of the exponent part; the remaining bits after the sign
# are the stored significand
# A zero exponent field means zero or a subnormal: there is no implicit
# leading 1, and the exponent is that of the smallest normal number
# An all-ones exponent field is not treated specially, so infinities and
# NaNs are decoded as if they were ordinary numbers
ParseFloat←{e𝕊bytes:
  n←⌽⟨8‿'c',1‿'u'⟩•bit._cast bytes   # Bits, most significant first
  s←(≠n)-e+1                          # Number of stored significand bits
  sign←1-2×⊑n
  exponent←2⊸×⊸+˜´⌽e↑1↓n
  fraction←2⊸×⊸+˜´⌽e↓1↓n
  bias←(2⋆e-1)-1
  sub←0=exponent                      # Zero or subnormal
  sign×(2⋆(exponent+sub)-bias)×(fraction÷2⋆s)+¬sub
}
|
||||
|
||||
# Supported dtype names, in the same order as typeConversions
dtypes←⟨
  "BOOL"      # Boolean type
  "U8"        # Unsigned byte
  "I8"        # Signed byte
  "F8_E5M2"   # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
  "F8_E4M3"   # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
  "I16"       # Signed integer (16-bit)
  "U16"       # Unsigned integer (16-bit)
  "F16"       # Half-precision floating point
  "BF16"      # Brain floating point
  "I32"       # Signed integer (32-bit)
  "U32"       # Unsigned integer (32-bit)
  "F32"       # Floating point (32-bit)
  "F64"       # Floating point (64-bit)
  "I64"       # Signed integer (64-bit)
  "U64"       # Unsigned integer (64-bit)
⟩
|
||||
# Functions that decode raw byte characters into numbers
# Entry i handles the dtype at index i of dtypes, so the order must match
typeConversions←⟨
  8⊸ParseUint,                # BOOL: stored as one byte per element, not one bit
  8⊸ParseUint,                # U8
  ⟨8‿'c', 8‿'i'⟩•bit._cast,   # I8
  5⊸ParseFloat˘∘‿1⊸⥊,         # F8_E5M2
  4⊸ParseFloat˘∘‿1⊸⥊,         # F8_E4M3
  ⟨8‿'c',16‿'i'⟩•bit._cast,   # I16
  16⊸ParseUint,               # U16
  5⊸ParseFloat˘∘‿2⊸⥊,         # F16
  8⊸ParseFloat˘∘‿2⊸⥊,         # BF16
  ⟨8‿'c',32‿'i'⟩•bit._cast,   # I32
  32⊸ParseUint,               # U32
  8⊸ParseFloat˘∘‿4⊸⥊,         # F32
  ⟨8‿'c',64‿'f'⟩•bit._cast,   # F64
  ParseInt64,                 # I64
  ParseUint64,                # U64
⟩
|
||||
|
||||
# Read the array called name from the safetensors file contents bytes
# The header entry for name gives the dtype, the shape, and the start and
# end offsets of the array's bytes within the data section
# Fails with a clear message if the dtype is not one listed in dtypes
GetArray←{bytes𝕊name:
  header‿n←ExtractHeader bytes
  byteBuf←n↓bytes                      # Data section, after the header
  info←header JsonGet name
  dtype←info JsonGet "dtype"
  dtypeIdx←⊑dtypes⊐<dtype
  "Unsupported dtype" ! dtypeIdx<≠dtypes
  s‿e←info JsonGet "data_offsets"
  shape←info JsonGet "shape"
  conv←dtypeIdx⊑typeConversions
  shape⥊Conv s↓e↑byteBuf
}
|
||||
|
||||
# Serialize numeric array 𝕩 to ⟨dtype name, shape, byte characters⟩
# "I32" is used only when every element is an integer that fits in 32 signed
# bits; anything else, including large integers and infinities, is stored as "F64"
SerializeArray←{
  flat←⥊𝕩
  isI32←∧´(flat=⌊flat)∧(flat≥-2⋆31)∧flat<2⋆31
  dtype←isI32⊑"F64"‿"I32"
  shape←≢𝕩
  data←isI32◶⟨⟨64‿'f',8‿'c'⟩•bit._cast,⟨32‿'i',8‿'c'⟩•bit._cast⟩flat
  dtype‿shape‿data
}
|
||||
|
||||
# Build the contents of a safetensors file from a list of names and a
# matching list of arrays
# Layout: 8 bytes of header length, the JSON header, then every array's data
SerializeArrays←{names𝕊arrs:
  kinds‿dims‿payloads←<˘⍉>SerializeArray¨arrs
  ends←+`≠¨payloads                     # End offset of each array's data
  spans←(»ends)⋈¨ends                   # ⟨start, end⟩ for each array
  Entry←{𝕊name‿kind‿dim‿span:
    "dtype"‿"shape"‿"data_offsets"≍kind‿dim‿span
  }
  blocks←Entry¨<˘⍉>names‿kinds‿dims‿spans
  json←Export names≍blocks
  # Length as a 32-bit integer followed by a zero high word, 8 bytes in all
  prefix←⟨32‿'i',8‿'c'⟩•bit._cast⟨≠json,0⟩
  prefix∾json∾∾payloads
}
|
Loading…
Add table
Add a link
Reference in a new issue