Initial commit

This commit is contained in:
Dimitri Lozeve 2024-11-15 20:58:49 +01:00
commit 6c1d57f9cd
2 changed files with 330 additions and 0 deletions

103
safetensors.bqn Normal file
View file

@ -0,0 +1,103 @@
ExtractMetadata,GetArrayNames,GetArray,SerializeArrays
Parse,Export•Import"json.bqn"
# ExtractHeader: reads the JSON header out of the byte buffer `bytes`.
# NOTE(review): several BQN glyphs appear to be missing from this view of the
# file, so the notes below are hedged and should be checked against the real source.
# - n is built by bit-casting part of `bytes` (8-bit chars to 1-bit values) and
#   folding the bits into a number; presumably this is the header length stored
#   in the first 8 bytes - TODO confirm.
# - Parse (imported from json.bqn) is applied to a slice of `bytes` derived from n and 8.
# - The result appears to pair the parsed header with n+8; GetArray destructures
#   the result into two names - TODO confirm.
ExtractHeader{𝕊bytes:
n2×+˜´8'c',1'u'•bit._cast 8bytes
Parse n8bytes,n+8
}
# JsonGet: looks up 𝕩 (a key string such as "dtype") in 𝕨 (a parsed JSON value).
# Callers in this file always pass the parsed header/object on the left and the key on the right.
# NOTE(review): presumably 𝕨 holds keys in one row and values in another, and the
# value at the position of the matching key is returned - glyphs are missing here, TODO confirm.
JsonGet{((𝕨)<𝕩)1𝕨}
# ExtractMetadata: given the byte buffer `bytes`, extracts the header with
# ExtractHeader and returns the value stored under the "__metadata__" key.
# NOTE(review): behaviour when the header has no "__metadata__" entry is not
# visible from here - TODO confirm.
ExtractMetadata{𝕊bytes:
header·ExtractHeader bytes
header JsonGet "__metadata__"
}
# GetArrayNames: given the byte buffer `bytes`, extracts the header with
# ExtractHeader and derives a list of names from it.
# NOTE(review): the last line combines "__metadata__" with the header using ¨ and /;
# presumably it filters the header keys to drop "__metadata__" - glyphs are
# missing in this view, TODO confirm.
GetArrayNames{𝕊bytes:
header·ExtractHeader bytes
"__metadata__"¨/header
}
# ParseUint: decodes 𝕩 (bytes) into unsigned integers of 𝕨 bits each.
# Valid for sizes 8, 16, and 32 (passed as 𝕨)
# The bytes are bit-cast from 8-bit chars to 1-bit values and each cell is
# folded into a number; 𝕨÷8 is the number of bytes per element.
# NOTE(review): presumably 𝕩 is reshaped into rows of 𝕨÷8 bytes before the
# cast - the reshape glyphs are missing in this view, TODO confirm.
ParseUint{2×+˜´˘8'c',1'u'•bit._cast˘ (𝕨÷8)𝕩}
# ParseInt: decodes 𝕩 (bytes) into signed integers of 𝕨 bits each, built on 𝕨 ParseUint 𝕩.
# NOTE(review): the expression appears to shift the unsigned value, reduce it
# with a modulus (the `|`) and shift it back so the result is signed; the
# power-of-two glyphs are missing in this view - TODO confirm.
ParseInt{(-2𝕨-1)+(2𝕨)|(2𝕨-1)+𝕨ParseUint𝕩}
# ParseUint64: decodes 𝕩 (bytes) into unsigned 64-bit values by combining
# results of ParseUInt with a multiply-and-add fold over each cell.
# ParseUInt differs only in letter case from ParseUint; BQN identifiers are
# case-insensitive, so this presumably refers to the same function.
# NOTE(review): the `232` tokens presumably stood for a power of two and a
# 32-bit width / pairing before glyph loss - TODO confirm.
# NOTE(review): if numbers are 64-bit floats, values above 2^53 may not be exact - verify.
ParseUint64{(232)×+˜´˘ 232 ParseUInt 𝕩}
# ParseInt64: decodes 𝕩 (bytes) into signed 64-bit values by combining
# results of ParseInt with a multiply-and-add fold over each cell.
# NOTE(review): both halves go through ParseInt; verify that the low half
# should not be parsed as unsigned instead (compare ParseUint64) - TODO confirm.
# NOTE(review): the `232` tokens presumably stood for a power of two and a
# 32-bit width / pairing before glyph loss - TODO confirm.
ParseInt64{(232)×+˜´˘ 232 ParseInt 𝕩}
# Parse a floating point number
# e is the size of the exponent part
# `bytes` is the raw encoding of one number; it is bit-cast to 1-bit values (n).
# The result is sign × (2 raised to the unbiased exponent) × (significand ÷ scale).
# NOTE(review): only this single formula is visible; no special handling of
# zero, subnormals, infinities or NaN appears here - verify whether that is intended.
# NOTE(review): glyphs are missing in this view, so the per-line notes are hedged.
ParseFloat{e𝕊bytes:
# n: the bits of `bytes`
n8'c',1'u'•bit._cast bytes
# s: presumably the number of significand bits (total bits minus e+1) - TODO confirm
s(n)-e+1
# sign: presumably +1 or -1 derived from the leading bit - TODO confirm
sign1+2×-n
# exponent: presumably the e bits after the sign bit, folded into a number - TODO confirm
exponent2×+˜´e1n
# significand: presumably the remaining bits with a leading 1 prepended - TODO confirm
significand2×+˜´1e1n
# the inner (..-1)-1 term looks like the exponent bias - TODO confirm
sign×(2exponent-((2e-1)-1))×significand÷2s
}
# dtypes: the dtype name strings recognised by this file.
# GetArray looks up a tensor's "dtype" string in this list and uses the
# resulting index to pick an entry of typeConversions, so the order here
# must stay in step with typeConversions.
dtypes
"BOOL", # Boolean type
"U8", # Unsigned byte
"I8", # Signed byte
"F8_E5M2", # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
"F8_E4M3", # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
"I16", # Signed integer (16-bit)
"U16", # Unsigned integer (16-bit)
"F16", # Half-precision floating point
"BF16", # Brain floating point
"I32", # Signed integer (32-bit)
"U32", # Unsigned integer (32-bit)
"F32", # Floating point (32-bit)
"F64", # Floating point (64-bit)
"I64", # Signed integer (64-bit)
"U64", # Unsigned integer (64-bit)
# typeConversions: one byte-decoding function per entry of dtypes, in the same order.
# Entries are either a direct •bit._cast, or one of ParseUint / ParseFloat /
# ParseInt64 / ParseUint64 defined in this file.
# NOTE(review): for the ParseFloat entries the trailing 1, 2 or 4 is presumably
# the byte width each number is grouped into - glyphs are missing, TODO confirm.
typeConversions
8'c', 1'u'•bit._cast, # BOOL
8ParseUint, # U8
8'c', 8'i'•bit._cast, # I8
5ParseFloat˘1, # F8_E5M2
4ParseFloat˘1, # F8_E4M3
8'c',16'i'•bit._cast, # I16
16ParseUint, # U16
5ParseFloat˘2, # F16
8ParseFloat˘2, # BF16
8'c',32'i'•bit._cast, # I32
32ParseUint, # U32
8ParseFloat˘4, # F32
8'c',64'f'•bit._cast, # F64
ParseInt64, # I64
ParseUint64, # U64
# GetArray: given the byte buffer `bytes` (left) and a tensor `name` (right),
# decodes that tensor's data using the header entry for `name`.
# NOTE(review): glyphs are missing in this view, so the per-line notes are hedged.
GetArray{bytes𝕊name:
# header and n both come from ExtractHeader; n is presumably the offset where the data starts
headernExtractHeader bytes
# byteBuf: presumably `bytes` with the first n elements dropped - TODO confirm
byteBufnbytes
# info: the header entry stored under `name`
infoheader JsonGet name
# se: the entry's "data_offsets" value (presumably a start/end pair - TODO confirm)
seinfo JsonGet "data_offsets"
shapeinfo JsonGet "shape"
# dtypeIdx: position of the entry's "dtype" string within dtypes
dtypeIdxdtypes<info JsonGet "dtype"
# conv: the matching decoder from typeConversions
convdtypeIdxtypeConversions
# presumably: slice byteBuf by se, decode with conv, reshape to shape - TODO confirm
shapeConv sebyteBuf
}
# SerializeArray: turns the array 𝕩 into the three values dtype, shape and data.
# Only two encodings appear here: "F64" (64-bit float cast to bytes) and
# "I32" (32-bit int cast to bytes).
# NOTE(review): the choice is made by a test on 𝕩 reduced with ´; presumably an
# "all elements are integers" check - glyphs are missing, TODO confirm.
SerializeArray{
# dtype: "F64" or "I32", picked by the test on 𝕩
dtype(´=𝕩)"F64""I32"
# shape: presumably the shape of 𝕩 - TODO confirm
shape𝕩
# data: 𝕩 cast to 8-bit chars with the cast that matches the chosen dtype
data(´=)64'f',8'c'•bit._cast,32'i',8'c'•bit._cast𝕩
dtypeshapedata
}
# SerializeArrays: given `names` (left) and `arrs` (right), builds one byte
# sequence from an encoded length, a JSON header and the arrays' data.
# NOTE(review): glyphs are missing in this view, so the per-line notes are hedged.
SerializeArrays{names𝕊arrs:
# Run SerializeArray on each array and split the results into three lists
dtypesshapesdatas<˘>SerializeArray¨arrs
# dataOffsets: built from a running sum (+`) over the data pieces; presumably
# start/end offsets per array - TODO confirm
dataOffsets<˘20+`¨datas
# blocks: one header object per array with keys "dtype", "shape" and "data_offsets"
blocks{𝕊namedtypeshapedataOffset:
["dtype""shape""data_offsets",dtypeshapedataOffset]
}¨<˘>namesdtypesshapesdataOffsets
# header: names paired with their blocks
header[names,blocks]
# n and headerJson: presumably the length and text of the exported JSON header - TODO confirm
nheaderJsonExport header
# nEncoded: n followed by 0, cast from 32-bit ints to 8-bit chars
# (presumably giving an 8-byte length prefix - TODO confirm)
nEncoded32'i',8'c'•bit._castn,0
# Result: presumably nEncoded, headerJson and all data joined - TODO confirm
nEncodedheaderJsondatas
}