bqn-safetensors/safetensors.bqn
2024-11-15 22:29:10 +01:00

103 lines
3.3 KiB
BQN
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

ExtractMetadata,GetArrayNames,GetArray,SerializeArrays
Parse,Export•Import"json.bqn"
# ExtractHeader: read the length prefix of a serialized buffer and parse the
# header that follows it with Parse (imported from json.bqn).
# NOTE(review): this copy of the source seems to be missing several BQN glyphs
# (assignment, list brackets, take/drop, ...); the description is inferred
# from the remaining tokens — confirm against the original file.
# 𝕩 (bytes): the raw buffer; it is cast from 8-bit characters below.
# Result: presumably the parsed header paired with n+8 — TODO confirm.
ExtractHeader{𝕊bytes:
# Reinterpret 8-bit chars as single bits, then fold them (double-and-add) into the number n.
# Only a slice of bytes selected with the literal 8 takes part (presumably the first 8 bytes).
n2×+˜´8'c',1'u'•bit._cast 8bytes
# Parse a slice of bytes chosen using n and 8; n+8 is returned alongside the parsed value
# (GetArray uses that second value to locate the data buffer).
Parse n8bytes,n+8
}
# JsonGet: look up the key 𝕩 in the parsed JSON object 𝕨 and return the matching value.
# NOTE(review): glyphs appear to be missing from this copy. Presumably 𝕨 is a
# two-row table (keys in one row, values in row 1) and the boxed key <𝕩 is
# searched among the keys — confirm against the original and against json.bqn.
# NOTE(review): no handling of a missing key is visible here — confirm behaviour.
JsonGet{((𝕨)<𝕩)1𝕨}
# ExtractMetadata: return the "__metadata__" entry of the header of bytes.
# 𝕩 (bytes): the raw serialized buffer, passed unchanged to ExtractHeader.
# NOTE(review): this copy seems to be missing BQN glyphs (assignment, strand);
# description inferred from the remaining tokens — confirm.
ExtractMetadata{𝕊bytes:
# The · (Nothing) in the target presumably discards the second part of ExtractHeader's result.
header·ExtractHeader bytes
# Fetch the entry stored under the literal key "__metadata__".
header JsonGet "__metadata__"
}
# GetArrayNames: list the array names stored in the header of bytes.
# 𝕩 (bytes): the raw serialized buffer, passed unchanged to ExtractHeader.
# NOTE(review): this copy seems to be missing BQN glyphs; description inferred
# from the remaining tokens — confirm.
GetArrayNames{𝕊bytes:
# The · (Nothing) in the target presumably discards the second part of ExtractHeader's result.
header·ExtractHeader bytes
# Compares each entry against "__metadata__" and filters with / (replicate);
# presumably keeps every header key except "__metadata__" — TODO confirm.
"__metadata__"¨/header
}
# ParseUint: decode a byte sequence into unsigned integers.
# 𝕨: width of each integer in bits; valid for sizes 8, 16, and 32.
# 𝕩: the bytes to decode (cast from 8-bit characters).
# NOTE(review): glyphs appear to be missing from this copy. Presumably 𝕩 is
# arranged into rows of 𝕨÷8 bytes, each row is cast to single bits, and each
# row's bits are folded (double-and-add) into one number — confirm.
ParseUint{2×+˜´˘8'c',1'u'•bit._cast˘ (𝕨÷8)𝕩}
# ParseInt: decode a byte sequence into signed integers of 𝕨 bits each.
# Built on 𝕨 ParseUint 𝕩: an offset is added, the result is reduced with | (modulus),
# and an offset is subtracted again.
# NOTE(review): glyphs appear to be missing from this copy; the offsets and the
# modulus are presumably 2 to the power 𝕨-1 and 2 to the power 𝕨 (two's-complement
# reinterpretation) — confirm against the original.
ParseInt{(-2𝕨-1)+(2𝕨)|(2𝕨-1)+𝕨ParseUint𝕩}
# ParseUint64: decode a byte sequence into unsigned 64-bit integers.
# The bytes are first decoded with ParseUint (spelled ParseUInt here; BQN names
# are case-insensitive, so it is the same function) and the pieces are then
# combined row by row with a multiply-and-add fold.
# NOTE(review): glyphs appear to be missing from this copy; presumably the pieces
# are 32-bit words grouped in pairs and the multiplier is 2 to the power 32 — confirm.
# NOTE(review): if numbers are IEEE doubles, results above 2^53 are not exact — confirm.
ParseUint64{(232)×+˜´˘ 232 ParseUInt 𝕩}
# ParseInt64: decode a byte sequence into signed 64-bit integers.
# The bytes are first decoded with ParseInt and the pieces are then combined
# row by row with a multiply-and-add fold.
# NOTE(review): glyphs appear to be missing from this copy; presumably the pieces
# are 32-bit words grouped in pairs and the multiplier is 2 to the power 32 — confirm.
# NOTE(review): every piece goes through the signed ParseInt. If the low word is
# meant to be unsigned, values whose low word has its top bit set would come out
# 2^32 too small — verify with a value such as 16_FFFFFFFF.
ParseInt64{(232)×+˜´˘ 232 ParseInt 𝕩}
# ParseFloat: decode the bytes of one floating point number into a BQN number.
# 𝕨 (e): the size of the exponent part, in bits.
# 𝕩 (bytes): the bytes of a single value (cast from 8-bit characters).
# NOTE(review): this copy seems to be missing BQN glyphs (assignment, reverse,
# take/drop, power, ...); the step descriptions are inferred from the remaining
# tokens — confirm against the original file.
# NOTE(review): no conditional is visible, so zero, subnormals, infinities and
# NaN do not appear to be special-cased — confirm.
ParseFloat{e𝕊bytes:
# n: the value's individual bits (8-bit chars reinterpreted as 1-bit units).
n8'c',1'u'•bit._cast bytes
# s: a count derived from n minus the exponent size and one more
# (presumably the number of significand bits).
s(n)-e+1
# sign: computed as 1 plus twice a negated bit (presumably the sign bit, giving 1 or ¯1).
sign1+2×-n
# exponent: e bits folded (double-and-add) into an integer.
exponent2×+˜´e1n
# significand: the remaining bits folded into an integer
# (presumably with an implicit leading 1 — TODO confirm).
significand2×+˜´1e1n
# Combine: sign × a power of the bias-corrected exponent × significand scaled down by s bits.
sign×(2exponent-((2e-1)-1))×significand÷2s
}
# dtypes: the dtype name strings recognised by this module.
# GetArray finds the position of a tensor's "dtype" string in this list and uses
# it to select from typeConversions, so the two lists must stay in the same order.
# NOTE(review): the assignment arrow and list brackets seem to be missing from this copy.
dtypes
"BOOL", # Boolean type
"U8", # Unsigned byte
"I8", # Signed byte
"F8_E5M2", # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
"F8_E4M3", # FP8 <https://arxiv.org/pdf/2209.05433.pdf>
"I16", # Signed integer (16-bit)
"U16", # Unsigned integer (16-bit)
"F16", # Half-precision floating point
"BF16", # Brain floating point
"I32", # Signed integer (32-bit)
"U32", # Unsigned integer (32-bit)
"F32", # Floating point (32-bit)
"F64", # Floating point (64-bit)
"I64", # Signed integer (64-bit)
"U64", # Unsigned integer (64-bit)
# typeConversions: one decoder per dtype, in the same order as the dtypes list.
# GetArray picks the decoder at the position where the dtype string was found.
# For ParseFloat entries the left number is the exponent size in bits; the trailing
# number is presumably the byte width of one value (glyphs are missing there).
# NOTE(review): the assignment arrow, list brackets and some modifiers seem to be
# missing from this copy — confirm against the original file.
# NOTE(review): BOOL uses the same char-to-1-bit cast as the bit-level helpers; if
# booleans are stored one per byte this would yield eight values per byte — confirm.
typeConversions
8'c', 1'u'•bit._cast, # BOOL
8ParseUint, # U8
8'c', 8'i'•bit._cast, # I8
5ParseFloat˘1, # F8_E5M2
4ParseFloat˘1, # F8_E4M3
8'c',16'i'•bit._cast, # I16
16ParseUint, # U16
5ParseFloat˘2, # F16
8ParseFloat˘2, # BF16
8'c',32'i'•bit._cast, # I32
32ParseUint, # U32
8ParseFloat˘4, # F32
8'c',64'f'•bit._cast, # F64
ParseInt64, # I64
ParseUint64, # U64
# GetArray: decode the array called name from a serialized buffer.
# 𝕨 (bytes): the raw serialized buffer.
# 𝕩 (name): the header key of the array to extract.
# Result: the decoded values, arranged using the "shape" entry.
# NOTE(review): this copy seems to be missing BQN glyphs (assignment, strand,
# take/drop, index-of, pick, reshape); steps are inferred from the remaining
# tokens — confirm against the original file.
# NOTE(review): no handling of an unknown name or dtype is visible — confirm.
GetArray{bytes𝕊name:
# Parsed header and the value ExtractHeader returns with it (n+8 in its code).
headernExtractHeader bytes
# Data buffer: derived from bytes using n (presumably everything after the header).
byteBufnbytes
# Header entry describing the requested array.
infoheader JsonGet name
# Start and end offsets of the array's bytes (presumably relative to byteBuf).
seinfo JsonGet "data_offsets"
shapeinfo JsonGet "shape"
# Position of the array's dtype string within the dtypes list.
dtypeIdxdtypes<info JsonGet "dtype"
# Decoder at the same position in typeConversions.
convdtypeIdxtypeConversions
# Decode the selected byte range and arrange the values according to shape.
shapeConv sebyteBuf
}
# SerializeArray: turn one array 𝕩 into its dtype name, shape and raw bytes.
# Only two encodings are produced: "F64" (cast from 64-bit floats to 8-bit chars)
# and "I32" (cast from 32-bit ints to 8-bit chars).
# NOTE(review): this copy seems to be missing BQN glyphs; the selection test is
# an and-fold over an equality (presumably "every element equals its floor",
# i.e. all integers pick I32) — confirm against the original file.
# NOTE(review): no range check for values outside 32-bit integers is visible — confirm.
SerializeArray{
# Pick the dtype name from the two candidates.
dtype(´=𝕩)"F64""I32"
# Shape of the input (the shape primitive itself is not visible in this copy).
shape𝕩
# Pick the matching cast, using a test of the same form as the one above, and apply it to 𝕩.
data(´=)64'f',8'c'•bit._cast,32'i',8'c'•bit._cast𝕩
# Result: the three values together.
dtypeshapedata
}
# SerializeArrays: build a complete serialized buffer from named arrays.
# 𝕨 (names): the array names, used as header keys.
# 𝕩 (arrs): the arrays; each one is passed to SerializeArray.
# Result: the encoded header length, the JSON header text and the array bytes combined.
# NOTE(review): this copy seems to be missing BQN glyphs (assignment, strand,
# join, length, windows, ...); steps are inferred from the remaining tokens —
# confirm against the original file.
SerializeArrays{names𝕊arrs:
# Serialize every array and regroup the results into three lists.
# The local name dtypes here is distinct from the module-level dtypes list.
dtypesshapesdatas<˘>SerializeArray¨arrs
# Offsets derived from a running sum (+`) over datas, starting from 0;
# presumably one start/end pair per array — TODO confirm.
dataOffsets<˘20+`¨datas
# One header entry per array: keys "dtype", "shape", "data_offsets" over their values.
# The name component is destructured but not used inside the block.
blocks{𝕊namedtypeshapedataOffset:
["dtype""shape""data_offsets",dtypeshapedataOffset]
}¨<˘>namesdtypesshapesdataOffsets
# Top-level header object: names over their entries.
header[names,blocks]
# Export (from json.bqn) produces the JSON text; n is presumably its length.
nheaderJsonExport header
# Encode n and 0 as two 32-bit ints cast to 8-bit chars (an 8-byte length prefix
# whose second word is always 0).
nEncoded32'i',8'c'•bit._castn,0
# Final buffer: length prefix, then header JSON, then the array bytes.
nEncodedheaderJsondatas
}