refactor: group scripts into walker/ wikigen/ bundle/
Organize the 16 loose scripts by concern:
walker/ -- .wbt save tooling (sand, build_wbt, walker_hashes,
harvest_hashes, recover_key)
wikigen/ -- MediaWiki page generators (make_*_wiki, render_wiki)
bundle/ -- Unity/Odin asset extraction (unitybundle, odin_read,
extract_*, loot_probe, dump_loot_bytes)
The only cross-script imports (build_wbt->walker_hashes,
extract_loot->odin_read) live within the same folder, so each
script's dir on sys.path[0] keeps them resolving with no code
changes. All data paths are absolute, so the moves don't affect
I/O. Named the code dir wikigen/ to avoid colliding with the
generated wiki/ output dir; ignore the regenerable wiki_site/ render.
This commit is contained in:
223
bundle/odin_read.py
Normal file
223
bundle/odin_read.py
Normal file
@@ -0,0 +1,223 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Minimal reader for Sirenix Odin 'Binary' DataFormat (SerializedFormat=0).
|
||||
|
||||
Implements the BinaryDataReader entry-stream well enough to reconstruct the object
|
||||
tree (named fields, nodes, arrays, primitives, strings, references). Produces a
|
||||
generic Python structure; the caller maps it to loot-table semantics.
|
||||
|
||||
Reference: Sirenix.Serialization.BinaryDataReader / BinaryEntryType enum.
|
||||
String wire format: [flag:1 byte (0=8bit,1=16bit)] [charCount:int32] [chars].
|
||||
"""
|
||||
import struct
|
||||
|
||||
# BinaryEntryType: numeric tags used by the entry stream.
INVALID = 0
NAMED_START_REF = 1
UNNAMED_START_REF = 2
NAMED_START_STRUCT = 3
UNNAMED_START_STRUCT = 4
END_OF_NODE = 5
START_OF_ARRAY = 6
END_OF_ARRAY = 7
PRIMITIVE_ARRAY = 8
NAMED_INTERNAL_REF = 9
UNNAMED_INTERNAL_REF = 10
NAMED_EXT_REF_INDEX = 11
UNNAMED_EXT_REF_INDEX = 12
NAMED_EXT_REF_GUID = 13
UNNAMED_EXT_REF_GUID = 14
NAMED_SBYTE = 15
UNNAMED_SBYTE = 16
NAMED_BYTE = 17
UNNAMED_BYTE = 18
NAMED_SHORT = 19
UNNAMED_SHORT = 20
NAMED_USHORT = 21
UNNAMED_USHORT = 22
NAMED_INT = 23
UNNAMED_INT = 24
NAMED_UINT = 25
UNNAMED_UINT = 26
NAMED_LONG = 27
UNNAMED_LONG = 28
NAMED_ULONG = 29
UNNAMED_ULONG = 30
NAMED_FLOAT = 31
UNNAMED_FLOAT = 32
NAMED_DOUBLE = 33
UNNAMED_DOUBLE = 34
NAMED_DECIMAL = 35
UNNAMED_DECIMAL = 36
NAMED_CHAR = 37
UNNAMED_CHAR = 38
NAMED_STRING = 39
UNNAMED_STRING = 40
NAMED_GUID = 41
UNNAMED_GUID = 42
NAMED_BOOL = 43
UNNAMED_BOOL = 44
NAMED_NULL = 45
UNNAMED_NULL = 46
TYPE_NAME = 47
TYPE_ID = 48
END_OF_STREAM = 49
NAMED_EXT_REF_STRING = 50
UNNAMED_EXT_REF_STRING = 51

# Entry tags that are followed by a name string before their payload.
NAMED = {
    NAMED_START_REF,
    NAMED_START_STRUCT,
    NAMED_INTERNAL_REF,
    NAMED_EXT_REF_INDEX,
    NAMED_EXT_REF_GUID,
    NAMED_SBYTE,
    NAMED_BYTE,
    NAMED_SHORT,
    NAMED_USHORT,
    NAMED_INT,
    NAMED_UINT,
    NAMED_LONG,
    NAMED_ULONG,
    NAMED_FLOAT,
    NAMED_DOUBLE,
    NAMED_DECIMAL,
    NAMED_CHAR,
    NAMED_STRING,
    NAMED_GUID,
    NAMED_BOOL,
    NAMED_NULL,
    NAMED_EXT_REF_STRING,
}
|
||||
|
||||
class Node:
    """Reference or struct node rebuilt from the entry stream.

    Carries the node's type name (``type``), its id (``id``), the named
    entries keyed by name (``fields``) and the unnamed entries in stream
    order (``items``).
    """

    __slots__ = ("type", "id", "fields", "items")

    def __init__(self, type, id):
        self.type = type
        self.id = id
        self.fields = {}
        self.items = []

    def to_py(self):
        """Return the node as a plain dict.

        ``$type`` is added only when the node has a truthy type name, and
        ``$items`` only when there is at least one unnamed entry; every
        named field appears under its own name.
        """
        out = {"$type": _short(self.type)} if self.type else {}
        out.update((name, _topy(value)) for name, value in self.fields.items())
        if self.items:
            out["$items"] = [_topy(entry) for entry in self.items]
        return out
|
||||
|
||||
def _short(t):
    """Return type name ``t`` with its assembly qualifier removed.

    'Namespace.Class, Assembly' -> 'Namespace.Class'. The namespace is
    kept; everything from the first comma onward is dropped.
    """
    head, _, _ = t.partition(',')
    return head
|
||||
|
||||
def _topy(v):
    """Recursively convert a parsed value into plain Python data.

    Any object exposing a ``to_py()`` method (the node and reference
    wrappers in this module) is replaced by that method's result, lists
    are converted element by element, and every other value is returned
    unchanged.

    Previously only nodes were converted, so reference wrappers leaked
    into the output and made it impossible to serialise as JSON.
    """
    to_py = getattr(v, "to_py", None)
    if callable(to_py):
        return to_py()
    if isinstance(v, list):
        return [_topy(x) for x in v]
    return v
|
||||
|
||||
class Ref:
    """Reference entry: a ``kind`` label plus the referenced value ``val``."""

    def __init__(self, kind, val):
        self.kind = kind
        self.val = val

    def to_py(self):
        """Return the reference as ``{"$ref": val, "kind": kind}``."""
        out = {"$ref": self.val}
        out["kind"] = self.kind
        return out
|
||||
|
||||
class Reader:
    """Cursor over an Odin binary blob that rebuilds its entry tree.

    Instance state:
      d      -- the raw bytes being parsed
      p      -- current read offset into ``d``
      n      -- length of ``d``
      types  -- type id -> type name, filled in as TYPE_NAME entries are read

    All multi-byte numbers are read little-endian. Reading past the end of
    the data is not guarded: ``u8`` raises IndexError and the struct-based
    readers raise struct.error.
    """

    def __init__(self, data):
        self.d = data
        self.p = 0
        self.n = len(data)
        self.types = {}

    def eof(self):
        """Return True once the cursor has reached the end of the data."""
        return self.p >= self.n

    def _unpack(self, fmt, size):
        """Unpack one ``fmt`` value at the cursor and advance by ``size``."""
        value = struct.unpack_from(fmt, self.d, self.p)[0]
        self.p += size
        return value

    def _take(self, count):
        """Return the next ``count`` bytes and advance the cursor by ``count``."""
        chunk = self.d[self.p:self.p + count]
        self.p += count
        return chunk

    def u8(self):
        v = self.d[self.p]
        self.p += 1
        return v

    def i32(self):
        return self._unpack('<i', 4)

    def u32(self):
        return self._unpack('<I', 4)

    def i64(self):
        return self._unpack('<q', 8)

    def u64(self):
        return self._unpack('<Q', 8)

    def f32(self):
        return self._unpack('<f', 4)

    def f64(self):
        return self._unpack('<d', 8)

    def i16(self):
        return self._unpack('<h', 2)

    def u16(self):
        return self._unpack('<H', 2)

    def string(self):
        """Read a string: [flag:1 byte] [charCount:int32] [chars].

        Flag 0 means one byte per char (decoded as latin1); any other flag
        is treated as two bytes per char (decoded as UTF-16 LE).
        """
        flag = self.u8()
        ln = self.i32()
        if flag == 0:
            return self._take(ln).decode('latin1')
        return self._take(ln * 2).decode('utf-16-le')

    def peek(self):
        """Return the byte at the cursor without consuming it.

        Returns END_OF_STREAM when the cursor is at or past the end.
        """
        return self.d[self.p] if self.p < self.n else END_OF_STREAM

    def read_type_entry(self):
        """Read the type entry that opens a node and return the type name.

        TYPE_NAME registers a new id -> name mapping and returns the name.
        TYPE_ID returns the previously registered name (None if unknown).
        UNNAMED_NULL marks a node without a type; it is consumed and None
        is returned. Any other byte is left unconsumed and None is returned.
        """
        b = self.peek()
        if b == TYPE_NAME:
            self.p += 1
            tid = self.i32()
            type_name = self.string()
            self.types[tid] = type_name
            return type_name
        if b == TYPE_ID:
            self.p += 1
            return self.types.get(self.i32())
        if b == UNNAMED_NULL:
            # Odin writes an UnnamedNull tag in the type slot of an untyped
            # node (see Sirenix BinaryDataReader.ReadTypeEntry). It must be
            # consumed here: otherwise a reference node reads its id from
            # the wrong offset and a struct node gains a spurious None item.
            self.p += 1
        return None

    def read_node(self, is_ref):
        """Read a node body up to END_OF_NODE (or end of data).

        The entry byte (and name, if any) of the node itself has already
        been consumed. Reference nodes (``is_ref`` true) carry an int32 id
        after the type entry; struct nodes do not.
        """
        t = self.read_type_entry()
        nid = self.i32() if is_ref else None
        node = Node(t, nid)
        while not self.eof():
            b = self.u8()
            if b == END_OF_NODE:
                break
            name = self.string() if b in NAMED else None
            val = self.read_value(b)
            if name is not None:
                node.fields[name] = val
            else:
                node.items.append(val)
        return node

    def read_array(self):
        """Read array elements up to END_OF_ARRAY and return them as a list.

        The array starts with an int64 declared length, used only as a
        runaway guard: reading stops once more than ``length + 8`` elements
        have been collected.
        """
        length = self.i64()
        items = []
        while not self.eof():
            b = self.u8()
            if b == END_OF_ARRAY:
                break
            if b in NAMED:
                # The name is consumed to keep the cursor aligned; array
                # elements are stored positionally, so it is not kept.
                self.string()
            items.append(self.read_value(b))
            if len(items) > length + 8:
                break  # safety
        return items

    def read_value(self, b):
        """Read the payload of an entry whose tag byte ``b`` is already consumed.

        The name of a NAMED entry must also have been read by the caller.
        Returns a Node, a list, a Ref, a dict marker (``$primarray`` or
        ``$decimal``) or a plain Python value depending on the tag.

        Raises:
            ValueError: if ``b`` is not a tag this reader understands.
        """
        if b in (NAMED_START_REF, UNNAMED_START_REF):
            return self.read_node(True)
        if b in (NAMED_START_STRUCT, UNNAMED_START_STRUCT):
            return self.read_node(False)
        if b == START_OF_ARRAY:
            return self.read_array()
        if b == PRIMITIVE_ARRAY:
            cnt = self.i32()
            bpe = self.i32()
            # The element bytes are skipped; only the shape is reported.
            self.p += cnt * bpe
            return {"$primarray": cnt, "bytesPer": bpe}
        if b in (NAMED_INTERNAL_REF, UNNAMED_INTERNAL_REF):
            return Ref("internal", self.i32())
        if b in (NAMED_EXT_REF_INDEX, UNNAMED_EXT_REF_INDEX):
            return Ref("ext_index", self.i32())
        if b in (NAMED_EXT_REF_GUID, UNNAMED_EXT_REF_GUID):
            return Ref("ext_guid", self._take(16).hex())
        if b in (NAMED_EXT_REF_STRING, UNNAMED_EXT_REF_STRING):
            return Ref("ext_string", self.string())
        if b in (NAMED_SBYTE, UNNAMED_SBYTE):
            v = self.u8()
            return v - 256 if v > 127 else v
        if b in (NAMED_BYTE, UNNAMED_BYTE):
            return self.u8()
        if b in (NAMED_SHORT, UNNAMED_SHORT):
            return self.i16()
        if b in (NAMED_USHORT, UNNAMED_USHORT):
            return self.u16()
        if b in (NAMED_INT, UNNAMED_INT):
            return self.i32()
        if b in (NAMED_UINT, UNNAMED_UINT):
            return self.u32()
        if b in (NAMED_LONG, UNNAMED_LONG):
            return self.i64()
        if b in (NAMED_ULONG, UNNAMED_ULONG):
            return self.u64()
        if b in (NAMED_FLOAT, UNNAMED_FLOAT):
            return self.f32()
        if b in (NAMED_DOUBLE, UNNAMED_DOUBLE):
            return self.f64()
        if b in (NAMED_DECIMAL, UNNAMED_DECIMAL):
            # Kept as the raw 16 bytes in hex; no numeric decoding is done.
            return {"$decimal": self._take(16).hex()}
        if b in (NAMED_CHAR, UNNAMED_CHAR):
            return self._take(2).decode('utf-16-le')
        if b in (NAMED_STRING, UNNAMED_STRING):
            return self.string()
        if b in (NAMED_GUID, UNNAMED_GUID):
            return self._take(16).hex()
        if b in (NAMED_BOOL, UNNAMED_BOOL):
            return self.u8() != 0
        if b in (NAMED_NULL, UNNAMED_NULL):
            return None
        if b == END_OF_STREAM:
            return None
        raise ValueError(f"unknown entry byte {b} at pos {self.p-1}")

    def read_top(self):
        """Read entries at the document root until end-of-stream.

        Returns ``(roots, items)``: named root entries keyed by name, and
        unnamed root entries in stream order. Stray END_OF_NODE and
        END_OF_ARRAY tags at the root are skipped; END_OF_STREAM or INVALID
        stops reading.
        """
        roots = {}
        items = []
        while not self.eof():
            b = self.u8()
            if b in (END_OF_STREAM, INVALID):
                break
            if b in (END_OF_NODE, END_OF_ARRAY):
                continue
            name = self.string() if b in NAMED else None
            val = self.read_value(b)
            if name is not None:
                roots[name] = val
            else:
                items.append(val)
        return roots, items
|
||||
|
||||
def parse(data):
    """Parse an Odin binary blob into plain Python data.

    Returns a dict with:
      roots    -- named root entries, converted with ``_topy``
      items    -- unnamed root entries, converted with ``_topy``
      consumed -- reader offset when parsing stopped
      total    -- length of ``data``
      types    -- type id -> type name table collected while reading
    """
    reader = Reader(data)
    named, unnamed = reader.read_top()
    result = {}
    result["roots"] = {key: _topy(value) for key, value in named.items()}
    result["items"] = [_topy(entry) for entry in unnamed]
    result["consumed"] = reader.p
    result["total"] = reader.n
    result["types"] = reader.types
    return result
|
||||
|
||||
if __name__ == "__main__":
    # Usage: odin_read.py <odin-binary-file> [out.json]
    # Parses the blob, reports how much of it was consumed, then either
    # writes the full result as JSON or prints its first 4000 characters.
    import sys, json

    if len(sys.argv) < 2:
        sys.exit("usage: odin_read.py <odin-binary-file> [out.json]")

    # Context managers make sure both files are closed even on error.
    with open(sys.argv[1], 'rb') as src:
        data = src.read()
    res = parse(data)
    print(f"consumed {res['consumed']}/{res['total']} bytes")
    print(f"{len(res['types'])} types registered")
    out = sys.argv[2] if len(sys.argv) > 2 else None
    if out:
        # ensure_ascii=False emits raw non-ASCII characters, so the file is
        # opened as UTF-8 instead of relying on the platform default.
        with open(out, 'w', encoding='utf-8') as dst:
            json.dump(res, dst, indent=1, ensure_ascii=False)
        print("wrote", out)
    else:
        print(json.dumps(res, indent=1, ensure_ascii=False)[:4000])
|
||||
Reference in New Issue
Block a user