refactor: group scripts into walker/ wikigen/ bundle/

Organize the 16 loose scripts by concern:
  walker/  -- .wbt save tooling (sand, build_wbt, walker_hashes,
              harvest_hashes, recover_key)
  wikigen/ -- MediaWiki page generators (make_*_wiki, render_wiki)
  bundle/  -- Unity/Odin asset extraction (unitybundle, odin_read,
              extract_*, loot_probe, dump_loot_bytes)

The only cross-script imports (build_wbt->walker_hashes,
extract_loot->odin_read) live within the same folder, so each
script's dir on sys.path[0] keeps them resolving with no code
changes. All data paths are absolute, so the moves don't affect
I/O. Named the code dir wikigen/ to avoid colliding with the
generated wiki/ output dir; ignore the regenerable wiki_site/ render.
This commit is contained in:
DownloadPizza
2026-06-11 14:49:33 +02:00
parent 2e886f31f0
commit a44e4db1c3
17 changed files with 3 additions and 0 deletions

47
bundle/dump_loot_bytes.py Normal file
View File

@@ -0,0 +1,47 @@
#!/usr/bin/env python3
"""Dump the raw Odin SerializedBytes for the two LootTablesConfig assets, and do a
first-pass analysis: hexdump head, and list ASCII strings (>=3 chars) found."""
import os, sys, json, re, UnityPy
from UnityPy.helpers.TypeTreeGenerator import TypeTreeGenerator
# Absolute locations of the game install (GAME and the paths derived from it)
# and of the directory the raw Odin blobs are written to (OUT).
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
BD = os.path.join(GAME, "Sand_Data/StreamingAssets/aa/StandaloneWindows64")
META = os.path.join(GAME, "Sand_Data/il2cpp_data/Metadata/global-metadata.dat")
DLL = os.path.join(GAME, "GameAssembly.dll")
OUT = "/home/downloadpizza/sand_tools/extracted"

# Build the typetree generator from the IL2CPP binary + metadata. Both files
# are read fully into memory, so the handles are closed right away.
gen = TypeTreeGenerator("6000.0.40f1")
with open(DLL, "rb") as dll_file, open(META, "rb") as meta_file:
    gen.load_il2cpp(dll_file.read(), meta_file.read())

env = UnityPy.load(os.path.join(BD, "configuration_assets_all.bundle"),
                   os.path.join(BD, "sand_monoscripts.bundle"))

for o in env.objects:
    if o.type.name != "MonoBehaviour":
        continue
    try:
        d = o.read()
        nm = getattr(d, "m_Name", "") or ""
    except Exception:
        # Objects that cannot be read are skipped; only the named
        # LootTables assets matter here.
        continue
    if "LootTables" not in nm:
        continue

    # Read the object against the typetree generated for its script class.
    script = d.m_Script.read()
    full = (script.m_Namespace + "." if script.m_Namespace else "") + script.m_ClassName
    nodes = json.loads(gen.get_nodes_as_json(script.m_AssemblyName, full))
    tree = o.read_typetree(nodes)
    data = bytes(tree["serializationData"]["SerializedBytes"])

    out_bin = os.path.join(OUT, f"{nm}.odin.bin")
    with open(out_bin, "wb") as out_file:
        out_file.write(data)
    print(f"\n=== {nm}: {len(data)} bytes -> {out_bin}")
    print("head hex:", data[:64].hex())

    # ascii strings: runs of >=3 printable bytes, de-duplicated in first-seen order
    strings = re.findall(rb"[\x20-\x7e]{3,}", data)
    uniq = list(dict.fromkeys(s.decode() for s in strings))
    print(f"{len(strings)} ascii runs, {len(uniq)} unique. First 60 unique:")
    for t in uniq[:60]:
        print(" ", repr(t))

89
bundle/extract_data.py Normal file
View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Extract SAND MonoBehaviour data (loot tables, crafting graph, …) using IL2CPP typetrees.
Builds a UnityPy TypeTreeGenerator from GameAssembly.dll + global-metadata.dat (Unity
6000.0.40f1), loads the relevant bundles into one environment so PPtr references resolve,
reads each MonoBehaviour against its generated typetree, and rewrites PPtrs as the target's
m_Name where known. Output: structured JSON in extracted/.
"""
import os, sys, json, UnityPy
from UnityPy.helpers.TypeTreeGenerator import TypeTreeGenerator
# Absolute paths used by this script. BD, META and DLL are all derived from
# GAME; OUT is a separate absolute path (the module docstring names extracted/
# as the output location).
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
# Directory the bundle file names passed to load_env() are joined onto.
BD = os.path.join(GAME, "Sand_Data/StreamingAssets/aa/StandaloneWindows64")
# IL2CPP metadata and game binary, both fed to TypeTreeGenerator.load_il2cpp().
META = os.path.join(GAME, "Sand_Data/il2cpp_data/Metadata/global-metadata.dat")
DLL = os.path.join(GAME, "GameAssembly.dll")
OUT = "/home/downloadpizza/sand_tools/extracted"
# Unity version string handed to TypeTreeGenerator.
UNITY = "6000.0.40f1"
def build_generator():
    """Create a TypeTreeGenerator for UNITY and load the IL2CPP data into it.

    Reads DLL and META fully into memory and passes both byte strings to
    ``load_il2cpp``. The files are opened in a ``with`` block so the handles
    are closed as soon as the bytes have been read.

    Returns:
        The loaded TypeTreeGenerator.

    Raises:
        OSError: if either input file cannot be opened or read.
    """
    g = TypeTreeGenerator(UNITY)
    with open(DLL, "rb") as dll_file, open(META, "rb") as meta_file:
        g.load_il2cpp(dll_file.read(), meta_file.read())
    return g
# (assembly, namespace, class) -> typetree nodes, or None when generation failed.
_node_cache = {}


def nodes_for(gen, script):
    """Return the typetree nodes for *script*'s class, memoised per class.

    The class is looked up on *gen* by assembly name and its namespace-qualified
    class name. A failed lookup is cached as None, so each class is asked for
    at most once.
    """
    cache_key = (script.m_AssemblyName, script.m_Namespace, script.m_ClassName)
    if cache_key in _node_cache:
        return _node_cache[cache_key]

    prefix = script.m_Namespace + "." if script.m_Namespace else ""
    qualified = prefix + script.m_ClassName
    try:
        found = gen.get_nodes(script.m_AssemblyName, qualified)
    except Exception:
        found = None
    _node_cache[cache_key] = found
    return found
def load_env(*bundles):
    """Load the given bundle file names (joined onto BD) with one UnityPy.load call."""
    return UnityPy.load(*(os.path.join(BD, bundle_name) for bundle_name in bundles))
def build_name_index(env):
    """Map path_id -> m_Name for every GameObject in *env*.

    Only objects whose type name is "GameObject" are indexed. Any object that
    raises while being inspected or read is skipped.
    """
    names = {}
    for entry in env.objects:
        try:
            if entry.type.name == "GameObject":
                names[entry.path_id] = entry.read().m_Name
        except Exception:
            continue
    return names
def read_mb(gen, obj, name_idx=None):
    """Read a MonoBehaviour into a plain structure with PPtrs simplified.

    The object is read against the typetree generated for its script class and
    the result is passed through ``_clean``, which turns PPtr-shaped dicts into
    ``{"ref": name-or-path-id}`` (or None for a null pointer).

    Returns None when the script reference cannot be read or no typetree nodes
    are available for the class.
    """
    behaviour = obj.read()
    try:
        script = behaviour.m_Script.read()
    except Exception:
        return None

    nodes = nodes_for(gen, script)
    if nodes:
        return _clean(obj.read_typetree(nodes), name_idx, obj)
    return None
def _clean(v, name_idx, obj):
if isinstance(v, dict):
# PPtr shape
if set(v.keys()) >= {"m_FileID", "m_PathID"} and len(v) <= 3:
pid = v["m_PathID"]
if pid == 0:
return None
nm = name_idx.get(pid) if name_idx else None
return {"ref": nm or pid}
return {k: _clean(x, name_idx, obj) for k, x in v.items()}
if isinstance(v, list):
return [_clean(x, name_idx, obj) for x in v]
return v
if __name__ == "__main__":
    import pprint

    print("building generator…", flush=True)
    gen = build_generator()
    print("generator ready", flush=True)

    # quick validation on one loot table
    env = load_env(
        "lootsets_assets_all.bundle",
        "sand_monoscripts.bundle",
        "epb_assets_all.bundle",
        "configuration_assets_all.bundle",
    )
    name_idx = build_name_index(env)
    print("name index:", len(name_idx), "entries", flush=True)

    def _is_sample(candidate):
        """True for the MonoBehaviour named POIShipMediumWeapons."""
        return (candidate.type.name == "MonoBehaviour"
                and candidate.read().m_Name == "POIShipMediumWeapons")

    # next() without a default: a missing sample object raises StopIteration.
    o = next(filter(_is_sample, env.objects))
    pprint.pprint(read_mb(gen, o, name_idx))

101
bundle/extract_i2.py Normal file
View File

@@ -0,0 +1,101 @@
#!/usr/bin/env python3
"""Extract the I2 Localization term table (English) from SAND's I2Languages asset.
The typetree read trips on an I2 alignment quirk, but each TermData is
self-describing, so we parse the MonoBehaviour body directly. Layout (Unity
serialization, little-endian, 4-byte aligned strings/arrays), from dump.cs:
engine header: m_GameObject(12) m_Enabled(1,align4) m_Script(12) m_Name(string)
LanguageSourceData mSource:
UInt8 x3 flags (align4)
TermData[] mTerms: int count, then per term:
string Term
int32 TermType
string Description
string[] Languages (the translations, one per language)
byte[] Flags
string[] Languages_Touch
UInt8 CaseInsensitiveTerms (align4)
int32 OnMissingTranslation
string mTerm_AppName
LanguageData[] mLanguages: int count, then per language:
string Name; string Code; byte Flags; byte Compressed (align4)
"""
import os, struct, json
# Absolute paths: GAME is the install root, DU the data.unity3d file that
# main() loads, OUT the JSON file main() writes.
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
DU = os.path.join(GAME, "Sand_Data/data.unity3d")
OUT = "/home/downloadpizza/sand_tools/i2_terms_en.json"
class R:
    """Cursor over a little-endian byte buffer with 4-byte-aligned strings/arrays.

    ``b`` is the buffer and ``o`` the current read offset. Every length or
    count prefix is validated, and length-prefixed payloads must fit inside
    the buffer, so corrupt or misaligned input raises ValueError instead of
    silently moving the cursor backwards or returning truncated data.
    """

    # Largest length/count prefix accepted; anything bigger is treated as a
    # sign that the cursor is misaligned or the data is corrupt.
    MAX_LEN = 10_000_000

    def __init__(self, b, off=0):
        self.b = b
        self.o = off

    def align(self):
        """Round the offset up to the next multiple of 4."""
        self.o = (self.o + 3) & ~3

    def u8(self):
        """Read one unsigned byte (IndexError at end of buffer)."""
        v = self.b[self.o]
        self.o += 1
        return v

    def i32(self):
        """Read a little-endian signed 32-bit integer."""
        v = struct.unpack_from('<i', self.b, self.o)[0]
        self.o += 4
        return v

    def i64(self):
        """Read a little-endian signed 64-bit integer."""
        v = struct.unpack_from('<q', self.b, self.o)[0]
        self.o += 8
        return v

    def _length(self, what):
        """Read an int32 length/count prefix and reject implausible values."""
        n = self.i32()
        if n < 0 or n > self.MAX_LEN:
            raise ValueError(f"bad {what} len {n} @ {self.o}")
        return n

    def _take(self, n):
        """Return the next *n* bytes, advance past them, then re-align to 4."""
        end = self.o + n
        if end > len(self.b):
            raise ValueError(f"truncated read of {n} bytes @ {self.o}")
        v = self.b[self.o:end]
        self.o = end
        self.align()
        return v

    def s(self):
        """Read a length-prefixed UTF-8 string (invalid bytes are replaced)."""
        n = self._length("str")
        return self._take(n).decode('utf-8', 'replace')

    def bytes(self):
        """Read a length-prefixed byte array."""
        n = self._length("bytes")
        return self._take(n)

    def str_array(self):
        """Read a count-prefixed array of strings."""
        n = self._length("array")
        return [self.s() for _ in range(n)]
def parse(raw):
    """Parse the raw MonoBehaviour body of the I2 language source asset.

    Args:
        raw: the serialized bytes of the MonoBehaviour.

    Returns:
        ``(name, languages, terms)`` where *name* is m_Name, *languages* is a
        list of ``(name, code)`` pairs (possibly empty or partial, see below)
        and *terms* is a list of ``(term, translations)`` pairs.

    The term table is parsed strictly and any read error propagates. The
    block after the terms is best-effort: if it fails to parse, whatever
    languages were read so far are returned.
    """
    r = R(raw)
    # engine header
    r.i32(); r.i64()        # m_GameObject
    r.u8(); r.align()       # m_Enabled
    r.i32(); r.i64()        # m_Script
    name = r.s()            # m_Name
    # mSource — each bool is individually 4-byte aligned
    for _ in range(3):
        r.u8(); r.align()

    terms = []
    for _ in range(r.i32()):
        term = r.s()
        r.s()                   # TermType is not serialized; Description (usually empty)
        langs = r.str_array()   # the translations, one per language
        r.bytes()               # one flag byte per language
        r.str_array()           # Languages_Touch
        terms.append((term, langs))

    languages = []
    try:  # trailing block is best-effort (English is index 0 regardless)
        r.u8(); r.align()       # CaseInsensitiveTerms
        r.i32()                 # OnMissingTranslation
        r.s()                   # mTerm_AppName
        for _ in range(r.i32()):
            lname = r.s()
            lcode = r.s()
            r.u8(); r.align()
            r.u8(); r.align()
            languages.append((lname, lcode))
    except (ValueError, IndexError, struct.error):
        # Only the failures the reader itself produces on short or corrupt
        # data are swallowed; anything else is a real bug and should surface.
        pass
    return name, languages, terms
def main():
    """Locate the I2 language asset in data.unity3d and write the English term table.

    Writes OUT as JSON (source label, language list, chosen English index,
    term count and the term -> English text table) and prints a short summary
    with the most common term-key prefixes.

    Raises:
        RuntimeError: if no MonoBehaviour of the expected size is found.
    """
    import UnityPy
    from collections import Counter

    # The asset is recognised by the exact byte size of its raw data.
    # NOTE(review): this presumably has to be updated whenever the game
    # changes the asset — confirm.
    asset_size = 3816792

    env = UnityPy.load(DU)
    raw = None
    for o in env.objects:
        if o.type.name != 'MonoBehaviour':
            continue
        candidate = o.get_raw_data()    # fetched once per object and reused
        if len(candidate) == asset_size:
            raw = candidate
            break
    if raw is None:
        raise RuntimeError(f"no MonoBehaviour with raw size {asset_size} in {DU}")

    name, languages, terms = parse(raw)
    print("asset:",name,"| languages:",languages)
    # First language whose code starts with 'en' or whose name starts with
    # 'english'; falls back to index 0.
    eng = next((i for i, (n, c) in enumerate(languages)
                if c.lower().startswith('en') or n.lower().startswith('english')), 0)
    print("english index:",eng,"| terms:",len(terms))
    table = {t: (tr[eng] if eng < len(tr) else (tr[0] if tr else '')) for t, tr in terms}

    # Explicit UTF-8: the dump uses ensure_ascii=False, so the translations
    # are written as raw non-ASCII text.
    with open(OUT, 'w', encoding='utf-8') as out_file:
        json.dump({'_source':'I2 LanguageSourceAsset (I2Languages), data.unity3d',
                   'languages':languages,'english_index':eng,'count':len(table),
                   'terms':table}, out_file, indent=2, ensure_ascii=False)

    ns = Counter(k.split('/')[0] for k in table)
    print("\ntop term-key prefixes:")
    for k, c in ns.most_common(30):
        print(f" {c:4d} {k}")
    print("\nwrote",OUT)


if __name__=='__main__':
    main()

73
bundle/extract_loot.py Normal file
View File

@@ -0,0 +1,73 @@
#!/usr/bin/env python3
"""Extract SAND loot/drop tables from the two Odin-binary LootTablesConfig assets.
Decodes conf_worldLootTablesStormConfig + conf_worldLootTablesVoyageConfig (Odin
SerializedFormat=0 Binary) via odin_read, flattens to a clean dict:
{ region: { lootTableId: [ {itemBlueprint, countMin, countMax, ...}, ... ] } }
Writes extracted/loot_tables.json (+ reports any unexpected fields / coverage).
"""
import os, sys, json
import odin_read
# Directory holding the input .odin.bin files and receiving loot_tables.json.
EX = "/home/downloadpizza/sand_tools/extracted"
# Region label -> file name (inside EX) of that region's raw Odin blob.
REGIONS = {
"Storm": "conf_worldLootTablesStormConfig.odin.bin",
"Voyage": "conf_worldLootTablesVoyageConfig.odin.bin",
}
def odin_list(node):
    """Flatten an Odin List<T> node into a plain python list of its elements.

    None gives ``[]`` and a list is returned as-is. Otherwise the node's
    ``$items`` are flattened one level: list entries contribute their members,
    any other entry is kept as a single element.
    """
    if node is None:
        return []
    if isinstance(node, list):
        return node

    # List nodes serialize as one inner array: $items == [[...elements...]]
    flat = []
    for chunk in node.get("$items", []):
        flat += chunk if isinstance(chunk, list) else [chunk]
    return flat
def main():
    """Decode both regional loot-table blobs and write extracted/loot_tables.json.

    For every region in REGIONS the Odin blob is parsed with
    ``odin_read.parse``, its ``_lootTables`` list is flattened to
    ``{lootTableId: [row, ...]}`` (rows are the item dicts minus ``$type``),
    and a per-region summary is printed. Afterwards the combined result is
    written and coverage against items_registry.json is reported when that
    file exists.

    Raises:
        AssertionError: if a blob is not consumed completely by the parser.
    """
    known_fields = ("itemBlueprint", "countMin", "countMax")
    result = {}
    extra_fields = set()
    item_ids = set()

    for region, fn in REGIONS.items():
        path = os.path.join(EX, fn)
        with open(path, "rb") as blob_file:
            data = blob_file.read()
        parsed = odin_read.parse(data)
        # Raised explicitly (not via `assert`) so the check survives `python -O`;
        # the exception type and message are the ones the assert produced.
        if parsed["consumed"] != parsed["total"]:
            raise AssertionError(f"{region}: incomplete parse")

        region_out = {}
        for t in odin_list(parsed["roots"]["_lootTables"]):
            rows = []
            for it in odin_list(t.get("items")):
                row = {k: v for k, v in it.items() if k != "$type"}
                extra_fields.update(k for k in row if k not in known_fields)
                if "itemBlueprint" in row:
                    item_ids.add(row["itemBlueprint"])
                rows.append(row)
            region_out[t.get("lootTableId")] = rows
        result[region] = region_out
        print(f"{region}: {len(region_out)} loot tables, "
              f"{sum(len(v) for v in region_out.values())} drop rows")

    out = os.path.join(EX, "loot_tables.json")
    # Explicit UTF-8 because the dump uses ensure_ascii=False.
    with open(out, "w", encoding="utf-8") as out_file:
        json.dump(result, out_file, indent=1, ensure_ascii=False)
    print(f"\nwrote {out}")
    print(f"unique item blueprints referenced: {len(item_ids)}")
    if extra_fields:
        print(f"NOTE extra (non count) fields present: {sorted(extra_fields)}")

    # how many drop-table item ids are NOT in our authoritative item registry?
    reg_path = os.path.join(EX, "items_registry.json")
    if os.path.exists(reg_path):
        with open(reg_path, encoding="utf-8") as reg_file:
            reg = set(json.load(reg_file)["items"])
        unknown = sorted(i for i in item_ids if i not in reg)
        print(f"item blueprints not in items_registry: {len(unknown)}")
        for u in unknown[:40]:
            print(" ", u)


if __name__ == "__main__":
    main()

89
bundle/loot_probe.py Normal file
View File

@@ -0,0 +1,89 @@
#!/usr/bin/env python3
"""Read the configuration bundle's MonoBehaviours via IL2CPP typetree and find the
loot tables. Determine definitively: are they plain (readable) or Odin-binary blobs?"""
import os, sys, json, UnityPy
from UnityPy.helpers.TypeTreeGenerator import TypeTreeGenerator
# Absolute locations of the game install (GAME and the paths derived from it)
# and of the directory the per-object JSON dumps are written to (OUT).
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
BD = os.path.join(GAME, "Sand_Data/StreamingAssets/aa/StandaloneWindows64")
META = os.path.join(GAME, "Sand_Data/il2cpp_data/Metadata/global-metadata.dat")
DLL = os.path.join(GAME, "GameAssembly.dll")
OUT = "/home/downloadpizza/sand_tools/extracted"

print("building generator...", flush=True)
gen = TypeTreeGenerator("6000.0.40f1")
# Both inputs are read fully into memory; `with` closes the handles right
# after instead of leaving them to the garbage collector.
with open(DLL, "rb") as dll_file, open(META, "rb") as meta_file:
    gen.load_il2cpp(dll_file.read(), meta_file.read())
print("ready", flush=True)

env = UnityPy.load(os.path.join(BD, "configuration_assets_all.bundle"),
                   os.path.join(BD, "sand_monoscripts.bundle"))
# (assembly name, qualified class name) -> parsed typetree nodes, or None on failure.
_cache = {}


def nodes_for(script):
    """Return the typetree nodes for *script*'s class via the module-level ``gen``.

    The nodes are requested as JSON and parsed. Results are memoised per
    (assembly, qualified class name), and a failed lookup is cached as None.
    """
    prefix = script.m_Namespace + "." if script.m_Namespace else ""
    full = prefix + script.m_ClassName
    key = (script.m_AssemblyName, full)
    if key in _cache:
        return _cache[key]

    try:
        nodes = json.loads(gen.get_nodes_as_json(script.m_AssemblyName, full))
    except Exception:
        nodes = None
    _cache[key] = nodes
    return nodes
import pprint


def trim(v, depth=0):
    """Return a shortened copy of *v* for display.

    Nesting deeper than 3 becomes "...", dicts keep their first 8 items, lists
    their first 3 elements plus a "...+N" marker, and byte strings are shown
    as a length placeholder.
    """
    if depth > 3: return "..."
    if isinstance(v, dict):
        return {k: trim(x, depth+1) for k, x in list(v.items())[:8]}
    if isinstance(v, list):
        return [trim(x, depth+1) for x in v[:3]] + (["...+%d"%(len(v)-3)] if len(v) > 3 else [])
    if isinstance(v, (bytes, bytearray)):
        return f"<{len(v)} bytes>"
    return v


# list all MBs with class + whether they look loot/odin
mbs = [o for o in env.objects if o.type.name == "MonoBehaviour"]
print(f"{len(mbs)} MonoBehaviours in configuration bundle\n", flush=True)
loot_objs = []
for o in mbs:
    try:
        d = o.read(); nm = getattr(d, "m_Name", "") or ""
        script = d.m_Script.read(); cls = script.m_ClassName
    except Exception as e:
        # Keep the object in the listing; the error text stands in for the class.
        nm, cls, script = "?", f"<{e}>", None
    raw = o.get_raw_data()
    has_loot = b"LootTable" in raw or b"Storm" in raw or b"Voyage" in raw
    mark = " <== LOOT" if has_loot else ""
    print(f" {nm:45s} {cls:35s} {len(raw):8d}B{mark}", flush=True)
    if has_loot or "Loot" in cls:
        loot_objs.append((o, nm, cls, script))

print(f"\n--- inspecting {len(loot_objs)} loot-ish objects ---", flush=True)
for o, nm, cls, script in loot_objs:
    print(f"\n### {nm} ({cls})", flush=True)
    if script is None:
        print(" no script"); continue
    nodes = nodes_for(script)
    if not nodes:
        print(" no typetree nodes"); continue
    try:
        tree = o.read_typetree(nodes)
    except Exception as e:
        print(f" typetree read FAILED: {e}"); continue
    keys = list(tree.keys()) if isinstance(tree, dict) else type(tree)
    print(f" top-level keys: {keys}", flush=True)

    # Odin signature: only m_GameObject/m_Enabled/m_Script/m_Name + serializationData
    sd = tree.get("serializationData") if isinstance(tree, dict) else None
    if sd:
        print(f" ODIN serializationData fields: {list(sd.keys())}", flush=True)
        for f in ("SerializedBytes", "SerializedBytesString", "SerializationNodes"):
            v = sd.get(f)
            if v:
                print(f" {f}: len={len(v)}", flush=True)

    # dump a trimmed view
    pprint.pprint(trim(tree), width=120)

    # save full; the file is closed even when serialisation fails part-way
    safe = "".join(c if c.isalnum() else "_" for c in nm)[:40]
    try:
        with open(os.path.join(OUT, f"_loot_{safe}.json"), "w") as out_file:
            json.dump(tree, out_file,
                      default=lambda x: f"<bytes {len(x)}>" if isinstance(x,(bytes,bytearray)) else str(x))
    except Exception as e:
        print(f" (save skipped: {e})")

223
bundle/odin_read.py Normal file
View File

@@ -0,0 +1,223 @@
#!/usr/bin/env python3
"""Minimal reader for Sirenix Odin 'Binary' DataFormat (SerializedFormat=0).
Implements the BinaryDataReader entry-stream well enough to reconstruct the object
tree (named fields, nodes, arrays, primitives, strings, references). Produces a
generic Python structure; the caller maps it to loot-table semantics.
Reference: Sirenix.Serialization.BinaryDataReader / BinaryEntryType enum.
String wire format: [flag:1 byte (0=8bit,1=16bit)] [charCount:int32] [chars].
"""
import struct
# BinaryEntryType: the one-byte tag that introduces each entry in the stream.
# Most value kinds come as a NAMED_*/UNNAMED_* pair of consecutive codes.
INVALID=0
# Node starts (reference vs. struct) and the marker that closes a node.
NAMED_START_REF=1; UNNAMED_START_REF=2
NAMED_START_STRUCT=3; UNNAMED_START_STRUCT=4
END_OF_NODE=5
# Array framing; PRIMITIVE_ARRAY is a single entry (count + bytes-per-element + raw data).
START_OF_ARRAY=6; END_OF_ARRAY=7
PRIMITIVE_ARRAY=8
# References: internal (int id), external by index, GUID or string.
NAMED_INTERNAL_REF=9; UNNAMED_INTERNAL_REF=10
NAMED_EXT_REF_INDEX=11; UNNAMED_EXT_REF_INDEX=12
NAMED_EXT_REF_GUID=13; UNNAMED_EXT_REF_GUID=14
# Primitive values.
NAMED_SBYTE=15; UNNAMED_SBYTE=16
NAMED_BYTE=17; UNNAMED_BYTE=18
NAMED_SHORT=19; UNNAMED_SHORT=20
NAMED_USHORT=21; UNNAMED_USHORT=22
NAMED_INT=23; UNNAMED_INT=24
NAMED_UINT=25; UNNAMED_UINT=26
NAMED_LONG=27; UNNAMED_LONG=28
NAMED_ULONG=29; UNNAMED_ULONG=30
NAMED_FLOAT=31; UNNAMED_FLOAT=32
NAMED_DOUBLE=33; UNNAMED_DOUBLE=34
NAMED_DECIMAL=35; UNNAMED_DECIMAL=36
NAMED_CHAR=37; UNNAMED_CHAR=38
NAMED_STRING=39; UNNAMED_STRING=40
NAMED_GUID=41; UNNAMED_GUID=42
NAMED_BOOL=43; UNNAMED_BOOL=44
NAMED_NULL=45; UNNAMED_NULL=46
# Type announcements: TYPE_NAME carries id + name, TYPE_ID refers back to a seen id.
TYPE_NAME=47; TYPE_ID=48
END_OF_STREAM=49
NAMED_EXT_REF_STRING=50; UNNAMED_EXT_REF_STRING=51
# Every tag that is followed by a name string before its payload.
NAMED = {NAMED_START_REF,NAMED_START_STRUCT,NAMED_INTERNAL_REF,NAMED_EXT_REF_INDEX,
NAMED_EXT_REF_GUID,NAMED_SBYTE,NAMED_BYTE,NAMED_SHORT,NAMED_USHORT,NAMED_INT,
NAMED_UINT,NAMED_LONG,NAMED_ULONG,NAMED_FLOAT,NAMED_DOUBLE,NAMED_DECIMAL,
NAMED_CHAR,NAMED_STRING,NAMED_GUID,NAMED_BOOL,NAMED_NULL,NAMED_EXT_REF_STRING}
class Node:
    """One reconstructed reference/struct node.

    Holds the node's type name (may be None), its id (None for structs), the
    named entries in ``fields`` and the unnamed entries, in order, in ``items``.
    """
    __slots__ = ("type", "id", "fields", "items")

    def __init__(self, type, id):
        self.type = type
        self.id = id
        self.fields = {}
        self.items = []

    def to_py(self):
        """Convert to a plain dict: optional "$type", the fields, optional "$items"."""
        plain = {"$type": _short(self.type)} if self.type else {}
        plain.update((name, _topy(value)) for name, value in self.fields.items())
        if self.items:
            plain["$items"] = list(map(_topy, self.items))
        return plain
def _short(t):
# trim 'Namespace.Class, Assembly' -> Class (keep generics short-ish)
base=t.split(',')[0]
return base
def _topy(v):
if isinstance(v,Node): return v.to_py()
if isinstance(v,list): return [_topy(x) for x in v]
return v
class Ref:
    """A reference entry: ``kind`` says which sort of reference, ``val`` is its payload."""

    def __init__(self, kind, val):
        self.kind = kind
        self.val = val

    def to_py(self):
        """Return the reference as ``{"$ref": val, "kind": kind}``."""
        plain = {"$ref": self.val}
        plain["kind"] = self.kind
        return plain
class Reader:
    """Sequential decoder over an in-memory Odin binary entry stream.

    Attributes:
        d: the raw buffer.
        p: current read offset into ``d``.
        n: ``len(d)``.
        types: type id -> type name, filled in as TYPE_NAME entries are read.

    Variable-length payloads (strings, GUIDs, primitive arrays, ...) are read
    through ``_take``, which rejects negative or overrunning lengths with
    ValueError instead of silently returning short data or moving the cursor
    backwards. Fixed-width numbers raise ``struct.error`` on overrun.
    """

    def __init__(self, data):
        self.d = data
        self.p = 0
        self.n = len(data)
        self.types = {}

    def eof(self):
        """True once the cursor has reached (or passed) the end of the buffer."""
        return self.p >= self.n

    def _take(self, count):
        """Return the next *count* raw bytes and advance past them."""
        if count < 0 or self.p + count > self.n:
            raise ValueError(
                f"cannot read {count} bytes at pos {self.p} (stream is {self.n} bytes)")
        chunk = self.d[self.p:self.p + count]
        self.p += count
        return chunk

    def _unpack(self, fmt, width):
        """Decode one fixed-width little-endian value and advance by *width*."""
        value, = struct.unpack_from(fmt, self.d, self.p)
        self.p += width
        return value

    def u8(self):
        v = self.d[self.p]
        self.p += 1
        return v

    def i32(self): return self._unpack('<i', 4)
    def u32(self): return self._unpack('<I', 4)
    def i64(self): return self._unpack('<q', 8)
    def u64(self): return self._unpack('<Q', 8)
    def f32(self): return self._unpack('<f', 4)
    def f64(self): return self._unpack('<d', 8)
    def i16(self): return self._unpack('<h', 2)
    def u16(self): return self._unpack('<H', 2)

    def string(self):
        """Read a string: flag byte, int32 char count, then the characters.

        Flag 0 means one byte per character (decoded as latin1); any other
        flag means two bytes per character (decoded as UTF-16-LE).
        """
        flag = self.u8()
        ln = self.i32()
        if flag == 0:
            return self._take(ln).decode('latin1')
        return self._take(ln * 2).decode('utf-16-le')

    def peek(self):
        """Return the next byte without consuming it (END_OF_STREAM at eof)."""
        return self.d[self.p] if self.p < self.n else END_OF_STREAM

    def read_type_entry(self):
        """Consume an optional type entry and return the type name, if any.

        TYPE_NAME registers id -> name in ``types`` and returns the name;
        TYPE_ID returns the previously registered name (None if unknown).
        Any other upcoming byte is left untouched and None is returned.
        """
        b = self.peek()
        if b == TYPE_NAME:
            self.p += 1
            tid = self.i32()
            s = self.string()
            self.types[tid] = s
            return s
        if b == TYPE_ID:
            self.p += 1
            tid = self.i32()
            return self.types.get(tid)
        return None

    def read_node(self, is_ref):
        """Read a node body up to END_OF_NODE (or end of data).

        Reference nodes carry an int32 id after the optional type entry.
        Named entries go into ``fields``, unnamed ones into ``items``.
        """
        t = self.read_type_entry()
        nid = self.i32() if is_ref else None
        node = Node(t, nid)
        while not self.eof():
            b = self.u8()
            if b == END_OF_NODE:
                break
            name = self.string() if b in NAMED else None
            val = self.read_value(b)
            if name is not None:
                node.fields[name] = val
            else:
                node.items.append(val)
        return node

    def read_array(self):
        """Read array elements up to END_OF_ARRAY (or end of data)."""
        length = self.i64()
        items = []
        while not self.eof():
            b = self.u8()
            if b == END_OF_ARRAY:
                break
            if b in NAMED:
                # The name must be consumed to keep the cursor in sync, but
                # array elements are stored positionally, so it is not kept.
                self.string()
            items.append(self.read_value(b))
            if len(items) > length + 8:
                break  # safety
        return items

    def read_value(self, b):
        """Decode the payload of the entry whose tag byte *b* was already consumed.

        For NAMED tags the caller must have read the name already.

        Raises:
            ValueError: for an unknown tag or an invalid payload length.
        """
        if b in (NAMED_START_REF, UNNAMED_START_REF):
            return self.read_node(True)
        if b in (NAMED_START_STRUCT, UNNAMED_START_STRUCT):
            return self.read_node(False)
        if b == START_OF_ARRAY:
            return self.read_array()
        if b == PRIMITIVE_ARRAY:
            cnt = self.i32()
            bpe = self.i32()
            self._take(cnt * bpe)   # payload is skipped; only its shape is reported
            return {"$primarray": cnt, "bytesPer": bpe}
        if b in (NAMED_INTERNAL_REF, UNNAMED_INTERNAL_REF):
            return Ref("internal", self.i32())
        if b in (NAMED_EXT_REF_INDEX, UNNAMED_EXT_REF_INDEX):
            return Ref("ext_index", self.i32())
        if b in (NAMED_EXT_REF_GUID, UNNAMED_EXT_REF_GUID):
            return Ref("ext_guid", self._take(16).hex())
        if b in (NAMED_EXT_REF_STRING, UNNAMED_EXT_REF_STRING):
            return Ref("ext_string", self.string())
        if b in (NAMED_SBYTE, UNNAMED_SBYTE):
            v = self.u8()
            return v - 256 if v > 127 else v
        if b in (NAMED_BYTE, UNNAMED_BYTE): return self.u8()
        if b in (NAMED_SHORT, UNNAMED_SHORT): return self.i16()
        if b in (NAMED_USHORT, UNNAMED_USHORT): return self.u16()
        if b in (NAMED_INT, UNNAMED_INT): return self.i32()
        if b in (NAMED_UINT, UNNAMED_UINT): return self.u32()
        if b in (NAMED_LONG, UNNAMED_LONG): return self.i64()
        if b in (NAMED_ULONG, UNNAMED_ULONG): return self.u64()
        if b in (NAMED_FLOAT, UNNAMED_FLOAT): return self.f32()
        if b in (NAMED_DOUBLE, UNNAMED_DOUBLE): return self.f64()
        if b in (NAMED_DECIMAL, UNNAMED_DECIMAL):
            return {"$decimal": self._take(16).hex()}
        if b in (NAMED_CHAR, UNNAMED_CHAR):
            return self._take(2).decode('utf-16-le')
        if b in (NAMED_STRING, UNNAMED_STRING): return self.string()
        if b in (NAMED_GUID, UNNAMED_GUID):
            return self._take(16).hex()
        if b in (NAMED_BOOL, UNNAMED_BOOL): return self.u8() != 0
        if b in (NAMED_NULL, UNNAMED_NULL): return None
        if b == END_OF_STREAM: return None
        raise ValueError(f"unknown entry byte {b} at pos {self.p-1}")

    def read_top(self):
        """Read entries at the document root until end-of-stream.

        Returns ``(roots, items)``: named root entries by name, and unnamed
        root entries in order. Stray END_OF_NODE / END_OF_ARRAY bytes at the
        root are skipped.
        """
        roots = {}
        items = []
        while not self.eof():
            b = self.u8()
            if b in (END_OF_STREAM, INVALID):
                break
            if b == END_OF_NODE or b == END_OF_ARRAY:
                continue
            name = self.string() if b in NAMED else None
            val = self.read_value(b)
            if name is not None:
                roots[name] = val
            else:
                items.append(val)
        return roots, items
def parse(data):
    """Decode a whole Odin binary blob into plain python data.

    Returns a dict with the named root entries ("roots"), the unnamed root
    entries ("items"), the number of bytes read ("consumed"), the blob length
    ("total") and the type id -> name table ("types").
    """
    reader = Reader(data)
    named, unnamed = reader.read_top()
    result = {}
    result["roots"] = {key: _topy(value) for key, value in named.items()}
    result["items"] = list(map(_topy, unnamed))
    result["consumed"] = reader.p
    result["total"] = reader.n
    result["types"] = reader.types
    return result
if __name__=="__main__":
    # CLI: odin_read.py <blob> [out.json]
    # Prints parse statistics, then either writes the full result as JSON to
    # the second argument or prints the first 4000 characters of it.
    import sys,json
    with open(sys.argv[1],'rb') as blob_file:
        data=blob_file.read()
    res=parse(data)
    print(f"consumed {res['consumed']}/{res['total']} bytes")
    print(f"{len(res['types'])} types registered")
    out=sys.argv[2] if len(sys.argv)>2 else None
    if out:
        # Explicit UTF-8 because the dump uses ensure_ascii=False.
        with open(out,'w',encoding='utf-8') as out_file:
            json.dump(res,out_file,indent=1,ensure_ascii=False)
        print("wrote",out)
    else:
        print(json.dumps(res,indent=1,ensure_ascii=False)[:4000])

81
bundle/unitybundle.py Normal file
View File

@@ -0,0 +1,81 @@
"""Minimal UnityFS bundle extractor (LZ4/LZ4HC + uncompressed blocks)."""
import struct
def lz4_decompress(src, dst_size):
    """Decompress one raw LZ4 block.

    Args:
        src: the compressed block (bytes-like).
        dst_size: expected decompressed size; the result is cut to this length.

    Returns:
        The decompressed bytes, at most *dst_size* long.

    Raises:
        ValueError: if a match refers to data before the start of the output
            (offset 0 or larger than what has been produced so far).
        IndexError: if the block ends in the middle of a sequence.
    """
    out = bytearray()
    i = 0
    n = len(src)
    while i < n:
        tok = src[i]; i += 1

        # Literal run: high nibble, extended by 255-valued continuation bytes.
        lit = tok >> 4
        if lit == 15:
            while True:
                bb = src[i]; i += 1
                lit += bb
                if bb != 255:
                    break
        out += src[i:i + lit]
        i += lit
        if i >= n:
            break   # the final sequence carries literals only

        # Match: 16-bit little-endian offset, low nibble (+ extension) + 4 = length.
        off = src[i] | (src[i + 1] << 8); i += 2
        ml = tok & 15
        if ml == 15:
            while True:
                bb = src[i]; i += 1
                ml += bb
                if bb != 255:
                    break
        ml += 4

        if off == 0 or off > len(out):
            # Without this check a too-large offset becomes a negative index
            # and silently copies from the wrong end of the buffer.
            raise ValueError(f"invalid LZ4 match offset {off} with {len(out)} bytes of output")
        start = len(out) - off
        if off >= ml:
            out += out[start:start + ml]
        else:
            # Overlapping match: the copy re-reads bytes it has just written,
            # so the last `off` bytes repeat until `ml` bytes are produced.
            pattern = bytes(out[start:])
            reps, rem = divmod(ml, off)
            out += pattern * reps + pattern[:rem]
    return bytes(out[:dst_size])
def extract(path):
    """Return (nodes, data_bytes). nodes=[(off,size,flags,name)].

    Reads a UnityFS bundle, decodes its blocks-info table and concatenates all
    (decompressed) data blocks into one byte string; the node offsets/sizes
    index into that string.

    Raises:
        ValueError: if the file does not start with the UnityFS signature or
            uses a compression codec other than none (0), LZ4 (2) or LZ4HC (3).
    """
    def cstring(pos):
        """Read a NUL-terminated string at *pos*; return (text, next position)."""
        end = b.index(b'\x00', pos)
        return b[pos:end].decode(), end + 1

    def unpack_block(raw, codec, unpacked_size):
        """Return *raw* decoded according to the codec bits of its flags."""
        if codec == 0:
            return raw
        if codec in (2, 3):
            return lz4_decompress(raw, unpacked_size)
        # Anything else (e.g. LZMA) would decode to garbage through the LZ4
        # path, so it is rejected explicitly.
        raise ValueError(f'unsupported compression type {codec}')

    with open(path, 'rb') as bundle_file:
        b = bundle_file.read()
    if b[:7] != b'UnityFS':
        raise ValueError('not UnityFS')

    # Header. Several fields are not needed but must be stepped over.
    p = 8                       # signature + its NUL terminator
    p += 4                      # format version (uint32)
    _, p = cstring(p)           # first version string
    _, p = cstring(p)           # second version string
    p += 8                      # total bundle size (int64)
    cblk, = struct.unpack_from('>I', b, p); p += 4
    ublk, = struct.unpack_from('>I', b, p); p += 4
    flags, = struct.unpack_from('>I', b, p); p += 4

    if flags & 0x80:  # blocksInfo at end
        bi = b[len(b) - cblk:]
    else:
        if flags & 0x200:
            p = (p + 15) & ~15
        bi = b[p:p + cblk]; p += cblk
    # NOTE(review): UnityPy's reader appears to align to 16 before the blocks
    # info for format version >= 7 and to apply the 0x200 padding *after* it,
    # before the data blocks. Layout kept as written here — confirm against
    # real bundles before changing.
    blocks_info = unpack_block(bi, flags & 0x3f, ublk)

    q = 16                      # skip the 16 bytes preceding the block count
    bc, = struct.unpack_from('>I', blocks_info, q); q += 4
    blocks = []
    for _ in range(bc):
        blocks.append(struct.unpack_from('>IIH', blocks_info, q)); q += 10
    ncount, = struct.unpack_from('>I', blocks_info, q); q += 4
    nodes = []
    for _ in range(ncount):
        off, sz, fl = struct.unpack_from('>qqI', blocks_info, q); q += 20
        e = blocks_info.index(b'\x00', q)
        name = blocks_info[q:e].decode()
        q = e + 1
        nodes.append((off, sz, fl, name))

    # Data blocks follow one another starting at p.
    data = bytearray()
    for (u, c, f) in blocks:
        blk = b[p:p + c]; p += c
        data += unpack_block(blk, f & 0x3f, u)
    return nodes, bytes(data)
if __name__=='__main__':
    # Quick search: report every small bundle whose extracted data contains
    # the needle (first CLI argument, default b'actualVersion').
    import sys,glob
    import os
    needle=sys.argv[1].encode() if len(sys.argv)>1 else b'actualVersion'
    d='/mnt/d/SteamLibrary/steamapps/common/Sand Playtest/Sand_Data/StreamingAssets/aa/StandaloneWindows64'
    for f in sorted(glob.glob(d+'/*.bundle')):
        if os.path.getsize(f) > 30_000_000: # skip huge ones in quick pass
            continue
        try:
            nodes,data=extract(f)
            pos=data.find(needle)
            if pos != -1:
                print('HIT', os.path.basename(f), 'at', pos)
        except Exception as e:
            # a bundle that fails to extract is reported and the scan goes on
            print('ERR', os.path.basename(f), e)