weapon damage: full static map of the model (values are runtime/server, not statically anchorable)
Mapped the damage system end-to-end statically and documented it for cross-patch
tracking (docs/WEAPON_DAMAGE.md):
- Model: 8 DamageXxxDataComponent (value @+0x10) on item/ammo, read by
HealthAndDamageExtensions.GetDamage (RVA 0x4BAC520); per-shot formula in
<GetDamage>d__12.MoveNext (RVA 0x4BB3DB0) = base x range-falloff x headshot,
melee skips range falloff.
- Delivery: PlainDamgeDealerComponent{damageAmount,damageType,isMelee} -> HitEventInfo
-> reduces HealthDataComponent.value; networked via DamageEvent.
Verified the base numbers are in NO asset (blueprints, ammo, projectiles, CheatItemDefs, plus a
UTF-16 + ASCII grep of all bundles). Established WHY the literal constants aren't statically anchorable: this
build accesses every component via fully-generic Entitas dispatch (no static class/index/
string reference in producing code; typed setters all dead build-wide; item-id strings
have 0 refs, verified via a calibrated string-xref) and damage resolution is server-
authoritative. So the value is a runtime component, not a reachable static constant.
Corrects the earlier draft that overstated "no value exists".
Tools: reverse/il2cpp_re.py (+find_rip_refs_batch, scan_movss_consts),
bundle/component_census.py, bundle/dump_blueprint.py.
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -15,6 +15,9 @@ snapshots/
|
||||
# Local HTML render of the wiki (regenerable via wikigen/render_wiki.py)
|
||||
/wiki_site/
|
||||
|
||||
# RE method-index cache (regenerable from il2cpp/dump.cs)
|
||||
/reverse/_method_index.pkl
|
||||
|
||||
# Python
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
|
||||
90
bundle/component_census.py
Normal file
90
bundle/component_census.py
Normal file
@@ -0,0 +1,90 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Census of ECS components across every EntityBlueprint in epb_assets_all.bundle.
|
||||
|
||||
Decodes all 1446 blueprints' Odin payloads and tallies which $type components
|
||||
appear and how often. Use it to answer "is component X authored in data at all?"
|
||||
firsthand (Odin stores type names as UTF-16, so an ascii grep gives false
|
||||
negatives — this decodes properly). Optional filter substring narrows output.
|
||||
|
||||
python bundle/component_census.py # combat-ish components
|
||||
python bundle/component_census.py Damage # only names containing 'Damage'
|
||||
python bundle/component_census.py '' # everything
|
||||
"""
|
||||
import os, sys, json, UnityPy
|
||||
from collections import Counter
|
||||
from UnityPy.helpers.TypeTreeGenerator import TypeTreeGenerator
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import odin_read
|
||||
|
||||
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
|
||||
META = os.path.join(GAME, "Sand_Data/il2cpp_data/Metadata/global-metadata.dat")
|
||||
DLL = os.path.join(GAME, "GameAssembly.dll")
|
||||
BUNDLES = "/home/downloadpizza/sand_tools/bundles"
|
||||
UNITY = "6000.0.40f1"
|
||||
COMBAT = ("Damage", "Health", "Weapon", "Attack", "Melee", "Shoot", "Projectile",
|
||||
"Penetra", "Overheat", "Hit", "AoE", "Armor", "Resist")
|
||||
|
||||
|
||||
def walk(n):
    """Yield every truthy ``$type`` value found anywhere in a decoded tree.

    ``n`` is a nested structure of dicts and lists. A dict contributes its
    own ``$type`` first (when present and truthy), then the types found in
    its other values in insertion order. Lists are walked element by
    element. Anything else yields nothing.
    """
    if isinstance(n, list):
        for child in n:
            yield from walk(child)
        return
    if not isinstance(n, dict):
        return
    type_name = n.get("$type")
    if type_name:
        yield type_name
    for key, child in n.items():
        if key == "$type":
            continue
        yield from walk(child)
|
||||
|
||||
|
||||
def main():
    """Tally ``$type`` components over every EntityBlueprint and print a census.

    ``sys.argv[1]`` selects what is printed: absent -> only types whose last
    dotted segment contains one of the ``COMBAT`` substrings; ``''`` ->
    everything; any other string -> types containing that substring.

    Each blueprint counts a given type at most once. Blueprints whose
    typetree read or Odin parse raises are skipped, but they are counted and
    reported on stderr: a silent skip would let a decode failure look like
    "component absent", which is exactly the question this tool answers.
    """
    filt = sys.argv[1] if len(sys.argv) > 1 else None  # None -> combat preset
    gen = TypeTreeGenerator(UNITY)
    # Close both inputs deterministically instead of leaking the handles.
    with open(DLL, "rb") as dll_file, open(META, "rb") as meta_file:
        gen.load_il2cpp(dll_file.read(), meta_file.read())
    env = UnityPy.load(os.path.join(BUNDLES, "epb_assets_all.bundle"),
                       os.path.join(BUNDLES, "sand_monoscripts.bundle"))
    comp = Counter()
    ndone = 0
    nodecache = {}
    failures = []  # (container path, stage, exception) for every skipped decode
    for path, obj in env.container.items():
        if obj.type.name != "GameObject":
            continue
        eb = None
        for c in obj.read().m_Components:
            co = c.read()
            r = co.object_reader
            if r.type.name == "MonoBehaviour" and co.m_Script.read().m_ClassName == "EntityBlueprint":
                eb = r
                break
        if eb is None:
            continue
        sc = eb.read().m_Script.read()
        full = (sc.m_Namespace + "." if sc.m_Namespace else "") + sc.m_ClassName
        if full not in nodecache:
            nodecache[full] = json.loads(gen.get_nodes_as_json(sc.m_AssemblyName, full))
        try:
            sb = eb.read_typetree(nodecache[full]).get("serializationData", {}).get("SerializedBytes")
        except Exception as exc:  # third-party reader; keep best-effort but record it
            failures.append((path, "typetree", exc))
            continue
        if not sb:
            continue
        try:
            p = odin_read.parse(bytes(sb))
        except Exception as exc:  # malformed/unsupported Odin payload
            failures.append((path, "odin", exc))
            continue
        seen = set()
        for root in ("roots", "items"):
            seen.update(walk(p.get(root)))
        for t in seen:
            comp[t] += 1
        ndone += 1
    print("blueprints parsed:", ndone, " distinct components:", len(comp))
    if failures:
        print("WARNING: %d blueprint(s) failed to decode and are NOT in the census:"
              % len(failures), file=sys.stderr)
        for path, stage, exc in failures:
            print("  [%s] %s: %r" % (stage, path, exc), file=sys.stderr)
    for t, n in comp.most_common():
        short = t.split(".")[-1]
        if filt is None:
            if any(w in short for w in COMBAT):
                print("%4d %s" % (n, t))
        elif filt == "" or filt in t:
            print("%4d %s" % (n, t))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
95
bundle/dump_blueprint.py
Normal file
95
bundle/dump_blueprint.py
Normal file
@@ -0,0 +1,95 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Fully decode one or more EntityBlueprints (by base name) from epb_assets_all.bundle.
|
||||
|
||||
Prints every Odin component ($type) and every non-zero float field, so we can see firsthand
|
||||
whether a weapon blueprint carries any damage magnitude. Usage:
|
||||
|
||||
python bundle/dump_blueprint.py item_revolverSmall_dusters [other_base ...]
|
||||
"""
|
||||
import os, sys, json, UnityPy
|
||||
from UnityPy.helpers.TypeTreeGenerator import TypeTreeGenerator
|
||||
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
||||
import odin_read
|
||||
|
||||
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
|
||||
META = os.path.join(GAME, "Sand_Data/il2cpp_data/Metadata/global-metadata.dat")
|
||||
DLL = os.path.join(GAME, "GameAssembly.dll")
|
||||
BUNDLES = "/home/downloadpizza/sand_tools/bundles"
|
||||
UNITY = "6000.0.40f1"
|
||||
|
||||
|
||||
def entity_blueprint(go):
    """Return the object reader of the first EntityBlueprint MonoBehaviour on ``go``.

    ``go`` is a read GameObject exposing ``m_Components``. Each component
    pointer is read; the first one whose reader type is ``MonoBehaviour`` and
    whose script class name is ``EntityBlueprint`` wins. Returns ``None``
    when no component matches.
    """
    for pointer in go.m_Components:
        component = pointer.read()
        reader = component.object_reader
        if reader.type.name != "MonoBehaviour":
            # Only MonoBehaviours have their m_Script consulted.
            continue
        if component.m_Script.read().m_ClassName == "EntityBlueprint":
            return reader
    return None
|
||||
|
||||
|
||||
def walk(node, prefix=""):
    """Flatten a decoded tree into ``(path, type_label, scalar)`` triples.

    * A dict with a truthy ``$type`` yields ``(path, type, None)``; its other
      keys are then visited with ``.<key>`` appended to the path.
    * A list visits each element with ``[<index>]`` appended to the path.
    * Any other value (including ``None``) is a leaf: ``(path, None, value)``.
    """
    if isinstance(node, list):
        for index, child in enumerate(node):
            yield from walk(child, prefix + "[" + str(index) + "]")
    elif isinstance(node, dict):
        type_label = node.get("$type")
        if type_label:
            yield (prefix, type_label, None)
        for key, child in node.items():
            if key != "$type":
                yield from walk(child, "".join((prefix, ".", str(key))))
    else:
        yield (prefix, None, node)
|
||||
|
||||
|
||||
def main():
    """Decode the named blueprints and print their components and non-zero floats.

    Targets come from ``sys.argv[1:]`` (default: ``item_revolverSmall_dusters``).
    A container entry matches a target when the last path segment, with
    ``_epb.prefab`` removed, equals the target name and the object is a
    GameObject. For each target this prints a banner, then either a reason
    it could not be decoded or the sorted set of ``$type`` names and every
    float leaf that is not ``0.0``.
    """
    targets = sys.argv[1:] or ["item_revolverSmall_dusters"]
    gen = TypeTreeGenerator(UNITY)
    # Close both inputs deterministically instead of leaking the handles.
    with open(DLL, "rb") as dll_file, open(META, "rb") as meta_file:
        gen.load_il2cpp(dll_file.read(), meta_file.read())
    env = UnityPy.load(os.path.join(BUNDLES, "epb_assets_all.bundle"),
                       os.path.join(BUNDLES, "sand_monoscripts.bundle"))

    want = {t: None for t in targets}
    for path, obj in env.container.items():
        base = path.split("/")[-1].replace("_epb.prefab", "")
        if base in want and obj.type.name == "GameObject":
            want[base] = obj

    for base, obj in want.items():
        print("\n" + "=" * 70)
        print("BLUEPRINT:", base, "" if obj else " *** NOT FOUND ***")
        print("=" * 70)
        if not obj:
            continue
        eb = entity_blueprint(obj.read())
        if eb is None:
            print(" no EntityBlueprint component")
            continue
        script = eb.read().m_Script.read()
        full = (script.m_Namespace + "." if script.m_Namespace else "") + script.m_ClassName
        nodes = json.loads(gen.get_nodes_as_json(script.m_AssemblyName, full))
        sb = eb.read_typetree(nodes).get("serializationData", {}).get("SerializedBytes")
        if not sb:
            print(" no SerializedBytes")
            continue
        parsed = odin_read.parse(bytes(sb))
        # Collect component type names and non-zero float leaves in one pass.
        comps = set()
        floats = []
        for root in ("roots", "items"):
            for p, t, sc in walk(parsed.get(root)):
                if t:
                    comps.add(t)
                if isinstance(sc, float) and sc != 0.0:
                    floats.append((p, sc))
        print(" components (%d):" % len(comps))
        for c in sorted(comps):
            print(" -", c)
        print(" non-zero float fields (%d):" % len(floats))
        for p, v in floats:
            print(" %-60s = %s" % (p, v))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
106
docs/WEAPON_DAMAGE.md
Normal file
106
docs/WEAPON_DAMAGE.md
Normal file
@@ -0,0 +1,106 @@
|
||||
# Weapon damage — static map of the model (for tracking across updates)
|
||||
|
||||
Goal: locate weapon damage (esp. the `_dusters` revolver's **melee** value) in the static
|
||||
files, documented so it can be re-checked on patches.
|
||||
|
||||
**Status:** the damage *model* is fully mapped statically (below). The per-weapon *base
|
||||
numbers* are **not** present in any asset, and are **not reachable by static anchor** in
|
||||
`GameAssembly.dll` because every ECS component is accessed through fully-generic Entitas
|
||||
dispatch (runtime integer index, no static class/index reference in calling code) and the
|
||||
damage *resolution* is server-authoritative. What you can diff across updates is the model
|
||||
and the formula function (RVAs below); the literal constants need a different method (see
|
||||
end). This corrects an earlier draft that wrongly concluded "no value exists" — the values
|
||||
**are** live at runtime; they just aren't statically anchorable constants.
|
||||
|
||||
## Damage model (all static, all in `il2cpp/dump.cs` + verified by disasm)
|
||||
|
||||
Per-type damage lives as a `float value` (object offset **+0x10**) on 8 components on the
|
||||
**item/ammo** entity: `Damage{Physical,Cold,Heat,Rad,Fire,Poison,Siege,True}DataComponent`
|
||||
(`: BaseFloatValueComponent`). `MeleeDataComponent` (TypeDefIndex 5825) is a marker only.
|
||||
|
||||
Read path — `Hologryph.Sand.Shared.Damage.HealthAndDamageExtensions`:
|
||||
|
||||
| Method | RVA | Role |
|
||||
|---|---|---|
|
||||
| `GetDamage(ItemEntity, DamageType)` | `0x4BAC520` | jump-table over the 8 types → returns that type's `DamageXxxDataComponent.value` (`[comp+0x10]`), or `0` if absent |
|
||||
| `GetDamageModifier(ItemEntity, DamageType)` | `0x4BAC340` | per-type `DamageModifierXxx` value |
|
||||
| `GetAoEDamage(ItemEntity, DamageType)` | (via calc) | explosive AoE (`AoEDamageDataComponent.AoEDamage` struct: radius + 8 floats) |
|
||||
| `IEnumerable<WeaponDamage> GetDamage(weapon, ammo, distance, isHeadShot, isAoE)` | `0x4BAC460` | the per-shot calc factory (state machine `<GetDamage>d__12`) |
|
||||
|
||||
**The damage formula** = `<GetDamage>d__12.MoveNext` (**RVA `0x4BB3DB0`**) — decompiled here:
|
||||
per damage type it takes base `GetDamage(weapon,type)` (and ammo), then multiplies by
|
||||
|
||||
- **range falloff**: `RangeDamageModifierDataComponent` → `GetModifierByDistance(distance)`
|
||||
(RVA `0x4A4B870`); **skipped when `isMelee`** (iterator flag at `+0x4c`),
|
||||
- **headshot**: `HeadShotMultiplierDataComponent.value` (`[+0x10]`) when `isHeadShot`,
|
||||
- plus `GetDamageModifier` / `GetAoEDamage`.
|
||||
|
||||
So **melee damage = base per-type value × headshot mult** (no distance falloff). The base is
|
||||
the item's `DamagePhysical`/`DamageSiege` value. `WeaponDamage` struct = `{float damageAmount,
|
||||
DamageType damageType}`.
|
||||
|
||||
## Damage delivery / application (static)
|
||||
|
||||
- A shot/attack carries **`PlainDamgeDealerComponent`** (game's typo "Damge"; TypeDefIndex
|
||||
7343, `IGameContextComponent`): `float damageAmount @+0x10`, `DamageType damageType @+0x14`,
|
||||
`bool isMelee @+0x15`. Created at runtime per shot (not in any asset).
|
||||
- On impact → **`HitEventInfo`** (`Effects`, TypeDefIndex 7208): `float damageAmount @+0x30`,
|
||||
`ammoId`, `ProjectileType`, `HitType`; carried by `HitEventComponent` and snapshot
|
||||
`PlainHitEventComponentData` (`damageAmount @+0x2C`).
|
||||
- Applied to **`HealthDataComponent`** (TypeDefIndex 7319): `ushort count @+0x10`,
|
||||
`DamageType[] damageMask @+0x18`, `float value @+0x20`. `GetTotal()` RVA `0x4BAC900`.
|
||||
- Networked as **`DamageEvent`** (TypeDefIndex 7299): `int damageAmount @+0xC`, `targetId`,
|
||||
`blueprintId`, `bool isLethal`; batched in `DamageEventMessage`. Client-side resolution
|
||||
callers (`OnDamageAvatar(hpBeforeDamage)`, `ConsumeDamage`, armor `GetArmorAbsorbMax`/
|
||||
`IsBlocksDamage`/`GetDamageExtra`) are **display/feedback only** → the authoritative
|
||||
computation is server-side.
|
||||
|
||||
## The base numbers are not in any asset (verified, multiple ways)
|
||||
|
||||
| Check | Result |
|
||||
|---|---|
|
||||
| All 1446 EntityBlueprints, Odin-decoded (`bundle/component_census.py`) | no `DamageXxx`/`PlainDamgeDealer`/`AoEDamage` component on any item, ammo, or projectile |
|
||||
| `item_revolverSmall_dusters` full decode (`bundle/dump_blueprint.py`) | 8 components, all presentation/physics; actions only `WeaponPickup`/`WeaponSwap` |
|
||||
| All 35 bundles, UTF-16 + ascii grep of the component type-names | 0 occurrences anywhere |
|
||||
| `CheatItemDefinitionsData` (`ItemDefinition`) | only `{Name, Type, StorageStack}` — no damage field |
|
||||
|
||||
## Why the literal constants aren't statically anchorable
|
||||
|
||||
All of these were checked and eliminated as ways to reach the code that *sets* a weapon's
|
||||
damage value:
|
||||
|
||||
- **Typed Entitas setters are dead.** `AddDamagePhysical(entity,float)` etc. — 0 callers.
|
||||
This is build-wide, not damage-specific (the `HealthDataComponent` setters are also
|
||||
uncalled), so "setter has no callers" proves nothing — this build mutates components
|
||||
in-place / via generic add, never the generated typed setters.
|
||||
- **Component class & module globals** (`DamagePhysical` class `0x187CE73B8`, module
|
||||
`0x187D6B778`; `PlainDamgeDealer` class `0x187CE3638`, module `0x187D82300`) are
|
||||
referenced **only by the generated extension methods** — no producer/init references them.
|
||||
- **Item-id strings have 0 code references.** Verified the string-xref method is sound
|
||||
(60/60 sampled `ScriptString` slots resolve to ≥1 ref), then confirmed
|
||||
`"item_revolverSmall_dusters"` (slot `0x1807CC1808`) has **0** — so damage is not keyed
|
||||
by item-id string in code.
|
||||
- **Constant scans** (rip-relative `movss`, and `mov dword[mem],imm32` float-immediates)
|
||||
don't isolate weapon damage from UI/physics/geometry noise.
|
||||
|
||||
Net: an item's damage component is created from a pooled factory (set up once at context
|
||||
init) and added via `entity.AddComponent(component)` where the component's index is derived
|
||||
from the object itself — there is **no static reference in the producing code** that an xref
|
||||
can follow. That, plus server-authoritative resolution, is why the constant can't be reached
|
||||
by static anchoring (in my tooling **or** Ghidra — Ghidra reads bodies but can't defeat the
|
||||
dynamic dispatch without already knowing which system to read).
|
||||
|
||||
## Re-deriving / tracking on updates
|
||||
|
||||
Tooling (regenerates the map from a new `dump.cs` + bundles):
|
||||
- `reverse/il2cpp_re.py` — PE map, dump.cs method index, `find_xrefs` (call rel32),
|
||||
`find_rip_refs` / `find_rip_refs_batch` (RIP-relative data xrefs), `disasm_method` /
|
||||
`analyze` (resolves calls, reads float consts), `scan_movss_consts`.
|
||||
- `bundle/component_census.py`, `bundle/dump_blueprint.py`.
|
||||
|
||||
To diff the *formula* across patches: re-locate `HealthAndDamageExtensions.GetDamage` and
|
||||
`<GetDamage>d__12.MoveNext` by signature and compare. To get the actual *numbers*: they are
|
||||
runtime component values shown in the in-game weapon-inspect UI (`ShowWeaponInfo` /
|
||||
`ShowPhysicalDamageInfo` call `GetDamage`), so a live read is the reliable source; static
|
||||
extraction would require following the dynamic Entitas dispatch by hand through the (server-
|
||||
side) shot/attack init — not achievable via xref/anchor.
|
||||
355
reverse/il2cpp_re.py
Normal file
355
reverse/il2cpp_re.py
Normal file
@@ -0,0 +1,355 @@
|
||||
"""Core IL2CPP reverse-engineering helpers for SAND's GameAssembly.dll.
|
||||
|
||||
dump.cs (Il2CppDumper output) gives every method's RVA, file Offset, VA and
|
||||
signature. GameAssembly.dll holds the actual x86-64 bodies. This module:
|
||||
|
||||
- maps VA <-> file offset via the PE section table,
|
||||
- builds a sorted method index from dump.cs (cached to a pickle),
|
||||
- attributes any VA to its containing method,
|
||||
- finds call-rel32 / jmp-rel32 sites that target a given VA (xrefs),
|
||||
- disassembles a method body and pulls out float constants it loads.
|
||||
|
||||
No method bodies live in dump.cs — only signatures. The values we want
|
||||
(weapon damage floats) are operands inside the bodies, recovered here.
|
||||
"""
|
||||
import os, re, pickle, bisect, struct
|
||||
|
||||
GAME = "/mnt/d/SteamLibrary/steamapps/common/Sand Playtest"
|
||||
DLL = os.path.join(GAME, "GameAssembly.dll")
|
||||
DUMP = "/home/downloadpizza/sand_tools/il2cpp/dump.cs"
|
||||
CACHE = "/home/downloadpizza/sand_tools/reverse/_method_index.pkl"
|
||||
|
||||
IMAGE_BASE = 0x180000000 # from PE optional header (VA = IMAGE_BASE + RVA)
|
||||
|
||||
|
||||
class PE:
    """Section-table view of a PE file: RVA/VA <-> file offset and raw reads.

    ``sections`` holds one tuple per section:
    ``(VirtualAddress, VirtualSize, PointerToRawData, SizeOfRawData, name)``.
    """

    def __init__(self, path=DLL):
        import pefile  # imported here, so loading this module does not need pefile
        self.pe = pefile.PE(path, fast_load=True)
        self.base = self.pe.OPTIONAL_HEADER.ImageBase
        self.data = self.pe.__data__  # bytes-like of whole file
        self.sections = [
            (
                sec.VirtualAddress,
                sec.Misc_VirtualSize,
                sec.PointerToRawData,
                sec.SizeOfRawData,
                sec.Name.decode(errors="replace").strip("\x00"),
            )
            for sec in self.pe.sections
        ]

    def rva_to_off(self, rva):
        """Map an RVA to a file offset; ``None`` if unmapped or not file-backed."""
        for sec_rva, vsize, raw_ptr, raw_size, _name in self.sections:
            delta = rva - sec_rva
            if 0 <= delta < max(vsize, raw_size):
                # Inside this section: file-backed only within the raw size,
                # the remainder is bss / uninitialized.
                return raw_ptr + delta if delta < raw_size else None
        return None

    def off_to_rva(self, off):
        """Map a file offset to an RVA; ``None`` if no section's raw data holds it."""
        for sec_rva, _vsize, raw_ptr, raw_size, _name in self.sections:
            if raw_ptr <= off < raw_ptr + raw_size:
                return sec_rva + (off - raw_ptr)
        return None

    def va_to_off(self, va):
        """Map an absolute VA to a file offset by subtracting the image base."""
        return self.rva_to_off(va - self.base)

    def read_off(self, off, n):
        """Return ``n`` bytes of the file starting at offset ``off``."""
        end = off + n
        return bytes(self.data[off:end])

    def read_va(self, va, n):
        """Return ``n`` bytes at absolute VA ``va``, or ``None`` if it has no file offset."""
        off = self.va_to_off(va)
        return None if off is None else self.read_off(off, n)

    def text_range(self):
        """Return ``(va, vsize, raw_ptr, raw_size)`` of the first ``.text`` section, or ``None``."""
        for sec_rva, vsize, raw_ptr, raw_size, name in self.sections:
            if name == ".text":
                return sec_rva, vsize, raw_ptr, raw_size
        return None

    def code_ranges(self):
        """Executable code sections. IL2CPP method bodies live in the
        'il2cpp' section; runtime/engine glue in '.text'."""
        return [sec for sec in self.sections if sec[4] in (".text", "il2cpp")]
|
||||
|
||||
|
||||
_RVA_RE = re.compile(r"// RVA: 0x([0-9A-Fa-f]+) Offset: 0x([0-9A-Fa-f]+) VA: 0x([0-9A-Fa-f]+)")
|
||||
|
||||
|
||||
def build_index(force=False):
    """Parse dump.cs -> list of methods sorted by VA.

    Each entry: dict(va, rva, off, sig). sig is the stripped line that
    immediately follows a ``// RVA: ... Offset: ... VA: ...`` comment.
    Entries with RVA 0 are dropped.

    The result is cached to CACHE as a pickle. The cache is reused only when
    ``force`` is false and it is at least as new as DUMP; a dump.cs
    regenerated after a game update therefore triggers a rebuild instead of
    silently serving the previous build's addresses. If DUMP is missing, an
    existing cache is still used.

    NOTE: the cache is loaded with ``pickle`` and must be a trusted,
    locally generated file.
    """
    use_cache = not force and os.path.exists(CACHE)
    if use_cache and os.path.exists(DUMP):
        # A cache older than the dump describes a previous dump.cs.
        use_cache = os.path.getmtime(CACHE) >= os.path.getmtime(DUMP)
    if use_cache:
        with open(CACHE, "rb") as f:
            return pickle.load(f)

    methods = []
    with open(DUMP, "r", encoding="utf-8", errors="replace") as f:
        pending = None  # (rva, off, va) from the most recent address comment
        for line in f:
            m = _RVA_RE.search(line)
            if m:
                pending = tuple(int(m.group(g), 16) for g in (1, 2, 3))
                continue
            if pending is None:
                continue
            rva, off, va = pending
            pending = None
            if rva != 0:
                methods.append({"va": va, "rva": rva, "off": off, "sig": line.strip()})
    methods.sort(key=lambda d: d["va"])
    with open(CACHE, "wb") as f:
        pickle.dump(methods, f)
    return methods
|
||||
|
||||
|
||||
class Index:
    """Sorted method index with address lookup and signature search."""

    def __init__(self):
        self.methods = build_index()
        # Parallel list of start VAs, used as the bisect key.
        self.vas = [entry["va"] for entry in self.methods]

    def method_at(self, va):
        """Return the indexed method with the largest start VA <= ``va``.

        No upper bound is checked, so an address past the end of a body is
        still attributed to the preceding method. Returns ``None`` when
        ``va`` lies before the first method.
        """
        pos = bisect.bisect_right(self.vas, va)
        if pos == 0:
            return None
        return self.methods[pos - 1]

    def next_va(self, va):
        """Return the smallest indexed start VA strictly greater than ``va``, else ``None``."""
        pos = bisect.bisect_right(self.vas, va)
        if pos >= len(self.vas):
            return None
        return self.vas[pos]

    def find_sig(self, needle):
        """Return every method whose signature line contains ``needle``."""
        matches = []
        for entry in self.methods:
            if needle in entry["sig"]:
                matches.append(entry)
        return matches
|
||||
|
||||
|
||||
def find_xrefs(pe, target_vas, call_only=True):
    """Locate rel32 call (E8) -- and optionally jmp (E9) -- sites hitting given VAs.

    ``target_vas`` is a single int or any iterable of ints. Every code
    section is byte-scanned: each candidate opcode byte is treated as the
    start of a 5-byte instruction and its displacement resolved. Returns a
    list of ``(site_va, opcode_byte, target_va)``.

    This is a raw byte scan, not a disassembly, so an E8/E9 byte inside
    another instruction can produce a false positive.
    """
    wanted = {target_vas} if isinstance(target_vas, int) else set(target_vas)
    hits = []
    image_base = pe.base
    blob = pe.data
    for sec_rva, _vsize, raw_ptr, raw_size, _name in pe.code_ranges():
        code = bytes(blob[raw_ptr:raw_ptr + raw_size])
        limit = len(code)
        pos = 0
        while True:
            if call_only:
                pos = code.find(b"\xe8", pos)
            else:
                pos = _find_either(code, pos)
            if pos < 0 or pos + 5 > limit:
                break
            (rel32,) = struct.unpack_from("<i", code, pos + 1)
            site = image_base + sec_rva + pos  # in-section offset maps 1:1 onto the RVA
            dest = site + 5 + rel32            # rel32 is relative to the next instruction
            if dest in wanted:
                hits.append((site, code[pos], dest))
            pos += 1
    return hits
|
||||
|
||||
|
||||
def find_rip_refs(pe, target_va):
    """Find RIP-relative references to ``target_va`` in all code sections.

    Every byte offset is treated as a candidate little-endian disp32 that
    ends its instruction, so the referenced address is
    ``disp_field_va + 4 + disp``. Returns the list of ``disp_field_va``
    values (VA of the displacement field, not of the instruction start)
    whose resolved address equals ``target_va``.

    Instructions with trailing immediates after the disp32 are not matched,
    and coincidental byte patterns can match.
    """
    import numpy as np
    found = []
    image_base = pe.base
    blob = pe.data
    for sec_rva, _vsize, raw_ptr, raw_size, _name in pe.code_ranges():
        code = np.frombuffer(bytes(blob[raw_ptr:raw_ptr + raw_size]), dtype=np.uint8)
        window = len(code) - 3  # number of offsets at which a full 4-byte field fits
        if window <= 0:
            continue
        # Assemble the little-endian 32-bit value starting at every offset.
        lanes = [code[k:k + window].astype(np.int64) for k in range(4)]
        disp = lanes[0] | (lanes[1] << 8) | (lanes[2] << 16) | (lanes[3] << 24)
        # Reinterpret as signed 32-bit.
        disp = np.where(disp >= 0x80000000, disp - 0x100000000, disp)
        offsets = np.arange(window, dtype=np.int64)
        resolved = image_base + sec_rva + offsets + 4 + disp
        for i in np.flatnonzero(resolved == target_va).tolist():
            found.append(image_base + sec_rva + int(i))  # va of the disp field
    return found
|
||||
|
||||
|
||||
def find_rip_refs_batch(pe, targets):
    """Resolve RIP-relative references for many target VAs in one scan.

    Returns ``{target_va: [disp_field_va, ...]}`` with an entry (possibly an
    empty list) for every distinct target. As in ``find_rip_refs``, each
    byte offset is treated as a disp32 that is the final 4 bytes of its
    instruction (movss/mov/lea loads), so the referenced address is
    ``disp_field_va + 4 + disp``.
    """
    import numpy as np
    wanted = np.array(sorted({int(t) for t in targets}), dtype=np.int64)
    refs = {int(t): [] for t in wanted}
    image_base = pe.base
    blob = pe.data
    for sec_rva, _vsize, raw_ptr, raw_size, _name in pe.code_ranges():
        code = np.frombuffer(bytes(blob[raw_ptr:raw_ptr + raw_size]), dtype=np.uint8)
        window = len(code) - 3  # offsets at which a full 4-byte field fits
        if window <= 0:
            continue
        lanes = [code[k:k + window].astype(np.int64) for k in range(4)]
        disp = lanes[0] | (lanes[1] << 8) | (lanes[2] << 16) | (lanes[3] << 24)
        disp = np.where(disp >= 0x80000000, disp - 0x100000000, disp)  # signed 32-bit
        offsets = np.arange(window, dtype=np.int64)
        resolved = image_base + sec_rva + offsets + 4 + disp  # absolute target of a disp-final instruction
        for i in np.flatnonzero(np.isin(resolved, wanted)):
            hit = int(resolved[i])
            if hit in refs:
                refs[hit].append(image_base + sec_rva + int(i))
    return refs
|
||||
|
||||
|
||||
def disasm_around(pe, va, back=24, total=64):
    """Disassemble ``total`` bytes starting ``back`` bytes before ``va``.

    Used to recover the instruction that contains or precedes ``va``
    (e.g. a RIP-ref disp field). Decoding starts at an arbitrary byte, so
    the first few instructions may be misaligned. Returns the list of
    capstone instructions with detail enabled.
    """
    from capstone import Cs, CS_ARCH_X86, CS_MODE_64
    window_start = va - back
    code = pe.read_off(pe.va_to_off(window_start), total)
    engine = Cs(CS_ARCH_X86, CS_MODE_64)
    engine.detail = True
    return list(engine.disasm(code, window_start))
|
||||
|
||||
|
||||
def _find_either(raw, start):
|
||||
a = raw.find(b"\xe8", start)
|
||||
b = raw.find(b"\xe9", start)
|
||||
if a < 0:
|
||||
return b
|
||||
if b < 0:
|
||||
return a
|
||||
return min(a, b)
|
||||
|
||||
|
||||
def disasm_method(pe, idx, va, max_bytes=0x4000):
    """Disassemble the method starting at ``va``.

    The body is taken to end at the next indexed method start
    (``idx.next_va``), capped at ``max_bytes``. Returns the list of capstone
    instructions with detail enabled.
    """
    from capstone import Cs, CS_ARCH_X86, CS_MODE_64
    following = idx.next_va(va)
    length = min(max_bytes, following - va) if following else max_bytes
    code = pe.read_off(pe.va_to_off(va), length)
    engine = Cs(CS_ARCH_X86, CS_MODE_64)
    engine.detail = True
    return list(engine.disasm(code, va))
|
||||
|
||||
|
||||
def rip_target(insn):
    """Return the absolute VA addressed by the first RIP-relative memory operand.

    The address is ``insn.address + insn.size + disp`` (RIP-relative
    displacements are taken from the end of the instruction). Returns
    ``None`` if the instruction has no such operand.
    """
    from capstone import x86
    rip_relative = (
        operand
        for operand in insn.operands
        if operand.type == x86.X86_OP_MEM and operand.mem.base == x86.X86_REG_RIP
    )
    first = next(rip_relative, None)
    if first is None:
        return None
    return insn.address + insn.size + first.mem.disp
|
||||
|
||||
|
||||
def read_f32(pe, va):
    """Read a little-endian 32-bit float at ``va``; ``None`` if ``pe.read_va`` returns ``None``."""
    raw = pe.read_va(va, 4)
    if raw is None:
        return None
    (value,) = struct.unpack("<f", raw)
    return value
|
||||
|
||||
|
||||
def read_f64(pe, va):
    """Read a little-endian 64-bit float at ``va``; ``None`` if ``pe.read_va`` returns ``None``."""
    raw = pe.read_va(va, 8)
    if raw is None:
        return None
    (value,) = struct.unpack("<d", raw)
    return value
|
||||
|
||||
|
||||
def scan_movss_consts(pe, lo=None, hi=None):
    """Linear-scan code sections for RIP-relative scalar float loads.

    Matches ``movss xmm,[rip+disp32]`` (F3 0F 10 /r, mod=00 rm=101) and
    ``movsd`` (F2 0F 10), reads the referenced constant and returns a list
    of ``(insn_va, const_value, kind)`` with ``kind`` in ``{"f32", "f64"}``.
    Optional ``lo`` / ``hi`` bound the constant's absolute value
    (inclusive). NaN constants and unreadable targets are skipped.

    Only the exact 3-byte opcode is searched, so forms carrying a REX
    prefix between the F3/F2 and the 0F (xmm8-xmm15) are not matched. Being
    a byte scan, it can also match patterns that are not real instruction
    starts.
    """
    import struct  # local import keeps this helper self-contained
    out = []
    base = pe.base
    data = pe.data
    rip_modrm = frozenset(0x05 | (reg << 3) for reg in range(8))  # mod=00, rm=101, reg=0..7
    # (opcode bytes, kind label, struct format, constant width in bytes)
    patterns = (
        (b"\xf3\x0f\x10", "f32", "<f", 4),
        (b"\xf2\x0f\x10", "f64", "<d", 8),
    )
    for sva, _vsz, spraw, srsz, _name in pe.code_ranges():
        raw = bytes(data[spraw:spraw + srsz])
        last_start = len(raw) - 8  # opcode(3) + modrm(1) + disp32(4) must fit
        for opcode, kind, fmt, width in patterns:
            j = raw.find(opcode)
            while 0 <= j <= last_start:
                if raw[j + 3] in rip_modrm:
                    disp = struct.unpack_from("<i", raw, j + 4)[0]
                    insn_va = base + sva + j
                    # instruction length = 8, and RIP points past the instruction
                    blob = pe.read_va(insn_va + 8 + disp, width)
                    # A target near the end of the mapped data can come back
                    # short; skip it instead of raising struct.error.
                    if blob is not None and len(blob) == width:
                        v = struct.unpack(fmt, blob)[0]
                        if v == v:  # not NaN
                            av = abs(v)
                            if (lo is None or av >= lo) and (hi is None or av <= hi):
                                out.append((insn_va, v, kind))
                j = raw.find(opcode, j + 1)
    return out
|
||||
|
||||
|
||||
def _float_operand_kind(mn):
    """Classify a scalar-SSE mnemonic by the width of its *source* operand.

    Returns ``"f32"``, ``"f64"`` or ``None``. For conversions the source
    type is the part before the ``2`` (``cvtss2sd`` reads a 32-bit float,
    ``cvtsi2ss`` reads an integer); for everything else it is the suffix.
    """
    source = mn.split("2", 1)[0] if "cvt" in mn else mn
    if source.endswith("sd"):
        return "f64"
    if source.endswith("ss"):
        return "f32"
    return None


def analyze(pe, idx, va, max_bytes=0x6000, show=True):
    """Disassemble a method; resolve call targets to method sigs and read any
    RIP-relative float/double constants loaded (movss/movsd/comiss/addss/...).

    * Direct ``call`` / ``jmp`` to an immediate are annotated with the
      signature of the indexed method at or before the target (``"?"`` when
      none lies within 0x6000 bytes); calls are also collected.
    * A RIP-relative memory operand on a scalar-SSE instruction is read as
      f32 or f64 according to the instruction's source-operand type.
    * Any other RIP-relative operand is annotated as plain data.

    Returns ``(insns, calls, floats)`` where ``calls`` holds
    ``(site_va, target_va, sig)`` and ``floats`` holds
    ``(site_va, const_va, value, kind)``. With ``show`` true the annotated
    listing is printed.
    """
    from capstone import x86
    ins = disasm_method(pe, idx, va, max_bytes)
    calls, floats, lines = [], [], []
    for i in ins:
        tgt = rip_target(i)
        note = ""
        mn = i.mnemonic
        kind = _float_operand_kind(mn) if tgt is not None else None
        if mn in ("call", "jmp") and i.operands and i.operands[0].type == x86.X86_OP_IMM:
            d = i.operands[0].imm
            m = idx.method_at(d)
            nm = m["sig"] if (m and abs(m["va"] - d) < 0x6000) else "?"
            if mn == "call":
                calls.append((i.address, d, nm))
            note = " -> %x %s" % (d, nm[:78])
        elif kind == "f64":
            # double constant load from .rdata
            v = read_f64(pe, tgt)
            floats.append((i.address, tgt, v, "f64"))
            note = " ; =%s (f64 @%x)" % (v, tgt)
        elif kind == "f32":
            # float constant load from .rdata
            v = read_f32(pe, tgt)
            floats.append((i.address, tgt, v, "f32"))
            note = " ; =%s (f32 @%x)" % (v, tgt)
        elif tgt is not None:
            note = " ; [data %x]" % tgt
        lines.append(" %x %-9s %s%s" % (i.address, mn, i.op_str, note))
    if show:
        print("\n".join(lines))
    return ins, calls, floats
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: load the image and index, then attribute one known address.
    pe = PE()
    idx = Index()
    print("methods indexed:", len(idx.methods))
    # text_range() returns None when the image has no '.text' section.
    text = pe.text_range()
    print("text section:", [hex(x) for x in text] if text else None)
    # sanity: the known DamagePhysical float setter
    m = idx.method_at(0x1849EC5B0)
    print("method at 0x1849EC5B0:", m["sig"] if m else None, hex(m["va"]) if m else "")
|
||||
Reference in New Issue
Block a user