scrape tooling: live capture triage + master-server WS decoder + PlayFab REST scraper

Built and unit-tested ahead of a live playtest window:
- reverse/capture_hosts.py: pcap -> DNS/SNI/endpoints in order; extracts PlayFab TitleId,
  flags hologryph master-server region + config CDN.
- reverse/ws_scrape.py: TCP reassembly + RFC 6455 framing for the cleartext
  ws://<region>.hologryph.com/gameclient/ stream; decodes JSON/BSON/MessagePack and
  auto-labels ServerDto, CompartmentDefinitionDto, ResearchNodeJsonDto, OperationResult,
  etc. No MITM needed.
- reverse/playfab_scrape.py: LoginWithSteam (or captured EntityToken) -> Catalog/SearchItems
  (+ Inventory/TitleData); prices resolved to item names. Read-only.
- docs/SCRAPE_RUNBOOK.md: turnkey steps for when servers are online.
This commit is contained in:
DownloadPizza
2026-06-12 10:06:48 +02:00
parent 5946e0910b
commit 3df0797acc
4 changed files with 653 additions and 0 deletions

132
reverse/capture_hosts.py Normal file
View File

@@ -0,0 +1,132 @@
#!/usr/bin/env python3
"""Quick triage of a SAND network capture: every host the client touched, in order,
with the bits we care about highlighted.
Pulls:
- DNS queries (order = startup sequence; flush DNS first for a clean list)
- TLS SNI (HTTPS hostnames even when DNS was cached)
- TCP/UDP endpoints
and flags the three backends we care about:
- <titleId>.playfabapi.com -> prints the **PlayFab TitleId** (the one constant the
REST scraper needs)
- <region>.hologryph.com -> the master-server region (ws://, port 80, cleartext)
- sandconfigstorage.blob... -> the anonymous config CDN
Usage: venv/bin/python reverse/capture_hosts.py <capture.pcapng>
"""
import sys, re
from scapy.all import rdpcap, DNS, DNSQR, DNSRR, IP, IPv6, TCP, UDP, Raw
def tls_sni(b):
    """Extract the SNI host name from a TLS ClientHello payload.

    Args:
        b: Raw TCP payload (bytes) expected to start with a TLS record header.

    Returns:
        The first non-empty ``host_name`` entry of the ``server_name`` extension,
        decoded with replacement of undecodable bytes, or ``None`` when the payload
        is not a ClientHello, is too short to parse, or carries no such entry.
    """
    # Record content type 0x16 = handshake; handshake type 0x01 = ClientHello.
    if len(b) < 6 or b[0] != 0x16 or b[5] != 0x01:
        return None
    try:
        i = 5 + 4 + 2 + 32  # record hdr + handshake hdr + client version + random
        i += 1 + b[i]  # session id
        i += 2 + int.from_bytes(b[i:i + 2], "big")  # cipher suites
        i += 1 + b[i]  # compression methods
    except IndexError:
        # Payload ends before the fixed part of the ClientHello does.
        return None
    if i + 2 > len(b):
        return None  # no extensions block present

    # Never trust the declared length past the bytes actually captured: slicing
    # beyond the buffer yields b"" which would otherwise parse as extension type 0.
    end = min(i + 2 + int.from_bytes(b[i:i + 2], "big"), len(b))
    i += 2
    while i + 4 <= end:
        ext_type = int.from_bytes(b[i:i + 2], "big")
        ext_len = int.from_bytes(b[i + 2:i + 4], "big")
        i += 4
        if ext_type == 0:  # server_name
            ext_end = min(i + ext_len, end)
            j = i + 2  # skip the ServerNameList length
            while j + 3 <= ext_end:
                name_type = b[j]
                name_len = int.from_bytes(b[j + 1:j + 3], "big")
                name = b[j + 3:min(j + 3 + name_len, ext_end)]
                if name_type == 0 and name:  # 0 = host_name
                    return name.decode(errors="replace")
                j += 3 + name_len
            return None
        i += ext_len
    return None
# Hostnames are case-insensitive, so both patterns ignore case.
# <titleId>.playfabapi.com — group 1 is the hex TitleId label (4-7 hex digits).
PLAYFAB = re.compile(r"^([0-9A-Fa-f]{4,7})\.playfabapi\.com$", re.I)
# <region>.hologryph.com — group 1 is the single-label region prefix.
HOLOGRYPH = re.compile(r"^([a-z0-9-]+)\.hologryph\.com$", re.I)
def main():
    """Print a host/endpoint triage report for the capture named in ``sys.argv[1]``.

    Walks every packet once, collecting:
      - DNS query names in first-seen order (``.local`` and ``wpad.localdomain`` skipped),
      - an IP -> queried-name map built from A/AAAA answers,
      - the TLS SNI seen towards each destination IP on port 443,
      - the first-seen time of each TCP SYN destination and each UDP endpoint,
    then prints the DNS, TCP and UDP tables followed by the PlayFab / hologryph /
    config-CDN hosts found among all collected names.

    Exits via ``sys.exit`` with a message when no capture path is given or when the
    capture holds no packets.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: capture_hosts.py <pcap>")
    pk = rdpcap(sys.argv[1])
    # An empty capture has no first packet to take the time origin from.
    if len(pk) == 0:
        sys.exit("capture_hosts.py: %s contains no packets" % sys.argv[1])
    t0 = float(pk[0].time)  # every printed timestamp is relative to the first packet

    dns_order, dns_seen = [], set()
    ip2host, snis = {}, {}
    tcp_first, udp_first = {}, {}
    for p in pk:
        if p.haslayer(DNS):
            d = p[DNS]
            try:
                qn = p[DNSQR].qname.decode(errors="replace").rstrip(".")
            except Exception:
                # No usable question section; skip the DNS bookkeeping for this packet.
                qn = None
            if qn and d.qr == 0 and not qn.endswith(".local") and qn != "wpad.localdomain":
                if qn not in dns_seen:
                    dns_seen.add(qn)
                    dns_order.append((float(p.time) - t0, qn))
            if qn and d.qr == 1 and d.ancount:
                for k in range(d.ancount):
                    # Best effort: a truncated or malformed response may hold fewer
                    # records than ancount claims, so the lookup is guarded as well.
                    try:
                        rr = d.an[k]
                        if rr.type in (1, 28):  # A / AAAA
                            # Map to the queried name, not the record's own name.
                            ip2host[str(rr.rdata)] = qn
                    except Exception:
                        continue

        ipl = p[IP] if p.haslayer(IP) else (p[IPv6] if p.haslayer(IPv6) else None)
        if ipl is None:
            continue
        if p.haslayer(TCP):
            t = p[TCP]
            # SYN set and ACK clear: the initiating side's connection attempt.
            if t.flags & 0x02 and not t.flags & 0x10:
                tcp_first.setdefault((ipl.dst, t.dport), float(p.time) - t0)
            if p.haslayer(Raw) and t.dport == 443:
                s = tls_sni(bytes(p[Raw].load))
                if s:
                    snis[ipl.dst] = s
        elif p.haslayer(UDP):
            u = p[UDP]
            # Skip DNS, mDNS, SSDP, LLMNR and NetBIOS name traffic.
            if u.dport not in (53, 5353, 1900, 5355, 137) and u.sport not in (53, 5353):
                # Key on the side with the lower port (presumably the service end) so
                # both directions of a flow collapse onto one endpoint.
                key = (ipl.dst, u.dport) if u.dport < u.sport else (ipl.src, u.sport)
                udp_first.setdefault(key, float(p.time) - t0)

    def label(ip):
        # Prefer the SNI seen on the wire; fall back to the DNS-derived name.
        return snis.get(ip) or ip2host.get(ip, "")

    print("=== DNS queries (in order) ===")
    for ts, q in dns_order:
        print(" +%7.2fs %s" % (ts, q))
    print("\n=== TCP destinations (first SYN) ===")
    for (ip, port), ts in sorted(tcp_first.items(), key=lambda x: x[1]):
        print(" +%7.2fs %-17s :%-5s %s" % (ts, ip, port, label(ip)))
    print("\n=== UDP destinations ===")
    udp_limit = 20
    udp_sorted = sorted(udp_first.items(), key=lambda x: x[1])
    for (ip, port), ts in udp_sorted[:udp_limit]:
        print(" +%7.2fs %-17s :%-5s %s" % (ts, ip, port, label(ip)))
    if len(udp_sorted) > udp_limit:
        # Make the cut-off visible instead of silently dropping endpoints.
        print(" (+%d more UDP endpoints not shown)" % (len(udp_sorted) - udp_limit))

    # ---- the three backends we care about ----
    print("\n=== BACKENDS DETECTED ===")
    allhosts = {q for _, q in dns_order} | set(snis.values()) | set(ip2host.values())
    found = False
    for h in sorted(allhosts):
        playfab = PLAYFAB.match(h)
        region = HOLOGRYPH.match(h)
        if playfab:
            print(" PlayFab host=%s ** TitleId = %s **" % (h, playfab.group(1).upper()))
            found = True
        elif region and "gameclient" not in h:
            print(" Master server host=%s (region=%s, ws://80 cleartext)"
                  % (h, region.group(1)))
            found = True
        elif "sandconfigstorage" in h:
            print(" Config CDN host=%s (anonymous HTTPS)" % h)
            found = True
    if not found:
        print(" (none of PlayFab / hologryph / config-blob seen — backend not contacted)")
# Run the triage only when executed as a script, not when imported.
if __name__ == "__main__":
    main()