"""
Command-line dumper for 'Cro' database files (CroStru / CroIndex / CroBank / CroSys).

Example usage:

    python3 crodump.py crodump chechnya_proverki_ul_2012
    python3 crodump.py kodump -s 6 -o 0x4cc9 -e 0x5d95 chechnya_proverki_ul_2012/CroStru.dat

NOTE(review): the text this file was restored from had lost all line breaks
and five spans of code.  Every restored span is marked with a
"NOTE(review): reconstructed" comment; verify those spans against the
original file before relying on them.
"""
import argparse
import io
import os.path
import re
import struct
import sys
import zlib
from binascii import b2a_hex
from collections import defaultdict

from hexdump import hexdump, asasc, tohex, unhex, strescape
from koddecoder import kodecode
from readers import ByteReader


def toout(args, data):
    """Return `data` as an ascii dump when `args.ascdump` is set, else as a hex dump."""
    if args.ascdump:
        return asasc(data)
    return tohex(data)


def enumunreferenced(ranges, filesize):
    """
    From a list of used ranges and the filesize, enumerate the unused ranges.

    `ranges` contains (start, end, description) tuples.
    Yields (offset, length) tuples for every gap between the used ranges.
    """
    o = 0
    for start, end, desc in sorted(ranges):
        if start > o:
            yield o, start - o
        # max() keeps the cursor from moving backwards when a range lies
        # entirely inside an earlier one.
        o = max(o, end)
    # NOTE(review): reconstructed - only "if o" survived of this statement.
    if o < filesize:
        yield o, filesize - o


class Datafile:
    """
    Represent a single .dat file together with its .tad index file.

    NOTE(review): reconstructed - the class header, __init__, readdathdr,
    readdata and large parts of readtad, readrec, dump and iscompressed were
    lost.  They are rebuilt from the surviving fragments and from how the rest
    of this file uses the class; the struct formats and the magic value in
    particular need to be confirmed.
    """

    def __init__(self, name, dat, tad):
        """
        name -- short name of the file pair, as passed by Database.getfile
        dat  -- binary file object for the .dat file
        tad  -- binary file object for the .tad file
        """
        self.name = name
        self.dat = dat
        self.tad = tad

        self.readdathdr()
        self.readtad()

        self.dat.seek(0, io.SEEK_END)
        self.datsize = self.dat.tell()

    def readdathdr(self):
        """Read the .dat header: version, encoding and blocksize."""
        # NOTE(review): reconstructed - header size, layout and magic are assumptions.
        self.dat.seek(0)
        hdrdata = self.dat.read(19)

        magic, self.hdrunk, self.version, self.encoding, self.blocksize = \
            struct.unpack("<8sH5sHH", hdrdata)
        if magic != b"CroFile\x00":
            print("unknown magic: ", magic)
            raise Exception("not a Crofile")
        self.use64bit = self.version == b"01.03"

        # blocksize
        #   0040 -> Bank
        #   0400 -> Index or Sys
        #   0200 -> Stru  or Sys

        # encoding
        #   0000
        #   0001  --> 'KOD encoded'
        #   0002
        #   0003  --> encrypted

    def readtad(self):
        """Read the .tad header and the list of index entries into self.tadidx."""
        self.tad.seek(0)
        hdrdata = self.tad.read(2 * 4)
        self.nrdeleted, self.firstdeleted = struct.unpack("<2L", hdrdata)
        indexdata = self.tad.read()
        # NOTE(review): reconstructed - entry formats and sizes are assumptions.
        if self.use64bit:
            # 01.03 has 64 bit file offsets
            fmt, entsize = "<QLL", 16
        else:
            fmt, entsize = "<LLL", 12
        self.tadidx = [
            struct.unpack_from(fmt, indexdata, entsize * i)
            for i in range(len(indexdata) // entsize)
        ]
        if len(indexdata) % entsize:
            print("WARN: leftover data in .tad")

    def readdata(self, ofs, size):
        """Return `size` bytes read from the .dat file at offset `ofs`."""
        # NOTE(review): reconstructed - only calls to this method survived.
        self.dat.seek(ofs)
        return self.dat.read(size)

    def _readextended(self, dat, ranges=None, desc=None):
        """
        Collect the payload of a record which continues in extension blocks.

        `dat` is the first chunk of the record.  When `ranges` is given, every
        extension block visited is appended to it as (start, end, desc).
        Returns (payload, tail, infostr): tail holds the bytes read beyond the
        declared length, infostr lists the offsets that were followed.

        NOTE(review): reconstructed - pointer sizes and layout are assumptions.
        """
        if self.use64bit:
            hdrfmt, hdrsize, ptrfmt, ptrsize = "<QL", 12, "<Q", 8
        else:
            hdrfmt, hdrsize, ptrfmt, ptrsize = "<LL", 8, "<L", 4

        extofs, extlen = struct.unpack(hdrfmt, dat[:hdrsize])
        infostr = "%08x;%08x" % (extofs, extlen)
        encdat = dat[hdrsize:]
        while len(encdat) < extlen:
            block = self.readdata(extofs, self.blocksize)
            if len(block) < ptrsize:
                # truncated file: stop instead of looping forever.
                break
            if ranges is not None:
                ranges.append((extofs, extofs + self.blocksize, desc))
            extofs, = struct.unpack(ptrfmt, block[:ptrsize])
            infostr += ";%08x" % extofs
            encdat += block[ptrsize:]
        return encdat[:extlen], encdat[extlen:], infostr

    def readrec(self, idx):
        """
        Extract and decode a single record.

        Returns the record bytes, or None for a deleted record.
        Raises IndexError when `idx` is beyond the last index entry.
        """
        # NOTE(review): reconstructed - record numbering starting at 1, the
        # deleted-record marker and the decoding steps after the extension
        # chain are assumptions.
        if idx <= 0:
            raise ValueError("recnum must be a positive number")
        ofs, ln, chk = self.tadidx[idx - 1]
        if ln == 0xFFFFFFFF:
            # deleted record
            return None

        flags = ln >> 24
        # NOTE(review): the surviving text masks 28 bits here although the
        # flags are taken from the top 8 bits - kept as found, confirm.
        ln &= 0xFFFFFFF
        dat = self.readdata(ofs, ln)

        if not dat:
            # empty record
            encdat = dat
        elif not flags:
            encdat, _, _ = self._readextended(dat)
        else:
            encdat = dat

        if self.encoding == 1:
            encdat = kodecode(idx, encdat)
        if self.iscompressed(encdat):
            encdat = self.decompress(encdat)
        return encdat

    def dump(self, args):
        """
        Print all records referenced from the .tad file.

        With args.verbose, also print the parts of the .dat file which are
        not referenced by any record.
        """
        # NOTE(review): reconstructed - all print formats in this method and
        # the decode flag characters are assumptions.
        print("hdr: %-6s dat: %04x %s enc:%04x bs:%04x, tad: %08x %08x" % (
            self.name, self.hdrunk, self.version, self.encoding,
            self.blocksize, self.nrdeleted, self.firstdeleted))

        # not every sub-command defines --nokod.
        nokod = getattr(args, "nokod", False)

        ranges = []  # keep track of used bytes in the .dat file.
        for i, (ofs, ln, chk) in enumerate(self.tadidx):
            if ln == 0xFFFFFFFF:
                print("%5d: %08x %08x %08x" % (i + 1, ofs, ln, chk))
                continue
            flags = ln >> 24

            ln &= 0xFFFFFFF
            dat = self.readdata(ofs, ln)
            ranges.append((ofs, ofs + ln, "item #%d" % i))
            decflags = [' ', ' ']
            infostr = ""
            tail = b''

            if not dat:
                # empty record
                encdat = dat
            elif not flags:
                encdat, tail, infostr = self._readextended(
                    dat, ranges, "item #%d ext" % i)
                decflags[0] = '+'
            else:
                encdat = dat
                decflags[0] = '*'

            if self.encoding == 1 and not nokod:
                decdat = kodecode(i + 1, encdat)
            else:
                decdat = encdat
                decflags[0] = ' '

            if args.decompress and self.iscompressed(decdat):
                decdat = self.decompress(decdat)
                decflags[1] = '@'

            print("%5d: %08x-%08x: (%02x:%08x) %s %s%s %s" % (
                i + 1, ofs, ofs + ln, flags, chk, infostr,
                "".join(decflags), toout(args, decdat), tohex(tail)))

        if args.verbose:
            # output parts not referenced in the .tad file.
            for o, l in enumunreferenced(ranges, self.datsize):
                dat = self.readdata(o, l)
                print("%08x-%08x: %s" % (o, o + l, toout(args, dat)))

    def iscompressed(self, data):
        """
        Return True when `data` looks like a compressed record.

        The checks are: a big-endian (size, flag) header with size+5 equal to
        the total length, flag equal to 0x800, and a 00 00 02 trailer.
        """
        # NOTE(review): reconstructed - the minimum-length guard is an
        # assumption (8 header bytes + 3 trailer bytes, as used by decompress).
        if len(data) < 11:
            return False
        size, flag = struct.unpack_from(">HH", data, 0)
        if size + 5 != len(data):
            return False
        if flag != 0x800:
            return False
        if data[-3:] != b"\x00\x00\x02":
            return False
        return True

    def decompress(self, data):
        """Inflate the raw deflate stream between the 8 byte header and 3 byte trailer."""
        C = zlib.decompressobj(-15)
        return C.decompress(data[8:-3])


def dump_bank_definition(args, bankdict):
    """
    Print the decoded 'bank' / database definition.

    Values containing bytes outside the printable set are dumped with toout(),
    all other values are printed as escaped strings.
    """
    for k, v in bankdict.items():
        if re.search(b'[^\x0d\x0a\x09\x20-\x7e\xc0-\xff]', v):
            print("%-20s - %s" % (k, toout(args, v)))
        else:
            print("%-20s - \"%s\"" % (k, strescape(v)))


def decode_field(data):
    """Decode and print a single field definition from a table definition."""
    rd = ByteReader(data)
    typ = rd.readword()
    idx1 = rd.readdword()
    name = rd.readname()
    unk1 = rd.readdword()
    unk2 = rd.readbyte()  # Always 1

    if typ:
        idx2 = rd.readdword()
        unk3 = rd.readdword()  # max value or length
        unk4 = rd.readdword()  # Always 0x00000009 or 0x0001000d
        remain = rd.readbytes()
        print("Type: %d (%02d/%02d) %04x,(%d-%d),%04x - '%s' -- %s" % (
            typ, idx1, idx2, unk1, unk2, unk3, unk4, name, tohex(remain)))
    else:
        print("Type: %d %2d %d,%d - '%s'" % (typ, idx1, unk1, unk2, name))


def destruct_base_definition(args, data):
    """
    Decode and print the 'base' / table definition.

    Prints the table header, then one line per field, then the bytes which
    remain after the last field.
    """
    rd = ByteReader(data)

    unk123 = [rd.readword() for _ in range(3)]
    unk45 = [rd.readdword() for _ in range(2)]
    tablename = rd.readname()
    unkname = rd.readname()
    unk7 = rd.readdword()
    nrfields = rd.readdword()

    print("%d,%d,%d,%d,%d %d,%d '%s' '%s'" % (
        *unk123, *unk45, unk7, nrfields, tablename, unkname))

    for _ in range(nrfields):
        size = rd.readword()
        fielddef = rd.readbytes(size)
        if args.verbose:
            print("field: @%04x: %04x - %s" % (rd.o, size, tohex(fielddef)))
        decode_field(fielddef)

    remaining = rd.readbytes()
    print("rem: %s" % tohex(remaining))


def destruct_sys3_def(rd):
    """Placeholder: type 3 sys records are accepted but not decoded."""
    pass


def destruct_sys4_def(rd):
    """Decode and print a type 4 sys record: a counted list of (marker, description, path, marker2)."""
    n = rd.readdword()
    for _ in range(n):
        marker = rd.readdword()
        description = rd.readlongstring()
        path = rd.readlongstring()
        marker2 = rd.readdword()

        print("%08x;%08x: %-50s : %s" % (marker, marker2, path, description))


def destruct_sys_definition(args, data):
    """
    Decode the 'sys' / dbindex definition.

    The first byte selects the record type; only types 3 and 4 are supported,
    anything else raises an Exception.
    """
    rd = ByteReader(data)

    systype = rd.readbyte()
    if systype == 3:
        return destruct_sys3_def(rd)
    elif systype == 4:
        return destruct_sys4_def(rd)
    else:
        raise Exception("unsupported sys record")


class Database:
    """Represent the entire database, consisting of stru, index, bank and sys files."""

    def __init__(self, dbdir):
        self.dbdir = dbdir

        # each of these is a Datafile, or None when the file pair is missing.
        self.stru = self.getfile("Stru")
        self.index = self.getfile("Index")
        self.bank = self.getfile("Bank")
        self.sys = self.getfile("Sys")
        # BankTemp, Int

    def getfile(self, name):
        """
        Open the Cro<name>.dat / Cro<name>.tad pair.

        Returns a Datafile, or None when either file is missing or cannot be
        opened.
        """
        opened = []
        try:
            datname = self.getname(name, "dat")
            tadname = self.getname(name, "tad")
            if not (datname and tadname):
                return None
            for filename in (datname, tadname):
                opened.append(open(filename, "rb"))
            datafile = Datafile(name, *opened)
            # the Datafile owns the handles now, don't close them below.
            opened = []
            return datafile
        except IOError:
            return None
        finally:
            # close whatever was opened when construction did not succeed.
            for fh in opened:
                fh.close()

    def getname(self, name, ext):
        """
        Get a case-insensitive filename match for 'name.ext'.

        Returns None when no matching file was found.
        """
        basename = ("Cro%s.%s" % (name, ext)).lower()
        for fn in os.scandir(self.dbdir):
            if basename == fn.name.lower():
                return os.path.join(self.dbdir, fn.name)
        return None

    def dump(self, args):
        """Dump every file of the database which is present."""
        if self.stru:
            self.stru.dump(args)
        if self.index:
            self.index.dump(args)
        if self.bank:
            self.bank.dump(args)
        if self.sys:
            self.sys.dump(args)

    def strudump(self, args):
        """Print the database and table definitions found in the CroStru file."""
        if not self.stru:
            print("missing CroStru file")
            return
        self.dumptabledefs(args)

    def decode_bank_definition(self, data):
        """
        Decode the 'bank' / database definition.

        Returns a dict mapping key names to their values.  A value is either
        stored inline, or is a reference to another record in the stru file.
        """
        rd = ByteReader(data)

        d = dict()
        while not rd.eof():
            keyname = rd.readname()
            if keyname in d:
                print("WARN: duplicate key: %s" % keyname)

            index_or_length = rd.readdword()
            if index_or_length >> 31:
                # top bit set: the low 31 bits are the length of an inline value.
                d[keyname] = rd.readbytes(index_or_length & 0x7FFFFFFF)
            else:
                refdata = self.stru.readrec(index_or_length)
                if refdata is None:
                    # readrec returned no data for the referenced record.
                    print("WARN: no data for record %d of key %s" % (index_or_length, keyname))
                    d[keyname] = b""
                    continue
                if refdata[:1] != b"\x04":
                    print("WARN: expected refdata to start with 0x04")
                d[keyname] = refdata[1:]
        return d

    def dumptabledefs(self, args):
        """Print the database definition from stru record 1, followed by all 'Base<nnn>' table definitions."""
        dbinfo = self.stru.readrec(1)
        if dbinfo is None:
            print("WARN: no dbinfo record found")
            return
        if dbinfo[:1] != b"\x03":
            print("WARN: expected dbinfo to start with 0x03")
        dbdef = self.decode_bank_definition(dbinfo[1:])
        dump_bank_definition(args, dbdef)

        for k, v in dbdef.items():
            if k.startswith("Base") and k[4:].isnumeric():
                print("== %s ==" % k)
                destruct_base_definition(args, v)

    def bankdump(self, args):
        """
        Print up to args.maxrecs records from the CroBank file.

        With args.stats, print a count per first record byte instead of the
        records themselves.  Stops at the end of the index, or after more than
        5 consecutive errors.
        """
        if not self.bank:
            print("No CroBank.dat found")
            return
        nerr = 0
        xref = defaultdict(int)
        for i in range(args.maxrecs):
            try:
                data = self.bank.readrec(i)
                if not args.stats:
                    if data is None:
                        print("%5d: <deleted>" % i)
                    else:
                        print("%5d: %s" % (i, toout(args, data)))
                else:
                    if data is None:
                        xref["None"] += 1
                    elif not len(data):
                        xref["Empty"] += 1
                    else:
                        xref["%02x" % data[0]] += 1
                nerr = 0
            except IndexError:
                # past the last record.
                break
            except Exception as e:
                print("%5d: <%s>" % (i, e))
                nerr += 1
                if nerr > 5:
                    break
        if args.stats:
            print("-- stats --")
            for k, v in xref.items():
                print("%5d * %s" % (v, k))


def incdata(data, s):
    """
    Add 's' to each byte.

    This is useful for finding the correct shift from an incorrectly shifted chunk.
    """
    # NOTE(review): reconstructed - the original built the result with struct.pack.
    return bytes((b + s) & 0xFF for b in data)


def decode_kod(args, data):
    """
    Hexdump `data` using the KOD decoding selected on the command line.

    NOTE(review): reconstructed - this entire function was lost; the option
    handling and the hexdump() call signature are assumptions.
    """
    if args.nokod:
        # plain hexdump, no KOD decode
        hexdump(args.offset, data, args)
    elif args.shift:
        # explicitly specified shift.
        args.shift = int(args.shift, 0)
        enc = kodecode(args.shift, data)
        hexdump(args.offset, enc, args)
    elif args.increment:
        # data is already decoded, but with the wrong shift: show all corrections.
        for s in range(256):
            enc = incdata(data, s)
            print("%02x: %s" % (s, tohex(enc)))
    else:
        # output with all possible 'shift' values.
        for s in range(256):
            enc = kodecode(s, data)
            print("%02x: %s" % (s, toout(args, enc)))


def kod_hexdump(args):
    """
    Handle the 'kodump' subcommand.

    NOTE(review): reconstructed - everything before the stdin branch was lost;
    the argument conversions and defaults are assumptions.
    """
    args.offset = int(args.offset, 0)
    if args.length:
        args.length = int(args.length, 0)
    elif args.endofs:
        args.endofs = int(args.endofs, 0)
        args.length = args.endofs - args.offset

    if args.width:
        args.width = int(args.width, 0)
    else:
        args.width = 64 if args.ascdump else 16

    if args.filename:
        with open(args.filename, "rb") as fh:
            if args.length is None:
                fh.seek(0, io.SEEK_END)
                args.length = fh.tell() - args.offset
            fh.seek(args.offset)
            data = fh.read(args.length)
    else:
        # no filename -> read from stdin.
        data = sys.stdin.buffer.read()

    if args.unhex:
        data = unhex(data)
    decode_kod(args, data)


def cro_dump(args):
    """Handle the 'crodump' subcommand."""
    db = Database(args.dbdir)
    db.dump(args)


def stru_dump(args):
    """Handle the 'strudump' subcommand."""
    db = Database(args.dbdir)
    db.strudump(args)


def sys_dump(args):
    """Hexdump all CroSys records."""
    db = Database(args.dbdir)
    if db.sys:
        db.sys.dump(args)


def bank_dump(args):
    """Hexdump all records of the CroBank file, up to --maxrecs records."""
    if args.maxrecs:
        args.maxrecs = int(args.maxrecs, 0)
    else:
        # an arbitrarily large number.
        args.maxrecs = 0xFFFFFFFF

    db = Database(args.dbdir)
    db.bankdump(args)


def destruct(args):
    """
    Decode the index#1 structure information record.

    Takes hex input from stdin.  args.type selects the record type:
    1 = bank definition, 2 = base definition, 3 = sys definition.
    """
    data = sys.stdin.buffer.read()
    data = unhex(data)

    if args.type == 1:
        # NOTE(review): destruct_bank_definition is not defined anywhere in
        # the surviving text of this file - it was either lost or renamed.
        destruct_bank_definition(args, data)
    elif args.type == 2:
        destruct_base_definition(args, data)
    elif args.type == 3:
        destruct_sys_definition(args, data)


def main():
    """Parse the command line and run the handler of the selected subcommand."""
    parser = argparse.ArgumentParser(description='CRO hexdumper')
    subparsers = parser.add_subparsers()
    parser.set_defaults(handler=None)

    ko = subparsers.add_parser('kodump', help='KOD/hex dumper')
    ko.add_argument('--offset', '-o', type=str, default="0")
    ko.add_argument('--length', '-l', type=str)
    ko.add_argument('--width', '-w', type=str)
    ko.add_argument('--endofs', '-e', type=str)
    ko.add_argument('--unhex', '-x', action='store_true', help="assume the input contains hex data")
    ko.add_argument('--shift', '-s', type=str, help="KOD decode with the specified shift")
    ko.add_argument('--increment', '-i', action='store_true',
                    help="assume data is already KOD decoded, but with wrong shift -> dump alternatives.")
    ko.add_argument('--ascdump', '-a', action='store_true', help="CP1251 asc dump of the data")
    ko.add_argument('--nokod', '-n', action='store_true', help="don't KOD decode")
    ko.add_argument('filename', type=str, nargs='?', help="dump either stdin, or the specified file")
    ko.set_defaults(handler=kod_hexdump)

    p = subparsers.add_parser('crodump', help='CROdumper')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--kodecode', '-k', action='store_true')
    p.add_argument('--ascdump', '-a', action='store_true')
    p.add_argument('--nokod', '-n', action='store_true')
    # the default must be the boolean True, not the string 'true'.
    p.add_argument('--nodecompress', action='store_false', dest='decompress', default=True)
    p.add_argument('dbdir', type=str)
    p.set_defaults(handler=cro_dump)

    p = subparsers.add_parser('sysdump', help='SYSdumper')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--ascdump', '-a', action='store_true')
    p.add_argument('--nodecompress', action='store_false', dest='decompress', default=True)
    p.add_argument('dbdir', type=str)
    p.set_defaults(handler=sys_dump)

    p = subparsers.add_parser('bankdump', help='BANKdumper')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--ascdump', '-a', action='store_true')
    p.add_argument('--maxrecs', '-n', type=str, help="max nr of records to output")
    p.add_argument('--stats', action='store_true', help='calc table stats from the first byte of each record')
    p.add_argument('dbdir', type=str)
    p.set_defaults(handler=bank_dump)

    p = subparsers.add_parser('strudump', help='STRUdumper')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--ascdump', '-a', action='store_true')
    p.add_argument('dbdir', type=str)
    p.set_defaults(handler=stru_dump)

    p = subparsers.add_parser('destruct', help='Stru dumper')
    p.add_argument('--verbose', '-v', action='store_true')
    p.add_argument('--ascdump', '-a', action='store_true')
    p.add_argument('--type', '-t', type=int, help='what type of record to destruct')
    p.set_defaults(handler=destruct)

    args = parser.parse_args()

    if args.handler:
        args.handler(args)
    else:
        # no subcommand given: show the usage instead of silently doing nothing.
        parser.print_help()


if __name__ == '__main__':
    main()