From accc195b894c45f1a9a837188d38bf9df1ab0fd5 Mon Sep 17 00:00:00 2001 From: itsme Date: Mon, 12 Jul 2021 23:00:24 +0200 Subject: now handling long compressed records. --- crodump.py | 55 +++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 10 deletions(-) diff --git a/crodump.py b/crodump.py index 1b2540f..77f00e9 100644 --- a/crodump.py +++ b/crodump.py @@ -87,6 +87,9 @@ class Datafile: return self.dat.read(size) def readrec(self, idx): + """ + extract and decode a single record. + """ ofs, ln, chk = self.tadidx[idx-1] if ln==0xFFFFFFFF: # deleted record @@ -120,6 +123,9 @@ class Datafile: def dump(self, args): + """ + dump decodes all referenced data, and optionally will print out all unused bytes in the .dat file. + """ print("hdr: %-6s dat: %04x %s enc:%04x bs:%04x, tad: %08x %08x" % (self.name, self.hdrunk, self.version, self.encoding, self.blocksize, self.nrdeleted, self.firstdeleted)) ranges = [] # keep track of used bytes in the .dat file. for i, (ofs, ln, chk) in enumerate(self.tadidx): @@ -183,20 +189,31 @@ class Datafile: print("%08x-%08x: %s" % (o, o+l, toout(args, dat))) def iscompressed(self, data): + """ + Note that the compression header uses big-endian numbers. 
+ """ if len(data)<11: return - size, flag = struct.unpack_from(">HH", data, 0) - if size+5 != len(data): - return - if flag!=0x800: - return if data[-3:] != b"\x00\x00\x02": return + o = 0 + while o < len(data)-3: + size, flag = struct.unpack_from(">HH", data, o) + if flag!=0x800 and flag!=0x008: + return + o += size + 2 return True def decompress(self, data): - C = zlib.decompressobj(-15) - return C.decompress(data[8:-3]) + result = b"" + o = 0 + while o < len(data)-3: + size, flag, crc = struct.unpack_from(">HHL", data, o) + C = zlib.decompressobj(-15) + result += C.decompress(data[o+8:o+8+size]) + o += size + 2 + return result + def dump_bank_definition(args, bankdict): """ @@ -222,9 +239,9 @@ def decode_field(data): unk4 = rd.readdword() # Always 0x00000009 or 0x0001000d remain = rd.readbytes() - print("Type: %d (%02d/%02d) %04x,(%d-%d),%04x - '%s' -- %s" % (typ, idx1, idx2, unk1, unk2, unk3, unk4, name, tohex(remain))) + print("Type: %2d (%2d/%2d) %04x,(%d-%4d),%04x - '%s' -- %s" % (typ, idx1, idx2, unk1, unk2, unk3, unk4, name, tohex(remain))) else: - print("Type: %d %2d %d,%d - '%s'" % (typ, idx1, unk1, unk2, name)) + print("Type: %2d %2d %d,%d - '%s'" % (typ, idx1, unk1, unk2, name)) def destruct_base_definition(args, data): @@ -239,7 +256,10 @@ def destruct_base_definition(args, data): unkname = rd.readname() unk7 = rd.readdword() nrfields = rd.readdword() + if args.verbose: + print("table: %s" % tohex(data[:rd.o])) print("%d,%d,%d,%d,%d %d,%d '%s' '%s'" % (*unk123, *unk45, unk7, nrfields, tablename, unkname)) + fields = [] for _ in range(nrfields): l = rd.readword() @@ -363,12 +383,20 @@ class Database: if not self.bank: print("No CroBank.dat found") return + if args.skipencrypted and self.bank.encoding==3: + print("Skipping encrypted CroBank") + return nerr = 0 xref = defaultdict(int) for i in range(args.maxrecs): try: data = self.bank.readrec(i) - if not args.stats: + if args.find1d: + if data and (data.find(b"\x1d")>0 or data.find(b"\x1b")>0): + 
print("%d -> %s" % (i, b2a_hex(data))) + break + + elif not args.stats: if data is None: print("%5d: " % i) else: @@ -393,6 +421,11 @@ class Database: for k, v in xref.items(): print("%5d * %s" % (v, k)) + def readrec(self, sysnum): + data = self.bank.readrec(sysnum) + tabnum, = struct.unpack_from(" Date: Mon, 12 Jul 2021 23:00:24 +0200 Subject: noting that there can be more than one compressed chunk. --- docs/cronos-research.md | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/docs/cronos-research.md b/docs/cronos-research.md index 1ee75c4..5d0a508 100644 --- a/docs/cronos-research.md +++ b/docs/cronos-research.md @@ -157,7 +157,9 @@ The toplevel table-id for CroStru and CroSys is #3, while referenced records hav CroBank.dat contains the actual database entries for multiple tables as described in the CroStru file. After each chunk is re-assembled (and potentially decoded with the per block offset being the record number in the .tad file). -Its first byte defines, which table it belongs to. It is encoded in cp1251 (or possibly IBM866) with actual column data separated by 0x1e. There is an extra concept of sub fields in those columns, indicated by a 0x1d byte. +Its first byte defines, which table it belongs to. It is encoded in cp1251 (or possibly IBM866) with actual column data separated by 0x1e. +There is an extra concept of sub fields in those columns, indicated by a 0x1d byte. +Also files seem to have special fields, starting with a 0x1b byte. ## structure definitions @@ -246,10 +248,12 @@ Other unassigned values in the table entry definition are some records are compressed, the format is like this: - uint16 size - uint8 head[2] = { 8, 0 } - uint32 crc32 - uint8 compdata[size-4] + multiple-chunks { + uint16 size; // stored in bigendian format. + uint8 head[2] = { 8, 0 } + uint32 crc32 + uint8 compdata[size-6] + } uint8 tail[3] = { 0, 0, 2 } ## encrypted records -- cgit v1.2.3