crodump: added verbose dump, which prints all unreferenced datablocks. added --increment to scan for KOD shifts. support hex stdin data. added --struonly option.

author: itsme <itsme@xs4all.nl> 2021-07-06 22:50:59 +0200
committer: itsme <itsme@xs4all.nl> 2021-07-06 22:50:59 +0200
commit: ae5362ee71880cee2a986da15b47a5aa4c4368ce (patch)
tree: 259db509dcfb54d30d3e9f1f3c4f1e248d093996
parent: e9e4b7a5d53ca044f5efd325b9d25d2c72b1a2dc (diff)
2 files changed, 72 insertions, 11 deletions
diff --git a/crodump.py b/crodump.py
index a2a94a1..1ba7e5e 100644
--- a/crodump.py
+++ b/crodump.py
@@ -1,13 +1,23 @@
 import os.path
+import io
 import struct
 from binascii import b2a_hex
-from hexdump import hexdump, asasc, tohex
+from hexdump import hexdump, asasc, tohex, unhex
 from koddecoder import kodecode
 """
 python3 crodump.py crodump chechnya_proverki_ul_2012
 python3 crodump.py kodump -s 6   -o 0x4cc9 -e 0x5d95 chechnya_proverki_ul_2012/CroStru.dat
 """
+def enumunreferenced(ranges, filesize):
+    """
+    Yield (offset, length) for each part of the file not covered by 'ranges'.
+
+    'ranges' is an iterable of (start, end, desc) tuples, with 'end' exclusive.
+    'filesize' is the total size of the file; a trailing gap up to that size
+    is reported as well.
+    """
+    o = 0
+    for start, end, desc in sorted(ranges):
+        if start > o:
+            yield o, start-o
+        # never move backwards: a range nested in, or overlapping with, an
+        # earlier one must not cause already covered bytes to be reported.
+        o = max(o, end)
+    if o<filesize:
+        yield o, filesize-o
 class Datafile:
    def __init__(self, dat, tad):
        self.dat = dat
@@ -15,6 +25,9 @@ class Datafile:
        self.readtad()
+        self.dat.seek(0, io.SEEK_END)
+        self.datsize = self.dat.tell()
    def readtad(self):
        self.tad.seek(0)
        hdrdata = self.tad.read(2*4)
@@ -28,21 +41,37 @@ class Datafile:
    def dump(self, args, dokodecode=False, plainbytes=0):
        print("tadhdr: %08x %08x" % tuple(self.tadhdr))
+        ranges = []
        for i, (ofs, ln, chk) in enumerate(self.tadidx):
            if ln==0xFFFFFFFF:
                print("%5d: %08x %08x %08x" % (i, ofs, ln, chk))
                continue
            flags = ln>>24
            ln &= 0xFFFFFFF
            dat = self.readdata(ofs, ln)
            plain = b''
+            decrypted = ' '
            if dokodecode and not args.nokod:
-                plain = dat[:plainbytes]
+                pb = plainbytes if flags else 8
-                dat = kodecode(i+1, dat[plainbytes:])
+                plain = dat[:pb]
+                dat = kodecode(i+1, dat[pb:])
+                decrypted = '*' if flags else '+'
            if args.ascdump:
-                print("%5d: %08x-%08x: (%02x:%08x) %s %s" % (i, ofs, ofs+ln, flags, chk, tohex(plain), asasc(dat)))
+                print("%5d: %08x-%08x: (%02x:%08x) %s %s%s" % (i, ofs, ofs+ln, flags, chk, tohex(plain), decrypted, asasc(dat)))
            else:
-                print("%5d: %08x-%08x: (%02x:%08x) %s %s" % (i, ofs, ofs+ln, flags, chk, tohex(plain), tohex(dat)))
+                print("%5d: %08x-%08x: (%02x:%08x) %s %s%s" % (i, ofs, ofs+ln, flags, chk, tohex(plain), decrypted, tohex(dat)))
+            ranges.append((ofs, ofs+ln, "item #%d" % i))
+        if args.verbose:
+            # output parts not referenced in the .tad file.
+            for o, l in enumunreferenced(ranges, self.datsize):
+                dat = self.readdata(o, l)
+                if args.ascdump:
+                    print("%08x-%08x: %s" % (o, o+l, asasc(dat)))
+                else:
+                    print("%08x-%08x: %s" % (o, o+l, tohex(dat)))
 class Database:
    def __init__(self, dbdir):
@@ -62,6 +91,12 @@ class Database:
    def getname(self, name, ext):
        return os.path.join(self.dbdir, "Cro%s.%s" % (name, ext))
+def incdata(data, s):
+    """
+    Return a copy of 'data' with 's' added to every byte, modulo 256.
+    Handy for finding the correct shift of a chunk which was shifted incorrectly.
+    """
+    return bytes((byte + s) & 0xFF for byte in data)
 def decode_kod(args, data):
    """
@@ -75,6 +110,17 @@ def decode_kod(args, data):
        args.shift = int(args.shift, 0)
        enc = kodecode(args.shift, data)
        hexdump(args.offset, enc)
+    elif args.increment:
+        # output the data with every possible increment added to each byte.
+        for s in range(256):
+            enc = incdata(data, s)
+            if args.ascdump:
+                print("%02x: %s" % (s, asasc(enc)))
+            else:
+                print("%02x: %s" % (s, tohex(enc)))
    else:
        # output with all possible 'shift' values.
        for s in range(256):
@@ -106,6 +152,8 @@ def kod_hexdump(args):
        # no filename -> read from stdin.
        import sys
        data = sys.stdin.buffer.read()
+        if args.unhex:
+            data = unhex(data)
        decode_kod(args, data)
@@ -115,6 +163,8 @@ def cro_dump(args):
    if db.stru:
        print("stru")
        db.stru.dump(args, dokodecode=True)
+    if args.struonly:
+        return
    if db.index:
        print("index")
        db.index.dump(args)
@@ -132,20 +182,24 @@ def main():
    subparsers = parser.add_subparsers()
    parser.set_defaults(handler=None)
-    ko = subparsers.add_parser('kodump', help='KOD dumper')
+    ko = subparsers.add_parser('kodump', help='KOD/hex dumper')
    ko.add_argument('--offset', '-o', type=str, default="0")
    ko.add_argument('--length', '-l', type=str)
    ko.add_argument('--endofs', '-e', type=str)
-    ko.add_argument('--shift', '-s', type=str)
+    ko.add_argument('--unhex', '-x', action='store_true', help="assume the input contains hex data")
-    ko.add_argument('--ascdump', '-a', action='store_true')
+    ko.add_argument('--shift', '-s', type=str, help="KOD decode with the specified shift")
-    ko.add_argument('--nokod', '-n', action='store_true')
+    ko.add_argument('--increment', '-i', action='store_true', help="assume data is already KOD decoded, but with wrong shift -> dump alternatives.")
-    ko.add_argument('filename', type=str, nargs='?')
+    ko.add_argument('--ascdump', '-a', action='store_true', help="CP1251 asc dump of the data")
+    ko.add_argument('--nokod', '-n', action='store_true', help="don't KOD decode")
+    ko.add_argument('filename', type=str, nargs='?', help="dump either stdin, or the specified file")
    ko.set_defaults(handler=kod_hexdump)
    cro = subparsers.add_parser('crodump', help='CROdumper')
+    cro.add_argument('--verbose', '-v', action='store_true')
    cro.add_argument('--kodecode', '-k', action='store_true')
    cro.add_argument('--ascdump', '-a', action='store_true')
    cro.add_argument('--nokod', '-n', action='store_true')
+    cro.add_argument('--struonly', action='store_true')
    cro.add_argument('dbdir', type=str)
    cro.set_defaults(handler=cro_dump)
diff --git a/hexdump.py b/hexdump.py
index c119b16..aeb8c88 100644
--- a/hexdump.py
+++ b/hexdump.py
@@ -1,9 +1,16 @@
 import struct
-from binascii import b2a_hex
+from binascii import b2a_hex, a2b_hex
 """
 Simple hexdump, 16 bytes per line with offset.
 """
+def unhex(data):
+    """
+    Convert hex text to bytes.
+
+    'data' may be either bytes or str. All whitespace is ignored, including
+    the linebreaks and tabs found in multi-line input.
+    Raises binascii.Error when the remaining text is not valid hex.
+    """
+    if isinstance(data, bytes):
+        data = data.decode('ascii')
+    # split() without arguments splits on any run of whitespace, so this also
+    # removes tabs and newlines between the hex digits, not only spaces.
+    data = "".join(data.split())
+    return a2b_hex(data)
 def ashex(line):
    return " ".join("%02x" % _ for _ in line)
 def aschr(b):
author	itsme <itsme@xs4all.nl>	2021-07-06 22:50:59 +0200
committer	itsme <itsme@xs4all.nl>	2021-07-06 22:50:59 +0200
commit	ae5362ee71880cee2a986da15b47a5aa4c4368ce (patch)
tree	259db509dcfb54d30d3e9f1f3c4f1e248d093996
parent	e9e4b7a5d53ca044f5efd325b9d25d2c72b1a2dc (diff)