#include "mystdlib.h"
/* NOTE(review): the header names of the five includes below are missing in
 * this copy of the file -- restore them from the original before building.
 * The code in this file uses at least strchr/strlen/strncmp/memcmp/memcpy/
 * memchr, malloc/qsort, printf/putchar/fwrite and PRIu64. */
#include
#include
#include
#include
#include

/* Number of line-feed terminated fields per input row. */
enum { COLUMNS = 15 };

/* One sortable entry of the input. */
typedef struct {
    char *ptr;    /* start of the entry's text */
    long rows;    /* row counter; entries with different counts never match */
    long outoff;  /* index of the entry's first slot in the output vector */
    long flag;    /* flag bits; main() tests 0x80 and 0x40 */
} entry_t;

/* A span of output text: start pointer and length in bytes. */
typedef struct {
    char *ptr;
    size_t size;
} outvec_t;

/* NULL-terminated list of "YYYY_Qn" labels. The position of a label in this
 * list is the value returned by year_to_offset(). */
const char *g_year_map[] = {
    "1992_Q2", "1995_Q0", "1996_Q0", "1996_Q1", "1997_Q1", "1997_Q3",
    "1998_Q1", "1998_Q3", "1999_Q1", "1999_Q3", "2000_Q1", "2000_Q3",
    "2001_Q1", "2001_Q2", "2001_Q3", "2001_Q4", "2002_Q1", "2002_Q3",
    "2003_Q1", "2003_Q3", "2004_Q1", "2004_Q3", "2005_Q1", "2005_Q3",
    "2006_Q1", "2006_Q3", "2007_Q1", "2007_Q3", "2008_Q1", "2008_Q3",
    "2009_Q1", "2009_Q3", "2010_Q1", "2010_Q3", "2011_Q1", "2011_Q3",
    "2012_Q1", "2012_Q3", "2013_Q1", "2013_Q3", "2014_Q1", "2014_Q3",
    "2015_Q1", "2015_Q3", "2016_Q1", "2016_Q3", "2017_Q1", "2017_Q3",
    "2018_Q1", "2018_Q3", "2019_Q1",
    NULL
};

/*
 * Advance *ptr to the character following the next line feed.
 *
 * If no line feed is left, *ptr is parked on the terminating NUL rather than
 * computing NULL + 1, which is undefined behaviour and would hand the caller
 * a bogus pointer. Repeated calls then stay on the NUL.
 */
void SKIP_1_COLUMN(char **ptr)
{
    char *eol = strchr(*ptr, '\n');

    *ptr = eol ? eol + 1 : *ptr + strlen(*ptr);
}

/* Advance *ptr past the next two line feeds. */
void SKIP_2_COLUMNS(char **ptr)
{
    SKIP_1_COLUMN(ptr);
    SKIP_1_COLUMN(ptr);
}

/* Advance *ptr past the next three line feeds. */
void SKIP_3_COLUMNS(char **ptr)
{
    SKIP_1_COLUMN(ptr);
    SKIP_1_COLUMN(ptr);
    SKIP_1_COLUMN(ptr);
}

/*
 * Look up the 7-character label at the start of `year` in g_year_map.
 *
 * Returns the index of the matching label, or -1 when `year` is NULL or does
 * not start with a known label. Callers must check for -1 before using the
 * result as a shift count or array index.
 */
int year_to_offset(const char *year)
{
    enum { YEAR_LABEL_LEN = 7 };

    if (!year)
        return -1;

    for (int off = 0; g_year_map[off]; ++off) {
        /* strncmp stops at a NUL in `year`, so a short string is not
         * over-read the way a fixed-length memcmp would. */
        if (strncmp(year, g_year_map[off], YEAR_LABEL_LEN) == 0)
            return off;
    }
    return -1;
}

/*
 * Compare two line-feed terminated fields byte by byte.
 *
 * Returns 0 when both fields are identical up to and including the line
 * feed, otherwise the difference of the first pair of bytes that ends the
 * scan (as unsigned char values).
 *
 * A NUL byte does NOT end the comparison: the scan stops only at a line feed
 * in p1 or at the first differing byte, so both inputs must contain one of
 * the two within their buffers.
 */
int STRCMP_n(const char *p1, const char *p2)
{
    const unsigned char *s1 = (const unsigned char *)p1;
    const unsigned char *s2 = (const unsigned char *)p2;

    for (;;) {
        unsigned char c1 = *s1++;
        unsigned char c2 = *s2++;

        if (c1 == '\n' || c1 != c2)
            return c1 - c2;
    }
}

int compare_entries(entry_t*a, entry_t*b, int *prec) { char *pa = a->ptr, *pb = b->ptr; int col, row, res = 0, nprec = -1; /* Multi line entries never match single line entries */ if (a->rows != b->rows) return -1; /* Assume house number precision first ..
unless */ if (!memcmp(pa,"2006_Q3",7)) *prec = 2; else *prec = 3; if (!memcmp(pb,"2006_Q3",7)) nprec = 2; else nprec = 3; /* Skip year and flags */ SKIP_2_COLUMNS(&pa); SKIP_2_COLUMNS(&pb); /* Test all columns for identity */ for (col=2; colrows; ++row) { /* Skip last row's coordinate columns, year and flags */ SKIP_3_COLUMNS(&pa); SKIP_3_COLUMNS(&pb); for (col=2; colptr; char * pb = (char*)e_b->ptr; int results[COLUMNS], c; if (e_a->rows != e_b->rows) return e_a->rows - e_b->rows; for (c = 0; csize; ++i) if (tbuch->addr[i] == 10) ++lines; sort_array = (entry_t*)malloc((lines / COLUMNS) * sizeof(entry_t)); out_array = (outvec_t*)malloc((lines / COLUMNS) * sizeof(outvec_t)); ptr = (char*)tbuch->addr; start = ptr; while (ptr < (char*)tbuch->addr + tbuch->size) { int c; start = ptr; /* Look for field terminator */ for (c=0; c= 0); sort_array[current].rows++; } else { sort_array[++current].ptr = start; sort_array[current].rows = 0; sort_array[current].outoff = outoff; sort_array[current].flag = flag; } out_array[outoff].size = ptr - out_array[outoff].ptr; outoff++; } /* Sort the whole thing */ qsort(sort_array, current, sizeof(entry_t), sort_me); for (i=0; i<=current; ++i) { int j, dump = 0, prec; int year = year_to_offset(sort_array[i].ptr); year_list |= 1LL << year; if (sort_array[i].flag & 0x80 ) bizflag_list |= 1LL << year; if (sort_array[i].flag & 0x40 ) revflag_list |= 1LL << year; /* The last entry always needs to be dumped, but check if its precision is better than the old truth's The second comparision checks for equality of entries (modulo coordinate mismatch) */ if (i == current) { compare_entries(sort_array+i, sort_array+i, &prec); dump = 1; } else if (compare_entries(sort_array+i, sort_array+i+1, &prec)) dump = 1; /* If this entry's precision is higher than the one of possible earlier matches, then the current entry becomes the truth */ if (prec >= truth_prec) { truth = i; truth_prec = prec; } if (dump) { printf("%" PRIu64 "\t%" PRIu64 "\t%" PRIu64 "\t", 
year_list, bizflag_list, revflag_list); for (int c=0; cptr, 10); size_t len = s - out->ptr; if (!len || out->ptr[0] == 9) skipped++; else { if (!started++) putchar('{'); else putchar(','); for (int x=0; xptr, len); else { char coords[64], *tab; // memcpy(coords, "POINT(", 6); // memcpy(coords + 6, out->ptr, len); // tab = memchr(coords + 6, 9, len); // if (tab) *tab = ' '; // coords[6+len] = ')'; // fwrite(coords, 7 + len, 1, stdout); memcpy(coords, out->ptr, len); tab = memchr(coords, 9, len); if (tab) *tab = ' '; fwrite(coords, len, 1, stdout); } skipped = 0; } out->ptr = s + 1; ++out; } if (started) putchar('}'); if (c