From ce15c0e9a6a3801774a7f76a3036cdf6abd18466 Mon Sep 17 00:00:00 2001 From: Dirk Engling Date: Mon, 15 Jul 2013 14:41:01 +0200 Subject: Stable version --- kara.c | 301 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 301 insertions(+) create mode 100644 kara.c (limited to 'kara.c') diff --git a/kara.c b/kara.c new file mode 100644 index 0000000..2898e63 --- /dev/null +++ b/kara.c @@ -0,0 +1,301 @@ +#include +#include +#include + +typedef uint8_t leg_t; +typedef uint16_t dleg_t; + +enum { g_legs = 256 }; + +#define dump_int(A,B,C) dump_int2(A,B,C) +static void dump_int2( uint8_t const * p, int l, int nl ) +{ + while( l-- ) + printf( "%02X", *(p++) ); + if( nl ) putchar( 10 ); +} + +//#define printf(...) +//#define putchar(A) + +static int mp_cmp( leg_t const *a, leg_t const *b, int legs ) +{ + int leg; + for( leg=0; leg b[leg] ) return 1; + } + return 0; +} + +static void add( leg_t *oper, leg_t const *oper2, int legs ) +{ + dleg_t acc = 0; + while( legs-- > 0 ) + { + acc += oper[legs] + oper2[legs]; + oper[legs] = (leg_t)acc; + acc >>= 8*sizeof(leg_t); + } + while( acc ) + { + acc += oper[legs]; + oper[legs--] = (leg_t)acc; + acc >>= 8*sizeof(leg_t); + } +} + +static void adm( leg_t *oper, leg_t const *oper2, int legs, int flegs ) +{ + dleg_t acc = 0; + while( legs-- > 0 ) + { + acc += oper[legs] + oper2[legs]; + oper[legs] = (leg_t)acc; + acc >>= 8*sizeof(leg_t); + } + while( acc && flegs-- ) + { + acc += oper[legs]; + oper[legs--] = (leg_t)acc; + acc >>= 8*sizeof(leg_t); + } +} + +static void sub( leg_t *oper, leg_t const *oper2, int legs ) +{ + int borrow = 0, borrow_temp; + while( legs-- ) + { + leg_t temp = oper[legs] - oper2[legs]; + borrow_temp = temp > oper[legs]; + oper[legs] = temp - borrow; + borrow = borrow_temp | ( oper[legs] > temp ); + } + while( borrow ) + { + leg_t temp = oper[legs] - borrow; + borrow = temp > oper[legs]; + oper[legs--] = temp; + } +printf( "sub: %d\n", legs ); +} + +/* result is guaranteed to be initialized with enough legs prepended to take the carry */ +static void mp_mul_uint_add( leg_t *result, leg_t const *oper, leg_t fac, int legs ) +{ + dleg_t acc = 0; + while( legs-- ) + { + acc += (dleg_t)result[legs] + (dleg_t)oper[legs] * (dleg_t)fac; + result[legs] = (leg_t)acc; + acc >>= 8*sizeof(leg_t); + } + while( acc ) + { + acc += result[legs]; + result[legs--] = (leg_t)acc; + acc >>= 8*sizeof(leg_t); + } +} + +static void mul( leg_t * result, leg_t const *oper, leg_t const *oper2, int legs ) +{ + int leg = legs; + while( leg-- ) + mp_mul_uint_add( result + 1 + leg, oper, oper2[leg], legs ); +} + +static int sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int legs ) +{ + int borrow = 0, borrow_temp; + while( legs-- ) + { + leg_t temp = a[legs] - b[legs]; + borrow_temp = temp > a[legs]; + result[legs] = temp - borrow; + borrow = borrow_temp | (result[legs] > temp ); + } + return borrow; +} + +static void negate( leg_t * p, int legs ) +{ + while( legs && !p[legs-1] ) p[legs--]^=(leg_t)-1; + if( !legs ) return; + p[legs-1]--; + while( legs--) p[legs]^=(leg_t)-1; +} + +void kara2( leg_t* p, leg_t *a, leg_t *b, int len ) +{ +printf( "L%d karaloop: a*b ", len ); dump_int( a, len, 0 ); putchar('*'); dump_int( b, len, 1 ); + memset( p, 0, 2 * len * sizeof( leg_t )); + if( len == 2 ) + { + mul( p, a, b, len ); + printf( "L%d p:", len ); dump_int( p, len*2, 1 ); + } + else + { + leg_t scratch[len]; + int sign = 0, n = len / 2; + + if( mp_cmp( a, a + n, n ) > 0 ) + sub_mod( scratch, a, a + n, n ); + else + { + sub_mod( scratch, a + n, a, n ); + sign = 1; + } + + if( mp_cmp( b, b + n, n ) > 0 ) + sub_mod( scratch + n, b, b + n, n ); + else + { + sub_mod( scratch + n, b + n, b, n ); + sign ^= 1; + } + +printf( "L%d (x0-x1)*(y1-y0)=", len ); dump_int( scratch, n, 0 ); putchar('*'); dump_int( scratch + n, n, 0 ); printf( "(sign = %d)\n", sign ); + + kara2( p + n, scratch, scratch + n, n ); + if( !sign ) + negate( p, len + n ); + +printf( "L%d psub:", len ); dump_int( p, len*2, 1 ); + + kara2( scratch, a + n, b + n, n ); + adm( p + len, scratch, len, len ); + adm( p + n, scratch, len, n ); + + kara2( scratch, a, b, n ); + adm( p + n, scratch, len, n ); + adm( p, scratch, len, 0 ); +printf( "L%d p:", len ); dump_int( p, len*2, 1 ); + } +} + +void kara( uint8_t* p, uint8_t *a, uint8_t *b, int len ) +{ + int i; +memset( p, 0, 2 * len * sizeof(leg_t ) ); + if( len <= 3 ) + { +printf( "L%d karaend: a*b ", len ); dump_int( a, len, 0 ); putchar('*'); dump_int( b, len, 1 ); + mul( p, a, b, len ); +printf( "L%d result:\n", len ); dump_int( p, len*2, 1 ); + } + else { + int n = len / 2; + int l = ( len + 1 ) / 2; + int o = l - n; + int sh = l + 1; + uint8_t scratch[2*sh]; + uint8_t *midpoint = p + len * 2 - n; + +printf( "L%d karaloop: a*b ", len ); dump_int( a, len, 0 ); putchar('*'); dump_int( b, len, 1 ); +printf( "Values n: %d l: %d o: %d sh: %d\n", n, l, o, sh ); + + memset( scratch, 0, sizeof(scratch)); + memcpy( scratch + 1 , a, l ); + memcpy( scratch + 1 + sh , b, l ); + + /* P3 = (a1+a2)*(b1+b2) */ + add( scratch + 1 + o, a + l, n ); + add( scratch + sh + 1 + o, b + l, n ); + + printf( "L%d a1+a2: \n", len ); dump_int( scratch, sh, 1 ); + printf( "L%d b1+b2: \n", len ); dump_int( scratch+sh, sh, 1 ); + + kara( midpoint - 2 * sh, scratch, scratch + sh, sh ); + + printf( "L%d (a1+a2)*(b1+b2)<