diff options
| author | Dirk Engling <erdgeist@erdgeist.org> | 2013-07-18 21:31:26 +0200 |
|---|---|---|
| committer | Dirk Engling <erdgeist@erdgeist.org> | 2013-07-18 21:31:26 +0200 |
| commit | aae5320226e9f0fcc764471337326ab690683df7 (patch) | |
| tree | 7fba35f45b2e996260dcfc1194e1d92e1623d5f3 | |
| parent | ff41516f91aa809397c96101bc19ebe0289456b6 (diff) | |
Enable unrolling to speed up base case mul
| -rw-r--r-- | powm.c | 61 |
1 files changed, 51 insertions, 10 deletions
| @@ -2,6 +2,7 @@ | |||
| 2 | #include <stdint.h> | 2 | #include <stdint.h> |
| 3 | #include <string.h> | 3 | #include <string.h> |
| 4 | #include <stdio.h> | 4 | #include <stdio.h> |
| 5 | #include <assert.h> | ||
| 5 | 6 | ||
| 6 | /* | 7 | /* |
| 7 | This code implements base ^ exp mod p with base being a small integer, | 8 | This code implements base ^ exp mod p with base being a small integer, |
| @@ -25,7 +26,6 @@ does not leak information about the number of bits set, but on average doubling | |||
| 25 | time needed. Discuss ;) | 26 | time needed. Discuss ;) |
| 26 | */ | 27 | */ |
| 27 | 28 | ||
| 28 | static int run_tests( ); | ||
| 29 | #if 0 | 29 | #if 0 |
| 30 | typedef uint32_t leg_t; | 30 | typedef uint32_t leg_t; |
| 31 | typedef uint64_t dleg_t; | 31 | typedef uint64_t dleg_t; |
| @@ -37,9 +37,16 @@ typedef uint128_t dleg_t; | |||
| 37 | 37 | ||
| 38 | //#define WITH_MEASURE_GMP | 38 | //#define WITH_MEASURE_GMP |
| 39 | //#define WITH_PREVENT_TIMING_ATTACKS | 39 | //#define WITH_PREVENT_TIMING_ATTACKS |
| 40 | //#define WITH_TESTS | ||
| 41 | //#define WITHOUT_UNROLLING | ||
| 40 | #define WITH_ROUNDS 128 | 42 | #define WITH_ROUNDS 128 |
| 43 | |||
| 44 | #ifdef WITH_TESTS | ||
| 45 | static int run_tests( ); | ||
| 46 | #define KARATSUBA_THRESHOLD 4 | ||
| 47 | #else | ||
| 41 | #define KARATSUBA_THRESHOLD 16 | 48 | #define KARATSUBA_THRESHOLD 16 |
| 42 | //#define WITH_TESTS | 49 | #endif |
| 43 | 50 | ||
| 44 | #ifdef WITH_MEASURE_GMP | 51 | #ifdef WITH_MEASURE_GMP |
| 45 | #include <gmp.h> | 52 | #include <gmp.h> |
| @@ -261,18 +268,52 @@ static void mp_negate( leg_t * p, int legs ) | |||
| 261 | result is guaranteed to be initialized with enough legs prepended to take the carry */ | 268 | result is guaranteed to be initialized with enough legs prepended to take the carry */ |
| 262 | static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) | 269 | static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) |
| 263 | { | 270 | { |
| 264 | dleg_t acc = 0; | 271 | dleg_t acc8 = 0; |
| 272 | #ifndef WITHOUT_UNROLLING | ||
| 273 | dleg_t acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0; | ||
| 274 | dleg_t acc5 = 0, acc6 = 0, acc7 = 0; | ||
| 275 | |||
| 276 | while( legs >= 8 ) | ||
| 277 | { | ||
| 278 | acc1 = ( acc8 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 279 | *(result++) = (leg_t)acc1; | ||
| 280 | |||
| 281 | acc2 = ( acc1 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 282 | *(result++) = (leg_t)acc2; | ||
| 283 | |||
| 284 | acc3 = ( acc2 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 285 | *(result++) = (leg_t)acc3; | ||
| 286 | |||
| 287 | acc4 = ( acc3 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 288 | *(result++) = (leg_t)acc4; | ||
| 289 | |||
| 290 | acc5 = ( acc4 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 291 | *(result++) = (leg_t)acc5; | ||
| 292 | |||
| 293 | acc6 = ( acc5 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 294 | *(result++) = (leg_t)acc6; | ||
| 295 | |||
| 296 | acc7 = ( acc6 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 297 | *(result++) = (leg_t)acc7; | ||
| 298 | |||
| 299 | acc8 = ( acc7 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
| 300 | *(result++) = (leg_t)acc8; | ||
| 301 | |||
| 302 | legs -= 8; | ||
| 303 | } | ||
| 304 | acc8 >>= 8*sizeof(leg_t); | ||
| 305 | #endif | ||
| 265 | while( legs-- ) | 306 | while( legs-- ) |
| 266 | { | 307 | { |
| 267 | acc += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | 308 | acc8 += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
| 268 | *(result++) = (leg_t)acc; | 309 | *(result++) = (leg_t)acc8; |
| 269 | acc >>= 8*sizeof(leg_t); | 310 | acc8 >>= 8*sizeof(leg_t); |
| 270 | } | 311 | } |
| 271 | while( acc ) | 312 | while( acc8 ) |
| 272 | { | 313 | { |
| 273 | acc += (dleg_t)*result; | 314 | acc8 += (dleg_t)*result; |
| 274 | *(result++) = (leg_t)acc; | 315 | *(result++) = (leg_t)acc8; |
| 275 | acc >>= 8*sizeof(leg_t); | 316 | acc8 >>= 8*sizeof(leg_t); |
| 276 | } | 317 | } |
| 277 | } | 318 | } |
| 278 | 319 | ||
