diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2013-07-18 21:31:26 +0200 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2013-07-18 21:31:26 +0200 |
commit | aae5320226e9f0fcc764471337326ab690683df7 (patch) | |
tree | 7fba35f45b2e996260dcfc1194e1d92e1623d5f3 | |
parent | ff41516f91aa809397c96101bc19ebe0289456b6 (diff) |
Enable unrolling to speed up base case mul
-rw-r--r-- | powm.c | 61 |
1 files changed, 51 insertions, 10 deletions
@@ -2,6 +2,7 @@ | |||
2 | #include <stdint.h> | 2 | #include <stdint.h> |
3 | #include <string.h> | 3 | #include <string.h> |
4 | #include <stdio.h> | 4 | #include <stdio.h> |
5 | #include <assert.h> | ||
5 | 6 | ||
6 | /* | 7 | /* |
7 | This code implements base ^ exp mod p with base being a small integer, | 8 | This code implements base ^ exp mod p with base being a small integer, |
@@ -25,7 +26,6 @@ does not leak information about the number of bits set, but on average doubling | |||
25 | time needed. Discuss ;) | 26 | time needed. Discuss ;) |
26 | */ | 27 | */ |
27 | 28 | ||
28 | static int run_tests( ); | ||
29 | #if 0 | 29 | #if 0 |
30 | typedef uint32_t leg_t; | 30 | typedef uint32_t leg_t; |
31 | typedef uint64_t dleg_t; | 31 | typedef uint64_t dleg_t; |
@@ -37,9 +37,16 @@ typedef uint128_t dleg_t; | |||
37 | 37 | ||
38 | //#define WITH_MEASURE_GMP | 38 | //#define WITH_MEASURE_GMP |
39 | //#define WITH_PREVENT_TIMING_ATTACKS | 39 | //#define WITH_PREVENT_TIMING_ATTACKS |
40 | //#define WITH_TESTS | ||
41 | //#define WITHOUT_UNROLLING | ||
40 | #define WITH_ROUNDS 128 | 42 | #define WITH_ROUNDS 128 |
43 | |||
44 | #ifdef WITH_TESTS | ||
45 | static int run_tests( ); | ||
46 | #define KARATSUBA_THRESHOLD 4 | ||
47 | #else | ||
41 | #define KARATSUBA_THRESHOLD 16 | 48 | #define KARATSUBA_THRESHOLD 16 |
42 | //#define WITH_TESTS | 49 | #endif |
43 | 50 | ||
44 | #ifdef WITH_MEASURE_GMP | 51 | #ifdef WITH_MEASURE_GMP |
45 | #include <gmp.h> | 52 | #include <gmp.h> |
@@ -261,18 +268,52 @@ static void mp_negate( leg_t * p, int legs ) | |||
261 | result is guaranteed to be initialized with enough legs prepended to take the carry */ | 268 | result is guaranteed to be initialized with enough legs prepended to take the carry */ |
262 | static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) | 269 | static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) |
263 | { | 270 | { |
264 | dleg_t acc = 0; | 271 | dleg_t acc8 = 0; |
272 | #ifndef WITHOUT_UNROLLING | ||
273 | dleg_t acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0; | ||
274 | dleg_t acc5 = 0, acc6 = 0, acc7 = 0; | ||
275 | |||
276 | while( legs >= 8 ) | ||
277 | { | ||
278 | acc1 = ( acc8 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
279 | *(result++) = (leg_t)acc1; | ||
280 | |||
281 | acc2 = ( acc1 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
282 | *(result++) = (leg_t)acc2; | ||
283 | |||
284 | acc3 = ( acc2 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
285 | *(result++) = (leg_t)acc3; | ||
286 | |||
287 | acc4 = ( acc3 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
288 | *(result++) = (leg_t)acc4; | ||
289 | |||
290 | acc5 = ( acc4 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
291 | *(result++) = (leg_t)acc5; | ||
292 | |||
293 | acc6 = ( acc5 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
294 | *(result++) = (leg_t)acc6; | ||
295 | |||
296 | acc7 = ( acc6 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
297 | *(result++) = (leg_t)acc7; | ||
298 | |||
299 | acc8 = ( acc7 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | ||
300 | *(result++) = (leg_t)acc8; | ||
301 | |||
302 | legs -= 8; | ||
303 | } | ||
304 | acc8 >>= 8*sizeof(leg_t); | ||
305 | #endif | ||
265 | while( legs-- ) | 306 | while( legs-- ) |
266 | { | 307 | { |
267 | acc += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; | 308 | acc8 += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
268 | *(result++) = (leg_t)acc; | 309 | *(result++) = (leg_t)acc8; |
269 | acc >>= 8*sizeof(leg_t); | 310 | acc8 >>= 8*sizeof(leg_t); |
270 | } | 311 | } |
271 | while( acc ) | 312 | while( acc8 ) |
272 | { | 313 | { |
273 | acc += (dleg_t)*result; | 314 | acc8 += (dleg_t)*result; |
274 | *(result++) = (leg_t)acc; | 315 | *(result++) = (leg_t)acc8; |
275 | acc >>= 8*sizeof(leg_t); | 316 | acc8 >>= 8*sizeof(leg_t); |
276 | } | 317 | } |
277 | } | 318 | } |
278 | 319 | ||