summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDirk Engling <erdgeist@erdgeist.org>2013-07-18 21:31:26 +0200
committerDirk Engling <erdgeist@erdgeist.org>2013-07-18 21:31:26 +0200
commitaae5320226e9f0fcc764471337326ab690683df7 (patch)
tree7fba35f45b2e996260dcfc1194e1d92e1623d5f3
parentff41516f91aa809397c96101bc19ebe0289456b6 (diff)
Enable unrolling to speed up base case mul
-rw-r--r--powm.c61
1 files changed, 51 insertions, 10 deletions
diff --git a/powm.c b/powm.c
index b8599df..5c281a6 100644
--- a/powm.c
+++ b/powm.c
@@ -2,6 +2,7 @@
2#include <stdint.h> 2#include <stdint.h>
3#include <string.h> 3#include <string.h>
4#include <stdio.h> 4#include <stdio.h>
5#include <assert.h>
5 6
6/* 7/*
7This code implements base ^ exp mod p with base being a small integer, 8This code implements base ^ exp mod p with base being a small integer,
@@ -25,7 +26,6 @@ does not leak information about the number of bits set, but on average doubling
25time needed. Discuss ;) 26time needed. Discuss ;)
26*/ 27*/
27 28
28static int run_tests( );
29#if 0 29#if 0
30typedef uint32_t leg_t; 30typedef uint32_t leg_t;
31typedef uint64_t dleg_t; 31typedef uint64_t dleg_t;
@@ -37,9 +37,16 @@ typedef uint128_t dleg_t;
37 37
38//#define WITH_MEASURE_GMP 38//#define WITH_MEASURE_GMP
39//#define WITH_PREVENT_TIMING_ATTACKS 39//#define WITH_PREVENT_TIMING_ATTACKS
40//#define WITH_TESTS
41//#define WITHOUT_UNROLLING
40#define WITH_ROUNDS 128 42#define WITH_ROUNDS 128
43
44#ifdef WITH_TESTS
45static int run_tests( );
46#define KARATSUBA_THRESHOLD 4
47#else
41#define KARATSUBA_THRESHOLD 16 48#define KARATSUBA_THRESHOLD 16
42//#define WITH_TESTS 49#endif
43 50
44#ifdef WITH_MEASURE_GMP 51#ifdef WITH_MEASURE_GMP
45#include <gmp.h> 52#include <gmp.h>
@@ -261,18 +268,52 @@ static void mp_negate( leg_t * p, int legs )
261 result is guaranteed to be initialized with enough legs prepended to take the carry */ 268 result is guaranteed to be initialized with enough legs prepended to take the carry */
262static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) 269static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs )
263{ 270{
264 dleg_t acc = 0; 271 dleg_t acc8 = 0;
272#ifndef WITHOUT_UNROLLING
273 dleg_t acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0;
274 dleg_t acc5 = 0, acc6 = 0, acc7 = 0;
275
276 while( legs >= 8 )
277 {
278 acc1 = ( acc8 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
279 *(result++) = (leg_t)acc1;
280
281 acc2 = ( acc1 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
282 *(result++) = (leg_t)acc2;
283
284 acc3 = ( acc2 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
285 *(result++) = (leg_t)acc3;
286
287 acc4 = ( acc3 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
288 *(result++) = (leg_t)acc4;
289
290 acc5 = ( acc4 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
291 *(result++) = (leg_t)acc5;
292
293 acc6 = ( acc5 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
294 *(result++) = (leg_t)acc6;
295
296 acc7 = ( acc6 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
297 *(result++) = (leg_t)acc7;
298
299 acc8 = ( acc7 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
300 *(result++) = (leg_t)acc8;
301
302 legs -= 8;
303 }
304 acc8 >>= 8*sizeof(leg_t);
305#endif
265 while( legs-- ) 306 while( legs-- )
266 { 307 {
267 acc += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 308 acc8 += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac;
268 *(result++) = (leg_t)acc; 309 *(result++) = (leg_t)acc8;
269 acc >>= 8*sizeof(leg_t); 310 acc8 >>= 8*sizeof(leg_t);
270 } 311 }
271 while( acc ) 312 while( acc8 )
272 { 313 {
273 acc += (dleg_t)*result; 314 acc8 += (dleg_t)*result;
274 *(result++) = (leg_t)acc; 315 *(result++) = (leg_t)acc8;
275 acc >>= 8*sizeof(leg_t); 316 acc8 >>= 8*sizeof(leg_t);
276 } 317 }
277} 318}
278 319