diff options
author | Dirk Engling <erdgeist@erdgeist.org> | 2013-07-15 14:41:01 +0200 |
---|---|---|
committer | Dirk Engling <erdgeist@erdgeist.org> | 2013-07-15 14:41:01 +0200 |
commit | ce15c0e9a6a3801774a7f76a3036cdf6abd18466 (patch) | |
tree | 46893c2804cbf85d7c7a6a1bf43107f83c694bf3 |
Stable version
-rw-r--r-- | kara.c | 301 | ||||
-rw-r--r-- | powm.c | 516 |
2 files changed, 817 insertions, 0 deletions
@@ -0,0 +1,301 @@ | |||
1 | #include <stdio.h> | ||
2 | #include <stdint.h> | ||
3 | #include <string.h> | ||
4 | |||
5 | typedef uint8_t leg_t; | ||
6 | typedef uint16_t dleg_t; | ||
7 | |||
8 | enum { g_legs = 256 }; | ||
9 | |||
10 | #define dump_int(A,B,C) dump_int2(A,B,C) | ||
11 | static void dump_int2( uint8_t const * p, int l, int nl ) | ||
12 | { | ||
13 | while( l-- ) | ||
14 | printf( "%02X", *(p++) ); | ||
15 | if( nl ) putchar( 10 ); | ||
16 | } | ||
17 | |||
18 | //#define printf(...) | ||
19 | //#define putchar(A) | ||
20 | |||
21 | static int mp_cmp( leg_t const *a, leg_t const *b, int legs ) | ||
22 | { | ||
23 | int leg; | ||
24 | for( leg=0; leg<legs; ++leg ) | ||
25 | { | ||
26 | if( a[leg] < b[leg] ) return -1; | ||
27 | if( a[leg] > b[leg] ) return 1; | ||
28 | } | ||
29 | return 0; | ||
30 | } | ||
31 | |||
32 | static void add( leg_t *oper, leg_t const *oper2, int legs ) | ||
33 | { | ||
34 | dleg_t acc = 0; | ||
35 | while( legs-- > 0 ) | ||
36 | { | ||
37 | acc += oper[legs] + oper2[legs]; | ||
38 | oper[legs] = (leg_t)acc; | ||
39 | acc >>= 8*sizeof(leg_t); | ||
40 | } | ||
41 | while( acc ) | ||
42 | { | ||
43 | acc += oper[legs]; | ||
44 | oper[legs--] = (leg_t)acc; | ||
45 | acc >>= 8*sizeof(leg_t); | ||
46 | } | ||
47 | } | ||
48 | |||
49 | static void adm( leg_t *oper, leg_t const *oper2, int legs, int flegs ) | ||
50 | { | ||
51 | dleg_t acc = 0; | ||
52 | while( legs-- > 0 ) | ||
53 | { | ||
54 | acc += oper[legs] + oper2[legs]; | ||
55 | oper[legs] = (leg_t)acc; | ||
56 | acc >>= 8*sizeof(leg_t); | ||
57 | } | ||
58 | while( acc && flegs-- ) | ||
59 | { | ||
60 | acc += oper[legs]; | ||
61 | oper[legs--] = (leg_t)acc; | ||
62 | acc >>= 8*sizeof(leg_t); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | static void sub( leg_t *oper, leg_t const *oper2, int legs ) | ||
67 | { | ||
68 | int borrow = 0, borrow_temp; | ||
69 | while( legs-- ) | ||
70 | { | ||
71 | leg_t temp = oper[legs] - oper2[legs]; | ||
72 | borrow_temp = temp > oper[legs]; | ||
73 | oper[legs] = temp - borrow; | ||
74 | borrow = borrow_temp | ( oper[legs] > temp ); | ||
75 | } | ||
76 | while( borrow ) | ||
77 | { | ||
78 | leg_t temp = oper[legs] - borrow; | ||
79 | borrow = temp > oper[legs]; | ||
80 | oper[legs--] = temp; | ||
81 | } | ||
82 | printf( "sub: %d\n", legs ); | ||
83 | } | ||
84 | |||
85 | /* result is guaranteed to be initialized with enough legs prepended to take the carry */ | ||
86 | static void mp_mul_uint_add( leg_t *result, leg_t const *oper, leg_t fac, int legs ) | ||
87 | { | ||
88 | dleg_t acc = 0; | ||
89 | while( legs-- ) | ||
90 | { | ||
91 | acc += (dleg_t)result[legs] + (dleg_t)oper[legs] * (dleg_t)fac; | ||
92 | result[legs] = (leg_t)acc; | ||
93 | acc >>= 8*sizeof(leg_t); | ||
94 | } | ||
95 | while( acc ) | ||
96 | { | ||
97 | acc += result[legs]; | ||
98 | result[legs--] = (leg_t)acc; | ||
99 | acc >>= 8*sizeof(leg_t); | ||
100 | } | ||
101 | } | ||
102 | |||
103 | static void mul( leg_t * result, leg_t const *oper, leg_t const *oper2, int legs ) | ||
104 | { | ||
105 | int leg = legs; | ||
106 | while( leg-- ) | ||
107 | mp_mul_uint_add( result + 1 + leg, oper, oper2[leg], legs ); | ||
108 | } | ||
109 | |||
110 | static int sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int legs ) | ||
111 | { | ||
112 | int borrow = 0, borrow_temp; | ||
113 | while( legs-- ) | ||
114 | { | ||
115 | leg_t temp = a[legs] - b[legs]; | ||
116 | borrow_temp = temp > a[legs]; | ||
117 | result[legs] = temp - borrow; | ||
118 | borrow = borrow_temp | (result[legs] > temp ); | ||
119 | } | ||
120 | return borrow; | ||
121 | } | ||
122 | |||
123 | static void negate( leg_t * p, int legs ) | ||
124 | { | ||
125 | while( legs && !p[legs-1] ) p[legs--]^=(leg_t)-1; | ||
126 | if( !legs ) return; | ||
127 | p[legs-1]--; | ||
128 | while( legs--) p[legs]^=(leg_t)-1; | ||
129 | } | ||
130 | |||
131 | void kara2( leg_t* p, leg_t *a, leg_t *b, int len ) | ||
132 | { | ||
133 | printf( "L%d karaloop: a*b ", len ); dump_int( a, len, 0 ); putchar('*'); dump_int( b, len, 1 ); | ||
134 | memset( p, 0, 2 * len * sizeof( leg_t )); | ||
135 | if( len == 2 ) | ||
136 | { | ||
137 | mul( p, a, b, len ); | ||
138 | printf( "L%d p:", len ); dump_int( p, len*2, 1 ); | ||
139 | } | ||
140 | else | ||
141 | { | ||
142 | leg_t scratch[len]; | ||
143 | int sign = 0, n = len / 2; | ||
144 | |||
145 | if( mp_cmp( a, a + n, n ) > 0 ) | ||
146 | sub_mod( scratch, a, a + n, n ); | ||
147 | else | ||
148 | { | ||
149 | sub_mod( scratch, a + n, a, n ); | ||
150 | sign = 1; | ||
151 | } | ||
152 | |||
153 | if( mp_cmp( b, b + n, n ) > 0 ) | ||
154 | sub_mod( scratch + n, b, b + n, n ); | ||
155 | else | ||
156 | { | ||
157 | sub_mod( scratch + n, b + n, b, n ); | ||
158 | sign ^= 1; | ||
159 | } | ||
160 | |||
161 | printf( "L%d (x0-x1)*(y1-y0)=", len ); dump_int( scratch, n, 0 ); putchar('*'); dump_int( scratch + n, n, 0 ); printf( "(sign = %d)\n", sign ); | ||
162 | |||
163 | kara2( p + n, scratch, scratch + n, n ); | ||
164 | if( !sign ) | ||
165 | negate( p, len + n ); | ||
166 | |||
167 | printf( "L%d psub:", len ); dump_int( p, len*2, 1 ); | ||
168 | |||
169 | kara2( scratch, a + n, b + n, n ); | ||
170 | adm( p + len, scratch, len, len ); | ||
171 | adm( p + n, scratch, len, n ); | ||
172 | |||
173 | kara2( scratch, a, b, n ); | ||
174 | adm( p + n, scratch, len, n ); | ||
175 | adm( p, scratch, len, 0 ); | ||
176 | printf( "L%d p:", len ); dump_int( p, len*2, 1 ); | ||
177 | } | ||
178 | } | ||
179 | |||
180 | void kara( uint8_t* p, uint8_t *a, uint8_t *b, int len ) | ||
181 | { | ||
182 | int i; | ||
183 | memset( p, 0, 2 * len * sizeof(leg_t ) ); | ||
184 | if( len <= 3 ) | ||
185 | { | ||
186 | printf( "L%d karaend: a*b ", len ); dump_int( a, len, 0 ); putchar('*'); dump_int( b, len, 1 ); | ||
187 | mul( p, a, b, len ); | ||
188 | printf( "L%d result:\n", len ); dump_int( p, len*2, 1 ); | ||
189 | } | ||
190 | else { | ||
191 | int n = len / 2; | ||
192 | int l = ( len + 1 ) / 2; | ||
193 | int o = l - n; | ||
194 | int sh = l + 1; | ||
195 | uint8_t scratch[2*sh]; | ||
196 | uint8_t *midpoint = p + len * 2 - n; | ||
197 | |||
198 | printf( "L%d karaloop: a*b ", len ); dump_int( a, len, 0 ); putchar('*'); dump_int( b, len, 1 ); | ||
199 | printf( "Values n: %d l: %d o: %d sh: %d\n", n, l, o, sh ); | ||
200 | |||
201 | memset( scratch, 0, sizeof(scratch)); | ||
202 | memcpy( scratch + 1 , a, l ); | ||
203 | memcpy( scratch + 1 + sh , b, l ); | ||
204 | |||
205 | /* P3 = (a1+a2)*(b1+b2) */ | ||
206 | add( scratch + 1 + o, a + l, n ); | ||
207 | add( scratch + sh + 1 + o, b + l, n ); | ||
208 | |||
209 | printf( "L%d a1+a2: \n", len ); dump_int( scratch, sh, 1 ); | ||
210 | printf( "L%d b1+b2: \n", len ); dump_int( scratch+sh, sh, 1 ); | ||
211 | |||
212 | kara( midpoint - 2 * sh, scratch, scratch + sh, sh ); | ||
213 | |||
214 | printf( "L%d (a1+a2)*(b1+b2)<<n: \n", len ); dump_int( p, len*2, 1 ); | ||
215 | |||
216 | /* P2 = a2*b2 */ | ||
217 | kara( scratch, a + l, b + l, n ); | ||
218 | |||
219 | printf( "L%d a2*b2: \n", len ); dump_int( scratch, n*2, 1 ); | ||
220 | |||
221 | /* P = ( P3 - P2 ) * 2^n + P2 ... and later: + P1 * 2^(2*n) - P1 * 2^n*/ | ||
222 | add( midpoint - n, scratch, n * 2 ); | ||
223 | |||
224 | printf( "L%d p + p2: \n", len ); dump_int( p, len*2, 1 ); | ||
225 | |||
226 | sub( midpoint - n * 2, scratch, n * 2 ); | ||
227 | |||
228 | printf( "L%d p - p2<<n: \n", len ); dump_int( p, len*2, 1 ); | ||
229 | |||
230 | /* now + P1 * 2^(2*n) - P1 * 2^n*/ | ||
231 | kara( scratch, a, b, l ); | ||
232 | |||
233 | printf( "L%d a1*b1: \n", len ); dump_int( scratch, l*2, 1 ); | ||
234 | |||
235 | sub( midpoint - l * 2, scratch, l * 2 ); | ||
236 | |||
237 | printf( "L%d p - p1<<n: \n", len ); dump_int( p, len*2, 1 ); | ||
238 | |||
239 | add( midpoint - n - l * 2, scratch, l * 2 ); | ||
240 | |||
241 | printf( "L%d p + p1<<n^2: \n", len ); dump_int( p, len*2, 1 ); | ||
242 | } | ||
243 | } | ||
244 | |||
245 | #undef putchar | ||
246 | int main() { | ||
247 | uint8_t a[] = { 0x99,0xf3,0x0a,0x3d,0x8a,0xdd,0x2d,0x1f }; | ||
248 | uint8_t b[] = { 0xf1,0x48,0x71,0x4b,0xf7,0xb1,0x9a,0x30 }; | ||
249 | |||
250 | // uint8_t a[g_legs], b[g_legs]; | ||
251 | // uint8_t p[2*g_legs]; | ||
252 | |||
253 | /* | ||
254 | static const leg_t b[] = { | ||
255 | 0x3D, 0xA9, 0x76, 0x59, 0xE2, 0x80, 0xDB, 0x0B, 0xE6, 0x5B, 0xCC, 0x3A, 0xB7, 0x8F, 0xDA, 0xA9, 0xB7, 0xB7, 0x68, 0xC8, 0x99, 0x31, 0xD7, 0x8D, 0xF8, 0xB1, 0x17, 0x25, 0x33, 0x9E, 0xBC, 0x93, | ||
256 | 0xAA, 0x7F, 0xBD, 0x95, 0x62, 0x05, 0x9F, 0x1F, 0x4C, 0x2D, 0xE6, 0x7D, 0xAD, 0x47, 0x52, 0x7E, 0x52, 0x6A, 0x65, 0x3A, 0x7A, 0x67, 0x4B, 0xD5, 0x54, 0x01, 0xEA, 0x4F, 0x3E, 0xD7, 0x3A, 0x2F, | ||
257 | 0x70, 0xB5, 0x6F, 0x52, 0x7F, 0x6F, 0x60, 0x4F, 0xF3, 0xE5, 0x6C, 0xC2, 0xBD, 0x9F, 0x04, 0x8C, 0xFE, 0xA8, 0x0D, 0x9A, 0x6E, 0xC9, 0xFC, 0xD3, 0x3A, 0xD3, 0x6F, 0xD8, 0x22, 0xC3, 0x9F, 0x34, | ||
258 | 0x91, 0xF3, 0x0C, 0x52, 0xF7, 0x98, 0xDA, 0x6A, 0x18, 0xC3, 0xDC, 0xE2, 0x01, 0x88, 0xD8, 0x4C, 0xCB, 0x22, 0x51, 0x76, 0x25, 0x9E, 0x08, 0x0F, 0xA8, 0x9D, 0x1D, 0xCD, 0x9D, 0x38, 0x1C, 0xC5, | ||
259 | 0xBE, 0xAC, 0xD4, 0x6F, 0x3C, 0xDD, 0x11, 0x96, 0x91, 0xA4, 0xF5, 0x57, 0x29, 0x29, 0xB9, 0x0C, 0x67, 0xDE, 0x8F, 0xA0, 0x23, 0x86, 0x47, 0x14, 0xC2, 0x8A, 0x61, 0xD4, 0x74, 0x11, 0x40, 0x2D, | ||
260 | 0x6C, 0x09, 0x06, 0x0D, 0x41, 0x05, 0x86, 0x39, 0xE4, 0xFC, 0xCF, 0x1D, 0x63, 0x8F, 0x45, 0x66, 0x9E, 0x10, 0xFD, 0xE2, 0x8E, 0x54, 0x80, 0x6B, 0xF1, 0xD2, 0x7D, 0x0B, 0x5C, 0x7D, 0xC9, 0xC2, | ||
261 | 0xB6, 0x16, 0xD6, 0xFA, 0x8B, 0xE2, 0xC9, 0x1D, 0xE8, 0x10, 0x54, 0x64, 0xE9, 0xF8, 0x0A, 0x5F, 0x34, 0x72, 0x08, 0x69, 0x90, 0xDA, 0xCF, 0x1A, 0x7E, 0x2C, 0x75, 0xA5, 0x8E, 0x25, 0xF1, 0x42, | ||
262 | 0x8F, 0xB4, 0x83, 0x2E, 0xF8, 0x27, 0xDE, 0x84, 0xCA, 0x06, 0xDA, 0x91, 0xC2, 0xB3, 0xE7, 0xE2, 0x4F, 0x02, 0x41, 0x93, 0x78, 0x7A, 0x82, 0x78, 0x8B, 0xD7, 0x05, 0x62, 0xDA, 0x60, 0xE3, 0x92, | ||
263 | 0x46, 0xBD, 0xB7, 0x33, 0x6E, 0x84, 0x52, 0xD9, 0x1D, 0x7D, 0x37, 0xA2, 0x3F, 0xB8, 0xCF, 0x61, 0xB1, 0x8A, 0x9E, 0xF1, 0x50, 0xC8, 0x95, 0x3A, 0xC9, 0x39, 0x19, 0xD1, 0x2A, 0x4B, 0x1A, 0x67, | ||
264 | 0xFD, 0xC6, 0x5A, 0x26, 0x9B, 0x51, 0xC1, 0xEF, 0x09, 0x95, 0x48, 0x43, 0x20, 0xE7, 0x39, 0xF4, 0x6C, 0x79, 0x51, 0xA5, 0x23, 0xCE, 0xF7, 0x85, 0xE6, 0x0C, 0x6E, 0xFD, 0xFF, 0xB7, 0xA9, 0xA9, | ||
265 | 0x5F, 0x66, 0x61, 0x46, 0xCB, 0x44, 0x1F, 0x59, 0xAE, 0x01, 0xE0, 0xF3, 0x63, 0xA9, 0x31, 0x5D, 0x04, 0xBA, 0x04, 0x4A, 0xEB, 0x4E, 0xEF, 0xD4, 0x85, 0x63, 0x21, 0x5F, 0x72, 0xC8, 0xD9, 0x89, | ||
266 | 0x62, 0xD2, 0x18, 0x77, 0x12, 0x96, 0xEF, 0x6A, 0x20, 0xBD, 0x72, 0xB9, 0xB2, 0x1E, 0x6B, 0x3D, 0x53, 0xC4, 0x4F, 0xAB, 0x73, 0x48, 0x10, 0xF7, 0xD2, 0x03, 0xA9, 0xE0, 0xD7, 0xCE, 0x25, 0xD0, | ||
267 | 0x2E, 0x52, 0x98, 0x9E, 0xCC, 0xF8, 0x5F, 0x34, 0x91, 0x2A, 0x04, 0x91, 0x3E, 0x9E, 0xBD, 0x87, 0x82, 0x67, 0x53, 0x7D, 0x4A, 0x61, 0x2A, 0x18, 0x51, 0xE7, 0x5D, 0x99, 0x98, 0xF0, 0x01, 0xDB, | ||
268 | 0xC9, 0xC7, 0x7F, 0x0C, 0x35, 0x2D, 0x40, 0x8C, 0xA7, 0x96, 0xD1, 0x82, 0x04, 0xA6, 0x36, 0xF7, 0xE4, 0x40, 0x40, 0x92, 0x1C, 0x1E, 0x46, 0x7C, 0x52, 0x4E, 0x7C, 0x7A, 0x7E, 0xD3, 0x6C, 0x41, | ||
269 | 0x2A, 0x43, 0x4C, 0xEB, 0x23, 0x0B, 0x2D, 0xFE, 0x35, 0x49, 0xC5, 0x77, 0x7A, 0x17, 0xFB, 0x04, 0xB8, 0x50, 0xDE, 0x95, 0xD9, 0x7A, 0xC4, 0x0A, 0x55, 0xEA, 0x6F, 0x75, 0x41, 0xC4, 0xF8, 0x2B, | ||
270 | 0x37, 0xBF, 0x90, 0xFE, 0x52, 0x07, 0x4F, 0x19, 0xFA, 0x8F, 0x75, 0xF0, 0x06, 0x7E, 0x82, 0xB1, 0x8A, 0x1A, 0xC0, 0x24, 0xB3, 0x0E, 0x9B, 0x12, 0xC1, 0x4A, 0xB0, 0xDD, 0xCC, 0x03, 0xAA, 0x20 | ||
271 | }; | ||
272 | uint8_t a[sizeof(b)]; | ||
273 | */ | ||
274 | |||
275 | uint8_t p[2*sizeof(b)]; | ||
276 | int i; | ||
277 | |||
278 | srandomdev(); | ||
279 | |||
280 | // while( 1 ) | ||
281 | { | ||
282 | // memset( a, 0, sizeof(a) ); | ||
283 | // a[sizeof(b)-1]=2; | ||
284 | /* | ||
285 | memset( b, 0, sizeof(b) ); | ||
286 | a[g_legs-1] = random() & 0xff; | ||
287 | for( i=g_legs/2; i<sizeof(a); ++i ) | ||
288 | { | ||
289 | b[i] = random() & 0xff; | ||
290 | } | ||
291 | */ | ||
292 | kara2( p, a, b, sizeof(b)); | ||
293 | |||
294 | dump_int2( a, sizeof(a), 0 ); | ||
295 | putchar( '*' ); | ||
296 | dump_int2( b, sizeof(b), 0 ); | ||
297 | putchar( '-' ); | ||
298 | dump_int2( p, sizeof(p), 1 ); | ||
299 | } | ||
300 | return 0; | ||
301 | } | ||
@@ -0,0 +1,516 @@ | |||
1 | #include <stdint.h> | ||
2 | #include <string.h> | ||
3 | #include <stdio.h> | ||
4 | |||
5 | /* | ||
6 | This code implements base ^ exp mod p with base being a small integer, | ||
7 | g given in g_prime and the r number of bits (4096) being used to pre-calculate | ||
8 | g_r_square = r^2 mod p in order to speed up montgomery reduction | ||
9 | |||
10 | Because our primes conveniently end in 0xf..f for word sizes up to 64, the | ||
11 | multiplicative inverse is ("pre-calculated") g_r_inverse = 1. | ||
12 | |||
13 | Todo: check if little endian internal representation is more efficient, also | ||
14 | write an import/export function for the exponent/result from big endian uint8_t | ||
15 | representation which is the default transport representation. | ||
16 | |||
17 | Currently, with gcc -O3, we measure at around 2,75x the time libgmp needs for | ||
18 | it's powm which is not so bad, considering we don't use any assembler optimization. | ||
19 | |||
20 | Hot spot function is mp_mul_uint_add. | ||
21 | |||
22 | When we're done measuring, we should set PREVENT_TIMING_ATTACKS so that powm() | ||
23 | does not leak information about the number of bits set, but on average doubling the | ||
24 | time needed. Discuss ;) | ||
25 | */ | ||
26 | |||
27 | #if 0 | ||
28 | typedef uint32_t leg_t; | ||
29 | typedef uint64_t dleg_t; | ||
30 | #else | ||
31 | typedef uint64_t leg_t; | ||
32 | typedef unsigned int uint128_t __attribute__((mode(TI))); | ||
33 | typedef uint128_t dleg_t; | ||
34 | #endif | ||
35 | |||
36 | //#define WITH_MEASURE_GMP | ||
37 | //#define WITH_PREVENT_TIMING_ATTACKS | ||
38 | #define WITH_ROUNDS 128 | ||
39 | #define KARATSUBA_THRESHOLD 16 | ||
40 | |||
41 | #ifdef WITH_MEASURE_GMP | ||
42 | #include <gmp.h> | ||
43 | #endif | ||
44 | |||
45 | #ifdef DEBUG_FUNCTIONS | ||
46 | static void dump_int( int level, char * prefix, leg_t const * p, int l, int nl ) | ||
47 | { | ||
48 | printf( "L%d %s: ", level, prefix ); | ||
49 | while( l-- ) | ||
50 | printf( "%08llX", *(p++) ); | ||
51 | if( nl ) | ||
52 | putchar( 10 ); | ||
53 | } | ||
54 | #endif | ||
55 | |||
56 | /* Test values for smaller legs sizes */ | ||
57 | #if 0 | ||
58 | static leg_t g_prime[] = { 4293918689 }; | ||
59 | static leg_t g_r_square[] = { 333456065 }; | ||
60 | static leg_t g_r_inverse = 4192173023; | ||
61 | #endif | ||
62 | |||
63 | #if 0 | ||
64 | static leg_t g_prime[] = { 0xFFFFFFFF, 0xFFFFFFA1 }; | ||
65 | /* g_prime == 18446744073709551521 */ | ||
66 | /* g_r == 18446744073709551616 */ | ||
67 | /* g_r^2 == 340282366920938463463374607431768211456 */ | ||
68 | static leg_t g_r_square[] = { 0, 9025 }; | ||
69 | /* g_prime^-1 mod g_r = 12815632724892951649 */ | ||
70 | static leg_t g_r_inverse = 3571604383; | ||
71 | #endif | ||
72 | |||
73 | #if 0 | ||
74 | static leg_t g_prime[] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffff61 }; | ||
75 | /* g_prime == 340282366920938463463374607431768211297 */ | ||
76 | /* g_r == 340282366920938463463374607431768211456 */ | ||
77 | /* g_r^2 == 115792089237316195423570985008687907853269984665640564039457584007913129639936 */ | ||
78 | static leg_t g_r_square[] = { 0, 0, 0, 25281 }; | ||
79 | /* g_prime^-1 mod g_r = 104866892950477891256008526818595234977 */ | ||
80 | static leg_t g_r_inverse = 783358815; | ||
81 | #endif | ||
82 | |||
83 | #if 0 | ||
84 | static leg_t g_prime[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x61 }; | ||
85 | /* g_prime == 340282366920938463463374607431768211297 */ | ||
86 | /* g_r == 340282366920938463463374607431768211456 */ | ||
87 | /* g_r^2 == 115792089237316195423570985008687907853269984665640564039457584007913129639936 */ | ||
88 | static leg_t g_r_square[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x62, 0xc1 }; | ||
89 | /* g_prime^-1 mod g_r = 104866892950477891256008526818595234977 */ | ||
90 | static leg_t g_r_inverse = 0x5f; | ||
91 | #endif | ||
92 | |||
93 | #if 0 | ||
94 | /* Standard prime for leg_t defined as uint32_t */ | ||
95 | static const leg_t g_prime[] = { | ||
96 | 0xFFFFFFFF, 0xFFFFFFFF, 0xC90FDAA2, 0x2168C234, 0xC4C6628B, 0x80DC1CD1, 0x29024E08, 0x8A67CC74, | ||
97 | 0x020BBEA6, 0x3B139B22, 0x514A0879, 0x8E3404DD, 0xEF9519B3, 0xCD3A431B, 0x302B0A6D, 0xF25F1437, | ||
98 | 0x4FE1356D, 0x6D51C245, 0xE485B576, 0x625E7EC6, 0xF44C42E9, 0xA637ED6B, 0x0BFF5CB6, 0xF406B7ED, | ||
99 | 0xEE386BFB, 0x5A899FA5, 0xAE9F2411, 0x7C4B1FE6, 0x49286651, 0xECE45B3D, 0xC2007CB8, 0xA163BF05, | ||
100 | 0x98DA4836, 0x1C55D39A, 0x69163FA8, 0xFD24CF5F, 0x83655D23, 0xDCA3AD96, 0x1C62F356, 0x208552BB, | ||
101 | 0x9ED52907, 0x7096966D, 0x670C354E, 0x4ABC9804, 0xF1746C08, 0xCA18217C, 0x32905E46, 0x2E36CE3B, | ||
102 | 0xE39E772C, 0x180E8603, 0x9B2783A2, 0xEC07A28F, 0xB5C55DF0, 0x6F4C52C9, 0xDE2BCBF6, 0x95581718, | ||
103 | 0x3995497C, 0xEA956AE5, 0x15D22618, 0x98FA0510, 0x15728E5A, 0x8AAAC42D, 0xAD33170D, 0x04507A33, | ||
104 | 0xA85521AB, 0xDF1CBA64, 0xECFB8504, 0x58DBEF0A, 0x8AEA7157, 0x5D060C7D, 0xB3970F85, 0xA6E1E4C7, | ||
105 | 0xABF5AE8C, 0xDB0933D7, 0x1E8C94E0, 0x4A25619D, 0xCEE3D226, 0x1AD2EE6B, 0xF12FFA06, 0xD98A0864, | ||
106 | 0xD8760273, 0x3EC86A64, 0x521F2B18, 0x177B200C, 0xBBE11757, 0x7A615D6C, 0x770988C0, 0xBAD946E2, | ||
107 | 0x08E24FA0, 0x74E5AB31, 0x43DB5BFC, 0xE0FD108E, 0x4B82D120, 0xA9210801, 0x1A723C12, 0xA787E6D7, | ||
108 | 0x88719A10, 0xBDBA5B26, 0x99C32718, 0x6AF4E23C, 0x1A946834, 0xB6150BDA, 0x2583E9CA, 0x2AD44CE8, | ||
109 | 0xDBBBC2DB, 0x04DE8EF9, 0x2E8EFC14, 0x1FBECAA6, 0x287C5947, 0x4E6BC05D, 0x99B2964F, 0xA090C3A2, | ||
110 | 0x233BA186, 0x515BE7ED, 0x1F612970, 0xCEE2D7AF, 0xB81BDD76, 0x2170481C, 0xD0069127, 0xD5B05AA9, | ||
111 | 0x93B4EA98, 0x8D8FDDC1, 0x86FFB7DC, 0x90A6C08F, 0x4DF435C9, 0x34063199, 0xFFFFFFFF, 0xFFFFFFFF | ||
112 | }; | ||
113 | |||
114 | static const leg_t g_r_square[] = { | ||
115 | 0x3DA97659, 0xE280DB0B, 0xE65BCC3A, 0xB78FDAA9, 0xB7B768C8, 0x9931D78D, 0xF8B11725, 0x339EBC93, | ||
116 | 0xAA7FBD95, 0x62059F1F, 0x4C2DE67D, 0xAD47527E, 0x526A653A, 0x7A674BD5, 0x5401EA4F, 0x3ED73A2F, | ||
117 | 0x70B56F52, 0x7F6F604F, 0xF3E56CC2, 0xBD9F048C, 0xFEA80D9A, 0x6EC9FCD3, 0x3AD36FD8, 0x22C39F34, | ||
118 | 0x91F30C52, 0xF798DA6A, 0x18C3DCE2, 0x0188D84C, 0xCB225176, 0x259E080F, 0xA89D1DCD, 0x9D381CC5, | ||
119 | 0xBEACD46F, 0x3CDD1196, 0x91A4F557, 0x2929B90C, 0x67DE8FA0, 0x23864714, 0xC28A61D4, 0x7411402D, | ||
120 | 0x6C09060D, 0x41058639, 0xE4FCCF1D, 0x638F4566, 0x9E10FDE2, 0x8E54806B, 0xF1D27D0B, 0x5C7DC9C2, | ||
121 | 0xB616D6FA, 0x8BE2C91D, 0xE8105464, 0xE9F80A5F, 0x34720869, 0x90DACF1A, 0x7E2C75A5, 0x8E25F142, | ||
122 | 0x8FB4832E, 0xF827DE84, 0xCA06DA91, 0xC2B3E7E2, 0x4F024193, 0x787A8278, 0x8BD70562, 0xDA60E392, | ||
123 | 0x46BDB733, 0x6E8452D9, 0x1D7D37A2, 0x3FB8CF61, 0xB18A9EF1, 0x50C8953A, 0xC93919D1, 0x2A4B1A67, | ||
124 | 0xFDC65A26, 0x9B51C1EF, 0x09954843, 0x20E739F4, 0x6C7951A5, 0x23CEF785, 0xE60C6EFD, 0xFFB7A9A9, | ||
125 | 0x5F666146, 0xCB441F59, 0xAE01E0F3, 0x63A9315D, 0x04BA044A, 0xEB4EEFD4, 0x8563215F, 0x72C8D989, | ||
126 | 0x62D21877, 0x1296EF6A, 0x20BD72B9, 0xB21E6B3D, 0x53C44FAB, 0x734810F7, 0xD203A9E0, 0xD7CE25D0, | ||
127 | 0x2E52989E, 0xCCF85F34, 0x912A0491, 0x3E9EBD87, 0x8267537D, 0x4A612A18, 0x51E75D99, 0x98F001DB, | ||
128 | 0xC9C77F0C, 0x352D408C, 0xA796D182, 0x04A636F7, 0xE4404092, 0x1C1E467C, 0x524E7C7A, 0x7ED36C41, | ||
129 | 0x2A434CEB, 0x230B2DFE, 0x3549C577, 0x7A17FB04, 0xB850DE95, 0xD97AC40A, 0x55EA6F75, 0x41C4F82B, | ||
130 | 0x37BF90FE, 0x52074F19, 0xFA8F75F0, 0x067E82B1, 0x8A1AC024, 0xB30E9B12, 0xC14AB0DD, 0xCC03AA20 | ||
131 | }; | ||
132 | |||
133 | static const leg_t g_r_inverse = 1; | ||
134 | */ | ||
135 | #endif | ||
136 | |||
137 | #if 1 | ||
138 | /* Standard prime for leg_t defined as uint64_t */ | ||
139 | static const leg_t g_prime[] = { | ||
140 | 0xFFFFFFFFFFFFFFFF, 0xC90FDAA22168C234, 0xC4C6628B80DC1CD1, 0x29024E088A67CC74, | ||
141 | 0x020BBEA63B139B22, 0x514A08798E3404DD, 0xEF9519B3CD3A431B, 0x302B0A6DF25F1437, | ||
142 | 0x4FE1356D6D51C245, 0xE485B576625E7EC6, 0xF44C42E9A637ED6B, 0x0BFF5CB6F406B7ED, | ||
143 | 0xEE386BFB5A899FA5, 0xAE9F24117C4B1FE6, 0x49286651ECE45B3D, 0xC2007CB8A163BF05, | ||
144 | 0x98DA48361C55D39A, 0x69163FA8FD24CF5F, 0x83655D23DCA3AD96, 0x1C62F356208552BB, | ||
145 | 0x9ED529077096966D, 0x670C354E4ABC9804, 0xF1746C08CA18217C, 0x32905E462E36CE3B, | ||
146 | 0xE39E772C180E8603, 0x9B2783A2EC07A28F, 0xB5C55DF06F4C52C9, 0xDE2BCBF695581718, | ||
147 | 0x3995497CEA956AE5, 0x15D2261898FA0510, 0x15728E5A8AAAC42D, 0xAD33170D04507A33, | ||
148 | 0xA85521ABDF1CBA64, 0xECFB850458DBEF0A, 0x8AEA71575D060C7D, 0xB3970F85A6E1E4C7, | ||
149 | 0xABF5AE8CDB0933D7, 0x1E8C94E04A25619D, 0xCEE3D2261AD2EE6B, 0xF12FFA06D98A0864, | ||
150 | 0xD87602733EC86A64, 0x521F2B18177B200C, 0xBBE117577A615D6C, 0x770988C0BAD946E2, | ||
151 | 0x08E24FA074E5AB31, 0x43DB5BFCE0FD108E, 0x4B82D120A9210801, 0x1A723C12A787E6D7, | ||
152 | 0x88719A10BDBA5B26, 0x99C327186AF4E23C, 0x1A946834B6150BDA, 0x2583E9CA2AD44CE8, | ||
153 | 0xDBBBC2DB04DE8EF9, 0x2E8EFC141FBECAA6, 0x287C59474E6BC05D, 0x99B2964FA090C3A2, | ||
154 | 0x233BA186515BE7ED, 0x1F612970CEE2D7AF, 0xB81BDD762170481C, 0xD0069127D5B05AA9, | ||
155 | 0x93B4EA988D8FDDC1, 0x86FFB7DC90A6C08F, 0x4DF435C934063199, 0xFFFFFFFFFFFFFFFF | ||
156 | }; | ||
157 | |||
158 | static const leg_t g_r_square[] = { | ||
159 | 0x3DA97659E280DB0B, 0xE65BCC3AB78FDAA9, 0xB7B768C89931D78D, 0xF8B11725339EBC93, | ||
160 | 0xAA7FBD9562059F1F, 0x4C2DE67DAD47527E, 0x526A653A7A674BD5, 0x5401EA4F3ED73A2F, | ||
161 | 0x70B56F527F6F604F, 0xF3E56CC2BD9F048C, 0xFEA80D9A6EC9FCD3, 0x3AD36FD822C39F34, | ||
162 | 0x91F30C52F798DA6A, 0x18C3DCE20188D84C, 0xCB225176259E080F, 0xA89D1DCD9D381CC5, | ||
163 | 0xBEACD46F3CDD1196, 0x91A4F5572929B90C, 0x67DE8FA023864714, 0xC28A61D47411402D, | ||
164 | 0x6C09060D41058639, 0xE4FCCF1D638F4566, 0x9E10FDE28E54806B, 0xF1D27D0B5C7DC9C2, | ||
165 | 0xB616D6FA8BE2C91D, 0xE8105464E9F80A5F, 0x3472086990DACF1A, 0x7E2C75A58E25F142, | ||
166 | 0x8FB4832EF827DE84, 0xCA06DA91C2B3E7E2, 0x4F024193787A8278, 0x8BD70562DA60E392, | ||
167 | 0x46BDB7336E8452D9, 0x1D7D37A23FB8CF61, 0xB18A9EF150C8953A, 0xC93919D12A4B1A67, | ||
168 | 0xFDC65A269B51C1EF, 0x0995484320E739F4, 0x6C7951A523CEF785, 0xE60C6EFDFFB7A9A9, | ||
169 | 0x5F666146CB441F59, 0xAE01E0F363A9315D, 0x04BA044AEB4EEFD4, 0x8563215F72C8D989, | ||
170 | 0x62D218771296EF6A, 0x20BD72B9B21E6B3D, 0x53C44FAB734810F7, 0xD203A9E0D7CE25D0, | ||
171 | 0x2E52989ECCF85F34, 0x912A04913E9EBD87, 0x8267537D4A612A18, 0x51E75D9998F001DB, | ||
172 | 0xC9C77F0C352D408C, 0xA796D18204A636F7, 0xE44040921C1E467C, 0x524E7C7A7ED36C41, | ||
173 | 0x2A434CEB230B2DFE, 0x3549C5777A17FB04, 0xB850DE95D97AC40A, 0x55EA6F7541C4F82B, | ||
174 | 0x37BF90FE52074F19, 0xFA8F75F0067E82B1, 0x8A1AC024B30E9B12, 0xC14AB0DDCC03AA20 | ||
175 | }; | ||
176 | |||
177 | static const leg_t g_r_inverse = 1; | ||
178 | #endif | ||
179 | |||
180 | /* Returns 0 if a and b are equal, -1 if a < b, 1 if a > b */ | ||
181 | static int mp_cmp( leg_t const *a, leg_t const *b, int const legs ) | ||
182 | { | ||
183 | int leg; | ||
184 | for( leg=0; leg<legs; ++leg ) | ||
185 | { | ||
186 | if( a[leg] < b[leg] ) return -1; | ||
187 | if( a[leg] > b[leg] ) return 1; | ||
188 | } | ||
189 | return 0; | ||
190 | } | ||
191 | |||
192 | /* Subtract b from a, store in a. Expects enough words prepended | ||
193 | to borrow from */ | ||
194 | static void mp_sub( leg_t *a, leg_t const *b, int legs ) | ||
195 | { | ||
196 | int borrow = 0, borrow_temp; | ||
197 | while( legs-- ) | ||
198 | { | ||
199 | leg_t temp = a[legs] - b[legs]; | ||
200 | borrow_temp = temp > a[legs]; | ||
201 | a[legs] = temp - borrow; | ||
202 | borrow = borrow_temp | ( a[legs] > temp ); | ||
203 | } | ||
204 | while( borrow ) | ||
205 | { | ||
206 | leg_t temp = a[legs] - borrow; | ||
207 | borrow = temp > a[legs]; | ||
208 | a[legs--] = temp; | ||
209 | } | ||
210 | } | ||
211 | |||
212 | /* Add b to a, store in a. Operates on legs + flegs words, with | ||
213 | flegs the amount of legs to propagate the carry to */ | ||
214 | static void mp_adm( leg_t *a, leg_t const *b, int legs, int flegs ) | ||
215 | { | ||
216 | dleg_t acc = 0; | ||
217 | while( legs-- > 0 ) | ||
218 | { | ||
219 | acc += (dleg_t)a[legs] + (dleg_t)b[legs]; | ||
220 | a[legs] = (leg_t)acc; | ||
221 | acc >>= 8*sizeof(leg_t); | ||
222 | } | ||
223 | while( acc && flegs-- ) | ||
224 | { | ||
225 | acc += (dleg_t)a[legs]; | ||
226 | a[legs--] = (leg_t)acc; | ||
227 | acc >>= 8*sizeof(leg_t); | ||
228 | } | ||
229 | } | ||
230 | |||
231 | /* Subtract b from a and store in result. Expects nothing to borrow.*/ | ||
232 | static void mp_sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int legs ) | ||
233 | { | ||
234 | int borrow = 0, borrow_temp; | ||
235 | while( legs-- ) | ||
236 | { | ||
237 | leg_t temp = a[legs] - b[legs]; | ||
238 | borrow_temp = temp > a[legs]; | ||
239 | result[legs] = temp - borrow; | ||
240 | borrow = borrow_temp | (result[legs] > temp ); | ||
241 | } | ||
242 | } | ||
243 | |||
244 | /* Fast negate */ | ||
245 | static void mp_negate( leg_t * p, int legs ) | ||
246 | { | ||
247 | int legss = legs; | ||
248 | while( legs--) p[legs]^=(leg_t)-1; | ||
249 | while( legss-- && !++p[legss] ); | ||
250 | } | ||
251 | |||
252 | /* Multiplies a with fac, adds to result. | ||
253 | result is guaranteed to be initialized with enough legs prepended to take the carry */ | ||
254 | static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) | ||
255 | { | ||
256 | dleg_t acc = 0; | ||
257 | leg_t *r = result+legs-1; | ||
258 | while( legs-- ) | ||
259 | { | ||
260 | acc += (dleg_t)*r + (dleg_t)a[legs] * (dleg_t)fac; | ||
261 | *(r--) = (leg_t)acc; | ||
262 | acc >>= 8*sizeof(leg_t); | ||
263 | } | ||
264 | while( acc ) | ||
265 | { | ||
266 | acc += (dleg_t)*r; | ||
267 | *(r--) = (leg_t)acc; | ||
268 | acc >>= 8*sizeof(leg_t); | ||
269 | } | ||
270 | } | ||
271 | |||
272 | /* Multiplies a and b, adds to result, base case version. */ | ||
273 | static void mp_mul_oper_add( leg_t * result, leg_t const *a, leg_t const *b, int legs ) | ||
274 | { | ||
275 | int leg = legs; | ||
276 | while( leg-- ) | ||
277 | mp_mul_uint_add( result + 1 + leg, a, b[leg], legs ); | ||
278 | } | ||
279 | |||
280 | /* Optimized mp_mul_oper_add for a == b, i.e. squaring */ | ||
281 | static void mp_sqr( leg_t *result, leg_t const * a, int legs ) | ||
282 | { | ||
283 | while( legs-- ) { | ||
284 | leg_t *offs = result+2*legs+1; | ||
285 | leg_t fac = a[legs]; | ||
286 | int leg = legs; | ||
287 | dleg_t acc = (dleg_t)*offs + (dleg_t)fac * (dleg_t)fac; | ||
288 | |||
289 | *(offs--) = (leg_t)acc; | ||
290 | acc >>= 8*sizeof(leg_t); | ||
291 | |||
292 | while( leg-- ) | ||
293 | { | ||
294 | dleg_t subresult = (dleg_t)fac * (dleg_t)a[leg]; | ||
295 | int carry = !!( subresult >> (16*sizeof(leg_t)-1)); | ||
296 | |||
297 | acc += 2 * subresult + (dleg_t)*offs; | ||
298 | *(offs--) = (leg_t)acc; | ||
299 | |||
300 | acc >>= 8*sizeof(leg_t); | ||
301 | acc += (dleg_t)carry << 8*sizeof(leg_t); | ||
302 | } | ||
303 | |||
304 | while( acc ) | ||
305 | { | ||
306 | acc += (dleg_t)*offs; | ||
307 | *(offs--) = (leg_t)acc; | ||
308 | acc >>= 8*sizeof(leg_t); | ||
309 | } | ||
310 | } | ||
311 | } | ||
312 | |||
313 | /* Optimized karatsuba (toom2.2) for a == b, i.e. squaring */ | ||
314 | static void mp_mul_kara_square( leg_t* p, leg_t const *a, int len, leg_t *scratch ) | ||
315 | { | ||
316 | memset( p, 0, 2 * len * sizeof( leg_t )); | ||
317 | if( len <= KARATSUBA_THRESHOLD ) | ||
318 | mp_sqr( p, a, len ); | ||
319 | else | ||
320 | { | ||
321 | int n = len / 2; | ||
322 | |||
323 | if( mp_cmp( a, a + n, n ) > 0 ) | ||
324 | mp_sub_mod( scratch, a, a + n, n ); | ||
325 | else | ||
326 | mp_sub_mod( scratch, a + n, a, n ); | ||
327 | |||
328 | mp_mul_kara_square( p + n, scratch, n, scratch + len ); | ||
329 | mp_negate( p, len + n ); | ||
330 | |||
331 | mp_mul_kara_square( scratch, a + n, n, scratch + len ); | ||
332 | mp_adm( p + len, scratch, len, len ); | ||
333 | mp_adm( p + n, scratch, len, n ); | ||
334 | |||
335 | mp_mul_kara_square( scratch, a, n, scratch + len ); | ||
336 | mp_adm( p + n, scratch, len, n ); | ||
337 | mp_adm( p, scratch, len, 0 ); | ||
338 | } | ||
339 | } | ||
340 | |||
341 | /* karatsuba (toom2.2), generic */ | ||
342 | static void mp_mul_kara( leg_t* p, leg_t const *a, leg_t const *b, int len, leg_t *scratch ) | ||
343 | { | ||
344 | memset( p, 0, 2 * len * sizeof( leg_t )); | ||
345 | if( len <= KARATSUBA_THRESHOLD ) | ||
346 | mp_mul_oper_add( p, a, b, len ); | ||
347 | else | ||
348 | { | ||
349 | int sign = 0, n = len / 2; | ||
350 | |||
351 | if( mp_cmp( a, a + n, n ) > 0 ) | ||
352 | mp_sub_mod( scratch, a, a + n, n ); | ||
353 | else | ||
354 | { | ||
355 | mp_sub_mod( scratch, a + n, a, n ); | ||
356 | sign = 1; | ||
357 | } | ||
358 | |||
359 | if( mp_cmp( b, b + n, n ) > 0 ) | ||
360 | mp_sub_mod( scratch + n, b, b + n, n ); | ||
361 | else | ||
362 | { | ||
363 | mp_sub_mod( scratch + n, b + n, b, n ); | ||
364 | sign ^= 1; | ||
365 | } | ||
366 | |||
367 | mp_mul_kara( p + n, scratch, scratch + n, n, scratch + len ); | ||
368 | if( !sign ) | ||
369 | mp_negate( p, len + n ); | ||
370 | |||
371 | mp_mul_kara( scratch, a + n, b + n, n, scratch + len ); | ||
372 | mp_adm( p + len, scratch, len, len ); | ||
373 | mp_adm( p + n, scratch, len, n ); | ||
374 | |||
375 | mp_mul_kara( scratch, a, b, n, scratch + len ); | ||
376 | mp_adm( p + n, scratch, len, n ); | ||
377 | mp_adm( p, scratch, len, 0 ); | ||
378 | } | ||
379 | } | ||
380 | |||
381 | /* Multiply a and b, store in a, work in montgomery domain | ||
382 | to achieve multiply, a needs to be reduced by k * g_prime | ||
383 | until all lower legs are 0, allowing exact division by 2^r | ||
384 | |||
385 | if !do_mul, we convert from montgomery domain back to | ||
386 | g_prime domain and thus only multiply by 1 before reducing | ||
387 | */ | ||
388 | static void redc( leg_t *a, leg_t const *b, int legs, int do_mul ) | ||
389 | { | ||
390 | leg_t scratch[ 2 * ( legs * legs / KARATSUBA_THRESHOLD ) ]; | ||
391 | leg_t temp[1 + 2 * legs]; | ||
392 | leg_t leg = legs; | ||
393 | |||
394 | /* Not necessary for transforming back */ | ||
395 | if( do_mul ) | ||
396 | { | ||
397 | temp[0] = 0; | ||
398 | if( a == b ) | ||
399 | mp_mul_kara_square( temp + 1, a, legs, scratch ); | ||
400 | else | ||
401 | mp_mul_kara( temp + 1, a, b, legs, scratch ); | ||
402 | } | ||
403 | else | ||
404 | { | ||
405 | memset( temp, 0, ( 1 + legs ) * sizeof(leg_t)); | ||
406 | memcpy( temp + 1 + legs, a, legs * sizeof(leg_t)); | ||
407 | } | ||
408 | |||
409 | /* m = p * ( m * R_1 ) % R */ | ||
410 | while( leg-- ) | ||
411 | { | ||
412 | leg_t k = temp[1+legs+leg] * 1; // g_r_inverse; | ||
413 | mp_mul_uint_add( temp + 1 + leg + 1, g_prime, k, legs ); | ||
414 | } | ||
415 | |||
416 | /* the lower legs of temp are now zero */ | ||
417 | /* if necessary, reduce temp to fit in legs */ | ||
418 | if( temp[0] || mp_cmp( temp + 1, g_prime, legs ) > 0 ) | ||
419 | mp_sub( temp + 1, g_prime, legs ); | ||
420 | |||
421 | memcpy( a, temp + 1, legs * sizeof(leg_t) ); | ||
422 | } | ||
423 | |||
424 | /* calculate base ^ exponent modulo g_prime */ | ||
425 | static void powm( leg_t * result, leg_t const *exponent, leg_t base, int legs ) | ||
426 | { | ||
427 | leg_t acc[legs]; | ||
428 | #ifdef WITH_PREVENT_TIMING_ATTACKS | ||
429 | leg_t dummy[legs]; | ||
430 | #endif | ||
431 | int first = 0, bit = legs * 8 * sizeof(leg_t); | ||
432 | |||
433 | memset( acc, 0, sizeof(acc) ); | ||
434 | acc[legs-1] = base; | ||
435 | |||
436 | /* Transform base into montgomery domain */ | ||
437 | redc( acc, g_r_square, legs, 1 ); | ||
438 | |||
439 | /* mul in temp and if bit set in exponent, multiply into accumulator */ | ||
440 | while( bit-- ) | ||
441 | { | ||
442 | int this_bit = sizeof(leg_t) * 8 - 1 - ( bit % ( sizeof(leg_t) * 8 ) ); | ||
443 | if( ( exponent[ bit / ( sizeof(leg_t) * 8 ) ] >> this_bit ) & 1 ) { | ||
444 | if( first++ ) | ||
445 | redc( result, acc, legs, 1 ); | ||
446 | else | ||
447 | memcpy( result, acc, sizeof(leg_t) * legs ); | ||
448 | } | ||
449 | #ifdef WITH_PREVENT_TIMING_ATTACKS | ||
450 | else | ||
451 | redc( dummy, acc, legs, 1 ); | ||
452 | #endif | ||
453 | if( bit ) | ||
454 | redc( acc, acc, legs, 1 ); | ||
455 | } | ||
456 | |||
457 | /* Transform result in acc back into mod p domain */ | ||
458 | if( first ) | ||
459 | redc( result, 0, legs, 0 ); | ||
460 | else /* base ^ 0 mod p = 1 */ | ||
461 | { | ||
462 | memset( result, 0, legs * sizeof(leg_t) ); | ||
463 | result[legs-1] = 1; | ||
464 | } | ||
465 | } | ||
466 | |||
467 | int main() | ||
468 | { | ||
469 | int i, legs = 64; | ||
470 | leg_t input[ /*legs*/ ] = { | ||
471 | 0x1de7eae6c0d6a0b0, 0x9ac2961fe160f837, 0xe5d85f8fb8fced5c, 0xb563a9cf79c6eaec, | ||
472 | 0xa6d78571af8d3688, 0x118727b5eb17dfbd, 0x6060e2cd96000219, 0x52292d56ea2960a9, | ||
473 | 0x72d720399b4ad2d4, 0xd07dc909e7070b0e, 0xcb650843a43a4a00, 0x950e895d7777d182, | ||
474 | 0x3ca892560247ca32, 0xac5ecd1fe4997513, 0xb420bd7e538ef88b, 0x2bca50f311573101, | ||
475 | 0x31052a6fc332563d, 0xd151e2770baec00b, 0x094d72c301b8f25d, 0x70f0c055dde43121, | ||
476 | 0x1f61b1ea0e5aafb4, 0x0e84ab94e744b8ba, 0x87e0db1397d9f2fd, 0xa13b801d1ac631d5, | ||
477 | 0xb5444312b3b43541, 0xa3612061c92319ce, 0x0e2aab629710919b, 0xd7ac4896d4e08b35, | ||
478 | 0x1785bbfb88755c99, 0x70cc7ff6ea8f1b51, 0x5479eaeb9b64ba87, 0xa77b229138f2d5d1, | ||
479 | 0x942fad7b2cc3dcc5, 0x196bcb0cc0e353f8, 0xd8aec44d430026aa, 0xe9228ae741da24cd, | ||
480 | 0x518781609b447bf8, 0x353eb64e067f86cc, 0x4c3b1333a07fee24, 0x82b4e75bf0e60d06, | ||
481 | 0x2d44f438b889673c, 0xf8df973a9842e7b2, 0x4ab8b5e0adaad6a0, 0xd2111f6ce69c2bc6, | ||
482 | 0xfcc2a7f6d4b0d4f7, 0xa86946df6abbea0f, 0xc0dc8c759af7492b, 0xe0c022a74c073360, | ||
483 | 0x74fc0b5dcdfaa679, 0xd6b85c29e8e8a528, 0x547fd48d3b471c7c, 0x051a88159adc842b, | ||
484 | 0xe407bde8519bc534, 0x21da1ef76099ed94, 0x9ae478fd69254dc9, 0x6b984d2e7b9cc244, | ||
485 | 0x87e90f5b6397727e, 0xfb4e8dadb3111df4, 0xfa8718685a5b8359, 0x5ec1d62932d6937a, | ||
486 | 0x56eee1ba0dc1936c, 0xb08ad95f8040183f, 0xe0cc506399225574, 0x281b04b30bb62984 | ||
487 | }; | ||
488 | |||
489 | leg_t res[legs]; /* | ||
490 | = { 569D056BC5485C6F3C026935CFC1FC36CFC5A6B770B442323750EE08A32C63F64743A4902E32A41F7C5A9097509EB9AAA5C5B425D9BDAEDC3B98EE3A9F99AE6FB8723EBE01154937ECC1E4AE38CBC12B767FD17264A4AAEB310EA5B0E72D12FE3969532C5A0718B45F251BFB2A126EFB87F819BDCD434EAEE3B9108C387F4DFFEFA0E9532A0532192034DB0EF5C5BB2A77BCA5B4A08DFFD22F6B1D4F17F5123935CAB4F40EC7E77F4FEEEC6A8CD20CDB2D37DD1BB44544C50B01E98F8B3F5E1ABF91817D40B7E526D6ED80CDE416C6CD32218D3ECCA753D15BBB007661754E44A937C5A68957C3C2D3F39A220930567E9F720B3AAE9D34FEBA6E5A047C131F32271A3BC1E85BC1BC210F49B09F3938226399991BC90299B4E0E684B2DE6BDA684693F40CF728FD24A4B1D1A2B8F4CD3316A02DBC44069A40C1AC64F862091A888DD284AB180918B379A3BD173EDDEA5E08DF4BE51B441AEA7EDA4E439CF57914F68E0FB3E756AB7286616C3736CA72DD40A88FC37B598BF561300AFD1F80F7D44E53C4C1D85FF215250A0C56222A177299C61C2E654B040176F4A8D44BE2612B36E066E7AA6E58D84D4C4CB9A5952E1668CDF04EE55C38ABA59614CB7F43D12921551DD82062AFC3F340B33F43C643BC9D5DB6D6BAB2C94F100D139443B20BCB80384C3231261C5ABFFF3D971CB878116F4EBB7D4D24A2B6F5869 } | ||
491 | */ | ||
492 | |||
493 | #ifdef WITH_MEASURE_GMP | ||
494 | mpz_t m, base, exp, result; | ||
495 | |||
496 | mpz_init( m ); mpz_init( base ); mpz_init( exp ); mpz_init( result ); | ||
497 | mpz_import( m, sizeof(g_prime) / sizeof(uint64_t), 1, sizeof(uint64_t), 0, 0, g_prime ); | ||
498 | mpz_set_ui( base, 2 ); | ||
499 | mpz_import( exp, sizeof(input) / sizeof(uint64_t), 1, sizeof(uint64_t), 0, 0, input ); | ||
500 | |||
501 | for( i=0; i< WITH_ROUNDS; ++i ) | ||
502 | mpz_powm( result, base, exp, m ); | ||
503 | |||
504 | gmp_printf("%ZX\n", result); | ||
505 | #else | ||
506 | |||
507 | for( i=0; i<WITH_ROUNDS; ++i ) | ||
508 | powm( res, input, 2, legs ); | ||
509 | |||
510 | for( i=0; i<legs; ++i ) | ||
511 | printf( "%08llX", res[i] ); | ||
512 | putchar(10); putchar(10); | ||
513 | #endif | ||
514 | |||
515 | return 0; | ||
516 | } | ||