summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorerdgeist <erdgeist@bauklotz-3.local>2014-10-22 16:32:32 +0200
committererdgeist <erdgeist@bauklotz-3.local>2014-10-22 16:32:32 +0200
commit1e6d868791ef0c22b86c620a6d38c0a7fe964c61 (patch)
tree047d57921e7962444609c7382b1addc69885000b
parentaae5320226e9f0fcc764471337326ab690683df7 (diff)
Sync state of development with the other repository I've been working inHEADmaster
-rw-r--r--powm.c773
-rw-r--r--powm.h44
2 files changed, 218 insertions, 599 deletions
diff --git a/powm.c b/powm.c
index 5c281a6..1f0b0c7 100644
--- a/powm.c
+++ b/powm.c
@@ -1,249 +1,89 @@
1#include "powm.h"
1#include <stdlib.h> 2#include <stdlib.h>
2#include <stdint.h> 3#include <stdint.h>
4#include <stddef.h>
3#include <string.h> 5#include <string.h>
4#include <stdio.h> 6#include <stdio.h>
5#include <assert.h> 7#include <assert.h>
6 8
7/* 9/*
8This code implements base ^ exp mod p with base being a small integer, 10This code implements result = base ^ exp (mod m)
9g given in g_prime and the r number of bits (4096) being used to pre-calculate
10g_r_square = r^2 mod p in order to speed up montgomery reduction
11 11
12Because our primes conveniently end in 0xf..f for word sizes up to 64, the 12Internally bignums are represented by limbs of type limb_t ordered in little
13multiplicative inverse is ("pre-calculated") g_r_inverse = 1. 13endian (LLF).
14 14
15Todo: check if little endian internal representation is more efficient, also 15Currently, with gcc -O3, we measure at around 2,3x the time libgmp needs for
16write an import/export function for the exponent/result from big endian uint8_t 16it's powm which is not so bad, considering we don't use any assembler
17representation which is the default transport representation. 17optimization.
18
19Currently, with gcc -O3, we measure at around 2,75x the time libgmp needs for
20it's powm which is not so bad, considering we don't use any assembler optimization.
21 18
22Hot spot function is mp_mul_uint_add. 19Hot spot function is mp_mul_uint_add.
23 20
24When we're done measuring, we should set PREVENT_TIMING_ATTACKS so that powm() 21We should set PREVENT_TIMING_ATTACKS so that powm() does not leak information
25does not leak information about the number of bits set, but on average doubling the 22about the number of bits set, but on average doubling the time needed.
26time needed. Discuss ;)
27*/ 23*/
28 24
29#if 0 25#define WITH_PREVENT_TIMING_ATTACKS
30typedef uint32_t leg_t;
31typedef uint64_t dleg_t;
32#else
33typedef uint64_t leg_t;
34typedef unsigned int uint128_t __attribute__((mode(TI)));
35typedef uint128_t dleg_t;
36#endif
37
38//#define WITH_MEASURE_GMP
39//#define WITH_PREVENT_TIMING_ATTACKS
40//#define WITH_TESTS
41//#define WITHOUT_UNROLLING 26//#define WITHOUT_UNROLLING
42#define WITH_ROUNDS 128 27#define WITH_ROUNDS 128
43
44#ifdef WITH_TESTS
45static int run_tests( );
46#define KARATSUBA_THRESHOLD 4
47#else
48#define KARATSUBA_THRESHOLD 16 28#define KARATSUBA_THRESHOLD 16
49#endif
50
51#ifdef WITH_MEASURE_GMP
52#include <gmp.h>
53#endif
54
55#ifdef DEBUG_FUNCTIONS
56static void dump_int( int level, char * prefix, leg_t const * p, int l, int nl )
57{
58 printf( "L%d %s: ", level, prefix );
59 while( l-- )
60 printf( "%08llX", p[l] );
61 if( nl )
62 putchar( 10 );
63}
64#endif
65
66/* Test values for smaller legs sizes */
67#if 0
68static leg_t g_prime[] = { 4293918689 };
69static leg_t g_r_square[] = { 333456065 };
70static leg_t g_r_inverse = 4192173023;
71#endif
72
73#if 0
74static leg_t g_prime[] = { 0xFFFFFFFF, 0xFFFFFFA1 };
75/* g_prime == 18446744073709551521 */
76/* g_r == 18446744073709551616 */
77/* g_r^2 == 340282366920938463463374607431768211456 */
78static leg_t g_r_square[] = { 0, 9025 };
79/* g_prime^-1 mod g_r = 12815632724892951649 */
80static leg_t g_r_inverse = 3571604383;
81#endif
82
83#if 0
84static leg_t g_prime[] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffff61 };
85/* g_prime == 340282366920938463463374607431768211297 */
86/* g_r == 340282366920938463463374607431768211456 */
87/* g_r^2 == 115792089237316195423570985008687907853269984665640564039457584007913129639936 */
88static leg_t g_r_square[] = { 0, 0, 0, 25281 };
89/* g_prime^-1 mod g_r = 104866892950477891256008526818595234977 */
90static leg_t g_r_inverse = 783358815;
91#endif
92
93#if 0
94static leg_t g_prime[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x61 };
95/* g_prime == 340282366920938463463374607431768211297 */
96/* g_r == 340282366920938463463374607431768211456 */
97/* g_r^2 == 115792089237316195423570985008687907853269984665640564039457584007913129639936 */
98static leg_t g_r_square[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x62, 0xc1 };
99/* g_prime^-1 mod g_r = 104866892950477891256008526818595234977 */
100static leg_t g_r_inverse = 0x5f;
101#endif
102
103#if 0
104/* Standard prime for leg_t defined as uint32_t */
105static const leg_t g_prime[] = {
106 0xFFFFFFFF, 0xFFFFFFFF, 0xC90FDAA2, 0x2168C234, 0xC4C6628B, 0x80DC1CD1, 0x29024E08, 0x8A67CC74,
107 0x020BBEA6, 0x3B139B22, 0x514A0879, 0x8E3404DD, 0xEF9519B3, 0xCD3A431B, 0x302B0A6D, 0xF25F1437,
108 0x4FE1356D, 0x6D51C245, 0xE485B576, 0x625E7EC6, 0xF44C42E9, 0xA637ED6B, 0x0BFF5CB6, 0xF406B7ED,
109 0xEE386BFB, 0x5A899FA5, 0xAE9F2411, 0x7C4B1FE6, 0x49286651, 0xECE45B3D, 0xC2007CB8, 0xA163BF05,
110 0x98DA4836, 0x1C55D39A, 0x69163FA8, 0xFD24CF5F, 0x83655D23, 0xDCA3AD96, 0x1C62F356, 0x208552BB,
111 0x9ED52907, 0x7096966D, 0x670C354E, 0x4ABC9804, 0xF1746C08, 0xCA18217C, 0x32905E46, 0x2E36CE3B,
112 0xE39E772C, 0x180E8603, 0x9B2783A2, 0xEC07A28F, 0xB5C55DF0, 0x6F4C52C9, 0xDE2BCBF6, 0x95581718,
113 0x3995497C, 0xEA956AE5, 0x15D22618, 0x98FA0510, 0x15728E5A, 0x8AAAC42D, 0xAD33170D, 0x04507A33,
114 0xA85521AB, 0xDF1CBA64, 0xECFB8504, 0x58DBEF0A, 0x8AEA7157, 0x5D060C7D, 0xB3970F85, 0xA6E1E4C7,
115 0xABF5AE8C, 0xDB0933D7, 0x1E8C94E0, 0x4A25619D, 0xCEE3D226, 0x1AD2EE6B, 0xF12FFA06, 0xD98A0864,
116 0xD8760273, 0x3EC86A64, 0x521F2B18, 0x177B200C, 0xBBE11757, 0x7A615D6C, 0x770988C0, 0xBAD946E2,
117 0x08E24FA0, 0x74E5AB31, 0x43DB5BFC, 0xE0FD108E, 0x4B82D120, 0xA9210801, 0x1A723C12, 0xA787E6D7,
118 0x88719A10, 0xBDBA5B26, 0x99C32718, 0x6AF4E23C, 0x1A946834, 0xB6150BDA, 0x2583E9CA, 0x2AD44CE8,
119 0xDBBBC2DB, 0x04DE8EF9, 0x2E8EFC14, 0x1FBECAA6, 0x287C5947, 0x4E6BC05D, 0x99B2964F, 0xA090C3A2,
120 0x233BA186, 0x515BE7ED, 0x1F612970, 0xCEE2D7AF, 0xB81BDD76, 0x2170481C, 0xD0069127, 0xD5B05AA9,
121 0x93B4EA98, 0x8D8FDDC1, 0x86FFB7DC, 0x90A6C08F, 0x4DF435C9, 0x34063199, 0xFFFFFFFF, 0xFFFFFFFF
122};
123
124static const leg_t g_r_square[] = {
125 0x3DA97659, 0xE280DB0B, 0xE65BCC3A, 0xB78FDAA9, 0xB7B768C8, 0x9931D78D, 0xF8B11725, 0x339EBC93,
126 0xAA7FBD95, 0x62059F1F, 0x4C2DE67D, 0xAD47527E, 0x526A653A, 0x7A674BD5, 0x5401EA4F, 0x3ED73A2F,
127 0x70B56F52, 0x7F6F604F, 0xF3E56CC2, 0xBD9F048C, 0xFEA80D9A, 0x6EC9FCD3, 0x3AD36FD8, 0x22C39F34,
128 0x91F30C52, 0xF798DA6A, 0x18C3DCE2, 0x0188D84C, 0xCB225176, 0x259E080F, 0xA89D1DCD, 0x9D381CC5,
129 0xBEACD46F, 0x3CDD1196, 0x91A4F557, 0x2929B90C, 0x67DE8FA0, 0x23864714, 0xC28A61D4, 0x7411402D,
130 0x6C09060D, 0x41058639, 0xE4FCCF1D, 0x638F4566, 0x9E10FDE2, 0x8E54806B, 0xF1D27D0B, 0x5C7DC9C2,
131 0xB616D6FA, 0x8BE2C91D, 0xE8105464, 0xE9F80A5F, 0x34720869, 0x90DACF1A, 0x7E2C75A5, 0x8E25F142,
132 0x8FB4832E, 0xF827DE84, 0xCA06DA91, 0xC2B3E7E2, 0x4F024193, 0x787A8278, 0x8BD70562, 0xDA60E392,
133 0x46BDB733, 0x6E8452D9, 0x1D7D37A2, 0x3FB8CF61, 0xB18A9EF1, 0x50C8953A, 0xC93919D1, 0x2A4B1A67,
134 0xFDC65A26, 0x9B51C1EF, 0x09954843, 0x20E739F4, 0x6C7951A5, 0x23CEF785, 0xE60C6EFD, 0xFFB7A9A9,
135 0x5F666146, 0xCB441F59, 0xAE01E0F3, 0x63A9315D, 0x04BA044A, 0xEB4EEFD4, 0x8563215F, 0x72C8D989,
136 0x62D21877, 0x1296EF6A, 0x20BD72B9, 0xB21E6B3D, 0x53C44FAB, 0x734810F7, 0xD203A9E0, 0xD7CE25D0,
137 0x2E52989E, 0xCCF85F34, 0x912A0491, 0x3E9EBD87, 0x8267537D, 0x4A612A18, 0x51E75D99, 0x98F001DB,
138 0xC9C77F0C, 0x352D408C, 0xA796D182, 0x04A636F7, 0xE4404092, 0x1C1E467C, 0x524E7C7A, 0x7ED36C41,
139 0x2A434CEB, 0x230B2DFE, 0x3549C577, 0x7A17FB04, 0xB850DE95, 0xD97AC40A, 0x55EA6F75, 0x41C4F82B,
140 0x37BF90FE, 0x52074F19, 0xFA8F75F0, 0x067E82B1, 0x8A1AC024, 0xB30E9B12, 0xC14AB0DD, 0xCC03AA20
141};
142
143static const leg_t g_r_inverse = 1;
144*/
145#endif
146
147#if 1
148/* Standard prime for leg_t defined as uint64_t */
149static const leg_t g_prime[] = {
150 0xFFFFFFFFFFFFFFFF, 0x4DF435C934063199, 0x86FFB7DC90A6C08F, 0x93B4EA988D8FDDC1,
151 0xD0069127D5B05AA9, 0xB81BDD762170481C, 0x1F612970CEE2D7AF, 0x233BA186515BE7ED,
152 0x99B2964FA090C3A2, 0x287C59474E6BC05D, 0x2E8EFC141FBECAA6, 0xDBBBC2DB04DE8EF9,
153 0x2583E9CA2AD44CE8, 0x1A946834B6150BDA, 0x99C327186AF4E23C, 0x88719A10BDBA5B26,
154 0x1A723C12A787E6D7, 0x4B82D120A9210801, 0x43DB5BFCE0FD108E, 0x08E24FA074E5AB31,
155 0x770988C0BAD946E2, 0xBBE117577A615D6C, 0x521F2B18177B200C, 0xD87602733EC86A64,
156 0xF12FFA06D98A0864, 0xCEE3D2261AD2EE6B, 0x1E8C94E04A25619D, 0xABF5AE8CDB0933D7,
157 0xB3970F85A6E1E4C7, 0x8AEA71575D060C7D, 0xECFB850458DBEF0A, 0xA85521ABDF1CBA64,
158 0xAD33170D04507A33, 0x15728E5A8AAAC42D, 0x15D2261898FA0510, 0x3995497CEA956AE5,
159 0xDE2BCBF695581718, 0xB5C55DF06F4C52C9, 0x9B2783A2EC07A28F, 0xE39E772C180E8603,
160 0x32905E462E36CE3B, 0xF1746C08CA18217C, 0x670C354E4ABC9804, 0x9ED529077096966D,
161 0x1C62F356208552BB, 0x83655D23DCA3AD96, 0x69163FA8FD24CF5F, 0x98DA48361C55D39A,
162 0xC2007CB8A163BF05, 0x49286651ECE45B3D, 0xAE9F24117C4B1FE6, 0xEE386BFB5A899FA5,
163 0x0BFF5CB6F406B7ED, 0xF44C42E9A637ED6B, 0xE485B576625E7EC6, 0x4FE1356D6D51C245,
164 0x302B0A6DF25F1437, 0xEF9519B3CD3A431B, 0x514A08798E3404DD, 0x020BBEA63B139B22,
165 0x29024E088A67CC74, 0xC4C6628B80DC1CD1, 0xC90FDAA22168C234, 0xFFFFFFFFFFFFFFFF
166 };
167
168static const leg_t g_r_square[] = {
169 0xC14AB0DDCC03AA20, 0x8A1AC024B30E9B12, 0xFA8F75F0067E82B1, 0x37BF90FE52074F19,
170 0x55EA6F7541C4F82B, 0xB850DE95D97AC40A, 0x3549C5777A17FB04, 0x2A434CEB230B2DFE,
171 0x524E7C7A7ED36C41, 0xE44040921C1E467C, 0xA796D18204A636F7, 0xC9C77F0C352D408C,
172 0x51E75D9998F001DB, 0x8267537D4A612A18, 0x912A04913E9EBD87, 0x2E52989ECCF85F34,
173 0xD203A9E0D7CE25D0, 0x53C44FAB734810F7, 0x20BD72B9B21E6B3D, 0x62D218771296EF6A,
174 0x8563215F72C8D989, 0x04BA044AEB4EEFD4, 0xAE01E0F363A9315D, 0x5F666146CB441F59,
175 0xE60C6EFDFFB7A9A9, 0x6C7951A523CEF785, 0x0995484320E739F4, 0xFDC65A269B51C1EF,
176 0xC93919D12A4B1A67, 0xB18A9EF150C8953A, 0x1D7D37A23FB8CF61, 0x46BDB7336E8452D9,
177 0x8BD70562DA60E392, 0x4F024193787A8278, 0xCA06DA91C2B3E7E2, 0x8FB4832EF827DE84,
178 0x7E2C75A58E25F142, 0x3472086990DACF1A, 0xE8105464E9F80A5F, 0xB616D6FA8BE2C91D,
179 0xF1D27D0B5C7DC9C2, 0x9E10FDE28E54806B, 0xE4FCCF1D638F4566, 0x6C09060D41058639,
180 0xC28A61D47411402D, 0x67DE8FA023864714, 0x91A4F5572929B90C, 0xBEACD46F3CDD1196,
181 0xA89D1DCD9D381CC5, 0xCB225176259E080F, 0x18C3DCE20188D84C, 0x91F30C52F798DA6A,
182 0x3AD36FD822C39F34, 0xFEA80D9A6EC9FCD3, 0xF3E56CC2BD9F048C, 0x70B56F527F6F604F,
183 0x5401EA4F3ED73A2F, 0x526A653A7A674BD5, 0x4C2DE67DAD47527E, 0xAA7FBD9562059F1F,
184 0xF8B11725339EBC93, 0xB7B768C89931D78D, 0xE65BCC3AB78FDAA9, 0x3DA97659E280DB0B
185};
186
187static const leg_t g_r_inverse = 1;
188#endif
189 29
190/* Returns 0 if a and b are equal, -1 if a < b, 1 if a > b */ 30/* Returns 0 if a and b are equal, -1 if a < b, 1 if a > b */
191static int mp_cmp( leg_t const *a, leg_t const *b, int legs ) 31static int mp_cmp( limb_t const *a, limb_t const *b, int limbs )
192{ 32{
193 while( legs-- ) 33 while( limbs-- )
194 { 34 {
195 if( a[legs] < b[legs] ) return -1; 35 if( a[limbs] < b[limbs] ) return -1;
196 if( a[legs] > b[legs] ) return 1; 36 if( a[limbs] > b[limbs] ) return 1;
197 } 37 }
198 return 0; 38 return 0;
199} 39}
200 40
201/* Subtract b from a, store in a. Expects enough words prepended 41/* Subtract b from a, store in a. Expects enough words prepended
202 to borrow from */ 42 to borrow from */
203static void mp_sub( leg_t *a, leg_t const *b, int legs ) 43static void mp_sub( limb_t *a, limb_t const *b, int limbs )
204{ 44{
205 int borrow = 0, borrow_temp; 45 int borrow = 0, borrow_temp;
206 while( legs-- ) 46 while( limbs-- )
207 { 47 {
208 leg_t temp = *a - *(b++); 48 limb_t temp = *a - *(b++);
209 borrow_temp = temp > *a; 49 borrow_temp = temp > *a;
210 *a = temp - borrow; 50 *a = temp - borrow;
211 borrow = borrow_temp | ( *(a++) > temp ); 51 borrow = borrow_temp | ( *(a++) > temp );
212 } 52 }
213 while( borrow ) 53 while( borrow )
214 { 54 {
215 leg_t temp = *a - borrow; 55 limb_t temp = *a - borrow;
216 borrow = temp > *a; 56 borrow = temp > *a;
217 *(a++) = temp; 57 *(a++) = temp;
218 } 58 }
219} 59}
220 60
221/* Add b to a, store in a. Operates on legs + flegs words, with 61/* Add b to a, store in a. Operates on limbs + flimbs words, with
222 flegs the amount of legs to propagate the carry to */ 62 flimbs the amount of limbs to propagate the carry to */
223static void mp_add( leg_t *a, leg_t const *b, int legs, int flegs ) 63static void mp_add( limb_t *a, limb_t const *b, int limbs, int flimbs )
224{ 64{
225 dleg_t acc = 0; 65 dlimb_t acc = 0;
226 while( legs-- ) 66 while( limbs-- )
227 { 67 {
228 acc += (dleg_t)*a + (dleg_t)*(b++); 68 acc += (dlimb_t)*a + (dlimb_t)*(b++);
229 *(a++) = (leg_t)acc; 69 *(a++) = (limb_t)acc;
230 acc >>= 8*sizeof(leg_t); 70 acc >>= 8*sizeof(limb_t);
231 } 71 }
232 while( acc && flegs-- ) 72 while( acc && flimbs-- )
233 { 73 {
234 acc += (dleg_t)*a; 74 acc += (dlimb_t)*a;
235 *(a++) = (leg_t)acc; 75 *(a++) = (limb_t)acc;
236 acc >>= 8*sizeof(leg_t); 76 acc >>= 8*sizeof(limb_t);
237 } 77 }
238} 78}
239 79
240/* Subtract b from a and store in result. Expects nothing to borrow.*/ 80/* Subtract b from a and store in result. Expects nothing to borrow.*/
241static void mp_sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int legs ) 81static void mp_sub_mod( limb_t * result, limb_t const *a, limb_t const * b, int limbs )
242{ 82{
243 int borrow = 0, borrow_temp; 83 int borrow = 0, borrow_temp;
244 while( legs-- ) 84 while( limbs-- )
245 { 85 {
246 leg_t temp = *a - *(b++); 86 limb_t temp = *a - *(b++);
247 borrow_temp = temp > *(a++); 87 borrow_temp = temp > *(a++);
248 *result = temp - borrow; 88 *result = temp - borrow;
249 borrow = borrow_temp | (*(result++) > temp ); 89 borrow = borrow_temp | (*(result++) > temp );
@@ -251,119 +91,120 @@ static void mp_sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int leg
251} 91}
252 92
253/* Fast negate */ 93/* Fast negate */
254static void mp_negate( leg_t * p, int legs ) 94static void mp_negate( limb_t * p, int limbs )
255{ 95{
256 /* Only as long as we find 0 along the way, we need to 96 /* Only as long as we find 0 along the way, we need to
257 propagate the carry of -x == !x+1 */ 97 propagate the carry of -x == !x+1 */
258 int carry = 1; 98 int carry = 1;
259 while( legs-- ) 99 while( limbs-- )
260 { 100 {
261 leg_t v = carry + ( *p ^ (leg_t)-1 ); 101 limb_t v = carry + ( *p ^ (limb_t)-1 );
262 if(v) carry = 0; 102 if(v) carry = 0;
263 *(p++)=v; 103 *(p++)=v;
264 } 104 }
265} 105}
266 106
267/* Multiplies a with fac, adds to result. 107/* Multiplies a with fac, adds to result.
268 result is guaranteed to be initialized with enough legs prepended to take the carry */ 108 result is guaranteed to be initialized with enough limbs prepended to take the carry */
269static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) 109static void mp_mul_uint_add( limb_t *result, limb_t const *a, limb_t fac, int limbs )
270{ 110{
271 dleg_t acc8 = 0; 111 dlimb_t acc15 = 0;
272#ifndef WITHOUT_UNROLLING 112#ifndef WITHOUT_UNROLLING
273 dleg_t acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0; 113 dlimb_t acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0;
274 dleg_t acc5 = 0, acc6 = 0, acc7 = 0; 114 dlimb_t acc4 = 0, acc5 = 0, acc6 = 0, acc7 = 0;
115 dlimb_t acc8 = 0, acc9 = 0, acc10= 0, acc11= 0;
116 dlimb_t acc12= 0, acc13= 0, acc14= 0;
275 117
276 while( legs >= 8 ) 118 while( limbs >= 16 )
277 { 119 {
278 acc1 = ( acc8 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 120#define MUL_UINT_ADD_ROUND(ACC,CARRY) \
279 *(result++) = (leg_t)acc1; 121 acc##ACC = ( acc##CARRY >> 8*sizeof(limb_t) ) + (dlimb_t)result[ACC] + (dlimb_t)a[ACC] * (dlimb_t)fac; \
280 122 result[ACC] = (limb_t)acc##ACC;
281 acc2 = ( acc1 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 123
282 *(result++) = (leg_t)acc2; 124 MUL_UINT_ADD_ROUND(0,15)
283 125 MUL_UINT_ADD_ROUND(1,0)
284 acc3 = ( acc2 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 126 MUL_UINT_ADD_ROUND(2,1)
285 *(result++) = (leg_t)acc3; 127 MUL_UINT_ADD_ROUND(3,2)
286 128 MUL_UINT_ADD_ROUND(4,3)
287 acc4 = ( acc3 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 129 MUL_UINT_ADD_ROUND(5,4)
288 *(result++) = (leg_t)acc4; 130 MUL_UINT_ADD_ROUND(6,5)
289 131 MUL_UINT_ADD_ROUND(7,6)
290 acc5 = ( acc4 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 132 MUL_UINT_ADD_ROUND(8,7)
291 *(result++) = (leg_t)acc5; 133 MUL_UINT_ADD_ROUND(9,8)
292 134 MUL_UINT_ADD_ROUND(10,9)
293 acc6 = ( acc5 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 135 MUL_UINT_ADD_ROUND(11,10)
294 *(result++) = (leg_t)acc6; 136 MUL_UINT_ADD_ROUND(12,11)
295 137 MUL_UINT_ADD_ROUND(13,12)
296 acc7 = ( acc6 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 138 MUL_UINT_ADD_ROUND(14,13)
297 *(result++) = (leg_t)acc7; 139 MUL_UINT_ADD_ROUND(15,14)
298 140
299 acc8 = ( acc7 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 141 result += 16;
300 *(result++) = (leg_t)acc8; 142 a += 16;
301 143 limbs -= 16;
302 legs -= 8;
303 } 144 }
304 acc8 >>= 8*sizeof(leg_t); 145 acc15 >>= 8*sizeof(limb_t);
305#endif 146#endif
306 while( legs-- ) 147 while( limbs-- )
307 { 148 {
308 acc8 += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; 149 acc15 += (dlimb_t)*result + (dlimb_t)*(a++) * (dlimb_t)fac;
309 *(result++) = (leg_t)acc8; 150 *(result++) = (limb_t)acc15;
310 acc8 >>= 8*sizeof(leg_t); 151 acc15 >>= 8*sizeof(limb_t);
311 } 152 }
312 while( acc8 ) 153 while( acc15 )
313 { 154 {
314 acc8 += (dleg_t)*result; 155 acc15 += (dlimb_t)*result;
315 *(result++) = (leg_t)acc8; 156 *(result++) = (limb_t)acc15;
316 acc8 >>= 8*sizeof(leg_t); 157 acc15 >>= 8*sizeof(limb_t);
317 } 158 }
318} 159}
319 160
320/* Multiplies a and b, adds to result, base case version. */ 161/* Multiplies a and b, adds to result, base case version. */
321static void mp_mul_oper_add( leg_t * result, leg_t const *a, leg_t const *b, int legs ) 162static void mp_mul_oper_add( limb_t * result, limb_t const *a, limb_t const *b, int limbs )
322{ 163{
323 int leg = legs; 164 int limb = limbs;
324 while( leg-- ) 165 while( limb-- )
325 mp_mul_uint_add( result++, a, *(b++), legs ); 166 mp_mul_uint_add( result++, a, *(b++), limbs );
326} 167}
327 168
328/* Optimized mp_mul_oper_add for a == b, i.e. squaring */ 169/* Optimized mp_mul_oper_add for a == b, i.e. squaring */
329static void mp_sqr( leg_t *result, leg_t const * a, int legs ) 170static void mp_sqr( limb_t *result, limb_t const * a, int limbs )
330{ 171{
331 while( legs-- ) { 172 while( limbs-- ) {
332 leg_t fac = *(a++), *dest = result; 173 limb_t fac = *(a++), *dest = result;
333 leg_t const *src = a; 174 limb_t const *src = a;
334 int leg = legs; 175 int limb = limbs;
335 176
336 dleg_t acc = (dleg_t)*dest + (dleg_t)fac * (dleg_t)fac; 177 dlimb_t acc = (dlimb_t)*dest + (dlimb_t)fac * (dlimb_t)fac;
337 178
338 *(dest++) = (leg_t)acc; 179 *(dest++) = (limb_t)acc;
339 acc >>= 8*sizeof(leg_t); 180 acc >>= 8*sizeof(limb_t);
340 181
341 while( leg-- ) 182 while( limb-- )
342 { 183 {
343 dleg_t subresult = (dleg_t)fac * (dleg_t)*(src++); 184 dlimb_t subresult = (dlimb_t)fac * (dlimb_t)*(src++);
344 int carry = !!( subresult >> (16*sizeof(leg_t)-1)); 185 int carry = !!( subresult >> (16*sizeof(limb_t)-1));
345 186
346 acc += 2 * subresult + (dleg_t)*dest; 187 acc += 2 * subresult + (dlimb_t)*dest;
347 *(dest++) = (leg_t)acc; 188 *(dest++) = (limb_t)acc;
348 189
349 acc >>= 8*sizeof(leg_t); 190 acc >>= 8*sizeof(limb_t);
350 acc += (dleg_t)carry << 8*sizeof(leg_t); 191 acc += (dlimb_t)carry << 8*sizeof(limb_t);
351 } 192 }
352 193
353 while( acc ) 194 while( acc )
354 { 195 {
355 acc += (dleg_t)*dest; 196 acc += (dlimb_t)*dest;
356 *(dest++) = (leg_t)acc; 197 *(dest++) = (limb_t)acc;
357 acc >>= 8*sizeof(leg_t); 198 acc >>= 8*sizeof(limb_t);
358 } 199 }
359 result += 2; 200 result += 2;
360 } 201 }
361} 202}
362 203
363/* Optimized karatsuba (toom2.2) for a == b, i.e. squaring */ 204/* Optimized karatsuba (toom2.2) for a == b, i.e. squaring */
364static void mp_mul_kara_square( leg_t* p, leg_t const *a, int len, leg_t *scratch ) 205static void mp_mul_kara_square( limb_t* p, limb_t const *a, int len, limb_t *scratch )
365{ 206{
366 memset( p, 0, 2 * len * sizeof( leg_t )); 207 memset( p, 0, 2 * len * sizeof( limb_t ));
367 if( len <= KARATSUBA_THRESHOLD ) 208 if( len <= KARATSUBA_THRESHOLD )
368 mp_sqr( p, a, len ); 209 mp_sqr( p, a, len );
369 else 210 else
@@ -389,9 +230,9 @@ static void mp_mul_kara_square( leg_t* p, leg_t const *a, int len, leg_t *scratc
389} 230}
390 231
391/* karatsuba (toom2.2), generic */ 232/* karatsuba (toom2.2), generic */
392static void mp_mul_kara( leg_t* p, leg_t const *a, leg_t const *b, int len, leg_t *scratch ) 233static void mp_mul_kara( limb_t* p, limb_t const *a, limb_t const *b, int len, limb_t *scratch )
393{ 234{
394 memset( p, 0, 2 * len * sizeof( leg_t )); 235 memset( p, 0, 2 * len * sizeof( limb_t ));
395 if( len <= KARATSUBA_THRESHOLD ) 236 if( len <= KARATSUBA_THRESHOLD )
396 mp_mul_oper_add( p, a, b, len ); 237 mp_mul_oper_add( p, a, b, len );
397 else 238 else
@@ -428,398 +269,132 @@ static void mp_mul_kara( leg_t* p, leg_t const *a, leg_t const *b, int len, leg_
428 } 269 }
429} 270}
430 271
431/* Multiply a and b, store in a, work in montgomery domain 272/* Multiply a and b, store in a, work in montgomery domain to achieve multiply,
432 to achieve multiply, a needs to be reduced by k * g_prime 273 a needs to be reduced by k * modulus (with k being the unknown integer)
433 until all lower legs are 0, allowing exact division by 2^r 274 until all lower limbs are 0, allowing exact division (via implicit shift)
275 by 2^r
434 276
435 if !do_mul, we convert from montgomery domain back to 277 if !do_mul, we convert from montgomery domain back to modulus domain and
436 g_prime domain and thus only multiply by 1 before reducing 278 thus only multiply by 1 before reducing
437 */ 279 */
438static void redc( leg_t *a, leg_t const *b, int legs, int do_mul ) 280static void redc( limb_t * a,
281 const limb_t * b, int limbs,
282 const limb_t * modulus, limb_t r_inverse, int do_mul )
439{ 283{
440 leg_t scratch[ 2 * ( legs - KARATSUBA_THRESHOLD ) ]; 284 limb_t scratch[ 2 * ( limbs - KARATSUBA_THRESHOLD ) ];
441 leg_t temp[1 + 2 * legs]; 285 limb_t temp[ 1 + 2 * limbs ];
442 leg_t leg; 286 int limb;
443 287
444 /* Not necessary for transforming back */ 288 /* Not necessary for transforming back */
445 if( do_mul ) 289 if( do_mul )
446 { 290 {
447 temp[2*legs] = 0; 291 temp[2*limbs] = 0;
448 if( a == b ) 292 if( a == b )
449 mp_mul_kara_square( temp, a, legs, scratch ); 293 mp_mul_kara_square( temp, a, limbs, scratch );
450 else 294 else
451 mp_mul_kara( temp, a, b, legs, scratch ); 295 mp_mul_kara( temp, a, b, limbs, scratch );
452 } 296 }
453 else 297 else
454 { 298 {
455 memset( temp + legs, 0, (legs + 1 ) * sizeof(leg_t)); 299 memset( temp + limbs, 0, (limbs + 1) * sizeof(limb_t));
456 memcpy( temp, a, legs * sizeof(leg_t)); 300 memcpy( temp, a, limbs * sizeof(limb_t));
457 } 301 }
458 302
459 /* m = p * ( m * R_1 ) % R */ 303 /* m = p * ( m * R_1 ) % R */
460 for( leg = 0; leg < legs; ++leg ) 304 for( limb = 0; limb < limbs; ++limb )
461 { 305 {
462 leg_t k = temp[leg] * g_r_inverse; 306 limb_t k = temp[limb] * r_inverse;
463 mp_mul_uint_add( temp+leg, g_prime, k, legs ); 307 mp_mul_uint_add( temp+limb, modulus, k, limbs );
464 } 308 }
465 309
466 /* the lower legs of temp are now zero 310 /* the lower limbs of temp are now zero
467 if necessary, reduce temp to fit in legs */ 311 if necessary, reduce temp to fit in limbs */
468 if( temp[2*legs] || mp_cmp( temp + legs, g_prime, legs ) > 0 ) 312 if( temp[2*limbs] || mp_cmp( temp + limbs, modulus, limbs ) > 0 )
469 mp_sub( temp + legs, g_prime, legs ); 313 mp_sub( temp + limbs, modulus, limbs );
470 314
471 memcpy( a, temp + legs, legs * sizeof(leg_t) ); 315 memcpy( a, temp + limbs, limbs * sizeof(limb_t) );
472 memset( scratch, 0, sizeof( scratch ) ); 316 memset( scratch, 0, sizeof(scratch));
473 memset( temp, 0, sizeof( temp ) ); 317 memset( temp, 0, sizeof(temp));
474} 318}
475 319
476/* calculate base ^ exponent modulo g_prime */ 320/* calculate base ^ exponent modulo modulus */
477static void powm( leg_t * result, leg_t const *exponent, leg_t base, int legs ) 321void powm_internal( limb_t * result,
322 const limb_t * base, uint32_t base_limbs,
323 const limb_t * exponent, uint32_t exp_limbs,
324 const limb_t * modulus, uint32_t mod_limbs,
325 const limb_t * r_square, const limb_t r_inverse )
478{ 326{
479 leg_t acc[legs]; 327 limb_t acc[mod_limbs];
480#ifdef WITH_PREVENT_TIMING_ATTACKS 328#ifdef WITH_PREVENT_TIMING_ATTACKS
481 leg_t dummy[legs]; 329 limb_t dummy[mod_limbs];
482#endif 330#endif
483 int first = 0, bit; 331 int first = 0, bit;
332 int expbits = 8 * sizeof(limb_t) * exp_limbs;
333
334 memset( result, 0, sizeof(limb_t) * mod_limbs );
335 memset( acc, 0, sizeof(limb_t) * mod_limbs );
484 336
485 memset( acc, 0, sizeof(acc) ); 337 memcpy( acc, base, sizeof(limb_t) * base_limbs );
486 *acc = base; 338 //*acc = base;
487 339
488 /* Transform base into montgomery domain */ 340 /* Transform base into montgomery domain */
489 redc( acc, g_r_square, legs, 1 ); 341 redc( acc, r_square, mod_limbs, modulus, r_inverse, 1 );
490 342
491 /* mul in temp and if bit set in exponent, multiply into accumulator */ 343 /* mul in temp and if bit set in exponent, multiply into accumulator */
492 for( bit = 0; bit < legs * 8 * sizeof(leg_t); bit++ ) 344 for( bit = 0; bit < expbits ; bit++ )
493 { 345 {
494 int this_bit = bit % ( sizeof(leg_t) * 8 ); 346 int this_bit = bit % ( sizeof(limb_t) * 8 );
495 if( ( exponent[ bit / ( sizeof(leg_t) * 8 ) ] >> this_bit ) & 1 ) { 347 if( ( exponent[ bit / ( sizeof(limb_t) * 8 ) ] >> this_bit ) & 1 ) {
496 if( first++ ) 348 if( first++ )
497 redc( result, acc, legs, 1 ); 349 redc( result, acc, mod_limbs, modulus, r_inverse, 1 );
498 else 350 else
499 memcpy( result, acc, sizeof(leg_t) * legs ); 351 memcpy( result, acc, sizeof(limb_t) * mod_limbs );
500 } 352 }
501#ifdef WITH_PREVENT_TIMING_ATTACKS 353#ifdef WITH_PREVENT_TIMING_ATTACKS
502 else 354 else
503 redc( dummy, acc, legs, 1 ); 355 redc( dummy, acc, mod_limbs, modulus, r_inverse, 1 );
504#endif 356#endif
505 redc( acc, acc, legs, 1 ); 357 redc( acc, acc, mod_limbs, modulus, r_inverse, 1 );
506 } 358 }
507 359
508 /* Transform result in acc back into mod p domain */ 360 /* Transform result in acc back into mod m domain */
509 if( first ) 361 if( first )
510 redc( result, 0, legs, 0 ); 362 redc( result, 0, mod_limbs, modulus, r_inverse, 0 );
511 else /* base ^ 0 mod p = 1 */ 363 else /* base ^ 0 mod m = 1 */
512 { 364 {
513 memset( result+1, 0, legs * sizeof(leg_t) ); 365 memset( result+1, 0, mod_limbs * sizeof(limb_t) );
514 *result = 1; 366 *result = 1;
515 } 367 }
516} 368}
517 369
518static void mp_import( leg_t *dest, uint8_t const * src, size_t len ) 370uint32_t mp_import( limb_t *dest, uint8_t const * src, uint32_t len )
519{ 371{
520 int byte = 0; 372 int byte = 0;
521 leg_t acc = 0; 373 limb_t *d = dest;
374 limb_t acc = 0;
375
522 while( len-- ) 376 while( len-- )
523 { 377 {
524 acc |= ((leg_t)src[len]) << ( 8 * byte ); 378 acc |= ((limb_t)src[len]) << ( 8 * byte );
525 if( ++byte == sizeof(leg_t)) 379 if( ++byte == sizeof(limb_t))
526 { 380 {
527 *(dest++) = acc; 381 *(d++) = acc;
528 acc = 0; 382 acc = 0;
529 byte = 0; 383 byte = 0;
530 } 384 }
531 } 385 }
532 if( byte ) 386 if( byte )
533 *dest = acc; 387 *(d++) = acc;
388 return ( d - dest );
534} 389}
535 390
536int main() 391void mp_export( uint8_t * dest, const limb_t * src, uint32_t limb_size )
537{ 392{
538 int i, legs = 64; 393 while( limb_size )
539 leg_t input[ /*legs*/ ] = { 394 {
540 0x281b04b30bb62984, 0xe0cc506399225574, 0xb08ad95f8040183f, 0x56eee1ba0dc1936c, 395 limb_t outlimb = src[--limb_size];
541 0x5ec1d62932d6937a, 0xfa8718685a5b8359, 0xfb4e8dadb3111df4, 0x87e90f5b6397727e, 396 int cnt = sizeof( limb_t );
542 0x6b984d2e7b9cc244, 0x9ae478fd69254dc9, 0x21da1ef76099ed94, 0xe407bde8519bc534, 397 while( cnt )
543 0x051a88159adc842b, 0x547fd48d3b471c7c, 0xd6b85c29e8e8a528, 0x74fc0b5dcdfaa679, 398 *(dest++) = (uint8_t)( outlimb >> ( 8 * --cnt ) );
544 0xe0c022a74c073360, 0xc0dc8c759af7492b, 0xa86946df6abbea0f, 0xfcc2a7f6d4b0d4f7, 399 }
545 0xd2111f6ce69c2bc6, 0x4ab8b5e0adaad6a0, 0xf8df973a9842e7b2, 0x2d44f438b889673c,
546 0x82b4e75bf0e60d06, 0x4c3b1333a07fee24, 0x353eb64e067f86cc, 0x518781609b447bf8,
547 0xe9228ae741da24cd, 0xd8aec44d430026aa, 0x196bcb0cc0e353f8, 0x942fad7b2cc3dcc5,
548 0xa77b229138f2d5d1, 0x5479eaeb9b64ba87, 0x70cc7ff6ea8f1b51, 0x1785bbfb88755c99,
549 0xd7ac4896d4e08b35, 0x0e2aab629710919b, 0xa3612061c92319ce, 0xb5444312b3b43541,
550 0xa13b801d1ac631d5, 0x87e0db1397d9f2fd, 0x0e84ab94e744b8ba, 0x1f61b1ea0e5aafb4,
551 0x70f0c055dde43121, 0x094d72c301b8f25d, 0xd151e2770baec00b, 0x31052a6fc332563d,
552 0x2bca50f311573101, 0xb420bd7e538ef88b, 0xac5ecd1fe4997513, 0x3ca892560247ca32,
553 0x950e895d7777d182, 0xcb650843a43a4a00, 0xd07dc909e7070b0e, 0x72d720399b4ad2d4,
554 0x52292d56ea2960a9, 0x6060e2cd96000219, 0x118727b5eb17dfbd, 0xa6d78571af8d3688,
555 0xb563a9cf79c6eaec, 0xe5d85f8fb8fced5c, 0x9ac2961fe160f837, 0x1de7eae6c0d6a0b0,
556 };
557
558 leg_t res[legs]; /*
559= { 569D056BC5485C6F3C026935CFC1FC36CFC5A6B770B442323750EE08A32C63F64743A4902E32A41F7C5A9097509EB9AAA5C5B425D9BDAEDC3B98EE3A9F99AE6FB8723EBE01154937ECC1E4AE38CBC12B767FD17264A4AAEB310EA5B0E72D12FE3969532C5A0718B45F251BFB2A126EFB87F819BDCD434EAEE3B9108C387F4DFFEFA0E9532A0532192034DB0EF5C5BB2A77BCA5B4A08DFFD22F6B1D4F17F5123935CAB4F40EC7E77F4FEEEC6A8CD20CDB2D37DD1BB44544C50B01E98F8B3F5E1ABF91817D40B7E526D6ED80CDE416C6CD32218D3ECCA753D15BBB007661754E44A937C5A68957C3C2D3F39A220930567E9F720B3AAE9D34FEBA6E5A047C131F32271A3BC1E85BC1BC210F49B09F3938226399991BC90299B4E0E684B2DE6BDA684693F40CF728FD24A4B1D1A2B8F4CD3316A02DBC44069A40C1AC64F862091A888DD284AB180918B379A3BD173EDDEA5E08DF4BE51B441AEA7EDA4E439CF57914F68E0FB3E756AB7286616C3736CA72DD40A88FC37B598BF561300AFD1F80F7D44E53C4C1D85FF215250A0C56222A177299C61C2E654B040176F4A8D44BE2612B36E066E7AA6E58D84D4C4CB9A5952E1668CDF04EE55C38ABA59614CB7F43D12921551DD82062AFC3F340B33F43C643BC9D5DB6D6BAB2C94F100D139443B20BCB80384C3231261C5ABFFF3D971CB878116F4EBB7D4D24A2B6F5869 }
560*/
561
562#ifdef WITH_TESTS
563 run_tests();
564#endif
565
566#ifdef WITH_MEASURE_GMP
567 mpz_t m, base, exp, result;
568
569 mpz_init( m ); mpz_init( base ); mpz_init( exp ); mpz_init( result );
570 mpz_import( m, sizeof(g_prime) / sizeof(uint64_t), -1, sizeof(uint64_t), 0, 0, g_prime );
571 mpz_set_ui( base, 2 );
572 mpz_import( exp, sizeof(input) / sizeof(uint64_t), -1, sizeof(uint64_t), 0, 0, input );
573
574 for( i=0; i< WITH_ROUNDS; ++i )
575 mpz_powm( result, base, exp, m );
576
577 gmp_printf("%ZX\n", result);
578#else
579
580 for( i=0; i<WITH_ROUNDS; ++i )
581 powm( res, input, 2, legs );
582
583 for( i=legs-1; i>=0; --i )
584 printf( "%016llX", res[i] );
585 putchar(10); putchar(10);
586#endif
587
588 return 0;
589}
590
591#ifdef WITH_TESTS
592#define TEST_MAX 1024
593static void REPORT( char * fail ) { fprintf( stderr, "\nFailed in %s\n", fail ); exit(-1); }
594static leg_t test_001[] = { (leg_t)-1, 1 };
595static leg_t test_002[] = { (leg_t)-1, 2 };
596static leg_t test_003[] = { 0, 1 };
597static leg_t test_004[] = { (leg_t)-1, 1, 1 };
598static leg_t test_005[] = { 0, (leg_t)-1, 0 };
599static leg_t test_006[] = { (leg_t)-2, 4, 0 };
600static leg_t test_007[] = { (leg_t)-1, (leg_t)-2 };
601static leg_t test_008[] = { (leg_t)-1, (leg_t)-3 };
602static leg_t test_009[] = { (leg_t)-2, (leg_t)-4, 0 };
603static leg_t test_010[] = { (leg_t)-2, (leg_t)-4, 1 };
604
605static uint8_t test_011[] = {
606 0xf2, 0xda, 0xe5, 0x35, 0xa0, 0xbd, 0xb8, 0x4c, 0x88, 0x65, 0x50, 0xca, 0xe8, 0x17, 0xa8, 0xab, 0x34, 0xd6, 0x86, 0xc4, 0x29, 0xa0, 0xc8, 0x51, 0x84, 0x05, 0x3e, 0x9d, 0x4d, 0x23, 0xce, 0x75,
607 0x1f, 0xa6, 0xc1, 0x36, 0x86, 0x35, 0xf5, 0x6f, 0xcc, 0x0b, 0x13, 0x40, 0x89, 0xc2, 0x41, 0xb0, 0x0e, 0xa7, 0x68, 0xd4, 0x68, 0x6a, 0xd2, 0x08, 0xf7, 0xf5, 0x2c, 0x05, 0x11, 0x43, 0x00, 0x9b,
608 0xf4, 0x92, 0xe1, 0x17, 0xae, 0xcc, 0x05, 0x98, 0x95, 0xc4, 0x33, 0xfd, 0x54, 0x42, 0x06, 0x98, 0x90, 0x62, 0x61, 0x15, 0x34, 0xe8, 0xbd, 0x37, 0xe0, 0x57, 0x27, 0x90, 0x75, 0x08, 0x33, 0x15,
609 0xe5, 0x48, 0x19, 0xbb, 0x2c, 0xc1, 0xda, 0xeb, 0x95, 0x72, 0x97, 0x05, 0x60, 0xbf, 0xbf, 0x80, 0xfe, 0x04, 0x7a, 0x4a, 0xff, 0x0e, 0xa0, 0x47, 0x82, 0x0d, 0x90, 0xf0, 0x5b, 0xfc, 0x21, 0x82
610};
611static uint8_t test_012[] = {
612 0x0c, 0xc4, 0xc0, 0xb7, 0x13, 0xee, 0xf6, 0xf3, 0xcd, 0xf7, 0x02, 0x0b, 0x97, 0x70, 0xc0, 0x59, 0x27, 0xd2, 0x74, 0xbf, 0x54, 0xbf, 0x9d, 0x77, 0x59, 0xe0, 0x86, 0x06, 0x18, 0x7a, 0x3c, 0x3e,
613 0x12, 0x52, 0xed, 0x77, 0x10, 0xf9, 0x4b, 0xb8, 0xb7, 0x2b, 0xdc, 0x23, 0xe0, 0x45, 0x79, 0xe2, 0xe9, 0xef, 0x12, 0x97, 0x22, 0xf8, 0x72, 0xa2, 0x05, 0x1f, 0x5f, 0x36, 0xae, 0xfb, 0x7a, 0xb2,
614 0x7d, 0xd7, 0xa2, 0x15, 0xa4, 0x9f, 0x27, 0x55, 0x65, 0x2a, 0x49, 0x0f, 0xca, 0x9f, 0xf3, 0xf6, 0x26, 0xcb, 0x57, 0x07, 0x82, 0xb7, 0x04, 0x1a, 0x7b, 0x65, 0x40, 0xb8, 0x55, 0x93, 0xee, 0xd4,
615 0x09, 0x3e, 0x30, 0xa1, 0x6c, 0x00, 0x90, 0x54, 0x9b, 0x05, 0xb4, 0x08, 0xc4, 0xcc, 0xcd, 0xa2, 0xf6, 0xe6, 0x34, 0xf3, 0x5f, 0xe0, 0xe4, 0xb3, 0xcd, 0xd3, 0x8f, 0x83, 0xb2, 0xe8, 0x08, 0xb6
616};
617static uint8_t test_013[] = {
618 0x0C, 0x1C, 0xE9, 0x27, 0xD9, 0xE2, 0xF8, 0x92, 0x97, 0x0B, 0x8E, 0x79, 0x91, 0x07, 0xE7, 0xAA, 0xF8, 0x5C, 0x11, 0xAB, 0x84, 0xFA, 0x0C, 0x6F, 0xED, 0xA5, 0x0C, 0x1F, 0x4C, 0x55, 0x5F, 0x5B,
619 0x70, 0x63, 0x22, 0x85, 0x72, 0x6B, 0x3D, 0x9A, 0x78, 0x6E, 0xBF, 0x2E, 0x29, 0xB8, 0x2E, 0xE4, 0x4C, 0x09, 0x35, 0x45, 0x92, 0x26, 0xCC, 0x29, 0x15, 0xFC, 0x9A, 0x2F, 0x58, 0x4E, 0x70, 0xDD,
620 0x00, 0xA9, 0xC7, 0xDE, 0xF5, 0x26, 0x85, 0x23, 0xDF, 0xBF, 0x00, 0x61, 0xC7, 0x47, 0x0E, 0x23, 0x9D, 0xC3, 0xB6, 0x17, 0x8C, 0x36, 0x19, 0x03, 0x70, 0xD1, 0xE1, 0xE7, 0x58, 0xDD, 0x10, 0xD5,
621 0x76, 0x3B, 0xA7, 0xAE, 0x73, 0xAC, 0x4E, 0xBF, 0x4B, 0x79, 0xE5, 0xDE, 0x19, 0x68, 0x1D, 0x19, 0xD4, 0xF1, 0xF9, 0x90, 0x0B, 0x55, 0x9A, 0x8A, 0xAE, 0x5C, 0x85, 0x0C, 0x84, 0xB1, 0x33, 0x2F,
622 0x0E, 0x99, 0xFC, 0xC6, 0x57, 0xE8, 0x81, 0x53, 0x23, 0x87, 0x5D, 0x41, 0xDF, 0xC9, 0xE0, 0xE5, 0xE1, 0x8C, 0x2C, 0x4E, 0x69, 0xAE, 0x99, 0x54, 0x19, 0x85, 0x7F, 0x7F, 0x19, 0x1E, 0x28, 0xC2,
623 0x04, 0xAB, 0x16, 0x1D, 0xE9, 0x22, 0x55, 0xD2, 0x2D, 0x45, 0x44, 0xD2, 0x16, 0x3C, 0x2B, 0xD3, 0xE5, 0x88, 0xF2, 0x5E, 0x97, 0x92, 0x51, 0xF0, 0x64, 0x7D, 0x27, 0x5E, 0x0F, 0x5C, 0x99, 0xD8,
624 0x02, 0xA8, 0xD3, 0x87, 0x90, 0x02, 0x9E, 0x4E, 0x9A, 0x9D, 0x72, 0xF3, 0xA8, 0xBC, 0x28, 0x74, 0xBD, 0x60, 0x92, 0xE1, 0x4A, 0x7E, 0x3E, 0xC2, 0xB3, 0x88, 0xBA, 0xAD, 0x8F, 0xFF, 0xCD, 0x57,
625 0xE2, 0x7E, 0x43, 0x68, 0x91, 0xC3, 0xBE, 0x50, 0x9E, 0xB8, 0xFA, 0x53, 0x3C, 0xEE, 0x1F, 0x4C, 0x59, 0x5D, 0x4B, 0xE8, 0x42, 0x32, 0xD5, 0xE4, 0xFC, 0xD7, 0xFD, 0x12, 0x08, 0x1B, 0xE2, 0x6C
626};
627
628static uint8_t test_014[] = {
629 0xE6, 0x62, 0x94, 0x84, 0x90, 0x75, 0x70, 0x5D, 0xF3, 0x6C, 0xC7, 0xFC, 0x45, 0x0C, 0x9F, 0xF7, 0x37, 0xB9, 0x71, 0x65, 0x62, 0x47, 0xC6, 0xD8, 0xFE, 0x93, 0xB0, 0xF1, 0xE2, 0x34, 0x78, 0x99,
630 0xE1, 0xED, 0x3C, 0xA7, 0xF0, 0xD3, 0x00, 0x26, 0xA0, 0x18, 0xAA, 0x43, 0xDB, 0xFA, 0xD1, 0xFE, 0x2A, 0x71, 0xC5, 0xD1, 0xA1, 0x1D, 0xC0, 0xD3, 0x26, 0x15, 0x43, 0x6F, 0x06, 0x35, 0x5D, 0x02,
631 0x3A, 0xFD, 0x58, 0xAA, 0x91, 0x4A, 0x27, 0xFC, 0xD5, 0x78, 0xBC, 0x65, 0xE6, 0xE6, 0xEC, 0x4E, 0x25, 0xBE, 0xEC, 0xDD, 0x82, 0xDB, 0x6D, 0xDE, 0x42, 0x4D, 0xE6, 0x39, 0x71, 0x80, 0x4B, 0xDA,
632 0x2E, 0xD2, 0x25, 0x70, 0x96, 0x9D, 0x7B, 0x1B, 0x61, 0x85, 0xE6, 0x3C, 0x31, 0x4B, 0x60, 0x07, 0xCA, 0x66, 0xEF, 0x98, 0xEA, 0xAE, 0xED, 0xB8, 0x91, 0xC2, 0x8A, 0x70, 0x3B, 0xAC, 0x0B, 0x0A,
633 0x3D, 0x80, 0xC9, 0x55, 0x96, 0x2F, 0xF3, 0xED, 0x55, 0xBC, 0x8B, 0x06, 0x62, 0xD9, 0x3F, 0x18, 0x4D, 0x87, 0x51, 0xD2, 0x11, 0x97, 0x7D, 0x16, 0xC5, 0xF0, 0xAB, 0x25, 0xCA, 0xB4, 0xF2, 0x66,
634 0xCE, 0xCE, 0x58, 0xBB, 0x23, 0x10, 0x53, 0x6E, 0xA8, 0x96, 0xAD, 0x67, 0x9A, 0xEE, 0x53, 0xF0, 0x37, 0xCF, 0x51, 0x88, 0x51, 0xB8, 0x45, 0x15, 0x19, 0xCD, 0x84, 0xAD, 0xEB, 0xC5, 0xAA, 0x9C,
635 0xCC, 0x03, 0x47, 0x2B, 0xDC, 0xCF, 0x86, 0x67, 0x04, 0xA0, 0xBA, 0xF5, 0x1D, 0x6B, 0xB2, 0xAB, 0x2E, 0xAA, 0x83, 0x5F, 0x56, 0x3F, 0x33, 0xB7, 0xA6, 0xA9, 0xF1, 0x0D, 0x95, 0x7B, 0x2F, 0x21,
636 0xB6, 0xC0, 0x0E, 0xF2, 0x03, 0x91, 0x56, 0x3C, 0x56, 0x4C, 0x91, 0x87, 0xB3, 0x68, 0x49, 0xC5, 0x8E, 0x12, 0x41, 0xB6, 0xFD, 0xD9, 0xC8, 0xE7, 0xAE, 0xB2, 0x4B, 0xE4, 0x68, 0x52, 0xC6, 0x04
637};
638
639static uint8_t test_015[] = {
640 0x00, 0xA3, 0x09, 0x48, 0xD3, 0x48, 0x0D, 0xE4, 0xF4, 0x42, 0xA7, 0xA7, 0x7E, 0x47, 0xE0, 0x07, 0x6E, 0x93, 0x73, 0x43, 0xE9, 0x42, 0x6D, 0xCC, 0x87, 0xAE, 0x3B, 0x87, 0x33, 0xAE, 0xDF, 0x4E,
641 0x32, 0xE5, 0x78, 0x0C, 0x93, 0xD9, 0x84, 0xCC, 0x31, 0x8B, 0xB4, 0x83, 0xC7, 0x45, 0x71, 0xA9, 0x3C, 0x9F, 0x60, 0x8E, 0xA0, 0xED, 0x80, 0xF5, 0xBD, 0xC1, 0x75, 0x70, 0xB5, 0xFE, 0x39, 0x2A,
642 0x90, 0xB9, 0x00, 0xF7, 0x7D, 0x78, 0x91, 0x43, 0xB4, 0xC2, 0x9F, 0xC9, 0x77, 0x8A, 0x26, 0xC9, 0x17, 0xF4, 0x08, 0x0F, 0x5A, 0x5A, 0x64, 0x99, 0x47, 0xF6, 0x6E, 0x41, 0x77, 0x35, 0x20, 0x03,
643 0xB6, 0xCC, 0x3C, 0x21, 0x4C, 0x87, 0x18, 0x7E, 0x8A, 0x13, 0xCA, 0x2D, 0x51, 0x53, 0x76, 0xAE, 0x94, 0x37, 0xA4, 0x32, 0x5F, 0x0E, 0x47, 0x50, 0x9A, 0xD6, 0xDB, 0x30, 0xED, 0x43, 0xAC, 0xC3,
644 0xD4, 0x1E, 0x53, 0xD2, 0x9C, 0x29, 0x69, 0x98, 0xF6, 0x82, 0xF2, 0x12, 0x50, 0x04, 0x72, 0xC9, 0x57, 0x17, 0xA8, 0xF9, 0x80, 0xED, 0xDF, 0x96, 0x54, 0x5E, 0x1D, 0x21, 0x51, 0xCB, 0xC9, 0xAF,
645 0xC5, 0xC2, 0x71, 0xE1, 0xB4, 0xF1, 0xDF, 0xCA, 0xC9, 0xF3, 0x71, 0x4C, 0x91, 0x03, 0xA9, 0x71, 0x3B, 0x08, 0xCA, 0x67, 0x2D, 0x65, 0x35, 0x38, 0x16, 0x02, 0xE5, 0xF9, 0x1C, 0x58, 0x23, 0xE6,
646 0x7E, 0x14, 0x97, 0x51, 0x6B, 0xDE, 0xEB, 0x14, 0xB0, 0xE7, 0xB8, 0x59, 0xAD, 0xC4, 0x98, 0xA2, 0xEC, 0x1F, 0x32, 0x35, 0x31, 0x96, 0x64, 0x82, 0x7B, 0x2A, 0x48, 0x59, 0x1B, 0xC8, 0x9C, 0xBB,
647 0x55, 0x6D, 0xE7, 0xAD, 0xBC, 0x14, 0x48, 0xF8, 0xF5, 0xBB, 0xD1, 0xF6, 0xC5, 0xD5, 0xDC, 0x62, 0x8F, 0x4D, 0x91, 0x92, 0xA2, 0x95, 0x4C, 0x95, 0xA8, 0x3F, 0xBC, 0xB0, 0xE2, 0x2B, 0xE1, 0x64
648};
649
650static int run_tests( )
651{
652 leg_t val1[TEST_MAX], val2[TEST_MAX], val3[TEST_MAX], scratch[TEST_MAX];
653 int i;
654
655 /***********************************************************************/
656 printf( "Testing mp_cmp... " );
657
658 printf( "1 " );
659 if( mp_cmp( test_001, test_001, sizeof(test_001) / sizeof(leg_t ) ))
660 REPORT( "MP_CMP test 1" );
661
662 printf( "2 " );
663 if( mp_cmp( test_001, test_002, sizeof(test_001) / sizeof(leg_t ) ) != -1 )
664 REPORT( "MP_CMP test 2" );
665
666 printf( "3 " );
667 if( mp_cmp( test_002, test_001, sizeof(test_001) / sizeof(leg_t ) ) != 1 )
668 REPORT( "MP_CMP test 3" );
669
670 printf( "success.\n" );
671
672 /***********************************************************************/
673 printf( "Testing mp_sub... " );
674 printf( "1 " );
675 memcpy( val1, test_002, sizeof(test_002) );
676 mp_sub( val1, test_001, sizeof(test_001) / sizeof(leg_t ) );
677 if( memcmp( val1, test_003, sizeof(test_003)) )
678 REPORT( "MP_SUB test 1" );
679
680 printf( "2 " );
681 memset( val1, 0, sizeof(val1) );
682 memcpy( val1, test_004, sizeof(test_004) );
683 mp_sub( val1, test_002, sizeof(test_002) / sizeof(leg_t ) );
684 if( memcmp( val1, test_005, sizeof(test_005)))
685 REPORT( "MP_SUB test 2" );
686
687 printf( "success.\n" );
688
689 /***********************************************************************/
690 printf( "Testing mp_add... " );
691 printf( "1 " );
692 memset( val1, 0, sizeof(val1) );
693 memcpy( val1, test_002, sizeof(test_002) );
694 mp_add( val1, test_001, sizeof(test_001) / sizeof(leg_t ), 0 );
695 if( memcmp( val1, test_006, sizeof(test_006)) )
696 REPORT( "MP_ADD test 1" );
697
698 printf( "2 " );
699 memset( val1, 0, sizeof(val1) );
700 memcpy( val1, test_007, sizeof(test_007) );
701 mp_add( val1, test_008, sizeof(test_008) / sizeof(leg_t ), 0 );
702 if( memcmp( val1, test_009, sizeof(test_009)) )
703 REPORT( "MP_ADD test 2" );
704
705 printf( "3 " );
706 memset( val1, 0, sizeof(val1) );
707 memcpy( val1, test_007, sizeof(test_007) );
708 mp_add( val1, test_008, sizeof(test_008) / sizeof(leg_t ), 1 );
709 if( memcmp( val1, test_010, sizeof(test_010)) )
710 REPORT( "MP_ADD test 3" );
711
712 printf( "success.\n" );
713 /***********************************************************************/
714
715 printf( "Testing mp_mul_oper_add... " );
716 printf( "1 " );
717 memset( val1, 0, sizeof(val1) );
718 mp_import( val2, test_011, sizeof(test_011) );
719 mp_import( val3, test_012, sizeof(test_012) );
720 mp_mul_oper_add( val1, val2, val3, sizeof(test_011) / sizeof(leg_t ) );
721 mp_import( val3, test_013, sizeof(test_013) );
722
723 if( memcmp( val1, val3, sizeof(test_013)) )
724 REPORT( "MP_MUL_OPER_ADD test 1" );
725
726 printf( "2 " );
727 memset( val1, 0, sizeof(val1) );
728 mp_import( val2, test_011, sizeof(test_011) );
729 mp_mul_oper_add( val1, val2, val2, sizeof(test_011) / sizeof(leg_t ) );
730 mp_import( val3, test_014, sizeof(test_014) );
731
732 if( memcmp( val1, val3, sizeof(test_014)) )
733 REPORT( "MP_MUL_OPER_ADD test 2" );
734
735 printf( "success.\n" );
736 /***********************************************************************/
737
738 printf( "Testing mp_sqr... " );
739 printf( "1 " );
740 memset( val1, 0, sizeof(val1) );
741 mp_import( val2, test_011, sizeof(test_011) );
742 mp_sqr( val1, val2, sizeof(test_011) / sizeof(leg_t ) );
743 mp_import( val3, test_014, sizeof(test_014) );
744
745 if( memcmp( val1, val3, sizeof(test_014)) )
746 REPORT( "MP_SQR test 1" );
747
748 printf( "2 " );
749 memset( val1, 0, sizeof(val1) );
750 mp_import( val2, test_012, sizeof(test_012) );
751 mp_sqr( val1, val2, sizeof(test_012) / sizeof(leg_t ) );
752 mp_import( val3, test_015, sizeof(test_015) );
753
754 if( memcmp( val1, val3, sizeof(test_015)) )
755 REPORT( "MP_SQR test 2" );
756
757 printf( "success.\n" );
758 /***********************************************************************/
759
760 printf( "Testing mp_mul_kara_square... " );
761 printf( "1 " );
762 memset( val1, 0, sizeof(val1) );
763 mp_import( val2, test_011, sizeof(test_011) );
764 mp_mul_kara_square( val1, val2, sizeof(test_011) / sizeof(leg_t ), scratch );
765 mp_import( val3, test_014, sizeof(test_014) );
766
767 if( memcmp( val1, val3, sizeof(test_014)) )
768 REPORT( "MP_MUL_KARA_SQUARE test 1" );
769
770 printf( "2 " );
771 memset( val1, 0, sizeof(val1) );
772 mp_import( val2, test_012, sizeof(test_012) );
773 mp_mul_kara_square( val1, val2, sizeof(test_012) / sizeof(leg_t ), scratch );
774 mp_import( val3, test_015, sizeof(test_015) );
775
776 if( memcmp( val1, val3, sizeof(test_015)) )
777 REPORT( "MP_MUL_KARA_SQUARE test 2" );
778
779 printf( "success.\n" );
780 /***********************************************************************/
781
782 printf( "Testing mp_mul_kara... " );
783 printf( "1 " );
784 memset( val1, 0, sizeof(val1) );
785 mp_import( val2, test_011, sizeof(test_011) );
786 mp_import( val3, test_012, sizeof(test_012) );
787 mp_mul_kara( val1, val2, val3, sizeof(test_011) / sizeof(leg_t ), scratch );
788 mp_import( val3, test_013, sizeof(test_013) );
789
790 if( memcmp( val1, val3, sizeof(test_013)) )
791 REPORT( "MP_MUL_KARA test 1" );
792
793 printf( "2 " );
794 memset( val1, 0, sizeof(val1) );
795 mp_import( val2, test_011, sizeof(test_011) );
796 mp_import( val2, test_011, sizeof(test_011) );
797 mp_mul_kara( val1, val2, val2, sizeof(test_011) / sizeof(leg_t ), scratch );
798 mp_import( val3, test_014, sizeof(test_014) );
799
800 if( memcmp( val1, val3, sizeof(test_014)) )
801 REPORT( "MP_MUL_KARA test 2" );
802
803 printf( "3 " );
804 memset( val1, 0, sizeof(val1) );
805 mp_import( val2, test_012, sizeof(test_012) );
806 mp_import( val2, test_012, sizeof(test_012) );
807 mp_mul_kara( val1, val2, val2, sizeof(test_012) / sizeof(leg_t ), scratch );
808 mp_import( val3, test_015, sizeof(test_015) );
809
810 if( memcmp( val1, val3, sizeof(test_015)) )
811 REPORT( "MP_MUL_KARA test 3" );
812
813 printf( "success.\n" );
814 /***********************************************************************/
815
816
817// putchar(10);
818// for( i = sizeof(test_013)/sizeof(leg_t )-1; i>=0; --i) printf( "%016llX", val1[i] );
819// putchar(10);
820// for( i = sizeof(test_013)/sizeof(leg_t )-1; i>=0; --i) printf( "%016llX", val3[i] );
821// putchar(10);
822
823} 400}
824
825#endif
diff --git a/powm.h b/powm.h
new file mode 100644
index 0000000..7290165
--- /dev/null
+++ b/powm.h
@@ -0,0 +1,44 @@
1#ifndef __LIBGSMKBIGNUM_POWM_H__
2#define __LIBGSMKBIGNUM_POWM_H__
3
4#include <stdint.h>
5#include <stdlib.h>
6
7#ifdef __cplusplus
8extern "C" {
9#endif
10
11#ifdef USE32BITLIMB
12typedef uint32_t limb_t;
13typedef uint64_t dlimb_t;
14#else
15typedef uint64_t limb_t;
16typedef unsigned int uint128_t __attribute__((mode(TI)));
17typedef uint128_t dlimb_t;
18#endif
19
20
21/* import from serialized format, big endian byte representation
22 return amount of limbs */
23uint32_t mp_import( limb_t *dest, uint8_t const * src, uint32_t byte_size );
24void mp_export( uint8_t *dest, const limb_t *scr, uint32_t limb_size );
25
26/* This function works with precalculated parameters passed in r_square
27 and r_inverse. The latter being 1, if the bottom limb_t is -1. */
28void powm_internal( limb_t * result,
29 const limb_t * base, uint32_t base_limbs,
30 const limb_t * exponent, uint32_t exp_limbs,
31 const limb_t * modulus, uint32_t mod_limbs,
32 const limb_t * r_square, const limb_t r_inverse );
33
34/* Not yet implemented */
35void powm( limb_t * result,
36 const limb_t * base, uint32_t base_limbs,
37 const limb_t * exponent, int expbits,
38 const limb_t * modulus, uint32_t mod_limbs );
39
40#ifdef __cplusplus
41}
42#endif
43
44#endif