|
diff --git a/powm.c b/powm.c index 5c281a6..1f0b0c7 100644 --- a/ powm.c+++ b/ powm.c |
| @@ -1,249 +1,89 @@ |
| |
1 | #include "powm.h" |
| 1 | #include <stdlib.h> |
2 | #include <stdlib.h> |
| 2 | #include <stdint.h> |
3 | #include <stdint.h> |
| |
4 | #include <stddef.h> |
| 3 | #include <string.h> |
5 | #include <string.h> |
| 4 | #include <stdio.h> |
6 | #include <stdio.h> |
| 5 | #include <assert.h> |
7 | #include <assert.h> |
| 6 | |
8 | |
| 7 | /* |
9 | /* |
| 8 | This code implements base ^ exp mod p with base being a small integer, |
10 | This code implements result = base ^ exp (mod m) |
| 9 | g given in g_prime and the r number of bits (4096) being used to pre-calculate |
| |
| 10 | g_r_square = r^2 mod p in order to speed up montgomery reduction |
| |
| 11 | |
11 | |
| 12 | Because our primes conveniently end in 0xf..f for word sizes up to 64, the |
12 | Internally bignums are represented by limbs of type limb_t ordered in little |
| 13 | multiplicative inverse is ("pre-calculated") g_r_inverse = 1. |
13 | endian (LLF). |
| 14 | |
14 | |
| 15 | Todo: check if little endian internal representation is more efficient, also |
15 | Currently, with gcc -O3, we measure at around 2,3x the time libgmp needs for |
| 16 | write an import/export function for the exponent/result from big endian uint8_t |
16 | it's powm which is not so bad, considering we don't use any assembler |
| 17 | representation which is the default transport representation. |
17 | optimization. |
| 18 | |
| |
| 19 | Currently, with gcc -O3, we measure at around 2,75x the time libgmp needs for |
| |
| 20 | it's powm which is not so bad, considering we don't use any assembler optimization. |
| |
| 21 | |
18 | |
| 22 | Hot spot function is mp_mul_uint_add. |
19 | Hot spot function is mp_mul_uint_add. |
| 23 | |
20 | |
| 24 | When we're done measuring, we should set PREVENT_TIMING_ATTACKS so that powm() |
21 | We should set PREVENT_TIMING_ATTACKS so that powm() does not leak information |
| 25 | does not leak information about the number of bits set, but on average doubling the |
22 | about the number of bits set, but on average doubling the time needed. |
| 26 | time needed. Discuss ;) |
| |
| 27 | */ |
23 | */ |
| 28 | |
24 | |
| 29 | #if 0 |
25 | #define WITH_PREVENT_TIMING_ATTACKS |
| 30 | typedef uint32_t leg_t; |
| |
| 31 | typedef uint64_t dleg_t; |
| |
| 32 | #else |
| |
| 33 | typedef uint64_t leg_t; |
| |
| 34 | typedef unsigned int uint128_t __attribute__((mode(TI))); |
| |
| 35 | typedef uint128_t dleg_t; |
| |
| 36 | #endif |
| |
| 37 | |
| |
| 38 | //#define WITH_MEASURE_GMP |
| |
| 39 | //#define WITH_PREVENT_TIMING_ATTACKS |
| |
| 40 | //#define WITH_TESTS |
| |
| 41 | //#define WITHOUT_UNROLLING |
26 | //#define WITHOUT_UNROLLING |
| 42 | #define WITH_ROUNDS 128 |
27 | #define WITH_ROUNDS 128 |
| 43 | |
| |
| 44 | #ifdef WITH_TESTS |
| |
| 45 | static int run_tests( ); |
| |
| 46 | #define KARATSUBA_THRESHOLD 4 |
| |
| 47 | #else |
| |
| 48 | #define KARATSUBA_THRESHOLD 16 |
28 | #define KARATSUBA_THRESHOLD 16 |
| 49 | #endif |
| |
| 50 | |
| |
| 51 | #ifdef WITH_MEASURE_GMP |
| |
| 52 | #include <gmp.h> |
| |
| 53 | #endif |
| |
| 54 | |
| |
| 55 | #ifdef DEBUG_FUNCTIONS |
| |
| 56 | static void dump_int( int level, char * prefix, leg_t const * p, int l, int nl ) |
| |
| 57 | { |
| |
| 58 | printf( "L%d %s: ", level, prefix ); |
| |
| 59 | while( l-- ) |
| |
| 60 | printf( "%08llX", p[l] ); |
| |
| 61 | if( nl ) |
| |
| 62 | putchar( 10 ); |
| |
| 63 | } |
| |
| 64 | #endif |
| |
| 65 | |
| |
| 66 | /* Test values for smaller legs sizes */ |
| |
| 67 | #if 0 |
| |
| 68 | static leg_t g_prime[] = { 4293918689 }; |
| |
| 69 | static leg_t g_r_square[] = { 333456065 }; |
| |
| 70 | static leg_t g_r_inverse = 4192173023; |
| |
| 71 | #endif |
| |
| 72 | |
| |
| 73 | #if 0 |
| |
| 74 | static leg_t g_prime[] = { 0xFFFFFFFF, 0xFFFFFFA1 }; |
| |
| 75 | /* g_prime == 18446744073709551521 */ |
| |
| 76 | /* g_r == 18446744073709551616 */ |
| |
| 77 | /* g_r^2 == 340282366920938463463374607431768211456 */ |
| |
| 78 | static leg_t g_r_square[] = { 0, 9025 }; |
| |
| 79 | /* g_prime^-1 mod g_r = 12815632724892951649 */ |
| |
| 80 | static leg_t g_r_inverse = 3571604383; |
| |
| 81 | #endif |
| |
| 82 | |
| |
| 83 | #if 0 |
| |
| 84 | static leg_t g_prime[] = { 0xffffffff, 0xffffffff, 0xffffffff, 0xffffff61 }; |
| |
| 85 | /* g_prime == 340282366920938463463374607431768211297 */ |
| |
| 86 | /* g_r == 340282366920938463463374607431768211456 */ |
| |
| 87 | /* g_r^2 == 115792089237316195423570985008687907853269984665640564039457584007913129639936 */ |
| |
| 88 | static leg_t g_r_square[] = { 0, 0, 0, 25281 }; |
| |
| 89 | /* g_prime^-1 mod g_r = 104866892950477891256008526818595234977 */ |
| |
| 90 | static leg_t g_r_inverse = 783358815; |
| |
| 91 | #endif |
| |
| 92 | |
| |
| 93 | #if 0 |
| |
| 94 | static leg_t g_prime[] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x61 }; |
| |
| 95 | /* g_prime == 340282366920938463463374607431768211297 */ |
| |
| 96 | /* g_r == 340282366920938463463374607431768211456 */ |
| |
| 97 | /* g_r^2 == 115792089237316195423570985008687907853269984665640564039457584007913129639936 */ |
| |
| 98 | static leg_t g_r_square[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x62, 0xc1 }; |
| |
| 99 | /* g_prime^-1 mod g_r = 104866892950477891256008526818595234977 */ |
| |
| 100 | static leg_t g_r_inverse = 0x5f; |
| |
| 101 | #endif |
| |
| 102 | |
| |
| 103 | #if 0 |
| |
| 104 | /* Standard prime for leg_t defined as uint32_t */ |
| |
| 105 | static const leg_t g_prime[] = { |
| |
| 106 | 0xFFFFFFFF, 0xFFFFFFFF, 0xC90FDAA2, 0x2168C234, 0xC4C6628B, 0x80DC1CD1, 0x29024E08, 0x8A67CC74, |
| |
| 107 | 0x020BBEA6, 0x3B139B22, 0x514A0879, 0x8E3404DD, 0xEF9519B3, 0xCD3A431B, 0x302B0A6D, 0xF25F1437, |
| |
| 108 | 0x4FE1356D, 0x6D51C245, 0xE485B576, 0x625E7EC6, 0xF44C42E9, 0xA637ED6B, 0x0BFF5CB6, 0xF406B7ED, |
| |
| 109 | 0xEE386BFB, 0x5A899FA5, 0xAE9F2411, 0x7C4B1FE6, 0x49286651, 0xECE45B3D, 0xC2007CB8, 0xA163BF05, |
| |
| 110 | 0x98DA4836, 0x1C55D39A, 0x69163FA8, 0xFD24CF5F, 0x83655D23, 0xDCA3AD96, 0x1C62F356, 0x208552BB, |
| |
| 111 | 0x9ED52907, 0x7096966D, 0x670C354E, 0x4ABC9804, 0xF1746C08, 0xCA18217C, 0x32905E46, 0x2E36CE3B, |
| |
| 112 | 0xE39E772C, 0x180E8603, 0x9B2783A2, 0xEC07A28F, 0xB5C55DF0, 0x6F4C52C9, 0xDE2BCBF6, 0x95581718, |
| |
| 113 | 0x3995497C, 0xEA956AE5, 0x15D22618, 0x98FA0510, 0x15728E5A, 0x8AAAC42D, 0xAD33170D, 0x04507A33, |
| |
| 114 | 0xA85521AB, 0xDF1CBA64, 0xECFB8504, 0x58DBEF0A, 0x8AEA7157, 0x5D060C7D, 0xB3970F85, 0xA6E1E4C7, |
| |
| 115 | 0xABF5AE8C, 0xDB0933D7, 0x1E8C94E0, 0x4A25619D, 0xCEE3D226, 0x1AD2EE6B, 0xF12FFA06, 0xD98A0864, |
| |
| 116 | 0xD8760273, 0x3EC86A64, 0x521F2B18, 0x177B200C, 0xBBE11757, 0x7A615D6C, 0x770988C0, 0xBAD946E2, |
| |
| 117 | 0x08E24FA0, 0x74E5AB31, 0x43DB5BFC, 0xE0FD108E, 0x4B82D120, 0xA9210801, 0x1A723C12, 0xA787E6D7, |
| |
| 118 | 0x88719A10, 0xBDBA5B26, 0x99C32718, 0x6AF4E23C, 0x1A946834, 0xB6150BDA, 0x2583E9CA, 0x2AD44CE8, |
| |
| 119 | 0xDBBBC2DB, 0x04DE8EF9, 0x2E8EFC14, 0x1FBECAA6, 0x287C5947, 0x4E6BC05D, 0x99B2964F, 0xA090C3A2, |
| |
| 120 | 0x233BA186, 0x515BE7ED, 0x1F612970, 0xCEE2D7AF, 0xB81BDD76, 0x2170481C, 0xD0069127, 0xD5B05AA9, |
| |
| 121 | 0x93B4EA98, 0x8D8FDDC1, 0x86FFB7DC, 0x90A6C08F, 0x4DF435C9, 0x34063199, 0xFFFFFFFF, 0xFFFFFFFF |
| |
| 122 | }; |
| |
| 123 | |
| |
| 124 | static const leg_t g_r_square[] = { |
| |
| 125 | 0x3DA97659, 0xE280DB0B, 0xE65BCC3A, 0xB78FDAA9, 0xB7B768C8, 0x9931D78D, 0xF8B11725, 0x339EBC93, |
| |
| 126 | 0xAA7FBD95, 0x62059F1F, 0x4C2DE67D, 0xAD47527E, 0x526A653A, 0x7A674BD5, 0x5401EA4F, 0x3ED73A2F, |
| |
| 127 | 0x70B56F52, 0x7F6F604F, 0xF3E56CC2, 0xBD9F048C, 0xFEA80D9A, 0x6EC9FCD3, 0x3AD36FD8, 0x22C39F34, |
| |
| 128 | 0x91F30C52, 0xF798DA6A, 0x18C3DCE2, 0x0188D84C, 0xCB225176, 0x259E080F, 0xA89D1DCD, 0x9D381CC5, |
| |
| 129 | 0xBEACD46F, 0x3CDD1196, 0x91A4F557, 0x2929B90C, 0x67DE8FA0, 0x23864714, 0xC28A61D4, 0x7411402D, |
| |
| 130 | 0x6C09060D, 0x41058639, 0xE4FCCF1D, 0x638F4566, 0x9E10FDE2, 0x8E54806B, 0xF1D27D0B, 0x5C7DC9C2, |
| |
| 131 | 0xB616D6FA, 0x8BE2C91D, 0xE8105464, 0xE9F80A5F, 0x34720869, 0x90DACF1A, 0x7E2C75A5, 0x8E25F142, |
| |
| 132 | 0x8FB4832E, 0xF827DE84, 0xCA06DA91, 0xC2B3E7E2, 0x4F024193, 0x787A8278, 0x8BD70562, 0xDA60E392, |
| |
| 133 | 0x46BDB733, 0x6E8452D9, 0x1D7D37A2, 0x3FB8CF61, 0xB18A9EF1, 0x50C8953A, 0xC93919D1, 0x2A4B1A67, |
| |
| 134 | 0xFDC65A26, 0x9B51C1EF, 0x09954843, 0x20E739F4, 0x6C7951A5, 0x23CEF785, 0xE60C6EFD, 0xFFB7A9A9, |
| |
| 135 | 0x5F666146, 0xCB441F59, 0xAE01E0F3, 0x63A9315D, 0x04BA044A, 0xEB4EEFD4, 0x8563215F, 0x72C8D989, |
| |
| 136 | 0x62D21877, 0x1296EF6A, 0x20BD72B9, 0xB21E6B3D, 0x53C44FAB, 0x734810F7, 0xD203A9E0, 0xD7CE25D0, |
| |
| 137 | 0x2E52989E, 0xCCF85F34, 0x912A0491, 0x3E9EBD87, 0x8267537D, 0x4A612A18, 0x51E75D99, 0x98F001DB, |
| |
| 138 | 0xC9C77F0C, 0x352D408C, 0xA796D182, 0x04A636F7, 0xE4404092, 0x1C1E467C, 0x524E7C7A, 0x7ED36C41, |
| |
| 139 | 0x2A434CEB, 0x230B2DFE, 0x3549C577, 0x7A17FB04, 0xB850DE95, 0xD97AC40A, 0x55EA6F75, 0x41C4F82B, |
| |
| 140 | 0x37BF90FE, 0x52074F19, 0xFA8F75F0, 0x067E82B1, 0x8A1AC024, 0xB30E9B12, 0xC14AB0DD, 0xCC03AA20 |
| |
| 141 | }; |
| |
| 142 | |
| |
| 143 | static const leg_t g_r_inverse = 1; |
| |
| 144 | */ |
| |
| 145 | #endif |
| |
| 146 | |
| |
| 147 | #if 1 |
| |
| 148 | /* Standard prime for leg_t defined as uint64_t */ |
| |
| 149 | static const leg_t g_prime[] = { |
| |
| 150 | 0xFFFFFFFFFFFFFFFF, 0x4DF435C934063199, 0x86FFB7DC90A6C08F, 0x93B4EA988D8FDDC1, |
| |
| 151 | 0xD0069127D5B05AA9, 0xB81BDD762170481C, 0x1F612970CEE2D7AF, 0x233BA186515BE7ED, |
| |
| 152 | 0x99B2964FA090C3A2, 0x287C59474E6BC05D, 0x2E8EFC141FBECAA6, 0xDBBBC2DB04DE8EF9, |
| |
| 153 | 0x2583E9CA2AD44CE8, 0x1A946834B6150BDA, 0x99C327186AF4E23C, 0x88719A10BDBA5B26, |
| |
| 154 | 0x1A723C12A787E6D7, 0x4B82D120A9210801, 0x43DB5BFCE0FD108E, 0x08E24FA074E5AB31, |
| |
| 155 | 0x770988C0BAD946E2, 0xBBE117577A615D6C, 0x521F2B18177B200C, 0xD87602733EC86A64, |
| |
| 156 | 0xF12FFA06D98A0864, 0xCEE3D2261AD2EE6B, 0x1E8C94E04A25619D, 0xABF5AE8CDB0933D7, |
| |
| 157 | 0xB3970F85A6E1E4C7, 0x8AEA71575D060C7D, 0xECFB850458DBEF0A, 0xA85521ABDF1CBA64, |
| |
| 158 | 0xAD33170D04507A33, 0x15728E5A8AAAC42D, 0x15D2261898FA0510, 0x3995497CEA956AE5, |
| |
| 159 | 0xDE2BCBF695581718, 0xB5C55DF06F4C52C9, 0x9B2783A2EC07A28F, 0xE39E772C180E8603, |
| |
| 160 | 0x32905E462E36CE3B, 0xF1746C08CA18217C, 0x670C354E4ABC9804, 0x9ED529077096966D, |
| |
| 161 | 0x1C62F356208552BB, 0x83655D23DCA3AD96, 0x69163FA8FD24CF5F, 0x98DA48361C55D39A, |
| |
| 162 | 0xC2007CB8A163BF05, 0x49286651ECE45B3D, 0xAE9F24117C4B1FE6, 0xEE386BFB5A899FA5, |
| |
| 163 | 0x0BFF5CB6F406B7ED, 0xF44C42E9A637ED6B, 0xE485B576625E7EC6, 0x4FE1356D6D51C245, |
| |
| 164 | 0x302B0A6DF25F1437, 0xEF9519B3CD3A431B, 0x514A08798E3404DD, 0x020BBEA63B139B22, |
| |
| 165 | 0x29024E088A67CC74, 0xC4C6628B80DC1CD1, 0xC90FDAA22168C234, 0xFFFFFFFFFFFFFFFF |
| |
| 166 | }; |
| |
| 167 | |
| |
| 168 | static const leg_t g_r_square[] = { |
| |
| 169 | 0xC14AB0DDCC03AA20, 0x8A1AC024B30E9B12, 0xFA8F75F0067E82B1, 0x37BF90FE52074F19, |
| |
| 170 | 0x55EA6F7541C4F82B, 0xB850DE95D97AC40A, 0x3549C5777A17FB04, 0x2A434CEB230B2DFE, |
| |
| 171 | 0x524E7C7A7ED36C41, 0xE44040921C1E467C, 0xA796D18204A636F7, 0xC9C77F0C352D408C, |
| |
| 172 | 0x51E75D9998F001DB, 0x8267537D4A612A18, 0x912A04913E9EBD87, 0x2E52989ECCF85F34, |
| |
| 173 | 0xD203A9E0D7CE25D0, 0x53C44FAB734810F7, 0x20BD72B9B21E6B3D, 0x62D218771296EF6A, |
| |
| 174 | 0x8563215F72C8D989, 0x04BA044AEB4EEFD4, 0xAE01E0F363A9315D, 0x5F666146CB441F59, |
| |
| 175 | 0xE60C6EFDFFB7A9A9, 0x6C7951A523CEF785, 0x0995484320E739F4, 0xFDC65A269B51C1EF, |
| |
| 176 | 0xC93919D12A4B1A67, 0xB18A9EF150C8953A, 0x1D7D37A23FB8CF61, 0x46BDB7336E8452D9, |
| |
| 177 | 0x8BD70562DA60E392, 0x4F024193787A8278, 0xCA06DA91C2B3E7E2, 0x8FB4832EF827DE84, |
| |
| 178 | 0x7E2C75A58E25F142, 0x3472086990DACF1A, 0xE8105464E9F80A5F, 0xB616D6FA8BE2C91D, |
| |
| 179 | 0xF1D27D0B5C7DC9C2, 0x9E10FDE28E54806B, 0xE4FCCF1D638F4566, 0x6C09060D41058639, |
| |
| 180 | 0xC28A61D47411402D, 0x67DE8FA023864714, 0x91A4F5572929B90C, 0xBEACD46F3CDD1196, |
| |
| 181 | 0xA89D1DCD9D381CC5, 0xCB225176259E080F, 0x18C3DCE20188D84C, 0x91F30C52F798DA6A, |
| |
| 182 | 0x3AD36FD822C39F34, 0xFEA80D9A6EC9FCD3, 0xF3E56CC2BD9F048C, 0x70B56F527F6F604F, |
| |
| 183 | 0x5401EA4F3ED73A2F, 0x526A653A7A674BD5, 0x4C2DE67DAD47527E, 0xAA7FBD9562059F1F, |
| |
| 184 | 0xF8B11725339EBC93, 0xB7B768C89931D78D, 0xE65BCC3AB78FDAA9, 0x3DA97659E280DB0B |
| |
| 185 | }; |
| |
| 186 | |
| |
| 187 | static const leg_t g_r_inverse = 1; |
| |
| 188 | #endif |
| |
| 189 | |
29 | |
| 190 | /* Returns 0 if a and b are equal, -1 if a < b, 1 if a > b */ |
30 | /* Returns 0 if a and b are equal, -1 if a < b, 1 if a > b */ |
| 191 | static int mp_cmp( leg_t const *a, leg_t const *b, int legs ) |
31 | static int mp_cmp( limb_t const *a, limb_t const *b, int limbs ) |
| 192 | { |
32 | { |
| 193 | while( legs-- ) |
33 | while( limbs-- ) |
| 194 | { |
34 | { |
| 195 | if( a[legs] < b[legs] ) return -1; |
35 | if( a[limbs] < b[limbs] ) return -1; |
| 196 | if( a[legs] > b[legs] ) return 1; |
36 | if( a[limbs] > b[limbs] ) return 1; |
| 197 | } |
37 | } |
| 198 | return 0; |
38 | return 0; |
| 199 | } |
39 | } |
| 200 | |
40 | |
| 201 | /* Subtract b from a, store in a. Expects enough words prepended |
41 | /* Subtract b from a, store in a. Expects enough words prepended |
| 202 | to borrow from */ |
42 | to borrow from */ |
| 203 | static void mp_sub( leg_t *a, leg_t const *b, int legs ) |
43 | static void mp_sub( limb_t *a, limb_t const *b, int limbs ) |
| 204 | { |
44 | { |
| 205 | int borrow = 0, borrow_temp; |
45 | int borrow = 0, borrow_temp; |
| 206 | while( legs-- ) |
46 | while( limbs-- ) |
| 207 | { |
47 | { |
| 208 | leg_t temp = *a - *(b++); |
48 | limb_t temp = *a - *(b++); |
| 209 | borrow_temp = temp > *a; |
49 | borrow_temp = temp > *a; |
| 210 | *a = temp - borrow; |
50 | *a = temp - borrow; |
| 211 | borrow = borrow_temp | ( *(a++) > temp ); |
51 | borrow = borrow_temp | ( *(a++) > temp ); |
| 212 | } |
52 | } |
| 213 | while( borrow ) |
53 | while( borrow ) |
| 214 | { |
54 | { |
| 215 | leg_t temp = *a - borrow; |
55 | limb_t temp = *a - borrow; |
| 216 | borrow = temp > *a; |
56 | borrow = temp > *a; |
| 217 | *(a++) = temp; |
57 | *(a++) = temp; |
| 218 | } |
58 | } |
| 219 | } |
59 | } |
| 220 | |
60 | |
| 221 | /* Add b to a, store in a. Operates on legs + flegs words, with |
61 | /* Add b to a, store in a. Operates on limbs + flimbs words, with |
| 222 | flegs the amount of legs to propagate the carry to */ |
62 | flimbs the amount of limbs to propagate the carry to */ |
| 223 | static void mp_add( leg_t *a, leg_t const *b, int legs, int flegs ) |
63 | static void mp_add( limb_t *a, limb_t const *b, int limbs, int flimbs ) |
| 224 | { |
64 | { |
| 225 | dleg_t acc = 0; |
65 | dlimb_t acc = 0; |
| 226 | while( legs-- ) |
66 | while( limbs-- ) |
| 227 | { |
67 | { |
| 228 | acc += (dleg_t)*a + (dleg_t)*(b++); |
68 | acc += (dlimb_t)*a + (dlimb_t)*(b++); |
| 229 | *(a++) = (leg_t)acc; |
69 | *(a++) = (limb_t)acc; |
| 230 | acc >>= 8*sizeof(leg_t); |
70 | acc >>= 8*sizeof(limb_t); |
| 231 | } |
71 | } |
| 232 | while( acc && flegs-- ) |
72 | while( acc && flimbs-- ) |
| 233 | { |
73 | { |
| 234 | acc += (dleg_t)*a; |
74 | acc += (dlimb_t)*a; |
| 235 | *(a++) = (leg_t)acc; |
75 | *(a++) = (limb_t)acc; |
| 236 | acc >>= 8*sizeof(leg_t); |
76 | acc >>= 8*sizeof(limb_t); |
| 237 | } |
77 | } |
| 238 | } |
78 | } |
| 239 | |
79 | |
| 240 | /* Subtract b from a and store in result. Expects nothing to borrow.*/ |
80 | /* Subtract b from a and store in result. Expects nothing to borrow.*/ |
| 241 | static void mp_sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int legs ) |
81 | static void mp_sub_mod( limb_t * result, limb_t const *a, limb_t const * b, int limbs ) |
| 242 | { |
82 | { |
| 243 | int borrow = 0, borrow_temp; |
83 | int borrow = 0, borrow_temp; |
| 244 | while( legs-- ) |
84 | while( limbs-- ) |
| 245 | { |
85 | { |
| 246 | leg_t temp = *a - *(b++); |
86 | limb_t temp = *a - *(b++); |
| 247 | borrow_temp = temp > *(a++); |
87 | borrow_temp = temp > *(a++); |
| 248 | *result = temp - borrow; |
88 | *result = temp - borrow; |
| 249 | borrow = borrow_temp | (*(result++) > temp ); |
89 | borrow = borrow_temp | (*(result++) > temp ); |
| @@ -251,119 +91,120 @@ static void mp_sub_mod( leg_t * result, leg_t const *a, leg_t const * b, int leg |
| 251 | } |
91 | } |
| 252 | |
92 | |
| 253 | /* Fast negate */ |
93 | /* Fast negate */ |
| 254 | static void mp_negate( leg_t * p, int legs ) |
94 | static void mp_negate( limb_t * p, int limbs ) |
| 255 | { |
95 | { |
| 256 | /* Only as long as we find 0 along the way, we need to |
96 | /* Only as long as we find 0 along the way, we need to |
| 257 | propagate the carry of -x == !x+1 */ |
97 | propagate the carry of -x == !x+1 */ |
| 258 | int carry = 1; |
98 | int carry = 1; |
| 259 | while( legs-- ) |
99 | while( limbs-- ) |
| 260 | { |
100 | { |
| 261 | leg_t v = carry + ( *p ^ (leg_t)-1 ); |
101 | limb_t v = carry + ( *p ^ (limb_t)-1 ); |
| 262 | if(v) carry = 0; |
102 | if(v) carry = 0; |
| 263 | *(p++)=v; |
103 | *(p++)=v; |
| 264 | } |
104 | } |
| 265 | } |
105 | } |
| 266 | |
106 | |
| 267 | /* Multiplies a with fac, adds to result. |
107 | /* Multiplies a with fac, adds to result. |
| 268 | result is guaranteed to be initialized with enough legs prepended to take the carry */ |
108 | result is guaranteed to be initialized with enough limbs prepended to take the carry */ |
| 269 | static void mp_mul_uint_add( leg_t *result, leg_t const *a, leg_t fac, int legs ) |
109 | static void mp_mul_uint_add( limb_t *result, limb_t const *a, limb_t fac, int limbs ) |
| 270 | { |
110 | { |
| 271 | dleg_t acc8 = 0; |
111 | dlimb_t acc15 = 0; |
| 272 | #ifndef WITHOUT_UNROLLING |
112 | #ifndef WITHOUT_UNROLLING |
| 273 | dleg_t acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0; |
113 | dlimb_t acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0; |
| 274 | dleg_t acc5 = 0, acc6 = 0, acc7 = 0; |
114 | dlimb_t acc4 = 0, acc5 = 0, acc6 = 0, acc7 = 0; |
| |
115 | dlimb_t acc8 = 0, acc9 = 0, acc10= 0, acc11= 0; |
| |
116 | dlimb_t acc12= 0, acc13= 0, acc14= 0; |
| 275 | |
117 | |
| 276 | while( legs >= 8 ) |
118 | while( limbs >= 16 ) |
| 277 | { |
119 | { |
| 278 | acc1 = ( acc8 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
120 | #define MUL_UINT_ADD_ROUND(ACC,CARRY) \ |
| 279 | *(result++) = (leg_t)acc1; |
121 | acc##ACC = ( acc##CARRY >> 8*sizeof(limb_t) ) + (dlimb_t)result[ACC] + (dlimb_t)a[ACC] * (dlimb_t)fac; \ |
| 280 | |
122 | result[ACC] = (limb_t)acc##ACC; |
| 281 | acc2 = ( acc1 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
123 | |
| 282 | *(result++) = (leg_t)acc2; |
124 | MUL_UINT_ADD_ROUND(0,15) |
| 283 | |
125 | MUL_UINT_ADD_ROUND(1,0) |
| 284 | acc3 = ( acc2 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
126 | MUL_UINT_ADD_ROUND(2,1) |
| 285 | *(result++) = (leg_t)acc3; |
127 | MUL_UINT_ADD_ROUND(3,2) |
| 286 | |
128 | MUL_UINT_ADD_ROUND(4,3) |
| 287 | acc4 = ( acc3 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
129 | MUL_UINT_ADD_ROUND(5,4) |
| 288 | *(result++) = (leg_t)acc4; |
130 | MUL_UINT_ADD_ROUND(6,5) |
| 289 | |
131 | MUL_UINT_ADD_ROUND(7,6) |
| 290 | acc5 = ( acc4 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
132 | MUL_UINT_ADD_ROUND(8,7) |
| 291 | *(result++) = (leg_t)acc5; |
133 | MUL_UINT_ADD_ROUND(9,8) |
| 292 | |
134 | MUL_UINT_ADD_ROUND(10,9) |
| 293 | acc6 = ( acc5 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
135 | MUL_UINT_ADD_ROUND(11,10) |
| 294 | *(result++) = (leg_t)acc6; |
136 | MUL_UINT_ADD_ROUND(12,11) |
| 295 | |
137 | MUL_UINT_ADD_ROUND(13,12) |
| 296 | acc7 = ( acc6 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
138 | MUL_UINT_ADD_ROUND(14,13) |
| 297 | *(result++) = (leg_t)acc7; |
139 | MUL_UINT_ADD_ROUND(15,14) |
| 298 | |
140 | |
| 299 | acc8 = ( acc7 >> 8*sizeof(leg_t) ) + (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
141 | result += 16; |
| 300 | *(result++) = (leg_t)acc8; |
142 | a += 16; |
| 301 | |
143 | limbs -= 16; |
| 302 | legs -= 8; |
| |
| 303 | } |
144 | } |
| 304 | acc8 >>= 8*sizeof(leg_t); |
145 | acc15 >>= 8*sizeof(limb_t); |
| 305 | #endif |
146 | #endif |
| 306 | while( legs-- ) |
147 | while( limbs-- ) |
| 307 | { |
148 | { |
| 308 | acc8 += (dleg_t)*result + (dleg_t)*(a++) * (dleg_t)fac; |
149 | acc15 += (dlimb_t)*result + (dlimb_t)*(a++) * (dlimb_t)fac; |
| 309 | *(result++) = (leg_t)acc8; |
150 | *(result++) = (limb_t)acc15; |
| 310 | acc8 >>= 8*sizeof(leg_t); |
151 | acc15 >>= 8*sizeof(limb_t); |
| 311 | } |
152 | } |
| 312 | while( acc8 ) |
153 | while( acc15 ) |
| 313 | { |
154 | { |
| 314 | acc8 += (dleg_t)*result; |
155 | acc15 += (dlimb_t)*result; |
| 315 | *(result++) = (leg_t)acc8; |
156 | *(result++) = (limb_t)acc15; |
| 316 | acc8 >>= 8*sizeof(leg_t); |
157 | acc15 >>= 8*sizeof(limb_t); |
| 317 | } |
158 | } |
| 318 | } |
159 | } |
| 319 | |
160 | |
| 320 | /* Multiplies a and b, adds to result, base case version. */ |
161 | /* Multiplies a and b, adds to result, base case version. */ |
| 321 | static void mp_mul_oper_add( leg_t * result, leg_t const *a, leg_t const *b, int legs ) |
162 | static void mp_mul_oper_add( limb_t * result, limb_t const *a, limb_t const *b, int limbs ) |
| 322 | { |
163 | { |
| 323 | int leg = legs; |
164 | int limb = limbs; |
| 324 | while( leg-- ) |
165 | while( limb-- ) |
| 325 | mp_mul_uint_add( result++, a, *(b++), legs ); |
166 | mp_mul_uint_add( result++, a, *(b++), limbs ); |
| 326 | } |
167 | } |
| 327 | |
168 | |
| 328 | /* Optimized mp_mul_oper_add for a == b, i.e. squaring */ |
169 | /* Optimized mp_mul_oper_add for a == b, i.e. squaring */ |
| 329 | static void mp_sqr( leg_t *result, leg_t const * a, int legs ) |
170 | static void mp_sqr( limb_t *result, limb_t const * a, int limbs ) |
| 330 | { |
171 | { |
| 331 | while( legs-- ) { |
172 | while( limbs-- ) { |
| 332 | leg_t fac = *(a++), *dest = result; |
173 | limb_t fac = *(a++), *dest = result; |
| 333 | leg_t const *src = a; |
174 | limb_t const *src = a; |
| 334 | int leg = legs; |
175 | int limb = limbs; |
| 335 | |
176 | |
| 336 | dleg_t acc = (dleg_t)*dest + (dleg_t)fac * (dleg_t)fac; |
177 | dlimb_t acc = (dlimb_t)*dest + (dlimb_t)fac * (dlimb_t)fac; |
| 337 | |
178 | |
| 338 | *(dest++) = (leg_t)acc; |
179 | *(dest++) = (limb_t)acc; |
| 339 | acc >>= 8*sizeof(leg_t); |
180 | acc >>= 8*sizeof(limb_t); |
| 340 | |
181 | |
| 341 | while( leg-- ) |
182 | while( limb-- ) |
| 342 | { |
183 | { |
| 343 | dleg_t subresult = (dleg_t)fac * (dleg_t)*(src++); |
184 | dlimb_t subresult = (dlimb_t)fac * (dlimb_t)*(src++); |
| 344 | int carry = !!( subresult >> (16*sizeof(leg_t)-1)); |
185 | int carry = !!( subresult >> (16*sizeof(limb_t)-1)); |
| 345 | |
186 | |
| 346 | acc += 2 * subresult + (dleg_t)*dest; |
187 | acc += 2 * subresult + (dlimb_t)*dest; |
| 347 | *(dest++) = (leg_t)acc; |
188 | *(dest++) = (limb_t)acc; |
| 348 | |
189 | |
| 349 | acc >>= 8*sizeof(leg_t); |
190 | acc >>= 8*sizeof(limb_t); |
| 350 | acc += (dleg_t)carry << 8*sizeof(leg_t); |
191 | acc += (dlimb_t)carry << 8*sizeof(limb_t); |
| 351 | } |
192 | } |
| 352 | |
193 | |
| 353 | while( acc ) |
194 | while( acc ) |
| 354 | { |
195 | { |
| 355 | acc += (dleg_t)*dest; |
196 | acc += (dlimb_t)*dest; |
| 356 | *(dest++) = (leg_t)acc; |
197 | *(dest++) = (limb_t)acc; |
| 357 | acc >>= 8*sizeof(leg_t); |
198 | acc >>= 8*sizeof(limb_t); |
| 358 | } |
199 | } |
| 359 | result += 2; |
200 | result += 2; |
| 360 | } |
201 | } |
| 361 | } |
202 | } |
| 362 | |
203 | |
| 363 | /* Optimized karatsuba (toom2.2) for a == b, i.e. squaring */ |
204 | /* Optimized karatsuba (toom2.2) for a == b, i.e. squaring */ |
| 364 | static void mp_mul_kara_square( leg_t* p, leg_t const *a, int len, leg_t *scratch ) |
205 | static void mp_mul_kara_square( limb_t* p, limb_t const *a, int len, limb_t *scratch ) |
| 365 | { |
206 | { |
| 366 | memset( p, 0, 2 * len * sizeof( leg_t )); |
207 | memset( p, 0, 2 * len * sizeof( limb_t )); |
| 367 | if( len <= KARATSUBA_THRESHOLD ) |
208 | if( len <= KARATSUBA_THRESHOLD ) |
| 368 | mp_sqr( p, a, len ); |
209 | mp_sqr( p, a, len ); |
| 369 | else |
210 | else |
| @@ -389,9 +230,9 @@ static void mp_mul_kara_square( leg_t* p, leg_t const *a, int len, leg_t *scratc |
| 389 | } |
230 | } |
| 390 | |
231 | |
| 391 | /* karatsuba (toom2.2), generic */ |
232 | /* karatsuba (toom2.2), generic */ |
| 392 | static void mp_mul_kara( leg_t* p, leg_t const *a, leg_t const *b, int len, leg_t *scratch ) |
233 | static void mp_mul_kara( limb_t* p, limb_t const *a, limb_t const *b, int len, limb_t *scratch ) |
| 393 | { |
234 | { |
| 394 | memset( p, 0, 2 * len * sizeof( leg_t )); |
235 | memset( p, 0, 2 * len * sizeof( limb_t )); |
| 395 | if( len <= KARATSUBA_THRESHOLD ) |
236 | if( len <= KARATSUBA_THRESHOLD ) |
| 396 | mp_mul_oper_add( p, a, b, len ); |
237 | mp_mul_oper_add( p, a, b, len ); |
| 397 | else |
238 | else |
| @@ -428,398 +269,132 @@ static void mp_mul_kara( leg_t* p, leg_t const *a, leg_t const *b, int len, leg_ |
| 428 | } |
269 | } |
| 429 | } |
270 | } |
| 430 | |
271 | |
| 431 | /* Multiply a and b, store in a, work in montgomery domain |
272 | /* Multiply a and b, store in a, work in montgomery domain to achieve multiply, |
| 432 | to achieve multiply, a needs to be reduced by k * g_prime |
273 | a needs to be reduced by k * modulus (with k being the unknown integer) |
| 433 | until all lower legs are 0, allowing exact division by 2^r |
274 | until all lower limbs are 0, allowing exact division (via implicit shift) |
| |
275 | by 2^r |
| 434 | |
276 | |
| 435 | if !do_mul, we convert from montgomery domain back to |
277 | if !do_mul, we convert from montgomery domain back to modulus domain and |
| 436 | g_prime domain and thus only multiply by 1 before reducing |
278 | thus only multiply by 1 before reducing |
| 437 | */ |
279 | */ |
| 438 | static void redc( leg_t *a, leg_t const *b, int legs, int do_mul ) |
280 | static void redc( limb_t * a, |
| |
281 | const limb_t * b, int limbs, |
| |
282 | const limb_t * modulus, limb_t r_inverse, int do_mul ) |
| 439 | { |
283 | { |
| 440 | leg_t scratch[ 2 * ( legs - KARATSUBA_THRESHOLD ) ]; |
284 | limb_t scratch[ 2 * ( limbs - KARATSUBA_THRESHOLD ) ]; |
| 441 | leg_t temp[1 + 2 * legs]; |
285 | limb_t temp[ 1 + 2 * limbs ]; |
| 442 | leg_t leg; |
286 | int limb; |
| 443 | |
287 | |
| 444 | /* Not necessary for transforming back */ |
288 | /* Not necessary for transforming back */ |
| 445 | if( do_mul ) |
289 | if( do_mul ) |
| 446 | { |
290 | { |
| 447 | temp[2*legs] = 0; |
291 | temp[2*limbs] = 0; |
| 448 | if( a == b ) |
292 | if( a == b ) |
| 449 | mp_mul_kara_square( temp, a, legs, scratch ); |
293 | mp_mul_kara_square( temp, a, limbs, scratch ); |
| 450 | else |
294 | else |
| 451 | mp_mul_kara( temp, a, b, legs, scratch ); |
295 | mp_mul_kara( temp, a, b, limbs, scratch ); |
| 452 | } |
296 | } |
| 453 | else |
297 | else |
| 454 | { |
298 | { |
| 455 | memset( temp + legs, 0, (legs + 1 ) * sizeof(leg_t)); |
299 | memset( temp + limbs, 0, (limbs + 1) * sizeof(limb_t)); |
| 456 | memcpy( temp, a, legs * sizeof(leg_t)); |
300 | memcpy( temp, a, limbs * sizeof(limb_t)); |
| 457 | } |
301 | } |
| 458 | |
302 | |
| 459 | /* m = p * ( m * R_1 ) % R */ |
303 | /* m = p * ( m * R_1 ) % R */ |
| 460 | for( leg = 0; leg < legs; ++leg ) |
304 | for( limb = 0; limb < limbs; ++limb ) |
| 461 | { |
305 | { |
| 462 | leg_t k = temp[leg] * g_r_inverse; |
306 | limb_t k = temp[limb] * r_inverse; |
| 463 | mp_mul_uint_add( temp+leg, g_prime, k, legs ); |
307 | mp_mul_uint_add( temp+limb, modulus, k, limbs ); |
| 464 | } |
308 | } |
| 465 | |
309 | |
| 466 | /* the lower legs of temp are now zero |
310 | /* the lower limbs of temp are now zero |
| 467 | if necessary, reduce temp to fit in legs */ |
311 | if necessary, reduce temp to fit in limbs */ |
| 468 | if( temp[2*legs] || mp_cmp( temp + legs, g_prime, legs ) > 0 ) |
312 | if( temp[2*limbs] || mp_cmp( temp + limbs, modulus, limbs ) > 0 ) |
| 469 | mp_sub( temp + legs, g_prime, legs ); |
313 | mp_sub( temp + limbs, modulus, limbs ); |
| 470 | |
314 | |
| 471 | memcpy( a, temp + legs, legs * sizeof(leg_t) ); |
315 | memcpy( a, temp + limbs, limbs * sizeof(limb_t) ); |
| 472 | memset( scratch, 0, sizeof( scratch ) ); |
316 | memset( scratch, 0, sizeof(scratch)); |
| 473 | memset( temp, 0, sizeof( temp ) ); |
317 | memset( temp, 0, sizeof(temp)); |
| 474 | } |
318 | } |
| 475 | |
319 | |
| 476 | /* calculate base ^ exponent modulo g_prime */ |
320 | /* calculate base ^ exponent modulo modulus */ |
| 477 | static void powm( leg_t * result, leg_t const *exponent, leg_t base, int legs ) |
321 | void powm_internal( limb_t * result, |
| |
322 | const limb_t * base, uint32_t base_limbs, |
| |
323 | const limb_t * exponent, uint32_t exp_limbs, |
| |
324 | const limb_t * modulus, uint32_t mod_limbs, |
| |
325 | const limb_t * r_square, const limb_t r_inverse ) |
| 478 | { |
326 | { |
| 479 | leg_t acc[legs]; |
327 | limb_t acc[mod_limbs]; |
| 480 | #ifdef WITH_PREVENT_TIMING_ATTACKS |
328 | #ifdef WITH_PREVENT_TIMING_ATTACKS |
| 481 | leg_t dummy[legs]; |
329 | limb_t dummy[mod_limbs]; |
| 482 | #endif |
330 | #endif |
| 483 | int first = 0, bit; |
331 | int first = 0, bit; |
| |
332 | int expbits = 8 * sizeof(limb_t) * exp_limbs; |
| |
333 | |
| |
334 | memset( result, 0, sizeof(limb_t) * mod_limbs ); |
| |
335 | memset( acc, 0, sizeof(limb_t) * mod_limbs ); |
| 484 | |
336 | |
| 485 | memset( acc, 0, sizeof(acc) ); |
337 | memcpy( acc, base, sizeof(limb_t) * base_limbs ); |
| 486 | *acc = base; |
338 | //*acc = base; |
| 487 | |
339 | |
| 488 | /* Transform base into montgomery domain */ |
340 | /* Transform base into montgomery domain */ |
| 489 | redc( acc, g_r_square, legs, 1 ); |
341 | redc( acc, r_square, mod_limbs, modulus, r_inverse, 1 ); |
| 490 | |
342 | |
| 491 | /* mul in temp and if bit set in exponent, multiply into accumulator */ |
343 | /* mul in temp and if bit set in exponent, multiply into accumulator */ |
| 492 | for( bit = 0; bit < legs * 8 * sizeof(leg_t); bit++ ) |
344 | for( bit = 0; bit < expbits ; bit++ ) |
| 493 | { |
345 | { |
| 494 | int this_bit = bit % ( sizeof(leg_t) * 8 ); |
346 | int this_bit = bit % ( sizeof(limb_t) * 8 ); |
| 495 | if( ( exponent[ bit / ( sizeof(leg_t) * 8 ) ] >> this_bit ) & 1 ) { |
347 | if( ( exponent[ bit / ( sizeof(limb_t) * 8 ) ] >> this_bit ) & 1 ) { |
| 496 | if( first++ ) |
348 | if( first++ ) |
| 497 | redc( result, acc, legs, 1 ); |
349 | redc( result, acc, mod_limbs, modulus, r_inverse, 1 ); |
| 498 | else |
350 | else |
| 499 | memcpy( result, acc, sizeof(leg_t) * legs ); |
351 | memcpy( result, acc, sizeof(limb_t) * mod_limbs ); |
| 500 | } |
352 | } |
| 501 | #ifdef WITH_PREVENT_TIMING_ATTACKS |
353 | #ifdef WITH_PREVENT_TIMING_ATTACKS |
| 502 | else |
354 | else |
| 503 | redc( dummy, acc, legs, 1 ); |
355 | redc( dummy, acc, mod_limbs, modulus, r_inverse, 1 ); |
| 504 | #endif |
356 | #endif |
| 505 | redc( acc, acc, legs, 1 ); |
357 | redc( acc, acc, mod_limbs, modulus, r_inverse, 1 ); |
| 506 | } |
358 | } |
| 507 | |
359 | |
| 508 | /* Transform result in acc back into mod p domain */ |
360 | /* Transform result in acc back into mod m domain */ |
| 509 | if( first ) |
361 | if( first ) |
| 510 | redc( result, 0, legs, 0 ); |
362 | redc( result, 0, mod_limbs, modulus, r_inverse, 0 ); |
| 511 | else /* base ^ 0 mod p = 1 */ |
363 | else /* base ^ 0 mod m = 1 */ |
| 512 | { |
364 | { |
| 513 | memset( result+1, 0, legs * sizeof(leg_t) ); |
365 | memset( result+1, 0, mod_limbs * sizeof(limb_t) ); |
| 514 | *result = 1; |
366 | *result = 1; |
| 515 | } |
367 | } |
| 516 | } |
368 | } |
| 517 | |
369 | |
| 518 | static void mp_import( leg_t *dest, uint8_t const * src, size_t len ) |
370 | uint32_t mp_import( limb_t *dest, uint8_t const * src, uint32_t len ) |
| 519 | { |
371 | { |
| 520 | int byte = 0; |
372 | int byte = 0; |
| 521 | leg_t acc = 0; |
373 | limb_t *d = dest; |
| |
374 | limb_t acc = 0; |
| |
375 | |
| 522 | while( len-- ) |
376 | while( len-- ) |
| 523 | { |
377 | { |
| 524 | acc |= ((leg_t)src[len]) << ( 8 * byte ); |
378 | acc |= ((limb_t)src[len]) << ( 8 * byte ); |
| 525 | if( ++byte == sizeof(leg_t)) |
379 | if( ++byte == sizeof(limb_t)) |
| 526 | { |
380 | { |
| 527 | *(dest++) = acc; |
381 | *(d++) = acc; |
| 528 | acc = 0; |
382 | acc = 0; |
| 529 | byte = 0; |
383 | byte = 0; |
| 530 | } |
384 | } |
| 531 | } |
385 | } |
| 532 | if( byte ) |
386 | if( byte ) |
| 533 | *dest = acc; |
387 | *(d++) = acc; |
| |
388 | return ( d - dest ); |
| 534 | } |
389 | } |
| 535 | |
390 | |
| 536 | int main() |
391 | void mp_export( uint8_t * dest, const limb_t * src, uint32_t limb_size ) |
| 537 | { |
392 | { |
| 538 | int i, legs = 64; |
393 | while( limb_size ) |
| 539 | leg_t input[ /*legs*/ ] = { |
394 | { |
| 540 | 0x281b04b30bb62984, 0xe0cc506399225574, 0xb08ad95f8040183f, 0x56eee1ba0dc1936c, |
395 | limb_t outlimb = src[--limb_size]; |
| 541 | 0x5ec1d62932d6937a, 0xfa8718685a5b8359, 0xfb4e8dadb3111df4, 0x87e90f5b6397727e, |
396 | int cnt = sizeof( limb_t ); |
| 542 | 0x6b984d2e7b9cc244, 0x9ae478fd69254dc9, 0x21da1ef76099ed94, 0xe407bde8519bc534, |
397 | while( cnt ) |
| 543 | 0x051a88159adc842b, 0x547fd48d3b471c7c, 0xd6b85c29e8e8a528, 0x74fc0b5dcdfaa679, |
398 | *(dest++) = (uint8_t)( outlimb >> ( 8 * --cnt ) ); |
| 544 | 0xe0c022a74c073360, 0xc0dc8c759af7492b, 0xa86946df6abbea0f, 0xfcc2a7f6d4b0d4f7, |
399 | } |
| 545 | 0xd2111f6ce69c2bc6, 0x4ab8b5e0adaad6a0, 0xf8df973a9842e7b2, 0x2d44f438b889673c, |
| |
| 546 | 0x82b4e75bf0e60d06, 0x4c3b1333a07fee24, 0x353eb64e067f86cc, 0x518781609b447bf8, |
| |
| 547 | 0xe9228ae741da24cd, 0xd8aec44d430026aa, 0x196bcb0cc0e353f8, 0x942fad7b2cc3dcc5, |
| |
| 548 | 0xa77b229138f2d5d1, 0x5479eaeb9b64ba87, 0x70cc7ff6ea8f1b51, 0x1785bbfb88755c99, |
| |
| 549 | 0xd7ac4896d4e08b35, 0x0e2aab629710919b, 0xa3612061c92319ce, 0xb5444312b3b43541, |
| |
| 550 | 0xa13b801d1ac631d5, 0x87e0db1397d9f2fd, 0x0e84ab94e744b8ba, 0x1f61b1ea0e5aafb4, |
| |
| 551 | 0x70f0c055dde43121, 0x094d72c301b8f25d, 0xd151e2770baec00b, 0x31052a6fc332563d, |
| |
| 552 | 0x2bca50f311573101, 0xb420bd7e538ef88b, 0xac5ecd1fe4997513, 0x3ca892560247ca32, |
| |
| 553 | 0x950e895d7777d182, 0xcb650843a43a4a00, 0xd07dc909e7070b0e, 0x72d720399b4ad2d4, |
| |
| 554 | 0x52292d56ea2960a9, 0x6060e2cd96000219, 0x118727b5eb17dfbd, 0xa6d78571af8d3688, |
| |
| 555 | 0xb563a9cf79c6eaec, 0xe5d85f8fb8fced5c, 0x9ac2961fe160f837, 0x1de7eae6c0d6a0b0, |
| |
| 556 | }; |
| |
| 557 | |
| |
| 558 | leg_t res[legs]; /* |
| |
| 559 | = { 569D056BC5485C6F3C026935CFC1FC36CFC5A6B770B442323750EE08A32C63F64743A4902E32A41F7C5A9097509EB9AAA5C5B425D9BDAEDC3B98EE3A9F99AE6FB8723EBE01154937ECC1E4AE38CBC12B767FD17264A4AAEB310EA5B0E72D12FE3969532C5A0718B45F251BFB2A126EFB87F819BDCD434EAEE3B9108C387F4DFFEFA0E9532A0532192034DB0EF5C5BB2A77BCA5B4A08DFFD22F6B1D4F17F5123935CAB4F40EC7E77F4FEEEC6A8CD20CDB2D37DD1BB44544C50B01E98F8B3F5E1ABF91817D40B7E526D6ED80CDE416C6CD32218D3ECCA753D15BBB007661754E44A937C5A68957C3C2D3F39A220930567E9F720B3AAE9D34FEBA6E5A047C131F32271A3BC1E85BC1BC210F49B09F3938226399991BC90299B4E0E684B2DE6BDA684693F40CF728FD24A4B1D1A2B8F4CD3316A02DBC44069A40C1AC64F862091A888DD284AB180918B379A3BD173EDDEA5E08DF4BE51B441AEA7EDA4E439CF57914F68E0FB3E756AB7286616C3736CA72DD40A88FC37B598BF561300AFD1F80F7D44E53C4C1D85FF215250A0C56222A177299C61C2E654B040176F4A8D44BE2612B36E066E7AA6E58D84D4C4CB9A5952E1668CDF04EE55C38ABA59614CB7F43D12921551DD82062AFC3F340B33F43C643BC9D5DB6D6BAB2C94F100D139443B20BCB80384C3231261C5ABFFF3D971CB878116F4EBB7D4D24A2B6F5869 } |
| |
| 560 | */ |
| |
| 561 | |
| |
| 562 | #ifdef WITH_TESTS |
| |
| 563 | run_tests(); |
| |
| 564 | #endif |
| |
| 565 | |
| |
| 566 | #ifdef WITH_MEASURE_GMP |
| |
| 567 | mpz_t m, base, exp, result; |
| |
| 568 | |
| |
| 569 | mpz_init( m ); mpz_init( base ); mpz_init( exp ); mpz_init( result ); |
| |
| 570 | mpz_import( m, sizeof(g_prime) / sizeof(uint64_t), -1, sizeof(uint64_t), 0, 0, g_prime ); |
| |
| 571 | mpz_set_ui( base, 2 ); |
| |
| 572 | mpz_import( exp, sizeof(input) / sizeof(uint64_t), -1, sizeof(uint64_t), 0, 0, input ); |
| |
| 573 | |
| |
| 574 | for( i=0; i< WITH_ROUNDS; ++i ) |
| |
| 575 | mpz_powm( result, base, exp, m ); |
| |
| 576 | |
| |
| 577 | gmp_printf("%ZX\n", result); |
| |
| 578 | #else |
| |
| 579 | |
| |
| 580 | for( i=0; i<WITH_ROUNDS; ++i ) |
| |
| 581 | powm( res, input, 2, legs ); |
| |
| 582 | |
| |
| 583 | for( i=legs-1; i>=0; --i ) |
| |
| 584 | printf( "%016llX", res[i] ); |
| |
| 585 | putchar(10); putchar(10); |
| |
| 586 | #endif |
| |
| 587 | |
| |
| 588 | return 0; |
| |
| 589 | } |
| |
| 590 | |
| |
| 591 | #ifdef WITH_TESTS |
| |
| 592 | #define TEST_MAX 1024 |
| |
| 593 | static void REPORT( char * fail ) { fprintf( stderr, "\nFailed in %s\n", fail ); exit(-1); } |
| |
| 594 | static leg_t test_001[] = { (leg_t)-1, 1 }; |
| |
| 595 | static leg_t test_002[] = { (leg_t)-1, 2 }; |
| |
| 596 | static leg_t test_003[] = { 0, 1 }; |
| |
| 597 | static leg_t test_004[] = { (leg_t)-1, 1, 1 }; |
| |
| 598 | static leg_t test_005[] = { 0, (leg_t)-1, 0 }; |
| |
| 599 | static leg_t test_006[] = { (leg_t)-2, 4, 0 }; |
| |
| 600 | static leg_t test_007[] = { (leg_t)-1, (leg_t)-2 }; |
| |
| 601 | static leg_t test_008[] = { (leg_t)-1, (leg_t)-3 }; |
| |
| 602 | static leg_t test_009[] = { (leg_t)-2, (leg_t)-4, 0 }; |
| |
| 603 | static leg_t test_010[] = { (leg_t)-2, (leg_t)-4, 1 }; |
| |
| 604 | |
| |
| 605 | static uint8_t test_011[] = { |
| |
| 606 | 0xf2, 0xda, 0xe5, 0x35, 0xa0, 0xbd, 0xb8, 0x4c, 0x88, 0x65, 0x50, 0xca, 0xe8, 0x17, 0xa8, 0xab, 0x34, 0xd6, 0x86, 0xc4, 0x29, 0xa0, 0xc8, 0x51, 0x84, 0x05, 0x3e, 0x9d, 0x4d, 0x23, 0xce, 0x75, |
| |
| 607 | 0x1f, 0xa6, 0xc1, 0x36, 0x86, 0x35, 0xf5, 0x6f, 0xcc, 0x0b, 0x13, 0x40, 0x89, 0xc2, 0x41, 0xb0, 0x0e, 0xa7, 0x68, 0xd4, 0x68, 0x6a, 0xd2, 0x08, 0xf7, 0xf5, 0x2c, 0x05, 0x11, 0x43, 0x00, 0x9b, |
| |
| 608 | 0xf4, 0x92, 0xe1, 0x17, 0xae, 0xcc, 0x05, 0x98, 0x95, 0xc4, 0x33, 0xfd, 0x54, 0x42, 0x06, 0x98, 0x90, 0x62, 0x61, 0x15, 0x34, 0xe8, 0xbd, 0x37, 0xe0, 0x57, 0x27, 0x90, 0x75, 0x08, 0x33, 0x15, |
| |
| 609 | 0xe5, 0x48, 0x19, 0xbb, 0x2c, 0xc1, 0xda, 0xeb, 0x95, 0x72, 0x97, 0x05, 0x60, 0xbf, 0xbf, 0x80, 0xfe, 0x04, 0x7a, 0x4a, 0xff, 0x0e, 0xa0, 0x47, 0x82, 0x0d, 0x90, 0xf0, 0x5b, 0xfc, 0x21, 0x82 |
| |
| 610 | }; |
| |
| 611 | static uint8_t test_012[] = { |
| |
| 612 | 0x0c, 0xc4, 0xc0, 0xb7, 0x13, 0xee, 0xf6, 0xf3, 0xcd, 0xf7, 0x02, 0x0b, 0x97, 0x70, 0xc0, 0x59, 0x27, 0xd2, 0x74, 0xbf, 0x54, 0xbf, 0x9d, 0x77, 0x59, 0xe0, 0x86, 0x06, 0x18, 0x7a, 0x3c, 0x3e, |
| |
| 613 | 0x12, 0x52, 0xed, 0x77, 0x10, 0xf9, 0x4b, 0xb8, 0xb7, 0x2b, 0xdc, 0x23, 0xe0, 0x45, 0x79, 0xe2, 0xe9, 0xef, 0x12, 0x97, 0x22, 0xf8, 0x72, 0xa2, 0x05, 0x1f, 0x5f, 0x36, 0xae, 0xfb, 0x7a, 0xb2, |
| |
| 614 | 0x7d, 0xd7, 0xa2, 0x15, 0xa4, 0x9f, 0x27, 0x55, 0x65, 0x2a, 0x49, 0x0f, 0xca, 0x9f, 0xf3, 0xf6, 0x26, 0xcb, 0x57, 0x07, 0x82, 0xb7, 0x04, 0x1a, 0x7b, 0x65, 0x40, 0xb8, 0x55, 0x93, 0xee, 0xd4, |
| |
| 615 | 0x09, 0x3e, 0x30, 0xa1, 0x6c, 0x00, 0x90, 0x54, 0x9b, 0x05, 0xb4, 0x08, 0xc4, 0xcc, 0xcd, 0xa2, 0xf6, 0xe6, 0x34, 0xf3, 0x5f, 0xe0, 0xe4, 0xb3, 0xcd, 0xd3, 0x8f, 0x83, 0xb2, 0xe8, 0x08, 0xb6 |
| |
| 616 | }; |
| |
| 617 | static uint8_t test_013[] = { |
| |
| 618 | 0x0C, 0x1C, 0xE9, 0x27, 0xD9, 0xE2, 0xF8, 0x92, 0x97, 0x0B, 0x8E, 0x79, 0x91, 0x07, 0xE7, 0xAA, 0xF8, 0x5C, 0x11, 0xAB, 0x84, 0xFA, 0x0C, 0x6F, 0xED, 0xA5, 0x0C, 0x1F, 0x4C, 0x55, 0x5F, 0x5B, |
| |
| 619 | 0x70, 0x63, 0x22, 0x85, 0x72, 0x6B, 0x3D, 0x9A, 0x78, 0x6E, 0xBF, 0x2E, 0x29, 0xB8, 0x2E, 0xE4, 0x4C, 0x09, 0x35, 0x45, 0x92, 0x26, 0xCC, 0x29, 0x15, 0xFC, 0x9A, 0x2F, 0x58, 0x4E, 0x70, 0xDD, |
| |
| 620 | 0x00, 0xA9, 0xC7, 0xDE, 0xF5, 0x26, 0x85, 0x23, 0xDF, 0xBF, 0x00, 0x61, 0xC7, 0x47, 0x0E, 0x23, 0x9D, 0xC3, 0xB6, 0x17, 0x8C, 0x36, 0x19, 0x03, 0x70, 0xD1, 0xE1, 0xE7, 0x58, 0xDD, 0x10, 0xD5, |
| |
| 621 | 0x76, 0x3B, 0xA7, 0xAE, 0x73, 0xAC, 0x4E, 0xBF, 0x4B, 0x79, 0xE5, 0xDE, 0x19, 0x68, 0x1D, 0x19, 0xD4, 0xF1, 0xF9, 0x90, 0x0B, 0x55, 0x9A, 0x8A, 0xAE, 0x5C, 0x85, 0x0C, 0x84, 0xB1, 0x33, 0x2F, |
| |
| 622 | 0x0E, 0x99, 0xFC, 0xC6, 0x57, 0xE8, 0x81, 0x53, 0x23, 0x87, 0x5D, 0x41, 0xDF, 0xC9, 0xE0, 0xE5, 0xE1, 0x8C, 0x2C, 0x4E, 0x69, 0xAE, 0x99, 0x54, 0x19, 0x85, 0x7F, 0x7F, 0x19, 0x1E, 0x28, 0xC2, |
| |
| 623 | 0x04, 0xAB, 0x16, 0x1D, 0xE9, 0x22, 0x55, 0xD2, 0x2D, 0x45, 0x44, 0xD2, 0x16, 0x3C, 0x2B, 0xD3, 0xE5, 0x88, 0xF2, 0x5E, 0x97, 0x92, 0x51, 0xF0, 0x64, 0x7D, 0x27, 0x5E, 0x0F, 0x5C, 0x99, 0xD8, |
| |
| 624 | 0x02, 0xA8, 0xD3, 0x87, 0x90, 0x02, 0x9E, 0x4E, 0x9A, 0x9D, 0x72, 0xF3, 0xA8, 0xBC, 0x28, 0x74, 0xBD, 0x60, 0x92, 0xE1, 0x4A, 0x7E, 0x3E, 0xC2, 0xB3, 0x88, 0xBA, 0xAD, 0x8F, 0xFF, 0xCD, 0x57, |
| |
| 625 | 0xE2, 0x7E, 0x43, 0x68, 0x91, 0xC3, 0xBE, 0x50, 0x9E, 0xB8, 0xFA, 0x53, 0x3C, 0xEE, 0x1F, 0x4C, 0x59, 0x5D, 0x4B, 0xE8, 0x42, 0x32, 0xD5, 0xE4, 0xFC, 0xD7, 0xFD, 0x12, 0x08, 0x1B, 0xE2, 0x6C |
| |
| 626 | }; |
| |
| 627 | |
| |
| 628 | static uint8_t test_014[] = { |
| |
| 629 | 0xE6, 0x62, 0x94, 0x84, 0x90, 0x75, 0x70, 0x5D, 0xF3, 0x6C, 0xC7, 0xFC, 0x45, 0x0C, 0x9F, 0xF7, 0x37, 0xB9, 0x71, 0x65, 0x62, 0x47, 0xC6, 0xD8, 0xFE, 0x93, 0xB0, 0xF1, 0xE2, 0x34, 0x78, 0x99, |
| |
| 630 | 0xE1, 0xED, 0x3C, 0xA7, 0xF0, 0xD3, 0x00, 0x26, 0xA0, 0x18, 0xAA, 0x43, 0xDB, 0xFA, 0xD1, 0xFE, 0x2A, 0x71, 0xC5, 0xD1, 0xA1, 0x1D, 0xC0, 0xD3, 0x26, 0x15, 0x43, 0x6F, 0x06, 0x35, 0x5D, 0x02, |
| |
| 631 | 0x3A, 0xFD, 0x58, 0xAA, 0x91, 0x4A, 0x27, 0xFC, 0xD5, 0x78, 0xBC, 0x65, 0xE6, 0xE6, 0xEC, 0x4E, 0x25, 0xBE, 0xEC, 0xDD, 0x82, 0xDB, 0x6D, 0xDE, 0x42, 0x4D, 0xE6, 0x39, 0x71, 0x80, 0x4B, 0xDA, |
| |
| 632 | 0x2E, 0xD2, 0x25, 0x70, 0x96, 0x9D, 0x7B, 0x1B, 0x61, 0x85, 0xE6, 0x3C, 0x31, 0x4B, 0x60, 0x07, 0xCA, 0x66, 0xEF, 0x98, 0xEA, 0xAE, 0xED, 0xB8, 0x91, 0xC2, 0x8A, 0x70, 0x3B, 0xAC, 0x0B, 0x0A, |
| |
| 633 | 0x3D, 0x80, 0xC9, 0x55, 0x96, 0x2F, 0xF3, 0xED, 0x55, 0xBC, 0x8B, 0x06, 0x62, 0xD9, 0x3F, 0x18, 0x4D, 0x87, 0x51, 0xD2, 0x11, 0x97, 0x7D, 0x16, 0xC5, 0xF0, 0xAB, 0x25, 0xCA, 0xB4, 0xF2, 0x66, |
| |
| 634 | 0xCE, 0xCE, 0x58, 0xBB, 0x23, 0x10, 0x53, 0x6E, 0xA8, 0x96, 0xAD, 0x67, 0x9A, 0xEE, 0x53, 0xF0, 0x37, 0xCF, 0x51, 0x88, 0x51, 0xB8, 0x45, 0x15, 0x19, 0xCD, 0x84, 0xAD, 0xEB, 0xC5, 0xAA, 0x9C, |
| |
| 635 | 0xCC, 0x03, 0x47, 0x2B, 0xDC, 0xCF, 0x86, 0x67, 0x04, 0xA0, 0xBA, 0xF5, 0x1D, 0x6B, 0xB2, 0xAB, 0x2E, 0xAA, 0x83, 0x5F, 0x56, 0x3F, 0x33, 0xB7, 0xA6, 0xA9, 0xF1, 0x0D, 0x95, 0x7B, 0x2F, 0x21, |
| |
| 636 | 0xB6, 0xC0, 0x0E, 0xF2, 0x03, 0x91, 0x56, 0x3C, 0x56, 0x4C, 0x91, 0x87, 0xB3, 0x68, 0x49, 0xC5, 0x8E, 0x12, 0x41, 0xB6, 0xFD, 0xD9, 0xC8, 0xE7, 0xAE, 0xB2, 0x4B, 0xE4, 0x68, 0x52, 0xC6, 0x04 |
| |
| 637 | }; |
| |
| 638 | |
| |
| 639 | static uint8_t test_015[] = { |
| |
| 640 | 0x00, 0xA3, 0x09, 0x48, 0xD3, 0x48, 0x0D, 0xE4, 0xF4, 0x42, 0xA7, 0xA7, 0x7E, 0x47, 0xE0, 0x07, 0x6E, 0x93, 0x73, 0x43, 0xE9, 0x42, 0x6D, 0xCC, 0x87, 0xAE, 0x3B, 0x87, 0x33, 0xAE, 0xDF, 0x4E, |
| |
| 641 | 0x32, 0xE5, 0x78, 0x0C, 0x93, 0xD9, 0x84, 0xCC, 0x31, 0x8B, 0xB4, 0x83, 0xC7, 0x45, 0x71, 0xA9, 0x3C, 0x9F, 0x60, 0x8E, 0xA0, 0xED, 0x80, 0xF5, 0xBD, 0xC1, 0x75, 0x70, 0xB5, 0xFE, 0x39, 0x2A, |
| |
| 642 | 0x90, 0xB9, 0x00, 0xF7, 0x7D, 0x78, 0x91, 0x43, 0xB4, 0xC2, 0x9F, 0xC9, 0x77, 0x8A, 0x26, 0xC9, 0x17, 0xF4, 0x08, 0x0F, 0x5A, 0x5A, 0x64, 0x99, 0x47, 0xF6, 0x6E, 0x41, 0x77, 0x35, 0x20, 0x03, |
| |
| 643 | 0xB6, 0xCC, 0x3C, 0x21, 0x4C, 0x87, 0x18, 0x7E, 0x8A, 0x13, 0xCA, 0x2D, 0x51, 0x53, 0x76, 0xAE, 0x94, 0x37, 0xA4, 0x32, 0x5F, 0x0E, 0x47, 0x50, 0x9A, 0xD6, 0xDB, 0x30, 0xED, 0x43, 0xAC, 0xC3, |
| |
| 644 | 0xD4, 0x1E, 0x53, 0xD2, 0x9C, 0x29, 0x69, 0x98, 0xF6, 0x82, 0xF2, 0x12, 0x50, 0x04, 0x72, 0xC9, 0x57, 0x17, 0xA8, 0xF9, 0x80, 0xED, 0xDF, 0x96, 0x54, 0x5E, 0x1D, 0x21, 0x51, 0xCB, 0xC9, 0xAF, |
| |
| 645 | 0xC5, 0xC2, 0x71, 0xE1, 0xB4, 0xF1, 0xDF, 0xCA, 0xC9, 0xF3, 0x71, 0x4C, 0x91, 0x03, 0xA9, 0x71, 0x3B, 0x08, 0xCA, 0x67, 0x2D, 0x65, 0x35, 0x38, 0x16, 0x02, 0xE5, 0xF9, 0x1C, 0x58, 0x23, 0xE6, |
| |
| 646 | 0x7E, 0x14, 0x97, 0x51, 0x6B, 0xDE, 0xEB, 0x14, 0xB0, 0xE7, 0xB8, 0x59, 0xAD, 0xC4, 0x98, 0xA2, 0xEC, 0x1F, 0x32, 0x35, 0x31, 0x96, 0x64, 0x82, 0x7B, 0x2A, 0x48, 0x59, 0x1B, 0xC8, 0x9C, 0xBB, |
| |
| 647 | 0x55, 0x6D, 0xE7, 0xAD, 0xBC, 0x14, 0x48, 0xF8, 0xF5, 0xBB, 0xD1, 0xF6, 0xC5, 0xD5, 0xDC, 0x62, 0x8F, 0x4D, 0x91, 0x92, 0xA2, 0x95, 0x4C, 0x95, 0xA8, 0x3F, 0xBC, 0xB0, 0xE2, 0x2B, 0xE1, 0x64 |
| |
| 648 | }; |
| |
| 649 | |
| |
| 650 | static int run_tests( ) |
| |
| 651 | { |
| |
| 652 | leg_t val1[TEST_MAX], val2[TEST_MAX], val3[TEST_MAX], scratch[TEST_MAX]; |
| |
| 653 | int i; |
| |
| 654 | |
| |
| 655 | /***********************************************************************/ |
| |
| 656 | printf( "Testing mp_cmp... " ); |
| |
| 657 | |
| |
| 658 | printf( "1 " ); |
| |
| 659 | if( mp_cmp( test_001, test_001, sizeof(test_001) / sizeof(leg_t ) )) |
| |
| 660 | REPORT( "MP_CMP test 1" ); |
| |
| 661 | |
| |
| 662 | printf( "2 " ); |
| |
| 663 | if( mp_cmp( test_001, test_002, sizeof(test_001) / sizeof(leg_t ) ) != -1 ) |
| |
| 664 | REPORT( "MP_CMP test 2" ); |
| |
| 665 | |
| |
| 666 | printf( "3 " ); |
| |
| 667 | if( mp_cmp( test_002, test_001, sizeof(test_001) / sizeof(leg_t ) ) != 1 ) |
| |
| 668 | REPORT( "MP_CMP test 3" ); |
| |
| 669 | |
| |
| 670 | printf( "success.\n" ); |
| |
| 671 | |
| |
| 672 | /***********************************************************************/ |
| |
| 673 | printf( "Testing mp_sub... " ); |
| |
| 674 | printf( "1 " ); |
| |
| 675 | memcpy( val1, test_002, sizeof(test_002) ); |
| |
| 676 | mp_sub( val1, test_001, sizeof(test_001) / sizeof(leg_t ) ); |
| |
| 677 | if( memcmp( val1, test_003, sizeof(test_003)) ) |
| |
| 678 | REPORT( "MP_SUB test 1" ); |
| |
| 679 | |
| |
| 680 | printf( "2 " ); |
| |
| 681 | memset( val1, 0, sizeof(val1) ); |
| |
| 682 | memcpy( val1, test_004, sizeof(test_004) ); |
| |
| 683 | mp_sub( val1, test_002, sizeof(test_002) / sizeof(leg_t ) ); |
| |
| 684 | if( memcmp( val1, test_005, sizeof(test_005))) |
| |
| 685 | REPORT( "MP_SUB test 2" ); |
| |
| 686 | |
| |
| 687 | printf( "success.\n" ); |
| |
| 688 | |
| |
| 689 | /***********************************************************************/ |
| |
| 690 | printf( "Testing mp_add... " ); |
| |
| 691 | printf( "1 " ); |
| |
| 692 | memset( val1, 0, sizeof(val1) ); |
| |
| 693 | memcpy( val1, test_002, sizeof(test_002) ); |
| |
| 694 | mp_add( val1, test_001, sizeof(test_001) / sizeof(leg_t ), 0 ); |
| |
| 695 | if( memcmp( val1, test_006, sizeof(test_006)) ) |
| |
| 696 | REPORT( "MP_ADD test 1" ); |
| |
| 697 | |
| |
| 698 | printf( "2 " ); |
| |
| 699 | memset( val1, 0, sizeof(val1) ); |
| |
| 700 | memcpy( val1, test_007, sizeof(test_007) ); |
| |
| 701 | mp_add( val1, test_008, sizeof(test_008) / sizeof(leg_t ), 0 ); |
| |
| 702 | if( memcmp( val1, test_009, sizeof(test_009)) ) |
| |
| 703 | REPORT( "MP_ADD test 2" ); |
| |
| 704 | |
| |
| 705 | printf( "3 " ); |
| |
| 706 | memset( val1, 0, sizeof(val1) ); |
| |
| 707 | memcpy( val1, test_007, sizeof(test_007) ); |
| |
| 708 | mp_add( val1, test_008, sizeof(test_008) / sizeof(leg_t ), 1 ); |
| |
| 709 | if( memcmp( val1, test_010, sizeof(test_010)) ) |
| |
| 710 | REPORT( "MP_ADD test 3" ); |
| |
| 711 | |
| |
| 712 | printf( "success.\n" ); |
| |
| 713 | /***********************************************************************/ |
| |
| 714 | |
| |
| 715 | printf( "Testing mp_mul_oper_add... " ); |
| |
| 716 | printf( "1 " ); |
| |
| 717 | memset( val1, 0, sizeof(val1) ); |
| |
| 718 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 719 | mp_import( val3, test_012, sizeof(test_012) ); |
| |
| 720 | mp_mul_oper_add( val1, val2, val3, sizeof(test_011) / sizeof(leg_t ) ); |
| |
| 721 | mp_import( val3, test_013, sizeof(test_013) ); |
| |
| 722 | |
| |
| 723 | if( memcmp( val1, val3, sizeof(test_013)) ) |
| |
| 724 | REPORT( "MP_MUL_OPER_ADD test 1" ); |
| |
| 725 | |
| |
| 726 | printf( "2 " ); |
| |
| 727 | memset( val1, 0, sizeof(val1) ); |
| |
| 728 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 729 | mp_mul_oper_add( val1, val2, val2, sizeof(test_011) / sizeof(leg_t ) ); |
| |
| 730 | mp_import( val3, test_014, sizeof(test_014) ); |
| |
| 731 | |
| |
| 732 | if( memcmp( val1, val3, sizeof(test_014)) ) |
| |
| 733 | REPORT( "MP_MUL_OPER_ADD test 2" ); |
| |
| 734 | |
| |
| 735 | printf( "success.\n" ); |
| |
| 736 | /***********************************************************************/ |
| |
| 737 | |
| |
| 738 | printf( "Testing mp_sqr... " ); |
| |
| 739 | printf( "1 " ); |
| |
| 740 | memset( val1, 0, sizeof(val1) ); |
| |
| 741 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 742 | mp_sqr( val1, val2, sizeof(test_011) / sizeof(leg_t ) ); |
| |
| 743 | mp_import( val3, test_014, sizeof(test_014) ); |
| |
| 744 | |
| |
| 745 | if( memcmp( val1, val3, sizeof(test_014)) ) |
| |
| 746 | REPORT( "MP_SQR test 1" ); |
| |
| 747 | |
| |
| 748 | printf( "2 " ); |
| |
| 749 | memset( val1, 0, sizeof(val1) ); |
| |
| 750 | mp_import( val2, test_012, sizeof(test_012) ); |
| |
| 751 | mp_sqr( val1, val2, sizeof(test_012) / sizeof(leg_t ) ); |
| |
| 752 | mp_import( val3, test_015, sizeof(test_015) ); |
| |
| 753 | |
| |
| 754 | if( memcmp( val1, val3, sizeof(test_015)) ) |
| |
| 755 | REPORT( "MP_SQR test 2" ); |
| |
| 756 | |
| |
| 757 | printf( "success.\n" ); |
| |
| 758 | /***********************************************************************/ |
| |
| 759 | |
| |
| 760 | printf( "Testing mp_mul_kara_square... " ); |
| |
| 761 | printf( "1 " ); |
| |
| 762 | memset( val1, 0, sizeof(val1) ); |
| |
| 763 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 764 | mp_mul_kara_square( val1, val2, sizeof(test_011) / sizeof(leg_t ), scratch ); |
| |
| 765 | mp_import( val3, test_014, sizeof(test_014) ); |
| |
| 766 | |
| |
| 767 | if( memcmp( val1, val3, sizeof(test_014)) ) |
| |
| 768 | REPORT( "MP_MUL_KARA_SQUARE test 1" ); |
| |
| 769 | |
| |
| 770 | printf( "2 " ); |
| |
| 771 | memset( val1, 0, sizeof(val1) ); |
| |
| 772 | mp_import( val2, test_012, sizeof(test_012) ); |
| |
| 773 | mp_mul_kara_square( val1, val2, sizeof(test_012) / sizeof(leg_t ), scratch ); |
| |
| 774 | mp_import( val3, test_015, sizeof(test_015) ); |
| |
| 775 | |
| |
| 776 | if( memcmp( val1, val3, sizeof(test_015)) ) |
| |
| 777 | REPORT( "MP_MUL_KARA_SQUARE test 2" ); |
| |
| 778 | |
| |
| 779 | printf( "success.\n" ); |
| |
| 780 | /***********************************************************************/ |
| |
| 781 | |
| |
| 782 | printf( "Testing mp_mul_kara... " ); |
| |
| 783 | printf( "1 " ); |
| |
| 784 | memset( val1, 0, sizeof(val1) ); |
| |
| 785 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 786 | mp_import( val3, test_012, sizeof(test_012) ); |
| |
| 787 | mp_mul_kara( val1, val2, val3, sizeof(test_011) / sizeof(leg_t ), scratch ); |
| |
| 788 | mp_import( val3, test_013, sizeof(test_013) ); |
| |
| 789 | |
| |
| 790 | if( memcmp( val1, val3, sizeof(test_013)) ) |
| |
| 791 | REPORT( "MP_MUL_KARA test 1" ); |
| |
| 792 | |
| |
| 793 | printf( "2 " ); |
| |
| 794 | memset( val1, 0, sizeof(val1) ); |
| |
| 795 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 796 | mp_import( val2, test_011, sizeof(test_011) ); |
| |
| 797 | mp_mul_kara( val1, val2, val2, sizeof(test_011) / sizeof(leg_t ), scratch ); |
| |
| 798 | mp_import( val3, test_014, sizeof(test_014) ); |
| |
| 799 | |
| |
| 800 | if( memcmp( val1, val3, sizeof(test_014)) ) |
| |
| 801 | REPORT( "MP_MUL_KARA test 2" ); |
| |
| 802 | |
| |
| 803 | printf( "3 " ); |
| |
| 804 | memset( val1, 0, sizeof(val1) ); |
| |
| 805 | mp_import( val2, test_012, sizeof(test_012) ); |
| |
| 806 | mp_import( val2, test_012, sizeof(test_012) ); |
| |
| 807 | mp_mul_kara( val1, val2, val2, sizeof(test_012) / sizeof(leg_t ), scratch ); |
| |
| 808 | mp_import( val3, test_015, sizeof(test_015) ); |
| |
| 809 | |
| |
| 810 | if( memcmp( val1, val3, sizeof(test_015)) ) |
| |
| 811 | REPORT( "MP_MUL_KARA test 3" ); |
| |
| 812 | |
| |
| 813 | printf( "success.\n" ); |
| |
| 814 | /***********************************************************************/ |
| |
| 815 | |
| |
| 816 | |
| |
| 817 | // putchar(10); |
| |
| 818 | // for( i = sizeof(test_013)/sizeof(leg_t )-1; i>=0; --i) printf( "%016llX", val1[i] ); |
| |
| 819 | // putchar(10); |
| |
| 820 | // for( i = sizeof(test_013)/sizeof(leg_t )-1; i>=0; --i) printf( "%016llX", val3[i] ); |
| |
| 821 | // putchar(10); |
| |
| 822 | |
| |
| 823 | } |
400 | } |
| 824 | |
| |
| 825 | #endif |
| |
|
|
diff --git a/powm.h b/powm.h new file mode 100644 index 0000000..7290165 --- /dev/null +++ b/ powm.h |
| @@ -0,0 +1,44 @@ |
| |
1 | #ifndef __LIBGSMKBIGNUM_POWM_H__ |
| |
2 | #define __LIBGSMKBIGNUM_POWM_H__ |
| |
3 | |
| |
4 | #include <stdint.h> |
| |
5 | #include <stdlib.h> |
| |
6 | |
| |
7 | #ifdef __cplusplus |
| |
8 | extern "C" { |
| |
9 | #endif |
| |
10 | |
| |
11 | #ifdef USE32BITLIMB |
| |
12 | typedef uint32_t limb_t; |
| |
13 | typedef uint64_t dlimb_t; |
| |
14 | #else |
| |
15 | typedef uint64_t limb_t; |
| |
16 | typedef unsigned int uint128_t __attribute__((mode(TI))); |
| |
17 | typedef uint128_t dlimb_t; |
| |
18 | #endif |
| |
19 | |
| |
20 | |
| |
21 | /* import from serialized format, big endian byte representation |
| |
22 | return amount of limbs */ |
| |
23 | uint32_t mp_import( limb_t *dest, uint8_t const * src, uint32_t byte_size ); |
| |
24 | void mp_export( uint8_t *dest, const limb_t *scr, uint32_t limb_size ); |
| |
25 | |
| |
26 | /* This function works with precalculated parameters passed in r_square |
| |
27 | and r_inverse. The latter being 1, if the bottom limb_t is -1. */ |
| |
28 | void powm_internal( limb_t * result, |
| |
29 | const limb_t * base, uint32_t base_limbs, |
| |
30 | const limb_t * exponent, uint32_t exp_limbs, |
| |
31 | const limb_t * modulus, uint32_t mod_limbs, |
| |
32 | const limb_t * r_square, const limb_t r_inverse ); |
| |
33 | |
| |
34 | /* Not yet implemented */ |
| |
35 | void powm( limb_t * result, |
| |
36 | const limb_t * base, uint32_t base_limbs, |
| |
37 | const limb_t * exponent, int expbits, |
| |
38 | const limb_t * modulus, uint32_t mod_limbs ); |
| |
39 | |
| |
40 | #ifdef __cplusplus |
| |
41 | } |
| |
42 | #endif |
| |
43 | |
| |
44 | #endif |
|