nss/lib/freebl/ecl/ecp_256_32.c - Issue 2078763002: Delete bundled copy of NSS and replace with README.

Side by Side Diff: nss/lib/freebl/ecl/ecp_256_32.c

Issue 2078763002: Delete bundled copy of NSS and replace with README. (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/nss@master

Patch Set: Delete bundled copy of NSS and replace with README. Created 4 years, 6 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
	(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public

2 * License, v. 2.0. If a copy of the MPL was not distributed with this

3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

4

5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */

6

7 #include <string.h>

8

9 #include "prtypes.h"

10 #include "mpi.h"

11 #include "mpi-priv.h"

12 #include "ecp.h"

13

14 typedef PRUint8 u8;

15 typedef PRUint32 u32;

16 typedef PRUint64 u64;

17

18 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's

19 * MPI library calls them digits, but here they are called limbs, which is

20 * GMP's terminology.

21 *

22 * The value of an felem (field element) is:

23 * x[0] + (x[1] * 2*29) + (x[2] 2*57) + ... + (x[8] 2**228)

24 *

25 * That is, each limb is alternately 29 or 28-bits wide in little-endian

26 * order.

27 *

28 * This means that an felem hits 2257, rather than 2256 as we would like. A

29 * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems

30 * when multiplying as terms end up one bit short of a limb which would require

31 * much bit-shifting to correct.

32 *

33 * Finally, the values stored in an felem are in Montgomery form. So the value

34 * \|y\| is stored as (yR) mod p, where p is the P-256 prime and R is 2*257.

35 */

36 typedef u32 limb;

37 #define NLIMBS 9

38 typedef limb felem[NLIMBS];

39

40 static const limb kBottom28Bits = 0xfffffff;

41 static const limb kBottom29Bits = 0x1fffffff;

42

43 /* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and

44 * 28-bit words.

45 */

46 static const felem kOne = {

47 2, 0, 0, 0xffff800,

48 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,

49 0

50 };

51 static const felem kZero = {0};

52 static const felem kP = {

53 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,

54 0, 0, 0x200000, 0xf000000,

55 0xfffffff

56 };

57 static const felem k2P = {

58 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,

59 0, 0, 0x400000, 0xe000000,

60 0x1fffffff

61 };

62

63 /* kPrecomputed contains precomputed values to aid the calculation of scalar

64 * multiples of the base point, G. It's actually two, equal length, tables

65 * concatenated.

66 *

67 * The first table contains (x,y) felem pairs for 16 multiples of the base

68 * point, G.

69 *

70 * Index \| Index (binary) \| Value

71 * 0 \| 0000 \| 0G (all zeros, omitted)

72 * 1 \| 0001 \| G

73 * 2 \| 0010 \| 2**64G

74 * 3 \| 0011 \| 2**64G + G

75 * 4 \| 0100 \| 2**128G

76 * 5 \| 0101 \| 2**128G + G

77 * 6 \| 0110 \| 2128G + 264G

78 * 7 \| 0111 \| 2128G + 264G + G

79 * 8 \| 1000 \| 2**192G

80 * 9 \| 1001 \| 2**192G + G

81 * 10 \| 1010 \| 2192G + 264G

82 * 11 \| 1011 \| 2192G + 264G + G

83 * 12 \| 1100 \| 2192G + 2128G

84 * 13 \| 1101 \| 2192G + 2128G + G

85 * 14 \| 1110 \| 2192G + 2128G + 2**64G

86 * 15 \| 1111 \| 2192G + 2128G + 2**64G + G

87 *

88 * The second table follows the same style, but the terms are 2**32G,

89 * 296G, 2160G, 2**224G.

90 *

91 * This is ~2KB of data.

92 */

93 static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {

94 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7e dc, 0xd4a6eab, 0x3120bee,

95 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba 21, 0x14b10bb, 0xae3fe3,

96 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe4907 3, 0x3fa36cc, 0x5ebcd2c,

97 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea1244 6, 0xe1ade1e, 0xec91f22,

98 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109 , 0xa267a00, 0xb57c050,

99 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0 x7d6dee7, 0x2976e4b,

100 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a 5a9, 0x843a649, 0xc3ab0fa,

101 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11 , 0x58c43df, 0xf423fc2,

102 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db4 0f, 0x83e277d, 0xb0dd609,

103 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5 , 0xe10c9e, 0x33ab581,

104 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f , 0x48764cd, 0x76dbcca,

105 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b2 0, 0x4ba3173, 0xc168c33,

106 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0 , 0x65dd7ff, 0x3a1e4f6,

107 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f07 7, 0xa6add89, 0x4894acd,

108 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,

109 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c , 0xda0cf5b, 0x812e881,

110 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51 , 0xc22be3e, 0xe35e65a,

111 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9 , 0x1c5a839, 0x47a1e26,

112 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c5 02, 0x2f32042, 0xa17769b,

113 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a 02, 0x3fc93, 0x5620023,

114 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c , 0x407f75c, 0xbaab133,

115 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea 7, 0x3293ac0, 0xcdc98aa,

116 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,

117 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72 , 0x73e1c35, 0xee70fbc,

118 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85 , 0x27de188, 0x66f70b8,

119 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae 914, 0x2f3ec51, 0x3826b59,

120 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x 823d9d2, 0x8213f39,

121 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4 a, 0xf5ddc3d, 0x3786689,

122 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a72 9, 0x4be3499, 0x52b23aa,

123 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb0480 35, 0xe31de66, 0xc6ecaa3,

124 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a752 9, 0xcb7beb1, 0xb2a78a1,

125 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff6 58, 0xe3d6511, 0xc7d76f,

126 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,

127 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d3241 1, 0xb04a838, 0xd760d2d,

128 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11 e, 0x20bca9a, 0x66f496b,

129 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,

130 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56 ff, 0x65ef930, 0x21dc4a,

131 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15 f, 0x624e62e, 0xa90ae2f,

132 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522 b, 0xdc78583, 0x40eeabb,

133 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef3 4, 0xae2a960, 0x91b8bdc,

134 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0 x2413c8e, 0x5425bf9,

135 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633 , 0x7c91952, 0xd806dce,

136 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef7 3, 0x8956f34, 0xe4b5cf2,

137 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7 , 0x627b614, 0x7371cca,

138 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc 9, 0x9c19bf2, 0x5882229,

139 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b 3, 0xe85ff25, 0x408ef57,

140 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113 , 0xa4a1769, 0x11fbc6c,

141 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b 7, 0x4acbad9, 0x5efc5fa,

142 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc , 0x7bf0fa9, 0x957651,

143 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,

144 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c1 2d, 0xf20bd46, 0x1951fa7,

145 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74 , 0x99bb618, 0x2db944c,

146 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e7477 9, 0x576138, 0x9587927,

147 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782 d, 0xfc72e0b, 0x701b298,

148 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5 d8, 0xf858d3a, 0x942eea8,

149 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a 1, 0x8395659, 0x52ed4e2,

150 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c 0, 0x6bdf55a, 0x4e4457d,

151 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747 b, 0x878558d, 0x7d29aa4,

152 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d 7, 0xa5bef68, 0xb7b30d8,

153 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78,

154 };

155

156 /* Field element operations:

157 */

158

159 /* NON_ZERO_TO_ALL_ONES returns:

160 * 0xffffffff for 0 < x <= 2**31

161 * 0 for x == 0 or x > 2**31.

162 *

163 * x must be a u32 or an equivalent type such as limb.

164 */

165 #define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)

166

167 /* felem_reduce_carry adds a multiple of p in order to cancel \|carry\|,

168 * which is a term at 2**257.

169 *

170 * On entry: carry < 23, inout[0,2,...] < 229, inout[1,3,...] < 2**28.

171 * On exit: inout[0,2,..] < 230, inout[1,3,...] < 229.

172 */

173 static void felem_reduce_carry(felem inout, limb carry)

174 {

175 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);

176

177 inout[0] += carry << 1;

178 inout[3] += 0x10000000 & carry_mask;

179 /* carry < 23 thus (carry << 11) < 214 and we added 2**28 in the

180 * previous line therefore this doesn't underflow.

181 */

182 inout[3] -= carry << 11;

183 inout[4] += (0x20000000 - 1) & carry_mask;

184 inout[5] += (0x10000000 - 1) & carry_mask;

185 inout[6] += (0x20000000 - 1) & carry_mask;

186 inout[6] -= carry << 22;

187 /* This may underflow if carry is non-zero but, if so, we'll fix it in the

188 * next line.

189 */

190 inout[7] -= 1 & carry_mask;

191 inout[7] += carry << 25;

192 }

193

194 /* felem_sum sets out = in+in2.

195 *

196 * On entry, in[i]+in2[i] must not overflow a 32-bit word.

197 * On exit: out[0,2,...] < 230, out[1,3,...] < 229

198 */

199 static void felem_sum(felem out, const felem in, const felem in2)

200 {

201 limb carry = 0;

202 unsigned int i;

203 for (i = 0;; i++) {

204 out[i] = in[i] + in2[i];

205 out[i] += carry;

206 carry = out[i] >> 29;

207 out[i] &= kBottom29Bits;

208

209 i++;

210 if (i == NLIMBS)

211 break;

212

213 out[i] = in[i] + in2[i];

214 out[i] += carry;

215 carry = out[i] >> 28;

216 out[i] &= kBottom28Bits;

217 }

218

219 felem_reduce_carry(out, carry);

220 }

221

222 #define two31m3 (((limb)1) << 31) - (((limb)1) << 3)

223 #define two30m2 (((limb)1) << 30) - (((limb)1) << 2)

224 #define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2)

225 #define two31m2 (((limb)1) << 31) - (((limb)1) << 2)

226 #define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2)

227 #define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2)

228

229 /* zero31 is 0 mod p.

230 */

231 static const felem zero31 = {

232 two31m3, two30m2, two31m2, two30p13m2,

233 two31m2, two30m2, two31p24m2, two30m27m2,

234 two31m2

235 };

236

237 /* felem_diff sets out = in-in2.

238 *

239 * On entry: in[0,2,...] < 230, in[1,3,...] < 229 and

240 * in2[0,2,...] < 230, in2[1,3,...] < 229.

241 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

242 */

243 static void felem_diff(felem out, const felem in, const felem in2)

244 {

245 limb carry = 0;

246 unsigned int i;

247

248 for (i = 0;; i++) {

249 out[i] = in[i] - in2[i];

250 out[i] += zero31[i];

251 out[i] += carry;

252 carry = out[i] >> 29;

253 out[i] &= kBottom29Bits;

254

255 i++;

256 if (i == NLIMBS)

257 break;

258

259 out[i] = in[i] - in2[i];

260 out[i] += zero31[i];

261 out[i] += carry;

262 carry = out[i] >> 28;

263 out[i] &= kBottom28Bits;

264 }

265

266 felem_reduce_carry(out, carry);

267 }

268

269 /* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words

270 * with the same 29,28,... bit positions as an felem.

271 *

272 * The values in felems are in Montgomery form: xR mod p where R = 2*257.

273 * Since we just multiplied two Montgomery values together, the result is

274 * xyR*R mod p. We wish to divide by R in order for the result also to be

275 * in Montgomery form.

276 *

277 * On entry: tmp[i] < 2**64

278 * On exit: out[0,2,...] < 230, out[1,3,...] < 229

279 */

280 static void felem_reduce_degree(felem out, u64 tmp[17])

281 {

282 /* The following table may be helpful when reading this code:

283 *

284 * Limb number: 0 \| 1 \| 2 \| 3 \| 4 \| 5 \| 6 \| 7 \| 8 \| 9 \| 10...

285 * Width (bits): 29\| 28\| 29\| 28\| 29\| 28\| 29\| 28\| 29\| 28\| 29

286 * Start bit: 0 \| 29\| 57\| 86\|114\|143\|171\|200\|228\|257\|285

287 * (odd phase): 0 \| 28\| 57\| 85\|114\|142\|171\|199\|228\|256\|285

288 */

289 limb tmp2[18], carry, x, xMask;

290 unsigned int i;

291

292 /* tmp contains 64-bit words with the same 29,28,29-bit positions as an

293 * felem. So the top of an element of tmp might overlap with another

294 * element two positions down. The following loop eliminates this

295 * overlap.

296 */

297 tmp2[0] = tmp[0] & kBottom29Bits;

298

299 /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try

300 * and hint to the compiler that it can do a single-word shift by selecting

301 * the right register rather than doing a double-word shift and truncating

302 * afterwards.

303 */

304 tmp2[1] = ((limb) tmp[0]) >> 29;

305 tmp2[1] \|= (((limb) (tmp[0] >> 32)) << 3) & kBottom28Bits;

306 tmp2[1] += ((limb) tmp[1]) & kBottom28Bits;

307 carry = tmp2[1] >> 28;

308 tmp2[1] &= kBottom28Bits;

309

310 for (i = 2; i < 17; i++) {

311 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;

312 tmp2[i] += ((limb) (tmp[i - 1])) >> 28;

313 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 4) & kBottom29Bits;

314 tmp2[i] += ((limb) tmp[i]) & kBottom29Bits;

315 tmp2[i] += carry;

316 carry = tmp2[i] >> 29;

317 tmp2[i] &= kBottom29Bits;

318

319 i++;

320 if (i == 17)

321 break;

322 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;

323 tmp2[i] += ((limb) (tmp[i - 1])) >> 29;

324 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 3) & kBottom28Bits;

325 tmp2[i] += ((limb) tmp[i]) & kBottom28Bits;

326 tmp2[i] += carry;

327 carry = tmp2[i] >> 28;

328 tmp2[i] &= kBottom28Bits;

329 }

330

331 tmp2[17] = ((limb) (tmp[15] >> 32)) >> 25;

332 tmp2[17] += ((limb) (tmp[16])) >> 29;

333 tmp2[17] += (((limb) (tmp[16] >> 32)) << 3);

334 tmp2[17] += carry;

335

336 /* Montgomery elimination of terms:

337 *

338 * Since R is 2**257, we can divide by R with a bitwise shift if we can

339 * ensure that the right-most 257 bits are all zero. We can make that true

340 * by adding multiplies of p without affecting the value.

341 *

342 * So we eliminate limbs from right to left. Since the bottom 29 bits of p

343 * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.

344 * We can do that for 8 further limbs and then right shift to eliminate the

345 * extra factor of R.

346 */

347 for (i = 0;; i += 2) {

348 tmp2[i + 1] += tmp2[i] >> 29;

349 x = tmp2[i] & kBottom29Bits;

350 xMask = NON_ZERO_TO_ALL_ONES(x);

351 tmp2[i] = 0;

352

353 /* The bounds calculations for this loop are tricky. Each iteration of

354 * the loop eliminates two words by adding values to words to their

355 * right.

356 *

357 * The following table contains the amounts added to each word (as an

358 * offset from the value of i at the top of the loop). The amounts are

359 * accounted for from the first and second half of the loop separately

360 * and are written as, for example, 28 to mean a value <2**28.

361 *

362 * Word: 3 4 5 6 7 8 9 10

363 * Added in top half: 28 11 29 21 29 28

364 * 28 29

365 * 29

366 * Added in bottom half: 29 10 28 21 28 28

367 * 29

368 *

369 * The value that is currently offset 7 will be offset 5 for the next

370 * iteration and then offset 3 for the iteration after that. Therefore

371 * the total value added will be the values added at 7, 5 and 3.

372 *

373 * The following table accumulates these values. The sums at the bottom

374 * are written as, for example, 29+28, to mean a value < 229+228.

375 *

376 * Word: 3 4 5 6 7 8 9 10 11 12 13

377 * 28 11 10 29 21 29 28 28 28 28 28

378 * 29 28 11 28 29 28 29 28 29 28

379 * 29 28 21 21 29 21 29 21

380 * 10 29 28 21 28 21 28

381 * 28 29 28 29 28 29 28

382 * 11 10 29 10 29 10

383 * 29 28 11 28 11

384 * 29 29

385 * --------------------------------------------

386 * 30+ 31+ 30+ 31+ 30+

387 * 28+ 29+ 28+ 29+ 21+

388 * 21+ 28+ 21+ 28+ 10

389 * 10 21+ 10 21+

390 * 11 11

391 *

392 * So the greatest amount is added to tmp2[10] and tmp2[12]. If

393 * tmp2[10/12] has an initial value of <2**29, then the maximum value

394 * will be < 231 + 230 + 228 + 221 + 211, which is < 232,

395 * as required.

396 */

397 tmp2[i + 3] += (x << 10) & kBottom28Bits;

398 tmp2[i + 4] += (x >> 18);

399

400 tmp2[i + 6] += (x << 21) & kBottom29Bits;

401 tmp2[i + 7] += x >> 8;

402

403 /* At position 200, which is the starting bit position for word 7, we

404 * have a factor of 0xf000000 = 228 - 224.

405 */

406 tmp2[i + 7] += 0x10000000 & xMask;

407 /* Word 7 is 28 bits wide, so the 2*28 term exactly hits word 8. /

408 tmp2[i + 8] += (x - 1) & xMask;

409 tmp2[i + 7] -= (x << 24) & kBottom28Bits;

410 tmp2[i + 8] -= x >> 4;

411

412 tmp2[i + 8] += 0x20000000 & xMask;

413 tmp2[i + 8] -= x;

414 tmp2[i + 8] += (x << 28) & kBottom29Bits;

415 tmp2[i + 9] += ((x >> 1) - 1) & xMask;

416

417 if (i+1 == NLIMBS)

418 break;

419 tmp2[i + 2] += tmp2[i + 1] >> 28;

420 x = tmp2[i + 1] & kBottom28Bits;

421 xMask = NON_ZERO_TO_ALL_ONES(x);

422 tmp2[i + 1] = 0;

423

424 tmp2[i + 4] += (x << 11) & kBottom29Bits;

425 tmp2[i + 5] += (x >> 18);

426

427 tmp2[i + 7] += (x << 21) & kBottom28Bits;

428 tmp2[i + 8] += x >> 7;

429

430 /* At position 199, which is the starting bit of the 8th word when

431 * dealing with a context starting on an odd word, we have a factor of

432 * 0x1e000000 = 229 - 225. Since we have not updated i, the 8th

433 * word from i+1 is i+8.

434 */

435 tmp2[i + 8] += 0x20000000 & xMask;

436 tmp2[i + 9] += (x - 1) & xMask;

437 tmp2[i + 8] -= (x << 25) & kBottom29Bits;

438 tmp2[i + 9] -= x >> 4;

439

440 tmp2[i + 9] += 0x10000000 & xMask;

441 tmp2[i + 9] -= x;

442 tmp2[i + 10] += (x - 1) & xMask;

443 }

444

445 /* We merge the right shift with a carry chain. The words above 2**257 have

446 * widths of 28,29,... which we need to correct when copying them down.

447 */

448 carry = 0;

449 for (i = 0; i < 8; i++) {

450 /* The maximum value of tmp2[i + 9] occurs on the first iteration and

451 * is < 230+229+228. Adding 229 (from tmp2[i + 10]) is

452 * therefore safe.

453 */

454 out[i] = tmp2[i + 9];

455 out[i] += carry;

456 out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;

457 carry = out[i] >> 29;

458 out[i] &= kBottom29Bits;

459

460 i++;

461 out[i] = tmp2[i + 9] >> 1;

462 out[i] += carry;

463 carry = out[i] >> 28;

464 out[i] &= kBottom28Bits;

465 }

466

467 out[8] = tmp2[17];

468 out[8] += carry;

469 carry = out[8] >> 29;

470 out[8] &= kBottom29Bits;

471

472 felem_reduce_carry(out, carry);

473 }

474

475 /* felem_square sets out=in*in.

476 *

477 * On entry: in[0,2,...] < 230, in[1,3,...] < 229.

478 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

479 */

480 static void felem_square(felem out, const felem in)

481 {

482 u64 tmp[17];

483

484 tmp[0] = ((u64) in[0]) * in[0];

485 tmp[1] = ((u64) in[0]) * (in[1] << 1);

486 tmp[2] = ((u64) in[0]) * (in[2] << 1) +

487 ((u64) in[1]) * (in[1] << 1);

488 tmp[3] = ((u64) in[0]) * (in[3] << 1) +

489 ((u64) in[1]) * (in[2] << 1);

490 tmp[4] = ((u64) in[0]) * (in[4] << 1) +

491 ((u64) in[1]) * (in[3] << 2) +

492 ((u64) in[2]) * in[2];

493 tmp[5] = ((u64) in[0]) * (in[5] << 1) +

494 ((u64) in[1]) * (in[4] << 1) +

495 ((u64) in[2]) * (in[3] << 1);

496 tmp[6] = ((u64) in[0]) * (in[6] << 1) +

497 ((u64) in[1]) * (in[5] << 2) +

498 ((u64) in[2]) * (in[4] << 1) +

499 ((u64) in[3]) * (in[3] << 1);

500 tmp[7] = ((u64) in[0]) * (in[7] << 1) +

501 ((u64) in[1]) * (in[6] << 1) +

502 ((u64) in[2]) * (in[5] << 1) +

503 ((u64) in[3]) * (in[4] << 1);

504 /* tmp[8] has the greatest value of 261 + 260 + 261 + 260 + 2**60,

505 * which is < 2**64 as required.

506 */

507 tmp[8] = ((u64) in[0]) * (in[8] << 1) +

508 ((u64) in[1]) * (in[7] << 2) +

509 ((u64) in[2]) * (in[6] << 1) +

510 ((u64) in[3]) * (in[5] << 2) +

511 ((u64) in[4]) * in[4];

512 tmp[9] = ((u64) in[1]) * (in[8] << 1) +

513 ((u64) in[2]) * (in[7] << 1) +

514 ((u64) in[3]) * (in[6] << 1) +

515 ((u64) in[4]) * (in[5] << 1);

516 tmp[10] = ((u64) in[2]) * (in[8] << 1) +

517 ((u64) in[3]) * (in[7] << 2) +

518 ((u64) in[4]) * (in[6] << 1) +

519 ((u64) in[5]) * (in[5] << 1);

520 tmp[11] = ((u64) in[3]) * (in[8] << 1) +

521 ((u64) in[4]) * (in[7] << 1) +

522 ((u64) in[5]) * (in[6] << 1);

523 tmp[12] = ((u64) in[4]) * (in[8] << 1) +

524 ((u64) in[5]) * (in[7] << 2) +

525 ((u64) in[6]) * in[6];

526 tmp[13] = ((u64) in[5]) * (in[8] << 1) +

527 ((u64) in[6]) * (in[7] << 1);

528 tmp[14] = ((u64) in[6]) * (in[8] << 1) +

529 ((u64) in[7]) * (in[7] << 1);

530 tmp[15] = ((u64) in[7]) * (in[8] << 1);

531 tmp[16] = ((u64) in[8]) * in[8];

532

533 felem_reduce_degree(out, tmp);

534 }

535

536 /* felem_mul sets out=in*in2.

537 *

538 * On entry: in[0,2,...] < 230, in[1,3,...] < 229 and

539 * in2[0,2,...] < 230, in2[1,3,...] < 229.

540 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

541 */

542 static void felem_mul(felem out, const felem in, const felem in2)

543 {

544 u64 tmp[17];

545

546 tmp[0] = ((u64) in[0]) * in2[0];

547 tmp[1] = ((u64) in[0]) * (in2[1] << 0) +

548 ((u64) in[1]) * (in2[0] << 0);

549 tmp[2] = ((u64) in[0]) * (in2[2] << 0) +

550 ((u64) in[1]) * (in2[1] << 1) +

551 ((u64) in[2]) * (in2[0] << 0);

552 tmp[3] = ((u64) in[0]) * (in2[3] << 0) +

553 ((u64) in[1]) * (in2[2] << 0) +

554 ((u64) in[2]) * (in2[1] << 0) +

555 ((u64) in[3]) * (in2[0] << 0);

556 tmp[4] = ((u64) in[0]) * (in2[4] << 0) +

557 ((u64) in[1]) * (in2[3] << 1) +

558 ((u64) in[2]) * (in2[2] << 0) +

559 ((u64) in[3]) * (in2[1] << 1) +

560 ((u64) in[4]) * (in2[0] << 0);

561 tmp[5] = ((u64) in[0]) * (in2[5] << 0) +

562 ((u64) in[1]) * (in2[4] << 0) +

563 ((u64) in[2]) * (in2[3] << 0) +

564 ((u64) in[3]) * (in2[2] << 0) +

565 ((u64) in[4]) * (in2[1] << 0) +

566 ((u64) in[5]) * (in2[0] << 0);

567 tmp[6] = ((u64) in[0]) * (in2[6] << 0) +

568 ((u64) in[1]) * (in2[5] << 1) +

569 ((u64) in[2]) * (in2[4] << 0) +

570 ((u64) in[3]) * (in2[3] << 1) +

571 ((u64) in[4]) * (in2[2] << 0) +

572 ((u64) in[5]) * (in2[1] << 1) +

573 ((u64) in[6]) * (in2[0] << 0);

574 tmp[7] = ((u64) in[0]) * (in2[7] << 0) +

575 ((u64) in[1]) * (in2[6] << 0) +

576 ((u64) in[2]) * (in2[5] << 0) +

577 ((u64) in[3]) * (in2[4] << 0) +

578 ((u64) in[4]) * (in2[3] << 0) +

579 ((u64) in[5]) * (in2[2] << 0) +

580 ((u64) in[6]) * (in2[1] << 0) +

581 ((u64) in[7]) * (in2[0] << 0);

582 /* tmp[8] has the greatest value but doesn't overflow. See logic in

583 * felem_square.

584 */

585 tmp[8] = ((u64) in[0]) * (in2[8] << 0) +

586 ((u64) in[1]) * (in2[7] << 1) +

587 ((u64) in[2]) * (in2[6] << 0) +

588 ((u64) in[3]) * (in2[5] << 1) +

589 ((u64) in[4]) * (in2[4] << 0) +

590 ((u64) in[5]) * (in2[3] << 1) +

591 ((u64) in[6]) * (in2[2] << 0) +

592 ((u64) in[7]) * (in2[1] << 1) +

593 ((u64) in[8]) * (in2[0] << 0);

594 tmp[9] = ((u64) in[1]) * (in2[8] << 0) +

595 ((u64) in[2]) * (in2[7] << 0) +

596 ((u64) in[3]) * (in2[6] << 0) +

597 ((u64) in[4]) * (in2[5] << 0) +

598 ((u64) in[5]) * (in2[4] << 0) +

599 ((u64) in[6]) * (in2[3] << 0) +

600 ((u64) in[7]) * (in2[2] << 0) +

601 ((u64) in[8]) * (in2[1] << 0);

602 tmp[10] = ((u64) in[2]) * (in2[8] << 0) +

603 ((u64) in[3]) * (in2[7] << 1) +

604 ((u64) in[4]) * (in2[6] << 0) +

605 ((u64) in[5]) * (in2[5] << 1) +

606 ((u64) in[6]) * (in2[4] << 0) +

607 ((u64) in[7]) * (in2[3] << 1) +

608 ((u64) in[8]) * (in2[2] << 0);

609 tmp[11] = ((u64) in[3]) * (in2[8] << 0) +

610 ((u64) in[4]) * (in2[7] << 0) +

611 ((u64) in[5]) * (in2[6] << 0) +

612 ((u64) in[6]) * (in2[5] << 0) +

613 ((u64) in[7]) * (in2[4] << 0) +

614 ((u64) in[8]) * (in2[3] << 0);

615 tmp[12] = ((u64) in[4]) * (in2[8] << 0) +

616 ((u64) in[5]) * (in2[7] << 1) +

617 ((u64) in[6]) * (in2[6] << 0) +

618 ((u64) in[7]) * (in2[5] << 1) +

619 ((u64) in[8]) * (in2[4] << 0);

620 tmp[13] = ((u64) in[5]) * (in2[8] << 0) +

621 ((u64) in[6]) * (in2[7] << 0) +

622 ((u64) in[7]) * (in2[6] << 0) +

623 ((u64) in[8]) * (in2[5] << 0);

624 tmp[14] = ((u64) in[6]) * (in2[8] << 0) +

625 ((u64) in[7]) * (in2[7] << 1) +

626 ((u64) in[8]) * (in2[6] << 0);

627 tmp[15] = ((u64) in[7]) * (in2[8] << 0) +

628 ((u64) in[8]) * (in2[7] << 0);

629 tmp[16] = ((u64) in[8]) * (in2[8] << 0);

630

631 felem_reduce_degree(out, tmp);

632 }

633

634 static void felem_assign(felem out, const felem in)

635 {

636 memcpy(out, in, sizeof(felem));

637 }

638

639 /* felem_inv calculates \|out\| = \|in\|^{-1}

640 *

641 * Based on Fermat's Little Theorem:

642 * a^p = a (mod p)

643 * a^{p-1} = 1 (mod p)

644 * a^{p-2} = a^{-1} (mod p)

645 */

646 static void felem_inv(felem out, const felem in)

647 {

648 felem ftmp, ftmp2;

649 /* each e_I will hold \|in\|^{2^I - 1} */

650 felem e2, e4, e8, e16, e32, e64;

651 unsigned int i;

652

653 felem_square(ftmp, in); /* 2^1 */

654 felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */

655 felem_assign(e2, ftmp);

656 felem_square(ftmp, ftmp); /* 2^3 - 2^1 */

657 felem_square(ftmp, ftmp); /* 2^4 - 2^2 */

658 felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */

659 felem_assign(e4, ftmp);

660 felem_square(ftmp, ftmp); /* 2^5 - 2^1 */

661 felem_square(ftmp, ftmp); /* 2^6 - 2^2 */

662 felem_square(ftmp, ftmp); /* 2^7 - 2^3 */

663 felem_square(ftmp, ftmp); /* 2^8 - 2^4 */

664 felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */

665 felem_assign(e8, ftmp);

666 for (i = 0; i < 8; i++) {

667 felem_square(ftmp, ftmp);

668 } /* 2^16 - 2^8 */

669 felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */

670 felem_assign(e16, ftmp);

671 for (i = 0; i < 16; i++) {

672 felem_square(ftmp, ftmp);

673 } /* 2^32 - 2^16 */

674 felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */

675 felem_assign(e32, ftmp);

676 for (i = 0; i < 32; i++) {

677 felem_square(ftmp, ftmp);

678 } /* 2^64 - 2^32 */

679 felem_assign(e64, ftmp);

680 felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */

681 for (i = 0; i < 192; i++) {

682 felem_square(ftmp, ftmp);

683 } /* 2^256 - 2^224 + 2^192 */

684

685 felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */

686 for (i = 0; i < 16; i++) {

687 felem_square(ftmp2, ftmp2);

688 } /* 2^80 - 2^16 */

689 felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */

690 for (i = 0; i < 8; i++) {

691 felem_square(ftmp2, ftmp2);

692 } /* 2^88 - 2^8 */

693 felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */

694 for (i = 0; i < 4; i++) {

695 felem_square(ftmp2, ftmp2);

696 } /* 2^92 - 2^4 */

697 felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */

698 felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */

699 felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */

700 felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */

701 felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */

702 felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */

703 felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */

704

705 felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */

706 }

707

708 /* felem_scalar_3 sets out=3*out.

709 *

710 * On entry: out[0,2,...] < 230, out[1,3,...] < 229.

711 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

712 */

713 static void felem_scalar_3(felem out)

714 {

715 limb carry = 0;

716 unsigned int i;

717

718 for (i = 0;; i++) {

719 out[i] *= 3;

720 out[i] += carry;

721 carry = out[i] >> 29;

722 out[i] &= kBottom29Bits;

723

724 i++;

725 if (i == NLIMBS)

726 break;

727

728 out[i] *= 3;

729 out[i] += carry;

730 carry = out[i] >> 28;

731 out[i] &= kBottom28Bits;

732 }

733

734 felem_reduce_carry(out, carry);

735 }

736

737 /* felem_scalar_4 sets out=4*out.

738 *

739 * On entry: out[0,2,...] < 230, out[1,3,...] < 229.

740 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

741 */

742 static void felem_scalar_4(felem out)

743 {

744 limb carry = 0, next_carry;

745 unsigned int i;

746

747 for (i = 0;; i++) {

748 next_carry = out[i] >> 27;

749 out[i] <<= 2;

750 out[i] &= kBottom29Bits;

751 out[i] += carry;

752 carry = next_carry + (out[i] >> 29);

753 out[i] &= kBottom29Bits;

754

755 i++;

756 if (i == NLIMBS)

757 break;

758 next_carry = out[i] >> 26;

759 out[i] <<= 2;

760 out[i] &= kBottom28Bits;

761 out[i] += carry;

762 carry = next_carry + (out[i] >> 28);

763 out[i] &= kBottom28Bits;

764 }

765

766 felem_reduce_carry(out, carry);

767 }

768

769 /* felem_scalar_8 sets out=8*out.

770 *

771 * On entry: out[0,2,...] < 230, out[1,3,...] < 229.

772 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

773 */

774 static void felem_scalar_8(felem out)

775 {

776 limb carry = 0, next_carry;

777 unsigned int i;

778

779 for (i = 0;; i++) {

780 next_carry = out[i] >> 26;

781 out[i] <<= 3;

782 out[i] &= kBottom29Bits;

783 out[i] += carry;

784 carry = next_carry + (out[i] >> 29);

785 out[i] &= kBottom29Bits;

786

787 i++;

788 if (i == NLIMBS)

789 break;

790 next_carry = out[i] >> 25;

791 out[i] <<= 3;

792 out[i] &= kBottom28Bits;

793 out[i] += carry;

794 carry = next_carry + (out[i] >> 28);

795 out[i] &= kBottom28Bits;

796 }

797

798 felem_reduce_carry(out, carry);

799 }

800

801 /* felem_is_zero_vartime returns 1 iff \|in\| == 0. It takes a variable amount of

802 * time depending on the value of \|in\|.

803 */

804 static char felem_is_zero_vartime(const felem in)

805 {

806 limb carry;

807 int i;

808 limb tmp[NLIMBS];

809 felem_assign(tmp, in);

810

811 /* First, reduce tmp to a minimal form.

812 */

813 do {

814 carry = 0;

815 for (i = 0;; i++) {

816 tmp[i] += carry;

817 carry = tmp[i] >> 29;

818 tmp[i] &= kBottom29Bits;

819

820 i++;

821 if (i == NLIMBS)

822 break;

823

824 tmp[i] += carry;

825 carry = tmp[i] >> 28;

826 tmp[i] &= kBottom28Bits;

827 }

828

829 felem_reduce_carry(tmp, carry);

830 } while (carry);

831

832 /* tmp < 2**257, so the only possible zero values are 0, p and 2p.

833 */

834 return memcmp(tmp, kZero, sizeof(tmp)) == 0 \|\|

835 memcmp(tmp, kP, sizeof(tmp)) == 0 \|\|

836 memcmp(tmp, k2P, sizeof(tmp)) == 0;

837 }

838

839 /* Group operations:

840 *

841 * Elements of the elliptic curve group are represented in Jacobian

842 * coordinates: (x, y, z). An affine point (x', y') is x'=x/z2, y'=y/z3 in

843 * Jacobian form.

844 */

845

846 /* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.

847 *

848 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling -dbl-2009-l

849 */

850 static void point_double(felem x_out, felem y_out, felem z_out,

851 const felem x, const felem y, const felem z)

852 {

853 felem delta, gamma, alpha, beta, tmp, tmp2;

854

855 felem_square(delta, z);

856 felem_square(gamma, y);

857 felem_mul(beta, x, gamma);

858

859 felem_sum(tmp, x, delta);

860 felem_diff(tmp2, x, delta);

861 felem_mul(alpha, tmp, tmp2);

862 felem_scalar_3(alpha);

863

864 felem_sum(tmp, y, z);

865 felem_square(tmp, tmp);

866 felem_diff(tmp, tmp, gamma);

867 felem_diff(z_out, tmp, delta);

868

869 felem_scalar_4(beta);

870 felem_square(x_out, alpha);

871 felem_diff(x_out, x_out, beta);

872 felem_diff(x_out, x_out, beta);

873

874 felem_diff(tmp, beta, x_out);

875 felem_mul(tmp, alpha, tmp);

876 felem_square(tmp2, gamma);

877 felem_scalar_8(tmp2);

878 felem_diff(y_out, tmp, tmp2);

879 }

880

881 /* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.

882 * (i.e. the second point is affine.)

883 *

884 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl

885 *

886 * Note that this function does not handle P+P, infinity+P nor P+infinity

887 * correctly.

888 */

889 static void point_add_mixed(felem x_out, felem y_out, felem z_out,

890 const felem x1, const felem y1, const felem z1,

891 const felem x2, const felem y2)

892 {

893 felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;

894

895 felem_square(z1z1, z1);

896 felem_sum(tmp, z1, z1);

897

898 felem_mul(u2, x2, z1z1);

899 felem_mul(z1z1z1, z1, z1z1);

900 felem_mul(s2, y2, z1z1z1);

901 felem_diff(h, u2, x1);

902 felem_sum(i, h, h);

903 felem_square(i, i);

904 felem_mul(j, h, i);

905 felem_diff(r, s2, y1);

906 felem_sum(r, r, r);

907 felem_mul(v, x1, i);

908

909 felem_mul(z_out, tmp, h);

910 felem_square(rr, r);

911 felem_diff(x_out, rr, j);

912 felem_diff(x_out, x_out, v);

913 felem_diff(x_out, x_out, v);

914

915 felem_diff(tmp, v, x_out);

916 felem_mul(y_out, tmp, r);

917 felem_mul(tmp, y1, j);

918 felem_diff(y_out, y_out, tmp);

919 felem_diff(y_out, y_out, tmp);

920 }

921

922 /* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.

923 *

924 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl

925 *

926 * Note that this function does not handle P+P, infinity+P nor P+infinity

927 * correctly.

928 */

929 static void point_add(felem x_out, felem y_out, felem z_out,

930 const felem x1, const felem y1, const felem z1,

931 const felem x2, const felem y2, const felem z2)

932 {

933 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;

934

935 felem_square(z1z1, z1);

936 felem_square(z2z2, z2);

937 felem_mul(u1, x1, z2z2);

938

939 felem_sum(tmp, z1, z2);

940 felem_square(tmp, tmp);

941 felem_diff(tmp, tmp, z1z1);

942 felem_diff(tmp, tmp, z2z2);

943

944 felem_mul(z2z2z2, z2, z2z2);

945 felem_mul(s1, y1, z2z2z2);

946

947 felem_mul(u2, x2, z1z1);

948 felem_mul(z1z1z1, z1, z1z1);

949 felem_mul(s2, y2, z1z1z1);

950 felem_diff(h, u2, u1);

951 felem_sum(i, h, h);

952 felem_square(i, i);

953 felem_mul(j, h, i);

954 felem_diff(r, s2, s1);

955 felem_sum(r, r, r);

956 felem_mul(v, u1, i);

957

958 felem_mul(z_out, tmp, h);

959 felem_square(rr, r);

960 felem_diff(x_out, rr, j);

961 felem_diff(x_out, x_out, v);

962 felem_diff(x_out, x_out, v);

963

964 felem_diff(tmp, v, x_out);

965 felem_mul(y_out, tmp, r);

966 felem_mul(tmp, s1, j);

967 felem_diff(y_out, y_out, tmp);

968 felem_diff(y_out, y_out, tmp);

969 }

970

971 /* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +

972 * {x2,y2,z2}.

973 *

974 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl

975 *

976 * This function handles the case where {x1,y1,z1}={x2,y2,z2}.

977 */

978 static void point_add_or_double_vartime(

979 felem x_out, felem y_out, felem z_out,

980 const felem x1, const felem y1, const felem z1,

981 const felem x2, const felem y2, const felem z2)

982 {

983 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;

984 char x_equal, y_equal;

985

986 felem_square(z1z1, z1);

987 felem_square(z2z2, z2);

988 felem_mul(u1, x1, z2z2);

989

990 felem_sum(tmp, z1, z2);

991 felem_square(tmp, tmp);

992 felem_diff(tmp, tmp, z1z1);

993 felem_diff(tmp, tmp, z2z2);

994

995 felem_mul(z2z2z2, z2, z2z2);

996 felem_mul(s1, y1, z2z2z2);

997

998 felem_mul(u2, x2, z1z1);

999 felem_mul(z1z1z1, z1, z1z1);

1000 felem_mul(s2, y2, z1z1z1);

1001 felem_diff(h, u2, u1);

1002 x_equal = felem_is_zero_vartime(h);

1003 felem_sum(i, h, h);

1004 felem_square(i, i);

1005 felem_mul(j, h, i);

1006 felem_diff(r, s2, s1);

1007 y_equal = felem_is_zero_vartime(r);

1008 if (x_equal && y_equal) {

1009 point_double(x_out, y_out, z_out, x1, y1, z1);

1010 return;

1011 }

1012 felem_sum(r, r, r);

1013 felem_mul(v, u1, i);

1014

1015 felem_mul(z_out, tmp, h);

1016 felem_square(rr, r);

1017 felem_diff(x_out, rr, j);

1018 felem_diff(x_out, x_out, v);

1019 felem_diff(x_out, x_out, v);

1020

1021 felem_diff(tmp, v, x_out);

1022 felem_mul(y_out, tmp, r);

1023 felem_mul(tmp, s1, j);

1024 felem_diff(y_out, y_out, tmp);

1025 felem_diff(y_out, y_out, tmp);

1026 }

1027

1028 /* copy_conditional sets out=in if mask = 0xffffffff in constant time.

1029 *

1030 * On entry: mask is either 0 or 0xffffffff.

1031 */

1032 static void copy_conditional(felem out, const felem in, limb mask)

1033 {

1034 int i;

1035

1036 for (i = 0; i < NLIMBS; i++) {

1037 const limb tmp = mask & (in[i] ^ out[i]);

1038 out[i] ^= tmp;

1039 }

1040 }

1041

1042 /* select_affine_point sets {out_x,out_y} to the index'th entry of table.

1043 * On entry: index < 16, table[0] must be zero.

1044 */

1045 static void select_affine_point(felem out_x, felem out_y,

1046 const limb *table, limb index)

1047 {

1048 limb i, j;

1049

1050 memset(out_x, 0, sizeof(felem));

1051 memset(out_y, 0, sizeof(felem));

1052

1053 for (i = 1; i < 16; i++) {

1054 limb mask = i ^ index;

1055 mask \|= mask >> 2;

1056 mask \|= mask >> 1;

1057 mask &= 1;

1058 mask--;

1059 for (j = 0; j < NLIMBS; j++, table++) {

1060 out_x[j] \|= *table & mask;

1061 }

1062 for (j = 0; j < NLIMBS; j++, table++) {

1063 out_y[j] \|= *table & mask;

1064 }

1065 }

1066 }

1067

1068 /* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of

1069 * table. On entry: index < 16, table[0] must be zero.

1070 */

1071 static void select_jacobian_point(felem out_x, felem out_y, felem out_z,

1072 const limb *table, limb index)

1073 {

1074 limb i, j;

1075

1076 memset(out_x, 0, sizeof(felem));

1077 memset(out_y, 0, sizeof(felem));

1078 memset(out_z, 0, sizeof(felem));

1079

1080 /* The implicit value at index 0 is all zero. We don't need to perform that

1081 * iteration of the loop because we already set out_* to zero.

1082 */

1083 table += 3*NLIMBS;

1084

1085 for (i = 1; i < 16; i++) {

1086 limb mask = i ^ index;

1087 mask \|= mask >> 2;

1088 mask \|= mask >> 1;

1089 mask &= 1;

1090 mask--;

1091 for (j = 0; j < NLIMBS; j++, table++) {

1092 out_x[j] \|= *table & mask;

1093 }

1094 for (j = 0; j < NLIMBS; j++, table++) {

1095 out_y[j] \|= *table & mask;

1096 }

1097 for (j = 0; j < NLIMBS; j++, table++) {

1098 out_z[j] \|= *table & mask;

1099 }

1100 }

1101 }

1102

1103 /* get_bit returns the bit'th bit of scalar. */

1104 static char get_bit(const u8 scalar[32], int bit)

1105 {

1106 return ((scalar[bit >> 3]) >> (bit & 7)) & 1;

1107 }

1108

1109 /* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian

1110 * number. Note that the value of scalar must be less than the order of the

1111 * group.

1112 */

1113 static void scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])

1114 {

1115 int i, j;

1116 limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;

1117 u32 table_offset;

1118

1119 felem px, py;

1120 felem tx, ty, tz;

1121

1122 memset(nx, 0, sizeof(felem));

1123 memset(ny, 0, sizeof(felem));

1124 memset(nz, 0, sizeof(felem));

1125

1126 /* The loop adds bits at positions 0, 64, 128 and 192, followed by

1127 * positions 32,96,160 and 224 and does this 32 times.

1128 */

1129 for (i = 0; i < 32; i++) {

1130 if (i) {

1131 point_double(nx, ny, nz, nx, ny, nz);

1132 }

1133 table_offset = 0;

1134 for (j = 0; j <= 32; j += 32) {

1135 char bit0 = get_bit(scalar, 31 - i + j);

1136 char bit1 = get_bit(scalar, 95 - i + j);

1137 char bit2 = get_bit(scalar, 159 - i + j);

1138 char bit3 = get_bit(scalar, 223 - i + j);

1139 limb index = bit0 \| (bit1 << 1) \| (bit2 << 2) \| (bit3 << 3);

1140

1141 select_affine_point(px, py, kPrecomputed + table_offset, index);

1142 table_offset += 30 * NLIMBS;

1143

1144 /* Since scalar is less than the order of the group, we know that

1145 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle

1146 * below.

1147 */

1148 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);

1149 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero

1150 * (a.k.a. the point at infinity). We handle that situation by

1151 * copying the point from the table.

1152 */

1153 copy_conditional(nx, px, n_is_infinity_mask);

1154 copy_conditional(ny, py, n_is_infinity_mask);

1155 copy_conditional(nz, kOne, n_is_infinity_mask);

1156

1157 /* Equally, the result is also wrong if the point from the table is

1158 * zero, which happens when the index is zero. We handle that by

1159 * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.

1160 */

1161 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);

1162 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;

1163 copy_conditional(nx, tx, mask);

1164 copy_conditional(ny, ty, mask);

1165 copy_conditional(nz, tz, mask);

1166 /* If p was not zero, then n is now non-zero. */

1167 n_is_infinity_mask &= ~p_is_noninfinite_mask;

1168 }

1169 }

1170 }

1171

1172 /* point_to_affine converts a Jacobian point to an affine point. If the input

1173 * is the point at infinity then it returns (0, 0) in constant time.

1174 */

1175 static void point_to_affine(felem x_out, felem y_out,

1176 const felem nx, const felem ny, const felem nz) {

1177 felem z_inv, z_inv_sq;

1178 felem_inv(z_inv, nz);

1179 felem_square(z_inv_sq, z_inv);

1180 felem_mul(x_out, nx, z_inv_sq);

1181 felem_mul(z_inv, z_inv, z_inv_sq);

1182 felem_mul(y_out, ny, z_inv);

1183 }

1184

1185 /* scalar_mult sets {nx,ny,nz} = scalar{x,y}. /

1186 static void scalar_mult(felem nx, felem ny, felem nz,

1187 const felem x, const felem y, const u8 scalar[32])

1188 {

1189 int i;

1190 felem px, py, pz, tx, ty, tz;

1191 felem precomp[16][3];

1192 limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;

1193

1194 /* We precompute 0,1,2,... times {x,y}. */

1195 memset(precomp, 0, sizeof(felem) * 3);

1196 memcpy(&precomp[1][0], x, sizeof(felem));

1197 memcpy(&precomp[1][1], y, sizeof(felem));

1198 memcpy(&precomp[1][2], kOne, sizeof(felem));

1199

1200 for (i = 2; i < 16; i += 2) {

1201 point_double(precomp[i][0], precomp[i][1], precomp[i][2],

1202 precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);

1203

1204 point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],

1205 precomp[i][0], precomp[i][1], precomp[i][2], x, y);

1206 }

1207

1208 memset(nx, 0, sizeof(felem));

1209 memset(ny, 0, sizeof(felem));

1210 memset(nz, 0, sizeof(felem));

1211 n_is_infinity_mask = -1;

1212

1213 /* We add in a window of four bits each iteration and do this 64 times. */

1214 for (i = 0; i < 64; i++) {

1215 if (i) {

1216 point_double(nx, ny, nz, nx, ny, nz);

1217 point_double(nx, ny, nz, nx, ny, nz);

1218 point_double(nx, ny, nz, nx, ny, nz);

1219 point_double(nx, ny, nz, nx, ny, nz);

1220 }

1221

1222 index = scalar[31 - i / 2];

1223 if ((i & 1) == 1) {

1224 index &= 15;

1225 } else {

1226 index >>= 4;

1227 }

1228

1229 /* See the comments in scalar_base_mult about handling infinities. */

1230 select_jacobian_point(px, py, pz, precomp[0][0], index);

1231 point_add(tx, ty, tz, nx, ny, nz, px, py, pz);

1232 copy_conditional(nx, px, n_is_infinity_mask);

1233 copy_conditional(ny, py, n_is_infinity_mask);

1234 copy_conditional(nz, pz, n_is_infinity_mask);

1235

1236 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);

1237 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;

1238 copy_conditional(nx, tx, mask);

1239 copy_conditional(ny, ty, mask);

1240 copy_conditional(nz, tz, mask);

1241 n_is_infinity_mask &= ~p_is_noninfinite_mask;

1242 }

1243 }

1244

1245 /* Interface with Freebl: */

1246

1247 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to

1248 * little-endian order.

1249 */

1250 #ifdef IS_BIG_ENDIAN

1251 #ifdef __APPLE__

1252 #include <libkern/OSByteOrder.h>

1253 #define BYTESWAP32(x) OSSwapInt32(x)

1254 #define BYTESWAP64(x) OSSwapInt64(x)

1255 #else

1256 #define BYTESWAP32(x) \

1257 (((x) >> 24) \| (((x) >> 8) & 0xff00) \| (((x) & 0xff00) << 8) \| ((x) << 24))

1258 #define BYTESWAP64(x) \

1259 (((x) >> 56) \| (((x) >> 40) & 0xff00) \| \

1260 (((x) >> 24) & 0xff0000) \| (((x) >> 8) & 0xff000000) \| \

1261 (((x) & 0xff000000) << 8) \| (((x) & 0xff0000) << 24) \| \

1262 (((x) & 0xff00) << 40) \| ((x) << 56))

1263 #endif

1264

1265 #ifdef MP_USE_UINT_DIGIT

1266 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)

1267 #else

1268 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)

1269 #endif

1270 #endif /* IS_BIG_ENDIAN */

1271

1272 #ifdef MP_USE_UINT_DIGIT

1273 static const mp_digit kRInvDigits[8] = {

1274 0x80000000, 1, 0xffffffff, 0,

1275 0x80000001, 0xfffffffe, 1, 0x7fffffff

1276 };

1277 #else

1278 static const mp_digit kRInvDigits[4] = {

1279 PR_UINT64(0x180000000), 0xffffffff,

1280 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)

1281 };

1282 #endif

1283 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))

1284 static const mp_int kRInv = {

1285 MP_ZPOS,

1286 MP_DIGITS_IN_256_BITS,

1287 MP_DIGITS_IN_256_BITS,

1288 (mp_digit*) kRInvDigits

1289 };

1290

1291 static const limb kTwo28 = 0x10000000;

1292 static const limb kTwo29 = 0x20000000;

1293

1294 /* to_montgomery sets out = Rin. /

1295 static mp_err to_montgomery(felem out, const mp_int in, const ECGroup group)

1296 {

1297 /* There are no MPI functions for bitshift operations and we wish to shift

1298 * in 257 bits left so we move the digits 256-bits left and then multiply

1299 * by two.

1300 */

1301 mp_int in_shifted;

1302 int i;

1303 mp_err res;

1304

1305 MP_CHECKOK(mp_init(&in_shifted));

1306 MP_CHECKOK(s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS));

1307 memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS),

1308 MP_DIGITS(in),

1309 MP_USED(in)*sizeof(mp_digit));

1310 MP_CHECKOK(mp_mul_2(&in_shifted, &in_shifted));

1311 MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth));

1312

1313 for (i = 0;; i++) {

1314 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits;

1315 MP_CHECKOK(mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL));

1316

1317 i++;

1318 if (i == NLIMBS)

1319 break;

1320 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits;

1321 MP_CHECKOK(mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL));

1322 }

1323

1324 CLEANUP:

1325 mp_clear(&in_shifted);

1326 return res;

1327 }

1328

1329 /* from_montgomery sets out=in/R. */

1330 static mp_err from_montgomery(mp_int *out, const felem in,

1331 const ECGroup *group)

1332 {

1333 mp_int result, tmp;

1334 mp_err res;

1335 int i;

1336

1337 MP_CHECKOK(mp_init(&result));

1338 MP_CHECKOK(mp_init(&tmp));

1339

1340 MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS-1], &result));

1341 for (i = NLIMBS-2; i >= 0; i--) {

1342 if ((i & 1) == 0) {

1343 MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp));

1344 } else {

1345 MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp));

1346 }

1347 MP_CHECKOK(mp_add_d(&tmp, in[i], &result));

1348 }

1349

1350 MP_CHECKOK(mp_mul(&result, &kRInv, out));

1351 MP_CHECKOK(group->meth->field_mod(out, out, group->meth));

1352

1353 CLEANUP:

1354 mp_clear(&result);

1355 mp_clear(&tmp);

1356 return res;

1357 }

1358

1359 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */

1360 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)

1361 {

1362 /* We require that \|n\| is less than the order of the group and therefore it

1363 * will fit into \|out_scalar\|. However, these is a timing side-channel here

1364 * that we cannot avoid: if \|n\| is sufficiently small it may be one or more

1365 * words too short and we'll copy less data.

1366 */

1367 memset(out_scalar, 0, 32);

1368 #ifdef IS_LITTLE_ENDIAN

1369 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));

1370 #else

1371 {

1372 mp_size i;

1373 mp_digit swapped[MP_DIGITS_IN_256_BITS];

1374 for (i = 0; i < MP_USED(n); i++) {

1375 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));

1376 }

1377 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));

1378 }

1379 #endif

1380 }

1381

1382 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the

1383 * order of the group.

1384 */

1385 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n,

1386 mp_int out_x, mp_int out_y,

1387 const ECGroup *group)

1388 {

1389 u8 scalar[32];

1390 felem x, y, z, x_affine, y_affine;

1391 mp_err res;

1392

1393 /* FIXME(agl): test that n < order. */

1394

1395 scalar_from_mp_int(scalar, n);

1396 scalar_base_mult(x, y, z, scalar);

1397 point_to_affine(x_affine, y_affine, x, y, z);

1398 MP_CHECKOK(from_montgomery(out_x, x_affine, group));

1399 MP_CHECKOK(from_montgomery(out_y, y_affine, group));

1400

1401 CLEANUP:

1402 return res;

1403 }

1404

1405 /* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is <

1406 * the order of the group.

1407 */

1408 static mp_err ec_GFp_nistp256_point_mul(const mp_int *n,

1409 const mp_int in_x, const mp_int in_y,

1410 mp_int out_x, mp_int out_y,

1411 const ECGroup *group)

1412 {

1413 u8 scalar[32];

1414 felem x, y, z, x_affine, y_affine, px, py;

1415 mp_err res;

1416

1417 scalar_from_mp_int(scalar, n);

1418

1419 MP_CHECKOK(to_montgomery(px, in_x, group));

1420 MP_CHECKOK(to_montgomery(py, in_y, group));

1421

1422 scalar_mult(x, y, z, px, py, scalar);

1423 point_to_affine(x_affine, y_affine, x, y, z);

1424 MP_CHECKOK(from_montgomery(out_x, x_affine, group));

1425 MP_CHECKOK(from_montgomery(out_y, y_affine, group));

1426

1427 CLEANUP:

1428 return res;

1429 }

1430

1431 /* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G +

1432 * n2*{in_x,in_y}, where n1 and n2 are < the order of the group.

1433 *

1434 * As indicated by the name, this function operates in variable time. This

1435 * is safe because it's used for signature validation which doesn't deal

1436 * with secrets.

1437 */

1438 static mp_err ec_GFp_nistp256_points_mul_vartime(

1439 const mp_int n1, const mp_int n2,

1440 const mp_int in_x, const mp_int in_y,

1441 mp_int out_x, mp_int out_y,

1442 const ECGroup *group)

1443 {

1444 u8 scalar1[32], scalar2[32];

1445 felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;

1446 mp_err res = MP_OKAY;

1447

1448 /* If n2 == NULL, this is just a base-point multiplication. */

1449 if (n2 == NULL) {

1450 return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);

1451 }

1452

1453 /* If n1 == nULL, this is just an arbitary-point multiplication. */

1454 if (n1 == NULL) {

1455 return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);

1456 }

1457

1458 /* If both scalars are zero, then the result is the point at infinity. */

1459 if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {

1460 mp_zero(out_x);

1461 mp_zero(out_y);

1462 return res;

1463 }

1464

1465 scalar_from_mp_int(scalar1, n1);

1466 scalar_from_mp_int(scalar2, n2);

1467

1468 MP_CHECKOK(to_montgomery(px, in_x, group));

1469 MP_CHECKOK(to_montgomery(py, in_y, group));

1470 scalar_base_mult(x1, y1, z1, scalar1);

1471 scalar_mult(x2, y2, z2, px, py, scalar2);

1472

1473 if (mp_cmp_z(n2) == 0) {

1474 /* If n2 == 0, then {x2,y2,z2} is zero and the result is just

1475 * {x1,y1,z1}. */

1476 } else if (mp_cmp_z(n1) == 0) {

1477 /* If n1 == 0, then {x1,y1,z1} is zero and the result is just

1478 * {x2,y2,z2}. */

1479 memcpy(x1, x2, sizeof(x2));

1480 memcpy(y1, y2, sizeof(y2));

1481 memcpy(z1, z2, sizeof(z2));

1482 } else {

1483 /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */

1484 point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);

1485 }

1486

1487 point_to_affine(x_affine, y_affine, x1, y1, z1);

1488 MP_CHECKOK(from_montgomery(out_x, x_affine, group));

1489 MP_CHECKOK(from_montgomery(out_y, y_affine, group));

1490

1491 CLEANUP:

1492 return res;

1493 }

1494

1495 /* Wire in fast point multiplication for named curves. */

1496 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)

1497 {

1498 if (name == ECCurve_NIST_P256) {

1499 group->base_point_mul = &ec_GFp_nistp256_base_point_mul;

1500 group->point_mul = &ec_GFp_nistp256_point_mul;

1501 group->points_mul = &ec_GFp_nistp256_points_mul_vartime;

1502 }

1503 return MP_OKAY;

1504 }

OLD	NEW

« no previous file with comments | « nss/lib/freebl/ecl/ecp_256.c ('k') | nss/lib/freebl/ecl/ecp_384.c » ('j') | no next file with comments »