mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c - Issue 14249009: Change the NSS and NSPR source tree to the new directory structure to be

Side by Side Diff: mozilla/security/nss/lib/freebl/ecl/ecp_256_32.c

Issue 14249009: Change the NSS and NSPR source tree to the new directory structure to be (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/deps/third_party/nss/

Patch Set: Created 7 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
	(Empty)
1 /* This Source Code Form is subject to the terms of the Mozilla Public

2 * License, v. 2.0. If a copy of the MPL was not distributed with this

3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

4

5 /* A 32-bit implementation of the NIST P-256 elliptic curve. */

6

7 #include <string.h>

8

9 #include "prtypes.h"

10 #include "mpi.h"

11 #include "mpi-priv.h"

12 #include "ecp.h"

13 #include "secport.h"

14

15 typedef PRUint8 u8;

16 typedef PRUint32 u32;

17 typedef PRUint64 u64;

18

19 /* Our field elements are represented as nine, unsigned 32-bit words. Freebl's

20 * MPI library calls them digits, but here they are called limbs, which is

21 * GMP's terminology.

22 *

23 * The value of an felem (field element) is:

24 * x[0] + (x[1] * 2*29) + (x[2] 2*57) + ... + (x[8] 2**228)

25 *

26 * That is, each limb is alternately 29 or 28-bits wide in little-endian

27 * order.

28 *

29 * This means that an felem hits 2257, rather than 2256 as we would like. A

30 * 28, 29, ... pattern would cause us to hit 2**256, but that causes problems

31 * when multiplying as terms end up one bit short of a limb which would require

32 * much bit-shifting to correct.

33 *

34 * Finally, the values stored in an felem are in Montgomery form. So the value

35 * \|y\| is stored as (yR) mod p, where p is the P-256 prime and R is 2*257.

36 */

37 typedef u32 limb;

38 #define NLIMBS 9

39 typedef limb felem[NLIMBS];

40

41 static const limb kBottom28Bits = 0xfffffff;

42 static const limb kBottom29Bits = 0x1fffffff;

43

44 /* kOne is the number 1 as an felem. It's 2**257 mod p split up into 29 and

45 * 28-bit words.

46 */

47 static const felem kOne = {

48 2, 0, 0, 0xffff800,

49 0x1fffffff, 0xfffffff, 0x1fbfffff, 0x1ffffff,

50 0

51 };

52 static const felem kZero = {0};

53 static const felem kP = {

54 0x1fffffff, 0xfffffff, 0x1fffffff, 0x3ff,

55 0, 0, 0x200000, 0xf000000,

56 0xfffffff

57 };

58 static const felem k2P = {

59 0x1ffffffe, 0xfffffff, 0x1fffffff, 0x7ff,

60 0, 0, 0x400000, 0xe000000,

61 0x1fffffff

62 };

63

64 /* kPrecomputed contains precomputed values to aid the calculation of scalar

65 * multiples of the base point, G. It's actually two, equal length, tables

66 * concatenated.

67 *

68 * The first table contains (x,y) felem pairs for 16 multiples of the base

69 * point, G.

70 *

71 * Index \| Index (binary) \| Value

72 * 0 \| 0000 \| 0G (all zeros, omitted)

73 * 1 \| 0001 \| G

74 * 2 \| 0010 \| 2**64G

75 * 3 \| 0011 \| 2**64G + G

76 * 4 \| 0100 \| 2**128G

77 * 5 \| 0101 \| 2**128G + G

78 * 6 \| 0110 \| 2128G + 264G

79 * 7 \| 0111 \| 2128G + 264G + G

80 * 8 \| 1000 \| 2**192G

81 * 9 \| 1001 \| 2**192G + G

82 * 10 \| 1010 \| 2192G + 264G

83 * 11 \| 1011 \| 2192G + 264G + G

84 * 12 \| 1100 \| 2192G + 2128G

85 * 13 \| 1101 \| 2192G + 2128G + G

86 * 14 \| 1110 \| 2192G + 2128G + 2**64G

87 * 15 \| 1111 \| 2192G + 2128G + 2**64G + G

88 *

89 * The second table follows the same style, but the terms are 2**32G,

90 * 296G, 2160G, 2**224G.

91 *

92 * This is ~2KB of data.

93 */

94 static const limb kPrecomputed[NLIMBS * 2 * 15 * 2] = {

95 0x11522878, 0xe730d41, 0xdb60179, 0x4afe2ff, 0x12883add, 0xcaddd88, 0x119e7e dc, 0xd4a6eab, 0x3120bee,

96 0x1d2aac15, 0xf25357c, 0x19e45cdd, 0x5c721d0, 0x1992c5a5, 0xa237487, 0x154ba 21, 0x14b10bb, 0xae3fe3,

97 0xd41a576, 0x922fc51, 0x234994f, 0x60b60d3, 0x164586ae, 0xce95f18, 0x1fe4907 3, 0x3fa36cc, 0x5ebcd2c,

98 0xb402f2f, 0x15c70bf, 0x1561925c, 0x5a26704, 0xda91e90, 0xcdc1c7f, 0x1ea1244 6, 0xe1ade1e, 0xec91f22,

99 0x26f7778, 0x566847e, 0xa0bec9e, 0x234f453, 0x1a31f21a, 0xd85e75c, 0x56c7109 , 0xa267a00, 0xb57c050,

100 0x98fb57, 0xaa837cc, 0x60c0792, 0xcfa5e19, 0x61bab9e, 0x589e39b, 0xa324c5, 0 x7d6dee7, 0x2976e4b,

101 0x1fc4124a, 0xa8c244b, 0x1ce86762, 0xcd61c7e, 0x1831c8e0, 0x75774e1, 0x1d96a 5a9, 0x843a649, 0xc3ab0fa,

102 0x6e2e7d5, 0x7673a2a, 0x178b65e8, 0x4003e9b, 0x1a1f11c2, 0x7816ea, 0xf643e11 , 0x58c43df, 0xf423fc2,

103 0x19633ffa, 0x891f2b2, 0x123c231c, 0x46add8c, 0x54700dd, 0x59e2b17, 0x172db4 0f, 0x83e277d, 0xb0dd609,

104 0xfd1da12, 0x35c6e52, 0x19ede20c, 0xd19e0c0, 0x97d0f40, 0xb015b19, 0x449e3f5 , 0xe10c9e, 0x33ab581,

105 0x56a67ab, 0x577734d, 0x1dddc062, 0xc57b10d, 0x149b39d, 0x26a9e7b, 0xc35df9f , 0x48764cd, 0x76dbcca,

106 0xca4b366, 0xe9303ab, 0x1a7480e7, 0x57e9e81, 0x1e13eb50, 0xf466cf3, 0x6f16b2 0, 0x4ba3173, 0xc168c33,

107 0x15cb5439, 0x6a38e11, 0x73658bd, 0xb29564f, 0x3f6dc5b, 0x53b97e, 0x1322c4c0 , 0x65dd7ff, 0x3a1e4f6,

108 0x14e614aa, 0x9246317, 0x1bc83aca, 0xad97eed, 0xd38ce4a, 0xf82b006, 0x341f07 7, 0xa6add89, 0x4894acd,

109 0x9f162d5, 0xf8410ef, 0x1b266a56, 0xd7f223, 0x3e0cb92, 0xe39b672, 0x6a2901a, 0x69a8556, 0x7e7c0,

110 0x9b7d8d3, 0x309a80, 0x1ad05f7f, 0xc2fb5dd, 0xcbfd41d, 0x9ceb638, 0x1051825c , 0xda0cf5b, 0x812e881,

111 0x6f35669, 0x6a56f2c, 0x1df8d184, 0x345820, 0x1477d477, 0x1645db1, 0xbe80c51 , 0xc22be3e, 0xe35e65a,

112 0x1aeb7aa0, 0xc375315, 0xf67bc99, 0x7fdd7b9, 0x191fc1be, 0x61235d, 0x2c184e9 , 0x1c5a839, 0x47a1e26,

113 0xb7cb456, 0x93e225d, 0x14f3c6ed, 0xccc1ac9, 0x17fe37f3, 0x4988989, 0x1a90c5 02, 0x2f32042, 0xa17769b,

114 0xafd8c7c, 0x8191c6e, 0x1dcdb237, 0x16200c0, 0x107b32a1, 0x66c08db, 0x10d06a 02, 0x3fc93, 0x5620023,

115 0x16722b27, 0x68b5c59, 0x270fcfc, 0xfad0ecc, 0xe5de1c2, 0xeab466b, 0x2fc513c , 0x407f75c, 0xbaab133,

116 0x9705fe9, 0xb88b8e7, 0x734c993, 0x1e1ff8f, 0x19156970, 0xabd0f00, 0x10469ea 7, 0x3293ac0, 0xcdc98aa,

117 0x1d843fd, 0xe14bfe8, 0x15be825f, 0x8b5212, 0xeb3fb67, 0x81cbd29, 0xbc62f16, 0x2b6fcc7, 0xf5a4e29,

118 0x13560b66, 0xc0b6ac2, 0x51ae690, 0xd41e271, 0xf3e9bd4, 0x1d70aab, 0x1029f72 , 0x73e1c35, 0xee70fbc,

119 0xad81baf, 0x9ecc49a, 0x86c741e, 0xfe6be30, 0x176752e7, 0x23d416, 0x1f83de85 , 0x27de188, 0x66f70b8,

120 0x181cd51f, 0x96b6e4c, 0x188f2335, 0xa5df759, 0x17a77eb6, 0xfeb0e73, 0x154ae 914, 0x2f3ec51, 0x3826b59,

121 0xb91f17d, 0x1c72949, 0x1362bf0a, 0xe23fddf, 0xa5614b0, 0xf7d8f, 0x79061, 0x 823d9d2, 0x8213f39,

122 0x1128ae0b, 0xd095d05, 0xb85c0c2, 0x1ecb2ef, 0x24ddc84, 0xe35e901, 0x18411a4 a, 0xf5ddc3d, 0x3786689,

123 0x52260e8, 0x5ae3564, 0x542b10d, 0x8d93a45, 0x19952aa4, 0x996cc41, 0x1051a72 9, 0x4be3499, 0x52b23aa,

124 0x109f307e, 0x6f5b6bb, 0x1f84e1e7, 0x77a0cfa, 0x10c4df3f, 0x25a02ea, 0xb0480 35, 0xe31de66, 0xc6ecaa3,

125 0x28ea335, 0x2886024, 0x1372f020, 0xf55d35, 0x15e4684c, 0xf2a9e17, 0x1a4a752 9, 0xcb7beb1, 0xb2a78a1,

126 0x1ab21f1f, 0x6361ccf, 0x6c9179d, 0xb135627, 0x1267b974, 0x4408bad, 0x1cbff6 58, 0xe3d6511, 0xc7d76f,

127 0x1cc7a69, 0xe7ee31b, 0x54fab4f, 0x2b914f, 0x1ad27a30, 0xcd3579e, 0xc50124c, 0x50daa90, 0xb13f72,

128 0xb06aa75, 0x70f5cc6, 0x1649e5aa, 0x84a5312, 0x329043c, 0x41c4011, 0x13d3241 1, 0xb04a838, 0xd760d2d,

129 0x1713b532, 0xbaa0c03, 0x84022ab, 0x6bcf5c1, 0x2f45379, 0x18ae070, 0x18c9e11 e, 0x20bca9a, 0x66f496b,

130 0x3eef294, 0x67500d2, 0xd7f613c, 0x2dbbeb, 0xb741038, 0xe04133f, 0x1582968d, 0xbe985f7, 0x1acbc1a,

131 0x1a6a939f, 0x33e50f6, 0xd665ed4, 0xb4b7bd6, 0x1e5a3799, 0x6b33847, 0x17fa56 ff, 0x65ef930, 0x21dc4a,

132 0x2b37659, 0x450fe17, 0xb357b65, 0xdf5efac, 0x15397bef, 0x9d35a7f, 0x112ac15 f, 0x624e62e, 0xa90ae2f,

133 0x107eecd2, 0x1f69bbe, 0x77d6bce, 0x5741394, 0x13c684fc, 0x950c910, 0x725522 b, 0xdc78583, 0x40eeabb,

134 0x1fde328a, 0xbd61d96, 0xd28c387, 0x9e77d89, 0x12550c40, 0x759cb7d, 0x367ef3 4, 0xae2a960, 0x91b8bdc,

135 0x93462a9, 0xf469ef, 0xb2e9aef, 0xd2ca771, 0x54e1f42, 0x7aaa49, 0x6316abb, 0 x2413c8e, 0x5425bf9,

136 0x1bed3e3a, 0xf272274, 0x1f5e7326, 0x6416517, 0xea27072, 0x9cedea7, 0x6e7633 , 0x7c91952, 0xd806dce,

137 0x8e2a7e1, 0xe421e1a, 0x418c9e1, 0x1dbc890, 0x1b395c36, 0xa1dc175, 0x1dc4ef7 3, 0x8956f34, 0xe4b5cf2,

138 0x1b0d3a18, 0x3194a36, 0x6c2641f, 0xe44124c, 0xa2f4eaa, 0xa8c25ba, 0xf927ed7 , 0x627b614, 0x7371cca,

139 0xba16694, 0x417bc03, 0x7c0a7e3, 0x9c35c19, 0x1168a205, 0x8b6b00d, 0x10e3edc 9, 0x9c19bf2, 0x5882229,

140 0x1b2b4162, 0xa5cef1a, 0x1543622b, 0x9bd433e, 0x364e04d, 0x7480792, 0x5c9b5b 3, 0xe85ff25, 0x408ef57,

141 0x1814cfa4, 0x121b41b, 0xd248a0f, 0x3b05222, 0x39bb16a, 0xc75966d, 0xa038113 , 0xa4a1769, 0x11fbc6c,

142 0x917e50e, 0xeec3da8, 0x169d6eac, 0x10c1699, 0xa416153, 0xf724912, 0x15cd60b 7, 0x4acbad9, 0x5efc5fa,

143 0xf150ed7, 0x122b51, 0x1104b40a, 0xcb7f442, 0xfbb28ff, 0x6ac53ca, 0x196142cc , 0x7bf0fa9, 0x957651,

144 0x4e0f215, 0xed439f8, 0x3f46bd5, 0x5ace82f, 0x110916b6, 0x6db078, 0xffd7d57, 0xf2ecaac, 0xca86dec,

145 0x15d6b2da, 0x965ecc9, 0x1c92b4c2, 0x1f3811, 0x1cb080f5, 0x2d8b804, 0x19d1c1 2d, 0xf20bd46, 0x1951fa7,

146 0xa3656c3, 0x523a425, 0xfcd0692, 0xd44ddc8, 0x131f0f5b, 0xaf80e4a, 0xcd9fc74 , 0x99bb618, 0x2db944c,

147 0xa673090, 0x1c210e1, 0x178c8d23, 0x1474383, 0x10b8743d, 0x985a55b, 0x2e7477 9, 0x576138, 0x9587927,

148 0x133130fa, 0xbe05516, 0x9f4d619, 0xbb62570, 0x99ec591, 0xd9468fe, 0x1d07782 d, 0xfc72e0b, 0x701b298,

149 0x1863863b, 0x85954b8, 0x121a0c36, 0x9e7fedf, 0xf64b429, 0x9b9d71e, 0x14e2f5 d8, 0xf858d3a, 0x942eea8,

150 0xda5b765, 0x6edafff, 0xa9d18cc, 0xc65e4ba, 0x1c747e86, 0xe4ea915, 0x1981d7a 1, 0x8395659, 0x52ed4e2,

151 0x87d43b7, 0x37ab11b, 0x19d292ce, 0xf8d4692, 0x18c3053f, 0x8863e13, 0x4c146c 0, 0x6bdf55a, 0x4e4457d,

152 0x16152289, 0xac78ec2, 0x1a59c5a2, 0x2028b97, 0x71c2d01, 0x295851f, 0x404747 b, 0x878558d, 0x7d29aa4,

153 0x13d8341f, 0x8daefd7, 0x139c972d, 0x6b7ea75, 0xd4a9dde, 0xff163d8, 0x81d55d 7, 0xa5bef68, 0xb7b30d8,

154 0xbe73d6f, 0xaa88141, 0xd976c81, 0x7e7a9cc, 0x18beb771, 0xd773cbd, 0x13f5195 1, 0x9d0c177, 0x1c49a78,

155 };

156

157 /* Field element operations:

158 */

159

160 /* NON_ZERO_TO_ALL_ONES returns:

161 * 0xffffffff for 0 < x <= 2**31

162 * 0 for x == 0 or x > 2**31.

163 *

164 * x must be a u32 or an equivalent type such as limb.

165 */

166 #define NON_ZERO_TO_ALL_ONES(x) ((((u32)(x) - 1) >> 31) - 1)

167

168 /* felem_reduce_carry adds a multiple of p in order to cancel \|carry\|,

169 * which is a term at 2**257.

170 *

171 * On entry: carry < 23, inout[0,2,...] < 229, inout[1,3,...] < 2**28.

172 * On exit: inout[0,2,..] < 230, inout[1,3,...] < 229.

173 */

174 static void felem_reduce_carry(felem inout, limb carry)

175 {

176 const u32 carry_mask = NON_ZERO_TO_ALL_ONES(carry);

177

178 inout[0] += carry << 1;

179 inout[3] += 0x10000000 & carry_mask;

180 /* carry < 23 thus (carry << 11) < 214 and we added 2**28 in the

181 * previous line therefore this doesn't underflow.

182 */

183 inout[3] -= carry << 11;

184 inout[4] += (0x20000000 - 1) & carry_mask;

185 inout[5] += (0x10000000 - 1) & carry_mask;

186 inout[6] += (0x20000000 - 1) & carry_mask;

187 inout[6] -= carry << 22;

188 /* This may underflow if carry is non-zero but, if so, we'll fix it in the

189 * next line.

190 */

191 inout[7] -= 1 & carry_mask;

192 inout[7] += carry << 25;

193 }

194

195 /* felem_sum sets out = in+in2.

196 *

197 * On entry, in[i]+in2[i] must not overflow a 32-bit word.

198 * On exit: out[0,2,...] < 230, out[1,3,...] < 229

199 */

200 static void felem_sum(felem out, const felem in, const felem in2)

201 {

202 limb carry = 0;

203 unsigned int i;

204 for (i = 0;; i++) {

205 out[i] = in[i] + in2[i];

206 out[i] += carry;

207 carry = out[i] >> 29;

208 out[i] &= kBottom29Bits;

209

210 i++;

211 if (i == NLIMBS)

212 break;

213

214 out[i] = in[i] + in2[i];

215 out[i] += carry;

216 carry = out[i] >> 28;

217 out[i] &= kBottom28Bits;

218 }

219

220 felem_reduce_carry(out, carry);

221 }

222

223 #define two31m3 (((limb)1) << 31) - (((limb)1) << 3)

224 #define two30m2 (((limb)1) << 30) - (((limb)1) << 2)

225 #define two30p13m2 (((limb)1) << 30) + (((limb)1) << 13) - (((limb)1) << 2)

226 #define two31m2 (((limb)1) << 31) - (((limb)1) << 2)

227 #define two31p24m2 (((limb)1) << 31) + (((limb)1) << 24) - (((limb)1) << 2)

228 #define two30m27m2 (((limb)1) << 30) - (((limb)1) << 27) - (((limb)1) << 2)

229

230 /* zero31 is 0 mod p.

231 */

232 static const felem zero31 = {

233 two31m3, two30m2, two31m2, two30p13m2,

234 two31m2, two30m2, two31p24m2, two30m27m2,

235 two31m2

236 };

237

238 /* felem_diff sets out = in-in2.

239 *

240 * On entry: in[0,2,...] < 230, in[1,3,...] < 229 and

241 * in2[0,2,...] < 230, in2[1,3,...] < 229.

242 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

243 */

244 static void felem_diff(felem out, const felem in, const felem in2)

245 {

246 limb carry = 0;

247 unsigned int i;

248

249 for (i = 0;; i++) {

250 out[i] = in[i] - in2[i];

251 out[i] += zero31[i];

252 out[i] += carry;

253 carry = out[i] >> 29;

254 out[i] &= kBottom29Bits;

255

256 i++;

257 if (i == NLIMBS)

258 break;

259

260 out[i] = in[i] - in2[i];

261 out[i] += zero31[i];

262 out[i] += carry;

263 carry = out[i] >> 28;

264 out[i] &= kBottom28Bits;

265 }

266

267 felem_reduce_carry(out, carry);

268 }

269

270 /* felem_reduce_degree sets out = tmp/R mod p where tmp contains 64-bit words

271 * with the same 29,28,... bit positions as an felem.

272 *

273 * The values in felems are in Montgomery form: xR mod p where R = 2*257.

274 * Since we just multiplied two Montgomery values together, the result is

275 * xyR*R mod p. We wish to divide by R in order for the result also to be

276 * in Montgomery form.

277 *

278 * On entry: tmp[i] < 2**64

279 * On exit: out[0,2,...] < 230, out[1,3,...] < 229

280 */

281 static void felem_reduce_degree(felem out, u64 tmp[17])

282 {

283 /* The following table may be helpful when reading this code:

284 *

285 * Limb number: 0 \| 1 \| 2 \| 3 \| 4 \| 5 \| 6 \| 7 \| 8 \| 9 \| 10...

286 * Width (bits): 29\| 28\| 29\| 28\| 29\| 28\| 29\| 28\| 29\| 28\| 29

287 * Start bit: 0 \| 29\| 57\| 86\|114\|143\|171\|200\|228\|257\|285

288 * (odd phase): 0 \| 28\| 57\| 85\|114\|142\|171\|199\|228\|256\|285

289 */

290 limb tmp2[18], carry, x, xMask;

291 unsigned int i;

292

293 /* tmp contains 64-bit words with the same 29,28,29-bit positions as an

294 * felem. So the top of an element of tmp might overlap with another

295 * element two positions down. The following loop eliminates this

296 * overlap.

297 */

298 tmp2[0] = tmp[0] & kBottom29Bits;

299

300 /* In the following we use "(limb) tmp[x]" and "(limb) (tmp[x]>>32)" to try

301 * and hint to the compiler that it can do a single-word shift by selecting

302 * the right register rather than doing a double-word shift and truncating

303 * afterwards.

304 */

305 tmp2[1] = ((limb) tmp[0]) >> 29;

306 tmp2[1] \|= (((limb) (tmp[0] >> 32)) << 3) & kBottom28Bits;

307 tmp2[1] += ((limb) tmp[1]) & kBottom28Bits;

308 carry = tmp2[1] >> 28;

309 tmp2[1] &= kBottom28Bits;

310

311 for (i = 2; i < 17; i++) {

312 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;

313 tmp2[i] += ((limb) (tmp[i - 1])) >> 28;

314 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 4) & kBottom29Bits;

315 tmp2[i] += ((limb) tmp[i]) & kBottom29Bits;

316 tmp2[i] += carry;

317 carry = tmp2[i] >> 29;

318 tmp2[i] &= kBottom29Bits;

319

320 i++;

321 if (i == 17)

322 break;

323 tmp2[i] = ((limb) (tmp[i - 2] >> 32)) >> 25;

324 tmp2[i] += ((limb) (tmp[i - 1])) >> 29;

325 tmp2[i] += (((limb) (tmp[i - 1] >> 32)) << 3) & kBottom28Bits;

326 tmp2[i] += ((limb) tmp[i]) & kBottom28Bits;

327 tmp2[i] += carry;

328 carry = tmp2[i] >> 28;

329 tmp2[i] &= kBottom28Bits;

330 }

331

332 tmp2[17] = ((limb) (tmp[15] >> 32)) >> 25;

333 tmp2[17] += ((limb) (tmp[16])) >> 29;

334 tmp2[17] += (((limb) (tmp[16] >> 32)) << 3);

335 tmp2[17] += carry;

336

337 /* Montgomery elimination of terms:

338 *

339 * Since R is 2**257, we can divide by R with a bitwise shift if we can

340 * ensure that the right-most 257 bits are all zero. We can make that true

341 * by adding multiplies of p without affecting the value.

342 *

343 * So we eliminate limbs from right to left. Since the bottom 29 bits of p

344 * are all ones, then by adding tmp2[0]*p to tmp2 we'll make tmp2[0] == 0.

345 * We can do that for 8 further limbs and then right shift to eliminate the

346 * extra factor of R.

347 */

348 for (i = 0;; i += 2) {

349 tmp2[i + 1] += tmp2[i] >> 29;

350 x = tmp2[i] & kBottom29Bits;

351 xMask = NON_ZERO_TO_ALL_ONES(x);

352 tmp2[i] = 0;

353

354 /* The bounds calculations for this loop are tricky. Each iteration of

355 * the loop eliminates two words by adding values to words to their

356 * right.

357 *

358 * The following table contains the amounts added to each word (as an

359 * offset from the value of i at the top of the loop). The amounts are

360 * accounted for from the first and second half of the loop separately

361 * and are written as, for example, 28 to mean a value <2**28.

362 *

363 * Word: 3 4 5 6 7 8 9 10

364 * Added in top half: 28 11 29 21 29 28

365 * 28 29

366 * 29

367 * Added in bottom half: 29 10 28 21 28 28

368 * 29

369 *

370 * The value that is currently offset 7 will be offset 5 for the next

371 * iteration and then offset 3 for the iteration after that. Therefore

372 * the total value added will be the values added at 7, 5 and 3.

373 *

374 * The following table accumulates these values. The sums at the bottom

375 * are written as, for example, 29+28, to mean a value < 229+228.

376 *

377 * Word: 3 4 5 6 7 8 9 10 11 12 13

378 * 28 11 10 29 21 29 28 28 28 28 28

379 * 29 28 11 28 29 28 29 28 29 28

380 * 29 28 21 21 29 21 29 21

381 * 10 29 28 21 28 21 28

382 * 28 29 28 29 28 29 28

383 * 11 10 29 10 29 10

384 * 29 28 11 28 11

385 * 29 29

386 * --------------------------------------------

387 * 30+ 31+ 30+ 31+ 30+

388 * 28+ 29+ 28+ 29+ 21+

389 * 21+ 28+ 21+ 28+ 10

390 * 10 21+ 10 21+

391 * 11 11

392 *

393 * So the greatest amount is added to tmp2[10] and tmp2[12]. If

394 * tmp2[10/12] has an initial value of <2**29, then the maximum value

395 * will be < 231 + 230 + 228 + 221 + 211, which is < 232,

396 * as required.

397 */

398 tmp2[i + 3] += (x << 10) & kBottom28Bits;

399 tmp2[i + 4] += (x >> 18);

400

401 tmp2[i + 6] += (x << 21) & kBottom29Bits;

402 tmp2[i + 7] += x >> 8;

403

404 /* At position 200, which is the starting bit position for word 7, we

405 * have a factor of 0xf000000 = 228 - 224.

406 */

407 tmp2[i + 7] += 0x10000000 & xMask;

408 /* Word 7 is 28 bits wide, so the 2*28 term exactly hits word 8. /

409 tmp2[i + 8] += (x - 1) & xMask;

410 tmp2[i + 7] -= (x << 24) & kBottom28Bits;

411 tmp2[i + 8] -= x >> 4;

412

413 tmp2[i + 8] += 0x20000000 & xMask;

414 tmp2[i + 8] -= x;

415 tmp2[i + 8] += (x << 28) & kBottom29Bits;

416 tmp2[i + 9] += ((x >> 1) - 1) & xMask;

417

418 if (i+1 == NLIMBS)

419 break;

420 tmp2[i + 2] += tmp2[i + 1] >> 28;

421 x = tmp2[i + 1] & kBottom28Bits;

422 xMask = NON_ZERO_TO_ALL_ONES(x);

423 tmp2[i + 1] = 0;

424

425 tmp2[i + 4] += (x << 11) & kBottom29Bits;

426 tmp2[i + 5] += (x >> 18);

427

428 tmp2[i + 7] += (x << 21) & kBottom28Bits;

429 tmp2[i + 8] += x >> 7;

430

431 /* At position 199, which is the starting bit of the 8th word when

432 * dealing with a context starting on an odd word, we have a factor of

433 * 0x1e000000 = 229 - 225. Since we have not updated i, the 8th

434 * word from i+1 is i+8.

435 */

436 tmp2[i + 8] += 0x20000000 & xMask;

437 tmp2[i + 9] += (x - 1) & xMask;

438 tmp2[i + 8] -= (x << 25) & kBottom29Bits;

439 tmp2[i + 9] -= x >> 4;

440

441 tmp2[i + 9] += 0x10000000 & xMask;

442 tmp2[i + 9] -= x;

443 tmp2[i + 10] += (x - 1) & xMask;

444 }

445

446 /* We merge the right shift with a carry chain. The words above 2**257 have

447 * widths of 28,29,... which we need to correct when copying them down.

448 */

449 carry = 0;

450 for (i = 0; i < 8; i++) {

451 /* The maximum value of tmp2[i + 9] occurs on the first iteration and

452 * is < 230+229+228. Adding 229 (from tmp2[i + 10]) is

453 * therefore safe.

454 */

455 out[i] = tmp2[i + 9];

456 out[i] += carry;

457 out[i] += (tmp2[i + 10] << 28) & kBottom29Bits;

458 carry = out[i] >> 29;

459 out[i] &= kBottom29Bits;

460

461 i++;

462 out[i] = tmp2[i + 9] >> 1;

463 out[i] += carry;

464 carry = out[i] >> 28;

465 out[i] &= kBottom28Bits;

466 }

467

468 out[8] = tmp2[17];

469 out[8] += carry;

470 carry = out[8] >> 29;

471 out[8] &= kBottom29Bits;

472

473 felem_reduce_carry(out, carry);

474 }

475

476 /* felem_square sets out=in*in.

477 *

478 * On entry: in[0,2,...] < 230, in[1,3,...] < 229.

479 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

480 */

481 static void felem_square(felem out, const felem in)

482 {

483 u64 tmp[17];

484

485 tmp[0] = ((u64) in[0]) * in[0];

486 tmp[1] = ((u64) in[0]) * (in[1] << 1);

487 tmp[2] = ((u64) in[0]) * (in[2] << 1) +

488 ((u64) in[1]) * (in[1] << 1);

489 tmp[3] = ((u64) in[0]) * (in[3] << 1) +

490 ((u64) in[1]) * (in[2] << 1);

491 tmp[4] = ((u64) in[0]) * (in[4] << 1) +

492 ((u64) in[1]) * (in[3] << 2) +

493 ((u64) in[2]) * in[2];

494 tmp[5] = ((u64) in[0]) * (in[5] << 1) +

495 ((u64) in[1]) * (in[4] << 1) +

496 ((u64) in[2]) * (in[3] << 1);

497 tmp[6] = ((u64) in[0]) * (in[6] << 1) +

498 ((u64) in[1]) * (in[5] << 2) +

499 ((u64) in[2]) * (in[4] << 1) +

500 ((u64) in[3]) * (in[3] << 1);

501 tmp[7] = ((u64) in[0]) * (in[7] << 1) +

502 ((u64) in[1]) * (in[6] << 1) +

503 ((u64) in[2]) * (in[5] << 1) +

504 ((u64) in[3]) * (in[4] << 1);

505 /* tmp[8] has the greatest value of 261 + 260 + 261 + 260 + 2**60,

506 * which is < 2**64 as required.

507 */

508 tmp[8] = ((u64) in[0]) * (in[8] << 1) +

509 ((u64) in[1]) * (in[7] << 2) +

510 ((u64) in[2]) * (in[6] << 1) +

511 ((u64) in[3]) * (in[5] << 2) +

512 ((u64) in[4]) * in[4];

513 tmp[9] = ((u64) in[1]) * (in[8] << 1) +

514 ((u64) in[2]) * (in[7] << 1) +

515 ((u64) in[3]) * (in[6] << 1) +

516 ((u64) in[4]) * (in[5] << 1);

517 tmp[10] = ((u64) in[2]) * (in[8] << 1) +

518 ((u64) in[3]) * (in[7] << 2) +

519 ((u64) in[4]) * (in[6] << 1) +

520 ((u64) in[5]) * (in[5] << 1);

521 tmp[11] = ((u64) in[3]) * (in[8] << 1) +

522 ((u64) in[4]) * (in[7] << 1) +

523 ((u64) in[5]) * (in[6] << 1);

524 tmp[12] = ((u64) in[4]) * (in[8] << 1) +

525 ((u64) in[5]) * (in[7] << 2) +

526 ((u64) in[6]) * in[6];

527 tmp[13] = ((u64) in[5]) * (in[8] << 1) +

528 ((u64) in[6]) * (in[7] << 1);

529 tmp[14] = ((u64) in[6]) * (in[8] << 1) +

530 ((u64) in[7]) * (in[7] << 1);

531 tmp[15] = ((u64) in[7]) * (in[8] << 1);

532 tmp[16] = ((u64) in[8]) * in[8];

533

534 felem_reduce_degree(out, tmp);

535 }

536

537 /* felem_mul sets out=in*in2.

538 *

539 * On entry: in[0,2,...] < 230, in[1,3,...] < 229 and

540 * in2[0,2,...] < 230, in2[1,3,...] < 229.

541 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

542 */

543 static void felem_mul(felem out, const felem in, const felem in2)

544 {

545 u64 tmp[17];

546

547 tmp[0] = ((u64) in[0]) * in2[0];

548 tmp[1] = ((u64) in[0]) * (in2[1] << 0) +

549 ((u64) in[1]) * (in2[0] << 0);

550 tmp[2] = ((u64) in[0]) * (in2[2] << 0) +

551 ((u64) in[1]) * (in2[1] << 1) +

552 ((u64) in[2]) * (in2[0] << 0);

553 tmp[3] = ((u64) in[0]) * (in2[3] << 0) +

554 ((u64) in[1]) * (in2[2] << 0) +

555 ((u64) in[2]) * (in2[1] << 0) +

556 ((u64) in[3]) * (in2[0] << 0);

557 tmp[4] = ((u64) in[0]) * (in2[4] << 0) +

558 ((u64) in[1]) * (in2[3] << 1) +

559 ((u64) in[2]) * (in2[2] << 0) +

560 ((u64) in[3]) * (in2[1] << 1) +

561 ((u64) in[4]) * (in2[0] << 0);

562 tmp[5] = ((u64) in[0]) * (in2[5] << 0) +

563 ((u64) in[1]) * (in2[4] << 0) +

564 ((u64) in[2]) * (in2[3] << 0) +

565 ((u64) in[3]) * (in2[2] << 0) +

566 ((u64) in[4]) * (in2[1] << 0) +

567 ((u64) in[5]) * (in2[0] << 0);

568 tmp[6] = ((u64) in[0]) * (in2[6] << 0) +

569 ((u64) in[1]) * (in2[5] << 1) +

570 ((u64) in[2]) * (in2[4] << 0) +

571 ((u64) in[3]) * (in2[3] << 1) +

572 ((u64) in[4]) * (in2[2] << 0) +

573 ((u64) in[5]) * (in2[1] << 1) +

574 ((u64) in[6]) * (in2[0] << 0);

575 tmp[7] = ((u64) in[0]) * (in2[7] << 0) +

576 ((u64) in[1]) * (in2[6] << 0) +

577 ((u64) in[2]) * (in2[5] << 0) +

578 ((u64) in[3]) * (in2[4] << 0) +

579 ((u64) in[4]) * (in2[3] << 0) +

580 ((u64) in[5]) * (in2[2] << 0) +

581 ((u64) in[6]) * (in2[1] << 0) +

582 ((u64) in[7]) * (in2[0] << 0);

583 /* tmp[8] has the greatest value but doesn't overflow. See logic in

584 * felem_square.

585 */

586 tmp[8] = ((u64) in[0]) * (in2[8] << 0) +

587 ((u64) in[1]) * (in2[7] << 1) +

588 ((u64) in[2]) * (in2[6] << 0) +

589 ((u64) in[3]) * (in2[5] << 1) +

590 ((u64) in[4]) * (in2[4] << 0) +

591 ((u64) in[5]) * (in2[3] << 1) +

592 ((u64) in[6]) * (in2[2] << 0) +

593 ((u64) in[7]) * (in2[1] << 1) +

594 ((u64) in[8]) * (in2[0] << 0);

595 tmp[9] = ((u64) in[1]) * (in2[8] << 0) +

596 ((u64) in[2]) * (in2[7] << 0) +

597 ((u64) in[3]) * (in2[6] << 0) +

598 ((u64) in[4]) * (in2[5] << 0) +

599 ((u64) in[5]) * (in2[4] << 0) +

600 ((u64) in[6]) * (in2[3] << 0) +

601 ((u64) in[7]) * (in2[2] << 0) +

602 ((u64) in[8]) * (in2[1] << 0);

603 tmp[10] = ((u64) in[2]) * (in2[8] << 0) +

604 ((u64) in[3]) * (in2[7] << 1) +

605 ((u64) in[4]) * (in2[6] << 0) +

606 ((u64) in[5]) * (in2[5] << 1) +

607 ((u64) in[6]) * (in2[4] << 0) +

608 ((u64) in[7]) * (in2[3] << 1) +

609 ((u64) in[8]) * (in2[2] << 0);

610 tmp[11] = ((u64) in[3]) * (in2[8] << 0) +

611 ((u64) in[4]) * (in2[7] << 0) +

612 ((u64) in[5]) * (in2[6] << 0) +

613 ((u64) in[6]) * (in2[5] << 0) +

614 ((u64) in[7]) * (in2[4] << 0) +

615 ((u64) in[8]) * (in2[3] << 0);

616 tmp[12] = ((u64) in[4]) * (in2[8] << 0) +

617 ((u64) in[5]) * (in2[7] << 1) +

618 ((u64) in[6]) * (in2[6] << 0) +

619 ((u64) in[7]) * (in2[5] << 1) +

620 ((u64) in[8]) * (in2[4] << 0);

621 tmp[13] = ((u64) in[5]) * (in2[8] << 0) +

622 ((u64) in[6]) * (in2[7] << 0) +

623 ((u64) in[7]) * (in2[6] << 0) +

624 ((u64) in[8]) * (in2[5] << 0);

625 tmp[14] = ((u64) in[6]) * (in2[8] << 0) +

626 ((u64) in[7]) * (in2[7] << 1) +

627 ((u64) in[8]) * (in2[6] << 0);

628 tmp[15] = ((u64) in[7]) * (in2[8] << 0) +

629 ((u64) in[8]) * (in2[7] << 0);

630 tmp[16] = ((u64) in[8]) * (in2[8] << 0);

631

632 felem_reduce_degree(out, tmp);

633 }

634

635 static void felem_assign(felem out, const felem in)

636 {

637 memcpy(out, in, sizeof(felem));

638 }

639

640 /* felem_inv calculates \|out\| = \|in\|^{-1}

641 *

642 * Based on Fermat's Little Theorem:

643 * a^p = a (mod p)

644 * a^{p-1} = 1 (mod p)

645 * a^{p-2} = a^{-1} (mod p)

646 */

647 static void felem_inv(felem out, const felem in)

648 {

649 felem ftmp, ftmp2;

650 /* each e_I will hold \|in\|^{2^I - 1} */

651 felem e2, e4, e8, e16, e32, e64;

652 unsigned int i;

653

654 felem_square(ftmp, in); /* 2^1 */

655 felem_mul(ftmp, in, ftmp); /* 2^2 - 2^0 */

656 felem_assign(e2, ftmp);

657 felem_square(ftmp, ftmp); /* 2^3 - 2^1 */

658 felem_square(ftmp, ftmp); /* 2^4 - 2^2 */

659 felem_mul(ftmp, ftmp, e2); /* 2^4 - 2^0 */

660 felem_assign(e4, ftmp);

661 felem_square(ftmp, ftmp); /* 2^5 - 2^1 */

662 felem_square(ftmp, ftmp); /* 2^6 - 2^2 */

663 felem_square(ftmp, ftmp); /* 2^7 - 2^3 */

664 felem_square(ftmp, ftmp); /* 2^8 - 2^4 */

665 felem_mul(ftmp, ftmp, e4); /* 2^8 - 2^0 */

666 felem_assign(e8, ftmp);

667 for (i = 0; i < 8; i++) {

668 felem_square(ftmp, ftmp);

669 } /* 2^16 - 2^8 */

670 felem_mul(ftmp, ftmp, e8); /* 2^16 - 2^0 */

671 felem_assign(e16, ftmp);

672 for (i = 0; i < 16; i++) {

673 felem_square(ftmp, ftmp);

674 } /* 2^32 - 2^16 */

675 felem_mul(ftmp, ftmp, e16); /* 2^32 - 2^0 */

676 felem_assign(e32, ftmp);

677 for (i = 0; i < 32; i++) {

678 felem_square(ftmp, ftmp);

679 } /* 2^64 - 2^32 */

680 felem_assign(e64, ftmp);

681 felem_mul(ftmp, ftmp, in); /* 2^64 - 2^32 + 2^0 */

682 for (i = 0; i < 192; i++) {

683 felem_square(ftmp, ftmp);

684 } /* 2^256 - 2^224 + 2^192 */

685

686 felem_mul(ftmp2, e64, e32); /* 2^64 - 2^0 */

687 for (i = 0; i < 16; i++) {

688 felem_square(ftmp2, ftmp2);

689 } /* 2^80 - 2^16 */

690 felem_mul(ftmp2, ftmp2, e16); /* 2^80 - 2^0 */

691 for (i = 0; i < 8; i++) {

692 felem_square(ftmp2, ftmp2);

693 } /* 2^88 - 2^8 */

694 felem_mul(ftmp2, ftmp2, e8); /* 2^88 - 2^0 */

695 for (i = 0; i < 4; i++) {

696 felem_square(ftmp2, ftmp2);

697 } /* 2^92 - 2^4 */

698 felem_mul(ftmp2, ftmp2, e4); /* 2^92 - 2^0 */

699 felem_square(ftmp2, ftmp2); /* 2^93 - 2^1 */

700 felem_square(ftmp2, ftmp2); /* 2^94 - 2^2 */

701 felem_mul(ftmp2, ftmp2, e2); /* 2^94 - 2^0 */

702 felem_square(ftmp2, ftmp2); /* 2^95 - 2^1 */

703 felem_square(ftmp2, ftmp2); /* 2^96 - 2^2 */

704 felem_mul(ftmp2, ftmp2, in); /* 2^96 - 3 */

705

706 felem_mul(out, ftmp2, ftmp); /* 2^256 - 2^224 + 2^192 + 2^96 - 3 */

707 }

708

709 /* felem_scalar_3 sets out=3*out.

710 *

711 * On entry: out[0,2,...] < 230, out[1,3,...] < 229.

712 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

713 */

714 static void felem_scalar_3(felem out)

715 {

716 limb carry = 0;

717 unsigned int i;

718

719 for (i = 0;; i++) {

720 out[i] *= 3;

721 out[i] += carry;

722 carry = out[i] >> 29;

723 out[i] &= kBottom29Bits;

724

725 i++;

726 if (i == NLIMBS)

727 break;

728

729 out[i] *= 3;

730 out[i] += carry;

731 carry = out[i] >> 28;

732 out[i] &= kBottom28Bits;

733 }

734

735 felem_reduce_carry(out, carry);

736 }

737

738 /* felem_scalar_4 sets out=4*out.

739 *

740 * On entry: out[0,2,...] < 230, out[1,3,...] < 229.

741 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

742 */

743 static void felem_scalar_4(felem out)

744 {

745 limb carry = 0, next_carry;

746 unsigned int i;

747

748 for (i = 0;; i++) {

749 next_carry = out[i] >> 27;

750 out[i] <<= 2;

751 out[i] &= kBottom29Bits;

752 out[i] += carry;

753 carry = next_carry + (out[i] >> 29);

754 out[i] &= kBottom29Bits;

755

756 i++;

757 if (i == NLIMBS)

758 break;

759 next_carry = out[i] >> 26;

760 out[i] <<= 2;

761 out[i] &= kBottom28Bits;

762 out[i] += carry;

763 carry = next_carry + (out[i] >> 28);

764 out[i] &= kBottom28Bits;

765 }

766

767 felem_reduce_carry(out, carry);

768 }

769

770 /* felem_scalar_8 sets out=8*out.

771 *

772 * On entry: out[0,2,...] < 230, out[1,3,...] < 229.

773 * On exit: out[0,2,...] < 230, out[1,3,...] < 229.

774 */

775 static void felem_scalar_8(felem out)

776 {

777 limb carry = 0, next_carry;

778 unsigned int i;

779

780 for (i = 0;; i++) {

781 next_carry = out[i] >> 26;

782 out[i] <<= 3;

783 out[i] &= kBottom29Bits;

784 out[i] += carry;

785 carry = next_carry + (out[i] >> 29);

786 out[i] &= kBottom29Bits;

787

788 i++;

789 if (i == NLIMBS)

790 break;

791 next_carry = out[i] >> 25;

792 out[i] <<= 3;

793 out[i] &= kBottom28Bits;

794 out[i] += carry;

795 carry = next_carry + (out[i] >> 28);

796 out[i] &= kBottom28Bits;

797 }

798

799 felem_reduce_carry(out, carry);

800 }

801

802 /* felem_is_zero_vartime returns 1 iff \|in\| == 0. It takes a variable amount of

803 * time depending on the value of \|in\|.

804 */

805 static char felem_is_zero_vartime(const felem in)

806 {

807 limb carry;

808 int i;

809 limb tmp[NLIMBS];

810 felem_assign(tmp, in);

811

812 /* First, reduce tmp to a minimal form.

813 */

814 do {

815 carry = 0;

816 for (i = 0;; i++) {

817 tmp[i] += carry;

818 carry = tmp[i] >> 29;

819 tmp[i] &= kBottom29Bits;

820

821 i++;

822 if (i == NLIMBS)

823 break;

824

825 tmp[i] += carry;

826 carry = tmp[i] >> 28;

827 tmp[i] &= kBottom28Bits;

828 }

829

830 felem_reduce_carry(tmp, carry);

831 } while (carry);

832

833 /* tmp < 2**257, so the only possible zero values are 0, p and 2p.

834 */

835 return memcmp(tmp, kZero, sizeof(tmp)) == 0 \|\|

836 memcmp(tmp, kP, sizeof(tmp)) == 0 \|\|

837 memcmp(tmp, k2P, sizeof(tmp)) == 0;

838 }

839

840 /* Group operations:

841 *

842 * Elements of the elliptic curve group are represented in Jacobian

843 * coordinates: (x, y, z). An affine point (x', y') is x'=x/z2, y'=y/z3 in

844 * Jacobian form.

845 */

846

847 /* point_double sets {x_out,y_out,z_out} = 2*{x,y,z}.

848 *

849 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling -dbl-2009-l

850 */

851 static void point_double(felem x_out, felem y_out, felem z_out,

852 const felem x, const felem y, const felem z)

853 {

854 felem delta, gamma, alpha, beta, tmp, tmp2;

855

856 felem_square(delta, z);

857 felem_square(gamma, y);

858 felem_mul(beta, x, gamma);

859

860 felem_sum(tmp, x, delta);

861 felem_diff(tmp2, x, delta);

862 felem_mul(alpha, tmp, tmp2);

863 felem_scalar_3(alpha);

864

865 felem_sum(tmp, y, z);

866 felem_square(tmp, tmp);

867 felem_diff(tmp, tmp, gamma);

868 felem_diff(z_out, tmp, delta);

869

870 felem_scalar_4(beta);

871 felem_square(x_out, alpha);

872 felem_diff(x_out, x_out, beta);

873 felem_diff(x_out, x_out, beta);

874

875 felem_diff(tmp, beta, x_out);

876 felem_mul(tmp, alpha, tmp);

877 felem_square(tmp2, gamma);

878 felem_scalar_8(tmp2);

879 felem_diff(y_out, tmp, tmp2);

880 }

881

882 /* point_add_mixed sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,1}.

883 * (i.e. the second point is affine.)

884 *

885 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl

886 *

887 * Note that this function does not handle P+P, infinity+P nor P+infinity

888 * correctly.

889 */

890 static void point_add_mixed(felem x_out, felem y_out, felem z_out,

891 const felem x1, const felem y1, const felem z1,

892 const felem x2, const felem y2)

893 {

894 felem z1z1, z1z1z1, s2, u2, h, i, j, r, rr, v, tmp;

895

896 felem_square(z1z1, z1);

897 felem_sum(tmp, z1, z1);

898

899 felem_mul(u2, x2, z1z1);

900 felem_mul(z1z1z1, z1, z1z1);

901 felem_mul(s2, y2, z1z1z1);

902 felem_diff(h, u2, x1);

903 felem_sum(i, h, h);

904 felem_square(i, i);

905 felem_mul(j, h, i);

906 felem_diff(r, s2, y1);

907 felem_sum(r, r, r);

908 felem_mul(v, x1, i);

909

910 felem_mul(z_out, tmp, h);

911 felem_square(rr, r);

912 felem_diff(x_out, rr, j);

913 felem_diff(x_out, x_out, v);

914 felem_diff(x_out, x_out, v);

915

916 felem_diff(tmp, v, x_out);

917 felem_mul(y_out, tmp, r);

918 felem_mul(tmp, y1, j);

919 felem_diff(y_out, y_out, tmp);

920 felem_diff(y_out, y_out, tmp);

921 }

922

923 /* point_add sets {x_out,y_out,z_out} = {x1,y1,z1} + {x2,y2,z2}.

924 *

925 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl

926 *

927 * Note that this function does not handle P+P, infinity+P nor P+infinity

928 * correctly.

929 */

930 static void point_add(felem x_out, felem y_out, felem z_out,

931 const felem x1, const felem y1, const felem z1,

932 const felem x2, const felem y2, const felem z2)

933 {

934 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;

935

936 felem_square(z1z1, z1);

937 felem_square(z2z2, z2);

938 felem_mul(u1, x1, z2z2);

939

940 felem_sum(tmp, z1, z2);

941 felem_square(tmp, tmp);

942 felem_diff(tmp, tmp, z1z1);

943 felem_diff(tmp, tmp, z2z2);

944

945 felem_mul(z2z2z2, z2, z2z2);

946 felem_mul(s1, y1, z2z2z2);

947

948 felem_mul(u2, x2, z1z1);

949 felem_mul(z1z1z1, z1, z1z1);

950 felem_mul(s2, y2, z1z1z1);

951 felem_diff(h, u2, u1);

952 felem_sum(i, h, h);

953 felem_square(i, i);

954 felem_mul(j, h, i);

955 felem_diff(r, s2, s1);

956 felem_sum(r, r, r);

957 felem_mul(v, u1, i);

958

959 felem_mul(z_out, tmp, h);

960 felem_square(rr, r);

961 felem_diff(x_out, rr, j);

962 felem_diff(x_out, x_out, v);

963 felem_diff(x_out, x_out, v);

964

965 felem_diff(tmp, v, x_out);

966 felem_mul(y_out, tmp, r);

967 felem_mul(tmp, s1, j);

968 felem_diff(y_out, y_out, tmp);

969 felem_diff(y_out, y_out, tmp);

970 }

971

972 /* point_add_or_double_vartime sets {x_out,y_out,z_out} = {x1,y1,z1} +

973 * {x2,y2,z2}.

974 *

975 * See http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#addition -add-2007-bl

976 *

977 * This function handles the case where {x1,y1,z1}={x2,y2,z2}.

978 */

979 static void point_add_or_double_vartime(

980 felem x_out, felem y_out, felem z_out,

981 const felem x1, const felem y1, const felem z1,

982 const felem x2, const felem y2, const felem z2)

983 {

984 felem z1z1, z1z1z1, z2z2, z2z2z2, s1, s2, u1, u2, h, i, j, r, rr, v, tmp;

985 char x_equal, y_equal;

986

987 felem_square(z1z1, z1);

988 felem_square(z2z2, z2);

989 felem_mul(u1, x1, z2z2);

990

991 felem_sum(tmp, z1, z2);

992 felem_square(tmp, tmp);

993 felem_diff(tmp, tmp, z1z1);

994 felem_diff(tmp, tmp, z2z2);

995

996 felem_mul(z2z2z2, z2, z2z2);

997 felem_mul(s1, y1, z2z2z2);

998

999 felem_mul(u2, x2, z1z1);

1000 felem_mul(z1z1z1, z1, z1z1);

1001 felem_mul(s2, y2, z1z1z1);

1002 felem_diff(h, u2, u1);

1003 x_equal = felem_is_zero_vartime(h);

1004 felem_sum(i, h, h);

1005 felem_square(i, i);

1006 felem_mul(j, h, i);

1007 felem_diff(r, s2, s1);

1008 y_equal = felem_is_zero_vartime(r);

1009 if (x_equal && y_equal) {

1010 point_double(x_out, y_out, z_out, x1, y1, z1);

1011 return;

1012 }

1013 felem_sum(r, r, r);

1014 felem_mul(v, u1, i);

1015

1016 felem_mul(z_out, tmp, h);

1017 felem_square(rr, r);

1018 felem_diff(x_out, rr, j);

1019 felem_diff(x_out, x_out, v);

1020 felem_diff(x_out, x_out, v);

1021

1022 felem_diff(tmp, v, x_out);

1023 felem_mul(y_out, tmp, r);

1024 felem_mul(tmp, s1, j);

1025 felem_diff(y_out, y_out, tmp);

1026 felem_diff(y_out, y_out, tmp);

1027 }

1028

1029 /* copy_conditional sets out=in if mask = 0xffffffff in constant time.

1030 *

1031 * On entry: mask is either 0 or 0xffffffff.

1032 */

1033 static void copy_conditional(felem out, const felem in, limb mask)

1034 {

1035 int i;

1036

1037 for (i = 0; i < NLIMBS; i++) {

1038 const limb tmp = mask & (in[i] ^ out[i]);

1039 out[i] ^= tmp;

1040 }

1041 }

1042

1043 /* select_affine_point sets {out_x,out_y} to the index'th entry of table.

1044 * On entry: index < 16, table[0] must be zero.

1045 */

1046 static void select_affine_point(felem out_x, felem out_y,

1047 const limb *table, limb index)

1048 {

1049 limb i, j;

1050

1051 memset(out_x, 0, sizeof(felem));

1052 memset(out_y, 0, sizeof(felem));

1053

1054 for (i = 1; i < 16; i++) {

1055 limb mask = i ^ index;

1056 mask \|= mask >> 2;

1057 mask \|= mask >> 1;

1058 mask &= 1;

1059 mask--;

1060 for (j = 0; j < NLIMBS; j++, table++) {

1061 out_x[j] \|= *table & mask;

1062 }

1063 for (j = 0; j < NLIMBS; j++, table++) {

1064 out_y[j] \|= *table & mask;

1065 }

1066 }

1067 }

1068

1069 /* select_jacobian_point sets {out_x,out_y,out_z} to the index'th entry of

1070 * table. On entry: index < 16, table[0] must be zero.

1071 */

1072 static void select_jacobian_point(felem out_x, felem out_y, felem out_z,

1073 const limb *table, limb index)

1074 {

1075 limb i, j;

1076

1077 memset(out_x, 0, sizeof(felem));

1078 memset(out_y, 0, sizeof(felem));

1079 memset(out_z, 0, sizeof(felem));

1080

1081 /* The implicit value at index 0 is all zero. We don't need to perform that

1082 * iteration of the loop because we already set out_* to zero.

1083 */

1084 table += 3*NLIMBS;

1085

1086 for (i = 1; i < 16; i++) {

1087 limb mask = i ^ index;

1088 mask \|= mask >> 2;

1089 mask \|= mask >> 1;

1090 mask &= 1;

1091 mask--;

1092 for (j = 0; j < NLIMBS; j++, table++) {

1093 out_x[j] \|= *table & mask;

1094 }

1095 for (j = 0; j < NLIMBS; j++, table++) {

1096 out_y[j] \|= *table & mask;

1097 }

1098 for (j = 0; j < NLIMBS; j++, table++) {

1099 out_z[j] \|= *table & mask;

1100 }

1101 }

1102 }

1103

1104 /* get_bit returns the bit'th bit of scalar. */

1105 static char get_bit(const u8 scalar[32], int bit)

1106 {

1107 return ((scalar[bit >> 3]) >> (bit & 7)) & 1;

1108 }

1109

1110 /* scalar_base_mult sets {nx,ny,nz} = scalar*G where scalar is a little-endian

1111 * number. Note that the value of scalar must be less than the order of the

1112 * group.

1113 */

1114 static void scalar_base_mult(felem nx, felem ny, felem nz, const u8 scalar[32])

1115 {

1116 int i, j;

1117 limb n_is_infinity_mask = -1, p_is_noninfinite_mask, mask;

1118 u32 table_offset;

1119

1120 felem px, py;

1121 felem tx, ty, tz;

1122

1123 memset(nx, 0, sizeof(felem));

1124 memset(ny, 0, sizeof(felem));

1125 memset(nz, 0, sizeof(felem));

1126

1127 /* The loop adds bits at positions 0, 64, 128 and 192, followed by

1128 * positions 32,96,160 and 224 and does this 32 times.

1129 */

1130 for (i = 0; i < 32; i++) {

1131 if (i) {

1132 point_double(nx, ny, nz, nx, ny, nz);

1133 }

1134 table_offset = 0;

1135 for (j = 0; j <= 32; j += 32) {

1136 char bit0 = get_bit(scalar, 31 - i + j);

1137 char bit1 = get_bit(scalar, 95 - i + j);

1138 char bit2 = get_bit(scalar, 159 - i + j);

1139 char bit3 = get_bit(scalar, 223 - i + j);

1140 limb index = bit0 \| (bit1 << 1) \| (bit2 << 2) \| (bit3 << 3);

1141

1142 select_affine_point(px, py, kPrecomputed + table_offset, index);

1143 table_offset += 30 * NLIMBS;

1144

1145 /* Since scalar is less than the order of the group, we know that

1146 * {nx,ny,nz} != {px,py,1}, unless both are zero, which we handle

1147 * below.

1148 */

1149 point_add_mixed(tx, ty, tz, nx, ny, nz, px, py);

1150 /* The result of point_add_mixed is incorrect if {nx,ny,nz} is zero

1151 * (a.k.a. the point at infinity). We handle that situation by

1152 * copying the point from the table.

1153 */

1154 copy_conditional(nx, px, n_is_infinity_mask);

1155 copy_conditional(ny, py, n_is_infinity_mask);

1156 copy_conditional(nz, kOne, n_is_infinity_mask);

1157

1158 /* Equally, the result is also wrong if the point from the table is

1159 * zero, which happens when the index is zero. We handle that by

1160 * only copying from {tx,ty,tz} to {nx,ny,nz} if index != 0.

1161 */

1162 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);

1163 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;

1164 copy_conditional(nx, tx, mask);

1165 copy_conditional(ny, ty, mask);

1166 copy_conditional(nz, tz, mask);

1167 /* If p was not zero, then n is now non-zero. */

1168 n_is_infinity_mask &= ~p_is_noninfinite_mask;

1169 }

1170 }

1171 }

1172

1173 /* point_to_affine converts a Jacobian point to an affine point. If the input

1174 * is the point at infinity then it returns (0, 0) in constant time.

1175 */

1176 static void point_to_affine(felem x_out, felem y_out,

1177 const felem nx, const felem ny, const felem nz) {

1178 felem z_inv, z_inv_sq;

1179 felem_inv(z_inv, nz);

1180 felem_square(z_inv_sq, z_inv);

1181 felem_mul(x_out, nx, z_inv_sq);

1182 felem_mul(z_inv, z_inv, z_inv_sq);

1183 felem_mul(y_out, ny, z_inv);

1184 }

1185

1186 /* scalar_mult sets {nx,ny,nz} = scalar{x,y}. /

1187 static void scalar_mult(felem nx, felem ny, felem nz,

1188 const felem x, const felem y, const u8 scalar[32])

1189 {

1190 int i;

1191 felem px, py, pz, tx, ty, tz;

1192 felem precomp[16][3];

1193 limb n_is_infinity_mask, index, p_is_noninfinite_mask, mask;

1194

1195 /* We precompute 0,1,2,... times {x,y}. */

1196 memset(precomp, 0, sizeof(felem) * 3);

1197 memcpy(&precomp[1][0], x, sizeof(felem));

1198 memcpy(&precomp[1][1], y, sizeof(felem));

1199 memcpy(&precomp[1][2], kOne, sizeof(felem));

1200

1201 for (i = 2; i < 16; i += 2) {

1202 point_double(precomp[i][0], precomp[i][1], precomp[i][2],

1203 precomp[i / 2][0], precomp[i / 2][1], precomp[i / 2][2]);

1204

1205 point_add_mixed(precomp[i + 1][0], precomp[i + 1][1], precomp[i + 1][2],

1206 precomp[i][0], precomp[i][1], precomp[i][2], x, y);

1207 }

1208

1209 memset(nx, 0, sizeof(felem));

1210 memset(ny, 0, sizeof(felem));

1211 memset(nz, 0, sizeof(felem));

1212 n_is_infinity_mask = -1;

1213

1214 /* We add in a window of four bits each iteration and do this 64 times. */

1215 for (i = 0; i < 64; i++) {

1216 if (i) {

1217 point_double(nx, ny, nz, nx, ny, nz);

1218 point_double(nx, ny, nz, nx, ny, nz);

1219 point_double(nx, ny, nz, nx, ny, nz);

1220 point_double(nx, ny, nz, nx, ny, nz);

1221 }

1222

1223 index = scalar[31 - i / 2];

1224 if ((i & 1) == 1) {

1225 index &= 15;

1226 } else {

1227 index >>= 4;

1228 }

1229

1230 /* See the comments in scalar_base_mult about handling infinities. */

1231 select_jacobian_point(px, py, pz, precomp[0][0], index);

1232 point_add(tx, ty, tz, nx, ny, nz, px, py, pz);

1233 copy_conditional(nx, px, n_is_infinity_mask);

1234 copy_conditional(ny, py, n_is_infinity_mask);

1235 copy_conditional(nz, pz, n_is_infinity_mask);

1236

1237 p_is_noninfinite_mask = NON_ZERO_TO_ALL_ONES(index);

1238 mask = p_is_noninfinite_mask & ~n_is_infinity_mask;

1239 copy_conditional(nx, tx, mask);

1240 copy_conditional(ny, ty, mask);

1241 copy_conditional(nz, tz, mask);

1242 n_is_infinity_mask &= ~p_is_noninfinite_mask;

1243 }

1244 }

1245

1246 /* Interface with Freebl: */

1247

1248 /* BYTESWAP_MP_DIGIT_TO_LE swaps the bytes of a mp_digit to

1249 * little-endian order.

1250 */

1251 #ifdef IS_BIG_ENDIAN

1252 #ifdef __APPLE__

1253 #include <libkern/OSByteOrder.h>

1254 #define BYTESWAP32(x) OSSwapInt32(x)

1255 #define BYTESWAP64(x) OSSwapInt64(x)

1256 #else

1257 #define BYTESWAP32(x) \

1258 ((x) >> 24 \| (x) >> 8 & 0xff00 \| ((x) & 0xff00) << 8 \| (x) << 24)

1259 #define BYTESWAP64(x) \

1260 ((x) >> 56 \| (x) >> 40 & 0xff00 \| \

1261 (x) >> 24 & 0xff0000 \| (x) >> 8 & 0xff000000 \| \

1262 ((x) & 0xff000000) << 8 \| ((x) & 0xff0000) << 24 \| \

1263 ((x) & 0xff00) << 40 \| (x) << 56)

1264 #endif

1265

1266 #ifdef MP_USE_UINT_DIGIT

1267 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP32(x)

1268 #else

1269 #define BYTESWAP_MP_DIGIT_TO_LE(x) BYTESWAP64(x)

1270 #endif

1271 #endif /* IS_BIG_ENDIAN */

1272

1273 #ifdef MP_USE_UINT_DIGIT

1274 static const mp_digit kRInvDigits[8] = {

1275 0x80000000, 1, 0xffffffff, 0,

1276 0x80000001, 0xfffffffe, 1, 0x7fffffff

1277 };

1278 #else

1279 static const mp_digit kRInvDigits[4] = {

1280 PR_UINT64(0x180000000), 0xffffffff,

1281 PR_UINT64(0xfffffffe80000001), PR_UINT64(0x7fffffff00000001)

1282 };

1283 #endif

1284 #define MP_DIGITS_IN_256_BITS (32/sizeof(mp_digit))

1285 static const mp_int kRInv = {

1286 MP_ZPOS,

1287 MP_DIGITS_IN_256_BITS,

1288 MP_DIGITS_IN_256_BITS,

1289 (mp_digit*) kRInvDigits

1290 };

1291

1292 static const limb kTwo28 = 0x10000000;

1293 static const limb kTwo29 = 0x20000000;

1294

1295 /* to_montgomery sets out = Rin. /

1296 static mp_err to_montgomery(felem out, const mp_int in, const ECGroup group)

1297 {

1298 /* There are no MPI functions for bitshift operations and we wish to shift

1299 * in 257 bits left so we move the digits 256-bits left and then multiply

1300 * by two.

1301 */

1302 mp_int in_shifted;

1303 int i;

1304 mp_err res;

1305

1306 mp_init(&in_shifted);

1307 s_mp_pad(&in_shifted, MP_USED(in) + MP_DIGITS_IN_256_BITS);

1308 memcpy(&MP_DIGIT(&in_shifted, MP_DIGITS_IN_256_BITS),

1309 MP_DIGITS(in),

1310 MP_USED(in)*sizeof(mp_digit));

1311 mp_mul_2(&in_shifted, &in_shifted);

1312 MP_CHECKOK(group->meth->field_mod(&in_shifted, &in_shifted, group->meth));

1313

1314 for (i = 0;; i++) {

1315 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom29Bits;

1316 mp_div_d(&in_shifted, kTwo29, &in_shifted, NULL);

1317

1318 i++;

1319 if (i == NLIMBS)

1320 break;

1321 out[i] = MP_DIGIT(&in_shifted, 0) & kBottom28Bits;

1322 mp_div_d(&in_shifted, kTwo28, &in_shifted, NULL);

1323 }

1324

1325 CLEANUP:

1326 mp_clear(&in_shifted);

1327 return res;

1328 }

1329

1330 /* from_montgomery sets out=in/R. */

1331 static mp_err from_montgomery(mp_int *out, const felem in,

1332 const ECGroup *group)

1333 {

1334 mp_int result, tmp;

1335 mp_err res;

1336 int i;

1337

1338 mp_init(&result);

1339 mp_init(&tmp);

1340

1341 MP_CHECKOK(mp_add_d(&tmp, in[NLIMBS-1], &result));

1342 for (i = NLIMBS-2; i >= 0; i--) {

1343 if ((i & 1) == 0) {

1344 MP_CHECKOK(mp_mul_d(&result, kTwo29, &tmp));

1345 } else {

1346 MP_CHECKOK(mp_mul_d(&result, kTwo28, &tmp));

1347 }

1348 MP_CHECKOK(mp_add_d(&tmp, in[i], &result));

1349 }

1350

1351 MP_CHECKOK(mp_mul(&result, &kRInv, out));

1352 MP_CHECKOK(group->meth->field_mod(out, out, group->meth));

1353

1354 CLEANUP:

1355 mp_clear(&result);

1356 mp_clear(&tmp);

1357 return res;

1358 }

1359

1360 /* scalar_from_mp_int sets out_scalar=n, where n < the group order. */

1361 static void scalar_from_mp_int(u8 out_scalar[32], const mp_int *n)

1362 {

1363 /* We require that \|n\| is less than the order of the group and therefore it

1364 * will fit into \|out_scalar\|. However, these is a timing side-channel here

1365 * that we cannot avoid: if \|n\| is sufficiently small it may be one or more

1366 * words too short and we'll copy less data.

1367 */

1368 PORT_Assert(MP_USED(n) * sizeof(mp_digit) <= 32);

1369 memset(out_scalar, 0, 32);

1370 #ifdef IS_LITTLE_ENDIAN

1371 memcpy(out_scalar, MP_DIGITS(n), MP_USED(n) * sizeof(mp_digit));

1372 #else

1373 {

1374 mp_size i;

1375 mp_digit swapped[MP_DIGITS_IN_256_BITS];

1376 for (i = 0; i < MP_USED(n); i++) {

1377 swapped[i] = BYTESWAP_MP_DIGIT_TO_LE(MP_DIGIT(n, i));

1378 }

1379 memcpy(out_scalar, swapped, MP_USED(n) * sizeof(mp_digit));

1380 }

1381 #endif

1382 }

1383

1384 /* ec_GFp_nistp256_base_point_mul sets {out_x,out_y} = nG, where n is < the

1385 * order of the group.

1386 */

1387 static mp_err ec_GFp_nistp256_base_point_mul(const mp_int *n,

1388 mp_int out_x, mp_int out_y,

1389 const ECGroup *group)

1390 {

1391 u8 scalar[32];

1392 felem x, y, z, x_affine, y_affine;

1393 mp_err res;

1394

1395 /* FIXME(agl): test that n < order. */

1396

1397 scalar_from_mp_int(scalar, n);

1398 scalar_base_mult(x, y, z, scalar);

1399 point_to_affine(x_affine, y_affine, x, y, z);

1400 MP_CHECKOK(from_montgomery(out_x, x_affine, group));

1401 MP_CHECKOK(from_montgomery(out_y, y_affine, group));

1402

1403 CLEANUP:

1404 return res;

1405 }

1406

1407 /* ec_GFp_nistp256_point_mul sets {out_x,out_y} = n*{in_x,in_y}, where n is <

1408 * the order of the group.

1409 */

1410 static mp_err ec_GFp_nistp256_point_mul(const mp_int *n,

1411 const mp_int in_x, const mp_int in_y,

1412 mp_int out_x, mp_int out_y,

1413 const ECGroup *group)

1414 {

1415 u8 scalar[32];

1416 felem x, y, z, x_affine, y_affine, px, py;

1417 mp_err res;

1418

1419 scalar_from_mp_int(scalar, n);

1420

1421 MP_CHECKOK(to_montgomery(px, in_x, group));

1422 MP_CHECKOK(to_montgomery(py, in_y, group));

1423

1424 scalar_mult(x, y, z, px, py, scalar);

1425 point_to_affine(x_affine, y_affine, x, y, z);

1426 MP_CHECKOK(from_montgomery(out_x, x_affine, group));

1427 MP_CHECKOK(from_montgomery(out_y, y_affine, group));

1428

1429 CLEANUP:

1430 return res;

1431 }

1432

1433 /* ec_GFp_nistp256_point_mul_vartime sets {out_x,out_y} = n1*G +

1434 * n2*{in_x,in_y}, where n1 and n2 are < the order of the group.

1435 *

1436 * As indicated by the name, this function operates in variable time. This

1437 * is safe because it's used for signature validation which doesn't deal

1438 * with secrets.

1439 */

1440 static mp_err ec_GFp_nistp256_points_mul_vartime(

1441 const mp_int n1, const mp_int n2,

1442 const mp_int in_x, const mp_int in_y,

1443 mp_int out_x, mp_int out_y,

1444 const ECGroup *group)

1445 {

1446 u8 scalar1[32], scalar2[32];

1447 felem x1, y1, z1, x2, y2, z2, x_affine, y_affine, px, py;

1448 mp_err res = MP_OKAY;

1449

1450 /* If n2 == NULL, this is just a base-point multiplication. */

1451 if (n2 == NULL) {

1452 return ec_GFp_nistp256_base_point_mul(n1, out_x, out_y, group);

1453 }

1454

1455 /* If n1 == nULL, this is just an arbitary-point multiplication. */

1456 if (n1 == NULL) {

1457 return ec_GFp_nistp256_point_mul(n2, in_x, in_y, out_x, out_y, group);

1458 }

1459

1460 /* If both scalars are zero, then the result is the point at infinity. */

1461 if (mp_cmp_z(n1) == 0 && mp_cmp_z(n2) == 0) {

1462 mp_zero(out_x);

1463 mp_zero(out_y);

1464 return res;

1465 }

1466

1467 scalar_from_mp_int(scalar1, n1);

1468 scalar_from_mp_int(scalar2, n2);

1469

1470 MP_CHECKOK(to_montgomery(px, in_x, group));

1471 MP_CHECKOK(to_montgomery(py, in_y, group));

1472 scalar_base_mult(x1, y1, z1, scalar1);

1473 scalar_mult(x2, y2, z2, px, py, scalar2);

1474

1475 if (mp_cmp_z(n2) == 0) {

1476 /* If n2 == 0, then {x2,y2,z2} is zero and the result is just

1477 * {x1,y1,z1}. */

1478 } else if (mp_cmp_z(n1) == 0) {

1479 /* If n1 == 0, then {x1,y1,z1} is zero and the result is just

1480 * {x2,y2,z2}. */

1481 memcpy(x1, x2, sizeof(x2));

1482 memcpy(y1, y2, sizeof(y2));

1483 memcpy(z1, z2, sizeof(z2));

1484 } else {

1485 /* This function handles the case where {x1,y1,z1} == {x2,y2,z2}. */

1486 point_add_or_double_vartime(x1, y1, z1, x1, y1, z1, x2, y2, z2);

1487 }

1488

1489 point_to_affine(x_affine, y_affine, x1, y1, z1);

1490 MP_CHECKOK(from_montgomery(out_x, x_affine, group));

1491 MP_CHECKOK(from_montgomery(out_y, y_affine, group));

1492

1493 CLEANUP:

1494 return res;

1495 }

1496

1497 /* Wire in fast point multiplication for named curves. */

1498 mp_err ec_group_set_gfp256_32(ECGroup *group, ECCurveName name)

1499 {

1500 if (name == ECCurve_NIST_P256) {

1501 group->base_point_mul = &ec_GFp_nistp256_base_point_mul;

1502 group->point_mul = &ec_GFp_nistp256_point_mul;

1503 group->points_mul = &ec_GFp_nistp256_points_mul_vartime;

1504 }

1505 return MP_OKAY;

1506 }

OLD	NEW

« no previous file with comments | « mozilla/security/nss/lib/freebl/ecl/ecp.h ('k') | mozilla/security/nss/lib/freebl/ecl/ecp_aff.c » ('j') | no next file with comments »