/* NOTE(review): the following five lines of the scraped page ("ecc.c 44 KB"
 * plus two runs of concatenated line numbers) are web-extraction artifacts,
 * not source code, and should be removed from the file. */
  1. /*
  2. * Copyright (c) 2013, 2014 Kenneth MacKay. All rights reserved.
  3. * Copyright (c) 2019 Vitaly Chikunov <vt@altlinux.org>
  4. *
  5. * Redistribution and use in source and binary forms, with or without
  6. * modification, are permitted provided that the following conditions are
  7. * met:
  8. * * Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * * Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in the
  12. * documentation and/or other materials provided with the distribution.
  13. *
  14. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  15. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  16. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  17. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  18. * HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  19. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  20. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  21. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  22. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  23. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  24. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  25. */
  26. #include <crypto/ecc_curve.h>
  27. #include <linux/module.h>
  28. #include <linux/random.h>
  29. #include <linux/slab.h>
  30. #include <linux/swab.h>
  31. #include <linux/fips.h>
  32. #include <crypto/ecdh.h>
  33. #include <crypto/rng.h>
  34. #include <crypto/internal/ecc.h>
  35. #include <linux/unaligned.h>
  36. #include <linux/ratelimit.h>
  37. #include "ecc_curve_defs.h"
/* Portable 128-bit accumulator, represented as two 64-bit halves.
 * Used by mul_64_64()/add_128_128() to carry 64x64->128 products on
 * architectures without native __int128 support.
 */
typedef struct {
	u64 m_low;
	u64 m_high;
} uint128_t;
  42. /* Returns curv25519 curve param */
  43. const struct ecc_curve *ecc_get_curve25519(void)
  44. {
  45. return &ecc_25519;
  46. }
  47. EXPORT_SYMBOL(ecc_get_curve25519);
  48. const struct ecc_curve *ecc_get_curve(unsigned int curve_id)
  49. {
  50. switch (curve_id) {
  51. /* In FIPS mode only allow P256 and higher */
  52. case ECC_CURVE_NIST_P192:
  53. return fips_enabled ? NULL : &nist_p192;
  54. case ECC_CURVE_NIST_P256:
  55. return &nist_p256;
  56. case ECC_CURVE_NIST_P384:
  57. return &nist_p384;
  58. case ECC_CURVE_NIST_P521:
  59. return &nist_p521;
  60. default:
  61. return NULL;
  62. }
  63. }
  64. EXPORT_SYMBOL(ecc_get_curve);
/**
 * ecc_digits_from_bytes() - convert a big-endian byte string into VLI digits
 * @in:      big-endian input bytes
 * @nbytes:  number of input bytes (may be shorter or longer-aligned than
 *           the digit buffer; must not exceed ndigits * 8 usable bytes)
 * @out:     output array of @ndigits little-endian u64 digits
 * @ndigits: number of digits in @out
 *
 * Zero-fills digits the input is too short to cover, folds a partial
 * (non-multiple-of-8) leading chunk into the most significant digit, then
 * byte-swaps the remaining aligned words.
 */
void ecc_digits_from_bytes(const u8 *in, unsigned int nbytes,
			   u64 *out, unsigned int ndigits)
{
	int diff = ndigits - DIV_ROUND_UP_POW2(nbytes, sizeof(u64));
	unsigned int o = nbytes & 7;	/* leftover bytes of a partial digit */
	__be64 msd = 0;

	/* diff > 0: not enough input bytes: set most significant digits to 0 */
	if (diff > 0) {
		ndigits -= diff;
		memset(&out[ndigits], 0, diff * sizeof(u64));
	}

	if (o) {
		/* Right-align the partial chunk inside a big-endian word. */
		memcpy((u8 *)&msd + sizeof(msd) - o, in, o);
		out[--ndigits] = be64_to_cpu(msd);
		in += o;
	}

	/* Remaining input is whole big-endian words; swap into place. */
	ecc_swap_digits(in, out, ndigits);
}
EXPORT_SYMBOL(ecc_digits_from_bytes);
  84. static u64 *ecc_alloc_digits_space(unsigned int ndigits)
  85. {
  86. size_t len = ndigits * sizeof(u64);
  87. if (!len)
  88. return NULL;
  89. return kmalloc(len, GFP_KERNEL);
  90. }
  91. static void ecc_free_digits_space(u64 *space)
  92. {
  93. kfree_sensitive(space);
  94. }
  95. struct ecc_point *ecc_alloc_point(unsigned int ndigits)
  96. {
  97. struct ecc_point *p = kmalloc(sizeof(*p), GFP_KERNEL);
  98. if (!p)
  99. return NULL;
  100. p->x = ecc_alloc_digits_space(ndigits);
  101. if (!p->x)
  102. goto err_alloc_x;
  103. p->y = ecc_alloc_digits_space(ndigits);
  104. if (!p->y)
  105. goto err_alloc_y;
  106. p->ndigits = ndigits;
  107. return p;
  108. err_alloc_y:
  109. ecc_free_digits_space(p->x);
  110. err_alloc_x:
  111. kfree(p);
  112. return NULL;
  113. }
  114. EXPORT_SYMBOL(ecc_alloc_point);
  115. void ecc_free_point(struct ecc_point *p)
  116. {
  117. if (!p)
  118. return;
  119. kfree_sensitive(p->x);
  120. kfree_sensitive(p->y);
  121. kfree_sensitive(p);
  122. }
  123. EXPORT_SYMBOL(ecc_free_point);
  124. static void vli_clear(u64 *vli, unsigned int ndigits)
  125. {
  126. int i;
  127. for (i = 0; i < ndigits; i++)
  128. vli[i] = 0;
  129. }
  130. /* Returns true if vli == 0, false otherwise. */
  131. bool vli_is_zero(const u64 *vli, unsigned int ndigits)
  132. {
  133. int i;
  134. for (i = 0; i < ndigits; i++) {
  135. if (vli[i])
  136. return false;
  137. }
  138. return true;
  139. }
  140. EXPORT_SYMBOL(vli_is_zero);
  141. /* Returns nonzero if bit of vli is set. */
  142. static u64 vli_test_bit(const u64 *vli, unsigned int bit)
  143. {
  144. return (vli[bit / 64] & ((u64)1 << (bit % 64)));
  145. }
  146. static bool vli_is_negative(const u64 *vli, unsigned int ndigits)
  147. {
  148. return vli_test_bit(vli, ndigits * 64 - 1);
  149. }
  150. /* Counts the number of 64-bit "digits" in vli. */
  151. static unsigned int vli_num_digits(const u64 *vli, unsigned int ndigits)
  152. {
  153. int i;
  154. /* Search from the end until we find a non-zero digit.
  155. * We do it in reverse because we expect that most digits will
  156. * be nonzero.
  157. */
  158. for (i = ndigits - 1; i >= 0 && vli[i] == 0; i--);
  159. return (i + 1);
  160. }
  161. /* Counts the number of bits required for vli. */
  162. unsigned int vli_num_bits(const u64 *vli, unsigned int ndigits)
  163. {
  164. unsigned int i, num_digits;
  165. u64 digit;
  166. num_digits = vli_num_digits(vli, ndigits);
  167. if (num_digits == 0)
  168. return 0;
  169. digit = vli[num_digits - 1];
  170. for (i = 0; digit; i++)
  171. digit >>= 1;
  172. return ((num_digits - 1) * 64 + i);
  173. }
  174. EXPORT_SYMBOL(vli_num_bits);
  175. /* Set dest from unaligned bit string src. */
  176. void vli_from_be64(u64 *dest, const void *src, unsigned int ndigits)
  177. {
  178. int i;
  179. const u64 *from = src;
  180. for (i = 0; i < ndigits; i++)
  181. dest[i] = get_unaligned_be64(&from[ndigits - 1 - i]);
  182. }
  183. EXPORT_SYMBOL(vli_from_be64);
  184. void vli_from_le64(u64 *dest, const void *src, unsigned int ndigits)
  185. {
  186. int i;
  187. const u64 *from = src;
  188. for (i = 0; i < ndigits; i++)
  189. dest[i] = get_unaligned_le64(&from[i]);
  190. }
  191. EXPORT_SYMBOL(vli_from_le64);
  192. /* Sets dest = src. */
  193. static void vli_set(u64 *dest, const u64 *src, unsigned int ndigits)
  194. {
  195. int i;
  196. for (i = 0; i < ndigits; i++)
  197. dest[i] = src[i];
  198. }
  199. /* Returns sign of left - right. */
  200. int vli_cmp(const u64 *left, const u64 *right, unsigned int ndigits)
  201. {
  202. int i;
  203. for (i = ndigits - 1; i >= 0; i--) {
  204. if (left[i] > right[i])
  205. return 1;
  206. else if (left[i] < right[i])
  207. return -1;
  208. }
  209. return 0;
  210. }
  211. EXPORT_SYMBOL(vli_cmp);
  212. /* Computes result = in << c, returning carry. Can modify in place
  213. * (if result == in). 0 < shift < 64.
  214. */
  215. static u64 vli_lshift(u64 *result, const u64 *in, unsigned int shift,
  216. unsigned int ndigits)
  217. {
  218. u64 carry = 0;
  219. int i;
  220. for (i = 0; i < ndigits; i++) {
  221. u64 temp = in[i];
  222. result[i] = (temp << shift) | carry;
  223. carry = temp >> (64 - shift);
  224. }
  225. return carry;
  226. }
  227. /* Computes vli = vli >> 1. */
  228. static void vli_rshift1(u64 *vli, unsigned int ndigits)
  229. {
  230. u64 *end = vli;
  231. u64 carry = 0;
  232. vli += ndigits;
  233. while (vli-- > end) {
  234. u64 temp = *vli;
  235. *vli = (temp >> 1) | carry;
  236. carry = temp << 63;
  237. }
  238. }
/* Computes result = left + right, returning carry. Can modify in place. */
static u64 vli_add(u64 *result, const u64 *left, const u64 *right,
		   unsigned int ndigits)
{
	u64 carry = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 sum;

		sum = left[i] + right[i] + carry;
		/* If sum == left[i], right[i] + carry wrapped to exactly
		 * zero mod 2^64, so the incoming carry must propagate
		 * unchanged; otherwise carry is 1 iff the sum wrapped.
		 */
		if (sum != left[i])
			carry = (sum < left[i]);
		result[i] = sum;
	}

	return carry;
}
/* Computes result = left + right for a single-digit right, returning
 * carry. Can modify in place.
 */
static u64 vli_uadd(u64 *result, const u64 *left, u64 right,
		    unsigned int ndigits)
{
	u64 carry = right;	/* feed the addend in as the initial carry */
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 sum;

		sum = left[i] + carry;
		if (sum != left[i])
			carry = (sum < left[i]);
		else
			/* Digit unchanged: normalize carry to 0/1. */
			carry = !!carry;
		result[i] = sum;
	}

	return carry;
}
/* Computes result = left - right, returning borrow. Can modify in place. */
u64 vli_sub(u64 *result, const u64 *left, const u64 *right,
	    unsigned int ndigits)
{
	u64 borrow = 0;
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 diff;

		diff = left[i] - right[i] - borrow;
		/* If diff == left[i], right[i] + borrow wrapped to zero,
		 * so the incoming borrow propagates unchanged; otherwise
		 * borrow is 1 iff the subtraction wrapped below zero.
		 */
		if (diff != left[i])
			borrow = (diff > left[i]);
		result[i] = diff;
	}

	return borrow;
}
EXPORT_SYMBOL(vli_sub);
/* Computes result = left - right for a single-digit right, returning
 * borrow. Can modify in place.
 */
static u64 vli_usub(u64 *result, const u64 *left, u64 right,
		    unsigned int ndigits)
{
	u64 borrow = right;	/* feed the subtrahend in as the initial borrow */
	int i;

	for (i = 0; i < ndigits; i++) {
		u64 diff;

		diff = left[i] - borrow;
		/* Unchanged digit means borrow was zero and stays zero. */
		if (diff != left[i])
			borrow = (diff > left[i]);
		result[i] = diff;
	}

	return borrow;
}
/* Full 64x64 -> 128 bit multiply. Uses the compiler's native __int128
 * when available, otherwise schoolbook multiplication on 32-bit halves.
 */
static uint128_t mul_64_64(u64 left, u64 right)
{
	uint128_t result;
#if defined(CONFIG_ARCH_SUPPORTS_INT128)
	unsigned __int128 m = (unsigned __int128)left * right;

	result.m_low = m;
	result.m_high = m >> 64;
#else
	/* Split both operands into 32-bit halves and combine the four
	 * partial products m0..m3, tracking carries manually.
	 */
	u64 a0 = left & 0xffffffffull;
	u64 a1 = left >> 32;
	u64 b0 = right & 0xffffffffull;
	u64 b1 = right >> 32;
	u64 m0 = a0 * b0;
	u64 m1 = a0 * b1;
	u64 m2 = a1 * b0;
	u64 m3 = a1 * b1;

	m2 += (m0 >> 32);
	m2 += m1;

	/* Overflow of m2 carries 2^32 into the high product. */
	if (m2 < m1)
		m3 += 0x100000000ull;

	result.m_low = (m0 & 0xffffffffull) | (m2 << 32);
	result.m_high = m3 + (m2 >> 32);
#endif
	return result;
}
  328. static uint128_t add_128_128(uint128_t a, uint128_t b)
  329. {
  330. uint128_t result;
  331. result.m_low = a.m_low + b.m_low;
  332. result.m_high = a.m_high + b.m_high + (result.m_low < a.m_low);
  333. return result;
  334. }
/* Computes result = left * right; result is 2 * ndigits long.
 * Column-wise schoolbook multiplication: for each output digit k, sum
 * all partial products left[i] * right[k - i] into the 192-bit
 * accumulator (r01, r2).
 */
static void vli_mult(u64 *result, const u64 *left, const u64 *right,
		     unsigned int ndigits)
{
	uint128_t r01 = { 0, 0 };	/* low 128 bits of the column accumulator */
	u64 r2 = 0;			/* third accumulator word (overflow of r01) */
	unsigned int i, k;

	/* Compute each digit of result in sequence, maintaining the
	 * carries.
	 */
	for (k = 0; k < ndigits * 2 - 1; k++) {
		unsigned int min;

		/* Clamp i so that both i and k - i index valid digits. */
		if (k < ndigits)
			min = 0;
		else
			min = (k + 1) - ndigits;

		for (i = min; i <= k && i < ndigits; i++) {
			uint128_t product;

			product = mul_64_64(left[i], right[k - i]);

			r01 = add_128_128(r01, product);
			/* Carry out of the 128-bit accumulator. */
			r2 += (r01.m_high < product.m_high);
		}

		/* Emit the finished column and shift the accumulator
		 * down one digit.
		 */
		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = r2;
		r2 = 0;
	}

	result[ndigits * 2 - 1] = r01.m_low;
}
/* Compute product = left * right, for a small (32-bit) right value.
 * result is 2 * ndigits long; digits above ndigits + 1 are zeroed.
 */
static void vli_umult(u64 *result, const u64 *left, u32 right,
		      unsigned int ndigits)
{
	uint128_t r01 = { 0 };
	unsigned int k;

	for (k = 0; k < ndigits; k++) {
		uint128_t product;

		product = mul_64_64(left[k], right);
		r01 = add_128_128(r01, product);
		/* no carry: right fits in 32 bits, so each column's sum
		 * (product + previous high word) cannot overflow 128 bits.
		 */
		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = 0;
	}
	result[k] = r01.m_low;
	/* Zero the remaining high digits of the double-width result. */
	for (++k; k < ndigits * 2; k++)
		result[k] = 0;
}
/* Computes result = left^2; result is 2 * ndigits long.
 * Like vli_mult() but exploits symmetry: each cross term
 * left[i] * left[k - i] with i != k - i appears twice, so it is
 * computed once and doubled.
 */
static void vli_square(u64 *result, const u64 *left, unsigned int ndigits)
{
	uint128_t r01 = { 0, 0 };
	u64 r2 = 0;
	int i, k;

	for (k = 0; k < ndigits * 2 - 1; k++) {
		unsigned int min;

		if (k < ndigits)
			min = 0;
		else
			min = (k + 1) - ndigits;

		/* Only iterate up to the diagonal (i <= k - i). */
		for (i = min; i <= k && i <= k - i; i++) {
			uint128_t product;

			product = mul_64_64(left[i], left[k - i]);

			if (i < k - i) {
				/* Off-diagonal term: double it, capturing
				 * the bit shifted out of the high word.
				 */
				r2 += product.m_high >> 63;
				product.m_high = (product.m_high << 1) |
						 (product.m_low >> 63);
				product.m_low <<= 1;
			}

			r01 = add_128_128(r01, product);
			r2 += (r01.m_high < product.m_high);
		}

		result[k] = r01.m_low;
		r01.m_low = r01.m_high;
		r01.m_high = r2;
		r2 = 0;
	}

	result[ndigits * 2 - 1] = r01.m_low;
}
  412. /* Computes result = (left + right) % mod.
  413. * Assumes that left < mod and right < mod, result != mod.
  414. */
  415. static void vli_mod_add(u64 *result, const u64 *left, const u64 *right,
  416. const u64 *mod, unsigned int ndigits)
  417. {
  418. u64 carry;
  419. carry = vli_add(result, left, right, ndigits);
  420. /* result > mod (result = mod + remainder), so subtract mod to
  421. * get remainder.
  422. */
  423. if (carry || vli_cmp(result, mod, ndigits) >= 0)
  424. vli_sub(result, result, mod, ndigits);
  425. }
  426. /* Computes result = (left - right) % mod.
  427. * Assumes that left < mod and right < mod, result != mod.
  428. */
  429. static void vli_mod_sub(u64 *result, const u64 *left, const u64 *right,
  430. const u64 *mod, unsigned int ndigits)
  431. {
  432. u64 borrow = vli_sub(result, left, right, ndigits);
  433. /* In this case, p_result == -diff == (max int) - diff.
  434. * Since -x % d == d - x, we can get the correct result from
  435. * result + mod (with overflow).
  436. */
  437. if (borrow)
  438. vli_add(result, result, mod, ndigits);
  439. }
  440. /*
  441. * Computes result = product % mod
  442. * for special form moduli: p = 2^k-c, for small c (note the minus sign)
  443. *
  444. * References:
  445. * R. Crandall, C. Pomerance. Prime Numbers: A Computational Perspective.
  446. * 9 Fast Algorithms for Large-Integer Arithmetic. 9.2.3 Moduli of special form
  447. * Algorithm 9.2.13 (Fast mod operation for special-form moduli).
  448. */
  449. static void vli_mmod_special(u64 *result, const u64 *product,
  450. const u64 *mod, unsigned int ndigits)
  451. {
  452. u64 c = -mod[0];
  453. u64 t[ECC_MAX_DIGITS * 2];
  454. u64 r[ECC_MAX_DIGITS * 2];
  455. vli_set(r, product, ndigits * 2);
  456. while (!vli_is_zero(r + ndigits, ndigits)) {
  457. vli_umult(t, r + ndigits, c, ndigits);
  458. vli_clear(r + ndigits, ndigits);
  459. vli_add(r, r, t, ndigits * 2);
  460. }
  461. vli_set(t, mod, ndigits);
  462. vli_clear(t + ndigits, ndigits);
  463. while (vli_cmp(r, t, ndigits * 2) >= 0)
  464. vli_sub(r, r, t, ndigits * 2);
  465. vli_set(result, r, ndigits);
  466. }
  467. /*
  468. * Computes result = product % mod
  469. * for special form moduli: p = 2^{k-1}+c, for small c (note the plus sign)
  470. * where k-1 does not fit into qword boundary by -1 bit (such as 255).
  471. * References (loosely based on):
  472. * A. Menezes, P. van Oorschot, S. Vanstone. Handbook of Applied Cryptography.
  473. * 14.3.4 Reduction methods for moduli of special form. Algorithm 14.47.
  474. * URL: http://cacr.uwaterloo.ca/hac/about/chap14.pdf
  475. *
  476. * H. Cohen, G. Frey, R. Avanzi, C. Doche, T. Lange, K. Nguyen, F. Vercauteren.
  477. * Handbook of Elliptic and Hyperelliptic Curve Cryptography.
  478. * Algorithm 10.25 Fast reduction for special form moduli
  479. */
  480. static void vli_mmod_special2(u64 *result, const u64 *product,
  481. const u64 *mod, unsigned int ndigits)
  482. {
  483. u64 c2 = mod[0] * 2;
  484. u64 q[ECC_MAX_DIGITS];
  485. u64 r[ECC_MAX_DIGITS * 2];
  486. u64 m[ECC_MAX_DIGITS * 2]; /* expanded mod */
  487. int carry; /* last bit that doesn't fit into q */
  488. int i;
  489. vli_set(m, mod, ndigits);
  490. vli_clear(m + ndigits, ndigits);
  491. vli_set(r, product, ndigits);
  492. /* q and carry are top bits */
  493. vli_set(q, product + ndigits, ndigits);
  494. vli_clear(r + ndigits, ndigits);
  495. carry = vli_is_negative(r, ndigits);
  496. if (carry)
  497. r[ndigits - 1] &= (1ull << 63) - 1;
  498. for (i = 1; carry || !vli_is_zero(q, ndigits); i++) {
  499. u64 qc[ECC_MAX_DIGITS * 2];
  500. vli_umult(qc, q, c2, ndigits);
  501. if (carry)
  502. vli_uadd(qc, qc, mod[0], ndigits * 2);
  503. vli_set(q, qc + ndigits, ndigits);
  504. vli_clear(qc + ndigits, ndigits);
  505. carry = vli_is_negative(qc, ndigits);
  506. if (carry)
  507. qc[ndigits - 1] &= (1ull << 63) - 1;
  508. if (i & 1)
  509. vli_sub(r, r, qc, ndigits * 2);
  510. else
  511. vli_add(r, r, qc, ndigits * 2);
  512. }
  513. while (vli_is_negative(r, ndigits * 2))
  514. vli_add(r, r, m, ndigits * 2);
  515. while (vli_cmp(r, m, ndigits * 2) >= 0)
  516. vli_sub(r, r, m, ndigits * 2);
  517. vli_set(result, r, ndigits);
  518. }
  519. /*
  520. * Computes result = product % mod, where product is 2N words long.
  521. * Reference: Ken MacKay's micro-ecc.
  522. * Currently only designed to work for curve_p or curve_n.
  523. */
  524. static void vli_mmod_slow(u64 *result, u64 *product, const u64 *mod,
  525. unsigned int ndigits)
  526. {
  527. u64 mod_m[2 * ECC_MAX_DIGITS];
  528. u64 tmp[2 * ECC_MAX_DIGITS];
  529. u64 *v[2] = { tmp, product };
  530. u64 carry = 0;
  531. unsigned int i;
  532. /* Shift mod so its highest set bit is at the maximum position. */
  533. int shift = (ndigits * 2 * 64) - vli_num_bits(mod, ndigits);
  534. int word_shift = shift / 64;
  535. int bit_shift = shift % 64;
  536. vli_clear(mod_m, word_shift);
  537. if (bit_shift > 0) {
  538. for (i = 0; i < ndigits; ++i) {
  539. mod_m[word_shift + i] = (mod[i] << bit_shift) | carry;
  540. carry = mod[i] >> (64 - bit_shift);
  541. }
  542. } else
  543. vli_set(mod_m + word_shift, mod, ndigits);
  544. for (i = 1; shift >= 0; --shift) {
  545. u64 borrow = 0;
  546. unsigned int j;
  547. for (j = 0; j < ndigits * 2; ++j) {
  548. u64 diff = v[i][j] - mod_m[j] - borrow;
  549. if (diff != v[i][j])
  550. borrow = (diff > v[i][j]);
  551. v[1 - i][j] = diff;
  552. }
  553. i = !(i ^ borrow); /* Swap the index if there was no borrow */
  554. vli_rshift1(mod_m, ndigits);
  555. mod_m[ndigits - 1] |= mod_m[ndigits] << (64 - 1);
  556. vli_rshift1(mod_m + ndigits, ndigits);
  557. }
  558. vli_set(result, v[i], ndigits);
  559. }
/* Computes result = product % mod using Barrett's reduction with precomputed
 * value mu appended to the mod after ndigits, mu = (2^{2w} / mod) and have
 * length ndigits + 1, where mu * (2^w - 1) should not overflow ndigits
 * boundary.
 *
 * Reference:
 * R. Brent, P. Zimmermann. Modern Computer Arithmetic. 2010.
 * 2.4.1 Barrett's algorithm. Algorithm 2.5.
 */
static void vli_mmod_barrett(u64 *result, u64 *product, const u64 *mod,
			     unsigned int ndigits)
{
	u64 q[ECC_MAX_DIGITS * 2];
	u64 r[ECC_MAX_DIGITS * 2];
	const u64 *mu = mod + ndigits;	/* precomputed mu stored after mod */

	/* q = floor(product / 2^w) * mu; only the high half of q is the
	 * quotient estimate.
	 */
	vli_mult(q, product + ndigits, mu, ndigits);
	/* Account for mu's extra (ndigits + 1)-th digit, which is 0 or 1. */
	if (mu[ndigits])
		vli_add(q + ndigits, q + ndigits, product + ndigits, ndigits);
	/* r = product - q_hi * mod; the estimate is off by at most a few
	 * multiples of mod.
	 */
	vli_mult(r, mod, q + ndigits, ndigits);
	vli_sub(r, product, r, ndigits * 2);
	/* Correct the remainder by repeated subtraction of mod. */
	while (!vli_is_zero(r + ndigits, ndigits) ||
	       vli_cmp(r, mod, ndigits) != -1) {
		u64 carry;

		carry = vli_sub(r, r, mod, ndigits);
		vli_usub(r + ndigits, r + ndigits, carry, ndigits);
	}
	vli_set(result, r, ndigits);
}
/* Computes p_result = p_product % curve_p.
 * See algorithm 5 and 6 from
 * http://www.isys.uni-klu.ac.at/PDF/2001-0126-MT.pdf
 *
 * Fast reduction for P-192 (p = 2^192 - 2^64 - 1): the high three
 * digits of the product are folded back in as shifted additions, then
 * the small positive carry is removed by conditional subtraction.
 */
static void vli_mmod_fast_192(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	const unsigned int ndigits = ECC_CURVE_NIST_P192_DIGITS;
	int carry;

	vli_set(result, product, ndigits);

	/* s1 = (a3, a4, a5) */
	vli_set(tmp, &product[3], ndigits);
	carry = vli_add(result, result, tmp, ndigits);

	/* s2 = (0, a3, a4) */
	tmp[0] = 0;
	tmp[1] = product[3];
	tmp[2] = product[4];
	carry += vli_add(result, result, tmp, ndigits);

	/* s3 = (a5, a5, 0) */
	tmp[0] = tmp[1] = product[5];
	tmp[2] = 0;
	carry += vli_add(result, result, tmp, ndigits);

	/* Reduce below the prime. */
	while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
		carry -= vli_sub(result, result, curve_prime, ndigits);
}
/* Computes result = product % curve_prime
 * from http://www.nsa.gov/ia/_files/nist-routines.pdf
 *
 * NIST P-256 fast reduction: the result is t + 2*s1 + 2*s2 + s3 + s4
 * - d1 - d2 - d3 - d4, where each term is a fixed rearrangement of the
 * 32-bit words of the 512-bit product; carry tracks the signed excess.
 */
static void vli_mmod_fast_256(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	int carry;
	const unsigned int ndigits = ECC_CURVE_NIST_P256_DIGITS;

	/* t */
	vli_set(result, product, ndigits);

	/* s1 */
	tmp[0] = 0;
	tmp[1] = product[5] & 0xffffffff00000000ull;
	tmp[2] = product[6];
	tmp[3] = product[7];
	/* s1 is added twice (shift by one bit == multiply by 2). */
	carry = vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s2 */
	tmp[1] = product[6] << 32;
	tmp[2] = (product[6] >> 32) | (product[7] << 32);
	tmp[3] = product[7] >> 32;
	carry += vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s3 */
	tmp[0] = product[4];
	tmp[1] = product[5] & 0xffffffff;
	tmp[2] = 0;
	tmp[3] = product[7];
	carry += vli_add(result, result, tmp, ndigits);

	/* s4 */
	tmp[0] = (product[4] >> 32) | (product[5] << 32);
	tmp[1] = (product[5] >> 32) | (product[6] & 0xffffffff00000000ull);
	tmp[2] = product[7];
	tmp[3] = (product[6] >> 32) | (product[4] << 32);
	carry += vli_add(result, result, tmp, ndigits);

	/* d1 */
	tmp[0] = (product[5] >> 32) | (product[6] << 32);
	tmp[1] = (product[6] >> 32);
	tmp[2] = 0;
	tmp[3] = (product[4] & 0xffffffff) | (product[5] << 32);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d2 */
	tmp[0] = product[6];
	tmp[1] = product[7];
	tmp[2] = 0;
	tmp[3] = (product[4] >> 32) | (product[5] & 0xffffffff00000000ull);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d3 */
	tmp[0] = (product[6] >> 32) | (product[7] << 32);
	tmp[1] = (product[7] >> 32) | (product[4] << 32);
	tmp[2] = (product[4] >> 32) | (product[5] << 32);
	tmp[3] = (product[6] << 32);
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d4 */
	tmp[0] = product[7];
	tmp[1] = product[4] & 0xffffffff00000000ull;
	tmp[2] = product[5];
	tmp[3] = product[6] & 0xffffffff00000000ull;
	carry -= vli_sub(result, result, tmp, ndigits);

	/* Normalize the signed excess into [0, curve_prime). */
	if (carry < 0) {
		do {
			carry += vli_add(result, result, curve_prime, ndigits);
		} while (carry < 0);
	} else {
		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
			carry -= vli_sub(result, result, curve_prime, ndigits);
	}
}
/* Helpers to splice 32-bit words of the product into 64-bit digits. */
#define SL32OR32(x32, y32) (((u64)x32 << 32) | y32)
#define AND64H(x64) (x64 & 0xffFFffFF00000000ull)
#define AND64L(x64) (x64 & 0x00000000ffFFffFFull)

/* Computes result = product % curve_prime
 * from "Mathematical routines for the NIST prime elliptic curves"
 *
 * NIST P-384 fast reduction: result = t + 2*s1 + s2 + s3 + s4 + s5 + s6
 * - d1 - d2 - d3, each term a fixed rearrangement of the 32-bit words
 * a12..a23 of the product's high half; carry tracks the signed excess.
 */
static void vli_mmod_fast_384(u64 *result, const u64 *product,
			      const u64 *curve_prime, u64 *tmp)
{
	int carry;
	const unsigned int ndigits = ECC_CURVE_NIST_P384_DIGITS;

	/* t */
	vli_set(result, product, ndigits);

	/* s1 (added twice via the 1-bit left shift) */
	tmp[0] = 0;						// 0 || 0
	tmp[1] = 0;						// 0 || 0
	tmp[2] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
	tmp[3] = product[11]>>32;				// 0 ||a23
	tmp[4] = 0;						// 0 || 0
	tmp[5] = 0;						// 0 || 0
	carry = vli_lshift(tmp, tmp, 1, ndigits);
	carry += vli_add(result, result, tmp, ndigits);

	/* s2 */
	tmp[0] = product[6];					//a13||a12
	tmp[1] = product[7];					//a15||a14
	tmp[2] = product[8];					//a17||a16
	tmp[3] = product[9];					//a19||a18
	tmp[4] = product[10];					//a21||a20
	tmp[5] = product[11];					//a23||a22
	carry += vli_add(result, result, tmp, ndigits);

	/* s3 */
	tmp[0] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
	tmp[1] = SL32OR32(product[6], (product[11]>>32));	//a12||a23
	tmp[2] = SL32OR32(product[7], (product[6])>>32);	//a14||a13
	tmp[3] = SL32OR32(product[8], (product[7]>>32));	//a16||a15
	tmp[4] = SL32OR32(product[9], (product[8]>>32));	//a18||a17
	tmp[5] = SL32OR32(product[10], (product[9]>>32));	//a20||a19
	carry += vli_add(result, result, tmp, ndigits);

	/* s4 */
	tmp[0] = AND64H(product[11]);				//a23|| 0
	tmp[1] = (product[10]<<32);				//a20|| 0
	tmp[2] = product[6];					//a13||a12
	tmp[3] = product[7];					//a15||a14
	tmp[4] = product[8];					//a17||a16
	tmp[5] = product[9];					//a19||a18
	carry += vli_add(result, result, tmp, ndigits);

	/* s5 */
	tmp[0] = 0;						// 0|| 0
	tmp[1] = 0;						// 0|| 0
	tmp[2] = product[10];					//a21||a20
	tmp[3] = product[11];					//a23||a22
	tmp[4] = 0;						// 0|| 0
	tmp[5] = 0;						// 0|| 0
	carry += vli_add(result, result, tmp, ndigits);

	/* s6 */
	tmp[0] = AND64L(product[10]);				// 0 ||a20
	tmp[1] = AND64H(product[10]);				//a21|| 0
	tmp[2] = product[11];					//a23||a22
	tmp[3] = 0;						// 0 || 0
	tmp[4] = 0;						// 0 || 0
	tmp[5] = 0;						// 0 || 0
	carry += vli_add(result, result, tmp, ndigits);

	/* d1 */
	tmp[0] = SL32OR32(product[6], (product[11]>>32));	//a12||a23
	tmp[1] = SL32OR32(product[7], (product[6]>>32));	//a14||a13
	tmp[2] = SL32OR32(product[8], (product[7]>>32));	//a16||a15
	tmp[3] = SL32OR32(product[9], (product[8]>>32));	//a18||a17
	tmp[4] = SL32OR32(product[10], (product[9]>>32));	//a20||a19
	tmp[5] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d2 */
	tmp[0] = (product[10]<<32);				//a20|| 0
	tmp[1] = SL32OR32(product[11], (product[10]>>32));	//a22||a21
	tmp[2] = (product[11]>>32);				// 0 ||a23
	tmp[3] = 0;						// 0 || 0
	tmp[4] = 0;						// 0 || 0
	tmp[5] = 0;						// 0 || 0
	carry -= vli_sub(result, result, tmp, ndigits);

	/* d3 */
	tmp[0] = 0;						// 0 || 0
	tmp[1] = AND64H(product[11]);				//a23|| 0
	tmp[2] = product[11]>>32;				// 0 ||a23
	tmp[3] = 0;						// 0 || 0
	tmp[4] = 0;						// 0 || 0
	tmp[5] = 0;						// 0 || 0
	carry -= vli_sub(result, result, tmp, ndigits);

	/* Normalize the signed excess into [0, curve_prime). */
	if (carry < 0) {
		do {
			carry += vli_add(result, result, curve_prime, ndigits);
		} while (carry < 0);
	} else {
		while (carry || vli_cmp(curve_prime, result, ndigits) != 1)
			carry -= vli_sub(result, result, curve_prime, ndigits);
	}
}

#undef SL32OR32
#undef AND64H
#undef AND64L
  776. /*
  777. * Computes result = product % curve_prime
  778. * from "Recommendations for Discrete Logarithm-Based Cryptography:
  779. * Elliptic Curve Domain Parameters" section G.1.4
  780. */
  781. static void vli_mmod_fast_521(u64 *result, const u64 *product,
  782. const u64 *curve_prime, u64 *tmp)
  783. {
  784. const unsigned int ndigits = ECC_CURVE_NIST_P521_DIGITS;
  785. size_t i;
  786. /* Initialize result with lowest 521 bits from product */
  787. vli_set(result, product, ndigits);
  788. result[8] &= 0x1ff;
  789. for (i = 0; i < ndigits; i++)
  790. tmp[i] = (product[8 + i] >> 9) | (product[9 + i] << 55);
  791. tmp[8] &= 0x1ff;
  792. vli_mod_add(result, result, tmp, curve_prime, ndigits);
  793. }
  794. /* Computes result = product % curve_prime for different curve_primes.
  795. *
  796. * Note that curve_primes are distinguished just by heuristic check and
  797. * not by complete conformance check.
  798. */
  799. static bool vli_mmod_fast(u64 *result, u64 *product,
  800. const struct ecc_curve *curve)
  801. {
  802. u64 tmp[2 * ECC_MAX_DIGITS];
  803. const u64 *curve_prime = curve->p;
  804. const unsigned int ndigits = curve->g.ndigits;
  805. /* All NIST curves have name prefix 'nist_' */
  806. if (strncmp(curve->name, "nist_", 5) != 0) {
  807. /* Try to handle Pseudo-Marsenne primes. */
  808. if (curve_prime[ndigits - 1] == -1ull) {
  809. vli_mmod_special(result, product, curve_prime,
  810. ndigits);
  811. return true;
  812. } else if (curve_prime[ndigits - 1] == 1ull << 63 &&
  813. curve_prime[ndigits - 2] == 0) {
  814. vli_mmod_special2(result, product, curve_prime,
  815. ndigits);
  816. return true;
  817. }
  818. vli_mmod_barrett(result, product, curve_prime, ndigits);
  819. return true;
  820. }
  821. switch (ndigits) {
  822. case ECC_CURVE_NIST_P192_DIGITS:
  823. vli_mmod_fast_192(result, product, curve_prime, tmp);
  824. break;
  825. case ECC_CURVE_NIST_P256_DIGITS:
  826. vli_mmod_fast_256(result, product, curve_prime, tmp);
  827. break;
  828. case ECC_CURVE_NIST_P384_DIGITS:
  829. vli_mmod_fast_384(result, product, curve_prime, tmp);
  830. break;
  831. case ECC_CURVE_NIST_P521_DIGITS:
  832. vli_mmod_fast_521(result, product, curve_prime, tmp);
  833. break;
  834. default:
  835. pr_err_ratelimited("ecc: unsupported digits size!\n");
  836. return false;
  837. }
  838. return true;
  839. }
  840. /* Computes result = (left * right) % mod.
  841. * Assumes that mod is big enough curve order.
  842. */
  843. void vli_mod_mult_slow(u64 *result, const u64 *left, const u64 *right,
  844. const u64 *mod, unsigned int ndigits)
  845. {
  846. u64 product[ECC_MAX_DIGITS * 2];
  847. vli_mult(product, left, right, ndigits);
  848. vli_mmod_slow(result, product, mod, ndigits);
  849. }
  850. EXPORT_SYMBOL(vli_mod_mult_slow);
  851. /* Computes result = (left * right) % curve_prime. */
  852. static void vli_mod_mult_fast(u64 *result, const u64 *left, const u64 *right,
  853. const struct ecc_curve *curve)
  854. {
  855. u64 product[2 * ECC_MAX_DIGITS];
  856. vli_mult(product, left, right, curve->g.ndigits);
  857. vli_mmod_fast(result, product, curve);
  858. }
  859. /* Computes result = left^2 % curve_prime. */
  860. static void vli_mod_square_fast(u64 *result, const u64 *left,
  861. const struct ecc_curve *curve)
  862. {
  863. u64 product[2 * ECC_MAX_DIGITS];
  864. vli_square(product, left, curve->g.ndigits);
  865. vli_mmod_fast(result, product, curve);
  866. }
/* True iff the lowest limb (and hence the whole VLI) is even. */
#define EVEN(vli) (!(vli[0] & 1))
/* Computes result = (1 / p_input) % mod. All VLIs are the same size.
 * See "From Euclid's GCD to Montgomery Multiplication to the Great Divide"
 * https://labs.oracle.com/techrep/2001/smli_tr-2001-95.pdf
 *
 * Binary extended GCD: (a, b) start as (input, mod) and are reduced
 * toward their GCD while the cofactors (u, v) track the corresponding
 * modular multiples.  Zero input yields a zero result by convention.
 */
void vli_mod_inv(u64 *result, const u64 *input, const u64 *mod,
		 unsigned int ndigits)
{
	u64 a[ECC_MAX_DIGITS], b[ECC_MAX_DIGITS];
	u64 u[ECC_MAX_DIGITS], v[ECC_MAX_DIGITS];
	u64 carry;
	int cmp_result;

	/* 0 has no inverse; return 0 rather than garbage. */
	if (vli_is_zero(input, ndigits)) {
		vli_clear(result, ndigits);
		return;
	}

	vli_set(a, input, ndigits);
	vli_set(b, mod, ndigits);
	vli_clear(u, ndigits);
	u[0] = 1;
	vli_clear(v, ndigits);

	while ((cmp_result = vli_cmp(a, b, ndigits)) != 0) {
		carry = 0;
		if (EVEN(a)) {
			/* a even: halve a and halve u modulo 'mod'.  If u is
			 * odd, add mod first so the shift is exact; a carry
			 * out of that add re-enters as the new top bit.
			 */
			vli_rshift1(a, ndigits);
			if (!EVEN(u))
				carry = vli_add(u, u, mod, ndigits);
			vli_rshift1(u, ndigits);
			if (carry)
				u[ndigits - 1] |= 0x8000000000000000ull;
		} else if (EVEN(b)) {
			/* Same step with the roles of (b, v). */
			vli_rshift1(b, ndigits);
			if (!EVEN(v))
				carry = vli_add(v, v, mod, ndigits);
			vli_rshift1(v, ndigits);
			if (carry)
				v[ndigits - 1] |= 0x8000000000000000ull;
		} else if (cmp_result > 0) {
			/* Both odd, a > b: a = (a - b)/2; u = (u - v)/2 mod
			 * 'mod', borrowing mod first if u < v or u - v is odd.
			 */
			vli_sub(a, a, b, ndigits);
			vli_rshift1(a, ndigits);
			if (vli_cmp(u, v, ndigits) < 0)
				vli_add(u, u, mod, ndigits);
			vli_sub(u, u, v, ndigits);
			if (!EVEN(u))
				carry = vli_add(u, u, mod, ndigits);
			vli_rshift1(u, ndigits);
			if (carry)
				u[ndigits - 1] |= 0x8000000000000000ull;
		} else {
			/* Both odd, b > a: mirror of the previous branch. */
			vli_sub(b, b, a, ndigits);
			vli_rshift1(b, ndigits);
			if (vli_cmp(v, u, ndigits) < 0)
				vli_add(v, v, mod, ndigits);
			vli_sub(v, v, u, ndigits);
			if (!EVEN(v))
				carry = vli_add(v, v, mod, ndigits);
			vli_rshift1(v, ndigits);
			if (carry)
				v[ndigits - 1] |= 0x8000000000000000ull;
		}
	}

	/* Loop exits when a == b; u then holds the inverse of input. */
	vli_set(result, u, ndigits);
}
EXPORT_SYMBOL(vli_mod_inv);
  931. /* ------ Point operations ------ */
  932. /* Returns true if p_point is the point at infinity, false otherwise. */
  933. bool ecc_point_is_zero(const struct ecc_point *point)
  934. {
  935. return (vli_is_zero(point->x, point->ndigits) &&
  936. vli_is_zero(point->y, point->ndigits));
  937. }
  938. EXPORT_SYMBOL(ecc_point_is_zero);
/* Point multiplication algorithm using Montgomery's ladder with co-Z
 * coordinates. From https://eprint.iacr.org/2011/338.pdf
 */

/* Double in place: (x1, y1, z1) <- 2*(x1, y1, z1) in Jacobian coordinates. */
static void ecc_point_double_jacobian(u64 *x1, u64 *y1, u64 *z1,
				      const struct ecc_curve *curve)
{
	/* t1 = x, t2 = y, t3 = z */
	u64 t4[ECC_MAX_DIGITS];
	u64 t5[ECC_MAX_DIGITS];
	const u64 *curve_prime = curve->p;
	const unsigned int ndigits = curve->g.ndigits;

	/* Z == 0 is the point at infinity; it doubles to itself. */
	if (vli_is_zero(z1, ndigits))
		return;

	/* t4 = y1^2 */
	vli_mod_square_fast(t4, y1, curve);
	/* t5 = x1*y1^2 = A */
	vli_mod_mult_fast(t5, x1, t4, curve);
	/* t4 = y1^4 */
	vli_mod_square_fast(t4, t4, curve);
	/* t2 = y1*z1 = z3 */
	vli_mod_mult_fast(y1, y1, z1, curve);
	/* t3 = z1^2 */
	vli_mod_square_fast(z1, z1, curve);

	/* t1 = x1 + z1^2 */
	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
	/* t3 = 2*z1^2 */
	vli_mod_add(z1, z1, z1, curve_prime, ndigits);
	/* t3 = x1 - z1^2 */
	vli_mod_sub(z1, x1, z1, curve_prime, ndigits);
	/* t1 = (x1 + z1^2)*(x1 - z1^2) = x1^2 - z1^4 */
	vli_mod_mult_fast(x1, x1, z1, curve);

	/* t3 = 2*(x1^2 - z1^4) */
	vli_mod_add(z1, x1, x1, curve_prime, ndigits);
	/* t1 = 3*(x1^2 - z1^4) */
	vli_mod_add(x1, x1, z1, curve_prime, ndigits);
	/* Halve t1 mod p: if odd, add p so the right shift is exact; the
	 * carry out of that add re-enters as the top bit after the shift.
	 */
	if (vli_test_bit(x1, 0)) {
		u64 carry = vli_add(x1, x1, curve_prime, ndigits);

		vli_rshift1(x1, ndigits);
		x1[ndigits - 1] |= carry << 63;
	} else {
		vli_rshift1(x1, ndigits);
	}
	/* t1 = 3/2*(x1^2 - z1^4) = B */

	/* t3 = B^2 */
	vli_mod_square_fast(z1, x1, curve);
	/* t3 = B^2 - A */
	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
	/* t3 = B^2 - 2A = x3 */
	vli_mod_sub(z1, z1, t5, curve_prime, ndigits);
	/* t5 = A - x3 */
	vli_mod_sub(t5, t5, z1, curve_prime, ndigits);
	/* t1 = B * (A - x3) */
	vli_mod_mult_fast(x1, x1, t5, curve);
	/* t4 = B * (A - x3) - y1^4 = y3 */
	vli_mod_sub(t4, x1, t4, curve_prime, ndigits);

	/* Shuffle the results back into the caller's (x, y, z) slots. */
	vli_set(x1, z1, ndigits);
	vli_set(z1, y1, ndigits);
	vli_set(y1, t4, ndigits);
}
  999. /* Modify (x1, y1) => (x1 * z^2, y1 * z^3) */
  1000. static void apply_z(u64 *x1, u64 *y1, u64 *z, const struct ecc_curve *curve)
  1001. {
  1002. u64 t1[ECC_MAX_DIGITS];
  1003. vli_mod_square_fast(t1, z, curve); /* z^2 */
  1004. vli_mod_mult_fast(x1, x1, t1, curve); /* x1 * z^2 */
  1005. vli_mod_mult_fast(t1, t1, z, curve); /* z^3 */
  1006. vli_mod_mult_fast(y1, y1, t1, curve); /* y1 * z^3 */
  1007. }
  1008. /* P = (x1, y1) => 2P, (x2, y2) => P' */
  1009. static void xycz_initial_double(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
  1010. u64 *p_initial_z, const struct ecc_curve *curve)
  1011. {
  1012. u64 z[ECC_MAX_DIGITS];
  1013. const unsigned int ndigits = curve->g.ndigits;
  1014. vli_set(x2, x1, ndigits);
  1015. vli_set(y2, y1, ndigits);
  1016. vli_clear(z, ndigits);
  1017. z[0] = 1;
  1018. if (p_initial_z)
  1019. vli_set(z, p_initial_z, ndigits);
  1020. apply_z(x1, y1, z, curve);
  1021. ecc_point_double_jacobian(x1, y1, z, curve);
  1022. apply_z(x2, y2, z, curve);
  1023. }
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P' = (x1', y1', Z3), P + Q = (x3, y3, Z3)
 * or P => P', Q => P + Q
 *
 * Co-Z addition: both input points share the same implicit Z; the sum
 * and the rescaled P come out sharing a common Z3.  All work happens
 * in place with one temporary (4 multiplications, 2 squarings).
 */
static void xycz_add(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
		     const struct ecc_curve *curve)
{
	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
	u64 t5[ECC_MAX_DIGITS];
	const u64 *curve_prime = curve->p;
	const unsigned int ndigits = curve->g.ndigits;

	/* t5 = x2 - x1 */
	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
	/* t5 = (x2 - x1)^2 = A */
	vli_mod_square_fast(t5, t5, curve);
	/* t1 = x1*A = B */
	vli_mod_mult_fast(x1, x1, t5, curve);
	/* t3 = x2*A = C */
	vli_mod_mult_fast(x2, x2, t5, curve);
	/* t4 = y2 - y1 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);
	/* t5 = (y2 - y1)^2 = D */
	vli_mod_square_fast(t5, y2, curve);

	/* t5 = D - B */
	vli_mod_sub(t5, t5, x1, curve_prime, ndigits);
	/* t5 = D - B - C = x3 */
	vli_mod_sub(t5, t5, x2, curve_prime, ndigits);
	/* t3 = C - B */
	vli_mod_sub(x2, x2, x1, curve_prime, ndigits);
	/* t2 = y1*(C - B) */
	vli_mod_mult_fast(y1, y1, x2, curve);
	/* t3 = B - x3 */
	vli_mod_sub(x2, x1, t5, curve_prime, ndigits);
	/* t4 = (y2 - y1)*(B - x3) */
	vli_mod_mult_fast(y2, y2, x2, curve);
	/* t4 = y3 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	vli_set(x2, t5, ndigits);
}
/* Input P = (x1, y1, Z), Q = (x2, y2, Z)
 * Output P + Q = (x3, y3, Z3), P - Q = (x3', y3', Z3)
 * or P => P - Q, Q => P + Q
 *
 * Conjugate co-Z addition: produces both the sum and the difference of
 * the two input points with a shared Z3, which is what the Montgomery
 * ladder in ecc_point_mult() consumes each iteration.
 */
static void xycz_add_c(u64 *x1, u64 *y1, u64 *x2, u64 *y2,
		       const struct ecc_curve *curve)
{
	/* t1 = X1, t2 = Y1, t3 = X2, t4 = Y2 */
	u64 t5[ECC_MAX_DIGITS];
	u64 t6[ECC_MAX_DIGITS];
	u64 t7[ECC_MAX_DIGITS];
	const u64 *curve_prime = curve->p;
	const unsigned int ndigits = curve->g.ndigits;

	/* t5 = x2 - x1 */
	vli_mod_sub(t5, x2, x1, curve_prime, ndigits);
	/* t5 = (x2 - x1)^2 = A */
	vli_mod_square_fast(t5, t5, curve);
	/* t1 = x1*A = B */
	vli_mod_mult_fast(x1, x1, t5, curve);
	/* t3 = x2*A = C */
	vli_mod_mult_fast(x2, x2, t5, curve);
	/* t5 = y2 + y1 (kept for the conjugate result below) */
	vli_mod_add(t5, y2, y1, curve_prime, ndigits);
	/* t4 = y2 - y1 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	/* t6 = C - B */
	vli_mod_sub(t6, x2, x1, curve_prime, ndigits);
	/* t2 = y1 * (C - B) */
	vli_mod_mult_fast(y1, y1, t6, curve);
	/* t6 = B + C */
	vli_mod_add(t6, x1, x2, curve_prime, ndigits);
	/* t3 = (y2 - y1)^2 */
	vli_mod_square_fast(x2, y2, curve);
	/* t3 = (y2 - y1)^2 - (B + C) = x3 */
	vli_mod_sub(x2, x2, t6, curve_prime, ndigits);

	/* t7 = B - x3 */
	vli_mod_sub(t7, x1, x2, curve_prime, ndigits);
	/* t4 = (y2 - y1)*(B - x3) */
	vli_mod_mult_fast(y2, y2, t7, curve);
	/* t4 = y3 */
	vli_mod_sub(y2, y2, y1, curve_prime, ndigits);

	/* t7 = (y2 + y1)^2 = F */
	vli_mod_square_fast(t7, t5, curve);
	/* t7 = F - (B + C) = x3' */
	vli_mod_sub(t7, t7, t6, curve_prime, ndigits);
	/* t6 = x3' - B */
	vli_mod_sub(t6, t7, x1, curve_prime, ndigits);
	/* t6 = (y2 + y1)*(x3' - B) */
	vli_mod_mult_fast(t6, t6, t5, curve);
	/* t2 = y3' */
	vli_mod_sub(y1, t6, y1, curve_prime, ndigits);

	vli_set(x1, t7, ndigits);
}
/* Scalar multiplication: result = scalar * point, using the co-Z
 * Montgomery ladder.  initial_z, when non-NULL, supplies a randomized
 * starting Z coordinate.
 */
static void ecc_point_mult(struct ecc_point *result,
			   const struct ecc_point *point, const u64 *scalar,
			   u64 *initial_z, const struct ecc_curve *curve,
			   unsigned int ndigits)
{
	/* R0 and R1 */
	u64 rx[2][ECC_MAX_DIGITS];
	u64 ry[2][ECC_MAX_DIGITS];
	u64 z[ECC_MAX_DIGITS];
	u64 sk[2][ECC_MAX_DIGITS];
	u64 *curve_prime = curve->p;
	int i, nb;
	int num_bits;
	int carry;

	/* Regularize the scalar: replace it by scalar + n (if that addition
	 * carried out) or scalar + 2n, so the ladder below always walks the
	 * same fixed number of bits regardless of the scalar's bit length.
	 */
	carry = vli_add(sk[0], scalar, curve->n, ndigits);
	vli_add(sk[1], sk[0], curve->n, ndigits);
	scalar = sk[!carry];

	if (curve->nbits == 521)	/* NIST P521 */
		num_bits = curve->nbits + 2;
	else
		num_bits = sizeof(u64) * ndigits * 8 + 1;

	/* Ladder registers start as R1 = P, R0 = 2P (shared Z). */
	vli_set(rx[1], point->x, ndigits);
	vli_set(ry[1], point->y, ndigits);

	xycz_initial_double(rx[1], ry[1], rx[0], ry[0], initial_z, curve);

	/* One conjugate-add plus one add per remaining scalar bit; the
	 * top bit was consumed by the initial double above.
	 */
	for (i = num_bits - 2; i > 0; i--) {
		nb = !vli_test_bit(scalar, i);
		xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);
		xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
	}

	nb = !vli_test_bit(scalar, 0);
	xycz_add_c(rx[1 - nb], ry[1 - nb], rx[nb], ry[nb], curve);

	/* Find final 1/Z value. */
	/* X1 - X0 */
	vli_mod_sub(z, rx[1], rx[0], curve_prime, ndigits);
	/* Yb * (X1 - X0) */
	vli_mod_mult_fast(z, z, ry[1 - nb], curve);
	/* xP * Yb * (X1 - X0) */
	vli_mod_mult_fast(z, z, point->x, curve);
	/* 1 / (xP * Yb * (X1 - X0)) */
	vli_mod_inv(z, z, curve_prime, point->ndigits);
	/* yP / (xP * Yb * (X1 - X0)) */
	vli_mod_mult_fast(z, z, point->y, curve);
	/* Xb * yP / (xP * Yb * (X1 - X0)) */
	vli_mod_mult_fast(z, z, rx[1 - nb], curve);
	/* End 1/Z calculation */

	xycz_add(rx[nb], ry[nb], rx[1 - nb], ry[1 - nb], curve);
	/* Convert back to affine coordinates using the recovered 1/Z. */
	apply_z(rx[0], ry[0], z, curve);

	vli_set(result->x, rx[0], ndigits);
	vli_set(result->y, ry[0], ndigits);
}
  1166. /* Computes R = P + Q mod p */
  1167. static void ecc_point_add(const struct ecc_point *result,
  1168. const struct ecc_point *p, const struct ecc_point *q,
  1169. const struct ecc_curve *curve)
  1170. {
  1171. u64 z[ECC_MAX_DIGITS];
  1172. u64 px[ECC_MAX_DIGITS];
  1173. u64 py[ECC_MAX_DIGITS];
  1174. unsigned int ndigits = curve->g.ndigits;
  1175. vli_set(result->x, q->x, ndigits);
  1176. vli_set(result->y, q->y, ndigits);
  1177. vli_mod_sub(z, result->x, p->x, curve->p, ndigits);
  1178. vli_set(px, p->x, ndigits);
  1179. vli_set(py, p->y, ndigits);
  1180. xycz_add(px, py, result->x, result->y, curve);
  1181. vli_mod_inv(z, z, curve->p, ndigits);
  1182. apply_z(result->x, result->y, z, curve);
  1183. }
/* Computes R = u1P + u2Q mod p using Shamir's trick.
 * Based on: Kenneth MacKay's micro-ecc (2014).
 */
void ecc_point_mult_shamir(const struct ecc_point *result,
			   const u64 *u1, const struct ecc_point *p,
			   const u64 *u2, const struct ecc_point *q,
			   const struct ecc_curve *curve)
{
	u64 z[ECC_MAX_DIGITS];
	u64 sump[2][ECC_MAX_DIGITS];
	u64 *rx = result->x;
	u64 *ry = result->y;
	unsigned int ndigits = curve->g.ndigits;
	unsigned int num_bits;
	struct ecc_point sum = ECC_POINT_INIT(sump[0], sump[1], ndigits);
	const struct ecc_point *points[4];
	const struct ecc_point *point;
	unsigned int idx;
	int i;

	/* Precompute P + Q; the table is indexed by the pair of scalar
	 * bits: bit of u1 in position 0, bit of u2 in position 1.
	 */
	ecc_point_add(&sum, p, q, curve);
	points[0] = NULL;	/* 00 -> nothing to add */
	points[1] = p;		/* 01 -> add P */
	points[2] = q;		/* 10 -> add Q */
	points[3] = &sum;	/* 11 -> add P + Q */

	/* Walk both scalars together from the higher of the two MSBs. */
	num_bits = max(vli_num_bits(u1, ndigits), vli_num_bits(u2, ndigits));
	i = num_bits - 1;
	idx = !!vli_test_bit(u1, i);
	idx |= (!!vli_test_bit(u2, i)) << 1;
	point = points[idx];

	/* Seed the accumulator with the top bit-pair's table entry, Z = 1. */
	vli_set(rx, point->x, ndigits);
	vli_set(ry, point->y, ndigits);
	vli_clear(z + 1, ndigits - 1);
	z[0] = 1;

	/* Double-and-add: one doubling per bit, plus the table entry
	 * selected by the current bit pair (if any).
	 */
	for (--i; i >= 0; i--) {
		ecc_point_double_jacobian(rx, ry, z, curve);
		idx = !!vli_test_bit(u1, i);
		idx |= (!!vli_test_bit(u2, i)) << 1;
		point = points[idx];
		if (point) {
			u64 tx[ECC_MAX_DIGITS];
			u64 ty[ECC_MAX_DIGITS];
			u64 tz[ECC_MAX_DIGITS];

			/* Bring the table point onto the accumulator's Z. */
			vli_set(tx, point->x, ndigits);
			vli_set(ty, point->y, ndigits);
			apply_z(tx, ty, z, curve);
			/* tz tracks the extra Z factor xycz_add() introduces. */
			vli_mod_sub(tz, rx, tx, curve->p, ndigits);
			xycz_add(tx, ty, rx, ry, curve);
			vli_mod_mult_fast(z, z, tz, curve);
		}
	}

	/* Convert the Jacobian accumulator back to affine coordinates. */
	vli_mod_inv(z, z, curve->p, ndigits);
	apply_z(rx, ry, z, curve);
}
EXPORT_SYMBOL(ecc_point_mult_shamir);
  1238. /*
  1239. * This function performs checks equivalent to Appendix A.4.2 of FIPS 186-5.
  1240. * Whereas A.4.2 results in an integer in the interval [1, n-1], this function
  1241. * ensures that the integer is in the range of [2, n-3]. We are slightly
  1242. * stricter because of the currently used scalar multiplication algorithm.
  1243. */
  1244. static int __ecc_is_key_valid(const struct ecc_curve *curve,
  1245. const u64 *private_key, unsigned int ndigits)
  1246. {
  1247. u64 one[ECC_MAX_DIGITS] = { 1, };
  1248. u64 res[ECC_MAX_DIGITS];
  1249. if (!private_key)
  1250. return -EINVAL;
  1251. if (curve->g.ndigits != ndigits)
  1252. return -EINVAL;
  1253. /* Make sure the private key is in the range [2, n-3]. */
  1254. if (vli_cmp(one, private_key, ndigits) != -1)
  1255. return -EINVAL;
  1256. vli_sub(res, curve->n, one, ndigits);
  1257. vli_sub(res, res, one, ndigits);
  1258. if (vli_cmp(res, private_key, ndigits) != 1)
  1259. return -EINVAL;
  1260. return 0;
  1261. }
  1262. int ecc_is_key_valid(unsigned int curve_id, unsigned int ndigits,
  1263. const u64 *private_key, unsigned int private_key_len)
  1264. {
  1265. int nbytes;
  1266. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1267. nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
  1268. if (private_key_len != nbytes)
  1269. return -EINVAL;
  1270. return __ecc_is_key_valid(curve, private_key, ndigits);
  1271. }
  1272. EXPORT_SYMBOL(ecc_is_key_valid);
  1273. /*
  1274. * ECC private keys are generated using the method of rejection sampling,
  1275. * equivalent to that described in FIPS 186-5, Appendix A.2.2.
  1276. *
  1277. * This method generates a private key uniformly distributed in the range
  1278. * [2, n-3].
  1279. */
  1280. int ecc_gen_privkey(unsigned int curve_id, unsigned int ndigits,
  1281. u64 *private_key)
  1282. {
  1283. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1284. unsigned int nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
  1285. unsigned int nbits = vli_num_bits(curve->n, ndigits);
  1286. int err;
  1287. /*
  1288. * Step 1 & 2: check that N is included in Table 1 of FIPS 186-5,
  1289. * section 6.1.1.
  1290. */
  1291. if (nbits < 224)
  1292. return -EINVAL;
  1293. /*
  1294. * FIPS 186-5 recommends that the private key should be obtained from a
  1295. * RBG with a security strength equal to or greater than the security
  1296. * strength associated with N.
  1297. *
  1298. * The maximum security strength identified by NIST SP800-57pt1r4 for
  1299. * ECC is 256 (N >= 512).
  1300. *
  1301. * This condition is met by the default RNG because it selects a favored
  1302. * DRBG with a security strength of 256.
  1303. */
  1304. if (crypto_get_default_rng())
  1305. return -EFAULT;
  1306. /* Step 3: obtain N returned_bits from the DRBG. */
  1307. err = crypto_rng_get_bytes(crypto_default_rng,
  1308. (u8 *)private_key, nbytes);
  1309. crypto_put_default_rng();
  1310. if (err)
  1311. return err;
  1312. /* Step 4: make sure the private key is in the valid range. */
  1313. if (__ecc_is_key_valid(curve, private_key, ndigits))
  1314. return -EINVAL;
  1315. return 0;
  1316. }
  1317. EXPORT_SYMBOL(ecc_gen_privkey);
  1318. int ecc_make_pub_key(unsigned int curve_id, unsigned int ndigits,
  1319. const u64 *private_key, u64 *public_key)
  1320. {
  1321. int ret = 0;
  1322. struct ecc_point *pk;
  1323. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1324. if (!private_key) {
  1325. ret = -EINVAL;
  1326. goto out;
  1327. }
  1328. pk = ecc_alloc_point(ndigits);
  1329. if (!pk) {
  1330. ret = -ENOMEM;
  1331. goto out;
  1332. }
  1333. ecc_point_mult(pk, &curve->g, private_key, NULL, curve, ndigits);
  1334. /* SP800-56A rev 3 5.6.2.1.3 key check */
  1335. if (ecc_is_pubkey_valid_full(curve, pk)) {
  1336. ret = -EAGAIN;
  1337. goto err_free_point;
  1338. }
  1339. ecc_swap_digits(pk->x, public_key, ndigits);
  1340. ecc_swap_digits(pk->y, &public_key[ndigits], ndigits);
  1341. err_free_point:
  1342. ecc_free_point(pk);
  1343. out:
  1344. return ret;
  1345. }
  1346. EXPORT_SYMBOL(ecc_make_pub_key);
  1347. /* SP800-56A section 5.6.2.3.4 partial verification: ephemeral keys only */
  1348. int ecc_is_pubkey_valid_partial(const struct ecc_curve *curve,
  1349. struct ecc_point *pk)
  1350. {
  1351. u64 yy[ECC_MAX_DIGITS], xxx[ECC_MAX_DIGITS], w[ECC_MAX_DIGITS];
  1352. if (WARN_ON(pk->ndigits != curve->g.ndigits))
  1353. return -EINVAL;
  1354. /* Check 1: Verify key is not the zero point. */
  1355. if (ecc_point_is_zero(pk))
  1356. return -EINVAL;
  1357. /* Check 2: Verify key is in the range [1, p-1]. */
  1358. if (vli_cmp(curve->p, pk->x, pk->ndigits) != 1)
  1359. return -EINVAL;
  1360. if (vli_cmp(curve->p, pk->y, pk->ndigits) != 1)
  1361. return -EINVAL;
  1362. /* Check 3: Verify that y^2 == (x^3 + a·x + b) mod p */
  1363. vli_mod_square_fast(yy, pk->y, curve); /* y^2 */
  1364. vli_mod_square_fast(xxx, pk->x, curve); /* x^2 */
  1365. vli_mod_mult_fast(xxx, xxx, pk->x, curve); /* x^3 */
  1366. vli_mod_mult_fast(w, curve->a, pk->x, curve); /* a·x */
  1367. vli_mod_add(w, w, curve->b, curve->p, pk->ndigits); /* a·x + b */
  1368. vli_mod_add(w, w, xxx, curve->p, pk->ndigits); /* x^3 + a·x + b */
  1369. if (vli_cmp(yy, w, pk->ndigits) != 0) /* Equation */
  1370. return -EINVAL;
  1371. return 0;
  1372. }
  1373. EXPORT_SYMBOL(ecc_is_pubkey_valid_partial);
  1374. /* SP800-56A section 5.6.2.3.3 full verification */
  1375. int ecc_is_pubkey_valid_full(const struct ecc_curve *curve,
  1376. struct ecc_point *pk)
  1377. {
  1378. struct ecc_point *nQ;
  1379. /* Checks 1 through 3 */
  1380. int ret = ecc_is_pubkey_valid_partial(curve, pk);
  1381. if (ret)
  1382. return ret;
  1383. /* Check 4: Verify that nQ is the zero point. */
  1384. nQ = ecc_alloc_point(pk->ndigits);
  1385. if (!nQ)
  1386. return -ENOMEM;
  1387. ecc_point_mult(nQ, pk, curve->n, NULL, curve, pk->ndigits);
  1388. if (!ecc_point_is_zero(nQ))
  1389. ret = -EINVAL;
  1390. ecc_free_point(nQ);
  1391. return ret;
  1392. }
  1393. EXPORT_SYMBOL(ecc_is_pubkey_valid_full);
  1394. int crypto_ecdh_shared_secret(unsigned int curve_id, unsigned int ndigits,
  1395. const u64 *private_key, const u64 *public_key,
  1396. u64 *secret)
  1397. {
  1398. int ret = 0;
  1399. struct ecc_point *product, *pk;
  1400. u64 rand_z[ECC_MAX_DIGITS];
  1401. unsigned int nbytes;
  1402. const struct ecc_curve *curve = ecc_get_curve(curve_id);
  1403. if (!private_key || !public_key || ndigits > ARRAY_SIZE(rand_z)) {
  1404. ret = -EINVAL;
  1405. goto out;
  1406. }
  1407. nbytes = ndigits << ECC_DIGITS_TO_BYTES_SHIFT;
  1408. get_random_bytes(rand_z, nbytes);
  1409. pk = ecc_alloc_point(ndigits);
  1410. if (!pk) {
  1411. ret = -ENOMEM;
  1412. goto out;
  1413. }
  1414. ecc_swap_digits(public_key, pk->x, ndigits);
  1415. ecc_swap_digits(&public_key[ndigits], pk->y, ndigits);
  1416. ret = ecc_is_pubkey_valid_partial(curve, pk);
  1417. if (ret)
  1418. goto err_alloc_product;
  1419. product = ecc_alloc_point(ndigits);
  1420. if (!product) {
  1421. ret = -ENOMEM;
  1422. goto err_alloc_product;
  1423. }
  1424. ecc_point_mult(product, pk, private_key, rand_z, curve, ndigits);
  1425. if (ecc_point_is_zero(product)) {
  1426. ret = -EFAULT;
  1427. goto err_validity;
  1428. }
  1429. ecc_swap_digits(product->x, secret, ndigits);
  1430. err_validity:
  1431. memzero_explicit(rand_z, sizeof(rand_z));
  1432. ecc_free_point(product);
  1433. err_alloc_product:
  1434. ecc_free_point(pk);
  1435. out:
  1436. return ret;
  1437. }
  1438. EXPORT_SYMBOL(crypto_ecdh_shared_secret);
  1439. MODULE_DESCRIPTION("core elliptic curve module");
  1440. MODULE_LICENSE("Dual BSD/GPL");