cast5_avx_glue.c

/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 *
 */

#include <asm/crypto/glue_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/internal/simd.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <linux/module.h>
#include <linux/types.h>

#define CAST5_PARALLEL_BLOCKS 16
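
/*
 * 16-way routines implemented in the accompanying AVX assembly file
 * (cast5-avx-x86_64-asm_64.S).  Each call processes
 * CAST5_PARALLEL_BLOCKS (16) eight-byte CAST5 blocks at once and must
 * only be invoked while the kernel FPU is usable.
 */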
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);
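
/*
 * setkey simply forwards to the generic CAST5 key schedule; the AVX code
 * shares struct cast5_ctx with the generic implementation.
 */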
static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
				 unsigned int keylen)
{
	return cast5_setkey(&tfm->base, key, keylen);
}
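
/*
 * glue_fpu_begin() defers kernel_fpu_begin() until the walk actually has
 * at least CAST5_PARALLEL_BLOCKS blocks to process, so small requests
 * never pay the FPU save/restore cost; glue_fpu_end() undoes it once at
 * the end of the request.
 */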
static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
				   unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      walk, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}
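
/*
 * ECB: walk the request, encrypting or decrypting 16 blocks per AVX call
 * while enough data remains, then fall back to the generic single-block
 * routines for the tail.
 */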
static int ecb_crypt(struct skcipher_request *req, bool enc)
{
	bool fpu_enabled = false;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u8 *wsrc = walk.src.virt.addr;
		u8 *wdst = walk.dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, true);
}

static int ecb_decrypt(struct skcipher_request *req)
{
	return ecb_crypt(req, false);
}
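
/*
 * CBC encryption is inherently sequential (each block's input depends on
 * the previous ciphertext block), so it is done one block at a time with
 * the generic __cast5_encrypt() and never touches the FPU.
 */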
static int cbc_encrypt(struct skcipher_request *req)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		u64 *src = (u64 *)walk.src.virt.addr;
		u64 *dst = (u64 *)walk.dst.virt.addr;
		u64 *iv = (u64 *)walk.iv;

		do {
			*dst = *src ^ *iv;
			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
			iv = dst;

			src++;
			dst++;
			nbytes -= bsize;
		} while (nbytes >= bsize);

		*(u64 *)walk.iv = *iv;
		err = skcipher_walk_done(&walk, nbytes);
	}

	return err;
}
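
/*
 * CBC decryption can be parallelised: decrypt blocks back to front so the
 * previous ciphertext block (needed for the XOR) is still intact in the
 * source buffer, and keep the last ciphertext block around as the next IV.
 */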
static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
				  struct skcipher_walk *walk)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}
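
/* Walk the request and bracket the 16-way decryption in FPU sections. */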
static int cbc_decrypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __cbc_decrypt(ctx, &walk);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}
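
/*
 * CTR tail: encrypt the counter block to produce a keystream and XOR only
 * the remaining (sub-block-size) bytes into the destination.
 */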
static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
{
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor_cpy(dst, keystream, src, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}
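
/*
 * CTR body: the 16-way assembly consumes and advances the big-endian
 * counter itself; leftover whole blocks are handled with the generic
 * cipher, incrementing the counter with be64_add_cpu().
 */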
static unsigned int __ctr_crypt(struct skcipher_walk *walk,
				struct cast5_ctx *ctx)
{
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}
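
/*
 * Top-level CTR handler: process full blocks under the FPU, then handle a
 * trailing partial block (if any) with ctr_crypt_final() outside the FPU
 * section.
 */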
static int ctr_crypt(struct skcipher_request *req)
{
	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
	bool fpu_enabled = false;
	struct skcipher_walk walk;
	unsigned int nbytes;
	int err;

	err = skcipher_walk_virt(&walk, req, false);

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
		nbytes = __ctr_crypt(&walk, ctx);
		err = skcipher_walk_done(&walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(&walk, ctx);
		err = skcipher_walk_done(&walk, 0);
	}

	return err;
}
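
/*
 * The "__" prefix and CRYPTO_ALG_INTERNAL mark these algorithms as
 * internal-only: they may assume the FPU is usable.  The simd wrappers
 * registered below expose the usual "ecb(cast5)", "cbc(cast5)" and
 * "ctr(cast5)" names and fall back to an asynchronous cryptd path when
 * the FPU is not available.
 */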
static struct skcipher_alg cast5_algs[] = {
	{
		.base.cra_name		= "__ecb(cast5)",
		.base.cra_driver_name	= "__ecb-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ecb_encrypt,
		.decrypt		= ecb_decrypt,
	}, {
		.base.cra_name		= "__cbc(cast5)",
		.base.cra_driver_name	= "__cbc-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= cbc_encrypt,
		.decrypt		= cbc_decrypt,
	}, {
		.base.cra_name		= "__ctr(cast5)",
		.base.cra_driver_name	= "__ctr-cast5-avx",
		.base.cra_priority	= 200,
		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
		.base.cra_blocksize	= 1,
		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
		.base.cra_module	= THIS_MODULE,
		.min_keysize		= CAST5_MIN_KEY_SIZE,
		.max_keysize		= CAST5_MAX_KEY_SIZE,
		.ivsize			= CAST5_BLOCK_SIZE,
		.chunksize		= CAST5_BLOCK_SIZE,
		.setkey			= cast5_setkey_skcipher,
		.encrypt		= ctr_crypt,
		.decrypt		= ctr_crypt,
	}
};

static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
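
/*
 * Module init: refuse to load unless the CPU/OS support the SSE and YMM
 * (AVX) xsave states, then register the simd wrappers for all three modes.
 */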
static int __init cast5_init(void)
{
	const char *feature_name;

	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
			       &feature_name)) {
		pr_info("CPU feature '%s' is not supported.\n", feature_name);
		return -ENODEV;
	}

	return simd_register_skciphers_compat(cast5_algs,
					      ARRAY_SIZE(cast5_algs),
					      cast5_simd_algs);
}

static void __exit cast5_exit(void)
{
	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
				  cast5_simd_algs);
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");