vecemu.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Routines to emulate some Altivec/VMX instructions, specifically
  4. * those that can trap when given denormalized operands in Java mode.
  5. */
  6. #include <linux/kernel.h>
  7. #include <linux/errno.h>
  8. #include <linux/sched.h>
  9. #include <asm/ptrace.h>
  10. #include <asm/processor.h>
  11. #include <asm/switch_to.h>
  12. #include <linux/uaccess.h>
  13. /* Functions in vector.S */
  14. extern void vaddfp(vector128 *dst, vector128 *a, vector128 *b);
  15. extern void vsubfp(vector128 *dst, vector128 *a, vector128 *b);
  16. extern void vmaddfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  17. extern void vnmsubfp(vector128 *dst, vector128 *a, vector128 *b, vector128 *c);
  18. extern void vrefp(vector128 *dst, vector128 *src);
  19. extern void vrsqrtefp(vector128 *dst, vector128 *src);
  20. extern void vexptep(vector128 *dst, vector128 *src);
/*
 * Mantissas of 2^(i/8) for i = 0..7, in 1.23 fixed point
 * (e.g. 0xb504f3 / 2^23 ~= 1.41421 = 2^0.5).  Used as the
 * table-lookup step of the 2^x estimate in eexp2() below.
 */
static unsigned int exp2s[8] = {
	0x800000,
	0x8b95c2,
	0x9837f0,
	0xa5fed7,
	0xb504f3,
	0xc5672a,
	0xd744fd,
	0xeac0c7
};
/*
 * Computes an estimate of 2^x. The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is likewise a single-precision bit pattern.
 *
 * Method: convert x to signed 9.23 fixed point, split off the integer
 * part as the result exponent, then approximate 2^fraction with a
 * table lookup on the top 3 fraction bits plus linear interpolation
 * on the remaining 20 bits (using the PPC mulhwu high-multiply).
 */
static unsigned int eexp2(unsigned int s)
{
	int exp, pwr;
	unsigned int mant, frac;

	/* extract exponent field from input */
	exp = ((s >> 23) & 0xff) - 127;
	if (exp > 7) {
		/* check for NaN input */
		if (exp == 128 && (s & 0x7fffff) != 0)
			return s | 0x400000;	/* return QNaN */
		/* 2^-big = 0, 2^+big = +Inf */
		return (s & 0x80000000)? 0: 0x7f800000;	/* 0 or +Inf */
	}
	if (exp < -23)
		return 0x3f800000;	/* |x| is tiny, 2^x ~= 1.0 */
	/* convert to fixed point integer in 9.23 representation */
	pwr = (s & 0x7fffff) | 0x800000;
	if (exp > 0)
		pwr <<= exp;
	else
		pwr >>= -exp;
	if (s & 0x80000000)
		pwr = -pwr;
	/* extract integer part, which becomes exponent part of result */
	exp = (pwr >> 23) + 126;
	if (exp >= 254)
		return 0x7f800000;	/* result overflows -> +Inf */
	if (exp < -23)
		return 0;		/* underflows even the denormal range */
	/* table lookup on top 3 bits of fraction to get mantissa */
	mant = exp2s[(pwr >> 20) & 7];
	/* linear interpolation using remaining 20 bits of fraction;
	   0x172b83ff ~= (2^(1/8) - 1) * 2^32, the slope across one
	   table interval */
	asm("mulhwu %0,%1,%2" : "=r" (frac)
	    : "r" (pwr << 12), "r" (0x172b83ff));
	asm("mulhwu %0,%1,%2" : "=r" (frac) : "r" (frac), "r" (mant));
	mant += frac;
	if (exp >= 0)
		return mant + (exp << 23);
	/* denormalized result: shift the mantissa right, rounding
	   to nearest by pre-adding half the last-kept bit */
	exp = -exp;
	mant += 1 << (exp - 1);
	return mant >> exp;
}
/*
 * Computes an estimate of log_2(x). The `s' argument is the 32-bit
 * single-precision floating-point representation of x; the return
 * value is likewise a single-precision bit pattern.
 *
 * Method: take the unbiased exponent as the integer part of the log
 * (kept scaled by 2^23), range-reduce the mantissa towards 1.0 by
 * known powers of 2^-1/8 while accumulating the matching fixed-point
 * log bits, linearly interpolate the残 remainder, then convert the
 * resulting signed 9.23 fixed-point value back to a float.
 */
static unsigned int elog2(unsigned int s)
{
	int exp, mant, lz, frac;

	exp = s & 0x7f800000;		/* biased exponent field, in place */
	mant = s & 0x7fffff;
	if (exp == 0x7f800000) {	/* Inf or NaN */
		if (mant != 0)
			s |= 0x400000;	/* turn NaN into QNaN */
		return s;		/* +/-Inf passes straight through */
	}
	if ((exp | mant) == 0)		/* +0 or -0 */
		return 0xff800000;	/* return -Inf */
	if (exp == 0) {
		/* denormalized: normalize the mantissa by hand */
		asm("cntlzw %0,%1" : "=r" (lz) : "r" (mant));
		mant <<= lz - 8;		/* implicit bit now at bit 23 */
		exp = (-118 - lz) << 23;	/* true exponent, scaled by 2^23 */
	} else {
		mant |= 0x800000;	/* make the implicit leading bit explicit */
		exp -= 127 << 23;	/* remove the bias, still scaled by 2^23 */
	}
	/*
	 * Range reduction: divide mant by 2^0.5, 2^0.25, 2^0.125 as needed
	 * so it ends up in [1.0, 2^0.125), ORing the corresponding log
	 * contribution (0.5, 0.25, 0.125, scaled by 2^23) into exp.
	 */
	if (mant >= 0xb504f3) {		/* 2^0.5 * 2^23 */
		exp |= 0x400000;	/* 0.5 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xb504f334));	/* 2^-0.5 * 2^32 */
	}
	if (mant >= 0x9837f0) {		/* 2^0.25 * 2^23 */
		exp |= 0x200000;	/* 0.25 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xd744fccb));	/* 2^-0.25 * 2^32 */
	}
	if (mant >= 0x8b95c2) {		/* 2^0.125 * 2^23 */
		exp |= 0x100000;	/* 0.125 * 2^23 */
		asm("mulhwu %0,%1,%2" : "=r" (mant)
		    : "r" (mant), "r" (0xeac0c6e8));	/* 2^-0.125 * 2^32 */
	}
	if (mant > 0x800000) {		/* 1.0 * 2^23 */
		/* calculate (mant - 1) * 1.381097463 */
		/* 1.381097463 == 0.125 / (2^0.125 - 1) */
		asm("mulhwu %0,%1,%2" : "=r" (frac)
		    : "r" ((mant - 0x800000) << 1), "r" (0xb0c7cd3a));
		exp += frac;
	}
	/* exp now holds log2(x) in signed 9.23 fixed point; repack as float */
	s = exp & 0x80000000;		/* sign bit of the result */
	if (exp != 0) {
		if (s)
			exp = -exp;	/* work with the magnitude */
		/* normalize: locate the most significant set bit */
		asm("cntlzw %0,%1" : "=r" (lz) : "r" (exp));
		lz = 8 - lz;
		if (lz > 0)
			exp >>= lz;
		else if (lz < 0)
			exp <<= -lz;
		/* assemble biased exponent + mantissa; the leading bit of
		   exp lands on the implicit-one position */
		s += ((lz + 126) << 23) + exp;
	}
	return s;			/* exp == 0 gives +0.0 */
}
  139. #define VSCR_SAT 1
  140. static int ctsxs(unsigned int x, int scale, unsigned int *vscrp)
  141. {
  142. int exp, mant;
  143. exp = (x >> 23) & 0xff;
  144. mant = x & 0x7fffff;
  145. if (exp == 255 && mant != 0)
  146. return 0; /* NaN -> 0 */
  147. exp = exp - 127 + scale;
  148. if (exp < 0)
  149. return 0; /* round towards zero */
  150. if (exp >= 31) {
  151. /* saturate, unless the result would be -2^31 */
  152. if (x + (scale << 23) != 0xcf000000)
  153. *vscrp |= VSCR_SAT;
  154. return (x & 0x80000000)? 0x80000000: 0x7fffffff;
  155. }
  156. mant |= 0x800000;
  157. mant = (mant << 7) >> (30 - exp);
  158. return (x & 0x80000000)? -mant: mant;
  159. }
  160. static unsigned int ctuxs(unsigned int x, int scale, unsigned int *vscrp)
  161. {
  162. int exp;
  163. unsigned int mant;
  164. exp = (x >> 23) & 0xff;
  165. mant = x & 0x7fffff;
  166. if (exp == 255 && mant != 0)
  167. return 0; /* NaN -> 0 */
  168. exp = exp - 127 + scale;
  169. if (exp < 0)
  170. return 0; /* round towards zero */
  171. if (x & 0x80000000) {
  172. /* negative => saturate to 0 */
  173. *vscrp |= VSCR_SAT;
  174. return 0;
  175. }
  176. if (exp >= 32) {
  177. /* saturate */
  178. *vscrp |= VSCR_SAT;
  179. return 0xffffffff;
  180. }
  181. mant |= 0x800000;
  182. mant = (mant << 8) >> (31 - exp);
  183. return mant;
  184. }
  185. /* Round to floating integer, towards 0 */
  186. static unsigned int rfiz(unsigned int x)
  187. {
  188. int exp;
  189. exp = ((x >> 23) & 0xff) - 127;
  190. if (exp == 128 && (x & 0x7fffff) != 0)
  191. return x | 0x400000; /* NaN -> make it a QNaN */
  192. if (exp >= 23)
  193. return x; /* it's an integer already (or Inf) */
  194. if (exp < 0)
  195. return x & 0x80000000; /* |x| < 1.0 rounds to 0 */
  196. return x & ~(0x7fffff >> exp);
  197. }
  198. /* Round to floating integer, towards +/- Inf */
  199. static unsigned int rfii(unsigned int x)
  200. {
  201. int exp, mask;
  202. exp = ((x >> 23) & 0xff) - 127;
  203. if (exp == 128 && (x & 0x7fffff) != 0)
  204. return x | 0x400000; /* NaN -> make it a QNaN */
  205. if (exp >= 23)
  206. return x; /* it's an integer already (or Inf) */
  207. if ((x & 0x7fffffff) == 0)
  208. return x; /* +/-0 -> +/-0 */
  209. if (exp < 0)
  210. /* 0 < |x| < 1.0 rounds to +/- 1.0 */
  211. return (x & 0x80000000) | 0x3f800000;
  212. mask = 0x7fffff >> exp;
  213. /* mantissa overflows into exponent - that's OK,
  214. it can't overflow into the sign bit */
  215. return (x + mask) & ~mask;
  216. }
  217. /* Round to floating integer, to nearest */
  218. static unsigned int rfin(unsigned int x)
  219. {
  220. int exp, half;
  221. exp = ((x >> 23) & 0xff) - 127;
  222. if (exp == 128 && (x & 0x7fffff) != 0)
  223. return x | 0x400000; /* NaN -> make it a QNaN */
  224. if (exp >= 23)
  225. return x; /* it's an integer already (or Inf) */
  226. if (exp < -1)
  227. return x & 0x80000000; /* |x| < 0.5 -> +/-0 */
  228. if (exp == -1)
  229. /* 0.5 <= |x| < 1.0 rounds to +/- 1.0 */
  230. return (x & 0x80000000) | 0x3f800000;
  231. half = 0x400000 >> exp;
  232. /* add 0.5 to the magnitude and chop off the fraction bits */
  233. return (x + half) & ~(0x7fffff >> exp);
  234. }
/*
 * Emulate one AltiVec/VMX instruction, read from user memory at
 * regs->nip, operating on the current task's saved vector state.
 *
 * Returns 0 on success, -EFAULT if the instruction word cannot be
 * read, or -EINVAL if it is not one of the instructions emulated here
 * (primary opcode 4, sub-ops below).
 */
int emulate_altivec(struct pt_regs *regs)
{
	unsigned int instr, i;
	unsigned int va, vb, vc, vd;
	vector128 *vrs;

	/* fetch the trapping instruction word from user space */
	if (get_user(instr, (unsigned int __user *) regs->nip))
		return -EFAULT;
	if ((instr >> 26) != 4)
		return -EINVAL;		/* not an altivec instruction */
	/* decode the four 5-bit vector register fields */
	vd = (instr >> 21) & 0x1f;
	va = (instr >> 16) & 0x1f;
	vb = (instr >> 11) & 0x1f;
	vc = (instr >> 6) & 0x1f;

	vrs = current->thread.vr_state.vr;	/* saved vector registers */
	switch (instr & 0x3f) {
	case 10:
		/* sub-operation selected by bits 6-10 (the vc field) */
		switch (vc) {
		case 0:	/* vaddfp */
			vaddfp(&vrs[vd], &vrs[va], &vrs[vb]);
			break;
		case 1:	/* vsubfp */
			vsubfp(&vrs[vd], &vrs[va], &vrs[vb]);
			break;
		case 4:	/* vrefp */
			vrefp(&vrs[vd], &vrs[vb]);
			break;
		case 5:	/* vrsqrtefp */
			vrsqrtefp(&vrs[vd], &vrs[vb]);
			break;
		case 6:	/* vexptefp: 2^x estimate, per element */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = eexp2(vrs[vb].u[i]);
			break;
		case 7:	/* vlogefp: log2(x) estimate, per element */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = elog2(vrs[vb].u[i]);
			break;
		case 8:		/* vrfin: round to nearest */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = rfin(vrs[vb].u[i]);
			break;
		case 9:		/* vrfiz: round towards zero */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = rfiz(vrs[vb].u[i]);
			break;
		case 10:	/* vrfip: round towards +Inf, i.e. truncate
				   negatives, round positives away from 0 */
			for (i = 0; i < 4; ++i) {
				u32 x = vrs[vb].u[i];
				x = (x & 0x80000000)? rfiz(x): rfii(x);
				vrs[vd].u[i] = x;
			}
			break;
		case 11:	/* vrfim: round towards -Inf, the mirror
				   of vrfip */
			for (i = 0; i < 4; ++i) {
				u32 x = vrs[vb].u[i];
				x = (x & 0x80000000)? rfii(x): rfiz(x);
				vrs[vd].u[i] = x;
			}
			break;
		case 14:	/* vctuxs: float -> unsigned int, the va
				   field is the scale; saturation is
				   recorded in the VSCR word */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = ctuxs(vrs[vb].u[i], va,
						&current->thread.vr_state.vscr.u[3]);
			break;
		case 15:	/* vctsxs: float -> signed int, as above */
			for (i = 0; i < 4; ++i)
				vrs[vd].u[i] = ctsxs(vrs[vb].u[i], va,
						&current->thread.vr_state.vscr.u[3]);
			break;
		default:
			return -EINVAL;
		}
		break;
	case 46:	/* vmaddfp */
		vmaddfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
		break;
	case 47:	/* vnmsubfp */
		vnmsubfp(&vrs[vd], &vrs[va], &vrs[vb], &vrs[vc]);
		break;
	default:
		return -EINVAL;
	}
	return 0;
}