vicodec-codec.c

// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright 2016 Tom aan de Wiel
 * Copyright 2018 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
 *
 * 8x8 Fast Walsh Hadamard Transform in sequency order based on the paper:
 *
 * A Recursive Algorithm for Sequency-Ordered Fast Walsh Transforms,
 * R.D. Brown, 1977
 */

#include <linux/string.h>
#include "vicodec-codec.h"

#define ALL_ZEROS 15		/* run-length code: the rest of the block is zero */
#define DEADZONE_WIDTH 20	/* dead-zone: quantized coefficients in [-20, 20] are zeroed */

static const uint8_t zigzag[64] = {
	0,
	1, 8,
	2, 9, 16,
	3, 10, 17, 24,
	4, 11, 18, 25, 32,
	5, 12, 19, 26, 33, 40,
	6, 13, 20, 27, 34, 41, 48,
	7, 14, 21, 28, 35, 42, 49, 56,
	15, 22, 29, 36, 43, 50, 57,
	23, 30, 37, 44, 51, 58,
	31, 38, 45, 52, 59,
	39, 46, 53, 60,
	47, 54, 61,
	55, 62,
	63,
};
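
/*
 * Run-length encode one 8x8 block of coefficients into 16-bit code words.
 * Each word packs a zero-run length in its low 4 bits and the next
 * non-zero coefficient in the upper 12 bits; a trailing run of more than
 * 14 zeros is collapsed into a single ALL_ZEROS word. The first word
 * written is a header whose PFRAME_BIT (from vicodec-codec.h) marks a
 * P-block. Returns the number of 16-bit words written.
 */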
static int rlc(const s16 *in, __be16 *output, int blocktype)
{
	s16 block[8 * 8];
	s16 *wp = block;
	int i = 0;
	int x, y;
	int ret = 0;
	int lastzero_run = 0;
	int to_encode;

	/* read in the block from the framebuffer */
	for (y = 0; y < 8; y++) {
		for (x = 0; x < 8; x++) {
			*wp = in[x + y * 8];
			wp++;
		}
	}

	/* keep track of the number of trailing zeros */
	for (i = 63; i >= 0 && !block[zigzag[i]]; i--)
		lastzero_run++;

	*output++ = (blocktype == PBLOCK ? htons(PFRAME_BIT) : 0);
	ret++;

	to_encode = 8 * 8 - (lastzero_run > 14 ? lastzero_run : 0);

	i = 0;
	while (i < to_encode) {
		int cnt = 0;
		int tmp;

		/* count leading zeros */
		while ((tmp = block[zigzag[i]]) == 0 && cnt < 14) {
			cnt++;
			i++;
			if (i == to_encode) {
				cnt--;
				break;
			}
		}
		/* 4 bits for the zero-run, 12 bits for the coefficient */
		*output++ = htons(cnt | tmp << 4);
		i++;
		ret++;
	}
	if (lastzero_run > 14) {
		*output = htons(ALL_ZEROS | 0);
		ret++;
	}

	return ret;
}

/*
 * This function will worst-case advance rlc_in by 65 * 2 bytes:
 * one 16-bit word for the header and 8 * 8 coefficients of type s16.
 */
static s16 derlc(const __be16 **rlc_in, s16 *dwht_out)
{
	/* header */
	const __be16 *input = *rlc_in;
	s16 ret = ntohs(*input++);
	int dec_count = 0;
	s16 block[8 * 8 + 16];
	s16 *wp = block;
	int i;

	/*
	 * Now de-compress: each code word expands to up to 15 coefficients
	 * (or fills the remainder of the 64 coefficients with zeroes if it
	 * is the last word to expand).
	 *
	 * So block has to hold 8 * 8 + 16 values; the '+ 16' allows for
	 * overflow if the incoming data was malformed.
	 */
	while (dec_count < 8 * 8) {
		s16 in = ntohs(*input++);
		int length = in & 0xf;
		int coeff = in >> 4;

		/* fill the rest of the block with zeros */
		if (length == 15) {
			for (i = 0; i < 64 - dec_count; i++)
				*wp++ = 0;
			break;
		}
		for (i = 0; i < length; i++)
			*wp++ = 0;
		*wp++ = coeff;
		dec_count += length + 1;
	}

	wp = block;
	for (i = 0; i < 64; i++) {
		int pos = zigzag[i];
		int y = pos / 8;
		int x = pos % 8;

		dwht_out[x + y * 8] = *wp++;
	}
	*rlc_in = input;
	return ret;
}
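
/*
 * The quantization tables hold per-coefficient right-shift amounts,
 * listed row by row: high-frequency coefficients (towards the bottom
 * right) are quantized more coarsely than low-frequency ones, and the
 * P-block table quantizes more coarsely than the I-block table.
 */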
static const int quant_table[] = {
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 2,
	2, 2, 2, 2, 2, 2, 2, 3,
	2, 2, 2, 2, 2, 2, 3, 6,
	2, 2, 2, 2, 2, 3, 6, 6,
	2, 2, 2, 2, 3, 6, 6, 6,
	2, 2, 2, 3, 6, 6, 6, 6,
	2, 2, 3, 6, 6, 6, 6, 8,
};

static const int quant_table_p[] = {
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 3,
	3, 3, 3, 3, 3, 3, 3, 6,
	3, 3, 3, 3, 3, 3, 6, 6,
	3, 3, 3, 3, 3, 6, 6, 9,
	3, 3, 3, 3, 6, 6, 9, 9,
	3, 3, 3, 6, 6, 9, 9, 10,
};

/* quantize I-block coefficients, storing the dequantized values for reconstruction */
static void quantize_intra(s16 *coeff, s16 *de_coeff)
{
	const int *quant = quant_table;
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
			*coeff >>= *quant;
			if (*coeff >= -DEADZONE_WIDTH &&
			    *coeff <= DEADZONE_WIDTH)
				*coeff = *de_coeff = 0;
			else
				*de_coeff = *coeff << *quant;
		}
	}
}

static void dequantize_intra(s16 *coeff)
{
	const int *quant = quant_table;
	int i, j;

	for (j = 0; j < 8; j++)
		for (i = 0; i < 8; i++, quant++, coeff++)
			*coeff <<= *quant;
}

/* as quantize_intra(), but with the coarser P-block table */
static void quantize_inter(s16 *coeff, s16 *de_coeff)
{
	const int *quant = quant_table_p;
	int i, j;

	for (j = 0; j < 8; j++) {
		for (i = 0; i < 8; i++, quant++, coeff++, de_coeff++) {
			*coeff >>= *quant;
			if (*coeff >= -DEADZONE_WIDTH &&
			    *coeff <= DEADZONE_WIDTH)
				*coeff = *de_coeff = 0;
			else
				*de_coeff = *coeff << *quant;
		}
	}
}

static void dequantize_inter(s16 *coeff)
{
	const int *quant = quant_table_p;
	int i, j;

	for (j = 0; j < 8; j++)
		for (i = 0; i < 8; i++, quant++, coeff++)
			*coeff <<= *quant;
}
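
/*
 * Forward 8x8 FWHT on unsigned 8-bit samples: three butterfly stages
 * over the rows, then three over the columns. 'stride' is in samples;
 * 'input_step' is the distance between horizontally adjacent samples
 * (the else-branch below assumes a step of 2, as used for interleaved
 * chroma). For intra blocks, subtracting 'add' (256 per sample pair)
 * removes the DC bias so the coefficients stay centred around zero.
 */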
static void fwht(const u8 *block, s16 *output_block, unsigned int stride,
		 unsigned int input_step, bool intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const u8 *tmp = block;
	s16 *out = output_block;
	int add = intra ? 256 : 0;
	unsigned int i;

	stride *= input_step;

	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		/* stage 1 */
		if (input_step == 1) {
			workspace1[0] = tmp[0] + tmp[1] - add;
			workspace1[1] = tmp[0] - tmp[1];
			workspace1[2] = tmp[2] + tmp[3] - add;
			workspace1[3] = tmp[2] - tmp[3];
			workspace1[4] = tmp[4] + tmp[5] - add;
			workspace1[5] = tmp[4] - tmp[5];
			workspace1[6] = tmp[6] + tmp[7] - add;
			workspace1[7] = tmp[6] - tmp[7];
		} else {
			workspace1[0] = tmp[0] + tmp[2] - add;
			workspace1[1] = tmp[0] - tmp[2];
			workspace1[2] = tmp[4] + tmp[6] - add;
			workspace1[3] = tmp[4] - tmp[6];
			workspace1[4] = tmp[8] + tmp[10] - add;
			workspace1[5] = tmp[8] - tmp[10];
			workspace1[6] = tmp[12] + tmp[14] - add;
			workspace1[7] = tmp[12] - tmp[14];
		}
		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];
		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;
	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];
		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];
		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];
		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];
		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];
		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}

/*
 * Not the nicest way of doing it, but P-blocks get twice the range of
 * I-blocks, so an 8-bit type is not enough and the values can be
 * negative. This is a variant of fwht() that operates on signed
 * 16-bit data.
 */
static void fwht16(const s16 *block, s16 *output_block, int stride, int intra)
{
	/* we'll need more than 8 bits for the transformed coefficients */
	s32 workspace1[8], workspace2[8];
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	for (i = 0; i < 8; i++, tmp += stride, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];
		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];
		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];
		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];
		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];
		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;
	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];
		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];
		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];
		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];
		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];
		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		out[0 * 8] = workspace2[0] + workspace2[4];
		out[1 * 8] = workspace2[0] - workspace2[4];
		out[2 * 8] = workspace2[1] - workspace2[5];
		out[3 * 8] = workspace2[1] + workspace2[5];
		out[4 * 8] = workspace2[2] + workspace2[6];
		out[5 * 8] = workspace2[2] - workspace2[6];
		out[6 * 8] = workspace2[3] - workspace2[7];
		out[7 * 8] = workspace2[3] + workspace2[7];
	}
}
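
/*
 * Inverse 8x8 FWHT. The forward/inverse pair gains a factor of 64, so
 * the final stage shifts every sample right by 6. For intra blocks,
 * adding back 128 per sample undoes the DC bias removed in fwht()
 * (32 subtractions of 256 = 64 * 128 per block).
 */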
static void ifwht(const s16 *block, s16 *output_block, int intra)
{
	/*
	 * we'll need more than 8 bits for the transformed coefficients,
	 * use the CPU's native word size
	 */
	int workspace1[8], workspace2[8];
	int inter = intra ? 0 : 1;
	const s16 *tmp = block;
	s16 *out = output_block;
	int i;

	for (i = 0; i < 8; i++, tmp += 8, out += 8) {
		/* stage 1 */
		workspace1[0] = tmp[0] + tmp[1];
		workspace1[1] = tmp[0] - tmp[1];
		workspace1[2] = tmp[2] + tmp[3];
		workspace1[3] = tmp[2] - tmp[3];
		workspace1[4] = tmp[4] + tmp[5];
		workspace1[5] = tmp[4] - tmp[5];
		workspace1[6] = tmp[6] + tmp[7];
		workspace1[7] = tmp[6] - tmp[7];
		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];
		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		out[0] = workspace2[0] + workspace2[4];
		out[1] = workspace2[0] - workspace2[4];
		out[2] = workspace2[1] - workspace2[5];
		out[3] = workspace2[1] + workspace2[5];
		out[4] = workspace2[2] + workspace2[6];
		out[5] = workspace2[2] - workspace2[6];
		out[6] = workspace2[3] - workspace2[7];
		out[7] = workspace2[3] + workspace2[7];
	}

	out = output_block;
	for (i = 0; i < 8; i++, out++) {
		/* stage 1 */
		workspace1[0] = out[0] + out[1 * 8];
		workspace1[1] = out[0] - out[1 * 8];
		workspace1[2] = out[2 * 8] + out[3 * 8];
		workspace1[3] = out[2 * 8] - out[3 * 8];
		workspace1[4] = out[4 * 8] + out[5 * 8];
		workspace1[5] = out[4 * 8] - out[5 * 8];
		workspace1[6] = out[6 * 8] + out[7 * 8];
		workspace1[7] = out[6 * 8] - out[7 * 8];
		/* stage 2 */
		workspace2[0] = workspace1[0] + workspace1[2];
		workspace2[1] = workspace1[0] - workspace1[2];
		workspace2[2] = workspace1[1] - workspace1[3];
		workspace2[3] = workspace1[1] + workspace1[3];
		workspace2[4] = workspace1[4] + workspace1[6];
		workspace2[5] = workspace1[4] - workspace1[6];
		workspace2[6] = workspace1[5] - workspace1[7];
		workspace2[7] = workspace1[5] + workspace1[7];
		/* stage 3 */
		if (inter) {
			int d;

			out[0 * 8] = workspace2[0] + workspace2[4];
			out[1 * 8] = workspace2[0] - workspace2[4];
			out[2 * 8] = workspace2[1] - workspace2[5];
			out[3 * 8] = workspace2[1] + workspace2[5];
			out[4 * 8] = workspace2[2] + workspace2[6];
			out[5 * 8] = workspace2[2] - workspace2[6];
			out[6 * 8] = workspace2[3] - workspace2[7];
			out[7 * 8] = workspace2[3] + workspace2[7];

			for (d = 0; d < 8; d++)
				out[8 * d] >>= 6;
		} else {
			int d;

			out[0 * 8] = workspace2[0] + workspace2[4];
			out[1 * 8] = workspace2[0] - workspace2[4];
			out[2 * 8] = workspace2[1] - workspace2[5];
			out[3 * 8] = workspace2[1] + workspace2[5];
			out[4 * 8] = workspace2[2] + workspace2[6];
			out[5 * 8] = workspace2[2] - workspace2[6];
			out[6 * 8] = workspace2[3] - workspace2[7];
			out[7 * 8] = workspace2[3] + workspace2[7];

			for (d = 0; d < 8; d++) {
				out[8 * d] >>= 6;
				out[8 * d] += 128;
			}
		}
	}
}

/* copy an 8x8 block of samples from the source plane into a contiguous s16 block */
static void fill_encoder_block(const u8 *input, s16 *dst,
			       unsigned int stride, unsigned int input_step)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++, input += input_step)
			*dst++ = *input;
		input += (stride - 8) * input_step;
	}
}

/* sum of absolute deviations from the block mean (not a true variance) */
static int var_intra(const s16 *input)
{
	int32_t mean = 0;
	int32_t ret = 0;
	const s16 *tmp = input;
	int i;

	for (i = 0; i < 8 * 8; i++, tmp++)
		mean += *tmp;
	mean /= 64;
	tmp = input;
	for (i = 0; i < 8 * 8; i++, tmp++)
		ret += (*tmp - mean) < 0 ? -(*tmp - mean) : (*tmp - mean);
	return ret;
}

/* sum of absolute differences between the old and new block */
static int var_inter(const s16 *old, const s16 *new)
{
	int32_t ret = 0;
	int i;

	for (i = 0; i < 8 * 8; i++, old++, new++)
		ret += (*old - *new) < 0 ? -(*old - *new) : (*old - *new);
	return ret;
}
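
/*
 * Decide between intra and inter coding for one block: fill deltablock
 * with the difference against the reference block, then compare the
 * intra and inter activity measures. The cheaper of the two wins, with
 * ties going to IBLOCK.
 */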
static int decide_blocktype(const u8 *cur, const u8 *reference,
			    s16 *deltablock, unsigned int stride,
			    unsigned int input_step)
{
	s16 tmp[64];
	s16 old[64];
	s16 *work = tmp;
	unsigned int k, l;
	int vari;
	int vard;

	fill_encoder_block(cur, tmp, stride, input_step);
	fill_encoder_block(reference, old, 8, 1);
	vari = var_intra(tmp);

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltablock = *work - *reference;
			deltablock++;
			work++;
			reference++;
		}
	}
	deltablock -= 64;
	vard = var_inter(old, tmp);
	return vari <= vard ? IBLOCK : PBLOCK;
}

/* write a reconstructed 8x8 block back into the plane, clamping to 0..255 */
static void fill_decoder_block(u8 *dst, const s16 *input, int stride)
{
	int i, j;

	for (i = 0; i < 8; i++) {
		for (j = 0; j < 8; j++, input++, dst++) {
			if (*input < 0)
				*dst = 0;
			else if (*input > 255)
				*dst = 255;
			else
				*dst = *input;
		}
		dst += stride - 8;
	}
}

static void add_deltas(s16 *deltas, const u8 *ref, int stride)
{
	int k, l;

	for (k = 0; k < 8; k++) {
		for (l = 0; l < 8; l++) {
			*deltas += *ref++;
			/*
			 * Due to quantizing, it is possible that the
			 * decoded coefficients are slightly out of range
			 */
			if (*deltas < 0)
				*deltas = 0;
			else if (*deltas > 255)
				*deltas = 255;
			deltas++;
		}
		ref += stride - 8;
	}
}
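
/*
 * Encode one plane. For each 8x8 block: pick I or P coding, transform
 * and quantize, and (unless the next frame is intra anyway) reconstruct
 * the reference plane exactly as the decoder will. An RLC block that is
 * identical to its predecessor is folded into that predecessor's header
 * by bumping the duplicate counter in its DUPS_MASK field (PFRAME_BIT
 * and DUPS_MASK come from vicodec-codec.h). If the output would pass
 * rlco_max, the plane is stored as raw YUV instead and FRAME_UNENCODED
 * is set in the returned flags.
 */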
static u32 encode_plane(u8 *input, u8 *refp, __be16 **rlco, __be16 *rlco_max,
			struct cframe *cf, u32 height, u32 width,
			unsigned int input_step,
			bool is_intra, bool next_is_intra)
{
	u8 *input_start = input;
	__be16 *rlco_start = *rlco;
	s16 deltablock[64];
	__be16 pframe_bit = htons(PFRAME_BIT);
	u32 encoding = 0;
	unsigned int last_size = 0;
	unsigned int i, j;

	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			/* intra code: the first frame is always intra coded */
			int blocktype = IBLOCK;
			unsigned int size;

			if (!is_intra)
				blocktype = decide_blocktype(input, refp,
					deltablock, width, input_step);
			if (is_intra || blocktype == IBLOCK) {
				fwht(input, cf->coeffs, width, input_step, 1);
				quantize_intra(cf->coeffs, cf->de_coeffs);
				blocktype = IBLOCK;
			} else {
				/* inter code */
				encoding |= FRAME_PCODED;
				fwht16(deltablock, cf->coeffs, 8, 0);
				quantize_inter(cf->coeffs, cf->de_coeffs);
			}
			if (!next_is_intra) {
				ifwht(cf->de_coeffs, cf->de_fwht, blocktype);

				if (blocktype == PBLOCK)
					add_deltas(cf->de_fwht, refp, 8);
				fill_decoder_block(refp, cf->de_fwht, 8);
			}

			input += 8 * input_step;
			refp += 8 * 8;

			if (encoding & FRAME_UNENCODED)
				continue;

			size = rlc(cf->coeffs, *rlco, blocktype);
			if (last_size == size &&
			    !memcmp(*rlco + 1, *rlco - size + 1, 2 * size - 2)) {
				__be16 *last_rlco = *rlco - size;
				s16 hdr = ntohs(*last_rlco);

				if (!((*last_rlco ^ **rlco) & pframe_bit) &&
				    (hdr & DUPS_MASK) < DUPS_MASK)
					*last_rlco = htons(hdr + 2);
				else
					*rlco += size;
			} else {
				*rlco += size;
			}
			if (*rlco >= rlco_max)
				encoding |= FRAME_UNENCODED;
			last_size = size;
		}
		input += width * 7 * input_step;
	}
	if (encoding & FRAME_UNENCODED) {
		u8 *out = (u8 *)rlco_start;

		input = input_start;
		/*
		 * The compressed stream should never contain the magic
		 * header, so when we copy the YUV data we replace 0xff
		 * by 0xfe. Since YUV has limited range, such values
		 * shouldn't appear anyway.
		 */
		for (i = 0; i < height * width; i++, input += input_step)
			*out++ = (*input == 0xff) ? 0xfe : *input;
		*rlco = (__be16 *)out;
	}
	return encoding;
}
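
/*
 * Encode a full frame: the luma plane, then both chroma planes at half
 * resolution. Each plane gets a 256-word safety margin below its share
 * of the output buffer; a plane that still overflows is stored raw and
 * flagged with the matching *_UNENCODED bit.
 */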
u32 encode_frame(struct raw_frame *frm, struct raw_frame *ref_frm,
		 struct cframe *cf, bool is_intra, bool next_is_intra)
{
	unsigned int size = frm->height * frm->width;
	__be16 *rlco = cf->rlc_data;
	__be16 *rlco_max;
	u32 encoding;

	rlco_max = rlco + size / 2 - 256;
	encoding = encode_plane(frm->luma, ref_frm->luma, &rlco, rlco_max, cf,
				frm->height, frm->width,
				1, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= LUMA_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	rlco_max = rlco + size / 8 - 256;
	encoding |= encode_plane(frm->cb, ref_frm->cb, &rlco, rlco_max, cf,
				 frm->height / 2, frm->width / 2,
				 frm->chroma_step, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= CB_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	rlco_max = rlco + size / 8 - 256;
	encoding |= encode_plane(frm->cr, ref_frm->cr, &rlco, rlco_max, cf,
				 frm->height / 2, frm->width / 2,
				 frm->chroma_step, is_intra, next_is_intra);
	if (encoding & FRAME_UNENCODED)
		encoding |= CR_UNENCODED;
	encoding &= ~FRAME_UNENCODED;
	cf->size = (rlco - cf->rlc_data) * sizeof(*rlco);
	return encoding;
}
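
/*
 * Decode one plane. The duplicate counter in a block header
 * ((stat & DUPS_MASK) >> 1) gives the number of identical blocks that
 * follow; those reuse the saved inverse-transformed block and only
 * repeat the delta-add/copy-out step against the reference plane.
 */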
static void decode_plane(struct cframe *cf, const __be16 **rlco, u8 *ref,
			 u32 height, u32 width, bool uncompressed)
{
	unsigned int copies = 0;
	s16 copy[8 * 8];
	s16 stat;
	unsigned int i, j;

	if (uncompressed) {
		memcpy(ref, *rlco, width * height);
		*rlco += width * height / 2;
		return;
	}

	/*
	 * When decoding each macroblock the rlco pointer will be increased
	 * by 65 * 2 bytes worst-case.
	 * To avoid overflow the buffer has to be 65/64th of the actual raw
	 * image size, just in case someone feeds it malicious data.
	 */
	for (j = 0; j < height / 8; j++) {
		for (i = 0; i < width / 8; i++) {
			u8 *refp = ref + j * 8 * width + i * 8;

			if (copies) {
				memcpy(cf->de_fwht, copy, sizeof(copy));
				if (stat & PFRAME_BIT)
					add_deltas(cf->de_fwht, refp, width);
				fill_decoder_block(refp, cf->de_fwht, width);
				copies--;
				continue;
			}

			stat = derlc(rlco, cf->coeffs);

			if (stat & PFRAME_BIT)
				dequantize_inter(cf->coeffs);
			else
				dequantize_intra(cf->coeffs);

			ifwht(cf->coeffs, cf->de_fwht,
			      (stat & PFRAME_BIT) ? 0 : 1);

			copies = (stat & DUPS_MASK) >> 1;
			if (copies)
				memcpy(copy, cf->de_fwht, sizeof(copy));
			if (stat & PFRAME_BIT)
				add_deltas(cf->de_fwht, refp, width);
			fill_decoder_block(refp, cf->de_fwht, width);
		}
	}
}

void decode_frame(struct cframe *cf, struct raw_frame *ref, u32 hdr_flags)
{
	const __be16 *rlco = cf->rlc_data;

	decode_plane(cf, &rlco, ref->luma, cf->height, cf->width,
		     hdr_flags & VICODEC_FL_LUMA_IS_UNCOMPRESSED);
	decode_plane(cf, &rlco, ref->cb, cf->height / 2, cf->width / 2,
		     hdr_flags & VICODEC_FL_CB_IS_UNCOMPRESSED);
	decode_plane(cf, &rlco, ref->cr, cf->height / 2, cf->width / 2,
		     hdr_flags & VICODEC_FL_CR_IS_UNCOMPRESSED);
}
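
/*
 * Usage sketch (hypothetical caller, not part of this file). The struct
 * fields come from vicodec-codec.h; the buffer sizing shown here is an
 * assumption based on the 65/64 worst case noted in decode_plane():
 *
 *	struct cframe cf = { .rlc_data = out_buf };	// out_buf: 65/64 of raw frame size
 *	u32 flags;
 *
 *	flags = encode_frame(&frm, &ref_frm, &cf, is_intra, next_is_intra);
 *	// transmit cf.rlc_data (cf.size bytes) along with the flags
 *
 *	cf.width = frm.width;
 *	cf.height = frm.height;
 *	decode_frame(&cf, &ref_frm, hdr_flags);	// hdr_flags from the stream header
 */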