// SPDX-License-Identifier: GPL-2.0+ OR BSD-2-Clause
/*
 * LZ4 - Fast LZ compression algorithm
 * Copyright (C) 2011 - 2016, Yann Collet.
 *
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *	* Redistributions of source code must retain the above copyright
 *	  notice, this list of conditions and the following disclaimer.
 *	* Redistributions in binary form must reproduce the above
 *	  copyright notice, this list of conditions and the following disclaimer
 *	  in the documentation and/or other materials provided with the
 *	  distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 *	- LZ4 homepage : http://www.lz4.org
 *	- LZ4 source repository : https://github.com/lz4/lz4
 */
#include <common.h>
#include <compiler.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/bug.h>
#include <asm/unaligned.h>
#include <u-boot/lz4.h>

#define FORCE_INLINE inline __attribute__((always_inline))

static FORCE_INLINE u16 LZ4_readLE16(const void *src)
{
	return get_unaligned_le16(src);
}

static FORCE_INLINE void LZ4_copy8(void *dst, const void *src)
{
	put_unaligned(get_unaligned((const u64 *)src), (u64 *)dst);
}

typedef uint8_t BYTE;
typedef uint16_t U16;
typedef uint32_t U32;
typedef int32_t S32;
typedef uint64_t U64;
typedef uintptr_t uptrval;

static FORCE_INLINE void LZ4_write32(void *memPtr, U32 value)
{
	put_unaligned(value, (U32 *)memPtr);
}
/**************************************
 *	Reading and writing into memory
 **************************************/

/*
 * customized version of memcpy, which may overwrite up to 7 bytes beyond
 * dstEnd
 */
static void LZ4_wildCopy(void *dstPtr, const void *srcPtr, void *dstEnd)
{
	BYTE *d = (BYTE *)dstPtr;
	const BYTE *s = (const BYTE *)srcPtr;
	BYTE *e = (BYTE *)dstEnd;

	do {
		LZ4_copy8(d, s);
		d += 8;
		s += 8;
	} while (d < e);
}
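/*
 * Illustrative note (added for exposition, not in the original source):
 * the loop above rounds every copy up to whole 8-byte words, so asking
 * for e.g. 10 bytes (dstEnd - dstPtr == 10) actually writes 16. Callers
 * must therefore leave WILDCOPYLENGTH - 1 = 7 bytes of slack beyond
 * dstEnd, which is what the MFLIMIT/LASTLITERALS checks in the decoder
 * below enforce.
 */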
/**************************************
 *	Common Constants
 **************************************/
#define MINMATCH 4

#define WILDCOPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (WILDCOPYLENGTH + MINMATCH)

/*
 * ensure it's possible to write 2 x wildcopyLength
 * without overflowing output buffer
 */
#define MATCH_SAFEGUARD_DISTANCE ((2 * WILDCOPYLENGTH) - MINMATCH)

#define KB (1 << 10)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U << ML_BITS) - 1)
#define RUN_BITS (8 - ML_BITS)
#define RUN_MASK ((1U << RUN_BITS) - 1)

#define LZ4_STATIC_ASSERT(c) BUILD_BUG_ON(!(c))

/**************************************
 *	Local Structures and types
 **************************************/
typedef enum { noDict = 0, withPrefix64k, usingExtDict } dict_directive;
typedef enum { endOnOutputSize = 0, endOnInputSize = 1 } endCondition_directive;
typedef enum { decode_full_block = 0, partial_decode = 1 } earlyEnd_directive;

#define DEBUGLOG(l, ...) {}	/* disabled */

#ifndef assert
#define assert(condition) ((void)0)
#endif
/*
 * LZ4_decompress_generic() :
 * This generic decompression function covers all use cases.
 * It shall be instantiated several times, using different sets of directives.
 * Note that it is important for performance that this function really gets
 * inlined, in order to remove useless branches during compilation
 * optimization.
 */
static FORCE_INLINE int LZ4_decompress_generic(
	const char * const src,
	char * const dst,
	int srcSize,
	/*
	 * If endOnInput == endOnInputSize,
	 * this value is `dstCapacity`
	 */
	int outputSize,
	/* endOnOutputSize, endOnInputSize */
	endCondition_directive endOnInput,
	/* full, partial */
	earlyEnd_directive partialDecoding,
	/* noDict, withPrefix64k, usingExtDict */
	dict_directive dict,
	/* always <= dst, == dst when no prefix */
	const BYTE * const lowPrefix,
	/* only if dict == usingExtDict */
	const BYTE * const dictStart,
	/* note : = 0 if noDict */
	const size_t dictSize
	)
{
	const BYTE *ip = (const BYTE *) src;
	const BYTE * const iend = ip + srcSize;

	BYTE *op = (BYTE *) dst;
	BYTE * const oend = op + outputSize;
	BYTE *cpy;

	const BYTE * const dictEnd = (const BYTE *)dictStart + dictSize;
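	/*
	 * Helper tables for copying overlapping matches (offset < 8), where
	 * a plain 8-byte copy would read bytes that have not been written
	 * yet: after duplicating the first 4 bytes one at a time, adding
	 * inc32table[offset] moves the match pointer so the next 4-byte
	 * copy reads valid data, and subtracting dec64table[offset]
	 * re-positions it for the 8-byte copies that follow, keeping the
	 * repeating pattern intact. (Comment added for exposition; not in
	 * the original source.)
	 */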
	static const unsigned int inc32table[8] = {0, 1, 2, 1, 0, 4, 4, 4};
	static const int dec64table[8] = {0, 0, 0, -1, -4, 1, 2, 3};

	const int safeDecode = (endOnInput == endOnInputSize);
	const int checkOffset = ((safeDecode) && (dictSize < (int)(64 * KB)));

	/* Set up the "end" pointers for the shortcut. */
	const BYTE *const shortiend = iend -
		(endOnInput ? 14 : 8) /*maxLL*/ - 2 /*offset*/;
	const BYTE *const shortoend = oend -
		(endOnInput ? 14 : 8) /*maxLL*/ - 18 /*maxML*/;

	DEBUGLOG(5, "%s (srcSize:%i, dstSize:%i)", __func__,
		 srcSize, outputSize);

	/* Special cases */
	assert(lowPrefix <= op);
	assert(src != NULL);

	/* Empty output buffer */
	if ((endOnInput) && (unlikely(outputSize == 0)))
		return ((srcSize == 1) && (*ip == 0)) ? 0 : -1;

	if ((!endOnInput) && (unlikely(outputSize == 0)))
		return (*ip == 0 ? 1 : -1);

	if ((endOnInput) && unlikely(srcSize == 0))
		return -1;

	/* Main Loop : decode sequences */
	while (1) {
		size_t length;
		const BYTE *match;
		size_t offset;

		/* get literal length */
		unsigned int const token = *ip++;

		length = token >> ML_BITS;

		/* ip < iend before the increment */
		assert(!endOnInput || ip <= iend);

		/*
		 * A two-stage shortcut for the most common case:
		 * 1) If the literal length is 0..14, and there is enough
		 * space, enter the shortcut and copy 16 bytes on behalf
		 * of the literals (in the fast mode, only 8 bytes can be
		 * safely copied this way).
		 * 2) Further if the match length is 4..18, copy 18 bytes
		 * in a similar manner; but we ensure that there's enough
		 * space in the output for those 18 bytes earlier, upon
		 * entering the shortcut (in other words, there is a
		 * combined check for both stages).
		 *
		 * The & in the likely() below is intentionally not && so that
		 * some compilers can produce better parallelized runtime code
		 */
		if ((endOnInput ? length != RUN_MASK : length <= 8)
		    /*
		     * strictly "less than" on input, to re-enter
		     * the loop with at least one byte
		     */
		    && likely((endOnInput ? ip < shortiend : 1) &
			      (op <= shortoend))) {
			/* Copy the literals */
			memcpy(op, ip, endOnInput ? 16 : 8);
			op += length;
			ip += length;

			/*
			 * The second stage:
			 * prepare for match copying, decode full info.
			 * If it doesn't work out, the info won't be wasted.
			 */
			length = token & ML_MASK; /* match length */
			offset = LZ4_readLE16(ip);
			ip += 2;
			match = op - offset;
			assert(match <= op); /* check overflow */

			/* Do not deal with overlapping matches. */
			if ((length != ML_MASK) &&
			    (offset >= 8) &&
			    (dict == withPrefix64k || match >= lowPrefix)) {
				/* Copy the match. */
				memcpy(op + 0, match + 0, 8);
				memcpy(op + 8, match + 8, 8);
				memcpy(op + 16, match + 16, 2);
				op += length + MINMATCH;
				/* Both stages worked, load the next token. */
				continue;
			}

			/*
			 * The second stage didn't work out, but the info
			 * is ready. Propel it right to the point of match
			 * copying.
			 */
			goto _copy_match;
		}
		/* decode literal length */
		if (length == RUN_MASK) {
			unsigned int s;

			if (unlikely(endOnInput ? ip >= iend - RUN_MASK : 0)) {
				/* overflow detection */
				goto _output_error;
			}
			do {
				s = *ip++;
				length += s;
			} while (likely(endOnInput
				? ip < iend - RUN_MASK
				: 1) & (s == 255));

			if ((safeDecode)
			    && unlikely((uptrval)(op) +
					length < (uptrval)(op))) {
				/* overflow detection */
				goto _output_error;
			}
			if ((safeDecode)
			    && unlikely((uptrval)(ip) +
					length < (uptrval)(ip))) {
				/* overflow detection */
				goto _output_error;
			}
		}

		/* copy literals */
		cpy = op + length;
		LZ4_STATIC_ASSERT(MFLIMIT >= WILDCOPYLENGTH);

		if (((endOnInput) && ((cpy > oend - MFLIMIT)
		     || (ip + length > iend - (2 + 1 + LASTLITERALS))))
		    || ((!endOnInput) && (cpy > oend - WILDCOPYLENGTH))) {
			if (partialDecoding) {
				if (cpy > oend) {
					/*
					 * Partial decoding :
					 * stop in the middle of literal
					 * segment
					 */
					cpy = oend;
					length = oend - op;
				}
				if ((endOnInput)
				    && (ip + length > iend)) {
					/*
					 * Error :
					 * read attempt beyond
					 * end of input buffer
					 */
					goto _output_error;
				}
			} else {
				if ((!endOnInput)
				    && (cpy != oend)) {
					/*
					 * Error :
					 * block decoding must
					 * stop exactly there
					 */
					goto _output_error;
				}
				if ((endOnInput)
				    && ((ip + length != iend)
					|| (cpy > oend))) {
					/*
					 * Error :
					 * input must be consumed
					 */
					goto _output_error;
				}
			}

			/*
			 * supports overlapping memory regions; only matters
			 * for in-place decompression scenarios
			 */
			memmove(op, ip, length);
			ip += length;
			op += length;

			/* Necessarily EOF, due to parsing restrictions */
			if (!partialDecoding || (cpy == oend))
				break;
		} else {
			/* may overwrite up to WILDCOPYLENGTH beyond cpy */
			LZ4_wildCopy(op, ip, cpy);
			ip += length;
			op = cpy;
		}
		/* get offset */
		offset = LZ4_readLE16(ip);
		ip += 2;
		match = op - offset;

		/* get matchlength */
		length = token & ML_MASK;

_copy_match:
		if ((checkOffset) && (unlikely(match + dictSize < lowPrefix))) {
			/* Error : offset outside buffers */
			goto _output_error;
		}

		/* costs ~1%; silence an msan warning when offset == 0 */
		/*
		 * note : when partialDecoding, there is no guarantee that
		 * at least 4 bytes remain available in output buffer
		 */
		if (!partialDecoding) {
			assert(oend > op);
			assert(oend - op >= 4);

			LZ4_write32(op, (U32)offset);
		}

		if (length == ML_MASK) {
			unsigned int s;

			do {
				s = *ip++;

				if ((endOnInput) && (ip > iend - LASTLITERALS))
					goto _output_error;

				length += s;
			} while (s == 255);

			if ((safeDecode)
			    && unlikely(
				(uptrval)(op) + length < (uptrval)op)) {
				/* overflow detection */
				goto _output_error;
			}
		}

		length += MINMATCH;

		/* match starting within external dictionary */
		if ((dict == usingExtDict) && (match < lowPrefix)) {
			if (unlikely(op + length > oend - LASTLITERALS)) {
				/* doesn't respect parsing restriction */
				if (!partialDecoding)
					goto _output_error;
				length = min(length, (size_t)(oend - op));
			}

			if (length <= (size_t)(lowPrefix - match)) {
				/*
				 * match fits entirely within external
				 * dictionary : just copy
				 */
				memmove(op, dictEnd - (lowPrefix - match),
					length);
				op += length;
			} else {
				/*
				 * match stretches into both external
				 * dictionary and current block
				 */
				size_t const copySize = (size_t)(lowPrefix - match);
				size_t const restSize = length - copySize;

				memcpy(op, dictEnd - copySize, copySize);
				op += copySize;
				if (restSize > (size_t)(op - lowPrefix)) {
					/* overlap copy */
					BYTE * const endOfMatch = op + restSize;
					const BYTE *copyFrom = lowPrefix;

					while (op < endOfMatch)
						*op++ = *copyFrom++;
				} else {
					memcpy(op, lowPrefix, restSize);
					op += restSize;
				}
			}
			continue;
		}
		/* copy match within block */
		cpy = op + length;

		/*
		 * partialDecoding :
		 * may not respect endBlock parsing restrictions
		 */
		assert(op <= oend);
		if (partialDecoding &&
		    (cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
			size_t const mlen = min(length, (size_t)(oend - op));
			const BYTE * const matchEnd = match + mlen;
			BYTE * const copyEnd = op + mlen;

			if (matchEnd > op) {
				/* overlap copy */
				while (op < copyEnd)
					*op++ = *match++;
			} else {
				memcpy(op, match, mlen);
			}
			op = copyEnd;
			if (op == oend)
				break;
			continue;
		}

		if (unlikely(offset < 8)) {
			op[0] = match[0];
			op[1] = match[1];
			op[2] = match[2];
			op[3] = match[3];
			match += inc32table[offset];
			memcpy(op + 4, match, 4);
			match -= dec64table[offset];
		} else {
			LZ4_copy8(op, match);
			match += 8;
		}

		op += 8;

		if (unlikely(cpy > oend - MATCH_SAFEGUARD_DISTANCE)) {
			BYTE * const oCopyLimit = oend - (WILDCOPYLENGTH - 1);

			if (cpy > oend - LASTLITERALS) {
				/*
				 * Error : last LASTLITERALS bytes
				 * must be literals (uncompressed)
				 */
				goto _output_error;
			}

			if (op < oCopyLimit) {
				LZ4_wildCopy(op, match, oCopyLimit);
				match += oCopyLimit - op;
				op = oCopyLimit;
			}
			while (op < cpy)
				*op++ = *match++;
		} else {
			LZ4_copy8(op, match);
			if (length > 16)
				LZ4_wildCopy(op + 8, match + 8, cpy);
		}
		op = cpy; /* wildcopy correction */
	}
	/* end of decoding */
	if (endOnInput) {
		/* Nb of output bytes decoded */
		return (int) (((char *)op) - dst);
	} else {
		/* Nb of input bytes read */
		return (int) (((const char *)ip) - src);
	}

	/* Overflow error detected */
_output_error:
	return (int) (-(((const char *)ip) - src)) - 1;
}
int LZ4_decompress_safe(const char *source, char *dest,
			int compressedSize, int maxDecompressedSize)
{
	return LZ4_decompress_generic(source, dest,
				      compressedSize, maxDecompressedSize,
				      endOnInputSize, decode_full_block,
				      noDict, (BYTE *)dest, NULL, 0);
}
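/*
 * Usage sketch (added for exposition; buffer names are hypothetical):
 *
 *	char out[4096];
 *	int n = LZ4_decompress_safe(comp, out, comp_len, sizeof(out));
 *
 * On success, n is the number of bytes written to out; a negative
 * return indicates malformed input.
 */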
int LZ4_decompress_safe_partial(const char *src, char *dst,
				int compressedSize, int targetOutputSize,
				int dstCapacity)
{
	dstCapacity = min(targetOutputSize, dstCapacity);
	return LZ4_decompress_generic(src, dst, compressedSize, dstCapacity,
				      endOnInputSize, partial_decode,
				      noDict, (BYTE *)dst, NULL, 0);
}
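/*
 * Usage sketch (added for exposition; buffer names are hypothetical):
 * decode only the first bytes of a block, e.g. to peek at a header
 * without decompressing everything.
 *
 *	char hdr[64];
 *	int n = LZ4_decompress_safe_partial(comp, hdr, comp_len,
 *					    sizeof(hdr), sizeof(hdr));
 *
 * n >= 0 is the number of bytes actually produced (it may be smaller
 * than sizeof(hdr) if the block decodes to less); n < 0 indicates
 * malformed input.
 */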