zip.c 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333
  1. // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
  2. /*
  3. * Routines for dealing with .zip archives.
  4. *
  5. * Copyright (c) Meta Platforms, Inc. and affiliates.
  6. */
  7. #include <errno.h>
  8. #include <fcntl.h>
  9. #include <stdint.h>
  10. #include <stdlib.h>
  11. #include <string.h>
  12. #include <sys/mman.h>
  13. #include <unistd.h>
  14. #include "libbpf_internal.h"
  15. #include "zip.h"
  16. #pragma GCC diagnostic push
  17. #pragma GCC diagnostic ignored "-Wpacked"
  18. #pragma GCC diagnostic ignored "-Wattributes"
  19. /* Specification of ZIP file format can be found here:
  20. * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
  21. * For a high level overview of the structure of a ZIP file see
  22. * sections 4.3.1 - 4.3.6.
  23. *
  24. * Data structures appearing in ZIP files do not contain any
  25. * padding and they might be misaligned. To allow us to safely
  26. * operate on pointers to such structures and their members, we
  27. * declare the types as packed.
  28. */
  29. #define END_OF_CD_RECORD_MAGIC 0x06054b50
  30. /* See section 4.3.16 of the spec. */
  31. struct end_of_cd_record {
  32. /* Magic value equal to END_OF_CD_RECORD_MAGIC */
  33. __u32 magic;
  34. /* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
  35. * Zip archive might span multiple files (disks).
  36. */
  37. __u16 this_disk;
  38. /* Number of the file containing the beginning of the central directory or
  39. * 0xFFFF if ZIP64 archive.
  40. */
  41. __u16 cd_disk;
  42. /* Number of central directory records on this disk or 0xFFFF if ZIP64
  43. * archive.
  44. */
  45. __u16 cd_records;
  46. /* Number of central directory records on all disks or 0xFFFF if ZIP64
  47. * archive.
  48. */
  49. __u16 cd_records_total;
  50. /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
  51. __u32 cd_size;
  52. /* Offset of the central directory from the beginning of the archive or
  53. * 0xFFFFFFFF if ZIP64 archive.
  54. */
  55. __u32 cd_offset;
  56. /* Length of comment data following end of central directory record. */
  57. __u16 comment_length;
  58. /* Up to 64k of arbitrary bytes. */
  59. /* uint8_t comment[comment_length] */
  60. } __attribute__((packed));
  61. #define CD_FILE_HEADER_MAGIC 0x02014b50
  62. #define FLAG_ENCRYPTED (1 << 0)
  63. #define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
  64. /* See section 4.3.12 of the spec. */
  65. struct cd_file_header {
  66. /* Magic value equal to CD_FILE_HEADER_MAGIC. */
  67. __u32 magic;
  68. __u16 version;
  69. /* Minimum zip version needed to extract the file. */
  70. __u16 min_version;
  71. __u16 flags;
  72. __u16 compression;
  73. __u16 last_modified_time;
  74. __u16 last_modified_date;
  75. __u32 crc;
  76. __u32 compressed_size;
  77. __u32 uncompressed_size;
  78. __u16 file_name_length;
  79. __u16 extra_field_length;
  80. __u16 file_comment_length;
  81. /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
  82. __u16 disk;
  83. __u16 internal_attributes;
  84. __u32 external_attributes;
  85. /* Offset from the start of the disk containing the local file header to the
  86. * start of the local file header.
  87. */
  88. __u32 offset;
  89. } __attribute__((packed));
  90. #define LOCAL_FILE_HEADER_MAGIC 0x04034b50
  91. /* See section 4.3.7 of the spec. */
  92. struct local_file_header {
  93. /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
  94. __u32 magic;
  95. /* Minimum zip version needed to extract the file. */
  96. __u16 min_version;
  97. __u16 flags;
  98. __u16 compression;
  99. __u16 last_modified_time;
  100. __u16 last_modified_date;
  101. __u32 crc;
  102. __u32 compressed_size;
  103. __u32 uncompressed_size;
  104. __u16 file_name_length;
  105. __u16 extra_field_length;
  106. } __attribute__((packed));
  107. #pragma GCC diagnostic pop
  108. struct zip_archive {
  109. void *data;
  110. __u32 size;
  111. __u32 cd_offset;
  112. __u32 cd_records;
  113. };
  114. static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
  115. {
  116. if (offset + size > archive->size || offset > offset + size)
  117. return NULL;
  118. return archive->data + offset;
  119. }
  120. /* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
  121. * archive uses features which are not supported.
  122. */
  123. static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
  124. {
  125. __u16 comment_length, cd_records;
  126. struct end_of_cd_record *eocd;
  127. __u32 cd_offset, cd_size;
  128. eocd = check_access(archive, offset, sizeof(*eocd));
  129. if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
  130. return -EINVAL;
  131. comment_length = eocd->comment_length;
  132. if (offset + sizeof(*eocd) + comment_length != archive->size)
  133. return -EINVAL;
  134. cd_records = eocd->cd_records;
  135. if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
  136. /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
  137. return -ENOTSUP;
  138. cd_offset = eocd->cd_offset;
  139. cd_size = eocd->cd_size;
  140. if (!check_access(archive, cd_offset, cd_size))
  141. return -EINVAL;
  142. archive->cd_offset = cd_offset;
  143. archive->cd_records = cd_records;
  144. return 0;
  145. }
  146. static int find_cd(struct zip_archive *archive)
  147. {
  148. int64_t limit, offset;
  149. int rc = -EINVAL;
  150. if (archive->size <= sizeof(struct end_of_cd_record))
  151. return -EINVAL;
  152. /* Because the end of central directory ends with a variable length array of
  153. * up to 0xFFFF bytes we can't know exactly where it starts and need to
  154. * search for it at the end of the file, scanning the (limit, offset] range.
  155. */
  156. offset = archive->size - sizeof(struct end_of_cd_record);
  157. limit = (int64_t)offset - (1 << 16);
  158. for (; offset >= 0 && offset > limit && rc != 0; offset--) {
  159. rc = try_parse_end_of_cd(archive, offset);
  160. if (rc == -ENOTSUP)
  161. break;
  162. }
  163. return rc;
  164. }
  165. struct zip_archive *zip_archive_open(const char *path)
  166. {
  167. struct zip_archive *archive;
  168. int err, fd;
  169. off_t size;
  170. void *data;
  171. fd = open(path, O_RDONLY | O_CLOEXEC);
  172. if (fd < 0)
  173. return ERR_PTR(-errno);
  174. size = lseek(fd, 0, SEEK_END);
  175. if (size == (off_t)-1 || size > UINT32_MAX) {
  176. close(fd);
  177. return ERR_PTR(-EINVAL);
  178. }
  179. data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
  180. err = -errno;
  181. close(fd);
  182. if (data == MAP_FAILED)
  183. return ERR_PTR(err);
  184. archive = malloc(sizeof(*archive));
  185. if (!archive) {
  186. munmap(data, size);
  187. return ERR_PTR(-ENOMEM);
  188. };
  189. archive->data = data;
  190. archive->size = size;
  191. err = find_cd(archive);
  192. if (err) {
  193. munmap(data, size);
  194. free(archive);
  195. return ERR_PTR(err);
  196. }
  197. return archive;
  198. }
  199. void zip_archive_close(struct zip_archive *archive)
  200. {
  201. munmap(archive->data, archive->size);
  202. free(archive);
  203. }
  204. static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
  205. __u32 offset)
  206. {
  207. struct local_file_header *lfh;
  208. lfh = check_access(archive, offset, sizeof(*lfh));
  209. if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
  210. return NULL;
  211. return lfh;
  212. }
  213. static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
  214. {
  215. struct local_file_header *lfh;
  216. __u32 compressed_size;
  217. const char *name;
  218. void *data;
  219. lfh = local_file_header_at_offset(archive, offset);
  220. if (!lfh)
  221. return -EINVAL;
  222. offset += sizeof(*lfh);
  223. if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
  224. return -EINVAL;
  225. name = check_access(archive, offset, lfh->file_name_length);
  226. if (!name)
  227. return -EINVAL;
  228. offset += lfh->file_name_length;
  229. if (!check_access(archive, offset, lfh->extra_field_length))
  230. return -EINVAL;
  231. offset += lfh->extra_field_length;
  232. compressed_size = lfh->compressed_size;
  233. data = check_access(archive, offset, compressed_size);
  234. if (!data)
  235. return -EINVAL;
  236. out->compression = lfh->compression;
  237. out->name_length = lfh->file_name_length;
  238. out->name = name;
  239. out->data = data;
  240. out->data_length = compressed_size;
  241. out->data_offset = offset;
  242. return 0;
  243. }
  244. int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
  245. struct zip_entry *out)
  246. {
  247. size_t file_name_length = strlen(file_name);
  248. __u32 i, offset = archive->cd_offset;
  249. for (i = 0; i < archive->cd_records; ++i) {
  250. __u16 cdfh_name_length, cdfh_flags;
  251. struct cd_file_header *cdfh;
  252. const char *cdfh_name;
  253. cdfh = check_access(archive, offset, sizeof(*cdfh));
  254. if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
  255. return -EINVAL;
  256. offset += sizeof(*cdfh);
  257. cdfh_name_length = cdfh->file_name_length;
  258. cdfh_name = check_access(archive, offset, cdfh_name_length);
  259. if (!cdfh_name)
  260. return -EINVAL;
  261. cdfh_flags = cdfh->flags;
  262. if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
  263. (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
  264. file_name_length == cdfh_name_length &&
  265. memcmp(file_name, archive->data + offset, file_name_length) == 0) {
  266. return get_entry_at_offset(archive, cdfh->offset, out);
  267. }
  268. offset += cdfh_name_length;
  269. offset += cdfh->extra_field_length;
  270. offset += cdfh->file_comment_length;
  271. }
  272. return -ENOENT;
  273. }