  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * fs/ext4/verity.c: fs-verity support for ext4
  4. *
  5. * Copyright 2019 Google LLC
  6. */
  7. /*
  8. * Implementation of fsverity_operations for ext4.
  9. *
  10. * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
  11. * the end of the file, starting at the first 64K boundary beyond i_size. This
  12. * approach works because (a) verity files are readonly, and (b) pages fully
  13. * beyond i_size aren't visible to userspace but can be read/written internally
  14. * by ext4 with only some relatively small changes to ext4. This approach
  15. * avoids having to depend on the EA_INODE feature and on rearchitecturing
  16. * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
  17. * to support encrypting xattrs. Note that the verity metadata *must* be
  18. * encrypted when the file is, since it contains hashes of the plaintext data.
  19. *
  20. * Using a 64K boundary rather than a 4K one keeps things ready for
  21. * architectures with 64K pages, and it doesn't necessarily waste space on-disk
  22. * since there can be a hole between i_size and the start of the Merkle tree.
  23. */
  24. #include <linux/quotaops.h>
  25. #include "ext4.h"
  26. #include "ext4_extents.h"
  27. #include "ext4_jbd2.h"
/*
 * Return the file offset at which the verity metadata (Merkle tree followed
 * by the fsverity_descriptor) begins: the first 64K boundary at or beyond
 * i_size.  See the file-level comment for why a fixed 64K alignment is used
 * rather than the filesystem block size or PAGE_SIZE.
 */
static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
{
	return round_up(inode->i_size, 65536);
}
  32. /*
  33. * Read some verity metadata from the inode. __vfs_read() can't be used because
  34. * we need to read beyond i_size.
  35. */
  36. static int pagecache_read(struct inode *inode, void *buf, size_t count,
  37. loff_t pos)
  38. {
  39. while (count) {
  40. struct folio *folio;
  41. size_t n;
  42. folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT,
  43. NULL);
  44. if (IS_ERR(folio))
  45. return PTR_ERR(folio);
  46. n = memcpy_from_file_folio(buf, folio, pos, count);
  47. folio_put(folio);
  48. buf += n;
  49. pos += n;
  50. count -= n;
  51. }
  52. return 0;
  53. }
  54. /*
  55. * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
  56. * kernel_write() can't be used because the file descriptor is readonly.
  57. */
  58. static int pagecache_write(struct inode *inode, const void *buf, size_t count,
  59. loff_t pos)
  60. {
  61. struct address_space *mapping = inode->i_mapping;
  62. const struct address_space_operations *aops = mapping->a_ops;
  63. if (pos + count > inode->i_sb->s_maxbytes)
  64. return -EFBIG;
  65. while (count) {
  66. size_t n = min_t(size_t, count,
  67. PAGE_SIZE - offset_in_page(pos));
  68. struct folio *folio;
  69. void *fsdata = NULL;
  70. int res;
  71. res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata);
  72. if (res)
  73. return res;
  74. memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n);
  75. res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata);
  76. if (res < 0)
  77. return res;
  78. if (res != n)
  79. return -EIO;
  80. buf += n;
  81. pos += n;
  82. count -= n;
  83. }
  84. return 0;
  85. }
/*
 * fsverity_operations::begin_enable_verity for ext4: prepare the inode for
 * the verity metadata writes that fs/verity/ will do next.
 *
 * On success the inode is on the orphan list (so a crash mid-enable gets
 * cleaned up) and EXT4_STATE_VERITY_IN_PROGRESS is set, which allows writes
 * past i_size.  Returns 0 or a negative errno.
 */
static int ext4_begin_enable_verity(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	const int credits = 2; /* superblock and inode for ext4_orphan_add() */
	handle_t *handle;
	int err;

	/* DAX is incompatible with storing metadata past EOF this way. */
	if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
		return -EINVAL;

	if (ext4_verity_in_progress(inode))
		return -EBUSY;

	/*
	 * Since the file was opened readonly, we have to initialize the jbd
	 * inode and quotas here and not rely on ->open() doing it.  This must
	 * be done before evicting the inline data.
	 */
	err = ext4_inode_attach_jinode(inode);
	if (err)
		return err;

	err = dquot_initialize(inode);
	if (err)
		return err;

	err = ext4_convert_inline_data(inode);
	if (err)
		return err;

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ext4_warning_inode(inode,
				   "verity is only allowed on extent-based files");
		return -EOPNOTSUPP;
	}

	/*
	 * ext4 uses the last allocated block to find the verity descriptor, so
	 * we must remove any other blocks past EOF which might confuse things.
	 */
	err = ext4_truncate(inode);
	if (err)
		return err;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* Only mark in-progress once the orphan-add actually succeeded. */
	err = ext4_orphan_add(handle, inode);
	if (err == 0)
		ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);

	ext4_journal_stop(handle);
	return err;
}
  131. /*
  132. * ext4 stores the verity descriptor beginning on the next filesystem block
  133. * boundary after the Merkle tree. Then, the descriptor size is stored in the
  134. * last 4 bytes of the last allocated filesystem block --- which is either the
  135. * block in which the descriptor ends, or the next block after that if there
  136. * weren't at least 4 bytes remaining.
  137. *
  138. * We can't simply store the descriptor in an xattr because it *must* be
  139. * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
  140. * xattrs. Also, if the descriptor includes a large signature blob it may be
  141. * too large to store in an xattr without the EA_INODE feature.
  142. */
  143. static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
  144. size_t desc_size, u64 merkle_tree_size)
  145. {
  146. const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
  147. merkle_tree_size, i_blocksize(inode));
  148. const u64 desc_end = desc_pos + desc_size;
  149. const __le32 desc_size_disk = cpu_to_le32(desc_size);
  150. const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
  151. i_blocksize(inode)) -
  152. sizeof(desc_size_disk);
  153. int err;
  154. err = pagecache_write(inode, desc, desc_size, desc_pos);
  155. if (err)
  156. return err;
  157. return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
  158. desc_size_pos);
  159. }
/*
 * fsverity_operations::end_enable_verity for ext4: finalize (or abort)
 * enabling verity on an inode.
 *
 * @desc is the completed fsverity_descriptor, or NULL if building the Merkle
 * tree failed and only cleanup is needed.  On success the verity flag is
 * persisted and the inode is removed from the orphan list; on any failure the
 * metadata written past i_size is truncated away.  Returns 0 or negative errno.
 */
static int ext4_end_enable_verity(struct file *filp, const void *desc,
				  size_t desc_size, u64 merkle_tree_size)
{
	struct inode *inode = file_inode(filp);
	const int credits = 2; /* superblock and inode for ext4_orphan_del() */
	handle_t *handle;
	struct ext4_iloc iloc;
	int err = 0;

	/*
	 * If an error already occurred (which fs/verity/ signals by passing
	 * desc == NULL), then only clean-up is needed.
	 */
	if (desc == NULL)
		goto cleanup;

	/* Append the verity descriptor. */
	err = ext4_write_verity_descriptor(inode, desc, desc_size,
					   merkle_tree_size);
	if (err)
		goto cleanup;

	/*
	 * Write all pages (both data and verity metadata).  Note that this must
	 * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
	 * beyond i_size won't be written properly.  For crash consistency, this
	 * also must happen before the verity inode flag gets persisted.
	 */
	err = filemap_write_and_wait(inode->i_mapping);
	if (err)
		goto cleanup;

	/*
	 * Finally, set the verity inode flag and remove the inode from the
	 * orphan list (in a single transaction).
	 */
	handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto cleanup;
	}

	err = ext4_orphan_del(handle, inode);
	if (err)
		goto stop_and_cleanup;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto stop_and_cleanup;

	ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
	ext4_set_inode_flags(inode, false);
	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
	if (err)
		goto stop_and_cleanup;

	ext4_journal_stop(handle);

	/* Only now is it safe to stop allowing writes past i_size. */
	ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
	return 0;

stop_and_cleanup:
	ext4_journal_stop(handle);
cleanup:
	/*
	 * Verity failed to be enabled, so clean up by truncating any verity
	 * metadata that was written beyond i_size (both from cache and from
	 * disk), removing the inode from the orphan list (if it wasn't done
	 * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
	 */
	truncate_inode_pages(inode->i_mapping, inode->i_size);
	ext4_truncate(inode);
	ext4_orphan_del(NULL, inode);
	ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
	return err;
}
/*
 * Locate the verity descriptor on disk by walking the inode's extent tree.
 *
 * Inverts the layout created by ext4_write_verity_descriptor(): the 32-bit
 * descriptor size sits in the last 4 bytes of the last allocated block, and
 * the descriptor itself starts on the preceding block boundary.
 *
 * On success, returns 0 and fills in *desc_size_ret and *desc_pos_ret.
 * Returns -EFSCORRUPTED if the layout doesn't validate.
 */
static int ext4_get_verity_descriptor_location(struct inode *inode,
					       size_t *desc_size_ret,
					       u64 *desc_pos_ret)
{
	struct ext4_ext_path *path;
	struct ext4_extent *last_extent;
	u32 end_lblk;
	u64 desc_size_pos;
	__le32 desc_size_disk;
	u32 desc_size;
	u64 desc_pos;
	int err;

	/*
	 * Descriptor size is in last 4 bytes of last allocated block.
	 * See ext4_write_verity_descriptor().
	 */
	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
		return -EFSCORRUPTED;
	}

	/* Searching for the max logical block finds the last extent. */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
	if (IS_ERR(path))
		return PTR_ERR(path);

	last_extent = path[path->p_depth].p_ext;
	if (!last_extent) {
		EXT4_ERROR_INODE(inode, "verity file has no extents");
		ext4_free_ext_path(path);
		return -EFSCORRUPTED;
	}

	/* First logical block *after* the last allocated block. */
	end_lblk = le32_to_cpu(last_extent->ee_block) +
		   ext4_ext_get_actual_len(last_extent);
	desc_size_pos = (u64)end_lblk << inode->i_blkbits;
	ext4_free_ext_path(path);

	if (desc_size_pos < sizeof(desc_size_disk))
		goto bad;
	desc_size_pos -= sizeof(desc_size_disk);

	err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
			     desc_size_pos);
	if (err)
		return err;
	desc_size = le32_to_cpu(desc_size_disk);

	/*
	 * The descriptor is stored just before the desc_size_disk, but starting
	 * on a filesystem block boundary.
	 */
	if (desc_size > INT_MAX || desc_size > desc_size_pos)
		goto bad;

	desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
	/* The descriptor can't begin before the Merkle tree does. */
	if (desc_pos < ext4_verity_metadata_pos(inode))
		goto bad;

	*desc_size_ret = desc_size;
	*desc_pos_ret = desc_pos;
	return 0;

bad:
	EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
	return -EFSCORRUPTED;
}
  283. static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
  284. size_t buf_size)
  285. {
  286. size_t desc_size = 0;
  287. u64 desc_pos = 0;
  288. int err;
  289. err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
  290. if (err)
  291. return err;
  292. if (buf_size) {
  293. if (desc_size > buf_size)
  294. return -ERANGE;
  295. err = pagecache_read(inode, buf, desc_size, desc_pos);
  296. if (err)
  297. return err;
  298. }
  299. return desc_size;
  300. }
/*
 * fsverity_operations::read_merkle_tree_page for ext4.
 *
 * @index is relative to the start of the Merkle tree; it's translated to an
 * absolute page index past EOF.  If the folio isn't already cached and
 * uptodate, optionally kick off readahead of up to @num_ra_pages pages
 * (unbounded, since the tree lives beyond i_size) before reading it.
 *
 * Returns the page, or an ERR_PTR() on failure.
 */
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index,
					       unsigned long num_ra_pages)
{
	struct folio *folio;

	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;

	/* Fast path: the folio is already cached and uptodate. */
	folio = __filemap_get_folio(inode->i_mapping, index, FGP_ACCESSED, 0);
	if (IS_ERR(folio) || !folio_test_uptodate(folio)) {
		DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index);

		if (!IS_ERR(folio))
			/* Cached but not uptodate: drop it and re-read. */
			folio_put(folio);
		else if (num_ra_pages > 1)
			/* Not cached at all: readahead is worthwhile. */
			page_cache_ra_unbounded(&ractl, num_ra_pages, 0);
		folio = read_mapping_folio(inode->i_mapping, index, NULL);
		if (IS_ERR(folio))
			return ERR_CAST(folio);
	}
	return folio_file_page(folio, index);
}
  320. static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf,
  321. u64 pos, unsigned int size)
  322. {
  323. pos += ext4_verity_metadata_pos(inode);
  324. return pagecache_write(inode, buf, size, pos);
  325. }
/* fs-verity operations vector for ext4, hooked up via the superblock. */
const struct fsverity_operations ext4_verityops = {
	.begin_enable_verity	= ext4_begin_enable_verity,
	.end_enable_verity	= ext4_end_enable_verity,
	.get_verity_descriptor	= ext4_get_verity_descriptor,
	.read_merkle_tree_page	= ext4_read_merkle_tree_page,
	.write_merkle_tree_block = ext4_write_merkle_tree_block,
};