file.c 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653
  1. /*
  2. * Copyright (C) 2017 Red Hat, Inc.
  3. *
  4. * This program is free software; you can redistribute it and/or modify it
  5. * under the terms of the GNU General Public License version 2 as published by
  6. * the Free Software Foundation.
  7. */
  8. #include <linux/cred.h>
  9. #include <linux/file.h>
  10. #include <linux/mount.h>
  11. #include <linux/xattr.h>
  12. #include <linux/uio.h>
  13. #include <linux/uaccess.h>
  14. #include "overlayfs.h"
  15. static char ovl_whatisit(struct inode *inode, struct inode *realinode)
  16. {
  17. if (realinode != ovl_inode_upper(inode))
  18. return 'l';
  19. if (ovl_has_upperdata(inode))
  20. return 'u';
  21. else
  22. return 'm';
  23. }
  24. /* No atime modificaton nor notify on underlying */
  25. #define OVL_OPEN_FLAGS (O_NOATIME | FMODE_NONOTIFY)
  26. static struct file *ovl_open_realfile(const struct file *file,
  27. struct inode *realinode)
  28. {
  29. struct inode *inode = file_inode(file);
  30. struct file *realfile;
  31. const struct cred *old_cred;
  32. int flags = file->f_flags | OVL_OPEN_FLAGS;
  33. old_cred = ovl_override_creds(inode->i_sb);
  34. realfile = open_with_fake_path(&file->f_path, flags, realinode,
  35. current_cred());
  36. revert_creds(old_cred);
  37. pr_debug("open(%p[%pD2/%c], 0%o) -> (%p, 0%o)\n",
  38. file, file, ovl_whatisit(inode, realinode), file->f_flags,
  39. realfile, IS_ERR(realfile) ? 0 : realfile->f_flags);
  40. return realfile;
  41. }
  42. #define OVL_SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT)
  43. static int ovl_change_flags(struct file *file, unsigned int flags)
  44. {
  45. struct inode *inode = file_inode(file);
  46. int err;
  47. flags |= OVL_OPEN_FLAGS;
  48. /* If some flag changed that cannot be changed then something's amiss */
  49. if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
  50. return -EIO;
  51. flags &= OVL_SETFL_MASK;
  52. if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
  53. return -EPERM;
  54. if (flags & O_DIRECT) {
  55. if (!file->f_mapping->a_ops ||
  56. !file->f_mapping->a_ops->direct_IO)
  57. return -EINVAL;
  58. }
  59. if (file->f_op->check_flags) {
  60. err = file->f_op->check_flags(flags);
  61. if (err)
  62. return err;
  63. }
  64. spin_lock(&file->f_lock);
  65. file->f_flags = (file->f_flags & ~OVL_SETFL_MASK) | flags;
  66. spin_unlock(&file->f_lock);
  67. return 0;
  68. }
  69. static int ovl_real_fdget_meta(const struct file *file, struct fd *real,
  70. bool allow_meta)
  71. {
  72. struct inode *inode = file_inode(file);
  73. struct inode *realinode;
  74. real->flags = 0;
  75. real->file = file->private_data;
  76. if (allow_meta)
  77. realinode = ovl_inode_real(inode);
  78. else
  79. realinode = ovl_inode_realdata(inode);
  80. /* Has it been copied up since we'd opened it? */
  81. if (unlikely(file_inode(real->file) != realinode)) {
  82. real->flags = FDPUT_FPUT;
  83. real->file = ovl_open_realfile(file, realinode);
  84. return PTR_ERR_OR_ZERO(real->file);
  85. }
  86. /* Did the flags change since open? */
  87. if (unlikely((file->f_flags ^ real->file->f_flags) & ~OVL_OPEN_FLAGS))
  88. return ovl_change_flags(real->file, file->f_flags);
  89. return 0;
  90. }
  91. static int ovl_real_fdget(const struct file *file, struct fd *real)
  92. {
  93. return ovl_real_fdget_meta(file, real, false);
  94. }
  95. static int ovl_open(struct inode *inode, struct file *file)
  96. {
  97. struct file *realfile;
  98. int err;
  99. err = ovl_maybe_copy_up(file_dentry(file), file->f_flags);
  100. if (err)
  101. return err;
  102. /* No longer need these flags, so don't pass them on to underlying fs */
  103. file->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  104. realfile = ovl_open_realfile(file, ovl_inode_realdata(inode));
  105. if (IS_ERR(realfile))
  106. return PTR_ERR(realfile);
  107. file->private_data = realfile;
  108. return 0;
  109. }
  110. static int ovl_release(struct inode *inode, struct file *file)
  111. {
  112. fput(file->private_data);
  113. return 0;
  114. }
  115. static loff_t ovl_llseek(struct file *file, loff_t offset, int whence)
  116. {
  117. struct inode *inode = file_inode(file);
  118. struct fd real;
  119. const struct cred *old_cred;
  120. loff_t ret;
  121. /*
  122. * The two special cases below do not need to involve real fs,
  123. * so we can optimizing concurrent callers.
  124. */
  125. if (offset == 0) {
  126. if (whence == SEEK_CUR)
  127. return file->f_pos;
  128. if (whence == SEEK_SET)
  129. return vfs_setpos(file, 0, 0);
  130. }
  131. ret = ovl_real_fdget(file, &real);
  132. if (ret)
  133. return ret;
  134. /*
  135. * Overlay file f_pos is the master copy that is preserved
  136. * through copy up and modified on read/write, but only real
  137. * fs knows how to SEEK_HOLE/SEEK_DATA and real fs may impose
  138. * limitations that are more strict than ->s_maxbytes for specific
  139. * files, so we use the real file to perform seeks.
  140. */
  141. inode_lock(inode);
  142. real.file->f_pos = file->f_pos;
  143. old_cred = ovl_override_creds(inode->i_sb);
  144. ret = vfs_llseek(real.file, offset, whence);
  145. revert_creds(old_cred);
  146. file->f_pos = real.file->f_pos;
  147. inode_unlock(inode);
  148. fdput(real);
  149. return ret;
  150. }
  151. static void ovl_file_accessed(struct file *file)
  152. {
  153. struct inode *inode, *upperinode;
  154. if (file->f_flags & O_NOATIME)
  155. return;
  156. inode = file_inode(file);
  157. upperinode = ovl_inode_upper(inode);
  158. if (!upperinode)
  159. return;
  160. if ((!timespec64_equal(&inode->i_mtime, &upperinode->i_mtime) ||
  161. !timespec64_equal(&inode->i_ctime, &upperinode->i_ctime))) {
  162. inode->i_mtime = upperinode->i_mtime;
  163. inode->i_ctime = upperinode->i_ctime;
  164. }
  165. touch_atime(&file->f_path);
  166. }
  167. static rwf_t ovl_iocb_to_rwf(struct kiocb *iocb)
  168. {
  169. int ifl = iocb->ki_flags;
  170. rwf_t flags = 0;
  171. if (ifl & IOCB_NOWAIT)
  172. flags |= RWF_NOWAIT;
  173. if (ifl & IOCB_HIPRI)
  174. flags |= RWF_HIPRI;
  175. if (ifl & IOCB_DSYNC)
  176. flags |= RWF_DSYNC;
  177. if (ifl & IOCB_SYNC)
  178. flags |= RWF_SYNC;
  179. return flags;
  180. }
  181. static ssize_t ovl_read_iter(struct kiocb *iocb, struct iov_iter *iter)
  182. {
  183. struct file *file = iocb->ki_filp;
  184. struct fd real;
  185. const struct cred *old_cred;
  186. ssize_t ret;
  187. if (!iov_iter_count(iter))
  188. return 0;
  189. ret = ovl_real_fdget(file, &real);
  190. if (ret)
  191. return ret;
  192. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  193. ret = vfs_iter_read(real.file, iter, &iocb->ki_pos,
  194. ovl_iocb_to_rwf(iocb));
  195. revert_creds(old_cred);
  196. ovl_file_accessed(file);
  197. fdput(real);
  198. return ret;
  199. }
  200. static ssize_t ovl_write_iter(struct kiocb *iocb, struct iov_iter *iter)
  201. {
  202. struct file *file = iocb->ki_filp;
  203. struct inode *inode = file_inode(file);
  204. struct fd real;
  205. const struct cred *old_cred;
  206. ssize_t ret;
  207. if (!iov_iter_count(iter))
  208. return 0;
  209. inode_lock(inode);
  210. /* Update mode */
  211. ovl_copyattr(ovl_inode_real(inode), inode);
  212. ret = file_remove_privs(file);
  213. if (ret)
  214. goto out_unlock;
  215. ret = ovl_real_fdget(file, &real);
  216. if (ret)
  217. goto out_unlock;
  218. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  219. file_start_write(real.file);
  220. ret = vfs_iter_write(real.file, iter, &iocb->ki_pos,
  221. ovl_iocb_to_rwf(iocb));
  222. file_end_write(real.file);
  223. revert_creds(old_cred);
  224. /* Update size */
  225. ovl_copyattr(ovl_inode_real(inode), inode);
  226. fdput(real);
  227. out_unlock:
  228. inode_unlock(inode);
  229. return ret;
  230. }
  231. static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
  232. {
  233. struct fd real;
  234. const struct cred *old_cred;
  235. int ret;
  236. ret = ovl_real_fdget_meta(file, &real, !datasync);
  237. if (ret)
  238. return ret;
  239. /* Don't sync lower file for fear of receiving EROFS error */
  240. if (file_inode(real.file) == ovl_inode_upper(file_inode(file))) {
  241. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  242. ret = vfs_fsync_range(real.file, start, end, datasync);
  243. revert_creds(old_cred);
  244. }
  245. fdput(real);
  246. return ret;
  247. }
  248. static int ovl_mmap(struct file *file, struct vm_area_struct *vma)
  249. {
  250. struct file *realfile = file->private_data;
  251. const struct cred *old_cred;
  252. int ret;
  253. if (!realfile->f_op->mmap)
  254. return -ENODEV;
  255. if (WARN_ON(file != vma->vm_file))
  256. return -EIO;
  257. vma->vm_file = get_file(realfile);
  258. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  259. ret = call_mmap(vma->vm_file, vma);
  260. revert_creds(old_cred);
  261. if (ret) {
  262. /* Drop reference count from new vm_file value */
  263. fput(realfile);
  264. } else {
  265. /* Drop reference count from previous vm_file value */
  266. fput(file);
  267. }
  268. ovl_file_accessed(file);
  269. return ret;
  270. }
  271. static long ovl_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
  272. {
  273. struct inode *inode = file_inode(file);
  274. struct fd real;
  275. const struct cred *old_cred;
  276. int ret;
  277. ret = ovl_real_fdget(file, &real);
  278. if (ret)
  279. return ret;
  280. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  281. ret = vfs_fallocate(real.file, mode, offset, len);
  282. revert_creds(old_cred);
  283. /* Update size */
  284. ovl_copyattr(ovl_inode_real(inode), inode);
  285. fdput(real);
  286. return ret;
  287. }
  288. static int ovl_fadvise(struct file *file, loff_t offset, loff_t len, int advice)
  289. {
  290. struct fd real;
  291. const struct cred *old_cred;
  292. int ret;
  293. ret = ovl_real_fdget(file, &real);
  294. if (ret)
  295. return ret;
  296. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  297. ret = vfs_fadvise(real.file, offset, len, advice);
  298. revert_creds(old_cred);
  299. fdput(real);
  300. return ret;
  301. }
  302. static long ovl_real_ioctl(struct file *file, unsigned int cmd,
  303. unsigned long arg)
  304. {
  305. struct fd real;
  306. const struct cred *old_cred;
  307. long ret;
  308. ret = ovl_real_fdget(file, &real);
  309. if (ret)
  310. return ret;
  311. old_cred = ovl_override_creds(file_inode(file)->i_sb);
  312. ret = vfs_ioctl(real.file, cmd, arg);
  313. revert_creds(old_cred);
  314. fdput(real);
  315. return ret;
  316. }
  317. static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
  318. unsigned long arg, unsigned int iflags)
  319. {
  320. long ret;
  321. struct inode *inode = file_inode(file);
  322. unsigned int old_iflags;
  323. if (!inode_owner_or_capable(inode))
  324. return -EACCES;
  325. ret = mnt_want_write_file(file);
  326. if (ret)
  327. return ret;
  328. inode_lock(inode);
  329. /* Check the capability before cred override */
  330. ret = -EPERM;
  331. old_iflags = READ_ONCE(inode->i_flags);
  332. if (((iflags ^ old_iflags) & (S_APPEND | S_IMMUTABLE)) &&
  333. !capable(CAP_LINUX_IMMUTABLE))
  334. goto unlock;
  335. ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
  336. if (ret)
  337. goto unlock;
  338. ret = ovl_real_ioctl(file, cmd, arg);
  339. ovl_copyflags(ovl_inode_real(inode), inode);
  340. unlock:
  341. inode_unlock(inode);
  342. mnt_drop_write_file(file);
  343. return ret;
  344. }
  345. static unsigned int ovl_fsflags_to_iflags(unsigned int flags)
  346. {
  347. unsigned int iflags = 0;
  348. if (flags & FS_SYNC_FL)
  349. iflags |= S_SYNC;
  350. if (flags & FS_APPEND_FL)
  351. iflags |= S_APPEND;
  352. if (flags & FS_IMMUTABLE_FL)
  353. iflags |= S_IMMUTABLE;
  354. if (flags & FS_NOATIME_FL)
  355. iflags |= S_NOATIME;
  356. return iflags;
  357. }
  358. static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
  359. unsigned long arg)
  360. {
  361. unsigned int flags;
  362. if (get_user(flags, (int __user *) arg))
  363. return -EFAULT;
  364. return ovl_ioctl_set_flags(file, cmd, arg,
  365. ovl_fsflags_to_iflags(flags));
  366. }
  367. static unsigned int ovl_fsxflags_to_iflags(unsigned int xflags)
  368. {
  369. unsigned int iflags = 0;
  370. if (xflags & FS_XFLAG_SYNC)
  371. iflags |= S_SYNC;
  372. if (xflags & FS_XFLAG_APPEND)
  373. iflags |= S_APPEND;
  374. if (xflags & FS_XFLAG_IMMUTABLE)
  375. iflags |= S_IMMUTABLE;
  376. if (xflags & FS_XFLAG_NOATIME)
  377. iflags |= S_NOATIME;
  378. return iflags;
  379. }
  380. static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
  381. unsigned long arg)
  382. {
  383. struct fsxattr fa;
  384. memset(&fa, 0, sizeof(fa));
  385. if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
  386. return -EFAULT;
  387. return ovl_ioctl_set_flags(file, cmd, arg,
  388. ovl_fsxflags_to_iflags(fa.fsx_xflags));
  389. }
  390. static long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
  391. {
  392. long ret;
  393. switch (cmd) {
  394. case FS_IOC_GETFLAGS:
  395. case FS_IOC_FSGETXATTR:
  396. ret = ovl_real_ioctl(file, cmd, arg);
  397. break;
  398. case FS_IOC_SETFLAGS:
  399. ret = ovl_ioctl_set_fsflags(file, cmd, arg);
  400. break;
  401. case FS_IOC_FSSETXATTR:
  402. ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
  403. break;
  404. default:
  405. ret = -ENOTTY;
  406. }
  407. return ret;
  408. }
  409. static long ovl_compat_ioctl(struct file *file, unsigned int cmd,
  410. unsigned long arg)
  411. {
  412. switch (cmd) {
  413. case FS_IOC32_GETFLAGS:
  414. cmd = FS_IOC_GETFLAGS;
  415. break;
  416. case FS_IOC32_SETFLAGS:
  417. cmd = FS_IOC_SETFLAGS;
  418. break;
  419. default:
  420. return -ENOIOCTLCMD;
  421. }
  422. return ovl_ioctl(file, cmd, arg);
  423. }
  424. enum ovl_copyop {
  425. OVL_COPY,
  426. OVL_CLONE,
  427. OVL_DEDUPE,
  428. };
  429. static ssize_t ovl_copyfile(struct file *file_in, loff_t pos_in,
  430. struct file *file_out, loff_t pos_out,
  431. u64 len, unsigned int flags, enum ovl_copyop op)
  432. {
  433. struct inode *inode_out = file_inode(file_out);
  434. struct fd real_in, real_out;
  435. const struct cred *old_cred;
  436. ssize_t ret;
  437. ret = ovl_real_fdget(file_out, &real_out);
  438. if (ret)
  439. return ret;
  440. ret = ovl_real_fdget(file_in, &real_in);
  441. if (ret) {
  442. fdput(real_out);
  443. return ret;
  444. }
  445. old_cred = ovl_override_creds(file_inode(file_out)->i_sb);
  446. switch (op) {
  447. case OVL_COPY:
  448. ret = vfs_copy_file_range(real_in.file, pos_in,
  449. real_out.file, pos_out, len, flags);
  450. break;
  451. case OVL_CLONE:
  452. ret = vfs_clone_file_range(real_in.file, pos_in,
  453. real_out.file, pos_out, len);
  454. break;
  455. case OVL_DEDUPE:
  456. ret = vfs_dedupe_file_range_one(real_in.file, pos_in,
  457. real_out.file, pos_out, len);
  458. break;
  459. }
  460. revert_creds(old_cred);
  461. /* Update size */
  462. ovl_copyattr(ovl_inode_real(inode_out), inode_out);
  463. fdput(real_in);
  464. fdput(real_out);
  465. return ret;
  466. }
  467. static ssize_t ovl_copy_file_range(struct file *file_in, loff_t pos_in,
  468. struct file *file_out, loff_t pos_out,
  469. size_t len, unsigned int flags)
  470. {
  471. return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, flags,
  472. OVL_COPY);
  473. }
  474. static int ovl_clone_file_range(struct file *file_in, loff_t pos_in,
  475. struct file *file_out, loff_t pos_out, u64 len)
  476. {
  477. return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
  478. OVL_CLONE);
  479. }
  480. static int ovl_dedupe_file_range(struct file *file_in, loff_t pos_in,
  481. struct file *file_out, loff_t pos_out, u64 len)
  482. {
  483. /*
  484. * Don't copy up because of a dedupe request, this wouldn't make sense
  485. * most of the time (data would be duplicated instead of deduplicated).
  486. */
  487. if (!ovl_inode_upper(file_inode(file_in)) ||
  488. !ovl_inode_upper(file_inode(file_out)))
  489. return -EPERM;
  490. return ovl_copyfile(file_in, pos_in, file_out, pos_out, len, 0,
  491. OVL_DEDUPE);
  492. }
  493. const struct file_operations ovl_file_operations = {
  494. .open = ovl_open,
  495. .release = ovl_release,
  496. .llseek = ovl_llseek,
  497. .read_iter = ovl_read_iter,
  498. .write_iter = ovl_write_iter,
  499. .fsync = ovl_fsync,
  500. .mmap = ovl_mmap,
  501. .fallocate = ovl_fallocate,
  502. .fadvise = ovl_fadvise,
  503. .unlocked_ioctl = ovl_ioctl,
  504. .compat_ioctl = ovl_compat_ioctl,
  505. .copy_file_range = ovl_copy_file_range,
  506. .clone_file_range = ovl_clone_file_range,
  507. .dedupe_file_range = ovl_dedupe_file_range,
  508. };