libfs.c 59 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * fs/libfs.c
  4. * Library for filesystems writers.
  5. */
  6. #include <linux/blkdev.h>
  7. #include <linux/export.h>
  8. #include <linux/pagemap.h>
  9. #include <linux/slab.h>
  10. #include <linux/cred.h>
  11. #include <linux/mount.h>
  12. #include <linux/vfs.h>
  13. #include <linux/quotaops.h>
  14. #include <linux/mutex.h>
  15. #include <linux/namei.h>
  16. #include <linux/exportfs.h>
  17. #include <linux/iversion.h>
  18. #include <linux/writeback.h>
  19. #include <linux/buffer_head.h> /* sync_mapping_buffers */
  20. #include <linux/fs_context.h>
  21. #include <linux/pseudo_fs.h>
  22. #include <linux/fsnotify.h>
  23. #include <linux/unicode.h>
  24. #include <linux/fscrypt.h>
  25. #include <linux/pidfs.h>
  26. #include <linux/uaccess.h>
  27. #include "internal.h"
/**
 * simple_getattr - getattr for simple in-memory filesystems
 * @idmap: idmap of the mount the inode was found from (not used here;
 *	   attributes are filled with the no-op idmap — deliberate)
 * @path: path to the inode being queried
 * @stat: where the attributes are returned
 * @request_mask: STATX_* mask of requested fields
 * @query_flags: AT_STATX_* query flags (ignored)
 *
 * Fills @stat from the inode, then reports the pages held in the page
 * cache as the block count (in 512-byte units, hence PAGE_SHIFT - 9).
 */
int simple_getattr(struct mnt_idmap *idmap, const struct path *path,
		   struct kstat *stat, u32 request_mask,
		   unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);

	/* note: uses nop_mnt_idmap, not @idmap */
	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
	stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
	return 0;
}
EXPORT_SYMBOL(simple_getattr);
  38. int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
  39. {
  40. u64 id = huge_encode_dev(dentry->d_sb->s_dev);
  41. buf->f_fsid = u64_to_fsid(id);
  42. buf->f_type = dentry->d_sb->s_magic;
  43. buf->f_bsize = PAGE_SIZE;
  44. buf->f_namelen = NAME_MAX;
  45. return 0;
  46. }
  47. EXPORT_SYMBOL(simple_statfs);
  48. /*
  49. * Retaining negative dentries for an in-memory filesystem just wastes
  50. * memory and lookup time: arrange for them to be deleted immediately.
  51. */
/* d_delete op: always say yes, so unused dentries are freed at once */
int always_delete_dentry(const struct dentry *dentry)
{
	return 1;
}
EXPORT_SYMBOL(always_delete_dentry);
/* dentry ops that discard negative dentries as soon as they go unused */
const struct dentry_operations simple_dentry_operations = {
	.d_delete = always_delete_dentry,
};
EXPORT_SYMBOL(simple_dentry_operations);
  61. /*
  62. * Lookup the data. This is trivial - if the dentry didn't already
  63. * exist, we know it is negative. Set d_op to delete negative dentries.
  64. */
struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	if (dentry->d_name.len > NAME_MAX)
		return ERR_PTR(-ENAMETOOLONG);
	/* only install per-dentry ops if the sb doesn't force its own */
	if (!dentry->d_sb->s_d_op)
		d_set_d_op(dentry, &simple_dentry_operations);
	/* hash the dentry as negative (no inode attached) */
	d_add(dentry, NULL);
	return NULL;
}
EXPORT_SYMBOL(simple_lookup);
  75. int dcache_dir_open(struct inode *inode, struct file *file)
  76. {
  77. file->private_data = d_alloc_cursor(file->f_path.dentry);
  78. return file->private_data ? 0 : -ENOMEM;
  79. }
  80. EXPORT_SYMBOL(dcache_dir_open);
/* Release the cursor dentry allocated by dcache_dir_open(). */
int dcache_dir_close(struct inode *inode, struct file *file)
{
	dput(file->private_data);
	return 0;
}
EXPORT_SYMBOL(dcache_dir_close);
  87. /* parent is locked at least shared */
  88. /*
  89. * Returns an element of siblings' list.
  90. * We are looking for <count>th positive after <p>; if
  91. * found, dentry is grabbed and returned to caller.
  92. * If no such element exists, NULL is returned.
  93. */
static struct dentry *scan_positives(struct dentry *cursor,
					struct hlist_node **p,
					loff_t count,
					struct dentry *last)
{
	struct dentry *dentry = cursor->d_parent, *found = NULL;

	spin_lock(&dentry->d_lock);
	while (*p) {
		struct dentry *d = hlist_entry(*p, struct dentry, d_sib);
		p = &d->d_sib.next;
		// we must at least skip cursors, to avoid livelocks
		if (d->d_flags & DCACHE_DENTRY_CURSOR)
			continue;
		if (simple_positive(d) && !--count) {
			/* recheck under d's own lock before grabbing a ref;
			 * it may have gone negative since the test above */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				found = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(found))
				break;
			count = 1;	/* lost the race: take the next one */
		}
		if (need_resched()) {
			/* park the cursor behind d so we can drop the
			 * parent lock, reschedule, and resume from here */
			if (!hlist_unhashed(&cursor->d_sib))
				__hlist_del(&cursor->d_sib);
			hlist_add_behind(&cursor->d_sib, &d->d_sib);
			p = &cursor->d_sib.next;
			spin_unlock(&dentry->d_lock);
			cond_resched();
			spin_lock(&dentry->d_lock);
		}
	}
	spin_unlock(&dentry->d_lock);
	dput(last);	/* drop the caller's previous result, if any */
	return found;
}
loff_t dcache_dir_lseek(struct file *file, loff_t offset, int whence)
{
	struct dentry *dentry = file->f_path.dentry;

	switch (whence) {
		case 1:		/* SEEK_CUR */
			offset += file->f_pos;
			fallthrough;
		case 0:		/* SEEK_SET */
			if (offset >= 0)
				break;
			fallthrough;
		default:
			return -EINVAL;
	}
	if (offset != file->f_pos) {
		struct dentry *cursor = file->private_data;
		struct dentry *to = NULL;

		inode_lock_shared(dentry->d_inode);
		/* positions 0..2 cover "." and ".."; beyond that, walk
		 * to the (offset - 2)'th positive child */
		if (offset > 2)
			to = scan_positives(cursor, &dentry->d_children.first,
					    offset - 2, NULL);
		spin_lock(&dentry->d_lock);
		hlist_del_init(&cursor->d_sib);
		/* re-park the cursor right after the entry seeked to */
		if (to)
			hlist_add_behind(&cursor->d_sib, &to->d_sib);
		spin_unlock(&dentry->d_lock);
		dput(to);
		file->f_pos = offset;
		inode_unlock_shared(dentry->d_inode);
	}
	return offset;
}
EXPORT_SYMBOL(dcache_dir_lseek);
  163. /*
  164. * Directory is locked and all positive dentries in it are safe, since
  165. * for ramfs-type trees they can't go away without unlink() or rmdir(),
  166. * both impossible due to the lock on directory.
  167. */
int dcache_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dentry = file->f_path.dentry;
	struct dentry *cursor = file->private_data;
	struct dentry *next = NULL;
	struct hlist_node **p;

	if (!dir_emit_dots(file, ctx))
		return 0;

	/* pos == 2 means we just emitted ".."; start from the first
	 * child, otherwise resume after the parked cursor */
	if (ctx->pos == 2)
		p = &dentry->d_children.first;
	else
		p = &cursor->d_sib.next;

	while ((next = scan_positives(cursor, p, 1, next)) != NULL) {
		if (!dir_emit(ctx, next->d_name.name, next->d_name.len,
			      d_inode(next)->i_ino,
			      fs_umode_to_dtype(d_inode(next)->i_mode)))
			break;
		ctx->pos++;
		p = &next->d_sib.next;
	}
	/* leave the cursor just before the first unemitted entry */
	spin_lock(&dentry->d_lock);
	hlist_del_init(&cursor->d_sib);
	if (next)
		hlist_add_before(&cursor->d_sib, &next->d_sib);
	spin_unlock(&dentry->d_lock);
	dput(next);
	return 0;
}
EXPORT_SYMBOL(dcache_readdir);
/* read(2) on a directory always fails: use getdents instead */
ssize_t generic_read_dir(struct file *filp, char __user *buf, size_t siz, loff_t *ppos)
{
	return -EISDIR;
}
EXPORT_SYMBOL(generic_read_dir);
/* file_operations for dcache-backed (ramfs-style) directories */
const struct file_operations simple_dir_operations = {
	.open		= dcache_dir_open,
	.release	= dcache_dir_close,
	.llseek		= dcache_dir_lseek,
	.read		= generic_read_dir,
	.iterate_shared	= dcache_readdir,
	.fsync		= noop_fsync,
};
EXPORT_SYMBOL(simple_dir_operations);
/* inode_operations for simple directories: lookup only */
const struct inode_operations simple_dir_inode_operations = {
	.lookup		= simple_lookup,
};
EXPORT_SYMBOL(simple_dir_inode_operations);
/* simple_offset_add() never assigns these to a dentry */
enum {
	DIR_OFFSET_FIRST = 2,		/* Find first real entry */
	DIR_OFFSET_EOD = S32_MAX,	/* end-of-directory sentinel for f_pos */
};

/* simple_offset_add() allocation range */
enum {
	DIR_OFFSET_MIN = DIR_OFFSET_FIRST + 1,
	DIR_OFFSET_MAX = DIR_OFFSET_EOD - 1,
};
/* Record a directory offset in the dentry's d_fsdata. */
static void offset_set(struct dentry *dentry, long offset)
{
	dentry->d_fsdata = (void *)offset;
}
/* Retrieve the directory offset stashed in d_fsdata (0 = none assigned). */
static long dentry2offset(struct dentry *dentry)
{
	return (long)dentry->d_fsdata;
}

/* lockdep class shared by all offset-map maple tree locks */
static struct lock_class_key simple_offset_lock_class;
  234. /**
  235. * simple_offset_init - initialize an offset_ctx
  236. * @octx: directory offset map to be initialized
  237. *
  238. */
void simple_offset_init(struct offset_ctx *octx)
{
	mt_init_flags(&octx->mt, MT_FLAGS_ALLOC_RANGE);
	lockdep_set_class(&octx->mt.ma_lock, &simple_offset_lock_class);
	/* cyclic allocation starts at the lowest assignable offset */
	octx->next_offset = DIR_OFFSET_MIN;
}
  245. /**
  246. * simple_offset_add - Add an entry to a directory's offset map
  247. * @octx: directory offset ctx to be updated
  248. * @dentry: new dentry being added
  249. *
  250. * Returns zero on success. @octx and the dentry's offset are updated.
  251. * Otherwise, a negative errno value is returned.
  252. */
int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry)
{
	unsigned long offset;
	int ret;

	/* a dentry carries at most one offset at a time */
	if (dentry2offset(dentry) != 0)
		return -EBUSY;

	ret = mtree_alloc_cyclic(&octx->mt, &offset, dentry, DIR_OFFSET_MIN,
				 DIR_OFFSET_MAX, &octx->next_offset,
				 GFP_KERNEL);
	if (unlikely(ret < 0))
		return ret == -EBUSY ? -ENOSPC : ret;	/* map full */

	offset_set(dentry, offset);
	return 0;
}
/*
 * simple_offset_replace - insert @dentry at a specific offset
 * @octx: directory offset ctx to be updated
 * @dentry: dentry to insert
 * @offset: offset to assign to @dentry
 *
 * Stores @dentry at @offset in the map (replacing whatever was there)
 * and records the offset in the dentry. Returns zero on success or a
 * negative errno from mtree_store().
 */
static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry,
				 long offset)
{
	int ret;

	ret = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL);
	if (ret)
		return ret;
	offset_set(dentry, offset);
	return 0;
}
  277. /**
  278. * simple_offset_remove - Remove an entry to a directory's offset map
  279. * @octx: directory offset ctx to be updated
  280. * @dentry: dentry being removed
  281. *
  282. */
  283. void simple_offset_remove(struct offset_ctx *octx, struct dentry *dentry)
  284. {
  285. long offset;
  286. offset = dentry2offset(dentry);
  287. if (offset == 0)
  288. return;
  289. mtree_erase(&octx->mt, offset);
  290. offset_set(dentry, 0);
  291. }
  292. /**
  293. * simple_offset_rename - handle directory offsets for rename
  294. * @old_dir: parent directory of source entry
  295. * @old_dentry: dentry of source entry
  296. * @new_dir: parent_directory of destination entry
  297. * @new_dentry: dentry of destination
  298. *
  299. * Caller provides appropriate serialization.
  300. *
  301. * User space expects the directory offset value of the replaced
  302. * (new) directory entry to be unchanged after a rename.
  303. *
  304. * Returns zero on success, a negative errno value on failure.
  305. */
int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry,
			 struct inode *new_dir, struct dentry *new_dentry)
{
	struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
	struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
	long new_offset = dentry2offset(new_dentry);

	simple_offset_remove(old_ctx, old_dentry);

	if (new_offset) {
		/* replacing an existing entry: the moved entry takes
		 * over the replaced entry's offset */
		offset_set(new_dentry, 0);
		return simple_offset_replace(new_ctx, old_dentry, new_offset);
	}
	return simple_offset_add(new_ctx, old_dentry);
}
  319. /**
  320. * simple_offset_rename_exchange - exchange rename with directory offsets
  321. * @old_dir: parent of dentry being moved
  322. * @old_dentry: dentry being moved
  323. * @new_dir: destination parent
  324. * @new_dentry: destination dentry
  325. *
  326. * This API preserves the directory offset values. Caller provides
  327. * appropriate serialization.
  328. *
  329. * Returns zero on success. Otherwise a negative errno is returned and the
  330. * rename is rolled back.
  331. */
int simple_offset_rename_exchange(struct inode *old_dir,
				  struct dentry *old_dentry,
				  struct inode *new_dir,
				  struct dentry *new_dentry)
{
	struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir);
	struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir);
	long old_index = dentry2offset(old_dentry);
	long new_index = dentry2offset(new_dentry);
	int ret;

	simple_offset_remove(old_ctx, old_dentry);
	simple_offset_remove(new_ctx, new_dentry);

	/* each dentry takes over the other's previous offset */
	ret = simple_offset_replace(new_ctx, old_dentry, new_index);
	if (ret)
		goto out_restore;

	ret = simple_offset_replace(old_ctx, new_dentry, old_index);
	if (ret) {
		simple_offset_remove(new_ctx, old_dentry);
		goto out_restore;
	}

	ret = simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);
	if (ret) {
		simple_offset_remove(new_ctx, old_dentry);
		simple_offset_remove(old_ctx, new_dentry);
		goto out_restore;
	}
	return 0;

out_restore:
	/* put both offsets back the way they were */
	(void)simple_offset_replace(old_ctx, old_dentry, old_index);
	(void)simple_offset_replace(new_ctx, new_dentry, new_index);
	return ret;
}
  364. /**
  365. * simple_offset_destroy - Release offset map
  366. * @octx: directory offset ctx that is about to be destroyed
  367. *
  368. * During fs teardown (eg. umount), a directory's offset map might still
 * contain entries. mtree_destroy() cleans out anything that remains.
  370. */
void simple_offset_destroy(struct offset_ctx *octx)
{
	/* releases all remaining nodes in the offset map */
	mtree_destroy(&octx->mt);
}
  375. /**
  376. * offset_dir_llseek - Advance the read position of a directory descriptor
  377. * @file: an open directory whose position is to be updated
  378. * @offset: a byte offset
  379. * @whence: enumerator describing the starting position for this update
  380. *
  381. * SEEK_END, SEEK_DATA, and SEEK_HOLE are not supported for directories.
  382. *
  383. * Returns the updated read position if successful; otherwise a
  384. * negative errno is returned and the read position remains unchanged.
  385. */
static loff_t offset_dir_llseek(struct file *file, loff_t offset, int whence)
{
	switch (whence) {
	case SEEK_CUR:
		offset += file->f_pos;
		fallthrough;
	case SEEK_SET:
		if (offset >= 0)
			break;
		fallthrough;
	default:
		return -EINVAL;
	}
	/* directory offsets are opaque cookies, not byte counts;
	 * anything in [0, LONG_MAX] is acceptable */
	return vfs_setpos(file, offset, LONG_MAX);
}
/*
 * find_positive_dentry - next positive sibling starting at @dentry
 * @parent: directory being walked
 * @dentry: starting point, or NULL to start at the first child
 * @next: if true, begin at @dentry's next sibling instead of @dentry
 *
 * Returns a referenced positive dentry, or NULL if none remains.
 */
static struct dentry *find_positive_dentry(struct dentry *parent,
					   struct dentry *dentry,
					   bool next)
{
	struct dentry *found = NULL;

	spin_lock(&parent->d_lock);
	if (next)
		dentry = d_next_sibling(dentry);
	else if (!dentry)
		dentry = d_first_child(parent);
	hlist_for_each_entry_from(dentry, d_sib) {
		if (!simple_positive(dentry))
			continue;
		/* recheck under the child's own lock before taking a ref */
		spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED);
		if (simple_positive(dentry))
			found = dget_dlock(dentry);
		spin_unlock(&dentry->d_lock);
		if (likely(found))
			break;
	}
	spin_unlock(&parent->d_lock);
	return found;
}
  424. static noinline_for_stack struct dentry *
  425. offset_dir_lookup(struct dentry *parent, loff_t offset)
  426. {
  427. struct inode *inode = d_inode(parent);
  428. struct offset_ctx *octx = inode->i_op->get_offset_ctx(inode);
  429. struct dentry *child, *found = NULL;
  430. MA_STATE(mas, &octx->mt, offset, offset);
  431. if (offset == DIR_OFFSET_FIRST)
  432. found = find_positive_dentry(parent, NULL, false);
  433. else {
  434. rcu_read_lock();
  435. child = mas_find_rev(&mas, DIR_OFFSET_MIN);
  436. found = find_positive_dentry(parent, child, false);
  437. rcu_read_unlock();
  438. }
  439. return found;
  440. }
/* Emit one directory entry; returns false when the caller's buffer is full. */
static bool offset_dir_emit(struct dir_context *ctx, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	return dir_emit(ctx, dentry->d_name.name, dentry->d_name.len,
			inode->i_ino, fs_umode_to_dtype(inode->i_mode));
}
/* Emit entries from ctx->pos until the buffer fills or the directory ends. */
static void offset_iterate_dir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dir = file->f_path.dentry;
	struct dentry *dentry;

	dentry = offset_dir_lookup(dir, ctx->pos);
	if (!dentry)
		goto out_eod;
	while (true) {
		struct dentry *next;

		/* publish this entry's offset as the resume cookie */
		ctx->pos = dentry2offset(dentry);
		if (!offset_dir_emit(ctx, dentry))
			break;		/* caller's buffer is full */

		next = find_positive_dentry(dir, dentry, true);
		dput(dentry);
		if (!next)
			goto out_eod;
		dentry = next;
	}
	dput(dentry);
	return;

out_eod:
	ctx->pos = DIR_OFFSET_EOD;	/* nothing more to read */
}
  470. /**
  471. * offset_readdir - Emit entries starting at offset @ctx->pos
  472. * @file: an open directory to iterate over
  473. * @ctx: directory iteration context
  474. *
  475. * Caller must hold @file's i_rwsem to prevent insertion or removal of
  476. * entries during this call.
  477. *
  478. * On entry, @ctx->pos contains an offset that represents the first entry
  479. * to be read from the directory.
  480. *
  481. * The operation continues until there are no more entries to read, or
  482. * until the ctx->actor indicates there is no more space in the caller's
  483. * output buffer.
  484. *
  485. * On return, @ctx->pos contains an offset that will read the next entry
  486. * in this directory when offset_readdir() is called again with @ctx.
  487. * Caller places this value in the d_off field of the last entry in the
  488. * user's buffer.
  489. *
  490. * Return values:
  491. * %0 - Complete
  492. */
static int offset_readdir(struct file *file, struct dir_context *ctx)
{
	struct dentry *dir = file->f_path.dentry;

	lockdep_assert_held(&d_inode(dir)->i_rwsem);

	if (!dir_emit_dots(file, ctx))
		return 0;
	/* DIR_OFFSET_EOD means a previous call exhausted the directory */
	if (ctx->pos != DIR_OFFSET_EOD)
		offset_iterate_dir(file, ctx);
	return 0;
}
/* file_operations for directories iterated via the offset map */
const struct file_operations simple_offset_dir_operations = {
	.llseek		= offset_dir_llseek,
	.iterate_shared	= offset_readdir,
	.read		= generic_read_dir,
	.fsync		= noop_fsync,
};
/*
 * find_next_child - next positive child after @prev
 * @parent: directory being walked
 * @prev: previously returned child, or NULL to start at the beginning
 *
 * Drops the reference on @prev. Returns a referenced positive child,
 * or NULL when none remains.
 */
static struct dentry *find_next_child(struct dentry *parent, struct dentry *prev)
{
	struct dentry *child = NULL, *d;

	spin_lock(&parent->d_lock);
	d = prev ? d_next_sibling(prev) : d_first_child(parent);
	hlist_for_each_entry_from(d, d_sib) {
		if (simple_positive(d)) {
			/* recheck under the child's own lock */
			spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
			if (simple_positive(d))
				child = dget_dlock(d);
			spin_unlock(&d->d_lock);
			if (likely(child))
				break;
		}
	}
	spin_unlock(&parent->d_lock);
	dput(prev);
	return child;
}
/*
 * simple_recursive_removal - tear down a subtree of an in-memory fs
 * @dentry: root of the subtree to remove
 * @callback: optional hook called for each positive dentry being killed
 *
 * Depth-first: descend to a childless node, kill it, ascend, and repeat
 * until @dentry itself is removed. Directory inodes are marked S_DEAD
 * under their lock before their children are reaped.
 */
void simple_recursive_removal(struct dentry *dentry,
			      void (*callback)(struct dentry *))
{
	struct dentry *this = dget(dentry);

	while (true) {
		struct dentry *victim = NULL, *child;
		struct inode *inode = this->d_inode;

		inode_lock_nested(inode, I_MUTEX_CHILD);
		if (d_is_dir(this))
			inode->i_flags |= S_DEAD;
		while ((child = find_next_child(this, victim)) == NULL) {
			// kill and ascend
			// update metadata while it's still locked
			inode_set_ctime_current(inode);
			clear_nlink(inode);
			inode_unlock(inode);
			victim = this;
			this = this->d_parent;
			inode = this->d_inode;
			inode_lock_nested(inode, I_MUTEX_CHILD);
			if (simple_positive(victim)) {
				d_invalidate(victim);	// avoid lost mounts
				if (d_is_dir(victim))
					fsnotify_rmdir(inode, victim);
				else
					fsnotify_unlink(inode, victim);
				if (callback)
					callback(victim);
				dput(victim);		// unpin it
			}
			if (victim == dentry) {
				/* whole subtree gone: fix the parent's
				 * timestamps/nlink and drop our pin */
				inode_set_mtime_to_ts(inode,
						      inode_set_ctime_current(inode));
				if (d_is_dir(dentry))
					drop_nlink(inode);
				inode_unlock(inode);
				dput(dentry);
				return;
			}
		}
		inode_unlock(inode);
		this = child;	/* descend into the child just found */
	}
}
EXPORT_SYMBOL(simple_recursive_removal);
/* default super_operations for pseudo filesystems */
static const struct super_operations simple_super_operations = {
	.statfs		= simple_statfs,
};
/* Build the single-root superblock for a pseudo filesystem. */
static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = fc->fs_private;
	struct inode *root;

	s->s_maxbytes = MAX_LFS_FILESIZE;
	s->s_blocksize = PAGE_SIZE;
	s->s_blocksize_bits = PAGE_SHIFT;
	s->s_magic = ctx->magic;
	/* fall back to the library super_operations if none supplied */
	s->s_op = ctx->ops ?: &simple_super_operations;
	s->s_xattr = ctx->xattr;
	s->s_time_gran = 1;
	root = new_inode(s);
	if (!root)
		return -ENOMEM;

	/*
	 * since this is the first inode, make it number 1. New inodes created
	 * after this must take care not to collide with it (by passing
	 * max_reserved of 1 to iunique).
	 */
	root->i_ino = 1;
	root->i_mode = S_IFDIR | S_IRUSR | S_IWUSR;
	simple_inode_init_ts(root);
	s->s_root = d_make_root(root);
	if (!s->s_root)
		return -ENOMEM;
	s->s_d_op = ctx->dops;
	return 0;
}
/* Create the no-device tree for a pseudo filesystem. */
static int pseudo_fs_get_tree(struct fs_context *fc)
{
	return get_tree_nodev(fc, pseudo_fs_fill_super);
}
/* Free the pseudo_fs_context allocated by init_pseudo(). */
static void pseudo_fs_free(struct fs_context *fc)
{
	kfree(fc->fs_private);
}
/* fs_context_operations for pseudo filesystems */
static const struct fs_context_operations pseudo_fs_context_ops = {
	.free		= pseudo_fs_free,
	.get_tree	= pseudo_fs_get_tree,
};
  616. /*
  617. * Common helper for pseudo-filesystems (sockfs, pipefs, bdev - stuff that
  618. * will never be mountable)
  619. */
  620. struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
  621. unsigned long magic)
  622. {
  623. struct pseudo_fs_context *ctx;
  624. ctx = kzalloc(sizeof(struct pseudo_fs_context), GFP_KERNEL);
  625. if (likely(ctx)) {
  626. ctx->magic = magic;
  627. fc->fs_private = ctx;
  628. fc->ops = &pseudo_fs_context_ops;
  629. fc->sb_flags |= SB_NOUSER;
  630. fc->global = true;
  631. }
  632. return ctx;
  633. }
  634. EXPORT_SYMBOL(init_pseudo);
  635. int simple_open(struct inode *inode, struct file *file)
  636. {
  637. if (inode->i_private)
  638. file->private_data = inode->i_private;
  639. return 0;
  640. }
  641. EXPORT_SYMBOL(simple_open);
/*
 * simple_link - hard-link helper for in-memory filesystems
 *
 * Bumps the inode's link count, updates ctime on the inode and
 * ctime/mtime on the directory, then instantiates the new dentry.
 */
int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(old_dentry);

	inode_set_mtime_to_ts(dir,
			      inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
	inc_nlink(inode);
	ihold(inode);		/* the new link holds an inode reference */
	dget(dentry);		/* the fs pins the dentry */
	d_instantiate(dentry, inode);
	return 0;
}
EXPORT_SYMBOL(simple_link);
/*
 * simple_empty - check whether a directory has any positive children
 *
 * Returns 1 when @dentry has no positive children, 0 otherwise.
 * Negative children do not count.
 */
int simple_empty(struct dentry *dentry)
{
	struct dentry *child;
	int ret = 0;

	spin_lock(&dentry->d_lock);
	hlist_for_each_entry(child, &dentry->d_children, d_sib) {
		spin_lock_nested(&child->d_lock, DENTRY_D_LOCK_NESTED);
		if (simple_positive(child)) {
			spin_unlock(&child->d_lock);
			goto out;	/* found a real entry: not empty */
		}
		spin_unlock(&child->d_lock);
	}
	ret = 1;
out:
	spin_unlock(&dentry->d_lock);
	return ret;
}
EXPORT_SYMBOL(simple_empty);
/*
 * simple_unlink - unlink helper for in-memory filesystems
 *
 * Updates ctime/mtime on the directory and ctime on the inode, drops
 * one link, and releases the reference the fs held on the dentry.
 */
int simple_unlink(struct inode *dir, struct dentry *dentry)
{
	struct inode *inode = d_inode(dentry);

	inode_set_mtime_to_ts(dir,
			      inode_set_ctime_to_ts(dir, inode_set_ctime_current(inode)));
	drop_nlink(inode);
	dput(dentry);
	return 0;
}
EXPORT_SYMBOL(simple_unlink);
/*
 * simple_rmdir - rmdir helper for in-memory filesystems
 *
 * Fails with -ENOTEMPTY while the directory has positive children;
 * otherwise drops the victim's extra link, unlinks it, and drops the
 * parent's link for the removed subdirectory.
 */
int simple_rmdir(struct inode *dir, struct dentry *dentry)
{
	if (!simple_empty(dentry))
		return -ENOTEMPTY;

	drop_nlink(d_inode(dentry));
	simple_unlink(dir, dentry);
	drop_nlink(dir);
	return 0;
}
EXPORT_SYMBOL(simple_rmdir);
/**
 * simple_rename_timestamp - update the various inode timestamps for rename
 * @old_dir: old parent directory
 * @old_dentry: dentry that is being renamed
 * @new_dir: new parent directory
 * @new_dentry: target for rename
 *
 * POSIX mandates that the old and new parent directories have their ctime and
 * mtime updated, and that inodes of @old_dentry and @new_dentry (if any), have
 * their ctime updated.
 */
void simple_rename_timestamp(struct inode *old_dir, struct dentry *old_dentry,
			     struct inode *new_dir, struct dentry *new_dentry)
{
	struct inode *newino = d_inode(new_dentry);

	inode_set_mtime_to_ts(old_dir, inode_set_ctime_current(old_dir));
	/* avoid double-updating when renaming within one directory */
	if (new_dir != old_dir)
		inode_set_mtime_to_ts(new_dir,
				      inode_set_ctime_current(new_dir));
	inode_set_ctime_current(d_inode(old_dentry));
	/* new_dentry may be negative (no replaced victim) */
	if (newino)
		inode_set_ctime_current(newino);
}
EXPORT_SYMBOL_GPL(simple_rename_timestamp);
  717. int simple_rename_exchange(struct inode *old_dir, struct dentry *old_dentry,
  718. struct inode *new_dir, struct dentry *new_dentry)
  719. {
  720. bool old_is_dir = d_is_dir(old_dentry);
  721. bool new_is_dir = d_is_dir(new_dentry);
  722. if (old_dir != new_dir && old_is_dir != new_is_dir) {
  723. if (old_is_dir) {
  724. drop_nlink(old_dir);
  725. inc_nlink(new_dir);
  726. } else {
  727. drop_nlink(new_dir);
  728. inc_nlink(old_dir);
  729. }
  730. }
  731. simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
  732. return 0;
  733. }
  734. EXPORT_SYMBOL_GPL(simple_rename_exchange);
/*
 * simple_rename - ->rename for in-memory filesystems.
 *
 * Supports RENAME_NOREPLACE (the VFS enforces it before calling here)
 * and RENAME_EXCHANGE; any other flag is rejected with -EINVAL.
 * Maintains parent link counts when a directory changes parent, and
 * updates the POSIX-mandated rename timestamps.
 */
int simple_rename(struct mnt_idmap *idmap, struct inode *old_dir,
		  struct dentry *old_dentry, struct inode *new_dir,
		  struct dentry *new_dentry, unsigned int flags)
{
	int they_are_dirs = d_is_dir(old_dentry);

	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
		return -EINVAL;

	if (flags & RENAME_EXCHANGE)
		return simple_rename_exchange(old_dir, old_dentry, new_dir, new_dentry);

	/* a replaced directory must itself be empty */
	if (!simple_empty(new_dentry))
		return -ENOTEMPTY;

	if (d_really_is_positive(new_dentry)) {
		/* unlink the victim; for dirs also drop its "." and old ".." */
		simple_unlink(new_dir, new_dentry);
		if (they_are_dirs) {
			drop_nlink(d_inode(new_dentry));
			drop_nlink(old_dir);
		}
	} else if (they_are_dirs) {
		/* directory moved to a new parent: its ".." migrates */
		drop_nlink(old_dir);
		inc_nlink(new_dir);
	}

	simple_rename_timestamp(old_dir, old_dentry, new_dir, new_dentry);
	return 0;
}
EXPORT_SYMBOL(simple_rename);
  760. /**
  761. * simple_setattr - setattr for simple filesystem
  762. * @idmap: idmap of the target mount
  763. * @dentry: dentry
  764. * @iattr: iattr structure
  765. *
  766. * Returns 0 on success, -error on failure.
  767. *
  768. * simple_setattr is a simple ->setattr implementation without a proper
  769. * implementation of size changes.
  770. *
  771. * It can either be used for in-memory filesystems or special files
  772. * on simple regular filesystems. Anything that needs to change on-disk
  773. * or wire state on size changes needs its own setattr method.
  774. */
  775. int simple_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
  776. struct iattr *iattr)
  777. {
  778. struct inode *inode = d_inode(dentry);
  779. int error;
  780. error = setattr_prepare(idmap, dentry, iattr);
  781. if (error)
  782. return error;
  783. if (iattr->ia_valid & ATTR_SIZE)
  784. truncate_setsize(inode, iattr->ia_size);
  785. setattr_copy(idmap, inode, iattr);
  786. mark_inode_dirty(inode);
  787. return 0;
  788. }
  789. EXPORT_SYMBOL(simple_setattr);
/*
 * ->read_folio for filesystems with no backing store: a "read" of a folio
 * that isn't in the pagecache just yields zeroes.
 */
static int simple_read_folio(struct file *file, struct folio *folio)
{
	folio_zero_range(folio, 0, folio_size(folio));
	flush_dcache_folio(folio);
	folio_mark_uptodate(folio);
	folio_unlock(folio);
	return 0;
}
/*
 * simple_write_begin - .write_begin helper for non-block-device FSes.
 *
 * Looks up (or creates) the folio covering @pos and returns it locked in
 * *@foliop.  For a partial write into a not-yet-uptodate folio, the bytes
 * outside [pos, pos+len) are zeroed so the whole folio can be marked
 * uptodate by simple_write_end().
 */
int simple_write_begin(struct file *file, struct address_space *mapping,
		       loff_t pos, unsigned len,
		       struct folio **foliop, void **fsdata)
{
	struct folio *folio;

	folio = __filemap_get_folio(mapping, pos / PAGE_SIZE, FGP_WRITEBEGIN,
				    mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	*foliop = folio;

	if (!folio_test_uptodate(folio) && (len != folio_size(folio))) {
		size_t from = offset_in_folio(folio, pos);

		/* zero everything the caller is not about to overwrite */
		folio_zero_segments(folio, 0, from,
				    from + len, folio_size(folio));
	}
	return 0;
}
EXPORT_SYMBOL(simple_write_begin);
/**
 * simple_write_end - .write_end helper for non-block-device FSes
 * @file: See .write_end of address_space_operations
 * @mapping: 		"
 * @pos: 		"
 * @len: 		"
 * @copied: 		"
 * @folio: 		"
 * @fsdata: 		"
 *
 * simple_write_end does the minimum needed for updating a folio after
 * writing is done. It has the same API signature as the .write_end of
 * address_space_operations vector. So it can just be set onto .write_end for
 * FSes that don't need any other processing. i_mutex is assumed to be held.
 * Block based filesystems should use generic_write_end().
 * NOTE: Even though i_size might get updated by this function, mark_inode_dirty
 * is not called, so a filesystem that actually does store data in .write_inode
 * should extend on what's done here with a call to mark_inode_dirty() in the
 * case that i_size has changed.
 *
 * Use *ONLY* with simple_read_folio()
 */
static int simple_write_end(struct file *file, struct address_space *mapping,
			    loff_t pos, unsigned len, unsigned copied,
			    struct folio *folio, void *fsdata)
{
	struct inode *inode = folio->mapping->host;
	loff_t last_pos = pos + copied;

	/* zero the stale part of the folio if we did a short copy */
	if (!folio_test_uptodate(folio)) {
		if (copied < len) {
			size_t from = offset_in_folio(folio, pos);

			folio_zero_range(folio, from + copied, len - copied);
		}
		/* rest was zeroed by simple_write_begin(), so now fully valid */
		folio_mark_uptodate(folio);
	}
	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold the i_mutex.
	 */
	if (last_pos > inode->i_size)
		i_size_write(inode, last_pos);

	folio_mark_dirty(folio);
	folio_unlock(folio);
	folio_put(folio);

	return copied;
}
/*
 * Provides ramfs-style behavior: data in the pagecache, but no writeback.
 */
const struct address_space_operations ram_aops = {
	.read_folio	= simple_read_folio,	/* reads yield zeroes */
	.write_begin	= simple_write_begin,
	.write_end	= simple_write_end,
	.dirty_folio	= noop_dirty_folio,	/* dirty data is never written back */
};
EXPORT_SYMBOL(ram_aops);
  873. /*
  874. * the inodes created here are not hashed. If you use iunique to generate
  875. * unique inode values later for this filesystem, then you must take care
  876. * to pass it an appropriate max_reserved value to avoid collisions.
  877. */
  878. int simple_fill_super(struct super_block *s, unsigned long magic,
  879. const struct tree_descr *files)
  880. {
  881. struct inode *inode;
  882. struct dentry *dentry;
  883. int i;
  884. s->s_blocksize = PAGE_SIZE;
  885. s->s_blocksize_bits = PAGE_SHIFT;
  886. s->s_magic = magic;
  887. s->s_op = &simple_super_operations;
  888. s->s_time_gran = 1;
  889. inode = new_inode(s);
  890. if (!inode)
  891. return -ENOMEM;
  892. /*
  893. * because the root inode is 1, the files array must not contain an
  894. * entry at index 1
  895. */
  896. inode->i_ino = 1;
  897. inode->i_mode = S_IFDIR | 0755;
  898. simple_inode_init_ts(inode);
  899. inode->i_op = &simple_dir_inode_operations;
  900. inode->i_fop = &simple_dir_operations;
  901. set_nlink(inode, 2);
  902. s->s_root = d_make_root(inode);
  903. if (!s->s_root)
  904. return -ENOMEM;
  905. for (i = 0; !files->name || files->name[0]; i++, files++) {
  906. if (!files->name)
  907. continue;
  908. /* warn if it tries to conflict with the root inode */
  909. if (unlikely(i == 1))
  910. printk(KERN_WARNING "%s: %s passed in a files array"
  911. "with an index of 1!\n", __func__,
  912. s->s_type->name);
  913. dentry = d_alloc_name(s->s_root, files->name);
  914. if (!dentry)
  915. return -ENOMEM;
  916. inode = new_inode(s);
  917. if (!inode) {
  918. dput(dentry);
  919. return -ENOMEM;
  920. }
  921. inode->i_mode = S_IFREG | files->mode;
  922. simple_inode_init_ts(inode);
  923. inode->i_fop = files->ops;
  924. inode->i_ino = i;
  925. d_add(dentry, inode);
  926. }
  927. return 0;
  928. }
  929. EXPORT_SYMBOL(simple_fill_super);
static DEFINE_SPINLOCK(pin_fs_lock);

/*
 * simple_pin_fs - pin an internal kernel mount of @type.
 *
 * On first use the filesystem is kern-mounted.  The spinlock must be
 * dropped around vfs_kern_mount() (it may sleep), so two callers can
 * race to create the mount; the loser's mount is released by the final
 * mntput(mnt) below.  Every successful call takes one reference on
 * *@mount and increments *@count.
 */
int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count)
{
	struct vfsmount *mnt = NULL;

	spin_lock(&pin_fs_lock);
	if (unlikely(!*mount)) {
		spin_unlock(&pin_fs_lock);
		mnt = vfs_kern_mount(type, SB_KERNMOUNT, type->name, NULL);
		if (IS_ERR(mnt))
			return PTR_ERR(mnt);
		spin_lock(&pin_fs_lock);
		/* recheck: somebody else may have installed a mount meanwhile */
		if (!*mount)
			*mount = mnt;
	}
	mntget(*mount);
	++*count;
	spin_unlock(&pin_fs_lock);
	/* no-op if mnt is NULL; drops the losing racer's mount otherwise */
	mntput(mnt);
	return 0;
}
EXPORT_SYMBOL(simple_pin_fs);
/*
 * simple_release_fs - undo one simple_pin_fs() reference.
 *
 * Drops one count; when it hits zero, clears *@mount under the lock so
 * the next simple_pin_fs() remounts.  The mntput() happens outside the
 * spinlock.
 */
void simple_release_fs(struct vfsmount **mount, int *count)
{
	struct vfsmount *mnt;

	spin_lock(&pin_fs_lock);
	mnt = *mount;
	if (!--*count)
		*mount = NULL;
	spin_unlock(&pin_fs_lock);
	mntput(mnt);
}
EXPORT_SYMBOL(simple_release_fs);
  962. /**
  963. * simple_read_from_buffer - copy data from the buffer to user space
  964. * @to: the user space buffer to read to
  965. * @count: the maximum number of bytes to read
  966. * @ppos: the current position in the buffer
  967. * @from: the buffer to read from
  968. * @available: the size of the buffer
  969. *
  970. * The simple_read_from_buffer() function reads up to @count bytes from the
  971. * buffer @from at offset @ppos into the user space address starting at @to.
  972. *
  973. * On success, the number of bytes read is returned and the offset @ppos is
  974. * advanced by this number, or negative value is returned on error.
  975. **/
  976. ssize_t simple_read_from_buffer(void __user *to, size_t count, loff_t *ppos,
  977. const void *from, size_t available)
  978. {
  979. loff_t pos = *ppos;
  980. size_t ret;
  981. if (pos < 0)
  982. return -EINVAL;
  983. if (pos >= available || !count)
  984. return 0;
  985. if (count > available - pos)
  986. count = available - pos;
  987. ret = copy_to_user(to, from + pos, count);
  988. if (ret == count)
  989. return -EFAULT;
  990. count -= ret;
  991. *ppos = pos + count;
  992. return count;
  993. }
  994. EXPORT_SYMBOL(simple_read_from_buffer);
  995. /**
  996. * simple_write_to_buffer - copy data from user space to the buffer
  997. * @to: the buffer to write to
  998. * @available: the size of the buffer
  999. * @ppos: the current position in the buffer
  1000. * @from: the user space buffer to read from
  1001. * @count: the maximum number of bytes to read
  1002. *
  1003. * The simple_write_to_buffer() function reads up to @count bytes from the user
  1004. * space address starting at @from into the buffer @to at offset @ppos.
  1005. *
  1006. * On success, the number of bytes written is returned and the offset @ppos is
  1007. * advanced by this number, or negative value is returned on error.
  1008. **/
  1009. ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos,
  1010. const void __user *from, size_t count)
  1011. {
  1012. loff_t pos = *ppos;
  1013. size_t res;
  1014. if (pos < 0)
  1015. return -EINVAL;
  1016. if (pos >= available || !count)
  1017. return 0;
  1018. if (count > available - pos)
  1019. count = available - pos;
  1020. res = copy_from_user(to + pos, from, count);
  1021. if (res == count)
  1022. return -EFAULT;
  1023. count -= res;
  1024. *ppos = pos + count;
  1025. return count;
  1026. }
  1027. EXPORT_SYMBOL(simple_write_to_buffer);
  1028. /**
  1029. * memory_read_from_buffer - copy data from the buffer
  1030. * @to: the kernel space buffer to read to
  1031. * @count: the maximum number of bytes to read
  1032. * @ppos: the current position in the buffer
  1033. * @from: the buffer to read from
  1034. * @available: the size of the buffer
  1035. *
  1036. * The memory_read_from_buffer() function reads up to @count bytes from the
  1037. * buffer @from at offset @ppos into the kernel space address starting at @to.
  1038. *
  1039. * On success, the number of bytes read is returned and the offset @ppos is
  1040. * advanced by this number, or negative value is returned on error.
  1041. **/
  1042. ssize_t memory_read_from_buffer(void *to, size_t count, loff_t *ppos,
  1043. const void *from, size_t available)
  1044. {
  1045. loff_t pos = *ppos;
  1046. if (pos < 0)
  1047. return -EINVAL;
  1048. if (pos >= available)
  1049. return 0;
  1050. if (count > available - pos)
  1051. count = available - pos;
  1052. memcpy(to, from + pos, count);
  1053. *ppos = pos + count;
  1054. return count;
  1055. }
  1056. EXPORT_SYMBOL(memory_read_from_buffer);
/*
 * Transaction based IO.
 * The file expects a single write which triggers the transaction, and then
 * possibly a read which collects the result - which is stored in a
 * file-local buffer.
 */

/* Publish @n bytes of response in the per-open transaction buffer. */
void simple_transaction_set(struct file *file, size_t n)
{
	struct simple_transaction_argresp *ar = file->private_data;

	BUG_ON(n > SIMPLE_TRANSACTION_LIMIT);

	/*
	 * The barrier ensures that ar->size will really remain zero until
	 * ar->data is ready for reading.
	 */
	smp_mb();
	ar->size = n;
}
EXPORT_SYMBOL(simple_transaction_set);
/*
 * simple_transaction_get - allocate the per-open transaction buffer and copy
 * the user's request into it.
 *
 * Returns a pointer to the request bytes on success, or an ERR_PTR():
 * -EFBIG if @size doesn't leave room for a terminating NUL (the page is
 * zeroed, so ar->data stays NUL-terminated), -ENOMEM on allocation
 * failure, -EBUSY on a second write to the same open file, -EFAULT on a
 * bad user buffer.
 */
char *simple_transaction_get(struct file *file, const char __user *buf, size_t size)
{
	struct simple_transaction_argresp *ar;
	static DEFINE_SPINLOCK(simple_transaction_lock);

	if (size > SIMPLE_TRANSACTION_LIMIT - 1)
		return ERR_PTR(-EFBIG);

	ar = (struct simple_transaction_argresp *)get_zeroed_page(GFP_KERNEL);
	if (!ar)
		return ERR_PTR(-ENOMEM);

	spin_lock(&simple_transaction_lock);

	/* only one write allowed per open */
	if (file->private_data) {
		spin_unlock(&simple_transaction_lock);
		free_page((unsigned long)ar);
		return ERR_PTR(-EBUSY);
	}

	file->private_data = ar;

	spin_unlock(&simple_transaction_lock);

	if (copy_from_user(ar->data, buf, size))
		return ERR_PTR(-EFAULT);

	return ar->data;
}
EXPORT_SYMBOL(simple_transaction_get);
  1098. ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos)
  1099. {
  1100. struct simple_transaction_argresp *ar = file->private_data;
  1101. if (!ar)
  1102. return 0;
  1103. return simple_read_from_buffer(buf, size, pos, ar->data, ar->size);
  1104. }
  1105. EXPORT_SYMBOL(simple_transaction_read);
/*
 * simple_transaction_release - free the per-open transaction buffer.
 * private_data may still be NULL if no write ever happened; free_page()
 * on the resulting zero address is a no-op.
 */
int simple_transaction_release(struct inode *inode, struct file *file)
{
	free_page((unsigned long)file->private_data);
	return 0;
}
EXPORT_SYMBOL(simple_transaction_release);
/* Simple attribute files */
struct simple_attr {
	int (*get)(void *, u64 *);	/* fetch the current value; NULL -> no reads */
	int (*set)(void *, u64);	/* store a new value; NULL -> no writes */
	char get_buf[24]; /* enough to store a u64 and "\n\0" */
	char set_buf[24];	/* staging buffer for parsing written input */
	void *data;		/* opaque cookie (inode->i_private) passed to get/set */
	const char *fmt; /* format for read operation */
	struct mutex mutex; /* protects access to these buffers */
};
  1122. /* simple_attr_open is called by an actual attribute open file operation
  1123. * to set the attribute specific access operations. */
  1124. int simple_attr_open(struct inode *inode, struct file *file,
  1125. int (*get)(void *, u64 *), int (*set)(void *, u64),
  1126. const char *fmt)
  1127. {
  1128. struct simple_attr *attr;
  1129. attr = kzalloc(sizeof(*attr), GFP_KERNEL);
  1130. if (!attr)
  1131. return -ENOMEM;
  1132. attr->get = get;
  1133. attr->set = set;
  1134. attr->data = inode->i_private;
  1135. attr->fmt = fmt;
  1136. mutex_init(&attr->mutex);
  1137. file->private_data = attr;
  1138. return nonseekable_open(inode, file);
  1139. }
  1140. EXPORT_SYMBOL_GPL(simple_attr_open);
/* Free the attribute state allocated by simple_attr_open(). */
int simple_attr_release(struct inode *inode, struct file *file)
{
	kfree(file->private_data);
	return 0;
}
EXPORT_SYMBOL_GPL(simple_attr_release);	/* GPL-only?  This?  Really? */
/* read from the buffer that is filled with the get function */
ssize_t simple_attr_read(struct file *file, char __user *buf,
			 size_t len, loff_t *ppos)
{
	struct simple_attr *attr;
	size_t size;
	ssize_t ret;

	attr = file->private_data;

	if (!attr->get)
		return -EACCES;

	ret = mutex_lock_interruptible(&attr->mutex);
	if (ret)
		return ret;

	if (*ppos && attr->get_buf[0]) {
		/* continued read: reuse the value formatted on the first read */
		size = strlen(attr->get_buf);
	} else {
		/* first read: fetch the value and format it into get_buf */
		u64 val;

		ret = attr->get(attr->data, &val);
		if (ret)
			goto out;

		size = scnprintf(attr->get_buf, sizeof(attr->get_buf),
				 attr->fmt, (unsigned long long)val);
	}

	ret = simple_read_from_buffer(buf, len, ppos, attr->get_buf, size);
out:
	mutex_unlock(&attr->mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(simple_attr_read);
/* interpret the buffer as a number to call the set function with */
static ssize_t simple_attr_write_xsigned(struct file *file, const char __user *buf,
					 size_t len, loff_t *ppos, bool is_signed)
{
	struct simple_attr *attr;
	unsigned long long val;
	size_t size;
	ssize_t ret;

	attr = file->private_data;
	if (!attr->set)
		return -EACCES;

	ret = mutex_lock_interruptible(&attr->mutex);
	if (ret)
		return ret;

	ret = -EFAULT;
	/* leave room for the terminating NUL */
	size = min(sizeof(attr->set_buf) - 1, len);
	if (copy_from_user(attr->set_buf, buf, size))
		goto out;

	attr->set_buf[size] = '\0';
	/* base 0: accepts decimal, octal (0...) and hex (0x...) input */
	if (is_signed)
		ret = kstrtoll(attr->set_buf, 0, &val);
	else
		ret = kstrtoull(attr->set_buf, 0, &val);
	if (ret)
		goto out;
	ret = attr->set(attr->data, val);
	if (ret == 0)
		ret = len; /* on success, claim we got the whole input */
out:
	mutex_unlock(&attr->mutex);
	return ret;
}
/* Unsigned variant: input is parsed with kstrtoull(). */
ssize_t simple_attr_write(struct file *file, const char __user *buf,
			  size_t len, loff_t *ppos)
{
	return simple_attr_write_xsigned(file, buf, len, ppos, false);
}
EXPORT_SYMBOL_GPL(simple_attr_write);
/* Signed variant: input is parsed with kstrtoll(), so "-1" is accepted. */
ssize_t simple_attr_write_signed(struct file *file, const char __user *buf,
				 size_t len, loff_t *ppos)
{
	return simple_attr_write_xsigned(file, buf, len, ppos, true);
}
EXPORT_SYMBOL_GPL(simple_attr_write_signed);
/**
 * generic_encode_ino32_fh - generic export_operations->encode_fh function
 * @inode:   the object to encode
 * @fh:      where to store the file handle fragment
 * @max_len: maximum length to store there (in 4 byte units)
 * @parent:  parent directory inode, if wanted
 *
 * This generic encode_fh function assumes that the 32 inode number
 * is suitable for locating an inode, and that the generation number
 * can be used to check that it is still valid.  It places them in the
 * filehandle fragment where export_decode_fh expects to find them.
 */
int generic_encode_ino32_fh(struct inode *inode, __u32 *fh, int *max_len,
			    struct inode *parent)
{
	struct fid *fid = (void *)fh;
	int len = *max_len;
	int type = FILEID_INO32_GEN;

	/* 2 words for ino+gen, 4 when the parent is included; tell the
	 * caller the required size via *max_len when it is too small */
	if (parent && (len < 4)) {
		*max_len = 4;
		return FILEID_INVALID;
	} else if (len < 2) {
		*max_len = 2;
		return FILEID_INVALID;
	}

	len = 2;
	fid->i32.ino = inode->i_ino;
	fid->i32.gen = inode->i_generation;
	if (parent) {
		fid->i32.parent_ino = parent->i_ino;
		fid->i32.parent_gen = parent->i_generation;
		len = 4;
		type = FILEID_INO32_GEN_PARENT;
	}
	*max_len = len;
	return type;
}
EXPORT_SYMBOL_GPL(generic_encode_ino32_fh);
  1260. /**
  1261. * generic_fh_to_dentry - generic helper for the fh_to_dentry export operation
  1262. * @sb: filesystem to do the file handle conversion on
  1263. * @fid: file handle to convert
  1264. * @fh_len: length of the file handle in bytes
  1265. * @fh_type: type of file handle
  1266. * @get_inode: filesystem callback to retrieve inode
  1267. *
  1268. * This function decodes @fid as long as it has one of the well-known
  1269. * Linux filehandle types and calls @get_inode on it to retrieve the
  1270. * inode for the object specified in the file handle.
  1271. */
  1272. struct dentry *generic_fh_to_dentry(struct super_block *sb, struct fid *fid,
  1273. int fh_len, int fh_type, struct inode *(*get_inode)
  1274. (struct super_block *sb, u64 ino, u32 gen))
  1275. {
  1276. struct inode *inode = NULL;
  1277. if (fh_len < 2)
  1278. return NULL;
  1279. switch (fh_type) {
  1280. case FILEID_INO32_GEN:
  1281. case FILEID_INO32_GEN_PARENT:
  1282. inode = get_inode(sb, fid->i32.ino, fid->i32.gen);
  1283. break;
  1284. }
  1285. return d_obtain_alias(inode);
  1286. }
  1287. EXPORT_SYMBOL_GPL(generic_fh_to_dentry);
  1288. /**
  1289. * generic_fh_to_parent - generic helper for the fh_to_parent export operation
  1290. * @sb: filesystem to do the file handle conversion on
  1291. * @fid: file handle to convert
  1292. * @fh_len: length of the file handle in bytes
  1293. * @fh_type: type of file handle
  1294. * @get_inode: filesystem callback to retrieve inode
  1295. *
  1296. * This function decodes @fid as long as it has one of the well-known
  1297. * Linux filehandle types and calls @get_inode on it to retrieve the
  1298. * inode for the _parent_ object specified in the file handle if it
  1299. * is specified in the file handle, or NULL otherwise.
  1300. */
  1301. struct dentry *generic_fh_to_parent(struct super_block *sb, struct fid *fid,
  1302. int fh_len, int fh_type, struct inode *(*get_inode)
  1303. (struct super_block *sb, u64 ino, u32 gen))
  1304. {
  1305. struct inode *inode = NULL;
  1306. if (fh_len <= 2)
  1307. return NULL;
  1308. switch (fh_type) {
  1309. case FILEID_INO32_GEN_PARENT:
  1310. inode = get_inode(sb, fid->i32.parent_ino,
  1311. (fh_len > 3 ? fid->i32.parent_gen : 0));
  1312. break;
  1313. }
  1314. return d_obtain_alias(inode);
  1315. }
  1316. EXPORT_SYMBOL_GPL(generic_fh_to_parent);
/**
 * __generic_file_fsync - generic fsync implementation for simple filesystems
 *
 * @file:	file to synchronize
 * @start:	start offset in bytes
 * @end:	end offset in bytes (inclusive)
 * @datasync:	only synchronize essential metadata if true
 *
 * This is a generic implementation of the fsync method for simple
 * filesystems which track all non-inode metadata in the buffers list
 * hanging off the address_space structure.
 *
 * Note: does not issue a device cache flush; see generic_file_fsync()
 * for the variant that does.
 */
int __generic_file_fsync(struct file *file, loff_t start, loff_t end,
			 int datasync)
{
	struct inode *inode = file->f_mapping->host;
	int err;
	int ret;

	err = file_write_and_wait_range(file, start, end);
	if (err)
		return err;

	inode_lock(inode);
	ret = sync_mapping_buffers(inode->i_mapping);
	/* clean inode: no metadata writeback needed */
	if (!(inode->i_state & I_DIRTY_ALL))
		goto out;
	/* datasync only forces writeback for datasync-critical metadata */
	if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
		goto out;

	err = sync_inode_metadata(inode, 1);
	if (ret == 0)
		ret = err;	/* first error wins */

out:
	inode_unlock(inode);
	/* check and advance again to catch errors after syncing out buffers */
	err = file_check_and_advance_wb_err(file);
	if (ret == 0)
		ret = err;
	return ret;
}
EXPORT_SYMBOL(__generic_file_fsync);
/**
 * generic_file_fsync - generic fsync implementation for simple filesystems
 *			with flush
 * @file:	file to synchronize
 * @start:	start offset in bytes
 * @end:	end offset in bytes (inclusive)
 * @datasync:	only synchronize essential metadata if true
 *
 * Like __generic_file_fsync(), but additionally flushes the backing
 * block device's write cache on success.
 */
int generic_file_fsync(struct file *file, loff_t start, loff_t end,
		       int datasync)
{
	struct inode *inode = file->f_mapping->host;
	int err;

	err = __generic_file_fsync(file, start, end, datasync);
	if (err)
		return err;
	return blkdev_issue_flush(inode->i_sb->s_bdev);
}
EXPORT_SYMBOL(generic_file_fsync);
  1376. /**
  1377. * generic_check_addressable - Check addressability of file system
  1378. * @blocksize_bits: log of file system block size
  1379. * @num_blocks: number of blocks in file system
  1380. *
  1381. * Determine whether a file system with @num_blocks blocks (and a
  1382. * block size of 2**@blocksize_bits) is addressable by the sector_t
  1383. * and page cache of the system. Return 0 if so and -EFBIG otherwise.
  1384. */
  1385. int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
  1386. {
  1387. u64 last_fs_block = num_blocks - 1;
  1388. u64 last_fs_page =
  1389. last_fs_block >> (PAGE_SHIFT - blocksize_bits);
  1390. if (unlikely(num_blocks == 0))
  1391. return 0;
  1392. if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
  1393. return -EINVAL;
  1394. if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
  1395. (last_fs_page > (pgoff_t)(~0ULL))) {
  1396. return -EFBIG;
  1397. }
  1398. return 0;
  1399. }
  1400. EXPORT_SYMBOL(generic_check_addressable);
/*
 * No-op implementation of ->fsync for in-memory filesystems.
 */
int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	return 0;
}
EXPORT_SYMBOL(noop_fsync);
/* Always-failing ->direct_IO placeholder; see the comment below. */
ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * iomap based filesystems support direct I/O without need for
	 * this callback. However, it still needs to be set in
	 * inode->a_ops so that open/fcntl know that direct I/O is
	 * generally supported.
	 */
	return -EINVAL;
}
EXPORT_SYMBOL_GPL(noop_direct_IO);
/* Because kfree isn't assignment-compatible with void(void*) ;-/ */
void kfree_link(void *p)
{
	kfree(p);
}
EXPORT_SYMBOL(kfree_link);
  1426. struct inode *alloc_anon_inode(struct super_block *s)
  1427. {
  1428. static const struct address_space_operations anon_aops = {
  1429. .dirty_folio = noop_dirty_folio,
  1430. };
  1431. struct inode *inode = new_inode_pseudo(s);
  1432. if (!inode)
  1433. return ERR_PTR(-ENOMEM);
  1434. inode->i_ino = get_next_ino();
  1435. inode->i_mapping->a_ops = &anon_aops;
  1436. /*
  1437. * Mark the inode dirty from the very beginning,
  1438. * that way it will never be moved to the dirty
  1439. * list because mark_inode_dirty() will think
  1440. * that it already _is_ on the dirty list.
  1441. */
  1442. inode->i_state = I_DIRTY;
  1443. inode->i_mode = S_IRUSR | S_IWUSR;
  1444. inode->i_uid = current_fsuid();
  1445. inode->i_gid = current_fsgid();
  1446. inode->i_flags |= S_PRIVATE;
  1447. simple_inode_init_ts(inode);
  1448. return inode;
  1449. }
  1450. EXPORT_SYMBOL(alloc_anon_inode);
/**
 * simple_nosetlease - generic helper for prohibiting leases
 * @filp: file pointer
 * @arg: type of lease to obtain
 * @flp: new lease supplied for insertion
 * @priv: private data for lm_setup operation
 *
 * Generic helper for filesystems that do not wish to allow leases to be set.
 * All arguments are ignored and it just returns -EINVAL.
 */
int
simple_nosetlease(struct file *filp, int arg, struct file_lease **flp,
		  void **priv)
{
	return -EINVAL;
}
EXPORT_SYMBOL(simple_nosetlease);
/**
 * simple_get_link - generic helper to get the target of "fast" symlinks
 * @dentry: not used here
 * @inode: the symlink inode
 * @done: not used here
 *
 * Generic helper for filesystems to use for symlink inodes where a pointer to
 * the symlink target is stored in ->i_link. NOTE: this isn't normally called,
 * since as an optimization the path lookup code uses any non-NULL ->i_link
 * directly, without calling ->get_link().  But ->get_link() still must be set,
 * to mark the inode_operations as being for a symlink.
 *
 * Return: the symlink target
 */
const char *simple_get_link(struct dentry *dentry, struct inode *inode,
			    struct delayed_call *done)
{
	return inode->i_link;
}
EXPORT_SYMBOL(simple_get_link);

/* Minimal inode_operations for "fast" (->i_link stored) symlinks. */
const struct inode_operations simple_symlink_inode_operations = {
	.get_link = simple_get_link,
};
EXPORT_SYMBOL(simple_symlink_inode_operations);
/*
 * Operations for a permanently empty directory.
 */

/* No names can ever exist in an empty directory. */
static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
{
	return ERR_PTR(-ENOENT);
}
/* stat() for the empty dir; deliberately passes nop_mnt_idmap, ignoring @idmap */
static int empty_dir_getattr(struct mnt_idmap *idmap,
			     const struct path *path, struct kstat *stat,
			     u32 request_mask, unsigned int query_flags)
{
	struct inode *inode = d_inode(path->dentry);

	generic_fillattr(&nop_mnt_idmap, request_mask, inode, stat);
	return 0;
}
/* Attribute changes on the permanently empty directory are forbidden. */
static int empty_dir_setattr(struct mnt_idmap *idmap,
			     struct dentry *dentry, struct iattr *attr)
{
	return -EPERM;
}
/* No xattrs are supported on the permanently empty directory. */
static ssize_t empty_dir_listxattr(struct dentry *dentry, char *list, size_t size)
{
	return -EOPNOTSUPP;
}
/* Inode operations for the permanently empty directory. */
static const struct inode_operations empty_dir_inode_operations = {
	.lookup		= empty_dir_lookup,
	.permission	= generic_permission,
	.setattr	= empty_dir_setattr,
	.getattr	= empty_dir_getattr,
	.listxattr	= empty_dir_listxattr,
};
static loff_t empty_dir_llseek(struct file *file, loff_t offset, int whence)
{
	/* An empty directory has two entries . and .. at offsets 0 and 1 */
	return generic_file_llseek_size(file, offset, whence, 2, 2);
}
/* Only "." and ".." exist; emit whichever of them ctx->pos still needs. */
static int empty_dir_readdir(struct file *file, struct dir_context *ctx)
{
	dir_emit_dots(file, ctx);
	return 0;
}
/* File operations for the permanently empty directory. */
static const struct file_operations empty_dir_operations = {
	.llseek		= empty_dir_llseek,
	.read		= generic_read_dir,
	.iterate_shared	= empty_dir_readdir,
	.fsync		= noop_fsync,
};
  1539. void make_empty_dir_inode(struct inode *inode)
  1540. {
  1541. set_nlink(inode, 2);
  1542. inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
  1543. inode->i_uid = GLOBAL_ROOT_UID;
  1544. inode->i_gid = GLOBAL_ROOT_GID;
  1545. inode->i_rdev = 0;
  1546. inode->i_size = 0;
  1547. inode->i_blkbits = PAGE_SHIFT;
  1548. inode->i_blocks = 0;
  1549. inode->i_op = &empty_dir_inode_operations;
  1550. inode->i_opflags &= ~IOP_XATTR;
  1551. inode->i_fop = &empty_dir_operations;
  1552. }
  1553. bool is_empty_dir_inode(struct inode *inode)
  1554. {
  1555. return (inode->i_fop == &empty_dir_operations) &&
  1556. (inode->i_op == &empty_dir_inode_operations);
  1557. }
  1558. #if IS_ENABLED(CONFIG_UNICODE)
/**
 * generic_ci_d_compare - generic d_compare implementation for casefolding filesystems
 * @dentry: dentry whose name we are checking against
 * @len: len of name of dentry
 * @str: str pointer to name of dentry
 * @name: Name to compare against
 *
 * May be called under RCU-walk, so it must not block or touch freed
 * memory: the parent and its inode are sampled with READ_ONCE().
 *
 * Return: 0 if names match, 1 if mismatch, or -ERRNO
 */
static int generic_ci_d_compare(const struct dentry *dentry, unsigned int len,
				const char *str, const struct qstr *name)
{
	const struct dentry *parent;
	const struct inode *dir;
	char strbuf[DNAME_INLINE_LEN];
	struct qstr qstr;

	/*
	 * Attempt a case-sensitive match first. It is cheaper and
	 * should cover most lookups, including all the sane
	 * applications that expect a case-sensitive filesystem.
	 *
	 * This comparison is safe under RCU because the caller
	 * guarantees the consistency between str and len. See
	 * __d_lookup_rcu_op_compare() for details.
	 */
	if (len == name->len && !memcmp(str, name->name, len))
		return 0;

	/* Only fold case when the parent directory is casefolded. */
	parent = READ_ONCE(dentry->d_parent);
	dir = READ_ONCE(parent->d_inode);
	if (!dir || !IS_CASEFOLDED(dir))
		return 1;

	/*
	 * If the dentry name is stored in-line, then it may be concurrently
	 * modified by a rename. If this happens, the VFS will eventually retry
	 * the lookup, so it doesn't matter what ->d_compare() returns.
	 * However, it's unsafe to call utf8_strncasecmp() with an unstable
	 * string. Therefore, we have to copy the name into a temporary buffer.
	 */
	if (len <= DNAME_INLINE_LEN - 1) {
		memcpy(strbuf, str, len);
		strbuf[len] = 0;
		str = strbuf;
		/* prevent compiler from optimizing out the temporary buffer */
		barrier();
	}
	qstr.len = len;
	qstr.name = str;

	return utf8_strncasecmp(dentry->d_sb->s_encoding, name, &qstr);
}
/**
 * generic_ci_d_hash - generic d_hash implementation for casefolding filesystems
 * @dentry: dentry of the parent directory
 * @str: qstr of name whose hash we should fill in
 *
 * Hashes the casefolded form of @str so that case-insensitive variants
 * of a name land in the same hash bucket. Non-casefolded directories
 * keep the default hash untouched.
 *
 * Return: 0 if hash was successful or unchanged, and -EINVAL on error
 */
static int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
{
	const struct inode *dir = READ_ONCE(dentry->d_inode);
	struct super_block *sb = dentry->d_sb;
	const struct unicode_map *um = sb->s_encoding;
	int ret;

	if (!dir || !IS_CASEFOLDED(dir))
		return 0;

	/*
	 * A casefold failure (invalid UTF-8) is only fatal when the
	 * filesystem was mounted with strict encoding; otherwise the
	 * original hash is simply kept.
	 */
	ret = utf8_casefold_hash(um, dentry, str);
	if (ret < 0 && sb_has_strict_encoding(sb))
		return -EINVAL;
	return 0;
}
/*
 * Dentry operations for casefolding filesystems; also revalidates
 * no-key names when fscrypt is built in.
 */
static const struct dentry_operations generic_ci_dentry_ops = {
	.d_hash		= generic_ci_d_hash,
	.d_compare	= generic_ci_d_compare,
#ifdef CONFIG_FS_ENCRYPTION
	.d_revalidate	= fscrypt_d_revalidate,
#endif
};
/**
 * generic_ci_match() - Match a name (case-insensitively) with a dirent.
 * This is a filesystem helper for comparison with directory entries.
 * generic_ci_d_compare should be used in VFS' ->d_compare instead.
 *
 * @parent: Inode of the parent of the dirent under comparison
 * @name: name under lookup.
 * @folded_name: Optional pre-folded name under lookup
 * @de_name: Dirent name.
 * @de_name_len: dirent name length.
 *
 * Test whether a case-insensitive directory entry matches the filename
 * being searched. If @folded_name is provided, it is used instead of
 * recalculating the casefold of @name.
 *
 * Return: > 0 if the directory entry matches, 0 if it doesn't match, or
 * < 0 on error.
 */
int generic_ci_match(const struct inode *parent,
		     const struct qstr *name,
		     const struct qstr *folded_name,
		     const u8 *de_name, u32 de_name_len)
{
	const struct super_block *sb = parent->i_sb;
	const struct unicode_map *um = sb->s_encoding;
	struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
	struct qstr dirent = QSTR_INIT(de_name, de_name_len);
	int res = 0;

	/*
	 * On an encrypted directory the on-disk name must be decrypted
	 * before it can be compared; decrypted_name.name is allocated
	 * here and freed on every exit path below.
	 */
	if (IS_ENCRYPTED(parent)) {
		const struct fscrypt_str encrypted_name =
			FSTR_INIT((u8 *) de_name, de_name_len);

		if (WARN_ON_ONCE(!fscrypt_has_encryption_key(parent)))
			return -EINVAL;

		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
		if (!decrypted_name.name)
			return -ENOMEM;
		res = fscrypt_fname_disk_to_usr(parent, 0, 0, &encrypted_name,
						&decrypted_name);
		if (res < 0) {
			kfree(decrypted_name.name);
			return res;
		}
		dirent.name = decrypted_name.name;
		dirent.len = decrypted_name.len;
	}

	/*
	 * Attempt a case-sensitive match first. It is cheaper and
	 * should cover most lookups, including all the sane
	 * applications that expect a case-sensitive filesystem.
	 */
	if (dirent.len == name->len &&
	    !memcmp(name->name, dirent.name, dirent.len))
		goto out;

	/* utf8_strncasecmp*() return 0 on match, 1 on mismatch, <0 on error. */
	if (folded_name->name)
		res = utf8_strncasecmp_folded(um, folded_name, &dirent);
	else
		res = utf8_strncasecmp(um, name, &dirent);

out:
	kfree(decrypted_name.name);
	/*
	 * Under strict encoding, an invalid-UTF-8 dirent is logged and
	 * treated as a non-match rather than propagated as an error.
	 */
	if (res < 0 && sb_has_strict_encoding(sb)) {
		pr_err_ratelimited("Directory contains filename that is invalid UTF-8");
		return 0;
	}
	/* Invert: 0 from the comparison means match, so return non-zero. */
	return !res;
}
EXPORT_SYMBOL(generic_ci_match);
  1701. #endif
#ifdef CONFIG_FS_ENCRYPTION
/* Dentry operations for fscrypt-only (non-casefolding) filesystems. */
static const struct dentry_operations generic_encrypted_dentry_ops = {
	.d_revalidate = fscrypt_d_revalidate,
};
#endif
/**
 * generic_set_sb_d_ops - helper for choosing the set of
 * filesystem-wide dentry operations for the enabled features
 * @sb: superblock to be configured
 *
 * Filesystems supporting casefolding and/or fscrypt can call this
 * helper at mount-time to configure sb->s_d_op to best set of dentry
 * operations required for the enabled features. The helper must be
 * called after these have been configured, but before the root dentry
 * is created.
 *
 * Casefolding takes priority: generic_ci_dentry_ops already includes
 * fscrypt revalidation when CONFIG_FS_ENCRYPTION is set. If neither
 * feature is enabled, sb->s_d_op is left untouched.
 */
void generic_set_sb_d_ops(struct super_block *sb)
{
#if IS_ENABLED(CONFIG_UNICODE)
	if (sb->s_encoding) {
		sb->s_d_op = &generic_ci_dentry_ops;
		return;
	}
#endif
#ifdef CONFIG_FS_ENCRYPTION
	if (sb->s_cop) {
		sb->s_d_op = &generic_encrypted_dentry_ops;
		return;
	}
#endif
}
EXPORT_SYMBOL(generic_set_sb_d_ops);
/**
 * inode_maybe_inc_iversion - increments i_version
 * @inode: inode with the i_version that should be updated
 * @force: increment the counter even if it's not necessary?
 *
 * Every time the inode is modified, the i_version field must be seen to have
 * changed by any observer.
 *
 * If "force" is set or the QUERIED flag is set, then ensure that we increment
 * the value, and clear the queried flag.
 *
 * In the common case where neither is set, then we can return "false" without
 * updating i_version.
 *
 * If this function returns false, and no other metadata has changed, then we
 * can avoid logging the metadata.
 */
bool inode_maybe_inc_iversion(struct inode *inode, bool force)
{
	u64 cur, new;

	/*
	 * The i_version field is not strictly ordered with any other inode
	 * information, but the legacy inode_inc_iversion code used a spinlock
	 * to serialize increments.
	 *
	 * We add a full memory barrier to ensure that any de facto ordering
	 * with other state is preserved (either implicitly coming from cmpxchg
	 * or explicitly from smp_mb if we don't know upfront if we will execute
	 * the former).
	 *
	 * These barriers pair with inode_query_iversion().
	 */
	cur = inode_peek_iversion_raw(inode);
	/*
	 * If the fast path (no increment needed) looks likely, issue the
	 * explicit barrier and re-read, since the cmpxchg barrier may
	 * never be executed below.
	 */
	if (!force && !(cur & I_VERSION_QUERIED)) {
		smp_mb();
		cur = inode_peek_iversion_raw(inode);
	}

	do {
		/* If flag is clear then we needn't do anything */
		if (!force && !(cur & I_VERSION_QUERIED))
			return false;

		/* Since lowest bit is flag, add 2 to avoid it */
		new = (cur & ~I_VERSION_QUERIED) + I_VERSION_INCREMENT;
	} while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
	return true;
}
EXPORT_SYMBOL(inode_maybe_inc_iversion);
/**
 * inode_query_iversion - read i_version for later use
 * @inode: inode from which i_version should be read
 *
 * Read the inode i_version counter. This should be used by callers that wish
 * to store the returned i_version for later comparison. This will guarantee
 * that a later query of the i_version will result in a different value if
 * anything has changed.
 *
 * In this implementation, we fetch the current value, set the QUERIED flag and
 * then try to swap it into place with a cmpxchg, if it wasn't already set. If
 * that fails, we try again with the newly fetched value from the cmpxchg.
 */
u64 inode_query_iversion(struct inode *inode)
{
	u64 cur, new;
	bool fenced = false;

	/*
	 * Memory barriers (implicit in cmpxchg, explicit in smp_mb) pair with
	 * inode_maybe_inc_iversion(), see that routine for more details.
	 */
	cur = inode_peek_iversion_raw(inode);
	do {
		/* If flag is already set, then no need to swap */
		if (cur & I_VERSION_QUERIED) {
			/*
			 * Only fence once: a prior failed cmpxchg already
			 * provided the required barrier.
			 */
			if (!fenced)
				smp_mb();
			break;
		}

		fenced = true;
		new = cur | I_VERSION_QUERIED;
	} while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
	/* Strip the QUERIED flag bit off the returned counter value. */
	return cur >> I_VERSION_QUERIED_SHIFT;
}
EXPORT_SYMBOL(inode_query_iversion);
/*
 * direct_write_fallback - finish a write that fell back from direct to
 * buffered I/O
 * @iocb: the in-progress write kiocb (ki_pos already advanced past the
 *	  buffered portion)
 * @iter: iterator for the data being written
 * @direct_written: bytes successfully written via direct I/O (may be 0)
 * @buffered_written: result of the buffered fallback write (bytes or -errno)
 *
 * Returns the total number of bytes written, or a negative errno if
 * nothing at all was written.
 */
ssize_t direct_write_fallback(struct kiocb *iocb, struct iov_iter *iter,
		ssize_t direct_written, ssize_t buffered_written)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	loff_t pos = iocb->ki_pos - buffered_written;
	loff_t end = iocb->ki_pos - 1;
	int err;

	/*
	 * If the buffered write fallback returned an error, we want to return
	 * the number of bytes which were written by direct I/O, or the error
	 * code if that was zero.
	 *
	 * Note that this differs from normal direct-io semantics, which will
	 * return -EFOO even if some bytes were written.
	 */
	if (unlikely(buffered_written < 0)) {
		if (direct_written)
			return direct_written;
		return buffered_written;
	}

	/*
	 * We need to ensure that the page cache pages are written to disk and
	 * invalidated to preserve the expected O_DIRECT semantics.
	 */
	err = filemap_write_and_wait_range(mapping, pos, end);
	if (err < 0) {
		/*
		 * We don't know how much we wrote, so just return the number of
		 * bytes which were direct-written
		 */
		iocb->ki_pos -= buffered_written;
		if (direct_written)
			return direct_written;
		return err;
	}
	invalidate_mapping_pages(mapping, pos >> PAGE_SHIFT, end >> PAGE_SHIFT);
	return direct_written + buffered_written;
}
EXPORT_SYMBOL_GPL(direct_write_fallback);
  1855. /**
  1856. * simple_inode_init_ts - initialize the timestamps for a new inode
  1857. * @inode: inode to be initialized
  1858. *
  1859. * When a new inode is created, most filesystems set the timestamps to the
  1860. * current time. Add a helper to do this.
  1861. */
  1862. struct timespec64 simple_inode_init_ts(struct inode *inode)
  1863. {
  1864. struct timespec64 ts = inode_set_ctime_current(inode);
  1865. inode_set_atime_to_ts(inode, ts);
  1866. inode_set_mtime_to_ts(inode, ts);
  1867. return ts;
  1868. }
  1869. EXPORT_SYMBOL(simple_inode_init_ts);
/*
 * Try to take a reference on the dentry currently stashed at *@stashed.
 *
 * Runs entirely under RCU so the dentry cannot be freed from under us
 * while we attempt the lockref grab. Returns the referenced dentry, or
 * NULL if nothing is stashed or the stashed dentry is already dead.
 */
static inline struct dentry *get_stashed_dentry(struct dentry **stashed)
{
	struct dentry *dentry;

	guard(rcu)();
	dentry = rcu_dereference(*stashed);
	if (!dentry)
		return NULL;
	/* Dead dentries (refcount marked dead) can't be revived. */
	if (!lockref_get_not_dead(&dentry->d_lockref))
		return NULL;
	return dentry;
}
/*
 * Allocate a fresh anonymous dentry+inode pair for stashing.
 *
 * @stashed: address of the stash slot, recorded in d_fsdata so the
 *	     prune callback can later clear the right slot.
 * @sb:      superblock to allocate on; its s_fs_info must point to a
 *	     struct stashed_operations.
 * @data:    payload for sops->init_inode(); ownership transfers to the
 *	     filesystem on success and is released via sops->put_data()
 *	     on the early failure path.
 *
 * Returns the new (unhashed) dentry or an ERR_PTR on failure.
 */
static struct dentry *prepare_anon_dentry(struct dentry **stashed,
					  struct super_block *sb,
					  void *data)
{
	struct dentry *dentry;
	struct inode *inode;
	const struct stashed_operations *sops = sb->s_fs_info;
	int ret;

	inode = new_inode_pseudo(sb);
	if (!inode) {
		/* Inode allocation failed, so @data is still ours to drop. */
		sops->put_data(data);
		return ERR_PTR(-ENOMEM);
	}

	inode->i_flags |= S_IMMUTABLE;
	inode->i_mode = S_IFREG;
	simple_inode_init_ts(inode);

	/* init_inode() consumes @data even on failure — no put_data() here. */
	ret = sops->init_inode(inode, data);
	if (ret < 0) {
		iput(inode);
		return ERR_PTR(ret);
	}

	/* Notice when this is changed. */
	WARN_ON_ONCE(!S_ISREG(inode->i_mode));
	WARN_ON_ONCE(!IS_IMMUTABLE(inode));

	dentry = d_alloc_anon(sb);
	if (!dentry) {
		iput(inode);
		return ERR_PTR(-ENOMEM);
	}

	/* Store address of location where dentry's supposed to be stashed. */
	dentry->d_fsdata = stashed;

	/* @data is now owned by the fs */
	d_instantiate(dentry, inode);
	return dentry;
}
/*
 * Install @dentry into the stash slot, racing with other installers.
 *
 * Returns the dentry that ends up usable: @dentry itself if we won the
 * race (caller's reference is consumed by the stash), or a referenced
 * pre-existing dentry if somebody else stashed a live one first — in
 * that case the caller must dput() its own @dentry.
 */
static struct dentry *stash_dentry(struct dentry **stashed,
				   struct dentry *dentry)
{
	guard(rcu)();
	for (;;) {
		struct dentry *old;

		/* Assume any old dentry was cleared out. */
		old = cmpxchg(stashed, NULL, dentry);
		if (likely(!old))
			return dentry;

		/* Check if somebody else installed a reusable dentry. */
		if (lockref_get_not_dead(&old->d_lockref))
			return old;

		/* There's an old dead dentry there, try to take it over. */
		if (likely(try_cmpxchg(stashed, &old, dentry)))
			return dentry;
	}
}
/**
 * path_from_stashed - create path from stashed or new dentry
 * @stashed: where to retrieve or stash dentry
 * @mnt: mnt of the filesystems to use
 * @data: data to store in inode->i_private
 * @path: path to create
 *
 * The function tries to retrieve a stashed dentry from @stashed. If the dentry
 * is still valid then it will be reused. If the dentry can't be reused, the
 * function will allocate a new dentry and inode. It will then check again
 * whether it can reuse an existing dentry in case one has been added in the
 * meantime or update @stashed with the newly added dentry.
 *
 * Special-purpose helper for nsfs and pidfs.
 *
 * Return: On success zero and on failure a negative error is returned.
 */
int path_from_stashed(struct dentry **stashed, struct vfsmount *mnt, void *data,
		      struct path *path)
{
	struct dentry *dentry;
	const struct stashed_operations *sops = mnt->mnt_sb->s_fs_info;

	/* See if dentry can be reused. */
	path->dentry = get_stashed_dentry(stashed);
	if (path->dentry) {
		/* Reusing an existing dentry: drop the caller's @data. */
		sops->put_data(data);
		goto out_path;
	}

	/* Allocate a new dentry. */
	dentry = prepare_anon_dentry(stashed, mnt->mnt_sb, data);
	if (IS_ERR(dentry))
		return PTR_ERR(dentry);

	/* Added a new dentry. @data is now owned by the filesystem. */
	path->dentry = stash_dentry(stashed, dentry);
	if (path->dentry != dentry)
		dput(dentry);	/* lost the race; use the winner's dentry */

out_path:
	WARN_ON_ONCE(path->dentry->d_fsdata != stashed);
	WARN_ON_ONCE(d_inode(path->dentry)->i_private != data);
	path->mnt = mntget(mnt);
	return 0;
}
/*
 * Clear a dying dentry out of its stash slot (presumably wired up as a
 * ->d_prune callback by the stashing filesystem — confirm with callers).
 */
void stashed_dentry_prune(struct dentry *dentry)
{
	struct dentry **stashed = dentry->d_fsdata;
	struct inode *inode = d_inode(dentry);

	if (WARN_ON_ONCE(!stashed))
		return;

	if (!inode)
		return;

	/*
	 * Only replace our own @dentry as someone else might've
	 * already cleared out @dentry and stashed their own
	 * dentry in there.
	 */
	cmpxchg(stashed, dentry, NULL);
}