dir.c 8.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * Copyright 2017 Omnibond Systems, L.L.C.
  4. */
  5. #include "protocol.h"
  6. #include "orangefs-kernel.h"
  7. #include "orangefs-bufmap.h"
  8. struct orangefs_dir_part {
  9. struct orangefs_dir_part *next;
  10. size_t len;
  11. };
  12. struct orangefs_dir {
  13. __u64 token;
  14. struct orangefs_dir_part *part;
  15. loff_t end;
  16. int error;
  17. };
  18. #define PART_SHIFT (24)
  19. #define PART_SIZE (1<<24)
  20. #define PART_MASK (~(PART_SIZE - 1))
  21. /*
  22. * There can be up to 512 directory entries. Each entry is encoded as
  23. * follows:
  24. * 4 bytes: string size (n)
  25. * n bytes: string
  26. * 1 byte: trailing zero
  27. * padding to 8 bytes
  28. * 16 bytes: khandle
  29. * padding to 8 bytes
  30. *
  31. * The trailer_buf starts with a struct orangefs_readdir_response_s
  32. * which must be skipped to get to the directory data.
  33. *
  34. * The data which is received from the userspace daemon is termed a
  35. * part and is stored in a linked list in case more than one part is
  36. * needed for a large directory.
  37. *
  38. * The position pointer (ctx->pos) encodes the part and offset on which
  39. * to begin reading at. Bits above PART_SHIFT encode the part and bits
  40. * below PART_SHIFT encode the offset. Parts are stored in a linked
  41. * list which grows as data is received from the server. The overhead
  42. * associated with managing the list is presumed to be small compared to
  43. * the overhead of communicating with the server.
  44. *
  45. * As data is received from the server, it is placed at the end of the
  46. * part list. Data is parsed from the current position as it is needed.
  47. * When data is determined to be corrupt, it is either because the
  48. * userspace component has sent back corrupt data or because the file
  49. * pointer has been moved to an invalid location. Since the two cannot
  50. * be differentiated, return EIO.
  51. *
  52. * Part zero is synthesized to contain `.' and `..'. Part one is the
  53. * first part of the part list.
  54. */
  55. static int do_readdir(struct orangefs_dir *od, struct inode *inode,
  56. struct orangefs_kernel_op_s *op)
  57. {
  58. struct orangefs_inode_s *oi = ORANGEFS_I(inode);
  59. struct orangefs_readdir_response_s *resp;
  60. int bufi, r;
  61. /*
  62. * Despite the badly named field, readdir does not use shared
  63. * memory. However, there are a limited number of readdir
  64. * slots, which must be allocated here. This flag simply tells
  65. * the op scheduler to return the op here for retry.
  66. */
  67. op->uses_shared_memory = 1;
  68. op->upcall.req.readdir.refn = oi->refn;
  69. op->upcall.req.readdir.token = od->token;
  70. op->upcall.req.readdir.max_dirent_count =
  71. ORANGEFS_MAX_DIRENT_COUNT_READDIR;
  72. again:
  73. bufi = orangefs_readdir_index_get();
  74. if (bufi < 0) {
  75. od->error = bufi;
  76. return bufi;
  77. }
  78. op->upcall.req.readdir.buf_index = bufi;
  79. r = service_operation(op, "orangefs_readdir",
  80. get_interruptible_flag(inode));
  81. orangefs_readdir_index_put(bufi);
  82. if (op_state_purged(op)) {
  83. if (r == -EAGAIN) {
  84. vfree(op->downcall.trailer_buf);
  85. goto again;
  86. } else if (r == -EIO) {
  87. vfree(op->downcall.trailer_buf);
  88. od->error = r;
  89. return r;
  90. }
  91. }
  92. if (r < 0) {
  93. vfree(op->downcall.trailer_buf);
  94. od->error = r;
  95. return r;
  96. } else if (op->downcall.status) {
  97. vfree(op->downcall.trailer_buf);
  98. od->error = op->downcall.status;
  99. return op->downcall.status;
  100. }
  101. /*
  102. * The maximum size is size per entry times the 512 entries plus
  103. * the header. This is well under the limit.
  104. */
  105. if (op->downcall.trailer_size > PART_SIZE) {
  106. vfree(op->downcall.trailer_buf);
  107. od->error = -EIO;
  108. return -EIO;
  109. }
  110. resp = (struct orangefs_readdir_response_s *)
  111. op->downcall.trailer_buf;
  112. od->token = resp->token;
  113. return 0;
  114. }
  115. static int parse_readdir(struct orangefs_dir *od,
  116. struct orangefs_kernel_op_s *op)
  117. {
  118. struct orangefs_dir_part *part, *new;
  119. size_t count;
  120. count = 1;
  121. part = od->part;
  122. while (part) {
  123. count++;
  124. if (part->next)
  125. part = part->next;
  126. else
  127. break;
  128. }
  129. new = (void *)op->downcall.trailer_buf;
  130. new->next = NULL;
  131. new->len = op->downcall.trailer_size -
  132. sizeof(struct orangefs_readdir_response_s);
  133. if (!od->part)
  134. od->part = new;
  135. else
  136. part->next = new;
  137. count++;
  138. od->end = count << PART_SHIFT;
  139. return 0;
  140. }
  141. static int orangefs_dir_more(struct orangefs_dir *od, struct inode *inode)
  142. {
  143. struct orangefs_kernel_op_s *op;
  144. int r;
  145. op = op_alloc(ORANGEFS_VFS_OP_READDIR);
  146. if (!op) {
  147. od->error = -ENOMEM;
  148. return -ENOMEM;
  149. }
  150. r = do_readdir(od, inode, op);
  151. if (r) {
  152. od->error = r;
  153. goto out;
  154. }
  155. r = parse_readdir(od, op);
  156. if (r) {
  157. od->error = r;
  158. goto out;
  159. }
  160. od->error = 0;
  161. out:
  162. op_release(op);
  163. return od->error;
  164. }
  165. static int fill_from_part(struct orangefs_dir_part *part,
  166. struct dir_context *ctx)
  167. {
  168. const int offset = sizeof(struct orangefs_readdir_response_s);
  169. struct orangefs_khandle *khandle;
  170. __u32 *len, padlen;
  171. loff_t i;
  172. char *s;
  173. i = ctx->pos & ~PART_MASK;
  174. /* The file offset from userspace is too large. */
  175. if (i > part->len)
  176. return 1;
  177. /*
  178. * If the seek pointer is positioned just before an entry it
  179. * should find the next entry.
  180. */
  181. if (i % 8)
  182. i = i + (8 - i%8)%8;
  183. while (i < part->len) {
  184. if (part->len < i + sizeof *len)
  185. break;
  186. len = (void *)part + offset + i;
  187. /*
  188. * len is the size of the string itself. padlen is the
  189. * total size of the encoded string.
  190. */
  191. padlen = (sizeof *len + *len + 1) +
  192. (8 - (sizeof *len + *len + 1)%8)%8;
  193. if (part->len < i + padlen + sizeof *khandle)
  194. goto next;
  195. s = (void *)part + offset + i + sizeof *len;
  196. if (s[*len] != 0)
  197. goto next;
  198. khandle = (void *)part + offset + i + padlen;
  199. if (!dir_emit(ctx, s, *len,
  200. orangefs_khandle_to_ino(khandle),
  201. DT_UNKNOWN))
  202. return 0;
  203. i += padlen + sizeof *khandle;
  204. i = i + (8 - i%8)%8;
  205. BUG_ON(i > part->len);
  206. ctx->pos = (ctx->pos & PART_MASK) | i;
  207. continue;
  208. next:
  209. i += 8;
  210. }
  211. return 1;
  212. }
  213. static int orangefs_dir_fill(struct orangefs_dir *od, struct dir_context *ctx)
  214. {
  215. struct orangefs_dir_part *part;
  216. size_t count;
  217. count = ((ctx->pos & PART_MASK) >> PART_SHIFT) - 1;
  218. part = od->part;
  219. while (part->next && count) {
  220. count--;
  221. part = part->next;
  222. }
  223. /* This means the userspace file offset is invalid. */
  224. if (count) {
  225. od->error = -EIO;
  226. return -EIO;
  227. }
  228. while (part && part->len) {
  229. int r;
  230. r = fill_from_part(part, ctx);
  231. if (r < 0) {
  232. od->error = r;
  233. return r;
  234. } else if (r == 0) {
  235. /* Userspace buffer is full. */
  236. break;
  237. } else {
  238. /*
  239. * The part ran out of data. Move to the next
  240. * part. */
  241. ctx->pos = (ctx->pos & PART_MASK) +
  242. (1 << PART_SHIFT);
  243. part = part->next;
  244. }
  245. }
  246. return 0;
  247. }
  248. static loff_t orangefs_dir_llseek(struct file *file, loff_t offset,
  249. int whence)
  250. {
  251. struct orangefs_dir *od = file->private_data;
  252. /*
  253. * Delete the stored data so userspace sees new directory
  254. * entries.
  255. */
  256. if (!whence && offset < od->end) {
  257. struct orangefs_dir_part *part = od->part;
  258. while (part) {
  259. struct orangefs_dir_part *next = part->next;
  260. vfree(part);
  261. part = next;
  262. }
  263. od->token = ORANGEFS_ITERATE_START;
  264. od->part = NULL;
  265. od->end = 1 << PART_SHIFT;
  266. }
  267. return default_llseek(file, offset, whence);
  268. }
  269. static int orangefs_dir_iterate(struct file *file,
  270. struct dir_context *ctx)
  271. {
  272. struct orangefs_dir *od = file->private_data;
  273. struct inode *inode = file_inode(file);
  274. int r;
  275. if (od->error)
  276. return od->error;
  277. if (ctx->pos == 0) {
  278. if (!dir_emit_dot(file, ctx))
  279. return 0;
  280. ctx->pos++;
  281. }
  282. if (ctx->pos == 1) {
  283. if (!dir_emit_dotdot(file, ctx))
  284. return 0;
  285. ctx->pos = 1 << PART_SHIFT;
  286. }
  287. /*
  288. * The seek position is in the first synthesized part but is not
  289. * valid.
  290. */
  291. if ((ctx->pos & PART_MASK) == 0)
  292. return -EIO;
  293. r = 0;
  294. /*
  295. * Must read more if the user has sought past what has been read
  296. * so far. Stop a user who has sought past the end.
  297. */
  298. while (od->token != ORANGEFS_ITERATE_END &&
  299. ctx->pos > od->end) {
  300. r = orangefs_dir_more(od, inode);
  301. if (r)
  302. return r;
  303. }
  304. if (od->token == ORANGEFS_ITERATE_END && ctx->pos > od->end)
  305. return -EIO;
  306. /* Then try to fill if there's any left in the buffer. */
  307. if (ctx->pos < od->end) {
  308. r = orangefs_dir_fill(od, ctx);
  309. if (r)
  310. return r;
  311. }
  312. /* Finally get some more and try to fill. */
  313. if (od->token != ORANGEFS_ITERATE_END) {
  314. r = orangefs_dir_more(od, inode);
  315. if (r)
  316. return r;
  317. r = orangefs_dir_fill(od, ctx);
  318. }
  319. return r;
  320. }
  321. static int orangefs_dir_open(struct inode *inode, struct file *file)
  322. {
  323. struct orangefs_dir *od;
  324. file->private_data = kmalloc(sizeof(struct orangefs_dir),
  325. GFP_KERNEL);
  326. if (!file->private_data)
  327. return -ENOMEM;
  328. od = file->private_data;
  329. od->token = ORANGEFS_ITERATE_START;
  330. od->part = NULL;
  331. od->end = 1 << PART_SHIFT;
  332. od->error = 0;
  333. return 0;
  334. }
  335. static int orangefs_dir_release(struct inode *inode, struct file *file)
  336. {
  337. struct orangefs_dir *od = file->private_data;
  338. struct orangefs_dir_part *part = od->part;
  339. while (part) {
  340. struct orangefs_dir_part *next = part->next;
  341. vfree(part);
  342. part = next;
  343. }
  344. kfree(od);
  345. return 0;
  346. }
  347. const struct file_operations orangefs_dir_operations = {
  348. .llseek = orangefs_dir_llseek,
  349. .read = generic_read_dir,
  350. .iterate_shared = orangefs_dir_iterate,
  351. .open = orangefs_dir_open,
  352. .release = orangefs_dir_release
  353. };