pnfs_nfs.c 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * Common NFS I/O operations for the pnfs file based
  4. * layout drivers.
  5. *
  6. * Copyright (c) 2014, Primary Data, Inc. All rights reserved.
  7. *
  8. * Tom Haynes <loghyr@primarydata.com>
  9. */
  10. #include <linux/nfs_fs.h>
  11. #include <linux/nfs_page.h>
  12. #include <linux/sunrpc/addr.h>
  13. #include <linux/module.h>
  14. #include "nfs4session.h"
  15. #include "internal.h"
  16. #include "pnfs.h"
  17. #define NFSDBG_FACILITY NFSDBG_PNFS
  18. void pnfs_generic_rw_release(void *data)
  19. {
  20. struct nfs_pgio_header *hdr = data;
  21. nfs_put_client(hdr->ds_clp);
  22. hdr->mds_ops->rpc_release(data);
  23. }
  24. EXPORT_SYMBOL_GPL(pnfs_generic_rw_release);
  25. /* Fake up some data that will cause nfs_commit_release to retry the writes. */
  26. void pnfs_generic_prepare_to_resend_writes(struct nfs_commit_data *data)
  27. {
  28. struct nfs_writeverf *verf = data->res.verf;
  29. data->task.tk_status = 0;
  30. memset(&verf->verifier, 0, sizeof(verf->verifier));
  31. verf->committed = NFS_UNSTABLE;
  32. }
  33. EXPORT_SYMBOL_GPL(pnfs_generic_prepare_to_resend_writes);
  34. void pnfs_generic_write_commit_done(struct rpc_task *task, void *data)
  35. {
  36. struct nfs_commit_data *wdata = data;
  37. /* Note this may cause RPC to be resent */
  38. wdata->mds_ops->rpc_call_done(task, data);
  39. }
  40. EXPORT_SYMBOL_GPL(pnfs_generic_write_commit_done);
  41. void pnfs_generic_commit_release(void *calldata)
  42. {
  43. struct nfs_commit_data *data = calldata;
  44. data->completion_ops->completion(data);
  45. pnfs_put_lseg(data->lseg);
  46. nfs_put_client(data->ds_clp);
  47. nfs_commitdata_release(data);
  48. }
  49. EXPORT_SYMBOL_GPL(pnfs_generic_commit_release);
  50. static struct pnfs_layout_segment *
  51. pnfs_free_bucket_lseg(struct pnfs_commit_bucket *bucket)
  52. {
  53. if (list_empty(&bucket->committing) && list_empty(&bucket->written)) {
  54. struct pnfs_layout_segment *freeme = bucket->lseg;
  55. bucket->lseg = NULL;
  56. return freeme;
  57. }
  58. return NULL;
  59. }
  60. /* The generic layer is about to remove the req from the commit list.
  61. * If this will make the bucket empty, it will need to put the lseg reference.
  62. * Note this must be called holding nfsi->commit_mutex
  63. */
  64. void
  65. pnfs_generic_clear_request_commit(struct nfs_page *req,
  66. struct nfs_commit_info *cinfo)
  67. {
  68. struct pnfs_commit_bucket *bucket = NULL;
  69. if (!test_and_clear_bit(PG_COMMIT_TO_DS, &req->wb_flags))
  70. goto out;
  71. cinfo->ds->nwritten--;
  72. if (list_is_singular(&req->wb_list))
  73. bucket = list_first_entry(&req->wb_list,
  74. struct pnfs_commit_bucket, written);
  75. out:
  76. nfs_request_remove_commit_list(req, cinfo);
  77. if (bucket)
  78. pnfs_put_lseg(pnfs_free_bucket_lseg(bucket));
  79. }
  80. EXPORT_SYMBOL_GPL(pnfs_generic_clear_request_commit);
  81. struct pnfs_commit_array *
  82. pnfs_alloc_commit_array(size_t n, gfp_t gfp_flags)
  83. {
  84. struct pnfs_commit_array *p;
  85. struct pnfs_commit_bucket *b;
  86. p = kmalloc(struct_size(p, buckets, n), gfp_flags);
  87. if (!p)
  88. return NULL;
  89. p->nbuckets = n;
  90. INIT_LIST_HEAD(&p->cinfo_list);
  91. INIT_LIST_HEAD(&p->lseg_list);
  92. p->lseg = NULL;
  93. for (b = &p->buckets[0]; n != 0; b++, n--) {
  94. INIT_LIST_HEAD(&b->written);
  95. INIT_LIST_HEAD(&b->committing);
  96. b->lseg = NULL;
  97. b->direct_verf.committed = NFS_INVALID_STABLE_HOW;
  98. }
  99. return p;
  100. }
  101. EXPORT_SYMBOL_GPL(pnfs_alloc_commit_array);
  102. void
  103. pnfs_free_commit_array(struct pnfs_commit_array *p)
  104. {
  105. kfree_rcu(p, rcu);
  106. }
  107. EXPORT_SYMBOL_GPL(pnfs_free_commit_array);
  108. static struct pnfs_commit_array *
  109. pnfs_find_commit_array_by_lseg(struct pnfs_ds_commit_info *fl_cinfo,
  110. struct pnfs_layout_segment *lseg)
  111. {
  112. struct pnfs_commit_array *array;
  113. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  114. if (array->lseg == lseg)
  115. return array;
  116. }
  117. return NULL;
  118. }
  119. struct pnfs_commit_array *
  120. pnfs_add_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
  121. struct pnfs_commit_array *new,
  122. struct pnfs_layout_segment *lseg)
  123. {
  124. struct pnfs_commit_array *array;
  125. array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
  126. if (array)
  127. return array;
  128. new->lseg = lseg;
  129. refcount_set(&new->refcount, 1);
  130. list_add_rcu(&new->cinfo_list, &fl_cinfo->commits);
  131. list_add(&new->lseg_list, &lseg->pls_commits);
  132. return new;
  133. }
  134. EXPORT_SYMBOL_GPL(pnfs_add_commit_array);
  135. static struct pnfs_commit_array *
  136. pnfs_lookup_commit_array(struct pnfs_ds_commit_info *fl_cinfo,
  137. struct pnfs_layout_segment *lseg)
  138. {
  139. struct pnfs_commit_array *array;
  140. rcu_read_lock();
  141. array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
  142. if (!array) {
  143. rcu_read_unlock();
  144. fl_cinfo->ops->setup_ds_info(fl_cinfo, lseg);
  145. rcu_read_lock();
  146. array = pnfs_find_commit_array_by_lseg(fl_cinfo, lseg);
  147. }
  148. rcu_read_unlock();
  149. return array;
  150. }
  151. static void
  152. pnfs_release_commit_array_locked(struct pnfs_commit_array *array)
  153. {
  154. list_del_rcu(&array->cinfo_list);
  155. list_del(&array->lseg_list);
  156. pnfs_free_commit_array(array);
  157. }
  158. static void
  159. pnfs_put_commit_array_locked(struct pnfs_commit_array *array)
  160. {
  161. if (refcount_dec_and_test(&array->refcount))
  162. pnfs_release_commit_array_locked(array);
  163. }
  164. static void
  165. pnfs_put_commit_array(struct pnfs_commit_array *array, struct inode *inode)
  166. {
  167. if (refcount_dec_and_lock(&array->refcount, &inode->i_lock)) {
  168. pnfs_release_commit_array_locked(array);
  169. spin_unlock(&inode->i_lock);
  170. }
  171. }
  172. static struct pnfs_commit_array *
  173. pnfs_get_commit_array(struct pnfs_commit_array *array)
  174. {
  175. if (refcount_inc_not_zero(&array->refcount))
  176. return array;
  177. return NULL;
  178. }
  179. static void
  180. pnfs_remove_and_free_commit_array(struct pnfs_commit_array *array)
  181. {
  182. array->lseg = NULL;
  183. list_del_init(&array->lseg_list);
  184. pnfs_put_commit_array_locked(array);
  185. }
  186. void
  187. pnfs_generic_ds_cinfo_release_lseg(struct pnfs_ds_commit_info *fl_cinfo,
  188. struct pnfs_layout_segment *lseg)
  189. {
  190. struct pnfs_commit_array *array, *tmp;
  191. list_for_each_entry_safe(array, tmp, &lseg->pls_commits, lseg_list)
  192. pnfs_remove_and_free_commit_array(array);
  193. }
  194. EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_release_lseg);
  195. void
  196. pnfs_generic_ds_cinfo_destroy(struct pnfs_ds_commit_info *fl_cinfo)
  197. {
  198. struct pnfs_commit_array *array, *tmp;
  199. list_for_each_entry_safe(array, tmp, &fl_cinfo->commits, cinfo_list)
  200. pnfs_remove_and_free_commit_array(array);
  201. }
  202. EXPORT_SYMBOL_GPL(pnfs_generic_ds_cinfo_destroy);
  203. /*
  204. * Locks the nfs_page requests for commit and moves them to
  205. * @bucket->committing.
  206. */
  207. static int
  208. pnfs_bucket_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
  209. struct nfs_commit_info *cinfo,
  210. int max)
  211. {
  212. struct list_head *src = &bucket->written;
  213. struct list_head *dst = &bucket->committing;
  214. int ret;
  215. lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
  216. ret = nfs_scan_commit_list(src, dst, cinfo, max);
  217. if (ret) {
  218. cinfo->ds->nwritten -= ret;
  219. cinfo->ds->ncommitting += ret;
  220. }
  221. return ret;
  222. }
  223. static int pnfs_bucket_scan_array(struct nfs_commit_info *cinfo,
  224. struct pnfs_commit_bucket *buckets,
  225. unsigned int nbuckets,
  226. int max)
  227. {
  228. unsigned int i;
  229. int rv = 0, cnt;
  230. for (i = 0; i < nbuckets && max != 0; i++) {
  231. cnt = pnfs_bucket_scan_ds_commit_list(&buckets[i], cinfo, max);
  232. rv += cnt;
  233. max -= cnt;
  234. }
  235. return rv;
  236. }
  237. /* Move reqs from written to committing lists, returning count
  238. * of number moved.
  239. */
  240. int pnfs_generic_scan_commit_lists(struct nfs_commit_info *cinfo, int max)
  241. {
  242. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  243. struct pnfs_commit_array *array;
  244. int rv = 0, cnt;
  245. rcu_read_lock();
  246. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  247. if (!array->lseg || !pnfs_get_commit_array(array))
  248. continue;
  249. rcu_read_unlock();
  250. cnt = pnfs_bucket_scan_array(cinfo, array->buckets,
  251. array->nbuckets, max);
  252. rcu_read_lock();
  253. pnfs_put_commit_array(array, cinfo->inode);
  254. rv += cnt;
  255. max -= cnt;
  256. if (!max)
  257. break;
  258. }
  259. rcu_read_unlock();
  260. return rv;
  261. }
  262. EXPORT_SYMBOL_GPL(pnfs_generic_scan_commit_lists);
  263. static unsigned int
  264. pnfs_bucket_recover_commit_reqs(struct list_head *dst,
  265. struct pnfs_commit_bucket *buckets,
  266. unsigned int nbuckets,
  267. struct nfs_commit_info *cinfo)
  268. {
  269. struct pnfs_commit_bucket *b;
  270. struct pnfs_layout_segment *freeme;
  271. unsigned int nwritten, ret = 0;
  272. unsigned int i;
  273. restart:
  274. for (i = 0, b = buckets; i < nbuckets; i++, b++) {
  275. nwritten = nfs_scan_commit_list(&b->written, dst, cinfo, 0);
  276. if (!nwritten)
  277. continue;
  278. ret += nwritten;
  279. freeme = pnfs_free_bucket_lseg(b);
  280. if (freeme) {
  281. pnfs_put_lseg(freeme);
  282. goto restart;
  283. }
  284. }
  285. return ret;
  286. }
  287. /* Pull everything off the committing lists and dump into @dst. */
  288. void pnfs_generic_recover_commit_reqs(struct list_head *dst,
  289. struct nfs_commit_info *cinfo)
  290. {
  291. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  292. struct pnfs_commit_array *array;
  293. unsigned int nwritten;
  294. lockdep_assert_held(&NFS_I(cinfo->inode)->commit_mutex);
  295. rcu_read_lock();
  296. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  297. if (!array->lseg || !pnfs_get_commit_array(array))
  298. continue;
  299. rcu_read_unlock();
  300. nwritten = pnfs_bucket_recover_commit_reqs(dst,
  301. array->buckets,
  302. array->nbuckets,
  303. cinfo);
  304. rcu_read_lock();
  305. pnfs_put_commit_array(array, cinfo->inode);
  306. fl_cinfo->nwritten -= nwritten;
  307. }
  308. rcu_read_unlock();
  309. }
  310. EXPORT_SYMBOL_GPL(pnfs_generic_recover_commit_reqs);
  311. static struct pnfs_layout_segment *
  312. pnfs_bucket_get_committing(struct list_head *head,
  313. struct pnfs_commit_bucket *bucket,
  314. struct nfs_commit_info *cinfo)
  315. {
  316. struct pnfs_layout_segment *lseg;
  317. struct list_head *pos;
  318. list_for_each(pos, &bucket->committing)
  319. cinfo->ds->ncommitting--;
  320. list_splice_init(&bucket->committing, head);
  321. lseg = pnfs_free_bucket_lseg(bucket);
  322. if (!lseg)
  323. lseg = pnfs_get_lseg(bucket->lseg);
  324. return lseg;
  325. }
  326. static struct nfs_commit_data *
  327. pnfs_bucket_fetch_commitdata(struct pnfs_commit_bucket *bucket,
  328. struct nfs_commit_info *cinfo)
  329. {
  330. struct nfs_commit_data *data = nfs_commitdata_alloc();
  331. if (!data)
  332. return NULL;
  333. data->lseg = pnfs_bucket_get_committing(&data->pages, bucket, cinfo);
  334. return data;
  335. }
  336. static void pnfs_generic_retry_commit(struct pnfs_commit_bucket *buckets,
  337. unsigned int nbuckets,
  338. struct nfs_commit_info *cinfo,
  339. unsigned int idx)
  340. {
  341. struct pnfs_commit_bucket *bucket;
  342. struct pnfs_layout_segment *freeme;
  343. LIST_HEAD(pages);
  344. for (bucket = buckets; idx < nbuckets; bucket++, idx++) {
  345. if (list_empty(&bucket->committing))
  346. continue;
  347. mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
  348. freeme = pnfs_bucket_get_committing(&pages, bucket, cinfo);
  349. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  350. nfs_retry_commit(&pages, freeme, cinfo, idx);
  351. pnfs_put_lseg(freeme);
  352. }
  353. }
  354. static unsigned int
  355. pnfs_bucket_alloc_ds_commits(struct list_head *list,
  356. struct pnfs_commit_bucket *buckets,
  357. unsigned int nbuckets,
  358. struct nfs_commit_info *cinfo)
  359. {
  360. struct pnfs_commit_bucket *bucket;
  361. struct nfs_commit_data *data;
  362. unsigned int i;
  363. unsigned int nreq = 0;
  364. for (i = 0, bucket = buckets; i < nbuckets; i++, bucket++) {
  365. if (list_empty(&bucket->committing))
  366. continue;
  367. mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
  368. if (!list_empty(&bucket->committing)) {
  369. data = pnfs_bucket_fetch_commitdata(bucket, cinfo);
  370. if (!data)
  371. goto out_error;
  372. data->ds_commit_index = i;
  373. list_add_tail(&data->list, list);
  374. nreq++;
  375. }
  376. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  377. }
  378. return nreq;
  379. out_error:
  380. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  381. /* Clean up on error */
  382. pnfs_generic_retry_commit(buckets, nbuckets, cinfo, i);
  383. return nreq;
  384. }
  385. static unsigned int
  386. pnfs_alloc_ds_commits_list(struct list_head *list,
  387. struct pnfs_ds_commit_info *fl_cinfo,
  388. struct nfs_commit_info *cinfo)
  389. {
  390. struct pnfs_commit_array *array;
  391. unsigned int ret = 0;
  392. rcu_read_lock();
  393. list_for_each_entry_rcu(array, &fl_cinfo->commits, cinfo_list) {
  394. if (!array->lseg || !pnfs_get_commit_array(array))
  395. continue;
  396. rcu_read_unlock();
  397. ret += pnfs_bucket_alloc_ds_commits(list, array->buckets,
  398. array->nbuckets, cinfo);
  399. rcu_read_lock();
  400. pnfs_put_commit_array(array, cinfo->inode);
  401. }
  402. rcu_read_unlock();
  403. return ret;
  404. }
  405. /* This follows nfs_commit_list pretty closely */
  406. int
  407. pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
  408. int how, struct nfs_commit_info *cinfo,
  409. int (*initiate_commit)(struct nfs_commit_data *data,
  410. int how))
  411. {
  412. struct pnfs_ds_commit_info *fl_cinfo = cinfo->ds;
  413. struct nfs_commit_data *data, *tmp;
  414. LIST_HEAD(list);
  415. unsigned int nreq = 0;
  416. if (!list_empty(mds_pages)) {
  417. data = nfs_commitdata_alloc();
  418. if (!data) {
  419. nfs_retry_commit(mds_pages, NULL, cinfo, -1);
  420. return -ENOMEM;
  421. }
  422. data->ds_commit_index = -1;
  423. list_splice_init(mds_pages, &data->pages);
  424. list_add_tail(&data->list, &list);
  425. nreq++;
  426. }
  427. nreq += pnfs_alloc_ds_commits_list(&list, fl_cinfo, cinfo);
  428. if (nreq == 0)
  429. goto out;
  430. list_for_each_entry_safe(data, tmp, &list, list) {
  431. list_del(&data->list);
  432. if (data->ds_commit_index < 0) {
  433. nfs_init_commit(data, NULL, NULL, cinfo);
  434. nfs_initiate_commit(NFS_CLIENT(inode), data,
  435. NFS_PROTO(data->inode),
  436. data->mds_ops, how,
  437. RPC_TASK_CRED_NOREF, NULL);
  438. } else {
  439. nfs_init_commit(data, NULL, data->lseg, cinfo);
  440. initiate_commit(data, how);
  441. }
  442. }
  443. out:
  444. return PNFS_ATTEMPTED;
  445. }
  446. EXPORT_SYMBOL_GPL(pnfs_generic_commit_pagelist);
/*
 * Data server cache
 *
 * Data servers can be mapped to different device ids, so each
 * struct nfs4_pnfs_ds is shared and reference counted (ds_count):
 * - set to 1 on allocation
 * - incremented when a device id maps a data server already in the cache.
 * - decremented when deviceid is removed from the cache.
 */
/* Taken around lookups, insertions and removals on the cache list below. */
static DEFINE_SPINLOCK(nfs4_ds_cache_lock);
/* All cached data servers, linked through nfs4_pnfs_ds::ds_node. */
static LIST_HEAD(nfs4_data_server_cache);
  458. /* Debug routines */
  459. static void
  460. print_ds(struct nfs4_pnfs_ds *ds)
  461. {
  462. if (ds == NULL) {
  463. printk(KERN_WARNING "%s NULL device\n", __func__);
  464. return;
  465. }
  466. printk(KERN_WARNING " ds %s\n"
  467. " ref count %d\n"
  468. " client %p\n"
  469. " cl_exchange_flags %x\n",
  470. ds->ds_remotestr,
  471. refcount_read(&ds->ds_count), ds->ds_clp,
  472. ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
  473. }
  474. static bool
  475. same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
  476. {
  477. struct sockaddr_in *a, *b;
  478. struct sockaddr_in6 *a6, *b6;
  479. if (addr1->sa_family != addr2->sa_family)
  480. return false;
  481. switch (addr1->sa_family) {
  482. case AF_INET:
  483. a = (struct sockaddr_in *)addr1;
  484. b = (struct sockaddr_in *)addr2;
  485. if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
  486. a->sin_port == b->sin_port)
  487. return true;
  488. break;
  489. case AF_INET6:
  490. a6 = (struct sockaddr_in6 *)addr1;
  491. b6 = (struct sockaddr_in6 *)addr2;
  492. /* LINKLOCAL addresses must have matching scope_id */
  493. if (ipv6_addr_src_scope(&a6->sin6_addr) ==
  494. IPV6_ADDR_SCOPE_LINKLOCAL &&
  495. a6->sin6_scope_id != b6->sin6_scope_id)
  496. return false;
  497. if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
  498. a6->sin6_port == b6->sin6_port)
  499. return true;
  500. break;
  501. default:
  502. dprintk("%s: unhandled address family: %u\n",
  503. __func__, addr1->sa_family);
  504. return false;
  505. }
  506. return false;
  507. }
  508. /*
  509. * Checks if 'dsaddrs1' contains a subset of 'dsaddrs2'. If it does,
  510. * declare a match.
  511. */
  512. static bool
  513. _same_data_server_addrs_locked(const struct list_head *dsaddrs1,
  514. const struct list_head *dsaddrs2)
  515. {
  516. struct nfs4_pnfs_ds_addr *da1, *da2;
  517. struct sockaddr *sa1, *sa2;
  518. bool match = false;
  519. list_for_each_entry(da1, dsaddrs1, da_node) {
  520. sa1 = (struct sockaddr *)&da1->da_addr;
  521. match = false;
  522. list_for_each_entry(da2, dsaddrs2, da_node) {
  523. sa2 = (struct sockaddr *)&da2->da_addr;
  524. match = same_sockaddr(sa1, sa2);
  525. if (match)
  526. break;
  527. }
  528. if (!match)
  529. break;
  530. }
  531. return match;
  532. }
  533. /*
  534. * Lookup DS by addresses. nfs4_ds_cache_lock is held
  535. */
  536. static struct nfs4_pnfs_ds *
  537. _data_server_lookup_locked(const struct net *net, const struct list_head *dsaddrs)
  538. {
  539. struct nfs4_pnfs_ds *ds;
  540. list_for_each_entry(ds, &nfs4_data_server_cache, ds_node)
  541. if (ds->ds_net == net && _same_data_server_addrs_locked(&ds->ds_addrs, dsaddrs))
  542. return ds;
  543. return NULL;
  544. }
  545. static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
  546. {
  547. struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
  548. if (da)
  549. INIT_LIST_HEAD(&da->da_node);
  550. return da;
  551. }
  552. static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
  553. {
  554. kfree(da->da_remotestr);
  555. kfree(da->da_netid);
  556. kfree(da);
  557. }
  558. static void destroy_ds(struct nfs4_pnfs_ds *ds)
  559. {
  560. struct nfs4_pnfs_ds_addr *da;
  561. dprintk("--> %s\n", __func__);
  562. ifdebug(FACILITY)
  563. print_ds(ds);
  564. nfs_put_client(ds->ds_clp);
  565. while (!list_empty(&ds->ds_addrs)) {
  566. da = list_first_entry(&ds->ds_addrs,
  567. struct nfs4_pnfs_ds_addr,
  568. da_node);
  569. list_del_init(&da->da_node);
  570. nfs4_pnfs_ds_addr_free(da);
  571. }
  572. kfree(ds->ds_remotestr);
  573. kfree(ds);
  574. }
  575. void nfs4_pnfs_ds_put(struct nfs4_pnfs_ds *ds)
  576. {
  577. if (refcount_dec_and_lock(&ds->ds_count,
  578. &nfs4_ds_cache_lock)) {
  579. list_del_init(&ds->ds_node);
  580. spin_unlock(&nfs4_ds_cache_lock);
  581. destroy_ds(ds);
  582. }
  583. }
  584. EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_put);
  585. /*
  586. * Create a string with a human readable address and port to avoid
  587. * complicated setup around many dprinks.
  588. */
  589. static char *
  590. nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
  591. {
  592. struct nfs4_pnfs_ds_addr *da;
  593. char *remotestr;
  594. size_t len;
  595. char *p;
  596. len = 3; /* '{', '}' and eol */
  597. list_for_each_entry(da, dsaddrs, da_node) {
  598. len += strlen(da->da_remotestr) + 1; /* string plus comma */
  599. }
  600. remotestr = kzalloc(len, gfp_flags);
  601. if (!remotestr)
  602. return NULL;
  603. p = remotestr;
  604. *(p++) = '{';
  605. len--;
  606. list_for_each_entry(da, dsaddrs, da_node) {
  607. size_t ll = strlen(da->da_remotestr);
  608. if (ll > len)
  609. goto out_err;
  610. memcpy(p, da->da_remotestr, ll);
  611. p += ll;
  612. len -= ll;
  613. if (len < 1)
  614. goto out_err;
  615. (*p++) = ',';
  616. len--;
  617. }
  618. if (len < 2)
  619. goto out_err;
  620. *(p++) = '}';
  621. *p = '\0';
  622. return remotestr;
  623. out_err:
  624. kfree(remotestr);
  625. return NULL;
  626. }
  627. /*
  628. * Given a list of multipath struct nfs4_pnfs_ds_addr, add it to ds cache if
  629. * uncached and return cached struct nfs4_pnfs_ds.
  630. */
  631. struct nfs4_pnfs_ds *
  632. nfs4_pnfs_ds_add(const struct net *net, struct list_head *dsaddrs, gfp_t gfp_flags)
  633. {
  634. struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
  635. char *remotestr;
  636. if (list_empty(dsaddrs)) {
  637. dprintk("%s: no addresses defined\n", __func__);
  638. goto out;
  639. }
  640. ds = kzalloc(sizeof(*ds), gfp_flags);
  641. if (!ds)
  642. goto out;
  643. /* this is only used for debugging, so it's ok if its NULL */
  644. remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
  645. spin_lock(&nfs4_ds_cache_lock);
  646. tmp_ds = _data_server_lookup_locked(net, dsaddrs);
  647. if (tmp_ds == NULL) {
  648. INIT_LIST_HEAD(&ds->ds_addrs);
  649. list_splice_init(dsaddrs, &ds->ds_addrs);
  650. ds->ds_remotestr = remotestr;
  651. refcount_set(&ds->ds_count, 1);
  652. INIT_LIST_HEAD(&ds->ds_node);
  653. ds->ds_net = net;
  654. ds->ds_clp = NULL;
  655. list_add(&ds->ds_node, &nfs4_data_server_cache);
  656. dprintk("%s add new data server %s\n", __func__,
  657. ds->ds_remotestr);
  658. } else {
  659. kfree(remotestr);
  660. kfree(ds);
  661. refcount_inc(&tmp_ds->ds_count);
  662. dprintk("%s data server %s found, inc'ed ds_count to %d\n",
  663. __func__, tmp_ds->ds_remotestr,
  664. refcount_read(&tmp_ds->ds_count));
  665. ds = tmp_ds;
  666. }
  667. spin_unlock(&nfs4_ds_cache_lock);
  668. out:
  669. return ds;
  670. }
  671. EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_add);
  672. static int nfs4_wait_ds_connect(struct nfs4_pnfs_ds *ds)
  673. {
  674. might_sleep();
  675. return wait_on_bit(&ds->ds_state, NFS4DS_CONNECTING, TASK_KILLABLE);
  676. }
  677. static void nfs4_clear_ds_conn_bit(struct nfs4_pnfs_ds *ds)
  678. {
  679. smp_mb__before_atomic();
  680. clear_and_wake_up_bit(NFS4DS_CONNECTING, &ds->ds_state);
  681. }
  682. static struct nfs_client *(*get_v3_ds_connect)(
  683. struct nfs_server *mds_srv,
  684. const struct sockaddr_storage *ds_addr,
  685. int ds_addrlen,
  686. int ds_proto,
  687. unsigned int ds_timeo,
  688. unsigned int ds_retrans);
  689. static bool load_v3_ds_connect(void)
  690. {
  691. if (!get_v3_ds_connect) {
  692. get_v3_ds_connect = symbol_request(nfs3_set_ds_client);
  693. WARN_ON_ONCE(!get_v3_ds_connect);
  694. }
  695. return(get_v3_ds_connect != NULL);
  696. }
  697. void nfs4_pnfs_v3_ds_connect_unload(void)
  698. {
  699. if (get_v3_ds_connect) {
  700. symbol_put(nfs3_set_ds_client);
  701. get_v3_ds_connect = NULL;
  702. }
  703. }
  704. static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
  705. struct nfs4_pnfs_ds *ds,
  706. unsigned int timeo,
  707. unsigned int retrans)
  708. {
  709. struct nfs_client *clp = ERR_PTR(-EIO);
  710. struct nfs4_pnfs_ds_addr *da;
  711. unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10;
  712. int status = 0;
  713. dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
  714. if (!load_v3_ds_connect())
  715. return -EPROTONOSUPPORT;
  716. list_for_each_entry(da, &ds->ds_addrs, da_node) {
  717. dprintk("%s: DS %s: trying address %s\n",
  718. __func__, ds->ds_remotestr, da->da_remotestr);
  719. if (!IS_ERR(clp)) {
  720. struct xprt_create xprt_args = {
  721. .ident = da->da_transport,
  722. .net = clp->cl_net,
  723. .dstaddr = (struct sockaddr *)&da->da_addr,
  724. .addrlen = da->da_addrlen,
  725. .servername = clp->cl_hostname,
  726. .connect_timeout = connect_timeout,
  727. .reconnect_timeout = connect_timeout,
  728. };
  729. if (da->da_transport != clp->cl_proto)
  730. continue;
  731. if (da->da_addr.ss_family != clp->cl_addr.ss_family)
  732. continue;
  733. /* Add this address as an alias */
  734. rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
  735. rpc_clnt_test_and_add_xprt, NULL);
  736. continue;
  737. }
  738. clp = get_v3_ds_connect(mds_srv,
  739. &da->da_addr,
  740. da->da_addrlen, da->da_transport,
  741. timeo, retrans);
  742. if (IS_ERR(clp))
  743. continue;
  744. clp->cl_rpcclient->cl_softerr = 0;
  745. clp->cl_rpcclient->cl_softrtry = 0;
  746. }
  747. if (IS_ERR(clp)) {
  748. status = PTR_ERR(clp);
  749. goto out;
  750. }
  751. smp_wmb();
  752. WRITE_ONCE(ds->ds_clp, clp);
  753. dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
  754. out:
  755. return status;
  756. }
/*
 * Connect to an NFSv4.x data server.
 *
 * @mds_srv:       the MDS server the data server is used with.
 * @ds:            data server to connect.
 * @timeo:         timeout passed to nfs4_set_ds_client().
 * @retrans:       retransmit count passed to nfs4_set_ds_client().
 * @minor_version: NFSv4 minor version passed to nfs4_set_ds_client().
 *
 * The addresses on ds->ds_addrs are walked in order. Until a client has
 * been established, each address is used to create one and initialise its
 * DS session. Once a client exists and its minor-version ops provide
 * session_trunk, each further address is tested for session trunking and
 * added to the client's RPC client as an extra transport.
 *
 * On success the client is published in ds->ds_clp and 0 is returned;
 * otherwise the error encoded in the last failed attempt is returned
 * (-EIO if no address yielded a usable client).
 */
static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
				    struct nfs4_pnfs_ds *ds,
				    unsigned int timeo,
				    unsigned int retrans,
				    u32 minor_version)
{
	struct nfs_client *clp = ERR_PTR(-EIO);
	struct nfs4_pnfs_ds_addr *da;
	int status = 0;
	dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
	list_for_each_entry(da, &ds->ds_addrs, da_node) {
		/* Scratch buffer for a textual IP address used as TLS servername. */
		char servername[48];
		dprintk("%s: DS %s: trying address %s\n",
			__func__, ds->ds_remotestr, da->da_remotestr);
		if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
			/* A client already exists: try this address as a trunk. */
			struct xprt_create xprt_args = {
				.ident = da->da_transport,
				.net = clp->cl_net,
				.dstaddr = (struct sockaddr *)&da->da_addr,
				.addrlen = da->da_addrlen,
				.servername = clp->cl_hostname,
				.xprtsec = clp->cl_xprtsec,
			};
			struct nfs4_add_xprt_data xprtdata = {
				.clp = clp,
			};
			struct rpc_add_xprt_test rpcdata = {
				.add_xprt_test = clp->cl_mvops->session_trunk,
				.data = &xprtdata,
			};
			/*
			 * Skip addresses on a different transport, unless the
			 * established client uses TCP with TLS.
			 */
			if (da->da_transport != clp->cl_proto &&
					clp->cl_proto != XPRT_TRANSPORT_TCP_TLS)
				continue;
			if (da->da_transport == XPRT_TRANSPORT_TCP &&
				mds_srv->nfs_client->cl_proto ==
					XPRT_TRANSPORT_TCP_TLS) {
				struct sockaddr *addr =
					(struct sockaddr *)&da->da_addr;
				struct sockaddr_in *sin =
					(struct sockaddr_in *)&da->da_addr;
				struct sockaddr_in6 *sin6 =
					(struct sockaddr_in6 *)&da->da_addr;
				/* for NFS with TLS we need to supply a correct
				 * servername of the trunked transport, not the
				 * servername of the main transport stored in
				 * clp->cl_hostname. And set the protocol to
				 * indicate to use TLS
				 */
				servername[0] = '\0';
				switch(addr->sa_family) {
				case AF_INET:
					snprintf(servername, sizeof(servername),
						"%pI4", &sin->sin_addr.s_addr);
					break;
				case AF_INET6:
					snprintf(servername, sizeof(servername),
						"%pI6", &sin6->sin6_addr);
					break;
				default:
					/* do not consider this address */
					/* (continues the enclosing address loop) */
					continue;
				}
				xprt_args.ident = XPRT_TRANSPORT_TCP_TLS;
				xprt_args.servername = servername;
			}
			/* Only trunk over the same address family as the client. */
			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
				continue;
			/*
			 * Test this address for session trunking and
			 * add as an alias
			 */
			xprtdata.cred = nfs4_get_clid_cred(clp);
			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
					  rpc_clnt_setup_test_and_add_xprt,
					  &rpcdata);
			if (xprtdata.cred)
				put_cred(xprtdata.cred);
		} else {
			/*
			 * No usable client yet (or no session_trunk op): create
			 * a client from this address.
			 * NOTE(review): when clp is already valid but
			 * session_trunk is NULL, clp is overwritten here without
			 * a put - confirm nfs4_set_ds_client() reference
			 * semantics make this safe.
			 */
			/* The address is upgraded to TLS when the MDS uses TLS. */
			if (da->da_transport == XPRT_TRANSPORT_TCP &&
				mds_srv->nfs_client->cl_proto ==
					XPRT_TRANSPORT_TCP_TLS)
				da->da_transport = XPRT_TRANSPORT_TCP_TLS;
			clp = nfs4_set_ds_client(mds_srv,
						&da->da_addr,
						da->da_addrlen,
						da->da_transport, timeo,
						retrans, minor_version);
			if (IS_ERR(clp))
				continue;
			status = nfs4_init_ds_session(clp,
					mds_srv->nfs_client->cl_lease_time);
			if (status) {
				/* Session setup failed: drop the client, try next. */
				nfs_put_client(clp);
				clp = ERR_PTR(-EIO);
				continue;
			}
		}
	}
	if (IS_ERR(clp)) {
		status = PTR_ERR(clp);
		goto out;
	}
	/* Order the client setup before the pointer becomes visible. */
	smp_wmb();
	WRITE_ONCE(ds->ds_clp, clp);
	dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
out:
	return status;
}
  865. /*
  866. * Create an rpc connection to the nfs4_pnfs_ds data server.
  867. * Currently only supports IPv4 and IPv6 addresses.
  868. * If connection fails, make devid unavailable and return a -errno.
  869. */
  870. int nfs4_pnfs_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds,
  871. struct nfs4_deviceid_node *devid, unsigned int timeo,
  872. unsigned int retrans, u32 version, u32 minor_version)
  873. {
  874. int err;
  875. do {
  876. err = nfs4_wait_ds_connect(ds);
  877. if (err || ds->ds_clp)
  878. goto out;
  879. if (nfs4_test_deviceid_unavailable(devid))
  880. return -ENODEV;
  881. } while (test_and_set_bit(NFS4DS_CONNECTING, &ds->ds_state) != 0);
  882. if (ds->ds_clp)
  883. goto connect_done;
  884. switch (version) {
  885. case 3:
  886. err = _nfs4_pnfs_v3_ds_connect(mds_srv, ds, timeo, retrans);
  887. break;
  888. case 4:
  889. err = _nfs4_pnfs_v4_ds_connect(mds_srv, ds, timeo, retrans,
  890. minor_version);
  891. break;
  892. default:
  893. dprintk("%s: unsupported DS version %d\n", __func__, version);
  894. err = -EPROTONOSUPPORT;
  895. }
  896. connect_done:
  897. nfs4_clear_ds_conn_bit(ds);
  898. out:
  899. /*
  900. * At this point the ds->ds_clp should be ready, but it might have
  901. * hit an error.
  902. */
  903. if (!err) {
  904. if (!ds->ds_clp || !nfs_client_init_is_complete(ds->ds_clp)) {
  905. WARN_ON_ONCE(ds->ds_clp ||
  906. !nfs4_test_deviceid_unavailable(devid));
  907. return -EINVAL;
  908. }
  909. err = nfs_client_init_status(ds->ds_clp);
  910. }
  911. return err;
  912. }
  913. EXPORT_SYMBOL_GPL(nfs4_pnfs_ds_connect);
  914. /*
  915. * Currently only supports ipv4, ipv6 and one multi-path address.
  916. */
  917. struct nfs4_pnfs_ds_addr *
  918. nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
  919. {
  920. struct nfs4_pnfs_ds_addr *da = NULL;
  921. char *buf, *portstr;
  922. __be16 port;
  923. ssize_t nlen, rlen;
  924. int tmp[2];
  925. char *netid;
  926. size_t len;
  927. char *startsep = "";
  928. char *endsep = "";
  929. /* r_netid */
  930. nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
  931. gfp_flags);
  932. if (unlikely(nlen < 0))
  933. goto out_err;
  934. /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
  935. /* port is ".ABC.DEF", 8 chars max */
  936. rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
  937. IPV6_SCOPE_ID_LEN + 8, gfp_flags);
  938. if (unlikely(rlen < 0))
  939. goto out_free_netid;
  940. /* replace port '.' with '-' */
  941. portstr = strrchr(buf, '.');
  942. if (!portstr) {
  943. dprintk("%s: Failed finding expected dot in port\n",
  944. __func__);
  945. goto out_free_buf;
  946. }
  947. *portstr = '-';
  948. /* find '.' between address and port */
  949. portstr = strrchr(buf, '.');
  950. if (!portstr) {
  951. dprintk("%s: Failed finding expected dot between address and "
  952. "port\n", __func__);
  953. goto out_free_buf;
  954. }
  955. *portstr = '\0';
  956. da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
  957. if (unlikely(!da))
  958. goto out_free_buf;
  959. if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
  960. sizeof(da->da_addr))) {
  961. dprintk("%s: error parsing address %s\n", __func__, buf);
  962. goto out_free_da;
  963. }
  964. portstr++;
  965. sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
  966. port = htons((tmp[0] << 8) | (tmp[1]));
  967. switch (da->da_addr.ss_family) {
  968. case AF_INET:
  969. ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
  970. da->da_addrlen = sizeof(struct sockaddr_in);
  971. break;
  972. case AF_INET6:
  973. ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
  974. da->da_addrlen = sizeof(struct sockaddr_in6);
  975. startsep = "[";
  976. endsep = "]";
  977. break;
  978. default:
  979. dprintk("%s: unsupported address family: %u\n",
  980. __func__, da->da_addr.ss_family);
  981. goto out_free_da;
  982. }
  983. da->da_transport = xprt_find_transport_ident(netid);
  984. if (da->da_transport < 0) {
  985. dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
  986. __func__, netid);
  987. goto out_free_da;
  988. }
  989. da->da_netid = netid;
  990. /* save human readable address */
  991. len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
  992. da->da_remotestr = kzalloc(len, gfp_flags);
  993. /* NULL is ok, only used for dprintk */
  994. if (da->da_remotestr)
  995. snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
  996. buf, endsep, ntohs(port));
  997. dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
  998. kfree(buf);
  999. return da;
  1000. out_free_da:
  1001. kfree(da);
  1002. out_free_buf:
  1003. dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
  1004. kfree(buf);
  1005. out_free_netid:
  1006. kfree(netid);
  1007. out_err:
  1008. return NULL;
  1009. }
  1010. EXPORT_SYMBOL_GPL(nfs4_decode_mp_ds_addr);
  1011. void
  1012. pnfs_layout_mark_request_commit(struct nfs_page *req,
  1013. struct pnfs_layout_segment *lseg,
  1014. struct nfs_commit_info *cinfo,
  1015. u32 ds_commit_idx)
  1016. {
  1017. struct list_head *list;
  1018. struct pnfs_commit_array *array;
  1019. struct pnfs_commit_bucket *bucket;
  1020. mutex_lock(&NFS_I(cinfo->inode)->commit_mutex);
  1021. array = pnfs_lookup_commit_array(cinfo->ds, lseg);
  1022. if (!array || !pnfs_is_valid_lseg(lseg))
  1023. goto out_resched;
  1024. bucket = &array->buckets[ds_commit_idx];
  1025. list = &bucket->written;
  1026. /* Non-empty buckets hold a reference on the lseg. That ref
  1027. * is normally transferred to the COMMIT call and released
  1028. * there. It could also be released if the last req is pulled
  1029. * off due to a rewrite, in which case it will be done in
  1030. * pnfs_common_clear_request_commit
  1031. */
  1032. if (!bucket->lseg)
  1033. bucket->lseg = pnfs_get_lseg(lseg);
  1034. set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
  1035. cinfo->ds->nwritten++;
  1036. nfs_request_add_commit_list_locked(req, list, cinfo);
  1037. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  1038. nfs_folio_mark_unstable(nfs_page_to_folio(req), cinfo);
  1039. return;
  1040. out_resched:
  1041. mutex_unlock(&NFS_I(cinfo->inode)->commit_mutex);
  1042. cinfo->completion_ops->resched_write(cinfo, req);
  1043. }
  1044. EXPORT_SYMBOL_GPL(pnfs_layout_mark_request_commit);
  1045. int
  1046. pnfs_nfs_generic_sync(struct inode *inode, bool datasync)
  1047. {
  1048. int ret;
  1049. if (!pnfs_layoutcommit_outstanding(inode))
  1050. return 0;
  1051. ret = nfs_commit_inode(inode, FLUSH_SYNC);
  1052. if (ret < 0)
  1053. return ret;
  1054. if (datasync)
  1055. return 0;
  1056. return pnfs_layoutcommit_inode(inode, true);
  1057. }
  1058. EXPORT_SYMBOL_GPL(pnfs_nfs_generic_sync);