rotate.c 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752
  1. // SPDX-License-Identifier: GPL-2.0-or-later
  2. /* Handle fileserver selection and rotation.
  3. *
  4. * Copyright (C) 2017 Red Hat, Inc. All Rights Reserved.
  5. * Written by David Howells (dhowells@redhat.com)
  6. */
  7. #include <linux/kernel.h>
  8. #include <linux/slab.h>
  9. #include <linux/fs.h>
  10. #include <linux/sched.h>
  11. #include <linux/delay.h>
  12. #include <linux/sched/signal.h>
  13. #include "internal.h"
  14. #include "afs_fs.h"
  15. #include "protocol_uae.h"
  16. void afs_clear_server_states(struct afs_operation *op)
  17. {
  18. unsigned int i;
  19. if (op->server_states) {
  20. for (i = 0; i < op->server_list->nr_servers; i++)
  21. afs_put_endpoint_state(op->server_states[i].endpoint_state,
  22. afs_estate_trace_put_server_state);
  23. kfree(op->server_states);
  24. }
  25. }
  26. /*
  27. * Begin iteration through a server list, starting with the vnode's last used
  28. * server if possible, or the last recorded good server if not.
  29. */
  30. static bool afs_start_fs_iteration(struct afs_operation *op,
  31. struct afs_vnode *vnode)
  32. {
  33. struct afs_server *server;
  34. void *cb_server;
  35. int i;
  36. trace_afs_rotate(op, afs_rotate_trace_start, 0);
  37. read_lock(&op->volume->servers_lock);
  38. op->server_list = afs_get_serverlist(
  39. rcu_dereference_protected(op->volume->servers,
  40. lockdep_is_held(&op->volume->servers_lock)));
  41. read_unlock(&op->volume->servers_lock);
  42. op->server_states = kcalloc(op->server_list->nr_servers, sizeof(op->server_states[0]),
  43. GFP_KERNEL);
  44. if (!op->server_states) {
  45. afs_op_nomem(op);
  46. trace_afs_rotate(op, afs_rotate_trace_nomem, 0);
  47. return false;
  48. }
  49. rcu_read_lock();
  50. for (i = 0; i < op->server_list->nr_servers; i++) {
  51. struct afs_endpoint_state *estate;
  52. struct afs_server_state *s = &op->server_states[i];
  53. server = op->server_list->servers[i].server;
  54. estate = rcu_dereference(server->endpoint_state);
  55. s->endpoint_state = afs_get_endpoint_state(estate,
  56. afs_estate_trace_get_server_state);
  57. s->probe_seq = estate->probe_seq;
  58. s->untried_addrs = (1UL << estate->addresses->nr_addrs) - 1;
  59. init_waitqueue_entry(&s->probe_waiter, current);
  60. afs_get_address_preferences(op->net, estate->addresses);
  61. }
  62. rcu_read_unlock();
  63. op->untried_servers = (1UL << op->server_list->nr_servers) - 1;
  64. op->server_index = -1;
  65. cb_server = vnode->cb_server;
  66. if (cb_server) {
  67. /* See if the vnode's preferred record is still available */
  68. for (i = 0; i < op->server_list->nr_servers; i++) {
  69. server = op->server_list->servers[i].server;
  70. if (server == cb_server) {
  71. op->server_index = i;
  72. goto found_interest;
  73. }
  74. }
  75. /* If we have a lock outstanding on a server that's no longer
  76. * serving this vnode, then we can't switch to another server
  77. * and have to return an error.
  78. */
  79. if (op->flags & AFS_OPERATION_CUR_ONLY) {
  80. afs_op_set_error(op, -ESTALE);
  81. trace_afs_rotate(op, afs_rotate_trace_stale_lock, 0);
  82. return false;
  83. }
  84. /* Note that the callback promise is effectively broken */
  85. write_seqlock(&vnode->cb_lock);
  86. ASSERTCMP(cb_server, ==, vnode->cb_server);
  87. vnode->cb_server = NULL;
  88. if (atomic64_xchg(&vnode->cb_expires_at, AFS_NO_CB_PROMISE) != AFS_NO_CB_PROMISE)
  89. vnode->cb_break++;
  90. write_sequnlock(&vnode->cb_lock);
  91. }
  92. found_interest:
  93. return true;
  94. }
  95. /*
  96. * Post volume busy note.
  97. */
  98. static void afs_busy(struct afs_operation *op, u32 abort_code)
  99. {
  100. const char *m;
  101. switch (abort_code) {
  102. case VOFFLINE: m = "offline"; break;
  103. case VRESTARTING: m = "restarting"; break;
  104. case VSALVAGING: m = "being salvaged"; break;
  105. default: m = "busy"; break;
  106. }
  107. pr_notice("kAFS: Volume %llu '%s' on server %pU is %s\n",
  108. op->volume->vid, op->volume->name, &op->server->uuid, m);
  109. }
  110. /*
  111. * Sleep and retry the operation to the same fileserver.
  112. */
  113. static bool afs_sleep_and_retry(struct afs_operation *op)
  114. {
  115. trace_afs_rotate(op, afs_rotate_trace_busy_sleep, 0);
  116. if (!(op->flags & AFS_OPERATION_UNINTR)) {
  117. msleep_interruptible(1000);
  118. if (signal_pending(current)) {
  119. afs_op_set_error(op, -ERESTARTSYS);
  120. return false;
  121. }
  122. } else {
  123. msleep(1000);
  124. }
  125. return true;
  126. }
  127. /*
  128. * Select the fileserver to use. May be called multiple times to rotate
  129. * through the fileservers.
  130. */
  131. bool afs_select_fileserver(struct afs_operation *op)
  132. {
  133. struct afs_addr_list *alist;
  134. struct afs_server *server;
  135. struct afs_vnode *vnode = op->file[0].vnode;
  136. unsigned long set, failed;
  137. s32 abort_code = op->call_abort_code;
  138. int best_prio = 0;
  139. int error = op->call_error, addr_index, i, j;
  140. op->nr_iterations++;
  141. _enter("OP=%x+%x,%llx,%u{%lx},%u{%lx},%d,%d",
  142. op->debug_id, op->nr_iterations, op->volume->vid,
  143. op->server_index, op->untried_servers,
  144. op->addr_index, op->addr_tried,
  145. error, abort_code);
  146. if (op->flags & AFS_OPERATION_STOP) {
  147. trace_afs_rotate(op, afs_rotate_trace_stopped, 0);
  148. _leave(" = f [stopped]");
  149. return false;
  150. }
  151. if (op->nr_iterations == 0)
  152. goto start;
  153. WRITE_ONCE(op->estate->addresses->addrs[op->addr_index].last_error, error);
  154. trace_afs_rotate(op, afs_rotate_trace_iter, op->call_error);
  155. /* Evaluate the result of the previous operation, if there was one. */
  156. switch (op->call_error) {
  157. case 0:
  158. clear_bit(AFS_SE_VOLUME_OFFLINE,
  159. &op->server_list->servers[op->server_index].flags);
  160. clear_bit(AFS_SE_VOLUME_BUSY,
  161. &op->server_list->servers[op->server_index].flags);
  162. op->cumul_error.responded = true;
  163. /* We succeeded, but we may need to redo the op from another
  164. * server if we're looking at a set of RO volumes where some of
  165. * the servers have not yet been brought up to date lest we
  166. * regress the data. We only switch to the new version once
  167. * >=50% of the servers are updated.
  168. */
  169. error = afs_update_volume_state(op);
  170. if (error != 0) {
  171. if (error == 1) {
  172. afs_sleep_and_retry(op);
  173. goto restart_from_beginning;
  174. }
  175. afs_op_set_error(op, error);
  176. goto failed;
  177. }
  178. fallthrough;
  179. default:
  180. /* Success or local failure. Stop. */
  181. afs_op_set_error(op, error);
  182. op->flags |= AFS_OPERATION_STOP;
  183. trace_afs_rotate(op, afs_rotate_trace_stop, error);
  184. _leave(" = f [okay/local %d]", error);
  185. return false;
  186. case -ECONNABORTED:
  187. /* The far side rejected the operation on some grounds. This
  188. * might involve the server being busy or the volume having been moved.
  189. *
  190. * Note that various V* errors should not be sent to a cache manager
  191. * by a fileserver as they should be translated to more modern UAE*
  192. * errors instead. IBM AFS and OpenAFS fileservers, however, do leak
  193. * these abort codes.
  194. */
  195. trace_afs_rotate(op, afs_rotate_trace_aborted, abort_code);
  196. op->cumul_error.responded = true;
  197. switch (abort_code) {
  198. case VNOVOL:
  199. /* This fileserver doesn't know about the volume.
  200. * - May indicate that the VL is wrong - retry once and compare
  201. * the results.
  202. * - May indicate that the fileserver couldn't attach to the vol.
  203. * - The volume might have been temporarily removed so that it can
  204. * be replaced by a volume restore. "vos" might have ended one
  205. * transaction and has yet to create the next.
  206. * - The volume might not be blessed or might not be in-service
  207. * (administrative action).
  208. */
  209. if (op->flags & AFS_OPERATION_VNOVOL) {
  210. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  211. goto next_server;
  212. }
  213. write_lock(&op->volume->servers_lock);
  214. op->server_list->vnovol_mask |= 1 << op->server_index;
  215. write_unlock(&op->volume->servers_lock);
  216. set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
  217. error = afs_check_volume_status(op->volume, op);
  218. if (error < 0) {
  219. afs_op_set_error(op, error);
  220. goto failed;
  221. }
  222. if (test_bit(AFS_VOLUME_DELETED, &op->volume->flags)) {
  223. afs_op_set_error(op, -ENOMEDIUM);
  224. goto failed;
  225. }
  226. /* If the server list didn't change, then assume that
  227. * it's the fileserver having trouble.
  228. */
  229. if (rcu_access_pointer(op->volume->servers) == op->server_list) {
  230. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  231. goto next_server;
  232. }
  233. /* Try again */
  234. op->flags |= AFS_OPERATION_VNOVOL;
  235. _leave(" = t [vnovol]");
  236. return true;
  237. case VVOLEXISTS:
  238. case VONLINE:
  239. /* These should not be returned from the fileserver. */
  240. pr_warn("Fileserver returned unexpected abort %d\n",
  241. abort_code);
  242. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  243. goto next_server;
  244. case VNOSERVICE:
  245. /* Prior to AFS 3.2 VNOSERVICE was returned from the fileserver
  246. * if the volume was neither in-service nor administratively
  247. * blessed. All usage was replaced by VNOVOL because AFS 3.1 and
  248. * earlier cache managers did not handle VNOSERVICE and assumed
  249. * it was the client OSes errno 105.
  250. *
  251. * Starting with OpenAFS 1.4.8 VNOSERVICE was repurposed as the
  252. * fileserver idle dead time error which was sent in place of
  253. * RX_CALL_TIMEOUT (-3). The error was intended to be sent if the
  254. * fileserver took too long to send a reply to the client.
  255. * RX_CALL_TIMEOUT would have caused the cache manager to mark the
  256. * server down whereas VNOSERVICE since AFS 3.2 would cause cache
  257. * manager to temporarily (up to 15 minutes) mark the volume
  258. * instance as unusable.
  259. *
  260. * The idle dead logic resulted in cache inconsistency since a
  261. * state changing call that the cache manager assumed was dead
  262. * could still be processed to completion by the fileserver. This
  263. * logic was removed in OpenAFS 1.8.0 and VNOSERVICE is no longer
  264. * returned. However, many 1.4.8 through 1.6.24 fileservers are
  265. * still in existence.
  266. *
  267. * AuriStorFS fileservers have never returned VNOSERVICE.
  268. *
  269. * VNOSERVICE should be treated as an alias for RX_CALL_TIMEOUT.
  270. */
  271. case RX_CALL_TIMEOUT:
  272. afs_op_accumulate_error(op, -ETIMEDOUT, abort_code);
  273. goto next_server;
  274. case VSALVAGING: /* This error should not be leaked to cache managers
  275. * but is from OpenAFS demand attach fileservers.
  276. * It should be treated as an alias for VOFFLINE.
  277. */
  278. case VSALVAGE: /* VSALVAGE should be treated as a synonym of VOFFLINE */
  279. case VOFFLINE:
  280. /* The volume is in use by the volserver or another volume utility
  281. * for an operation that might alter the contents. The volume is
  282. * expected to come back but it might take a long time (could be
  283. * days).
  284. */
  285. if (!test_and_set_bit(AFS_SE_VOLUME_OFFLINE,
  286. &op->server_list->servers[op->server_index].flags)) {
  287. afs_busy(op, abort_code);
  288. clear_bit(AFS_SE_VOLUME_BUSY,
  289. &op->server_list->servers[op->server_index].flags);
  290. }
  291. if (op->flags & AFS_OPERATION_NO_VSLEEP) {
  292. afs_op_set_error(op, -EADV);
  293. goto failed;
  294. }
  295. goto busy;
  296. case VRESTARTING: /* The fileserver is either shutting down or starting up. */
  297. case VBUSY:
  298. /* The volume is in use by the volserver or another volume
  299. * utility for an operation that is not expected to alter the
  300. * contents of the volume. VBUSY does not need to be returned
  301. * for a ROVOL or BACKVOL bound to an ITBusy volserver
  302. * transaction. The fileserver is permitted to continue serving
  303. * content from ROVOLs and BACKVOLs during an ITBusy transaction
  304. * because the content will not change. However, many fileserver
  305. * releases do return VBUSY for ROVOL and BACKVOL instances under
  306. * many circumstances.
  307. *
  308. * Retry after going round all the servers unless we have a file
  309. * lock we need to maintain.
  310. */
  311. if (op->flags & AFS_OPERATION_NO_VSLEEP) {
  312. afs_op_set_error(op, -EBUSY);
  313. goto failed;
  314. }
  315. if (!test_and_set_bit(AFS_SE_VOLUME_BUSY,
  316. &op->server_list->servers[op->server_index].flags)) {
  317. afs_busy(op, abort_code);
  318. clear_bit(AFS_SE_VOLUME_OFFLINE,
  319. &op->server_list->servers[op->server_index].flags);
  320. }
  321. busy:
  322. if (op->flags & AFS_OPERATION_CUR_ONLY) {
  323. if (!afs_sleep_and_retry(op))
  324. goto failed;
  325. /* Retry with same server & address */
  326. _leave(" = t [vbusy]");
  327. return true;
  328. }
  329. op->flags |= AFS_OPERATION_VBUSY;
  330. goto next_server;
  331. case VMOVED:
  332. /* The volume migrated to another server. We consider
  333. * consider all locks and callbacks broken and request
  334. * an update from the VLDB.
  335. *
  336. * We also limit the number of VMOVED hops we will
  337. * honour, just in case someone sets up a loop.
  338. */
  339. if (op->flags & AFS_OPERATION_VMOVED) {
  340. afs_op_set_error(op, -EREMOTEIO);
  341. goto failed;
  342. }
  343. op->flags |= AFS_OPERATION_VMOVED;
  344. set_bit(AFS_VOLUME_WAIT, &op->volume->flags);
  345. set_bit(AFS_VOLUME_NEEDS_UPDATE, &op->volume->flags);
  346. error = afs_check_volume_status(op->volume, op);
  347. if (error < 0) {
  348. afs_op_set_error(op, error);
  349. goto failed;
  350. }
  351. /* If the server list didn't change, then the VLDB is
  352. * out of sync with the fileservers. This is hopefully
  353. * a temporary condition, however, so we don't want to
  354. * permanently block access to the file.
  355. *
  356. * TODO: Try other fileservers if we can.
  357. *
  358. * TODO: Retry a few times with sleeps.
  359. */
  360. if (rcu_access_pointer(op->volume->servers) == op->server_list) {
  361. afs_op_accumulate_error(op, -ENOMEDIUM, abort_code);
  362. goto failed;
  363. }
  364. goto restart_from_beginning;
  365. case UAEIO:
  366. case VIO:
  367. afs_op_accumulate_error(op, -EREMOTEIO, abort_code);
  368. if (op->volume->type != AFSVL_RWVOL)
  369. goto next_server;
  370. goto failed;
  371. case VDISKFULL:
  372. case UAENOSPC:
  373. /* The partition is full. Only applies to RWVOLs.
  374. * Translate locally and return ENOSPC.
  375. * No replicas to failover to.
  376. */
  377. afs_op_set_error(op, -ENOSPC);
  378. goto failed_but_online;
  379. case VOVERQUOTA:
  380. case UAEDQUOT:
  381. /* Volume is full. Only applies to RWVOLs.
  382. * Translate locally and return EDQUOT.
  383. * No replicas to failover to.
  384. */
  385. afs_op_set_error(op, -EDQUOT);
  386. goto failed_but_online;
  387. default:
  388. afs_op_accumulate_error(op, error, abort_code);
  389. failed_but_online:
  390. clear_bit(AFS_SE_VOLUME_OFFLINE,
  391. &op->server_list->servers[op->server_index].flags);
  392. clear_bit(AFS_SE_VOLUME_BUSY,
  393. &op->server_list->servers[op->server_index].flags);
  394. goto failed;
  395. }
  396. case -ETIMEDOUT:
  397. case -ETIME:
  398. if (afs_op_error(op) != -EDESTADDRREQ)
  399. goto iterate_address;
  400. fallthrough;
  401. case -ERFKILL:
  402. case -EADDRNOTAVAIL:
  403. case -ENETUNREACH:
  404. case -EHOSTUNREACH:
  405. case -EHOSTDOWN:
  406. case -ECONNREFUSED:
  407. _debug("no conn");
  408. afs_op_accumulate_error(op, error, 0);
  409. goto iterate_address;
  410. case -ENETRESET:
  411. pr_warn("kAFS: Peer reset %s (op=%x)\n",
  412. op->type ? op->type->name : "???", op->debug_id);
  413. fallthrough;
  414. case -ECONNRESET:
  415. _debug("call reset");
  416. afs_op_set_error(op, error);
  417. goto failed;
  418. }
  419. restart_from_beginning:
  420. trace_afs_rotate(op, afs_rotate_trace_restart, 0);
  421. _debug("restart");
  422. op->estate = NULL;
  423. op->server = NULL;
  424. afs_clear_server_states(op);
  425. op->server_states = NULL;
  426. afs_put_serverlist(op->net, op->server_list);
  427. op->server_list = NULL;
  428. start:
  429. _debug("start");
  430. ASSERTCMP(op->estate, ==, NULL);
  431. /* See if we need to do an update of the volume record. Note that the
  432. * volume may have moved or even have been deleted.
  433. */
  434. error = afs_check_volume_status(op->volume, op);
  435. trace_afs_rotate(op, afs_rotate_trace_check_vol_status, error);
  436. if (error < 0) {
  437. afs_op_set_error(op, error);
  438. goto failed;
  439. }
  440. if (!afs_start_fs_iteration(op, vnode))
  441. goto failed;
  442. _debug("__ VOL %llx __", op->volume->vid);
  443. pick_server:
  444. _debug("pick [%lx]", op->untried_servers);
  445. ASSERTCMP(op->estate, ==, NULL);
  446. error = afs_wait_for_fs_probes(op, op->server_states,
  447. !(op->flags & AFS_OPERATION_UNINTR));
  448. switch (error) {
  449. case 0: /* No untried responsive servers and no outstanding probes */
  450. trace_afs_rotate(op, afs_rotate_trace_probe_none, 0);
  451. goto no_more_servers;
  452. case 1: /* Got a response */
  453. trace_afs_rotate(op, afs_rotate_trace_probe_response, 0);
  454. break;
  455. case 2: /* Probe data superseded */
  456. trace_afs_rotate(op, afs_rotate_trace_probe_superseded, 0);
  457. goto restart_from_beginning;
  458. default:
  459. trace_afs_rotate(op, afs_rotate_trace_probe_error, error);
  460. afs_op_set_error(op, error);
  461. goto failed;
  462. }
  463. /* Pick the untried server with the highest priority untried endpoint.
  464. * If we have outstanding callbacks, we stick with the server we're
  465. * already using if we can.
  466. */
  467. if (op->server) {
  468. _debug("server %u", op->server_index);
  469. if (test_bit(op->server_index, &op->untried_servers))
  470. goto selected_server;
  471. op->server = NULL;
  472. _debug("no server");
  473. }
  474. rcu_read_lock();
  475. op->server_index = -1;
  476. best_prio = -1;
  477. for (i = 0; i < op->server_list->nr_servers; i++) {
  478. struct afs_endpoint_state *es;
  479. struct afs_server_entry *se = &op->server_list->servers[i];
  480. struct afs_addr_list *sal;
  481. struct afs_server *s = se->server;
  482. if (!test_bit(i, &op->untried_servers) ||
  483. test_bit(AFS_SE_EXCLUDED, &se->flags) ||
  484. !test_bit(AFS_SERVER_FL_RESPONDING, &s->flags))
  485. continue;
  486. es = op->server_states[i].endpoint_state;
  487. sal = es->addresses;
  488. afs_get_address_preferences_rcu(op->net, sal);
  489. for (j = 0; j < sal->nr_addrs; j++) {
  490. if (es->failed_set & (1 << j))
  491. continue;
  492. if (!sal->addrs[j].peer)
  493. continue;
  494. if (sal->addrs[j].prio > best_prio) {
  495. op->server_index = i;
  496. best_prio = sal->addrs[j].prio;
  497. }
  498. }
  499. }
  500. rcu_read_unlock();
  501. if (op->server_index == -1)
  502. goto no_more_servers;
  503. selected_server:
  504. trace_afs_rotate(op, afs_rotate_trace_selected_server, best_prio);
  505. _debug("use %d prio %u", op->server_index, best_prio);
  506. __clear_bit(op->server_index, &op->untried_servers);
  507. /* We're starting on a different fileserver from the list. We need to
  508. * check it, create a callback intercept, find its address list and
  509. * probe its capabilities before we use it.
  510. */
  511. ASSERTCMP(op->estate, ==, NULL);
  512. server = op->server_list->servers[op->server_index].server;
  513. if (!afs_check_server_record(op, server, op->key))
  514. goto failed;
  515. _debug("USING SERVER: %pU", &server->uuid);
  516. op->flags |= AFS_OPERATION_RETRY_SERVER;
  517. op->server = server;
  518. if (vnode->cb_server != server) {
  519. vnode->cb_server = server;
  520. vnode->cb_v_check = atomic_read(&vnode->volume->cb_v_break);
  521. atomic64_set(&vnode->cb_expires_at, AFS_NO_CB_PROMISE);
  522. }
  523. retry_server:
  524. op->addr_tried = 0;
  525. op->addr_index = -1;
  526. iterate_address:
  527. /* Iterate over the current server's address list to try and find an
  528. * address on which it will respond to us.
  529. */
  530. op->estate = op->server_states[op->server_index].endpoint_state;
  531. set = READ_ONCE(op->estate->responsive_set);
  532. failed = READ_ONCE(op->estate->failed_set);
  533. _debug("iterate ES=%x rs=%lx fs=%lx", op->estate->probe_seq, set, failed);
  534. set &= ~(failed | op->addr_tried);
  535. trace_afs_rotate(op, afs_rotate_trace_iterate_addr, set);
  536. if (!set)
  537. goto wait_for_more_probe_results;
  538. alist = op->estate->addresses;
  539. best_prio = -1;
  540. addr_index = 0;
  541. for (i = 0; i < alist->nr_addrs; i++) {
  542. if (!(set & (1 << i)))
  543. continue;
  544. if (alist->addrs[i].prio > best_prio) {
  545. addr_index = i;
  546. best_prio = alist->addrs[i].prio;
  547. }
  548. }
  549. alist->preferred = addr_index;
  550. op->addr_index = addr_index;
  551. set_bit(addr_index, &op->addr_tried);
  552. op->volsync.creation = TIME64_MIN;
  553. op->volsync.update = TIME64_MIN;
  554. op->call_responded = false;
  555. _debug("address [%u] %u/%u %pISp",
  556. op->server_index, addr_index, alist->nr_addrs,
  557. rxrpc_kernel_remote_addr(alist->addrs[op->addr_index].peer));
  558. _leave(" = t");
  559. return true;
  560. wait_for_more_probe_results:
  561. error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
  562. !(op->flags & AFS_OPERATION_UNINTR));
  563. if (error == 1)
  564. goto iterate_address;
  565. if (!error)
  566. goto restart_from_beginning;
  567. /* We've now had a failure to respond on all of a server's addresses -
  568. * immediately probe them again and consider retrying the server.
  569. */
  570. trace_afs_rotate(op, afs_rotate_trace_probe_fileserver, 0);
  571. afs_probe_fileserver(op->net, op->server);
  572. if (op->flags & AFS_OPERATION_RETRY_SERVER) {
  573. error = afs_wait_for_one_fs_probe(op->server, op->estate, op->addr_tried,
  574. !(op->flags & AFS_OPERATION_UNINTR));
  575. switch (error) {
  576. case 1:
  577. op->flags &= ~AFS_OPERATION_RETRY_SERVER;
  578. trace_afs_rotate(op, afs_rotate_trace_retry_server, 1);
  579. goto retry_server;
  580. case 0:
  581. trace_afs_rotate(op, afs_rotate_trace_retry_server, 0);
  582. goto restart_from_beginning;
  583. case -ERESTARTSYS:
  584. afs_op_set_error(op, error);
  585. goto failed;
  586. case -ETIME:
  587. case -EDESTADDRREQ:
  588. goto next_server;
  589. }
  590. }
  591. next_server:
  592. trace_afs_rotate(op, afs_rotate_trace_next_server, 0);
  593. _debug("next");
  594. op->estate = NULL;
  595. goto pick_server;
  596. no_more_servers:
  597. /* That's all the servers poked to no good effect. Try again if some
  598. * of them were busy.
  599. */
  600. trace_afs_rotate(op, afs_rotate_trace_no_more_servers, 0);
  601. if (op->flags & AFS_OPERATION_VBUSY) {
  602. afs_sleep_and_retry(op);
  603. op->flags &= ~AFS_OPERATION_VBUSY;
  604. goto restart_from_beginning;
  605. }
  606. rcu_read_lock();
  607. for (i = 0; i < op->server_list->nr_servers; i++) {
  608. struct afs_endpoint_state *estate;
  609. estate = op->server_states[i].endpoint_state;
  610. error = READ_ONCE(estate->error);
  611. if (error < 0)
  612. afs_op_accumulate_error(op, error, estate->abort_code);
  613. }
  614. rcu_read_unlock();
  615. failed:
  616. trace_afs_rotate(op, afs_rotate_trace_failed, 0);
  617. op->flags |= AFS_OPERATION_STOP;
  618. op->estate = NULL;
  619. _leave(" = f [failed %d]", afs_op_error(op));
  620. return false;
  621. }
  622. /*
  623. * Dump cursor state in the case of the error being EDESTADDRREQ.
  624. */
  625. void afs_dump_edestaddrreq(const struct afs_operation *op)
  626. {
  627. static int count;
  628. int i;
  629. if (!IS_ENABLED(CONFIG_AFS_DEBUG_CURSOR) || count > 3)
  630. return;
  631. count++;
  632. rcu_read_lock();
  633. pr_notice("EDESTADDR occurred\n");
  634. pr_notice("OP: cbb=%x cbb2=%x fl=%x err=%hd\n",
  635. op->file[0].cb_break_before,
  636. op->file[1].cb_break_before, op->flags, op->cumul_error.error);
  637. pr_notice("OP: ut=%lx ix=%d ni=%u\n",
  638. op->untried_servers, op->server_index, op->nr_iterations);
  639. pr_notice("OP: call er=%d ac=%d r=%u\n",
  640. op->call_error, op->call_abort_code, op->call_responded);
  641. if (op->server_list) {
  642. const struct afs_server_list *sl = op->server_list;
  643. pr_notice("FC: SL nr=%u vnov=%hx\n",
  644. sl->nr_servers, sl->vnovol_mask);
  645. for (i = 0; i < sl->nr_servers; i++) {
  646. const struct afs_server *s = sl->servers[i].server;
  647. const struct afs_endpoint_state *e =
  648. rcu_dereference(s->endpoint_state);
  649. const struct afs_addr_list *a = e->addresses;
  650. pr_notice("FC: server fl=%lx av=%u %pU\n",
  651. s->flags, s->addr_version, &s->uuid);
  652. pr_notice("FC: - pq=%x R=%lx F=%lx\n",
  653. e->probe_seq, e->responsive_set, e->failed_set);
  654. if (a) {
  655. pr_notice("FC: - av=%u nr=%u/%u/%u pr=%u\n",
  656. a->version,
  657. a->nr_ipv4, a->nr_addrs, a->max_addrs,
  658. a->preferred);
  659. if (a == e->addresses)
  660. pr_notice("FC: - current\n");
  661. }
  662. }
  663. }
  664. pr_notice("AC: t=%lx ax=%d\n", op->addr_tried, op->addr_index);
  665. rcu_read_unlock();
  666. }