// SPDX-License-Identifier: GPL-2.0
/*
 * padata.c - generic interface to process data streams in parallel
 *
 * See Documentation/core-api/padata.rst for more information.
 *
 * Copyright (C) 2008, 2009 secunet Security Networks AG
 * Copyright (C) 2008, 2009 Steffen Klassert <steffen.klassert@secunet.com>
 *
 * Copyright (c) 2020 Oracle and/or its affiliates.
 * Author: Daniel Jordan <daniel.m.jordan@oracle.com>
 */

#include <linux/completion.h>
#include <linux/export.h>
#include <linux/cpumask.h>
#include <linux/err.h>
#include <linux/cpu.h>
#include <linux/padata.h>
#include <linux/mutex.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/rcupdate.h>

#define PADATA_WORK_ONSTACK	1	/* Work's memory is on stack */

struct padata_work {
	struct work_struct	pw_work;
	struct list_head	pw_list;  /* padata_free_works linkage */
	void			*pw_data;
};

static DEFINE_SPINLOCK(padata_works_lock);
static struct padata_work *padata_works;
static LIST_HEAD(padata_free_works);

struct padata_mt_job_state {
	spinlock_t		lock;
	struct completion	completion;
	struct padata_mt_job	*job;
	int			nworks;
	int			nworks_fini;
	unsigned long		chunk_size;
};

static void padata_free_pd(struct parallel_data *pd);
static void __init padata_mt_helper(struct work_struct *work);

static inline void padata_get_pd(struct parallel_data *pd)
{
	refcount_inc(&pd->refcnt);
}

static inline void padata_put_pd_cnt(struct parallel_data *pd, int cnt)
{
	if (refcount_sub_and_test(cnt, &pd->refcnt))
		padata_free_pd(pd);
}

static inline void padata_put_pd(struct parallel_data *pd)
{
	padata_put_pd_cnt(pd, 1);
}

static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
{
	int cpu, target_cpu;

	target_cpu = cpumask_first(pd->cpumask.pcpu);
	for (cpu = 0; cpu < cpu_index; cpu++)
		target_cpu = cpumask_next(target_cpu, pd->cpumask.pcpu);

	return target_cpu;
}

static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
{
	/*
	 * Hash the sequence numbers to the cpus by taking
	 * seq_nr mod. number of cpus in use.
	 */
	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);

	return padata_index_to_cpu(pd, cpu_index);
}

static struct padata_work *padata_work_alloc(void)
{
	struct padata_work *pw;

	lockdep_assert_held(&padata_works_lock);

	if (list_empty(&padata_free_works))
		return NULL;	/* No more work items allowed to be queued. */

	pw = list_first_entry(&padata_free_works, struct padata_work, pw_list);
	list_del(&pw->pw_list);
	return pw;
}

/*
 * This function is marked __ref because this function may be optimized in such
 * a way that it directly refers to work_fn's address, which causes modpost to
 * complain when work_fn is marked __init. This scenario was observed with clang
 * LTO, where padata_work_init() was optimized to refer directly to
 * padata_mt_helper() because the calls to padata_work_init() with other work_fn
 * values were eliminated or inlined.
 */
static void __ref padata_work_init(struct padata_work *pw, work_func_t work_fn,
				   void *data, int flags)
{
	if (flags & PADATA_WORK_ONSTACK)
		INIT_WORK_ONSTACK(&pw->pw_work, work_fn);
	else
		INIT_WORK(&pw->pw_work, work_fn);
	pw->pw_data = data;
}

static int __init padata_work_alloc_mt(int nworks, void *data,
				       struct list_head *head)
{
	int i;

	spin_lock_bh(&padata_works_lock);
	/* Start at 1 because the current task participates in the job. */
	for (i = 1; i < nworks; ++i) {
		struct padata_work *pw = padata_work_alloc();

		if (!pw)
			break;
		padata_work_init(pw, padata_mt_helper, data, 0);
		list_add(&pw->pw_list, head);
	}
	spin_unlock_bh(&padata_works_lock);

	return i;
}

static void padata_work_free(struct padata_work *pw)
{
	lockdep_assert_held(&padata_works_lock);
	list_add(&pw->pw_list, &padata_free_works);
}

static void __init padata_works_free(struct list_head *works)
{
	struct padata_work *cur, *next;

	if (list_empty(works))
		return;

	spin_lock_bh(&padata_works_lock);
	list_for_each_entry_safe(cur, next, works, pw_list) {
		list_del(&cur->pw_list);
		padata_work_free(cur);
	}
	spin_unlock_bh(&padata_works_lock);
}
static void padata_parallel_worker(struct work_struct *parallel_work)
{
	struct padata_work *pw = container_of(parallel_work, struct padata_work,
					      pw_work);
	struct padata_priv *padata = pw->pw_data;

	local_bh_disable();
	padata->parallel(padata);
	spin_lock(&padata_works_lock);
	padata_work_free(pw);
	spin_unlock(&padata_works_lock);
	local_bh_enable();
}

/**
 * padata_do_parallel - padata parallelization function
 *
 * @ps: padata shell
 * @padata: object to be parallelized
 * @cb_cpu: pointer to the CPU that the serialization callback function should
 *          run on.  If it's not in the serial cpumask of @pinst
 *          (i.e. cpumask.cbcpu), this function selects a fallback CPU and if
 *          none found, returns -EINVAL.
 *
 * The parallelization callback function will run with BHs off.
 * Note: Every object which is parallelized by padata_do_parallel
 * must be seen by padata_do_serial.
 *
 * Return: 0 on success or else negative error code.
 */
int padata_do_parallel(struct padata_shell *ps,
		       struct padata_priv *padata, int *cb_cpu)
{
	struct padata_instance *pinst = ps->pinst;
	int i, cpu, cpu_index, err;
	struct parallel_data *pd;
	struct padata_work *pw;

	rcu_read_lock_bh();

	pd = rcu_dereference_bh(ps->pd);

	err = -EINVAL;
	if (!(pinst->flags & PADATA_INIT) || pinst->flags & PADATA_INVALID)
		goto out;

	if (!cpumask_test_cpu(*cb_cpu, pd->cpumask.cbcpu)) {
		if (cpumask_empty(pd->cpumask.cbcpu))
			goto out;

		/* Select an alternate fallback CPU and notify the caller. */
		cpu_index = *cb_cpu % cpumask_weight(pd->cpumask.cbcpu);

		cpu = cpumask_first(pd->cpumask.cbcpu);
		for (i = 0; i < cpu_index; i++)
			cpu = cpumask_next(cpu, pd->cpumask.cbcpu);

		*cb_cpu = cpu;
	}

	err = -EBUSY;
	if ((pinst->flags & PADATA_RESET))
		goto out;

	padata_get_pd(pd);
	padata->pd = pd;
	padata->cb_cpu = *cb_cpu;

	spin_lock(&padata_works_lock);
	padata->seq_nr = ++pd->seq_nr;
	pw = padata_work_alloc();
	spin_unlock(&padata_works_lock);

	if (!pw) {
		/* Maximum works limit exceeded, run in the current task. */
		padata->parallel(padata);
	}

	rcu_read_unlock_bh();

	if (pw) {
		padata_work_init(pw, padata_parallel_worker, padata, 0);
		queue_work(pinst->parallel_wq, &pw->pw_work);
	}

	return 0;
out:
	rcu_read_unlock_bh();

	return err;
}
EXPORT_SYMBOL(padata_do_parallel);
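
/*
 * Illustrative sketch (not part of padata.c): a user of this interface
 * typically embeds struct padata_priv in its own request structure, fills in
 * the parallel and serial callbacks and submits the object through a
 * padata_shell.  The names my_request, my_parallel, my_serial and my_submit
 * are hypothetical; the callbacks themselves are sketched after
 * padata_do_serial() below.
 *
 *	struct my_request {
 *		struct padata_priv padata;
 *		// caller-specific data
 *	};
 *
 *	static int my_submit(struct padata_shell *ps, struct my_request *req,
 *			     int cb_cpu)
 *	{
 *		req->padata.parallel = my_parallel;
 *		req->padata.serial = my_serial;
 *		// May return -EBUSY (instance resetting) or -EINVAL; the
 *		// callback CPU actually chosen is written back via &cb_cpu.
 *		return padata_do_parallel(ps, &req->padata, &cb_cpu);
 *	}
 */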
/*
 * padata_find_next - Find the next object that needs serialization.
 *
 * Return:
 * * A pointer to the control struct of the next object that needs
 *   serialization, if present in one of the percpu reorder queues.
 * * NULL, if the next object that needs serialization will
 *   be parallel processed by another cpu and is not yet present in
 *   the cpu's reorder queue.
 */
static struct padata_priv *padata_find_next(struct parallel_data *pd,
					    bool remove_object)
{
	struct padata_priv *padata;
	struct padata_list *reorder;
	int cpu = pd->cpu;

	reorder = per_cpu_ptr(pd->reorder_list, cpu);

	spin_lock(&reorder->lock);
	if (list_empty(&reorder->list)) {
		spin_unlock(&reorder->lock);
		return NULL;
	}

	padata = list_entry(reorder->list.next, struct padata_priv, list);

	/*
	 * Checks the rare case where two or more parallel jobs have hashed to
	 * the same CPU and one of the later ones finishes first.
	 */
	if (padata->seq_nr != pd->processed) {
		spin_unlock(&reorder->lock);
		return NULL;
	}

	if (remove_object) {
		list_del_init(&padata->list);
		++pd->processed;
		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
	}

	spin_unlock(&reorder->lock);
	return padata;
}
static void padata_reorder(struct parallel_data *pd)
{
	struct padata_instance *pinst = pd->ps->pinst;
	int cb_cpu;
	struct padata_priv *padata;
	struct padata_serial_queue *squeue;
	struct padata_list *reorder;

	/*
	 * We need to ensure that only one cpu can work on dequeueing of
	 * the reorder queue at a time. Calculating in which percpu reorder
	 * queue the next object will arrive takes some time. A spinlock
	 * would be highly contended. Also it is not clear in which order
	 * the objects arrive at the reorder queues. So a cpu could wait to
	 * get the lock just to notice that there is nothing to do at the
	 * moment. Therefore we use a trylock and let the holder of the lock
	 * care for all the objects enqueued during the holdtime of the lock.
	 */
	if (!spin_trylock_bh(&pd->lock))
		return;

	while (1) {
		padata = padata_find_next(pd, true);

		/*
		 * If the next object that needs serialization is parallel
		 * processed by another cpu and is still on its way to the
		 * cpu's reorder queue, nothing to do for now.
		 */
		if (!padata)
			break;

		cb_cpu = padata->cb_cpu;
		squeue = per_cpu_ptr(pd->squeue, cb_cpu);

		spin_lock(&squeue->serial.lock);
		list_add_tail(&padata->list, &squeue->serial.list);
		spin_unlock(&squeue->serial.lock);

		queue_work_on(cb_cpu, pinst->serial_wq, &squeue->work);
	}

	spin_unlock_bh(&pd->lock);

	/*
	 * The next object that needs serialization might have arrived at
	 * the reorder queues in the meantime.
	 *
	 * Ensure reorder queue is read after pd->lock is dropped so we see
	 * new objects from another task in padata_do_serial. Pairs with
	 * smp_mb in padata_do_serial.
	 */
	smp_mb();

	reorder = per_cpu_ptr(pd->reorder_list, pd->cpu);
	if (!list_empty(&reorder->list) && padata_find_next(pd, false)) {
		/*
		 * Another context (e.g. padata_serial_worker) can finish the
		 * request.  To avoid a use-after-free, take a pd reference
		 * here and drop it after the reorder_work finishes.
		 */
		padata_get_pd(pd);
		queue_work(pinst->serial_wq, &pd->reorder_work);
	}
}
static void invoke_padata_reorder(struct work_struct *work)
{
	struct parallel_data *pd;

	local_bh_disable();
	pd = container_of(work, struct parallel_data, reorder_work);
	padata_reorder(pd);
	local_bh_enable();
	/* Pairs with putting the reorder_work in the serial_wq */
	padata_put_pd(pd);
}

static void padata_serial_worker(struct work_struct *serial_work)
{
	struct padata_serial_queue *squeue;
	struct parallel_data *pd;
	LIST_HEAD(local_list);
	int cnt;

	local_bh_disable();
	squeue = container_of(serial_work, struct padata_serial_queue, work);
	pd = squeue->pd;

	spin_lock(&squeue->serial.lock);
	list_replace_init(&squeue->serial.list, &local_list);
	spin_unlock(&squeue->serial.lock);

	cnt = 0;

	while (!list_empty(&local_list)) {
		struct padata_priv *padata;

		padata = list_entry(local_list.next,
				    struct padata_priv, list);

		list_del_init(&padata->list);

		padata->serial(padata);
		cnt++;
	}
	local_bh_enable();

	padata_put_pd_cnt(pd, cnt);
}

/**
 * padata_do_serial - padata serialization function
 *
 * @padata: object to be serialized.
 *
 * padata_do_serial must be called for every parallelized object.
 * The serialization callback function will run with BHs off.
 */
void padata_do_serial(struct padata_priv *padata)
{
	struct parallel_data *pd = padata->pd;
	int hashed_cpu = padata_cpu_hash(pd, padata->seq_nr);
	struct padata_list *reorder = per_cpu_ptr(pd->reorder_list, hashed_cpu);
	struct padata_priv *cur;
	struct list_head *pos;

	spin_lock(&reorder->lock);
	/* Sort in ascending order of sequence number. */
	list_for_each_prev(pos, &reorder->list) {
		cur = list_entry(pos, struct padata_priv, list);
		/* Compare by difference to consider integer wrap around */
		if ((signed int)(cur->seq_nr - padata->seq_nr) < 0)
			break;
	}
	list_add(&padata->list, pos);
	spin_unlock(&reorder->lock);

	/*
	 * Ensure the addition to the reorder list is ordered correctly
	 * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
	 * in padata_reorder.
	 */
	smp_mb();

	padata_reorder(pd);
}
EXPORT_SYMBOL(padata_do_serial);
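
/*
 * Illustrative sketch (not part of padata.c), continuing the hypothetical
 * my_request example above: the parallel callback hands the object back via
 * padata_do_serial() once the parallel work is done, and the serial callback
 * then runs on the callback CPU in the original submission order.
 *
 *	static void my_parallel(struct padata_priv *padata)
 *	{
 *		struct my_request *req =
 *			container_of(padata, struct my_request, padata);
 *
 *		// CPU-heavy work on req, running with BHs off on a
 *		// parallel-cpumask CPU.  Every parallelized object must
 *		// eventually be passed to padata_do_serial().
 *		padata_do_serial(padata);
 *	}
 *
 *	static void my_serial(struct padata_priv *padata)
 *	{
 *		// Runs with BHs off; objects are delivered in the order
 *		// they were submitted to padata_do_parallel().
 *	}
 */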
static int padata_setup_cpumasks(struct padata_instance *pinst)
{
	struct workqueue_attrs *attrs;
	int err;

	attrs = alloc_workqueue_attrs();
	if (!attrs)
		return -ENOMEM;

	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
	cpumask_copy(attrs->cpumask, pinst->cpumask.pcpu);
	err = apply_workqueue_attrs(pinst->parallel_wq, attrs);
	free_workqueue_attrs(attrs);

	return err;
}

static void __init padata_mt_helper(struct work_struct *w)
{
	struct padata_work *pw = container_of(w, struct padata_work, pw_work);
	struct padata_mt_job_state *ps = pw->pw_data;
	struct padata_mt_job *job = ps->job;
	bool done;

	spin_lock(&ps->lock);

	while (job->size > 0) {
		unsigned long start, size, end;

		start = job->start;
		/* So end is chunk size aligned if enough work remains. */
		size = roundup(start + 1, ps->chunk_size) - start;
		size = min(size, job->size);
		end = start + size;

		job->start = end;
		job->size -= size;

		spin_unlock(&ps->lock);
		job->thread_fn(start, end, job->fn_arg);
		spin_lock(&ps->lock);
	}

	++ps->nworks_fini;
	done = (ps->nworks_fini == ps->nworks);
	spin_unlock(&ps->lock);

	if (done)
		complete(&ps->completion);
}

/**
 * padata_do_multithreaded - run a multithreaded job
 * @job: Description of the job.
 *
 * See the definition of struct padata_mt_job for more details.
 */
void __init padata_do_multithreaded(struct padata_mt_job *job)
{
	/* In case threads finish at different times. */
	static const unsigned long load_balance_factor = 4;
	struct padata_work my_work, *pw;
	struct padata_mt_job_state ps;
	LIST_HEAD(works);
	int nworks, nid;
	static atomic_t last_used_nid __initdata;

	if (job->size == 0)
		return;

	/* Ensure at least one thread when size < min_chunk. */
	nworks = max(job->size / max(job->min_chunk, job->align), 1ul);
	nworks = min(nworks, job->max_threads);

	if (nworks == 1) {
		/* Single thread, no coordination needed, cut to the chase. */
		job->thread_fn(job->start, job->start + job->size, job->fn_arg);
		return;
	}

	spin_lock_init(&ps.lock);
	init_completion(&ps.completion);
	ps.job	       = job;
	ps.nworks      = padata_work_alloc_mt(nworks, &ps, &works);
	ps.nworks_fini = 0;

	/*
	 * Chunk size is the amount of work a helper does per call to the
	 * thread function.  Load balance large jobs between threads by
	 * increasing the number of chunks, guarantee at least the minimum
	 * chunk size from the caller, and honor the caller's alignment.
	 * Ensure chunk_size is at least 1 to prevent divide-by-0
	 * panic in padata_mt_helper().
	 */
	ps.chunk_size = job->size / (ps.nworks * load_balance_factor);
	ps.chunk_size = max(ps.chunk_size, job->min_chunk);
	ps.chunk_size = max(ps.chunk_size, 1ul);
	ps.chunk_size = roundup(ps.chunk_size, job->align);
	/*
	 * chunk_size can be 0 if the caller sets min_chunk to 0. So force it
	 * to at least 1 to prevent divide-by-0 panic in padata_mt_helper().
	 */
	if (!ps.chunk_size)
		ps.chunk_size = 1U;
	list_for_each_entry(pw, &works, pw_list)
		if (job->numa_aware) {
			int old_node = atomic_read(&last_used_nid);

			do {
				nid = next_node_in(old_node, node_states[N_CPU]);
			} while (!atomic_try_cmpxchg(&last_used_nid, &old_node, nid));
			queue_work_node(nid, system_unbound_wq, &pw->pw_work);
		} else {
			queue_work(system_unbound_wq, &pw->pw_work);
		}

	/* Use the current thread, which saves starting a workqueue worker. */
	padata_work_init(&my_work, padata_mt_helper, &ps, PADATA_WORK_ONSTACK);
	padata_mt_helper(&my_work.pw_work);

	/* Wait for all the helpers to finish. */
	wait_for_completion(&ps.completion);

	destroy_work_on_stack(&my_work.pw_work);
	padata_works_free(&works);
}
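
/*
 * Illustrative sketch (not part of padata.c): a boot-time caller splits a
 * large range across helper threads by filling in struct padata_mt_job.  The
 * names my_ctx, my_thread_fn and my_init_range are hypothetical; the chunk
 * and thread limits are example values.
 *
 *	static void __init my_thread_fn(unsigned long start, unsigned long end,
 *					void *arg)
 *	{
 *		struct my_ctx *ctx = arg;
 *
 *		// Process the half-open range [start, end) for ctx.
 *	}
 *
 *	static void __init my_init_range(struct my_ctx *ctx,
 *					 unsigned long start, unsigned long size)
 *	{
 *		struct padata_mt_job job = {
 *			.thread_fn   = my_thread_fn,
 *			.fn_arg      = ctx,
 *			.start       = start,
 *			.size        = size,
 *			.align       = 1,
 *			.min_chunk   = 1024,
 *			.max_threads = num_online_cpus(),
 *			.numa_aware  = false,
 *		};
 *
 *		padata_do_multithreaded(&job);
 *	}
 */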
static void __padata_list_init(struct padata_list *pd_list)
{
	INIT_LIST_HEAD(&pd_list->list);
	spin_lock_init(&pd_list->lock);
}

/* Initialize all percpu queues used by serial workers */
static void padata_init_squeues(struct parallel_data *pd)
{
	int cpu;
	struct padata_serial_queue *squeue;

	for_each_cpu(cpu, pd->cpumask.cbcpu) {
		squeue = per_cpu_ptr(pd->squeue, cpu);
		squeue->pd = pd;
		__padata_list_init(&squeue->serial);
		INIT_WORK(&squeue->work, padata_serial_worker);
	}
}

/* Initialize per-CPU reorder lists */
static void padata_init_reorder_list(struct parallel_data *pd)
{
	int cpu;
	struct padata_list *list;

	for_each_cpu(cpu, pd->cpumask.pcpu) {
		list = per_cpu_ptr(pd->reorder_list, cpu);
		__padata_list_init(list);
	}
}

/* Allocate and initialize the internal cpumask dependent resources. */
static struct parallel_data *padata_alloc_pd(struct padata_shell *ps)
{
	struct padata_instance *pinst = ps->pinst;
	struct parallel_data *pd;

	pd = kzalloc(sizeof(struct parallel_data), GFP_KERNEL);
	if (!pd)
		goto err;

	pd->reorder_list = alloc_percpu(struct padata_list);
	if (!pd->reorder_list)
		goto err_free_pd;

	pd->squeue = alloc_percpu(struct padata_serial_queue);
	if (!pd->squeue)
		goto err_free_reorder_list;

	pd->ps = ps;

	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
		goto err_free_squeue;
	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
		goto err_free_pcpu;

	cpumask_and(pd->cpumask.pcpu, pinst->cpumask.pcpu, cpu_online_mask);
	cpumask_and(pd->cpumask.cbcpu, pinst->cpumask.cbcpu, cpu_online_mask);

	padata_init_reorder_list(pd);
	padata_init_squeues(pd);
	pd->seq_nr = -1;
	refcount_set(&pd->refcnt, 1);
	spin_lock_init(&pd->lock);
	pd->cpu = cpumask_first(pd->cpumask.pcpu);
	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);

	return pd;

err_free_pcpu:
	free_cpumask_var(pd->cpumask.pcpu);
err_free_squeue:
	free_percpu(pd->squeue);
err_free_reorder_list:
	free_percpu(pd->reorder_list);
err_free_pd:
	kfree(pd);
err:
	return NULL;
}
static void padata_free_pd(struct parallel_data *pd)
{
	free_cpumask_var(pd->cpumask.pcpu);
	free_cpumask_var(pd->cpumask.cbcpu);
	free_percpu(pd->reorder_list);
	free_percpu(pd->squeue);
	kfree(pd);
}

static void __padata_start(struct padata_instance *pinst)
{
	pinst->flags |= PADATA_INIT;
}

static void __padata_stop(struct padata_instance *pinst)
{
	if (!(pinst->flags & PADATA_INIT))
		return;

	pinst->flags &= ~PADATA_INIT;

	synchronize_rcu();
}

/* Replace the internal control structure with a new one. */
static int padata_replace_one(struct padata_shell *ps)
{
	struct parallel_data *pd_new;

	pd_new = padata_alloc_pd(ps);
	if (!pd_new)
		return -ENOMEM;

	ps->opd = rcu_dereference_protected(ps->pd, 1);
	rcu_assign_pointer(ps->pd, pd_new);

	return 0;
}

static int padata_replace(struct padata_instance *pinst)
{
	struct padata_shell *ps;
	int err = 0;

	pinst->flags |= PADATA_RESET;

	list_for_each_entry(ps, &pinst->pslist, list) {
		err = padata_replace_one(ps);
		if (err)
			break;
	}

	synchronize_rcu();

	list_for_each_entry_continue_reverse(ps, &pinst->pslist, list)
		padata_put_pd(ps->opd);

	pinst->flags &= ~PADATA_RESET;

	return err;
}

/* If cpumask contains no active cpu, we mark the instance as invalid. */
static bool padata_validate_cpumask(struct padata_instance *pinst,
				    const struct cpumask *cpumask)
{
	if (!cpumask_intersects(cpumask, cpu_online_mask)) {
		pinst->flags |= PADATA_INVALID;
		return false;
	}

	pinst->flags &= ~PADATA_INVALID;
	return true;
}

static int __padata_set_cpumasks(struct padata_instance *pinst,
				 cpumask_var_t pcpumask,
				 cpumask_var_t cbcpumask)
{
	int valid;
	int err;

	valid = padata_validate_cpumask(pinst, pcpumask);
	if (!valid) {
		__padata_stop(pinst);
		goto out_replace;
	}

	valid = padata_validate_cpumask(pinst, cbcpumask);
	if (!valid)
		__padata_stop(pinst);

out_replace:
	cpumask_copy(pinst->cpumask.pcpu, pcpumask);
	cpumask_copy(pinst->cpumask.cbcpu, cbcpumask);

	err = padata_setup_cpumasks(pinst) ?: padata_replace(pinst);

	if (valid)
		__padata_start(pinst);

	return err;
}
/**
 * padata_set_cpumask - Set the cpumask selected by @cpumask_type to the
 *                      value given by @cpumask.
 * @pinst: padata instance
 * @cpumask_type: PADATA_CPU_SERIAL or PADATA_CPU_PARALLEL, corresponding
 *                to the serial and parallel cpumasks respectively.
 * @cpumask: the cpumask to use
 *
 * Return: 0 on success or negative error code
 */
int padata_set_cpumask(struct padata_instance *pinst, int cpumask_type,
		       cpumask_var_t cpumask)
{
	struct cpumask *serial_mask, *parallel_mask;
	int err = -EINVAL;

	cpus_read_lock();
	mutex_lock(&pinst->lock);

	switch (cpumask_type) {
	case PADATA_CPU_PARALLEL:
		serial_mask = pinst->cpumask.cbcpu;
		parallel_mask = cpumask;
		break;
	case PADATA_CPU_SERIAL:
		parallel_mask = pinst->cpumask.pcpu;
		serial_mask = cpumask;
		break;
	default:
		goto out;
	}

	err = __padata_set_cpumasks(pinst, parallel_mask, serial_mask);

out:
	mutex_unlock(&pinst->lock);
	cpus_read_unlock();

	return err;
}
EXPORT_SYMBOL(padata_set_cpumask);
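
/*
 * Illustrative sketch (not part of padata.c): reconfiguring the parallel
 * cpumask of an instance from kernel code.  The name my_restrict_parallel is
 * hypothetical.
 *
 *	static int my_restrict_parallel(struct padata_instance *pinst,
 *					const struct cpumask *wanted)
 *	{
 *		cpumask_var_t mask;
 *		int err;
 *
 *		if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 *			return -ENOMEM;
 *		cpumask_copy(mask, wanted);
 *		err = padata_set_cpumask(pinst, PADATA_CPU_PARALLEL, mask);
 *		free_cpumask_var(mask);
 *		return err;
 *	}
 *
 * The same masks are also exposed through sysfs as the parallel_cpumask and
 * serial_cpumask attributes defined below.
 */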
#ifdef CONFIG_HOTPLUG_CPU

static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
{
	int err = 0;

	if (cpumask_test_cpu(cpu, cpu_online_mask)) {
		err = padata_replace(pinst);

		if (padata_validate_cpumask(pinst, pinst->cpumask.pcpu) &&
		    padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
			__padata_start(pinst);
	}

	return err;
}

static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
{
	int err = 0;

	if (!cpumask_test_cpu(cpu, cpu_online_mask)) {
		if (!padata_validate_cpumask(pinst, pinst->cpumask.pcpu) ||
		    !padata_validate_cpumask(pinst, pinst->cpumask.cbcpu))
			__padata_stop(pinst);

		err = padata_replace(pinst);
	}

	return err;
}

static inline int pinst_has_cpu(struct padata_instance *pinst, int cpu)
{
	return cpumask_test_cpu(cpu, pinst->cpumask.pcpu) ||
		cpumask_test_cpu(cpu, pinst->cpumask.cbcpu);
}

static int padata_cpu_online(unsigned int cpu, struct hlist_node *node)
{
	struct padata_instance *pinst;
	int ret;

	pinst = hlist_entry_safe(node, struct padata_instance, cpu_online_node);
	if (!pinst_has_cpu(pinst, cpu))
		return 0;

	mutex_lock(&pinst->lock);
	ret = __padata_add_cpu(pinst, cpu);
	mutex_unlock(&pinst->lock);
	return ret;
}

static int padata_cpu_dead(unsigned int cpu, struct hlist_node *node)
{
	struct padata_instance *pinst;
	int ret;

	pinst = hlist_entry_safe(node, struct padata_instance, cpu_dead_node);
	if (!pinst_has_cpu(pinst, cpu))
		return 0;

	mutex_lock(&pinst->lock);
	ret = __padata_remove_cpu(pinst, cpu);
	mutex_unlock(&pinst->lock);
	return ret;
}

static enum cpuhp_state hp_online;
#endif

static void __padata_free(struct padata_instance *pinst)
{
#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_state_remove_instance_nocalls(CPUHP_PADATA_DEAD,
					    &pinst->cpu_dead_node);
	cpuhp_state_remove_instance_nocalls(hp_online, &pinst->cpu_online_node);
#endif

	WARN_ON(!list_empty(&pinst->pslist));

	free_cpumask_var(pinst->cpumask.pcpu);
	free_cpumask_var(pinst->cpumask.cbcpu);
	destroy_workqueue(pinst->serial_wq);
	destroy_workqueue(pinst->parallel_wq);
	kfree(pinst);
}

#define kobj2pinst(_kobj)					\
	container_of(_kobj, struct padata_instance, kobj)
#define attr2pentry(_attr)					\
	container_of(_attr, struct padata_sysfs_entry, attr)

static void padata_sysfs_release(struct kobject *kobj)
{
	struct padata_instance *pinst = kobj2pinst(kobj);
	__padata_free(pinst);
}

struct padata_sysfs_entry {
	struct attribute attr;
	ssize_t (*show)(struct padata_instance *, struct attribute *, char *);
	ssize_t (*store)(struct padata_instance *, struct attribute *,
			 const char *, size_t);
};

static ssize_t show_cpumask(struct padata_instance *pinst,
			    struct attribute *attr,  char *buf)
{
	struct cpumask *cpumask;
	ssize_t len;

	mutex_lock(&pinst->lock);
	if (!strcmp(attr->name, "serial_cpumask"))
		cpumask = pinst->cpumask.cbcpu;
	else
		cpumask = pinst->cpumask.pcpu;

	len = snprintf(buf, PAGE_SIZE, "%*pb\n",
		       nr_cpu_ids, cpumask_bits(cpumask));
	mutex_unlock(&pinst->lock);
	return len < PAGE_SIZE ? len : -EINVAL;
}

static ssize_t store_cpumask(struct padata_instance *pinst,
			     struct attribute *attr,
			     const char *buf, size_t count)
{
	cpumask_var_t new_cpumask;
	ssize_t ret;
	int mask_type;

	if (!alloc_cpumask_var(&new_cpumask, GFP_KERNEL))
		return -ENOMEM;

	ret = bitmap_parse(buf, count, cpumask_bits(new_cpumask),
			   nr_cpumask_bits);
	if (ret < 0)
		goto out;

	mask_type = !strcmp(attr->name, "serial_cpumask") ?
		PADATA_CPU_SERIAL : PADATA_CPU_PARALLEL;
	ret = padata_set_cpumask(pinst, mask_type, new_cpumask);
	if (!ret)
		ret = count;

out:
	free_cpumask_var(new_cpumask);
	return ret;
}

#define PADATA_ATTR_RW(_name, _show_name, _store_name)		\
	static struct padata_sysfs_entry _name##_attr =		\
		__ATTR(_name, 0644, _show_name, _store_name)
#define PADATA_ATTR_RO(_name, _show_name)			\
	static struct padata_sysfs_entry _name##_attr =	\
		__ATTR(_name, 0400, _show_name, NULL)

PADATA_ATTR_RW(serial_cpumask, show_cpumask, store_cpumask);
PADATA_ATTR_RW(parallel_cpumask, show_cpumask, store_cpumask);

/*
 * Padata sysfs provides the following objects:
 * serial_cpumask   [RW] - cpumask for serial workers
 * parallel_cpumask [RW] - cpumask for parallel workers
 */
static struct attribute *padata_default_attrs[] = {
	&serial_cpumask_attr.attr,
	&parallel_cpumask_attr.attr,
	NULL,
};
ATTRIBUTE_GROUPS(padata_default);

static ssize_t padata_sysfs_show(struct kobject *kobj,
				 struct attribute *attr, char *buf)
{
	struct padata_instance *pinst;
	struct padata_sysfs_entry *pentry;
	ssize_t ret = -EIO;

	pinst = kobj2pinst(kobj);
	pentry = attr2pentry(attr);
	if (pentry->show)
		ret = pentry->show(pinst, attr, buf);

	return ret;
}

static ssize_t padata_sysfs_store(struct kobject *kobj, struct attribute *attr,
				  const char *buf, size_t count)
{
	struct padata_instance *pinst;
	struct padata_sysfs_entry *pentry;
	ssize_t ret = -EIO;

	pinst = kobj2pinst(kobj);
	pentry = attr2pentry(attr);
	if (pentry->store)
		ret = pentry->store(pinst, attr, buf, count);

	return ret;
}

static const struct sysfs_ops padata_sysfs_ops = {
	.show = padata_sysfs_show,
	.store = padata_sysfs_store,
};

static const struct kobj_type padata_attr_type = {
	.sysfs_ops = &padata_sysfs_ops,
	.default_groups = padata_default_groups,
	.release = padata_sysfs_release,
};
/**
 * padata_alloc - allocate and initialize a padata instance
 * @name: used to identify the instance
 *
 * Return: new instance on success, NULL on error
 */
struct padata_instance *padata_alloc(const char *name)
{
	struct padata_instance *pinst;

	pinst = kzalloc(sizeof(struct padata_instance), GFP_KERNEL);
	if (!pinst)
		goto err;

	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
					     name);
	if (!pinst->parallel_wq)
		goto err_free_inst;

	cpus_read_lock();

	pinst->serial_wq = alloc_workqueue("%s_serial", WQ_MEM_RECLAIM |
					   WQ_CPU_INTENSIVE, 1, name);
	if (!pinst->serial_wq)
		goto err_put_cpus;

	if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
		goto err_free_serial_wq;
	if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
		free_cpumask_var(pinst->cpumask.pcpu);
		goto err_free_serial_wq;
	}

	INIT_LIST_HEAD(&pinst->pslist);

	cpumask_copy(pinst->cpumask.pcpu, cpu_possible_mask);
	cpumask_copy(pinst->cpumask.cbcpu, cpu_possible_mask);

	if (padata_setup_cpumasks(pinst))
		goto err_free_masks;

	__padata_start(pinst);

	kobject_init(&pinst->kobj, &padata_attr_type);
	mutex_init(&pinst->lock);

#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_state_add_instance_nocalls_cpuslocked(hp_online,
						    &pinst->cpu_online_node);
	cpuhp_state_add_instance_nocalls_cpuslocked(CPUHP_PADATA_DEAD,
						    &pinst->cpu_dead_node);
#endif

	cpus_read_unlock();

	return pinst;

err_free_masks:
	free_cpumask_var(pinst->cpumask.pcpu);
	free_cpumask_var(pinst->cpumask.cbcpu);
err_free_serial_wq:
	destroy_workqueue(pinst->serial_wq);
err_put_cpus:
	cpus_read_unlock();
	destroy_workqueue(pinst->parallel_wq);
err_free_inst:
	kfree(pinst);
err:
	return NULL;
}
EXPORT_SYMBOL(padata_alloc);

/**
 * padata_free - free a padata instance
 *
 * @pinst: padata instance to free
 */
void padata_free(struct padata_instance *pinst)
{
	kobject_put(&pinst->kobj);
}
EXPORT_SYMBOL(padata_free);

/**
 * padata_alloc_shell - Allocate and initialize padata shell.
 *
 * @pinst: Parent padata_instance object.
 *
 * Return: new shell on success, NULL on error
 */
struct padata_shell *padata_alloc_shell(struct padata_instance *pinst)
{
	struct parallel_data *pd;
	struct padata_shell *ps;

	ps = kzalloc(sizeof(*ps), GFP_KERNEL);
	if (!ps)
		goto out;

	ps->pinst = pinst;

	cpus_read_lock();
	pd = padata_alloc_pd(ps);
	cpus_read_unlock();

	if (!pd)
		goto out_free_ps;

	mutex_lock(&pinst->lock);
	RCU_INIT_POINTER(ps->pd, pd);
	list_add(&ps->list, &pinst->pslist);
	mutex_unlock(&pinst->lock);

	return ps;

out_free_ps:
	kfree(ps);
out:
	return NULL;
}
EXPORT_SYMBOL(padata_alloc_shell);

/**
 * padata_free_shell - free a padata shell
 *
 * @ps: padata shell to free
 */
void padata_free_shell(struct padata_shell *ps)
{
	struct parallel_data *pd;

	if (!ps)
		return;

	/*
	 * Wait for all _do_serial calls to finish to avoid touching
	 * freed pd's and ps's.
	 */
	synchronize_rcu();

	mutex_lock(&ps->pinst->lock);
	list_del(&ps->list);
	pd = rcu_dereference_protected(ps->pd, 1);
	padata_put_pd(pd);
	mutex_unlock(&ps->pinst->lock);

	kfree(ps);
}
EXPORT_SYMBOL(padata_free_shell);
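
/*
 * Illustrative sketch (not part of padata.c): the typical lifetime of an
 * instance and its shell.  The instance name "my_inst" is hypothetical.
 *
 *	struct padata_instance *pinst;
 *	struct padata_shell *ps;
 *
 *	pinst = padata_alloc("my_inst");
 *	if (!pinst)
 *		return -ENOMEM;
 *
 *	ps = padata_alloc_shell(pinst);
 *	if (!ps) {
 *		padata_free(pinst);
 *		return -ENOMEM;
 *	}
 *
 *	// ... submit objects with padata_do_parallel(ps, ...) ...
 *
 *	padata_free_shell(ps);
 *	padata_free(pinst);
 */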
void __init padata_init(void)
{
	unsigned int i, possible_cpus;
#ifdef CONFIG_HOTPLUG_CPU
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "padata:online",
				      padata_cpu_online, NULL);
	if (ret < 0)
		goto err;
	hp_online = ret;

	ret = cpuhp_setup_state_multi(CPUHP_PADATA_DEAD, "padata:dead",
				      NULL, padata_cpu_dead);
	if (ret < 0)
		goto remove_online_state;
#endif

	possible_cpus = num_possible_cpus();
	padata_works = kmalloc_array(possible_cpus, sizeof(struct padata_work),
				     GFP_KERNEL);
	if (!padata_works)
		goto remove_dead_state;

	for (i = 0; i < possible_cpus; ++i)
		list_add(&padata_works[i].pw_list, &padata_free_works);

	return;

remove_dead_state:
#ifdef CONFIG_HOTPLUG_CPU
	cpuhp_remove_multi_state(CPUHP_PADATA_DEAD);
remove_online_state:
	cpuhp_remove_multi_state(hp_online);
err:
#endif
	pr_warn("padata: initialization failed\n");
}