swap_slots.c

// SPDX-License-Identifier: GPL-2.0
/*
 * Manage cache of swap slots to be used for and returned from
 * swap.
 *
 * Copyright(c) 2016 Intel Corporation.
 *
 * Author: Tim Chen <tim.c.chen@linux.intel.com>
 *
 * We allocate the swap slots from the global pool and put
 * them into local per cpu caches. This has the advantage of
 * not needing to acquire the swap_info lock every time we
 * need a new slot.
 *
 * There is also the opportunity to simply return a slot to
 * the local cache without needing to acquire the swap_info
 * lock. We do not reuse the returned slots directly but
 * move them back to the global pool in a batch. This
 * allows the slots to coalesce and reduces fragmentation.
 *
 * An allocated swap entry is marked with the SWAP_HAS_CACHE
 * flag in swap_map, which prevents it from being allocated
 * again from the global pool.
 *
 * The swap slots cache is protected by a mutex instead of
 * a spin lock, as we can possibly sleep when searching for
 * slots with scan_swap_map.
 */

#include <linux/swap_slots.h>
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/vmalloc.h>
#include <linux/mutex.h>
#include <linux/mm.h>

static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);

static bool swap_slot_cache_active;
bool swap_slot_cache_enabled;
static bool swap_slot_cache_initialized;
static DEFINE_MUTEX(swap_slots_cache_mutex);
/* Serialize swap slots cache enable/disable operations */
static DEFINE_MUTEX(swap_slots_cache_enable_mutex);

static void __drain_swap_slots_cache(unsigned int type);
static void deactivate_swap_slots_cache(void);
static void reactivate_swap_slots_cache(void);

#define use_swap_slot_cache (swap_slot_cache_active && \
		swap_slot_cache_enabled && swap_slot_cache_initialized)
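
/*
 * Cache types passed to drain_slots_cache_cpu(): SLOTS_CACHE selects the
 * per-cpu allocation cache (cache->slots), SLOTS_CACHE_RET the per-cpu
 * cache of returned slots (cache->slots_ret).
 */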
#define SLOTS_CACHE 0x1
#define SLOTS_CACHE_RET 0x2

static void deactivate_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_mutex);
	swap_slot_cache_active = false;
	__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
	mutex_unlock(&swap_slots_cache_mutex);
}

static void reactivate_swap_slots_cache(void)
{
	mutex_lock(&swap_slots_cache_mutex);
	swap_slot_cache_active = true;
	mutex_unlock(&swap_slots_cache_mutex);
}
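
/*
 * disable_swap_slots_cache_lock() disables and drains the cache while
 * holding swap_slots_cache_enable_mutex; the mutex stays held until the
 * matching reenable_swap_slots_cache_unlock() below re-enables the cache
 * and releases it.
 */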
/* Must not be called with cpu hot plug lock */
void disable_swap_slots_cache_lock(void)
{
	mutex_lock(&swap_slots_cache_enable_mutex);
	swap_slot_cache_enabled = false;
	if (swap_slot_cache_initialized) {
		/* serialize with cpu hotplug operations */
		get_online_cpus();
		__drain_swap_slots_cache(SLOTS_CACHE|SLOTS_CACHE_RET);
		put_online_cpus();
	}
}

static void __reenable_swap_slots_cache(void)
{
	swap_slot_cache_enabled = has_usable_swap();
}

void reenable_swap_slots_cache_unlock(void)
{
	__reenable_swap_slots_cache();
	mutex_unlock(&swap_slots_cache_enable_mutex);
}
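
/*
 * Decide whether the per-cpu caches may be used: reactivate the cache
 * when the global pool has plenty of free slots per online cpu, and
 * deactivate it when the pool runs low, so cached slots are returned
 * to the global pool instead of sitting idle per cpu.
 */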
static bool check_cache_active(void)
{
	long pages;

	if (!swap_slot_cache_enabled || !swap_slot_cache_initialized)
		return false;

	pages = get_nr_swap_pages();
	if (!swap_slot_cache_active) {
		if (pages > num_online_cpus() *
		    THRESHOLD_ACTIVATE_SWAP_SLOTS_CACHE)
			reactivate_swap_slots_cache();
		goto out;
	}

	/* if global pool of slot caches too low, deactivate cache */
	if (pages < num_online_cpus() * THRESHOLD_DEACTIVATE_SWAP_SLOTS_CACHE)
		deactivate_swap_slots_cache();
out:
	return swap_slot_cache_active;
}
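
/*
 * CPU hotplug "online" callback: allocate this cpu's allocation and
 * return caches. The arrays are allocated before taking
 * swap_slots_cache_mutex and freed on the way out if the cache turns
 * out to already exist.
 */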
static int alloc_swap_slot_cache(unsigned int cpu)
{
	struct swap_slots_cache *cache;
	swp_entry_t *slots, *slots_ret;

	/*
	 * Do allocation outside swap_slots_cache_mutex
	 * as kvcalloc could trigger reclaim and get_swap_page,
	 * which can lock swap_slots_cache_mutex.
	 */
	slots = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
			 GFP_KERNEL);
	if (!slots)
		return -ENOMEM;

	slots_ret = kvcalloc(SWAP_SLOTS_CACHE_SIZE, sizeof(swp_entry_t),
			     GFP_KERNEL);
	if (!slots_ret) {
		kvfree(slots);
		return -ENOMEM;
	}

	mutex_lock(&swap_slots_cache_mutex);
	cache = &per_cpu(swp_slots, cpu);
	if (cache->slots || cache->slots_ret)
		/* cache already allocated */
		goto out;

	if (!cache->lock_initialized) {
		mutex_init(&cache->alloc_lock);
		spin_lock_init(&cache->free_lock);
		cache->lock_initialized = true;
	}

	cache->nr = 0;
	cache->cur = 0;
	cache->n_ret = 0;
	/*
	 * We initialized alloc_lock and free_lock earlier. We use
	 * !cache->slots or !cache->slots_ret to know if it is safe to acquire
	 * the corresponding lock and use the cache. Memory barrier below
	 * ensures the assumption.
	 */
	mb();
	cache->slots = slots;
	slots = NULL;
	cache->slots_ret = slots_ret;
	slots_ret = NULL;
out:
	mutex_unlock(&swap_slots_cache_mutex);
	if (slots)
		kvfree(slots);
	if (slots_ret)
		kvfree(slots_ret);
	return 0;
}
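
/*
 * Flush one cpu's cached slots back to the global pool. @type selects
 * which cache(s) to drain (SLOTS_CACHE, SLOTS_CACHE_RET); if @free_slots
 * is true the backing arrays are freed as well.
 */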
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
				  bool free_slots)
{
	struct swap_slots_cache *cache;
	swp_entry_t *slots = NULL;

	cache = &per_cpu(swp_slots, cpu);
	if ((type & SLOTS_CACHE) && cache->slots) {
		mutex_lock(&cache->alloc_lock);
		swapcache_free_entries(cache->slots + cache->cur, cache->nr);
		cache->cur = 0;
		cache->nr = 0;
		if (free_slots && cache->slots) {
			kvfree(cache->slots);
			cache->slots = NULL;
		}
		mutex_unlock(&cache->alloc_lock);
	}
	if ((type & SLOTS_CACHE_RET) && cache->slots_ret) {
		spin_lock_irq(&cache->free_lock);
		swapcache_free_entries(cache->slots_ret, cache->n_ret);
		cache->n_ret = 0;
		if (free_slots && cache->slots_ret) {
			slots = cache->slots_ret;
			cache->slots_ret = NULL;
		}
		spin_unlock_irq(&cache->free_lock);
		if (slots)
			kvfree(slots);
	}
}

static void __drain_swap_slots_cache(unsigned int type)
{
	unsigned int cpu;

	/*
	 * This function is called during
	 *	1) swapoff, when we have to make sure no
	 *	   left over slots are in cache when we remove
	 *	   a swap device;
	 *	2) disabling of swap slot cache, when we run low
	 *	   on swap slots when allocating memory and need
	 *	   to return swap slots to global pool.
	 *
	 * We cannot acquire cpu hot plug lock here as
	 * this function can be invoked in the cpu
	 * hot plug path:
	 * cpu_up -> lock cpu_hotplug -> cpu hotplug state callback
	 *	  -> memory allocation -> direct reclaim -> get_swap_page
	 *	  -> drain_swap_slots_cache
	 *
	 * Hence the loop over currently online cpus below could miss a cpu
	 * that is being brought online but is not yet marked as online.
	 * That is okay, as we do not schedule and run anything on a
	 * cpu before it has been marked online. Hence, we will not
	 * fill any swap slots in the slots cache of such a cpu.
	 * There are no slots on such a cpu that need to be drained.
	 */
	for_each_online_cpu(cpu)
		drain_slots_cache_cpu(cpu, type, false);
}
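
/*
 * CPU hotplug teardown callback: drain and free the departing cpu's
 * slot caches.
 */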
static int free_slot_cache(unsigned int cpu)
{
	mutex_lock(&swap_slots_cache_mutex);
	drain_slots_cache_cpu(cpu, SLOTS_CACHE | SLOTS_CACHE_RET, true);
	mutex_unlock(&swap_slots_cache_mutex);
	return 0;
}
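
/*
 * Set up the swap slots cache: on the first call, register the cpu
 * hotplug callbacks (which allocates the per-cpu caches for each online
 * cpu); then enable the cache if there is usable swap.
 */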
int enable_swap_slots_cache(void)
{
	int ret = 0;

	mutex_lock(&swap_slots_cache_enable_mutex);
	if (swap_slot_cache_initialized) {
		__reenable_swap_slots_cache();
		goto out_unlock;
	}

	ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "swap_slots_cache",
				alloc_swap_slot_cache, free_slot_cache);
	if (WARN_ONCE(ret < 0, "Cache allocation failed (%s), operating "
		      "without swap slots cache.\n", __func__))
		goto out_unlock;

	swap_slot_cache_initialized = true;
	__reenable_swap_slots_cache();
out_unlock:
	mutex_unlock(&swap_slots_cache_enable_mutex);
	return 0;
}

/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
{
	if (!use_swap_slot_cache || cache->nr)
		return 0;

	cache->cur = 0;
	if (swap_slot_cache_active)
		cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
					   cache->slots, 1);

	return cache->nr;
}
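
/*
 * Return a swap entry that is no longer in use. If the per-cpu return
 * cache is usable, the entry is stashed there and the whole batch is
 * handed back to the global pool once the cache fills up; otherwise the
 * entry is freed to the global pool directly.
 */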
int free_swap_slot(swp_entry_t entry)
{
	struct swap_slots_cache *cache;

	cache = raw_cpu_ptr(&swp_slots);
	if (likely(use_swap_slot_cache && cache->slots_ret)) {
		spin_lock_irq(&cache->free_lock);
		/* Swap slots cache may be deactivated before acquiring lock */
		if (!use_swap_slot_cache || !cache->slots_ret) {
			spin_unlock_irq(&cache->free_lock);
			goto direct_free;
		}
		if (cache->n_ret >= SWAP_SLOTS_CACHE_SIZE) {
			/*
			 * Return slots to global pool.
			 * The current swap_map value is SWAP_HAS_CACHE.
			 * Set it to 0 to indicate it is available for
			 * allocation in global pool.
			 */
			swapcache_free_entries(cache->slots_ret, cache->n_ret);
			cache->n_ret = 0;
		}
		cache->slots_ret[cache->n_ret++] = entry;
		spin_unlock_irq(&cache->free_lock);
	} else {
direct_free:
		swapcache_free_entries(&entry, 1);
	}

	return 0;
}
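
/*
 * Allocate a swap entry for @page. Huge pages go straight to the global
 * pool (an HPAGE_PMD_NR-sized cluster is needed); for normal pages the
 * per-cpu allocation cache is tried first and refilled from the global
 * pool as needed, falling back to a direct allocation when the cache is
 * unusable. The entry is also charged to the page's memory cgroup before
 * being returned.
 */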
swp_entry_t get_swap_page(struct page *page)
{
	swp_entry_t entry, *pentry;
	struct swap_slots_cache *cache;

	entry.val = 0;

	if (PageTransHuge(page)) {
		if (IS_ENABLED(CONFIG_THP_SWAP))
			get_swap_pages(1, &entry, HPAGE_PMD_NR);
		goto out;
	}

	/*
	 * Preemption is allowed here, because we may sleep
	 * in refill_swap_slots_cache(). But it is safe, because
	 * accesses to the per-CPU data structure are protected by the
	 * mutex cache->alloc_lock.
	 *
	 * The alloc path here does not touch cache->slots_ret
	 * so cache->free_lock is not taken.
	 */
	cache = raw_cpu_ptr(&swp_slots);

	if (likely(check_cache_active() && cache->slots)) {
		mutex_lock(&cache->alloc_lock);
		if (cache->slots) {
repeat:
			if (cache->nr) {
				pentry = &cache->slots[cache->cur++];
				entry = *pentry;
				pentry->val = 0;
				cache->nr--;
			} else {
				if (refill_swap_slots_cache(cache))
					goto repeat;
			}
		}
		mutex_unlock(&cache->alloc_lock);
		if (entry.val)
			goto out;
	}

	get_swap_pages(1, &entry, 1);
out:
	if (mem_cgroup_try_charge_swap(page, entry)) {
		put_swap_page(page, entry);
		entry.val = 0;
	}
	return entry;
}