zbud.c 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * zbud.c
  4. *
  5. * Copyright (C) 2013, Seth Jennings, IBM
  6. *
  7. * Concepts based on zcache internal zbud allocator by Dan Magenheimer.
  8. *
* zbud is a special purpose allocator for storing compressed pages. Contrary
  10. * to what its name may suggest, zbud is not a buddy allocator, but rather an
  11. * allocator that "buddies" two compressed pages together in a single memory
  12. * page.
  13. *
  14. * While this design limits storage density, it has simple and deterministic
  15. * reclaim properties that make it preferable to a higher density approach when
  16. * reclaim will be used.
  17. *
  18. * zbud works by storing compressed pages, or "zpages", together in pairs in a
  19. * single memory page called a "zbud page". The first buddy is "left
  20. * justified" at the beginning of the zbud page, and the last buddy is "right
  21. * justified" at the end of the zbud page. The benefit is that if either
  22. * buddy is freed, the freed buddy space, coalesced with whatever slack space
  23. * that existed between the buddies, results in the largest possible free region
  24. * within the zbud page.
  25. *
  26. * zbud also provides an attractive lower bound on density. The ratio of zpages
  27. * to zbud pages can not be less than 1. This ensures that zbud can never "do
  28. * harm" by using more pages to store zpages than the uncompressed zpages would
  29. * have used on their own.
  30. *
  31. * zbud pages are divided into "chunks". The size of the chunks is fixed at
  32. * compile time and determined by NCHUNKS_ORDER below. Dividing zbud pages
  33. * into chunks allows organizing unbuddied zbud pages into a manageable number
  34. * of unbuddied lists according to the number of free chunks available in the
  35. * zbud page.
  36. *
  37. * The zbud API differs from that of conventional allocators in that the
  38. * allocation function, zbud_alloc(), returns an opaque handle to the user,
  39. * not a dereferenceable pointer. The user must map the handle using
  40. * zbud_map() in order to get a usable pointer by which to access the
  41. * allocation data and unmap the handle with zbud_unmap() when operations
  42. * on the allocation data are complete.
  43. */
  44. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  45. #include <linux/atomic.h>
  46. #include <linux/list.h>
  47. #include <linux/mm.h>
  48. #include <linux/module.h>
  49. #include <linux/preempt.h>
  50. #include <linux/slab.h>
  51. #include <linux/spinlock.h>
  52. #include <linux/zpool.h>
/*****************
 * Structures
 *****************/
/*
 * NCHUNKS_ORDER determines the internal allocation granularity, effectively
 * adjusting internal fragmentation.  It also determines the number of
 * freelists maintained in each pool. NCHUNKS_ORDER of 6 means that the
 * allocation granularity will be in chunks of size PAGE_SIZE/64. As one chunk
 * in allocated page is occupied by zbud header, NCHUNKS will be calculated to
 * 63 which shows the max number of free chunks in zbud page, also there will be
 * 63 freelists per pool.
 */
#define NCHUNKS_ORDER	6

/* log2 of the chunk size; with order 6 a chunk is PAGE_SIZE / 64 bytes */
#define CHUNK_SHIFT	(PAGE_SHIFT - NCHUNKS_ORDER)
#define CHUNK_SIZE	(1 << CHUNK_SHIFT)
/* the zbud header occupies exactly the first chunk of each zbud page */
#define ZHDR_SIZE_ALIGNED CHUNK_SIZE
/* chunks per page available for zpage data, i.e. excluding the header chunk */
#define NCHUNKS		((PAGE_SIZE - ZHDR_SIZE_ALIGNED) >> CHUNK_SHIFT)
struct zbud_pool;

/**
 * struct zbud_pool - stores metadata for each zbud pool
 * @lock:	protects all pool fields and first|last_chunk fields of any
 *		zbud page in the pool
 * @unbuddied:	array of lists tracking zbud pages that only contain one buddy;
 *		the lists each zbud page is added to depends on the size of
 *		its free region.
 * @buddied:	list tracking the zbud pages that contain two buddies;
 *		these zbud pages are full
 * @pages_nr:	number of zbud pages in the pool.
 *
 * This structure is allocated at pool creation time and maintains metadata
 * pertaining to a particular zbud pool.
 */
struct zbud_pool {
	spinlock_t lock;
	union {
		/*
		 * Reuse unbuddied[0] as buddied on the ground that
		 * unbuddied[0] is unused: a page on an unbuddied list always
		 * has at least one free chunk, so index 0 can never be hit.
		 */
		struct list_head buddied;
		struct list_head unbuddied[NCHUNKS];
	};
	u64 pages_nr;
};
/*
 * struct zbud_header - zbud page metadata occupying the first chunk of each
 *			zbud page.
 * @buddy:	links the zbud page into the unbuddied/buddied lists in the pool
 * @first_chunks:	the size of the first buddy in chunks, 0 if free
 * @last_chunks:	the size of the last buddy in chunks, 0 if free
 *
 * first_chunks and last_chunks are protected by the owning pool's lock.
 */
struct zbud_header {
	struct list_head buddy;
	unsigned int first_chunks;
	unsigned int last_chunks;
};
/*****************
 * Helpers
 *****************/
/* Just to make the code easier to read */
enum buddy {
	FIRST,	/* left-justified zpage, starts right after the header chunk */
	LAST	/* right-justified zpage, ends flush with the end of the page */
};
  117. /* Converts an allocation size in bytes to size in zbud chunks */
  118. static int size_to_chunks(size_t size)
  119. {
  120. return (size + CHUNK_SIZE - 1) >> CHUNK_SHIFT;
  121. }
/*
 * Walk the unbuddied freelist indices from _begin (fewest acceptable free
 * chunks) up to the emptiest list, NCHUNKS - 1.
 */
#define for_each_unbuddied_list(_iter, _begin) \
	for ((_iter) = (_begin); (_iter) < NCHUNKS; (_iter)++)
  124. /* Initializes the zbud header of a newly allocated zbud page */
  125. static struct zbud_header *init_zbud_page(struct page *page)
  126. {
  127. struct zbud_header *zhdr = page_address(page);
  128. zhdr->first_chunks = 0;
  129. zhdr->last_chunks = 0;
  130. INIT_LIST_HEAD(&zhdr->buddy);
  131. return zhdr;
  132. }
/* Resets the struct page fields and frees the page */
static void free_zbud_page(struct zbud_header *zhdr)
{
	/* the header sits at the very start of the page, so virt_to_page()
	 * on the header pointer yields the owning struct page */
	__free_page(virt_to_page(zhdr));
}
  138. /*
  139. * Encodes the handle of a particular buddy within a zbud page
  140. * Pool lock should be held as this function accesses first|last_chunks
  141. */
  142. static unsigned long encode_handle(struct zbud_header *zhdr, enum buddy bud)
  143. {
  144. unsigned long handle;
  145. /*
  146. * For now, the encoded handle is actually just the pointer to the data
  147. * but this might not always be the case. A little information hiding.
  148. * Add CHUNK_SIZE to the handle if it is the first allocation to jump
  149. * over the zbud header in the first chunk.
  150. */
  151. handle = (unsigned long)zhdr;
  152. if (bud == FIRST)
  153. /* skip over zbud header */
  154. handle += ZHDR_SIZE_ALIGNED;
  155. else /* bud == LAST */
  156. handle += PAGE_SIZE - (zhdr->last_chunks << CHUNK_SHIFT);
  157. return handle;
  158. }
  159. /* Returns the zbud page where a given handle is stored */
  160. static struct zbud_header *handle_to_zbud_header(unsigned long handle)
  161. {
  162. return (struct zbud_header *)(handle & PAGE_MASK);
  163. }
  164. /* Returns the number of free chunks in a zbud page */
  165. static int num_free_chunks(struct zbud_header *zhdr)
  166. {
  167. /*
  168. * Rather than branch for different situations, just use the fact that
  169. * free buddies have a length of zero to simplify everything.
  170. */
  171. return NCHUNKS - zhdr->first_chunks - zhdr->last_chunks;
  172. }
  173. /*****************
  174. * API Functions
  175. *****************/
  176. /**
  177. * zbud_create_pool() - create a new zbud pool
  178. * @gfp: gfp flags when allocating the zbud pool structure
  179. *
  180. * Return: pointer to the new zbud pool or NULL if the metadata allocation
  181. * failed.
  182. */
  183. static struct zbud_pool *zbud_create_pool(gfp_t gfp)
  184. {
  185. struct zbud_pool *pool;
  186. int i;
  187. pool = kzalloc(sizeof(struct zbud_pool), gfp);
  188. if (!pool)
  189. return NULL;
  190. spin_lock_init(&pool->lock);
  191. for_each_unbuddied_list(i, 0)
  192. INIT_LIST_HEAD(&pool->unbuddied[i]);
  193. INIT_LIST_HEAD(&pool->buddied);
  194. pool->pages_nr = 0;
  195. return pool;
  196. }
/**
 * zbud_destroy_pool() - destroys an existing zbud pool
 * @pool:	the zbud pool to be destroyed
 *
 * The pool should be emptied before this function is called; only the pool
 * metadata is freed here, so any zbud pages still in the pool would leak.
 */
static void zbud_destroy_pool(struct zbud_pool *pool)
{
	kfree(pool);
}
/**
 * zbud_alloc() - allocates a region of a given size
 * @pool:	zbud pool from which to allocate
 * @size:	size in bytes of the desired allocation
 * @gfp:	gfp flags used if the pool needs to grow
 * @handle:	handle of the new allocation
 *
 * This function will attempt to find a free region in the pool large enough to
 * satisfy the allocation request. A search of the unbuddied lists is
 * performed first. If no suitable free region is found, then a new page is
 * allocated and added to the pool to satisfy the request.
 *
 * gfp should not set __GFP_HIGHMEM as highmem pages cannot be used
 * as zbud pool pages.
 *
 * Return: 0 if success and handle is set, otherwise -EINVAL if the size or
 * gfp arguments are invalid, -ENOSPC if the size cannot fit in a zbud page,
 * or -ENOMEM if the pool was unable to allocate a new page.
 */
static int zbud_alloc(struct zbud_pool *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	int chunks, i, freechunks;
	struct zbud_header *zhdr = NULL;
	enum buddy bud;
	struct page *page;

	if (!size || (gfp & __GFP_HIGHMEM))
		return -EINVAL;
	/* must leave room for the header chunk and at most NCHUNKS-1 data */
	if (size > PAGE_SIZE - ZHDR_SIZE_ALIGNED - CHUNK_SIZE)
		return -ENOSPC;
	chunks = size_to_chunks(size);
	spin_lock(&pool->lock);
	/*
	 * First, try to find an unbuddied zbud page.  Start at the list of
	 * pages with exactly enough room and walk toward emptier lists.
	 */
	for_each_unbuddied_list(i, chunks) {
		if (!list_empty(&pool->unbuddied[i])) {
			zhdr = list_first_entry(&pool->unbuddied[i],
					struct zbud_header, buddy);
			list_del(&zhdr->buddy);
			/* occupy whichever buddy slot is still free */
			if (zhdr->first_chunks == 0)
				bud = FIRST;
			else
				bud = LAST;
			goto found;
		}
	}
	/*
	 * Couldn't find unbuddied zbud page, create new one.  Drop the lock
	 * around alloc_page() since the allocation may sleep.
	 */
	spin_unlock(&pool->lock);
	page = alloc_page(gfp);
	if (!page)
		return -ENOMEM;
	spin_lock(&pool->lock);
	pool->pages_nr++;
	zhdr = init_zbud_page(page);
	bud = FIRST;
found:
	if (bud == FIRST)
		zhdr->first_chunks = chunks;
	else
		zhdr->last_chunks = chunks;
	/* re-file the page according to how full it is now */
	if (zhdr->first_chunks == 0 || zhdr->last_chunks == 0) {
		/* Add to unbuddied list */
		freechunks = num_free_chunks(zhdr);
		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
	} else {
		/* Add to buddied list */
		list_add(&zhdr->buddy, &pool->buddied);
	}
	*handle = encode_handle(zhdr, bud);
	spin_unlock(&pool->lock);
	return 0;
}
/**
 * zbud_free() - frees the allocation associated with the given handle
 * @pool:	pool in which the allocation resided
 * @handle:	handle associated with the allocation returned by zbud_alloc()
 */
static void zbud_free(struct zbud_pool *pool, unsigned long handle)
{
	struct zbud_header *zhdr;
	int freechunks;

	spin_lock(&pool->lock);
	zhdr = handle_to_zbud_header(handle);
	/*
	 * If first buddy, handle will be page aligned once the header-chunk
	 * offset is subtracted; a LAST handle leaves a nonzero in-page offset.
	 */
	if ((handle - ZHDR_SIZE_ALIGNED) & ~PAGE_MASK)
		zhdr->last_chunks = 0;
	else
		zhdr->first_chunks = 0;
	/* Remove from existing buddy list before deciding its new home */
	list_del(&zhdr->buddy);
	if (zhdr->first_chunks == 0 && zhdr->last_chunks == 0) {
		/* zbud page is empty, free */
		free_zbud_page(zhdr);
		pool->pages_nr--;
	} else {
		/* Add to unbuddied list, indexed by remaining free chunks */
		freechunks = num_free_chunks(zhdr);
		list_add(&zhdr->buddy, &pool->unbuddied[freechunks]);
	}
	spin_unlock(&pool->lock);
}
  307. /**
  308. * zbud_map() - maps the allocation associated with the given handle
  309. * @pool: pool in which the allocation resides
  310. * @handle: handle associated with the allocation to be mapped
  311. *
  312. * While trivial for zbud, the mapping functions for others allocators
  313. * implementing this allocation API could have more complex information encoded
  314. * in the handle and could create temporary mappings to make the data
  315. * accessible to the user.
  316. *
  317. * Returns: a pointer to the mapped allocation
  318. */
  319. static void *zbud_map(struct zbud_pool *pool, unsigned long handle)
  320. {
  321. return (void *)(handle);
  322. }
/**
 * zbud_unmap() - unmaps the allocation associated with the given handle
 * @pool:	pool in which the allocation resides
 * @handle:	handle associated with the allocation to be unmapped
 *
 * A no-op for zbud: zbud_map() creates no temporary mapping to tear down.
 */
static void zbud_unmap(struct zbud_pool *pool, unsigned long handle)
{
}
/**
 * zbud_get_pool_pages() - gets the zbud pool size in pages
 * @pool:	pool whose size is being queried
 *
 * Returns: size in pages of the given pool. The pool lock need not be
 * taken to access pages_nr.
 */
static u64 zbud_get_pool_pages(struct zbud_pool *pool)
{
	return pool->pages_nr;
}
/*****************
 * zpool
 ****************/

/* Thin adapters exposing the zbud pool API through the generic zpool
 * driver interface; each simply forwards to the zbud_* equivalent. */

static void *zbud_zpool_create(const char *name, gfp_t gfp)
{
	return zbud_create_pool(gfp);
}

static void zbud_zpool_destroy(void *pool)
{
	zbud_destroy_pool(pool);
}

static int zbud_zpool_malloc(void *pool, size_t size, gfp_t gfp,
			unsigned long *handle)
{
	return zbud_alloc(pool, size, gfp, handle);
}

static void zbud_zpool_free(void *pool, unsigned long handle)
{
	zbud_free(pool, handle);
}

static void *zbud_zpool_map(void *pool, unsigned long handle,
			enum zpool_mapmode mm)
{
	/* the mapmode hint is ignored; zbud mappings are direct pointers */
	return zbud_map(pool, handle);
}

static void zbud_zpool_unmap(void *pool, unsigned long handle)
{
	zbud_unmap(pool, handle);
}

static u64 zbud_zpool_total_pages(void *pool)
{
	return zbud_get_pool_pages(pool);
}

static struct zpool_driver zbud_zpool_driver = {
	.type =		"zbud",
	/* mappings stay valid across sleeps: they are plain pointers */
	.sleep_mapped =	true,
	.owner =	THIS_MODULE,
	.create =	zbud_zpool_create,
	.destroy =	zbud_zpool_destroy,
	.malloc =	zbud_zpool_malloc,
	.free =		zbud_zpool_free,
	.map =		zbud_zpool_map,
	.unmap =	zbud_zpool_unmap,
	.total_pages =	zbud_zpool_total_pages,
};
MODULE_ALIAS("zpool-zbud");

/* Module load: sanity-check layout assumptions and register with zpool */
static int __init init_zbud(void)
{
	/* Make sure the zbud header will fit in one chunk */
	BUILD_BUG_ON(sizeof(struct zbud_header) > ZHDR_SIZE_ALIGNED);
	pr_info("loaded\n");
	zpool_register_driver(&zbud_zpool_driver);
	return 0;
}

/* Module unload: unregister the zpool driver */
static void __exit exit_zbud(void)
{
	zpool_unregister_driver(&zbud_zpool_driver);
	pr_info("unloaded\n");
}

module_init(init_zbud);
module_exit(exit_zbud);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Seth Jennings <sjennings@variantweb.net>");
MODULE_DESCRIPTION("Buddy Allocator for Compressed Pages");