vmw_balloon.c

// SPDX-License-Identifier: GPL-2.0
/*
 * VMware Balloon driver.
 *
 * Copyright (C) 2000-2018, VMware, Inc. All Rights Reserved.
 *
 * This is the VMware physical memory management driver for Linux. The driver
 * acts like a "balloon" that can be inflated to reclaim physical pages by
 * reserving them in the guest and invalidating them in the monitor,
 * freeing up the underlying machine pages so they can be allocated to
 * other guests. The balloon can also be deflated to allow the guest to
 * use more physical memory. Higher level policies can control the sizes
 * of balloons in VMs in order to manage physical memory resources.
 */
//#define DEBUG
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <linux/sched.h>
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/debugfs.h>
#include <linux/seq_file.h>
#include <linux/rwsem.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/balloon_compaction.h>
#include <linux/vmw_vmci_defs.h>
#include <linux/vmw_vmci_api.h>
#include <asm/hypervisor.h>

MODULE_AUTHOR("VMware, Inc.");
MODULE_DESCRIPTION("VMware Memory Control (Balloon) Driver");
MODULE_ALIAS("dmi:*:svnVMware*:*");
MODULE_ALIAS("vmware_vmmemctl");
MODULE_LICENSE("GPL");
static bool __read_mostly vmwballoon_shrinker_enable;
module_param(vmwballoon_shrinker_enable, bool, 0444);
MODULE_PARM_DESC(vmwballoon_shrinker_enable,
        "Enable non-cooperative out-of-memory protection. Disabled by default as it may degrade performance.");

/* Delay in seconds after shrink before inflation. */
#define VMBALLOON_SHRINK_DELAY          (5)

/* Maximum number of refused pages we accumulate during inflation cycle */
#define VMW_BALLOON_MAX_REFUSED         16

/* Magic number for the balloon mount-point */
#define BALLOON_VMW_MAGIC               0x0ba11007

/*
 * Hypervisor communication port definitions.
 */
#define VMW_BALLOON_HV_PORT             0x5670
#define VMW_BALLOON_HV_MAGIC            0x456c6d6f
#define VMW_BALLOON_GUEST_ID            1       /* Linux */
enum vmwballoon_capabilities {
        /*
         * Bit 0 is reserved and not associated to any capability.
         */
        VMW_BALLOON_BASIC_CMDS                  = (1 << 1),
        VMW_BALLOON_BATCHED_CMDS                = (1 << 2),
        VMW_BALLOON_BATCHED_2M_CMDS             = (1 << 3),
        VMW_BALLOON_SIGNALLED_WAKEUP_CMD        = (1 << 4),
        VMW_BALLOON_64_BIT_TARGET               = (1 << 5)
};

#define VMW_BALLOON_CAPABILITIES_COMMON (VMW_BALLOON_BASIC_CMDS \
                                        | VMW_BALLOON_BATCHED_CMDS \
                                        | VMW_BALLOON_BATCHED_2M_CMDS \
                                        | VMW_BALLOON_SIGNALLED_WAKEUP_CMD)

#define VMW_BALLOON_2M_ORDER            (PMD_SHIFT - PAGE_SHIFT)

/*
 * 64-bit targets are only supported in 64-bit
 */
#ifdef CONFIG_64BIT
#define VMW_BALLOON_CAPABILITIES        (VMW_BALLOON_CAPABILITIES_COMMON \
                                        | VMW_BALLOON_64_BIT_TARGET)
#else
#define VMW_BALLOON_CAPABILITIES        VMW_BALLOON_CAPABILITIES_COMMON
#endif
enum vmballoon_page_size_type {
        VMW_BALLOON_4K_PAGE,
        VMW_BALLOON_2M_PAGE,
        VMW_BALLOON_LAST_SIZE = VMW_BALLOON_2M_PAGE
};

#define VMW_BALLOON_NUM_PAGE_SIZES      (VMW_BALLOON_LAST_SIZE + 1)

static const char * const vmballoon_page_size_names[] = {
        [VMW_BALLOON_4K_PAGE]                   = "4k",
        [VMW_BALLOON_2M_PAGE]                   = "2M"
};

enum vmballoon_op {
        VMW_BALLOON_INFLATE,
        VMW_BALLOON_DEFLATE
};

enum vmballoon_op_stat_type {
        VMW_BALLOON_OP_STAT,
        VMW_BALLOON_OP_FAIL_STAT
};

#define VMW_BALLOON_OP_STAT_TYPES       (VMW_BALLOON_OP_FAIL_STAT + 1)
/**
 * enum vmballoon_cmd_type - backdoor commands.
 *
 * Availability of the commands is as follows:
 *
 * %VMW_BALLOON_CMD_START, %VMW_BALLOON_CMD_GET_TARGET and
 * %VMW_BALLOON_CMD_GUEST_ID are always available.
 *
 * If the host reports %VMW_BALLOON_BASIC_CMDS are supported then
 * %VMW_BALLOON_CMD_LOCK and %VMW_BALLOON_CMD_UNLOCK commands are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_LOCK and %VMW_BALLOON_CMD_BATCHED_UNLOCK commands
 * are available.
 *
 * If the host reports %VMW_BALLOON_BATCHED_2M_CMDS are supported then
 * %VMW_BALLOON_CMD_BATCHED_2M_LOCK and %VMW_BALLOON_CMD_BATCHED_2M_UNLOCK
 * are supported.
 *
 * If the host reports %VMW_BALLOON_SIGNALLED_WAKEUP_CMD is supported then
 * %VMW_BALLOON_CMD_VMCI_DOORBELL_SET command is supported.
 *
 * @VMW_BALLOON_CMD_START: Communicating supported version with the
 *                         hypervisor.
 * @VMW_BALLOON_CMD_GET_TARGET: Gets the balloon target size.
 * @VMW_BALLOON_CMD_LOCK: Informs the hypervisor about a ballooned page.
 * @VMW_BALLOON_CMD_UNLOCK: Informs the hypervisor about a page that is about
 *                          to be deflated from the balloon.
 * @VMW_BALLOON_CMD_GUEST_ID: Informs the hypervisor about the type of OS that
 *                            runs in the VM.
 * @VMW_BALLOON_CMD_BATCHED_LOCK: Inform the hypervisor about a batch of
 *                                ballooned pages (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_UNLOCK: Inform the hypervisor about a batch of
 *                                  pages that are about to be deflated from
 *                                  the balloon (up to 512).
 * @VMW_BALLOON_CMD_BATCHED_2M_LOCK: Similar to @VMW_BALLOON_CMD_BATCHED_LOCK
 *                                   for 2MB pages.
 * @VMW_BALLOON_CMD_BATCHED_2M_UNLOCK: Similar to
 *                                     @VMW_BALLOON_CMD_BATCHED_UNLOCK for 2MB
 *                                     pages.
 * @VMW_BALLOON_CMD_VMCI_DOORBELL_SET: A command to set doorbell notification
 *                                     that would be invoked when the balloon
 *                                     size changes.
 * @VMW_BALLOON_CMD_LAST: Value of the last command.
 */
enum vmballoon_cmd_type {
        VMW_BALLOON_CMD_START,
        VMW_BALLOON_CMD_GET_TARGET,
        VMW_BALLOON_CMD_LOCK,
        VMW_BALLOON_CMD_UNLOCK,
        VMW_BALLOON_CMD_GUEST_ID,
        /* No command 5 */
        VMW_BALLOON_CMD_BATCHED_LOCK = 6,
        VMW_BALLOON_CMD_BATCHED_UNLOCK,
        VMW_BALLOON_CMD_BATCHED_2M_LOCK,
        VMW_BALLOON_CMD_BATCHED_2M_UNLOCK,
        VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
        VMW_BALLOON_CMD_LAST = VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
};

#define VMW_BALLOON_CMD_NUM     (VMW_BALLOON_CMD_LAST + 1)
enum vmballoon_error_codes {
        VMW_BALLOON_SUCCESS,
        VMW_BALLOON_ERROR_CMD_INVALID,
        VMW_BALLOON_ERROR_PPN_INVALID,
        VMW_BALLOON_ERROR_PPN_LOCKED,
        VMW_BALLOON_ERROR_PPN_UNLOCKED,
        VMW_BALLOON_ERROR_PPN_PINNED,
        VMW_BALLOON_ERROR_PPN_NOTNEEDED,
        VMW_BALLOON_ERROR_RESET,
        VMW_BALLOON_ERROR_BUSY
};

#define VMW_BALLOON_SUCCESS_WITH_CAPABILITIES   (0x03000000)

#define VMW_BALLOON_CMD_WITH_TARGET_MASK                \
        ((1UL << VMW_BALLOON_CMD_GET_TARGET)            | \
         (1UL << VMW_BALLOON_CMD_LOCK)                  | \
         (1UL << VMW_BALLOON_CMD_UNLOCK)                | \
         (1UL << VMW_BALLOON_CMD_BATCHED_LOCK)          | \
         (1UL << VMW_BALLOON_CMD_BATCHED_UNLOCK)        | \
         (1UL << VMW_BALLOON_CMD_BATCHED_2M_LOCK)       | \
         (1UL << VMW_BALLOON_CMD_BATCHED_2M_UNLOCK))
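
/*
 * Commands in VMW_BALLOON_CMD_WITH_TARGET_MASK also return the current
 * balloon target in the result register. For example, after a successful
 * VMW_BALLOON_CMD_GET_TARGET, __vmballoon_cmd() below updates b->target
 * from the returned value because the command's bit is set in this mask,
 * whereas VMW_BALLOON_CMD_GUEST_ID is not in the mask and leaves the
 * target untouched.
 */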
static const char * const vmballoon_cmd_names[] = {
        [VMW_BALLOON_CMD_START]                 = "start",
        [VMW_BALLOON_CMD_GET_TARGET]            = "target",
        [VMW_BALLOON_CMD_LOCK]                  = "lock",
        [VMW_BALLOON_CMD_UNLOCK]                = "unlock",
        [VMW_BALLOON_CMD_GUEST_ID]              = "guestType",
        [VMW_BALLOON_CMD_BATCHED_LOCK]          = "batchLock",
        [VMW_BALLOON_CMD_BATCHED_UNLOCK]        = "batchUnlock",
        [VMW_BALLOON_CMD_BATCHED_2M_LOCK]       = "2m-lock",
        [VMW_BALLOON_CMD_BATCHED_2M_UNLOCK]     = "2m-unlock",
        [VMW_BALLOON_CMD_VMCI_DOORBELL_SET]     = "doorbellSet"
};

enum vmballoon_stat_page {
        VMW_BALLOON_PAGE_STAT_ALLOC,
        VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
        VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
        VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
        VMW_BALLOON_PAGE_STAT_FREE,
        VMW_BALLOON_PAGE_STAT_LAST = VMW_BALLOON_PAGE_STAT_FREE
};

#define VMW_BALLOON_PAGE_STAT_NUM       (VMW_BALLOON_PAGE_STAT_LAST + 1)

enum vmballoon_stat_general {
        VMW_BALLOON_STAT_TIMER,
        VMW_BALLOON_STAT_DOORBELL,
        VMW_BALLOON_STAT_RESET,
        VMW_BALLOON_STAT_SHRINK,
        VMW_BALLOON_STAT_SHRINK_FREE,
        VMW_BALLOON_STAT_LAST = VMW_BALLOON_STAT_SHRINK_FREE
};

#define VMW_BALLOON_STAT_NUM            (VMW_BALLOON_STAT_LAST + 1)
static DEFINE_STATIC_KEY_TRUE(vmw_balloon_batching);
static DEFINE_STATIC_KEY_FALSE(balloon_stat_enabled);

struct vmballoon_ctl {
        struct list_head pages;
        struct list_head refused_pages;
        struct list_head prealloc_pages;
        unsigned int n_refused_pages;
        unsigned int n_pages;
        enum vmballoon_page_size_type page_size;
        enum vmballoon_op op;
};

/**
 * struct vmballoon_batch_entry - a batch entry for lock or unlock.
 *
 * @status: the status of the operation, which is written by the hypervisor.
 * @reserved: reserved for future use. Must be set to zero.
 * @pfn: the physical frame number of the page to be locked or unlocked.
 */
struct vmballoon_batch_entry {
        u64 status : 5;
        u64 reserved : PAGE_SHIFT - 5;
        u64 pfn : 52;
} __packed;
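
/*
 * A worked example of the layout above, assuming 4KB base pages
 * (PAGE_SHIFT == 12): status occupies bits 0-4, reserved occupies bits
 * 5-11, and pfn starts exactly at bit PAGE_SHIFT. An entry for pfn 0x12345
 * with a zeroed status is therefore the raw value 0x12345000, i.e. the
 * physical address of the page, and the hypervisor writes the per-page
 * status back into the low five bits.
 */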
struct vmballoon {
        /**
         * @max_page_size: maximum supported page size for ballooning.
         *
         * Protected by @conf_sem
         */
        enum vmballoon_page_size_type max_page_size;

        /**
         * @size: balloon actual size in basic page size (frames).
         *
         * While we currently do not support size which is bigger than 32-bit,
         * in preparation for future support, use 64-bits.
         */
        atomic64_t size;

        /**
         * @target: balloon target size in basic page size (frames).
         *
         * We do not protect the target under the assumption that setting the
         * value is always done through a single write. If this assumption ever
         * breaks, we would have to use X_ONCE for accesses, and suffer the less
         * optimized code. Although we may read stale target value if multiple
         * accesses happen at once, the performance impact should be minor.
         */
        unsigned long target;

        /**
         * @reset_required: reset flag
         *
         * Setting this flag may introduce races, but the code is expected to
         * handle them gracefully. In the worst case, another operation will
         * fail as reset did not take place. Clearing the flag is done while
         * holding @conf_sem for write.
         */
        bool reset_required;

        /**
         * @capabilities: hypervisor balloon capabilities.
         *
         * Protected by @conf_sem.
         */
        unsigned long capabilities;

        /**
         * @batch_page: pointer to communication batch page.
         *
         * When batching is used, batch_page points to a page, which holds up to
         * %VMW_BALLOON_BATCH_MAX_PAGES entries for locking or unlocking.
         */
        struct vmballoon_batch_entry *batch_page;

        /**
         * @batch_max_pages: maximum pages that can be locked/unlocked.
         *
         * Indicates the number of pages that the hypervisor can lock or unlock
         * at once, according to whether batching is enabled. If batching is
         * disabled, only a single page can be locked/unlocked on each
         * operation.
         *
         * Protected by @conf_sem.
         */
        unsigned int batch_max_pages;

        /**
         * @page: page to be locked/unlocked by the hypervisor
         *
         * @page is only used when batching is disabled and a single page is
         * reclaimed on each iteration.
         *
         * Protected by @comm_lock.
         */
        struct page *page;

        /**
         * @shrink_timeout: timeout until the next inflation.
         *
         * After a shrink event, indicates the time in jiffies after which
         * inflation is allowed again. Can be written concurrently with reads,
         * so must use READ_ONCE/WRITE_ONCE when accessing.
         */
        unsigned long shrink_timeout;

        /* statistics */
        struct vmballoon_stats *stats;

        /**
         * @b_dev_info: balloon device information descriptor.
         */
        struct balloon_dev_info b_dev_info;

        struct delayed_work dwork;

        /**
         * @huge_pages: list of the inflated 2MB pages.
         *
         * Protected by @b_dev_info.pages_lock .
         */
        struct list_head huge_pages;

        /**
         * @vmci_doorbell: handle of the doorbell used for notifications.
         *
         * Protected by @conf_sem.
         */
        struct vmci_handle vmci_doorbell;

        /**
         * @conf_sem: semaphore to protect the configuration and the statistics.
         */
        struct rw_semaphore conf_sem;

        /**
         * @comm_lock: lock to protect the communication with the host.
         *
         * Lock ordering: @conf_sem -> @comm_lock .
         */
        spinlock_t comm_lock;

        /**
         * @shrinker: shrinker interface that is used to avoid over-inflation.
         */
        struct shrinker *shrinker;
};
static struct vmballoon balloon;

struct vmballoon_stats {
        /* timer / doorbell operations */
        atomic64_t general_stat[VMW_BALLOON_STAT_NUM];

        /* allocation statistics for huge and small pages */
        atomic64_t
               page_stat[VMW_BALLOON_PAGE_STAT_NUM][VMW_BALLOON_NUM_PAGE_SIZES];

        /* Monitor operations: total operations, and failures */
        atomic64_t ops[VMW_BALLOON_CMD_NUM][VMW_BALLOON_OP_STAT_TYPES];
};
static inline bool is_vmballoon_stats_on(void)
{
        return IS_ENABLED(CONFIG_DEBUG_FS) &&
                static_branch_unlikely(&balloon_stat_enabled);
}

static inline void vmballoon_stats_op_inc(struct vmballoon *b, unsigned int op,
                                          enum vmballoon_op_stat_type type)
{
        if (is_vmballoon_stats_on())
                atomic64_inc(&b->stats->ops[op][type]);
}

static inline void vmballoon_stats_gen_inc(struct vmballoon *b,
                                           enum vmballoon_stat_general stat)
{
        if (is_vmballoon_stats_on())
                atomic64_inc(&b->stats->general_stat[stat]);
}

static inline void vmballoon_stats_gen_add(struct vmballoon *b,
                                           enum vmballoon_stat_general stat,
                                           unsigned int val)
{
        if (is_vmballoon_stats_on())
                atomic64_add(val, &b->stats->general_stat[stat]);
}

static inline void vmballoon_stats_page_inc(struct vmballoon *b,
                                            enum vmballoon_stat_page stat,
                                            enum vmballoon_page_size_type size)
{
        if (is_vmballoon_stats_on())
                atomic64_inc(&b->stats->page_stat[stat][size]);
}

static inline void vmballoon_stats_page_add(struct vmballoon *b,
                                            enum vmballoon_stat_page stat,
                                            enum vmballoon_page_size_type size,
                                            unsigned int val)
{
        if (is_vmballoon_stats_on())
                atomic64_add(val, &b->stats->page_stat[stat][size]);
}
static inline unsigned long
__vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
                unsigned long arg2, unsigned long *result)
{
        unsigned long status, dummy1, dummy2, dummy3, local_result;

        vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_STAT);

        asm volatile ("inl %%dx" :
                "=a"(status),
                "=c"(dummy1),
                "=d"(dummy2),
                "=b"(local_result),
                "=S"(dummy3) :
                "0"(VMW_BALLOON_HV_MAGIC),
                "1"(cmd),
                "2"(VMW_BALLOON_HV_PORT),
                "3"(arg1),
                "4"(arg2) :
                "memory");

        /* update the result if needed */
        if (result)
                *result = (cmd == VMW_BALLOON_CMD_START) ? dummy1 :
                                                           local_result;

        /* update target when applicable */
        if (status == VMW_BALLOON_SUCCESS &&
            ((1ul << cmd) & VMW_BALLOON_CMD_WITH_TARGET_MASK))
                WRITE_ONCE(b->target, local_result);

        if (status != VMW_BALLOON_SUCCESS &&
            status != VMW_BALLOON_SUCCESS_WITH_CAPABILITIES) {
                vmballoon_stats_op_inc(b, cmd, VMW_BALLOON_OP_FAIL_STAT);
                pr_debug("%s: %s [0x%lx,0x%lx) failed, returned %ld\n",
                         __func__, vmballoon_cmd_names[cmd], arg1, arg2,
                         status);
        }

        /* mark reset required accordingly */
        if (status == VMW_BALLOON_ERROR_RESET)
                b->reset_required = true;

        return status;
}
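
/*
 * The backdoor call above uses the usual VMware I/O-port convention:
 * %eax carries VMW_BALLOON_HV_MAGIC on entry and the status on return,
 * %ecx carries the command, %dx the port (VMW_BALLOON_HV_PORT), and
 * %ebx/%esi carry arg1/arg2. The result is returned in %ebx, except for
 * VMW_BALLOON_CMD_START, which reports the capabilities in %ecx instead.
 */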
static __always_inline unsigned long
vmballoon_cmd(struct vmballoon *b, unsigned long cmd, unsigned long arg1,
              unsigned long arg2)
{
        unsigned long dummy;

        return __vmballoon_cmd(b, cmd, arg1, arg2, &dummy);
}
/*
 * Send "start" command to the host, communicating supported version
 * of the protocol.
 */
static int vmballoon_send_start(struct vmballoon *b, unsigned long req_caps)
{
        unsigned long status, capabilities;

        status = __vmballoon_cmd(b, VMW_BALLOON_CMD_START, req_caps, 0,
                                 &capabilities);

        switch (status) {
        case VMW_BALLOON_SUCCESS_WITH_CAPABILITIES:
                b->capabilities = capabilities;
                break;
        case VMW_BALLOON_SUCCESS:
                b->capabilities = VMW_BALLOON_BASIC_CMDS;
                break;
        default:
                return -EIO;
        }

        /*
         * 2MB pages are only supported with batching. If batching is for some
         * reason disabled, do not use 2MB pages, since otherwise the legacy
         * mechanism is used with 2MB pages, causing a failure.
         */
        b->max_page_size = VMW_BALLOON_4K_PAGE;
        if ((b->capabilities & VMW_BALLOON_BATCHED_2M_CMDS) &&
            (b->capabilities & VMW_BALLOON_BATCHED_CMDS))
                b->max_page_size = VMW_BALLOON_2M_PAGE;

        return 0;
}
/**
 * vmballoon_send_guest_id - communicate guest type to the host.
 *
 * @b: pointer to the balloon.
 *
 * Communicate guest type to the host so that it can adjust ballooning
 * algorithm to the one most appropriate for the guest. This command
 * is normally issued after sending "start" command and is part of
 * standard reset sequence.
 *
 * Return: zero on success or appropriate error code.
 */
static int vmballoon_send_guest_id(struct vmballoon *b)
{
        unsigned long status;

        status = vmballoon_cmd(b, VMW_BALLOON_CMD_GUEST_ID,
                               VMW_BALLOON_GUEST_ID, 0);

        return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}

/**
 * vmballoon_page_order() - return the order of the page
 * @page_size: the size of the page.
 *
 * Return: the allocation order.
 */
static inline
unsigned int vmballoon_page_order(enum vmballoon_page_size_type page_size)
{
        return page_size == VMW_BALLOON_2M_PAGE ? VMW_BALLOON_2M_ORDER : 0;
}
/**
 * vmballoon_page_in_frames() - returns the number of frames in a page.
 * @page_size: the size of the page.
 *
 * Return: the number of 4k frames.
 */
static inline unsigned int
vmballoon_page_in_frames(enum vmballoon_page_size_type page_size)
{
        return 1 << vmballoon_page_order(page_size);
}
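
/*
 * For example, on x86-64 with 4KB base pages VMW_BALLOON_2M_ORDER is
 * PMD_SHIFT - PAGE_SHIFT = 21 - 12 = 9, so a 2MB page spans
 * 1 << 9 = 512 frames, while a 4KB page is a single frame (order 0).
 */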
/**
 * vmballoon_mark_page_offline() - mark a page as offline
 * @page: pointer for the page.
 * @page_size: the size of the page.
 */
static void
vmballoon_mark_page_offline(struct page *page,
                            enum vmballoon_page_size_type page_size)
{
        int i;

        for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
                __SetPageOffline(page + i);
}

/**
 * vmballoon_mark_page_online() - mark a page as online
 * @page: pointer for the page.
 * @page_size: the size of the page.
 */
static void
vmballoon_mark_page_online(struct page *page,
                           enum vmballoon_page_size_type page_size)
{
        int i;

        for (i = 0; i < vmballoon_page_in_frames(page_size); i++)
                __ClearPageOffline(page + i);
}
/**
 * vmballoon_send_get_target() - Retrieve desired balloon size from the host.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success, EINVAL if limit does not fit in 32-bit, as required
 * by the host-guest protocol and EIO if an error occurred in communicating with
 * the host.
 */
static int vmballoon_send_get_target(struct vmballoon *b)
{
        unsigned long status;
        unsigned long limit;

        limit = totalram_pages();

        /* Ensure limit fits in 32-bits if 64-bit targets are not supported */
        if (!(b->capabilities & VMW_BALLOON_64_BIT_TARGET) &&
            limit != (u32)limit)
                return -EINVAL;

        status = vmballoon_cmd(b, VMW_BALLOON_CMD_GET_TARGET, limit, 0);

        return status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
/**
 * vmballoon_alloc_page_list - allocates a list of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 * @req_n_pages: the number of requested pages.
 *
 * Tries to allocate @req_n_pages. Adds them to the list of balloon pages in
 * @ctl.pages and updates @ctl.n_pages to reflect the number of pages.
 *
 * Return: zero on success or error code otherwise.
 */
static int vmballoon_alloc_page_list(struct vmballoon *b,
                                     struct vmballoon_ctl *ctl,
                                     unsigned int req_n_pages)
{
        struct page *page;
        unsigned int i;

        for (i = 0; i < req_n_pages; i++) {
                /*
                 * First check if we happen to have pages that were allocated
                 * before. This happens when a 2MB page is rejected during
                 * inflation by the hypervisor, and then split into 4KB pages.
                 */
                if (!list_empty(&ctl->prealloc_pages)) {
                        page = list_first_entry(&ctl->prealloc_pages,
                                                struct page, lru);
                        list_del(&page->lru);
                } else {
                        if (ctl->page_size == VMW_BALLOON_2M_PAGE)
                                page = alloc_pages(__GFP_HIGHMEM|__GFP_NOWARN|
                                        __GFP_NOMEMALLOC, VMW_BALLOON_2M_ORDER);
                        else
                                page = balloon_page_alloc();

                        vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC,
                                                 ctl->page_size);
                }

                if (page) {
                        /* Success. Add the page to the list and continue. */
                        list_add(&page->lru, &ctl->pages);
                        continue;
                }

                /* Allocation failed. Update statistics and stop. */
                vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_ALLOC_FAIL,
                                         ctl->page_size);
                break;
        }

        ctl->n_pages = i;

        return req_n_pages == ctl->n_pages ? 0 : -ENOMEM;
}
/**
 * vmballoon_handle_one_result - Handle lock/unlock result for a single page.
 *
 * @b: pointer for %struct vmballoon.
 * @page: pointer for the page whose result should be handled.
 * @page_size: size of the page.
 * @status: status of the operation as provided by the hypervisor.
 */
static int vmballoon_handle_one_result(struct vmballoon *b, struct page *page,
                                       enum vmballoon_page_size_type page_size,
                                       unsigned long status)
{
        /* On success do nothing. The page is already on the balloon list. */
        if (likely(status == VMW_BALLOON_SUCCESS))
                return 0;

        pr_debug("%s: failed comm pfn %lx status %lu page_size %s\n", __func__,
                 page_to_pfn(page), status,
                 vmballoon_page_size_names[page_size]);

        /* Error occurred */
        vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC,
                                 page_size);

        return -EIO;
}
/**
 * vmballoon_status_page - returns the status of (un)lock operation
 *
 * @b: pointer to the balloon.
 * @idx: index for the page for which the operation is performed.
 * @p: pointer to where the page struct is returned.
 *
 * Following a lock or unlock operation, returns the status of the operation for
 * an individual page. Provides the page that the operation was performed on in
 * the @p argument.
 *
 * Returns: The status of a lock or unlock operation for an individual page.
 */
static unsigned long vmballoon_status_page(struct vmballoon *b, int idx,
                                           struct page **p)
{
        if (static_branch_likely(&vmw_balloon_batching)) {
                /* batching mode */
                *p = pfn_to_page(b->batch_page[idx].pfn);
                return b->batch_page[idx].status;
        }

        /* non-batching mode */
        *p = b->page;

        /*
         * If a failure occurs, the indication will be provided in the status
         * of the entire operation, which is considered before the individual
         * page status. So for non-batching mode, the indication is always of
         * success.
         */
        return VMW_BALLOON_SUCCESS;
}
/**
 * vmballoon_lock_op - notifies the host about inflated/deflated pages.
 * @b: pointer to the balloon.
 * @num_pages: number of inflated/deflated pages.
 * @page_size: size of the page.
 * @op: the type of operation (lock or unlock).
 *
 * Notify the host about page(s) that were ballooned (or removed from the
 * balloon) so that the host can use them without fear that the guest will
 * need them (or stop using them, since the VM does). The host may reject
 * some pages, so we need to check the return value and maybe submit a
 * different page. The pages that are inflated/deflated are pointed to by
 * @b->page.
 *
 * Return: result as provided by the hypervisor.
 */
static unsigned long vmballoon_lock_op(struct vmballoon *b,
                                       unsigned int num_pages,
                                       enum vmballoon_page_size_type page_size,
                                       enum vmballoon_op op)
{
        unsigned long cmd, pfn;

        lockdep_assert_held(&b->comm_lock);

        if (static_branch_likely(&vmw_balloon_batching)) {
                if (op == VMW_BALLOON_INFLATE)
                        cmd = page_size == VMW_BALLOON_2M_PAGE ?
                                VMW_BALLOON_CMD_BATCHED_2M_LOCK :
                                VMW_BALLOON_CMD_BATCHED_LOCK;
                else
                        cmd = page_size == VMW_BALLOON_2M_PAGE ?
                                VMW_BALLOON_CMD_BATCHED_2M_UNLOCK :
                                VMW_BALLOON_CMD_BATCHED_UNLOCK;

                pfn = PHYS_PFN(virt_to_phys(b->batch_page));
        } else {
                cmd = op == VMW_BALLOON_INFLATE ? VMW_BALLOON_CMD_LOCK :
                                                  VMW_BALLOON_CMD_UNLOCK;
                pfn = page_to_pfn(b->page);

                /* In non-batching mode, PFNs must fit in 32-bit */
                if (unlikely(pfn != (u32)pfn))
                        return VMW_BALLOON_ERROR_PPN_INVALID;
        }

        return vmballoon_cmd(b, cmd, pfn, num_pages);
}
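
/*
 * Note on the 32-bit limit above: in non-batching mode the PFN is passed
 * directly as the command argument, so with 4KB pages only physical
 * addresses below 2^32 * 4KB = 16TB can be ballooned. A page beyond that
 * boundary fails the (u32)pfn check and is reported as
 * VMW_BALLOON_ERROR_PPN_INVALID without ever reaching the hypervisor.
 */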
/**
 * vmballoon_add_page - adds a page towards lock/unlock operation.
 *
 * @b: pointer to the balloon.
 * @idx: index of the page to be ballooned in this batch.
 * @p: pointer to the page that is about to be ballooned.
 *
 * Adds the page to be ballooned. Must be called while holding @comm_lock.
 */
static void vmballoon_add_page(struct vmballoon *b, unsigned int idx,
                               struct page *p)
{
        lockdep_assert_held(&b->comm_lock);

        if (static_branch_likely(&vmw_balloon_batching))
                b->batch_page[idx] = (struct vmballoon_batch_entry)
                                        { .pfn = page_to_pfn(p) };
        else
                b->page = p;
}
/**
 * vmballoon_lock - lock or unlock a batch of pages.
 *
 * @b: pointer to the balloon.
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 *
 * Notifies the host about ballooned pages (after inflation or deflation,
 * according to @ctl). If the host rejects a page, it is put on the @ctl
 * refused list. These refused pages are then released when moving to the
 * next size of pages.
 *
 * Note that we neither free any @page here nor put them back on the ballooned
 * pages list. Instead we queue them for later processing. We do that for
 * several reasons. First, we do not want to free the page under the lock.
 * Second, it allows us to unify the handling of lock and unlock. In the
 * inflate case, the caller will check if there are too many refused pages and
 * release them. Although it is not identical to the past behavior, it should
 * not affect performance.
 */
static int vmballoon_lock(struct vmballoon *b, struct vmballoon_ctl *ctl)
{
        unsigned long batch_status;
        struct page *page;
        unsigned int i, num_pages;

        num_pages = ctl->n_pages;
        if (num_pages == 0)
                return 0;

        /* communication with the host is done under the communication lock */
        spin_lock(&b->comm_lock);

        i = 0;
        list_for_each_entry(page, &ctl->pages, lru)
                vmballoon_add_page(b, i++, page);

        batch_status = vmballoon_lock_op(b, ctl->n_pages, ctl->page_size,
                                         ctl->op);

        /*
         * Iterate over the pages in the provided list. Since we are changing
         * @ctl->n_pages we are saving the original value in @num_pages and
         * use this value to bound the loop.
         */
        for (i = 0; i < num_pages; i++) {
                unsigned long status;

                status = vmballoon_status_page(b, i, &page);

                /*
                 * Failure of the whole batch overrides the result of a single
                 * operation.
                 */
                if (batch_status != VMW_BALLOON_SUCCESS)
                        status = batch_status;

                /* Continue if no error happened */
                if (!vmballoon_handle_one_result(b, page, ctl->page_size,
                                                 status))
                        continue;

                /*
                 * Error happened. Move the page to the refused list and update
                 * the page counts.
                 */
                list_move(&page->lru, &ctl->refused_pages);
                ctl->n_pages--;
                ctl->n_refused_pages++;
        }

        spin_unlock(&b->comm_lock);

        return batch_status == VMW_BALLOON_SUCCESS ? 0 : -EIO;
}
/**
 * vmballoon_release_page_list() - Releases a page list
 *
 * @page_list: list of pages to release.
 * @n_pages: pointer to the number of pages.
 * @page_size: whether the pages in the list are 2MB (or else 4KB).
 *
 * Releases the list of pages and zeros the number of pages.
 */
static void vmballoon_release_page_list(struct list_head *page_list,
                                        int *n_pages,
                                        enum vmballoon_page_size_type page_size)
{
        struct page *page, *tmp;

        list_for_each_entry_safe(page, tmp, page_list, lru) {
                list_del(&page->lru);
                __free_pages(page, vmballoon_page_order(page_size));
        }

        if (n_pages)
                *n_pages = 0;
}

/*
 * Release pages that were allocated while attempting to inflate the
 * balloon but were refused by the host for one reason or another.
 */
static void vmballoon_release_refused_pages(struct vmballoon *b,
                                            struct vmballoon_ctl *ctl)
{
        vmballoon_stats_page_inc(b, VMW_BALLOON_PAGE_STAT_REFUSED_FREE,
                                 ctl->page_size);

        vmballoon_release_page_list(&ctl->refused_pages, &ctl->n_refused_pages,
                                    ctl->page_size);
}
/**
 * vmballoon_change - retrieve the required balloon change
 *
 * @b: pointer for the balloon.
 *
 * Return: the required change for the balloon size. A positive number
 * indicates inflation, a negative number indicates a deflation.
 */
static int64_t vmballoon_change(struct vmballoon *b)
{
        int64_t size, target;

        size = atomic64_read(&b->size);
        target = READ_ONCE(b->target);

        /*
         * We must cast first because of int sizes
         * Otherwise we might get huge positives instead of negatives
         */

        if (b->reset_required)
                return 0;

        /* consider a 2MB slack on deflate, unless the balloon is emptied */
        if (target < size && target != 0 &&
            size - target < vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE))
                return 0;

        /* If an out-of-memory recently occurred, inflation is disallowed. */
        if (target > size && time_before(jiffies, READ_ONCE(b->shrink_timeout)))
                return 0;

        return target - size;
}
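
/*
 * A worked example: with size == 10000 frames, a target of 10100 yields
 * +100 (inflate); a target of 9900 yields 0, since the 100-frame deficit
 * is within the 512-frame (2MB) slack; a target of 0 bypasses the slack
 * check and yields -10000 (full deflation).
 */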
/**
 * vmballoon_enqueue_page_list() - Enqueues list of pages after inflation.
 *
 * @b: pointer to balloon.
 * @pages: list of pages to enqueue.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 *
 * Enqueues the provided list of pages in the ballooned page list, clears the
 * list and zeroes the number of pages that was provided.
 */
static void vmballoon_enqueue_page_list(struct vmballoon *b,
                                        struct list_head *pages,
                                        unsigned int *n_pages,
                                        enum vmballoon_page_size_type page_size)
{
        unsigned long flags;
        struct page *page;

        if (page_size == VMW_BALLOON_4K_PAGE) {
                balloon_page_list_enqueue(&b->b_dev_info, pages);
        } else {
                /*
                 * Keep the huge pages in a local list which is not available
                 * for the balloon compaction mechanism.
                 */
                spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);

                list_for_each_entry(page, pages, lru) {
                        vmballoon_mark_page_offline(page, VMW_BALLOON_2M_PAGE);
                }

                list_splice_init(pages, &b->huge_pages);
                __count_vm_events(BALLOON_INFLATE, *n_pages *
                                  vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
                spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
        }

        *n_pages = 0;
}
/**
 * vmballoon_dequeue_page_list() - Dequeues page lists for deflation.
 *
 * @b: pointer to balloon.
 * @pages: list to which the dequeued pages are added.
 * @n_pages: pointer to number of pages in list. The value is zeroed.
 * @page_size: whether the pages are 2MB or 4KB pages.
 * @n_req_pages: the number of requested pages.
 *
 * Dequeues the number of requested pages from the balloon for deflation. The
 * number of dequeued pages may be lower, if not enough pages in the requested
 * size are available.
 */
static void vmballoon_dequeue_page_list(struct vmballoon *b,
                                        struct list_head *pages,
                                        unsigned int *n_pages,
                                        enum vmballoon_page_size_type page_size,
                                        unsigned int n_req_pages)
{
        struct page *page, *tmp;
        unsigned int i = 0;
        unsigned long flags;

        /* In the case of 4k pages, use the compaction infrastructure */
        if (page_size == VMW_BALLOON_4K_PAGE) {
                *n_pages = balloon_page_list_dequeue(&b->b_dev_info, pages,
                                                     n_req_pages);
                return;
        }

        /* 2MB pages */
        spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);
        list_for_each_entry_safe(page, tmp, &b->huge_pages, lru) {
                vmballoon_mark_page_online(page, VMW_BALLOON_2M_PAGE);

                list_move(&page->lru, pages);
                if (++i == n_req_pages)
                        break;
        }

        __count_vm_events(BALLOON_DEFLATE,
                          i * vmballoon_page_in_frames(VMW_BALLOON_2M_PAGE));
        spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);
        *n_pages = i;
}
/**
 * vmballoon_split_refused_pages() - Split the 2MB refused pages to 4k.
 *
 * If inflation of 2MB pages was denied by the hypervisor, it is likely to be
 * due to one or a few 4KB pages. These 2MB pages may keep being allocated and
 * then being refused. To prevent this case, this function splits the refused
 * pages into 4KB pages and adds them into the @prealloc_pages list.
 *
 * @ctl: pointer for the %struct vmballoon_ctl, which defines the operation.
 */
static void vmballoon_split_refused_pages(struct vmballoon_ctl *ctl)
{
        struct page *page, *tmp;
        unsigned int i, order;

        order = vmballoon_page_order(ctl->page_size);

        list_for_each_entry_safe(page, tmp, &ctl->refused_pages, lru) {
                list_del(&page->lru);
                split_page(page, order);
                for (i = 0; i < (1 << order); i++)
                        list_add(&page[i].lru, &ctl->prealloc_pages);
        }

        ctl->n_refused_pages = 0;
}
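
/*
 * For example, a refused 2MB page has order 9, so split_page() turns it
 * into 1 << 9 = 512 independent order-0 pages. These are parked on
 * @prealloc_pages and handed out first by vmballoon_alloc_page_list()
 * once the inflation loop falls back to 4KB pages, instead of being freed
 * and immediately reallocated.
 */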
/**
 * vmballoon_inflate() - Inflate the balloon towards its target size.
 *
 * @b: pointer to the balloon.
 */
static void vmballoon_inflate(struct vmballoon *b)
{
        int64_t to_inflate_frames;
        struct vmballoon_ctl ctl = {
                .pages = LIST_HEAD_INIT(ctl.pages),
                .refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
                .prealloc_pages = LIST_HEAD_INIT(ctl.prealloc_pages),
                .page_size = b->max_page_size,
                .op = VMW_BALLOON_INFLATE
        };

        while ((to_inflate_frames = vmballoon_change(b)) > 0) {
                unsigned int to_inflate_pages, page_in_frames;
                int alloc_error, lock_error = 0;

                VM_BUG_ON(!list_empty(&ctl.pages));
                VM_BUG_ON(ctl.n_pages != 0);

                page_in_frames = vmballoon_page_in_frames(ctl.page_size);

                to_inflate_pages = min_t(unsigned long, b->batch_max_pages,
                                         DIV_ROUND_UP_ULL(to_inflate_frames,
                                                          page_in_frames));

                /* Start by allocating */
                alloc_error = vmballoon_alloc_page_list(b, &ctl,
                                                        to_inflate_pages);

                /* Actually lock the pages by telling the hypervisor */
                lock_error = vmballoon_lock(b, &ctl);

                /*
                 * If an error indicates that something serious went wrong,
                 * stop the inflation.
                 */
                if (lock_error)
                        break;

                /* Update the balloon size */
                atomic64_add(ctl.n_pages * page_in_frames, &b->size);

                vmballoon_enqueue_page_list(b, &ctl.pages, &ctl.n_pages,
                                            ctl.page_size);

                /*
                 * If allocation failed or the number of refused pages exceeds
                 * the maximum allowed, move to the next page size.
                 */
                if (alloc_error ||
                    ctl.n_refused_pages >= VMW_BALLOON_MAX_REFUSED) {
                        if (ctl.page_size == VMW_BALLOON_4K_PAGE)
                                break;

                        /*
                         * Split the refused pages to 4k. This will also empty
                         * the refused pages list.
                         */
                        vmballoon_split_refused_pages(&ctl);
                        ctl.page_size--;
                }

                cond_resched();
        }

        /*
         * Release pages that were allocated while attempting to inflate the
         * balloon but were refused by the host for one reason or another,
         * and update the statistics.
         */
        if (ctl.n_refused_pages != 0)
                vmballoon_release_refused_pages(b, &ctl);

        vmballoon_release_page_list(&ctl.prealloc_pages, NULL, ctl.page_size);
}
/**
 * vmballoon_deflate() - Decrease the size of the balloon.
 *
 * @b: pointer to the balloon
 * @n_frames: the number of frames to deflate. If zero, automatically
 * calculated according to the target size.
 * @coordinated: whether to coordinate with the host
 *
 * Decrease the size of the balloon allowing guest to use more memory.
 *
 * Return: The number of deflated frames (i.e., basic page size units)
 */
static unsigned long vmballoon_deflate(struct vmballoon *b, uint64_t n_frames,
                                       bool coordinated)
{
        unsigned long deflated_frames = 0;
        unsigned long tried_frames = 0;
        struct vmballoon_ctl ctl = {
                .pages = LIST_HEAD_INIT(ctl.pages),
                .refused_pages = LIST_HEAD_INIT(ctl.refused_pages),
                .page_size = VMW_BALLOON_4K_PAGE,
                .op = VMW_BALLOON_DEFLATE
        };

        /* free pages to reach target */
        while (true) {
                unsigned int to_deflate_pages, n_unlocked_frames;
                unsigned int page_in_frames;
                int64_t to_deflate_frames;
                bool deflated_all;

                page_in_frames = vmballoon_page_in_frames(ctl.page_size);

                VM_BUG_ON(!list_empty(&ctl.pages));
                VM_BUG_ON(ctl.n_pages);
                VM_BUG_ON(!list_empty(&ctl.refused_pages));
                VM_BUG_ON(ctl.n_refused_pages);

                /*
                 * If we were requested a specific number of frames, we try to
                 * deflate this number of frames. Otherwise, deflation is
                 * performed according to the target and balloon size.
                 */
                to_deflate_frames = n_frames ? n_frames - tried_frames :
                                               -vmballoon_change(b);

                /* break if no work to do */
                if (to_deflate_frames <= 0)
                        break;

                /*
                 * Calculate the number of frames based on current page size,
                 * but limit the deflated frames to a single chunk
                 */
                to_deflate_pages = min_t(unsigned long, b->batch_max_pages,
                                         DIV_ROUND_UP_ULL(to_deflate_frames,
                                                          page_in_frames));

                /* First take the pages from the balloon pages. */
                vmballoon_dequeue_page_list(b, &ctl.pages, &ctl.n_pages,
                                            ctl.page_size, to_deflate_pages);

                /*
                 * Before pages are moving to the refused list, count their
                 * frames as frames that we tried to deflate.
                 */
                tried_frames += ctl.n_pages * page_in_frames;

                /*
                 * Unlock the pages by communicating with the hypervisor if the
                 * communication is coordinated (i.e., not pop). We ignore the
                 * return code. Instead we check whether we managed to unlock
                 * all the pages. If we failed, we will move to the next page
                 * size, and would eventually try again later.
                 */
                if (coordinated)
                        vmballoon_lock(b, &ctl);

                /*
                 * Check if we deflated enough. We will move to the next page
                 * size if we did not manage to do so. This calculation takes
                 * place now, as once the pages are released, the number of
                 * pages is zeroed.
                 */
                deflated_all = (ctl.n_pages == to_deflate_pages);

                /* Update local and global counters */
                n_unlocked_frames = ctl.n_pages * page_in_frames;
                atomic64_sub(n_unlocked_frames, &b->size);
                deflated_frames += n_unlocked_frames;

                vmballoon_stats_page_add(b, VMW_BALLOON_PAGE_STAT_FREE,
                                         ctl.page_size, ctl.n_pages);

                /* free the ballooned pages */
                vmballoon_release_page_list(&ctl.pages, &ctl.n_pages,
                                            ctl.page_size);

                /* Return the refused pages to the ballooned list. */
                vmballoon_enqueue_page_list(b, &ctl.refused_pages,
                                            &ctl.n_refused_pages,
                                            ctl.page_size);

                /* If we failed to unlock all the pages, move to next size. */
                if (!deflated_all) {
                        if (ctl.page_size == b->max_page_size)
                                break;
                        ctl.page_size++;
                }

                cond_resched();
        }

        return deflated_frames;
}
/**
 * vmballoon_deinit_batching - disables batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Disables batching, by deallocating the page for communication with the
 * hypervisor and disabling the static key to indicate that batching is off.
 */
static void vmballoon_deinit_batching(struct vmballoon *b)
{
        free_page((unsigned long)b->batch_page);
        b->batch_page = NULL;
        static_branch_disable(&vmw_balloon_batching);
        b->batch_max_pages = 1;
}

/**
 * vmballoon_init_batching - enable batching mode.
 *
 * @b: pointer to &struct vmballoon.
 *
 * Enables batching, by allocating a page for communication with the hypervisor
 * and enabling the static_key to use batching.
 *
 * Return: zero on success or an appropriate error-code.
 */
static int vmballoon_init_batching(struct vmballoon *b)
{
        struct page *page;

        page = alloc_page(GFP_KERNEL | __GFP_ZERO);
        if (!page)
                return -ENOMEM;

        b->batch_page = page_address(page);
        b->batch_max_pages = PAGE_SIZE / sizeof(struct vmballoon_batch_entry);

        static_branch_enable(&vmw_balloon_batching);

        return 0;
}
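
/*
 * Each struct vmballoon_batch_entry is a single u64, so with 4KB pages
 * batch_max_pages is PAGE_SIZE / 8 = 512 entries, matching the "up to 512"
 * batch size documented for the batched lock/unlock commands above.
 */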
/*
 * Receive notification and resize balloon
 */
static void vmballoon_doorbell(void *client_data)
{
        struct vmballoon *b = client_data;

        vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_DOORBELL);

        mod_delayed_work(system_freezable_wq, &b->dwork, 0);
}

/*
 * Clean up vmci doorbell
 */
static void vmballoon_vmci_cleanup(struct vmballoon *b)
{
        vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
                      VMCI_INVALID_ID, VMCI_INVALID_ID);

        if (!vmci_handle_is_invalid(b->vmci_doorbell)) {
                vmci_doorbell_destroy(b->vmci_doorbell);
                b->vmci_doorbell = VMCI_INVALID_HANDLE;
        }
}
/**
 * vmballoon_vmci_init - Initialize vmci doorbell.
 *
 * @b: pointer to the balloon.
 *
 * Return: zero on success or when wakeup command not supported. Error-code
 * otherwise.
 *
 * Initialize vmci doorbell, to get notified as soon as balloon changes.
 */
static int vmballoon_vmci_init(struct vmballoon *b)
{
        unsigned long error;

        if ((b->capabilities & VMW_BALLOON_SIGNALLED_WAKEUP_CMD) == 0)
                return 0;

        error = vmci_doorbell_create(&b->vmci_doorbell, VMCI_FLAG_DELAYED_CB,
                                     VMCI_PRIVILEGE_FLAG_RESTRICTED,
                                     vmballoon_doorbell, b);

        if (error != VMCI_SUCCESS)
                goto fail;

        error = __vmballoon_cmd(b, VMW_BALLOON_CMD_VMCI_DOORBELL_SET,
                                b->vmci_doorbell.context,
                                b->vmci_doorbell.resource, NULL);

        if (error != VMW_BALLOON_SUCCESS)
                goto fail;

        return 0;
fail:
        vmballoon_vmci_cleanup(b);
        return -EIO;
}
/**
 * vmballoon_pop - Quickly release all pages allocated for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * This function is called when host decides to "reset" balloon for one reason
 * or another. Unlike normal "deflate" we do not (shall not) notify host of the
 * pages being released.
 */
static void vmballoon_pop(struct vmballoon *b)
{
        unsigned long size;

        while ((size = atomic64_read(&b->size)))
                vmballoon_deflate(b, size, false);
}
/*
 * Perform standard reset sequence by popping the balloon (in case it
 * is not empty) and then restarting protocol. This operation normally
 * happens when host responds with VMW_BALLOON_ERROR_RESET to a command.
 */
static void vmballoon_reset(struct vmballoon *b)
{
        int error;

        down_write(&b->conf_sem);

        vmballoon_vmci_cleanup(b);

        /* free all pages, skipping monitor unlock */
        vmballoon_pop(b);

        if (vmballoon_send_start(b, VMW_BALLOON_CAPABILITIES))
                goto unlock;

        if ((b->capabilities & VMW_BALLOON_BATCHED_CMDS) != 0) {
                if (vmballoon_init_batching(b)) {
                        /*
                         * We failed to initialize batching, inform the monitor
                         * about it by sending a null capability.
                         *
                         * The guest will retry in one second.
                         */
                        vmballoon_send_start(b, 0);
                        goto unlock;
                }
        } else if ((b->capabilities & VMW_BALLOON_BASIC_CMDS) != 0) {
                vmballoon_deinit_batching(b);
        }

        vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_RESET);
        b->reset_required = false;

        error = vmballoon_vmci_init(b);
        if (error)
                pr_err_once("failed to initialize vmci doorbell\n");

        if (vmballoon_send_guest_id(b))
                pr_err_once("failed to send guest ID to the host\n");

unlock:
        up_write(&b->conf_sem);
}
/**
 * vmballoon_work - periodic balloon worker for reset, inflation and deflation.
 *
 * @work: pointer to the &work_struct which is provided by the workqueue.
 *
 * Resets the protocol if needed, gets the new size and adjusts balloon as
 * needed. Repeat in 1 sec.
 */
static void vmballoon_work(struct work_struct *work)
{
        struct delayed_work *dwork = to_delayed_work(work);
        struct vmballoon *b = container_of(dwork, struct vmballoon, dwork);
        int64_t change = 0;

        if (b->reset_required)
                vmballoon_reset(b);

        down_read(&b->conf_sem);

        /*
         * Update the stats while holding the semaphore to ensure that
         * @stats_enabled is consistent with whether the stats are actually
         * enabled
         */
        vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_TIMER);

        if (!vmballoon_send_get_target(b))
                change = vmballoon_change(b);

        if (change != 0) {
                pr_debug("%s - size: %llu, target %lu\n", __func__,
                         atomic64_read(&b->size), READ_ONCE(b->target));

                if (change > 0)
                        vmballoon_inflate(b);
                else  /* (change < 0) */
                        vmballoon_deflate(b, 0, true);
        }

        up_read(&b->conf_sem);

        /*
         * We are using a freezable workqueue so that balloon operations are
         * stopped while the system transitions to/from sleep/hibernation.
         */
        queue_delayed_work(system_freezable_wq,
                           dwork, round_jiffies_relative(HZ));
}
/**
 * vmballoon_shrinker_scan() - deflate the balloon due to memory pressure.
 * @shrinker: pointer to the balloon shrinker.
 * @sc: page reclaim information.
 *
 * Returns: number of pages that were freed during deflation.
 */
static unsigned long vmballoon_shrinker_scan(struct shrinker *shrinker,
                                             struct shrink_control *sc)
{
        struct vmballoon *b = &balloon;
        unsigned long deflated_frames;

        pr_debug("%s - size: %llu\n", __func__, atomic64_read(&b->size));

        vmballoon_stats_gen_inc(b, VMW_BALLOON_STAT_SHRINK);

        /*
         * If the lock is also contended for read, we cannot easily reclaim and
         * we bail out.
         */
        if (!down_read_trylock(&b->conf_sem))
                return 0;

        deflated_frames = vmballoon_deflate(b, sc->nr_to_scan, true);

        vmballoon_stats_gen_add(b, VMW_BALLOON_STAT_SHRINK_FREE,
                                deflated_frames);

        /*
         * Delay future inflation for some time to mitigate the situations in
         * which balloon continuously grows and shrinks. Use WRITE_ONCE() since
         * the access is asynchronous.
         */
        WRITE_ONCE(b->shrink_timeout, jiffies + HZ * VMBALLOON_SHRINK_DELAY);

        up_read(&b->conf_sem);

        return deflated_frames;
}

/**
 * vmballoon_shrinker_count() - return the number of ballooned pages.
 * @shrinker: pointer to the balloon shrinker.
 * @sc: page reclaim information.
 *
 * Returns: number of 4k pages that are allocated for the balloon and can
 * therefore be reclaimed under pressure.
 */
static unsigned long vmballoon_shrinker_count(struct shrinker *shrinker,
					      struct shrink_control *sc)
{
	struct vmballoon *b = &balloon;

	return atomic64_read(&b->size);
}
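
/*
 * Unregister the balloon shrinker from the memory-reclaim path and release
 * it. Clearing @b->shrinker afterwards marks the shrinker as gone.
 */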
static void vmballoon_unregister_shrinker(struct vmballoon *b)
{
	shrinker_free(b->shrinker);
	b->shrinker = NULL;
}
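
/*
 * Allocate and register a shrinker, which allows memory reclaim to deflate
 * the balloon under memory pressure. When vmwballoon_shrinker_enable is
 * unset, no shrinker is registered and the function succeeds trivially.
 */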
static int vmballoon_register_shrinker(struct vmballoon *b)
{
	/* Do nothing if the shrinker is not enabled */
	if (!vmwballoon_shrinker_enable)
		return 0;

	b->shrinker = shrinker_alloc(0, "vmw-balloon");
	if (!b->shrinker)
		return -ENOMEM;

	b->shrinker->scan_objects = vmballoon_shrinker_scan;
	b->shrinker->count_objects = vmballoon_shrinker_count;
	b->shrinker->private_data = b;

	shrinker_register(b->shrinker);

	return 0;
}

/*
 * DEBUGFS Interface
 */
#ifdef CONFIG_DEBUG_FS

static const char * const vmballoon_stat_page_names[] = {
	[VMW_BALLOON_PAGE_STAT_ALLOC]		= "alloc",
	[VMW_BALLOON_PAGE_STAT_ALLOC_FAIL]	= "allocFail",
	[VMW_BALLOON_PAGE_STAT_REFUSED_ALLOC]	= "errAlloc",
	[VMW_BALLOON_PAGE_STAT_REFUSED_FREE]	= "errFree",
	[VMW_BALLOON_PAGE_STAT_FREE]		= "free"
};

static const char * const vmballoon_stat_names[] = {
	[VMW_BALLOON_STAT_TIMER]		= "timer",
	[VMW_BALLOON_STAT_DOORBELL]		= "doorbell",
	[VMW_BALLOON_STAT_RESET]		= "reset",
	[VMW_BALLOON_STAT_SHRINK]		= "shrink",
	[VMW_BALLOON_STAT_SHRINK_FREE]		= "shrinkFree"
};
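
/*
 * Lazily allocate the statistics structure and enable the static key that
 * gates statistics collection. The configuration semaphore is taken for
 * write to serialize concurrent attempts to enable the statistics.
 */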
static int vmballoon_enable_stats(struct vmballoon *b)
{
	int r = 0;

	down_write(&b->conf_sem);

	/* did we somehow race with another reader which enabled stats? */
	if (b->stats)
		goto out;

	b->stats = kzalloc(sizeof(*b->stats), GFP_KERNEL);

	if (!b->stats) {
		/* allocation failed */
		r = -ENOMEM;
		goto out;
	}

	static_key_enable(&balloon_stat_enabled.key);
out:
	up_write(&b->conf_sem);
	return r;
}

/**
 * vmballoon_debug_show - shows statistics of balloon operations.
 * @f: pointer to the &struct seq_file.
 * @offset: ignored.
 *
 * Provides the statistics that can be accessed through the vmmemctl file in
 * debugfs. To avoid the overhead - mainly that of memory - of collecting the
 * statistics, we only start collecting them after the first time the counters
 * are read.
 *
 * Return: zero on success or an error code.
 */
static int vmballoon_debug_show(struct seq_file *f, void *offset)
{
	struct vmballoon *b = f->private;
	int i, j;

	/* enables stats if they are disabled */
	if (!b->stats) {
		int r = vmballoon_enable_stats(b);

		if (r)
			return r;
	}

	/* format capabilities info */
	seq_printf(f, "%-22s: %#16x\n", "balloon capabilities",
		   VMW_BALLOON_CAPABILITIES);
	seq_printf(f, "%-22s: %#16lx\n", "used capabilities", b->capabilities);
	seq_printf(f, "%-22s: %16s\n", "is resetting",
		   b->reset_required ? "y" : "n");

	/* format size info */
	seq_printf(f, "%-22s: %16lu\n", "target", READ_ONCE(b->target));
	seq_printf(f, "%-22s: %16llu\n", "current", atomic64_read(&b->size));

	for (i = 0; i < VMW_BALLOON_CMD_NUM; i++) {
		if (vmballoon_cmd_names[i] == NULL)
			continue;

		seq_printf(f, "%-22s: %16llu (%llu failed)\n",
			   vmballoon_cmd_names[i],
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_STAT]),
			   atomic64_read(&b->stats->ops[i][VMW_BALLOON_OP_FAIL_STAT]));
	}

	for (i = 0; i < VMW_BALLOON_STAT_NUM; i++)
		seq_printf(f, "%-22s: %16llu\n",
			   vmballoon_stat_names[i],
			   atomic64_read(&b->stats->general_stat[i]));

	for (i = 0; i < VMW_BALLOON_PAGE_STAT_NUM; i++) {
		for (j = 0; j < VMW_BALLOON_NUM_PAGE_SIZES; j++)
			seq_printf(f, "%-18s(%s): %16llu\n",
				   vmballoon_stat_page_names[i],
				   vmballoon_page_size_names[j],
				   atomic64_read(&b->stats->page_stat[i][j]));
	}

	return 0;
}

DEFINE_SHOW_ATTRIBUTE(vmballoon_debug);
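
/* Expose the statistics through the "vmmemctl" file in the debugfs root. */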
static void __init vmballoon_debugfs_init(struct vmballoon *b)
{
	debugfs_create_file("vmmemctl", S_IRUGO, NULL, b,
			    &vmballoon_debug_fops);
}
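
/*
 * Disable statistics collection, remove the debugfs file and free the
 * statistics structure.
 */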
static void __exit vmballoon_debugfs_exit(struct vmballoon *b)
{
	static_key_disable(&balloon_stat_enabled.key);
	debugfs_lookup_and_remove("vmmemctl", NULL);
	kfree(b->stats);
	b->stats = NULL;
}

#else

static inline void vmballoon_debugfs_init(struct vmballoon *b)
{
}

static inline void vmballoon_debugfs_exit(struct vmballoon *b)
{
}

#endif	/* CONFIG_DEBUG_FS */

#ifdef CONFIG_BALLOON_COMPACTION
/**
 * vmballoon_migratepage() - migrates a balloon page.
 * @b_dev_info: balloon device information descriptor.
 * @newpage: the page to which @page should be migrated.
 * @page: a ballooned page that should be migrated.
 * @mode: migration mode, ignored.
 *
 * This function is really open-coded, but that is according to the interface
 * that balloon_compaction provides.
 *
 * Return: zero on success, -EAGAIN when migration cannot be performed
 *	   momentarily, and -EBUSY if migration failed and should be retried
 *	   with that specific page.
 */
static int vmballoon_migratepage(struct balloon_dev_info *b_dev_info,
				 struct page *newpage, struct page *page,
				 enum migrate_mode mode)
{
	unsigned long status, flags;
	struct vmballoon *b;
	int ret;

	b = container_of(b_dev_info, struct vmballoon, b_dev_info);

	/*
	 * If the semaphore is taken, there is an ongoing configuration change
	 * (i.e., balloon reset), so try again.
	 */
	if (!down_read_trylock(&b->conf_sem))
		return -EAGAIN;

	spin_lock(&b->comm_lock);
	/*
	 * We must start by deflating and not inflating, as otherwise the
	 * hypervisor may tell us that it has enough memory and the new page is
	 * not needed. Since the old page is isolated, we cannot use the list
	 * interface to unlock it, as the LRU field is used for isolation.
	 * Instead, we use the native interface directly.
	 */
	vmballoon_add_page(b, 0, page);
	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
				   VMW_BALLOON_DEFLATE);

	if (status == VMW_BALLOON_SUCCESS)
		status = vmballoon_status_page(b, 0, &page);

	/*
	 * If a failure happened, let the migration mechanism know that it
	 * should not retry.
	 */
	if (status != VMW_BALLOON_SUCCESS) {
		spin_unlock(&b->comm_lock);
		ret = -EBUSY;
		goto out_unlock;
	}

	/*
	 * The page is isolated, so it is safe to delete it without holding
	 * @pages_lock. We keep holding @comm_lock since we will need it in a
	 * second.
	 */
	balloon_page_delete(page);
	put_page(page);

	/* Inflate */
	vmballoon_add_page(b, 0, newpage);
	status = vmballoon_lock_op(b, 1, VMW_BALLOON_4K_PAGE,
				   VMW_BALLOON_INFLATE);

	if (status == VMW_BALLOON_SUCCESS)
		status = vmballoon_status_page(b, 0, &newpage);

	spin_unlock(&b->comm_lock);

	if (status != VMW_BALLOON_SUCCESS) {
		/*
		 * A failure happened. While we can deflate the page we just
		 * inflated, this deflation can also encounter an error.
		 * Instead, we will decrease the size of the balloon to reflect
		 * the change and report failure.
		 */
		atomic64_dec(&b->size);
		ret = -EBUSY;
	} else {
		/*
		 * Success. Take a reference for the page, and we will add it
		 * to the list after acquiring the lock.
		 */
		get_page(newpage);
		ret = MIGRATEPAGE_SUCCESS;
	}

	/* Update the balloon list under the @pages_lock */
	spin_lock_irqsave(&b->b_dev_info.pages_lock, flags);

	/*
	 * On inflation success, we already took a reference for the @newpage.
	 * If we succeeded, just insert it into the list and update the
	 * statistics under the lock.
	 */
	if (ret == MIGRATEPAGE_SUCCESS) {
		balloon_page_insert(&b->b_dev_info, newpage);
		__count_vm_event(BALLOON_MIGRATE);
	}

	/*
	 * We deflated successfully, so regardless of the inflation success,
	 * we need to reduce the number of isolated_pages.
	 */
	b->b_dev_info.isolated_pages--;

	spin_unlock_irqrestore(&b->b_dev_info.pages_lock, flags);

out_unlock:
	up_read(&b->conf_sem);
	return ret;
}

/**
 * vmballoon_compaction_init() - initializes compaction for the balloon.
 *
 * @b: pointer to the balloon.
 *
 * Sets the balloon's migration callback, which the balloon compaction
 * infrastructure invokes to move ballooned pages. The balloon device
 * information in @b must have been initialized with balloon_devinfo_init()
 * beforehand.
 */
static __init void vmballoon_compaction_init(struct vmballoon *b)
{
	b->b_dev_info.migratepage = vmballoon_migratepage;
}

#else /* CONFIG_BALLOON_COMPACTION */
static inline void vmballoon_compaction_init(struct vmballoon *b)
{
}
#endif /* CONFIG_BALLOON_COMPACTION */
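
/*
 * vmballoon_init - driver entry point: initialize the balloon and start the
 * periodic worker. Since @reset_required is set before the work is queued,
 * the first iteration of the worker performs the initial reset.
 */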
static int __init vmballoon_init(void)
{
	int error;

	/*
	 * Check if we are running on VMware's hypervisor and bail out
	 * if we are not.
	 */
	if (x86_hyper_type != X86_HYPER_VMWARE)
		return -ENODEV;

	INIT_DELAYED_WORK(&balloon.dwork, vmballoon_work);

	error = vmballoon_register_shrinker(&balloon);
	if (error)
		return error;

	/*
	 * Initialization of compaction must be done after the call to
	 * balloon_devinfo_init().
	 */
	balloon_devinfo_init(&balloon.b_dev_info);
	vmballoon_compaction_init(&balloon);

	INIT_LIST_HEAD(&balloon.huge_pages);
	spin_lock_init(&balloon.comm_lock);
	init_rwsem(&balloon.conf_sem);
	balloon.vmci_doorbell = VMCI_INVALID_HANDLE;
	balloon.batch_page = NULL;
	balloon.page = NULL;
	balloon.reset_required = true;

	queue_delayed_work(system_freezable_wq, &balloon.dwork, 0);

	vmballoon_debugfs_init(&balloon);

	return 0;
}

/*
 * Using late_initcall() instead of module_init() allows the balloon to use the
 * VMCI doorbell even when the balloon is built into the kernel. Otherwise the
 * VMCI is probed only after the balloon is initialized. If the balloon is used
 * as a module, late_initcall() is equivalent to module_init().
 */
late_initcall(vmballoon_init);
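
/*
 * vmballoon_exit - module teardown: stop the shrinker and the worker, detach
 * from the hypervisor, and release all ballooned pages back to the guest.
 */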
static void __exit vmballoon_exit(void)
{
	vmballoon_unregister_shrinker(&balloon);
	vmballoon_vmci_cleanup(&balloon);
	cancel_delayed_work_sync(&balloon.dwork);

	vmballoon_debugfs_exit(&balloon);

	/*
	 * Deallocate all reserved memory, and reset connection with monitor.
	 * Reset connection before deallocating memory to avoid potential for
	 * additional spurious resets from guest touching deallocated pages.
	 */
	vmballoon_send_start(&balloon, 0);
	vmballoon_pop(&balloon);
}
module_exit(vmballoon_exit);