/******************************************************************************
 * grant_table.c
 *
 * Granting foreign access to our memory reservation.
 *
 * Copyright (c) 2005-2006, Christopher Clark
 * Copyright (c) 2004-2005, K A Fraser
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version 2
 * as published by the Free Software Foundation; or, when distributed
 * separately from the Linux kernel or incorporated into other
 * software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt

#include <linux/bitmap.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/uaccess.h>
#include <linux/io.h>
#include <linux/delay.h>
#include <linux/hardirq.h>
#include <linux/workqueue.h>
#include <linux/ratelimit.h>
#include <linux/moduleparam.h>
#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
#include <linux/dma-mapping.h>
#endif

#include <xen/xen.h>
#include <xen/interface/xen.h>
#include <xen/page.h>
#include <xen/grant_table.h>
#include <xen/interface/memory.h>
#include <xen/hvc-console.h>
#include <xen/swiotlb-xen.h>
#include <xen/balloon.h>
#ifdef CONFIG_X86
#include <asm/xen/cpuid.h>
#endif
#include <xen/mem-reservation.h>
#include <asm/xen/hypercall.h>
#include <asm/xen/interface.h>

#include <asm/sync_bitops.h>

#define GNTTAB_LIST_END 0xffffffff

static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;

/*
 * Handling of free grants:
 *
 * Free grants are kept in a simple list anchored in gnttab_free_head. They
 * are linked by grant ref; the last element contains GNTTAB_LIST_END. The
 * number of free entries is stored in gnttab_free_count.
 * Additionally there is a bitmap of free entries anchored in
 * gnttab_free_bitmap. It is used to simplify the allocation of multiple
 * consecutive grants, which is needed e.g. for supporting virtio.
 * gnttab_last_free is used to add free entries of new frames at the end of
 * the free list.
 * gnttab_free_tail_ptr specifies the variable which references the start
 * of consecutive free grants ending with gnttab_last_free. This pointer is
 * updated in a rather defensive way, in order to avoid performance hits in
 * hot paths.
 * All those variables are protected by gnttab_list_lock.
 */
static int gnttab_free_count;
static unsigned int gnttab_size;
static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
static grant_ref_t *gnttab_free_tail_ptr;
static unsigned long *gnttab_free_bitmap;
static DEFINE_SPINLOCK(gnttab_list_lock);

struct grant_frames xen_auto_xlat_grant_frames;
static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0);

static union {
	struct grant_entry_v1 *v1;
	union grant_entry_v2 *v2;
	void *addr;
} gnttab_shared;

/* This is a structure of function pointers for the grant table. */
struct gnttab_ops {
	/*
	 * Version of the grant interface.
	 */
	unsigned int version;
	/*
	 * Grant refs per grant frame.
	 */
	unsigned int grefs_per_grant_frame;
	/*
	 * Map a list of frames for storing grant entries. The frames
	 * parameter holds the grant table frames while the table is being
	 * set up; nr_gframes is the number of frames to map. Returns
	 * GNTST_okay on success, a negative value on failure.
	 */
	int (*map_frames)(xen_pfn_t *frames, unsigned int nr_gframes);
	/*
	 * Release the list of frames that was mapped by map_frames for
	 * grant entry status.
	 */
	void (*unmap_frames)(void);
	/*
	 * Introduce a valid entry into the grant table, granting the frame
	 * of this grant entry to a domain for access. The ref parameter is
	 * the reference of the introduced grant entry, domid is the id of
	 * the granted domain, frame is the page frame to be granted, and
	 * flags is the status the grant entry is to be updated with.
	 */
	void (*update_entry)(grant_ref_t ref, domid_t domid,
			     unsigned long frame, unsigned flags);
	/*
	 * Stop granting a grant entry to a domain. The ref parameter is
	 * the reference of the grant entry whose access is to be revoked.
	 * If the grant entry is currently mapped for reading or writing,
	 * just return failure (==0) directly without tearing down the
	 * grant access. Otherwise, stop grant access for this entry and
	 * return success (==1).
	 */
	int (*end_foreign_access_ref)(grant_ref_t ref);
	/*
	 * Read the frame number related to a given grant reference.
	 */
	unsigned long (*read_frame)(grant_ref_t ref);
};

struct unmap_refs_callback_data {
	struct completion completion;
	int result;
};

static const struct gnttab_ops *gnttab_interface;

/* This reflects the status of grant entries, so it acts as a global value. */
static grant_status_t *grstatus;

static struct gnttab_free_callback *gnttab_free_callback_list;

static int gnttab_expand(unsigned int req_entries);

#define RPP (PAGE_SIZE / sizeof(grant_ref_t))
#define SPP (PAGE_SIZE / sizeof(grant_status_t))

static inline grant_ref_t *__gnttab_entry(grant_ref_t entry)
{
	return &gnttab_list[(entry) / RPP][(entry) % RPP];
}

/* This can be used as an l-value */
#define gnttab_entry(entry) (*__gnttab_entry(entry))
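
/*
 * Grab @count references off the free list, expanding the grant table if
 * there are not enough free entries. The entries need not be consecutive;
 * they stay chained via gnttab_entry() and the chain is terminated with
 * GNTTAB_LIST_END, so the caller can walk them starting at the returned
 * head. Returns the first reference, or a negative errno on failure.
 */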
static int get_free_entries(unsigned count)
{
	unsigned long flags;
	int ref, rc = 0;
	grant_ref_t head;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if ((gnttab_free_count < count) &&
	    ((rc = gnttab_expand(count - gnttab_free_count)) < 0)) {
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		return rc;
	}

	ref = head = gnttab_free_head;
	gnttab_free_count -= count;
	while (count--) {
		bitmap_clear(gnttab_free_bitmap, head, 1);
		if (gnttab_free_tail_ptr == __gnttab_entry(head))
			gnttab_free_tail_ptr = &gnttab_free_head;
		if (count)
			head = gnttab_entry(head);
	}
	gnttab_free_head = gnttab_entry(head);
	gnttab_entry(head) = GNTTAB_LIST_END;

	if (!gnttab_free_count) {
		gnttab_last_free = GNTTAB_LIST_END;
		gnttab_free_tail_ptr = NULL;
	}

	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ref;
}

static int get_seq_entry_count(void)
{
	if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
	    *gnttab_free_tail_ptr == GNTTAB_LIST_END)
		return 0;

	return gnttab_last_free - *gnttab_free_tail_ptr + 1;
}

/* Rebuilds the free grant list and tries to find count consecutive entries. */
static int get_free_seq(unsigned int count)
{
	int ret = -ENOSPC;
	unsigned int from, to;
	grant_ref_t *last;

	gnttab_free_tail_ptr = &gnttab_free_head;
	last = &gnttab_free_head;

	for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
	     from < gnttab_size;
	     from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
		to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
					from + 1);
		if (ret < 0 && to - from >= count) {
			ret = from;
			bitmap_clear(gnttab_free_bitmap, ret, count);
			from += count;
			gnttab_free_count -= count;
			if (from == to)
				continue;
		}

		/*
		 * Recreate the free list in order to have it properly
		 * sorted. This is needed to make sure that the free tail
		 * has the maximum possible size.
		 */
		while (from < to) {
			*last = from;
			last = __gnttab_entry(from);
			gnttab_last_free = from;
			from++;
		}
		if (to < gnttab_size)
			gnttab_free_tail_ptr = __gnttab_entry(to - 1);
	}

	*last = GNTTAB_LIST_END;
	if (gnttab_last_free != gnttab_size - 1)
		gnttab_free_tail_ptr = NULL;

	return ret;
}
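
/*
 * Allocate @count consecutive grant references. First make sure enough free
 * entries exist (expanding the table if needed), then try the consecutive
 * tail of the free list; if that is too short, rebuild the list via
 * get_free_seq() and, failing that, expand the table once more. Returns the
 * first reference of the sequence, or a negative errno.
 */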
static int get_free_entries_seq(unsigned int count)
{
	unsigned long flags;
	int ret = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	if (gnttab_free_count < count) {
		ret = gnttab_expand(count - gnttab_free_count);
		if (ret < 0)
			goto out;
	}

	if (get_seq_entry_count() < count) {
		ret = get_free_seq(count);
		if (ret >= 0)
			goto out;
		ret = gnttab_expand(count - get_seq_entry_count());
		if (ret < 0)
			goto out;
	}

	ret = *gnttab_free_tail_ptr;
	*gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
	gnttab_free_count -= count;
	if (!gnttab_free_count)
		gnttab_free_tail_ptr = NULL;
	bitmap_clear(gnttab_free_bitmap, ret, count);

out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);

	return ret;
}

static void do_free_callbacks(void)
{
	struct gnttab_free_callback *callback, *next;

	callback = gnttab_free_callback_list;
	gnttab_free_callback_list = NULL;

	while (callback != NULL) {
		next = callback->next;
		if (gnttab_free_count >= callback->count) {
			callback->next = NULL;
			callback->fn(callback->arg);
		} else {
			callback->next = gnttab_free_callback_list;
			gnttab_free_callback_list = callback;
		}
		callback = next;
	}
}

static inline void check_free_callbacks(void)
{
	if (unlikely(gnttab_free_callback_list))
		do_free_callbacks();
}

static void put_free_entry_locked(grant_ref_t ref)
{
	if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
		return;

	gnttab_entry(ref) = gnttab_free_head;
	gnttab_free_head = ref;
	if (!gnttab_free_count)
		gnttab_last_free = ref;
	if (gnttab_free_tail_ptr == &gnttab_free_head)
		gnttab_free_tail_ptr = __gnttab_entry(ref);
	gnttab_free_count++;
	bitmap_set(gnttab_free_bitmap, ref, 1);
}

static void put_free_entry(grant_ref_t ref)
{
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	put_free_entry_locked(ref);
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
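
/*
 * Mark a range of @n grant references starting at @start as free: chain
 * them onto the free list, account for them in gnttab_free_count and set
 * the corresponding bits in gnttab_free_bitmap. Called during (still
 * single-threaded) init or, via grow_gnttab_list(), with gnttab_list_lock
 * held.
 */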
static void gnttab_set_free(unsigned int start, unsigned int n)
{
	unsigned int i;

	for (i = start; i < start + n - 1; i++)
		gnttab_entry(i) = i + 1;

	gnttab_entry(i) = GNTTAB_LIST_END;
	if (!gnttab_free_count) {
		gnttab_free_head = start;
		gnttab_free_tail_ptr = &gnttab_free_head;
	} else {
		gnttab_entry(gnttab_last_free) = start;
	}
	gnttab_free_count += n;
	gnttab_last_free = i;

	bitmap_set(gnttab_free_bitmap, start, n);
}

/*
 * Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
 * Introducing a valid entry into the grant table:
 *  1. Write ent->domid.
 *  2. Write ent->frame: Frame to which access is permitted.
 *  3. Write memory barrier (WMB).
 *  4. Write ent->flags, inc. valid type.
 */
static void gnttab_update_entry_v1(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned flags)
{
	gnttab_shared.v1[ref].domid = domid;
	gnttab_shared.v1[ref].frame = frame;
	wmb();
	gnttab_shared.v1[ref].flags = flags;
}

static void gnttab_update_entry_v2(grant_ref_t ref, domid_t domid,
				   unsigned long frame, unsigned int flags)
{
	gnttab_shared.v2[ref].hdr.domid = domid;
	gnttab_shared.v2[ref].full_page.frame = frame;
	wmb();	/* Hypervisor concurrent accesses. */
	gnttab_shared.v2[ref].hdr.flags = GTF_permit_access | flags;
}

/*
 * Public grant-issuing interface functions
 */
void gnttab_grant_foreign_access_ref(grant_ref_t ref, domid_t domid,
				     unsigned long frame, int readonly)
{
	gnttab_interface->update_entry(ref, domid, frame,
			   GTF_permit_access | (readonly ? GTF_readonly : 0));
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access_ref);

int gnttab_grant_foreign_access(domid_t domid, unsigned long frame,
				int readonly)
{
	int ref;

	ref = get_free_entries(1);
	if (unlikely(ref < 0))
		return -ENOSPC;

	gnttab_grant_foreign_access_ref(ref, domid, frame, readonly);

	return ref;
}
EXPORT_SYMBOL_GPL(gnttab_grant_foreign_access);

static int gnttab_end_foreign_access_ref_v1(grant_ref_t ref)
{
	u16 *pflags = &gnttab_shared.v1[ref].flags;
	u16 flags;

	flags = *pflags;
	do {
		if (flags & (GTF_reading|GTF_writing))
			return 0;
	} while (!sync_try_cmpxchg(pflags, &flags, 0));

	return 1;
}

static int gnttab_end_foreign_access_ref_v2(grant_ref_t ref)
{
	gnttab_shared.v2[ref].hdr.flags = 0;
	mb();	/* Concurrent access by hypervisor. */

	if (grstatus[ref] & (GTF_reading|GTF_writing)) {
		return 0;
	} else {
		/*
		 * The read of grstatus needs to have acquire semantics.
		 * On x86, reads already have that, and we just need to
		 * protect against compiler reorderings.
		 * On other architectures we may need a full barrier.
		 */
#ifdef CONFIG_X86
		barrier();
#else
		mb();
#endif
	}

	return 1;
}

static inline int _gnttab_end_foreign_access_ref(grant_ref_t ref)
{
	return gnttab_interface->end_foreign_access_ref(ref);
}

int gnttab_end_foreign_access_ref(grant_ref_t ref)
{
	if (_gnttab_end_foreign_access_ref(ref))
		return 1;

	pr_warn("WARNING: g.e. %#x still in use!\n", ref);
	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access_ref);

static unsigned long gnttab_read_frame_v1(grant_ref_t ref)
{
	return gnttab_shared.v1[ref].frame;
}

static unsigned long gnttab_read_frame_v2(grant_ref_t ref)
{
	return gnttab_shared.v2[ref].full_page.frame;
}

struct deferred_entry {
	struct list_head list;
	grant_ref_t ref;
	uint16_t warn_delay;
	struct page *page;
};
static LIST_HEAD(deferred_list);
static void gnttab_handle_deferred(struct timer_list *);
static DEFINE_TIMER(deferred_timer, gnttab_handle_deferred);

static atomic64_t deferred_count;
static atomic64_t leaked_count;
static unsigned int free_per_iteration = 10;
module_param(free_per_iteration, uint, 0600);
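
/*
 * Timer callback for deferred frees: grant references whose foreign
 * mapping was still active when the local side tried to drop them sit on
 * deferred_list and are retried here once per second, freeing at most
 * free_per_iteration entries per run (0 means no limit). An entry still
 * pending after roughly a minute of retries is reported once.
 */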
static void gnttab_handle_deferred(struct timer_list *unused)
{
	unsigned int nr = READ_ONCE(free_per_iteration);
	const bool ignore_limit = nr == 0;
	struct deferred_entry *first = NULL;
	unsigned long flags;
	size_t freed = 0;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while ((ignore_limit || nr--) && !list_empty(&deferred_list)) {
		struct deferred_entry *entry
			= list_first_entry(&deferred_list,
					   struct deferred_entry, list);

		if (entry == first)
			break;
		list_del(&entry->list);
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		if (_gnttab_end_foreign_access_ref(entry->ref)) {
			uint64_t ret = atomic64_dec_return(&deferred_count);

			put_free_entry(entry->ref);
			pr_debug("freeing g.e. %#x (pfn %#lx), %llu remaining\n",
				 entry->ref, page_to_pfn(entry->page),
				 (unsigned long long)ret);
			put_page(entry->page);
			freed++;
			kfree(entry);
			entry = NULL;
		} else {
			if (!--entry->warn_delay)
				pr_info("g.e. %#x still pending\n", entry->ref);
			if (!first)
				first = entry;
		}
		spin_lock_irqsave(&gnttab_list_lock, flags);
		if (entry)
			list_add_tail(&entry->list, &deferred_list);
	}
	if (list_empty(&deferred_list))
		WARN_ON(atomic64_read(&deferred_count));
	else if (!timer_pending(&deferred_timer)) {
		deferred_timer.expires = jiffies + HZ;
		add_timer(&deferred_timer);
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
	pr_debug("Freed %zu references\n", freed);
}

static void gnttab_add_deferred(grant_ref_t ref, struct page *page)
{
	struct deferred_entry *entry;
	gfp_t gfp = (in_atomic() || irqs_disabled()) ? GFP_ATOMIC : GFP_KERNEL;
	uint64_t leaked, deferred;

	entry = kmalloc(sizeof(*entry), gfp);
	if (!page) {
		unsigned long gfn = gnttab_interface->read_frame(ref);

		page = pfn_to_page(gfn_to_pfn(gfn));
		get_page(page);
	}

	if (entry) {
		unsigned long flags;

		entry->ref = ref;
		entry->page = page;
		entry->warn_delay = 60;
		spin_lock_irqsave(&gnttab_list_lock, flags);
		list_add_tail(&entry->list, &deferred_list);
		if (!timer_pending(&deferred_timer)) {
			deferred_timer.expires = jiffies + HZ;
			add_timer(&deferred_timer);
		}
		spin_unlock_irqrestore(&gnttab_list_lock, flags);
		deferred = atomic64_inc_return(&deferred_count);
		leaked = atomic64_read(&leaked_count);
		pr_debug("deferring g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
			 ref, page ? page_to_pfn(page) : -1, deferred, leaked);
	} else {
		deferred = atomic64_read(&deferred_count);
		leaked = atomic64_inc_return(&leaked_count);
		pr_warn("leaking g.e. %#x (pfn %#lx) (total deferred %llu, total leaked %llu)\n",
			ref, page ? page_to_pfn(page) : -1, deferred, leaked);
	}
}

int gnttab_try_end_foreign_access(grant_ref_t ref)
{
	int ret = _gnttab_end_foreign_access_ref(ref);

	if (ret)
		put_free_entry(ref);

	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_try_end_foreign_access);

void gnttab_end_foreign_access(grant_ref_t ref, struct page *page)
{
	if (gnttab_try_end_foreign_access(ref)) {
		if (page)
			put_page(page);
	} else
		gnttab_add_deferred(ref, page);
}
EXPORT_SYMBOL_GPL(gnttab_end_foreign_access);

void gnttab_free_grant_reference(grant_ref_t ref)
{
	put_free_entry(ref);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference);

void gnttab_free_grant_references(grant_ref_t head)
{
	grant_ref_t ref;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	while (head != GNTTAB_LIST_END) {
		ref = gnttab_entry(head);
		put_free_entry_locked(head);
		head = ref;
	}
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);

void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
{
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	for (i = count; i > 0; i--)
		put_free_entry_locked(head + i - 1);
	check_free_callbacks();
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);

int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
	int h = get_free_entries(count);

	if (h < 0)
		return -ENOSPC;

	*head = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);

int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
{
	int h;

	if (count == 1)
		h = get_free_entries(1);
	else
		h = get_free_entries_seq(count);

	if (h < 0)
		return -ENOSPC;

	*first = h;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);

int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
	return (*private_head == GNTTAB_LIST_END);
}
EXPORT_SYMBOL_GPL(gnttab_empty_grant_references);

int gnttab_claim_grant_reference(grant_ref_t *private_head)
{
	grant_ref_t g = *private_head;

	if (unlikely(g == GNTTAB_LIST_END))
		return -ENOSPC;
	*private_head = gnttab_entry(g);

	return g;
}
EXPORT_SYMBOL_GPL(gnttab_claim_grant_reference);

void gnttab_release_grant_reference(grant_ref_t *private_head,
				    grant_ref_t release)
{
	gnttab_entry(release) = *private_head;
	*private_head = release;
}
EXPORT_SYMBOL_GPL(gnttab_release_grant_reference);

void gnttab_request_free_callback(struct gnttab_free_callback *callback,
				  void (*fn)(void *), void *arg, u16 count)
{
	unsigned long flags;
	struct gnttab_free_callback *cb;

	spin_lock_irqsave(&gnttab_list_lock, flags);

	/* Check if the callback is already on the list */
	cb = gnttab_free_callback_list;
	while (cb) {
		if (cb == callback)
			goto out;
		cb = cb->next;
	}

	callback->fn = fn;
	callback->arg = arg;
	callback->count = count;
	callback->next = gnttab_free_callback_list;
	gnttab_free_callback_list = callback;
	check_free_callbacks();
out:
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_request_free_callback);

void gnttab_cancel_free_callback(struct gnttab_free_callback *callback)
{
	struct gnttab_free_callback **pcb;
	unsigned long flags;

	spin_lock_irqsave(&gnttab_list_lock, flags);
	for (pcb = &gnttab_free_callback_list; *pcb; pcb = &(*pcb)->next) {
		if (*pcb == callback) {
			*pcb = callback->next;
			break;
		}
	}
	spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_cancel_free_callback);
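
/*
 * Number of pages needed to store one entry per grant reference contained
 * in @frames grant frames, with @align entries fitting into one page (RPP
 * for grant_ref_t, SPP for grant_status_t), rounded up.
 */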
static unsigned int gnttab_frames(unsigned int frames, unsigned int align)
{
	return (frames * gnttab_interface->grefs_per_grant_frame + align - 1) /
	       align;
}

static int grow_gnttab_list(unsigned int more_frames)
{
	unsigned int new_nr_grant_frames, extra_entries, i;
	unsigned int nr_glist_frames, new_nr_glist_frames;
	unsigned int grefs_per_frame;

	grefs_per_frame = gnttab_interface->grefs_per_grant_frame;

	new_nr_grant_frames = nr_grant_frames + more_frames;
	extra_entries = more_frames * grefs_per_frame;

	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
	new_nr_glist_frames = gnttab_frames(new_nr_grant_frames, RPP);
	for (i = nr_glist_frames; i < new_nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_ATOMIC);
		if (!gnttab_list[i])
			goto grow_nomem;
	}

	gnttab_set_free(gnttab_size, extra_entries);

	if (!gnttab_free_tail_ptr)
		gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);

	nr_grant_frames = new_nr_grant_frames;
	gnttab_size += extra_entries;

	check_free_callbacks();

	return 0;

grow_nomem:
	while (i-- > nr_glist_frames)
		free_page((unsigned long)gnttab_list[i]);
	return -ENOMEM;
}

static unsigned int __max_nr_grant_frames(void)
{
	struct gnttab_query_size query;
	int rc;

	query.dom = DOMID_SELF;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_query_size, &query, 1);
	if ((rc < 0) || (query.status != GNTST_okay))
		return 4; /* Legacy max supported number of frames */

	return query.max_nr_frames;
}

unsigned int gnttab_max_grant_frames(void)
{
	unsigned int xen_max = __max_nr_grant_frames();
	static unsigned int boot_max_nr_grant_frames;

	/* First time, initialize it properly. */
	if (!boot_max_nr_grant_frames)
		boot_max_nr_grant_frames = __max_nr_grant_frames();

	if (xen_max > boot_max_nr_grant_frames)
		return boot_max_nr_grant_frames;
	return xen_max;
}
EXPORT_SYMBOL_GPL(gnttab_max_grant_frames);

int gnttab_setup_auto_xlat_frames(phys_addr_t addr)
{
	xen_pfn_t *pfn;
	unsigned int max_nr_gframes = __max_nr_grant_frames();
	unsigned int i;
	void *vaddr;

	if (xen_auto_xlat_grant_frames.count)
		return -EINVAL;

	vaddr = memremap(addr, XEN_PAGE_SIZE * max_nr_gframes, MEMREMAP_WB);
	if (vaddr == NULL) {
		pr_warn("Failed to memremap gnttab share frames (addr=%pa)!\n",
			&addr);
		return -ENOMEM;
	}
	pfn = kcalloc(max_nr_gframes, sizeof(pfn[0]), GFP_KERNEL);
	if (!pfn) {
		memunmap(vaddr);
		return -ENOMEM;
	}
	for (i = 0; i < max_nr_gframes; i++)
		pfn[i] = XEN_PFN_DOWN(addr) + i;

	xen_auto_xlat_grant_frames.vaddr = vaddr;
	xen_auto_xlat_grant_frames.pfn = pfn;
	xen_auto_xlat_grant_frames.count = max_nr_gframes;

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_setup_auto_xlat_frames);

void gnttab_free_auto_xlat_frames(void)
{
	if (!xen_auto_xlat_grant_frames.count)
		return;
	kfree(xen_auto_xlat_grant_frames.pfn);
	memunmap(xen_auto_xlat_grant_frames.vaddr);

	xen_auto_xlat_grant_frames.pfn = NULL;
	xen_auto_xlat_grant_frames.count = 0;
	xen_auto_xlat_grant_frames.vaddr = NULL;
}
EXPORT_SYMBOL_GPL(gnttab_free_auto_xlat_frames);

int gnttab_pages_set_private(int nr_pages, struct page **pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
#if BITS_PER_LONG < 64
		struct xen_page_foreign *foreign;

		foreign = kzalloc(sizeof(*foreign), GFP_KERNEL);
		if (!foreign)
			return -ENOMEM;

		set_page_private(pages[i], (unsigned long)foreign);
#endif
		SetPagePrivate(pages[i]);
	}

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_pages_set_private);

/**
 * gnttab_alloc_pages - alloc pages suitable for grant mapping into
 * @nr_pages: number of pages to alloc
 * @pages: returns the pages
 */
int gnttab_alloc_pages(int nr_pages, struct page **pages)
{
	int ret;

	ret = xen_alloc_unpopulated_pages(nr_pages, pages);
	if (ret < 0)
		return ret;

	ret = gnttab_pages_set_private(nr_pages, pages);
	if (ret < 0)
		gnttab_free_pages(nr_pages, pages);

	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_alloc_pages);
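
/*
 * The page cache below keeps previously allocated grant pages around for
 * reuse. With CONFIG_XEN_UNPOPULATED_ALLOC the cached pages are chained
 * through page->zone_device_data; otherwise they are kept on a list linked
 * via page->lru.
 */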
#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
static inline void cache_init(struct gnttab_page_cache *cache)
{
	cache->pages = NULL;
}

static inline bool cache_empty(struct gnttab_page_cache *cache)
{
	return !cache->pages;
}

static inline struct page *cache_deq(struct gnttab_page_cache *cache)
{
	struct page *page;

	page = cache->pages;
	cache->pages = page->zone_device_data;

	return page;
}

static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
{
	page->zone_device_data = cache->pages;
	cache->pages = page;
}
#else
static inline void cache_init(struct gnttab_page_cache *cache)
{
	INIT_LIST_HEAD(&cache->pages);
}

static inline bool cache_empty(struct gnttab_page_cache *cache)
{
	return list_empty(&cache->pages);
}

static inline struct page *cache_deq(struct gnttab_page_cache *cache)
{
	struct page *page;

	page = list_first_entry(&cache->pages, struct page, lru);
	list_del(&page->lru);

	return page;
}

static inline void cache_enq(struct gnttab_page_cache *cache, struct page *page)
{
	list_add(&page->lru, &cache->pages);
}
#endif

void gnttab_page_cache_init(struct gnttab_page_cache *cache)
{
	spin_lock_init(&cache->lock);
	cache_init(cache);
	cache->num_pages = 0;
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_init);

int gnttab_page_cache_get(struct gnttab_page_cache *cache, struct page **page)
{
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);

	if (cache_empty(cache)) {
		spin_unlock_irqrestore(&cache->lock, flags);
		return gnttab_alloc_pages(1, page);
	}

	page[0] = cache_deq(cache);
	cache->num_pages--;

	spin_unlock_irqrestore(&cache->lock, flags);

	return 0;
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_get);

void gnttab_page_cache_put(struct gnttab_page_cache *cache, struct page **page,
			   unsigned int num)
{
	unsigned long flags;
	unsigned int i;

	spin_lock_irqsave(&cache->lock, flags);

	for (i = 0; i < num; i++)
		cache_enq(cache, page[i]);
	cache->num_pages += num;

	spin_unlock_irqrestore(&cache->lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_put);

void gnttab_page_cache_shrink(struct gnttab_page_cache *cache, unsigned int num)
{
	struct page *page[10];
	unsigned int i = 0;
	unsigned long flags;

	spin_lock_irqsave(&cache->lock, flags);

	while (cache->num_pages > num) {
		page[i] = cache_deq(cache);
		cache->num_pages--;
		if (++i == ARRAY_SIZE(page)) {
			spin_unlock_irqrestore(&cache->lock, flags);
			gnttab_free_pages(i, page);
			i = 0;
			spin_lock_irqsave(&cache->lock, flags);
		}
	}

	spin_unlock_irqrestore(&cache->lock, flags);

	if (i != 0)
		gnttab_free_pages(i, page);
}
EXPORT_SYMBOL_GPL(gnttab_page_cache_shrink);

void gnttab_pages_clear_private(int nr_pages, struct page **pages)
{
	int i;

	for (i = 0; i < nr_pages; i++) {
		if (PagePrivate(pages[i])) {
#if BITS_PER_LONG < 64
			kfree((void *)page_private(pages[i]));
#endif
			ClearPagePrivate(pages[i]);
		}
	}
}
EXPORT_SYMBOL_GPL(gnttab_pages_clear_private);

/**
 * gnttab_free_pages - free pages allocated by gnttab_alloc_pages()
 * @nr_pages: number of pages to free
 * @pages: the pages
 */
void gnttab_free_pages(int nr_pages, struct page **pages)
{
	gnttab_pages_clear_private(nr_pages, pages);
	xen_free_unpopulated_pages(nr_pages, pages);
}
EXPORT_SYMBOL_GPL(gnttab_free_pages);

#ifdef CONFIG_XEN_GRANT_DMA_ALLOC
/**
 * gnttab_dma_alloc_pages - alloc DMAable pages suitable for grant mapping into
 * @args: arguments to the function
 */
int gnttab_dma_alloc_pages(struct gnttab_dma_alloc_args *args)
{
	unsigned long pfn, start_pfn;
	size_t size;
	int i, ret;

	if (args->nr_pages < 0 || args->nr_pages > (INT_MAX >> PAGE_SHIFT))
		return -ENOMEM;

	size = args->nr_pages << PAGE_SHIFT;
	if (args->coherent)
		args->vaddr = dma_alloc_coherent(args->dev, size,
						 &args->dev_bus_addr,
						 GFP_KERNEL | __GFP_NOWARN);
	else
		args->vaddr = dma_alloc_wc(args->dev, size,
					   &args->dev_bus_addr,
					   GFP_KERNEL | __GFP_NOWARN);
	if (!args->vaddr) {
		pr_debug("Failed to allocate DMA buffer of size %zu\n", size);
		return -ENOMEM;
	}

	start_pfn = __phys_to_pfn(args->dev_bus_addr);
	for (pfn = start_pfn, i = 0; pfn < start_pfn + args->nr_pages;
			pfn++, i++) {
		struct page *page = pfn_to_page(pfn);

		args->pages[i] = page;
		args->frames[i] = xen_page_to_gfn(page);
		xenmem_reservation_scrub_page(page);
	}

	xenmem_reservation_va_mapping_reset(args->nr_pages, args->pages);

	ret = xenmem_reservation_decrease(args->nr_pages, args->frames);
	if (ret != args->nr_pages) {
		pr_debug("Failed to decrease reservation for DMA buffer\n");
		ret = -EFAULT;
		goto fail;
	}

	ret = gnttab_pages_set_private(args->nr_pages, args->pages);
	if (ret < 0)
		goto fail;

	return 0;

fail:
	gnttab_dma_free_pages(args);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_dma_alloc_pages);

/**
 * gnttab_dma_free_pages - free DMAable pages
 * @args: arguments to the function
 */
int gnttab_dma_free_pages(struct gnttab_dma_alloc_args *args)
{
	size_t size;
	int i, ret;

	gnttab_pages_clear_private(args->nr_pages, args->pages);

	for (i = 0; i < args->nr_pages; i++)
		args->frames[i] = page_to_xen_pfn(args->pages[i]);

	ret = xenmem_reservation_increase(args->nr_pages, args->frames);
	if (ret != args->nr_pages) {
		pr_debug("Failed to increase reservation for DMA buffer\n");
		ret = -EFAULT;
	} else {
		ret = 0;
	}

	xenmem_reservation_va_mapping_update(args->nr_pages, args->pages,
					     args->frames);

	size = args->nr_pages << PAGE_SHIFT;
	if (args->coherent)
		dma_free_coherent(args->dev, size,
				  args->vaddr, args->dev_bus_addr);
	else
		dma_free_wc(args->dev, size,
			    args->vaddr, args->dev_bus_addr);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_dma_free_pages);
#endif

/* Handling of paged out grant targets (GNTST_eagain) */
#define MAX_DELAY 256
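
/*
 * Retry a single grant table operation whose target frame is currently
 * paged out. The hypercall is repeated with an increasing sleep (1 ms,
 * 2 ms, ...) as long as the status stays GNTST_eagain, up to MAX_DELAY
 * steps; if the budget is exhausted, the status is forced to
 * GNTST_bad_page and an error is logged.
 */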
static inline void
gnttab_retry_eagain_gop(unsigned int cmd, void *gop, int16_t *status,
			const char *func)
{
	unsigned delay = 1;

	do {
		BUG_ON(HYPERVISOR_grant_table_op(cmd, gop, 1));
		if (*status == GNTST_eagain)
			msleep(delay++);
	} while ((*status == GNTST_eagain) && (delay < MAX_DELAY));

	if (delay >= MAX_DELAY) {
		pr_err("%s: %s eagain grant\n", func, current->comm);
		*status = GNTST_bad_page;
	}
}

void gnttab_batch_map(struct gnttab_map_grant_ref *batch, unsigned count)
{
	struct gnttab_map_grant_ref *op;

	if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, batch, count))
		BUG();
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref, op,
						&op->status, __func__);
}
EXPORT_SYMBOL_GPL(gnttab_batch_map);

void gnttab_batch_copy(struct gnttab_copy *batch, unsigned count)
{
	struct gnttab_copy *op;

	if (HYPERVISOR_grant_table_op(GNTTABOP_copy, batch, count))
		BUG();
	for (op = batch; op < batch + count; op++)
		if (op->status == GNTST_eagain)
			gnttab_retry_eagain_gop(GNTTABOP_copy, op,
						&op->status, __func__);
}
EXPORT_SYMBOL_GPL(gnttab_batch_copy);
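
/*
 * Invoke @fn once for every grant-sized (XEN_PAGE_SIZE) chunk of the byte
 * range @offset/@len within @page. This matters when the kernel page size
 * is larger than the 4 KiB Xen page size: one struct page may then cover
 * several Xen grants.
 */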
void gnttab_foreach_grant_in_range(struct page *page,
				   unsigned int offset,
				   unsigned int len,
				   xen_grant_fn_t fn,
				   void *data)
{
	unsigned int goffset;
	unsigned int glen;
	unsigned long xen_pfn;

	len = min_t(unsigned int, PAGE_SIZE - offset, len);
	goffset = xen_offset_in_page(offset);

	xen_pfn = page_to_xen_pfn(page) + XEN_PFN_DOWN(offset);

	while (len) {
		glen = min_t(unsigned int, XEN_PAGE_SIZE - goffset, len);
		fn(pfn_to_gfn(xen_pfn), goffset, glen, data);

		goffset = 0;
		xen_pfn++;
		len -= glen;
	}
}
EXPORT_SYMBOL_GPL(gnttab_foreach_grant_in_range);

void gnttab_foreach_grant(struct page **pages,
			  unsigned int nr_grefs,
			  xen_grant_fn_t fn,
			  void *data)
{
	unsigned int goffset = 0;
	unsigned long xen_pfn = 0;
	unsigned int i;

	for (i = 0; i < nr_grefs; i++) {
		if ((i % XEN_PFN_PER_PAGE) == 0) {
			xen_pfn = page_to_xen_pfn(pages[i / XEN_PFN_PER_PAGE]);
			goffset = 0;
		}

		fn(pfn_to_gfn(xen_pfn), goffset, XEN_PAGE_SIZE, data);

		goffset += XEN_PAGE_SIZE;
		xen_pfn++;
	}
}
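
/*
 * Map a batch of foreign grants into local pages. On success each page is
 * marked as foreign and tagged with the granting domain/gref, so ownership
 * can be recovered later via xen_page_foreign(). Operations returning
 * GNTST_eagain (paged-out targets) are retried in place.
 */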
int gnttab_map_refs(struct gnttab_map_grant_ref *map_ops,
		    struct gnttab_map_grant_ref *kmap_ops,
		    struct page **pages, unsigned int count)
{
	int i, ret;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map_ops, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++) {
		switch (map_ops[i].status) {
		case GNTST_okay:
		{
			struct xen_page_foreign *foreign;

			SetPageForeign(pages[i]);
			foreign = xen_page_foreign(pages[i]);
			foreign->domid = map_ops[i].dom;
			foreign->gref = map_ops[i].ref;
			break;
		}

		case GNTST_no_device_space:
			pr_warn_ratelimited("maptrack limit reached, can't map all guest pages\n");
			break;

		case GNTST_eagain:
			/* Retry eagain maps */
			gnttab_retry_eagain_gop(GNTTABOP_map_grant_ref,
						map_ops + i,
						&map_ops[i].status, __func__);
			/* Test status in next loop iteration. */
			i--;
			break;

		default:
			break;
		}
	}

	return set_foreign_p2m_mapping(map_ops, kmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_map_refs);

int gnttab_unmap_refs(struct gnttab_unmap_grant_ref *unmap_ops,
		      struct gnttab_unmap_grant_ref *kunmap_ops,
		      struct page **pages, unsigned int count)
{
	unsigned int i;
	int ret;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, unmap_ops, count);
	if (ret)
		return ret;

	for (i = 0; i < count; i++)
		ClearPageForeign(pages[i]);

	return clear_foreign_p2m_mapping(unmap_ops, kunmap_ops, pages, count);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs);

#define GNTTAB_UNMAP_REFS_DELAY 5

static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data *item);

static void gnttab_unmap_work(struct work_struct *work)
{
	struct gntab_unmap_queue_data
		*unmap_data = container_of(work,
					   struct gntab_unmap_queue_data,
					   gnttab_work.work);
	if (unmap_data->age != UINT_MAX)
		unmap_data->age++;
	__gnttab_unmap_refs_async(unmap_data);
}
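
/*
 * Unmap the grants only once no other kernel user holds a reference on any
 * of the pages (page_count() == 1). If a page is still busy, re-queue the
 * work with a delay that grows with the age of the request; the caller is
 * notified through item->done() once the unmap has actually been done.
 */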
static void __gnttab_unmap_refs_async(struct gntab_unmap_queue_data *item)
{
	int ret;
	int pc;

	for (pc = 0; pc < item->count; pc++) {
		if (page_count(item->pages[pc]) > 1) {
			unsigned long delay = GNTTAB_UNMAP_REFS_DELAY * (item->age + 1);

			schedule_delayed_work(&item->gnttab_work,
					      msecs_to_jiffies(delay));
			return;
		}
	}

	ret = gnttab_unmap_refs(item->unmap_ops, item->kunmap_ops,
				item->pages, item->count);
	item->done(ret, item);
}

void gnttab_unmap_refs_async(struct gntab_unmap_queue_data *item)
{
	INIT_DELAYED_WORK(&item->gnttab_work, gnttab_unmap_work);
	item->age = 0;

	__gnttab_unmap_refs_async(item);
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_async);

static void unmap_refs_callback(int result,
				struct gntab_unmap_queue_data *data)
{
	struct unmap_refs_callback_data *d = data->data;

	d->result = result;
	complete(&d->completion);
}

int gnttab_unmap_refs_sync(struct gntab_unmap_queue_data *item)
{
	struct unmap_refs_callback_data data;

	init_completion(&data.completion);
	item->data = &data;
	item->done = &unmap_refs_callback;
	gnttab_unmap_refs_async(item);
	wait_for_completion(&data.completion);

	return data.result;
}
EXPORT_SYMBOL_GPL(gnttab_unmap_refs_sync);

static unsigned int nr_status_frames(unsigned int nr_grant_frames)
{
	return gnttab_frames(nr_grant_frames, SPP);
}

static int gnttab_map_frames_v1(xen_pfn_t *frames, unsigned int nr_gframes)
{
	int rc;

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}

static void gnttab_unmap_frames_v1(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
}

static int gnttab_map_frames_v2(xen_pfn_t *frames, unsigned int nr_gframes)
{
	uint64_t *sframes;
	unsigned int nr_sframes;
	struct gnttab_get_status_frames getframes;
	int rc;

	nr_sframes = nr_status_frames(nr_gframes);

	/* No need for kzalloc as the array is initialized by the following
	 * GNTTABOP_get_status_frames hypercall.
	 */
	sframes = kmalloc_array(nr_sframes, sizeof(uint64_t), GFP_ATOMIC);
	if (!sframes)
		return -ENOMEM;

	getframes.dom = DOMID_SELF;
	getframes.nr_frames = nr_sframes;
	set_xen_guest_handle(getframes.frame_list, sframes);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_get_status_frames,
				       &getframes, 1);
	if (rc == -ENOSYS) {
		kfree(sframes);
		return -ENOSYS;
	}

	BUG_ON(rc || getframes.status);

	rc = arch_gnttab_map_status(sframes, nr_sframes,
				    nr_status_frames(gnttab_max_grant_frames()),
				    &grstatus);
	BUG_ON(rc);
	kfree(sframes);

	rc = arch_gnttab_map_shared(frames, nr_gframes,
				    gnttab_max_grant_frames(),
				    &gnttab_shared.addr);
	BUG_ON(rc);

	return 0;
}

static void gnttab_unmap_frames_v2(void)
{
	arch_gnttab_unmap(gnttab_shared.addr, nr_grant_frames);
	arch_gnttab_unmap(grstatus, nr_status_frames(nr_grant_frames));
}
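
/*
 * Make the grant table frames [start_idx, end_idx] available to this
 * domain. On auto-translated guests the frames are added to the physmap at
 * the pre-arranged PFNs from xen_auto_xlat_grant_frames; otherwise the
 * frames are obtained with GNTTABOP_setup_table and mapped through the
 * version-specific map_frames() hook.
 */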
static int gnttab_map(unsigned int start_idx, unsigned int end_idx)
{
	struct gnttab_setup_table setup;
	xen_pfn_t *frames;
	unsigned int nr_gframes = end_idx + 1;
	int rc;

	if (xen_feature(XENFEAT_auto_translated_physmap)) {
		struct xen_add_to_physmap xatp;
		unsigned int i = end_idx;

		rc = 0;
		BUG_ON(xen_auto_xlat_grant_frames.count < nr_gframes);
		/*
		 * Loop backwards, so that the first hypercall has the largest
		 * index, ensuring that the table will grow only once.
		 */
		do {
			xatp.domid = DOMID_SELF;
			xatp.idx = i;
			xatp.space = XENMAPSPACE_grant_table;
			xatp.gpfn = xen_auto_xlat_grant_frames.pfn[i];
			rc = HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
			if (rc != 0) {
				pr_warn("grant table add_to_physmap failed, err=%d\n",
					rc);
				break;
			}
		} while (i-- > start_idx);

		return rc;
	}

	/* No need for kzalloc as the array is initialized by the following
	 * GNTTABOP_setup_table hypercall.
	 */
	frames = kmalloc_array(nr_gframes, sizeof(unsigned long), GFP_ATOMIC);
	if (!frames)
		return -ENOMEM;

	setup.dom = DOMID_SELF;
	setup.nr_frames = nr_gframes;
	set_xen_guest_handle(setup.frame_list, frames);

	rc = HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1);
	if (rc == -ENOSYS) {
		kfree(frames);
		return -ENOSYS;
	}

	BUG_ON(rc || setup.status);

	rc = gnttab_interface->map_frames(frames, nr_gframes);

	kfree(frames);

	return rc;
}

static const struct gnttab_ops gnttab_v1_ops = {
	.version = 1,
	.grefs_per_grant_frame = XEN_PAGE_SIZE /
				 sizeof(struct grant_entry_v1),
	.map_frames = gnttab_map_frames_v1,
	.unmap_frames = gnttab_unmap_frames_v1,
	.update_entry = gnttab_update_entry_v1,
	.end_foreign_access_ref = gnttab_end_foreign_access_ref_v1,
	.read_frame = gnttab_read_frame_v1,
};

static const struct gnttab_ops gnttab_v2_ops = {
	.version = 2,
	.grefs_per_grant_frame = XEN_PAGE_SIZE /
				 sizeof(union grant_entry_v2),
	.map_frames = gnttab_map_frames_v2,
	.unmap_frames = gnttab_unmap_frames_v2,
	.update_entry = gnttab_update_entry_v2,
	.end_foreign_access_ref = gnttab_end_foreign_access_ref_v2,
	.read_frame = gnttab_read_frame_v2,
};
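
/*
 * Grant table v1 entries hold the granted frame number in a 32-bit field,
 * so they can only describe memory below 2^(32 + PAGE_SHIFT). V2 is thus
 * required as soon as the machine (or, for translated guests, the guest
 * physical) address width exceeds that limit.
 */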
static bool gnttab_need_v2(void)
{
#ifdef CONFIG_X86
	uint32_t base, width;

	if (xen_pv_domain()) {
		base = xen_cpuid_base();
		if (cpuid_eax(base) < 5)
			return false;	/* Information not available, use V1. */
		width = cpuid_ebx(base + 5) &
			XEN_CPUID_MACHINE_ADDRESS_WIDTH_MASK;
		return width > 32 + PAGE_SHIFT;
	}
#endif
	return !!(max_possible_pfn >> 32);
}

static void gnttab_request_version(void)
{
	long rc;
	struct gnttab_set_version gsv;

	if (gnttab_need_v2())
		gsv.version = 2;
	else
		gsv.version = 1;

	/* Boot parameter overrides automatic selection. */
	if (xen_gnttab_version >= 1 && xen_gnttab_version <= 2)
		gsv.version = xen_gnttab_version;

	rc = HYPERVISOR_grant_table_op(GNTTABOP_set_version, &gsv, 1);
	if (rc == 0 && gsv.version == 2)
		gnttab_interface = &gnttab_v2_ops;
	else
		gnttab_interface = &gnttab_v1_ops;
	pr_info("Grant tables using version %d layout\n",
		gnttab_interface->version);
}

static int gnttab_setup(void)
{
	unsigned int max_nr_gframes;

	max_nr_gframes = gnttab_max_grant_frames();
	if (max_nr_gframes < nr_grant_frames)
		return -ENOSYS;

	if (xen_feature(XENFEAT_auto_translated_physmap) && gnttab_shared.addr == NULL) {
		gnttab_shared.addr = xen_auto_xlat_grant_frames.vaddr;
		if (gnttab_shared.addr == NULL) {
			pr_warn("gnttab share frames are not mapped!\n");
			return -ENOMEM;
		}
	}
	return gnttab_map(0, nr_grant_frames - 1);
}

int gnttab_resume(void)
{
	gnttab_request_version();
	return gnttab_setup();
}

int gnttab_suspend(void)
{
	if (!xen_feature(XENFEAT_auto_translated_physmap))
		gnttab_interface->unmap_frames();
	return 0;
}
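
/*
 * Grow the grant table by enough frames to provide at least @req_entries
 * additional grant references, respecting the frame limit imposed by the
 * hypervisor. Called with gnttab_list_lock held.
 */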
static int gnttab_expand(unsigned int req_entries)
{
	int rc;
	unsigned int cur, extra;

	cur = nr_grant_frames;
	extra = ((req_entries + gnttab_interface->grefs_per_grant_frame - 1) /
		 gnttab_interface->grefs_per_grant_frame);
	if (cur + extra > gnttab_max_grant_frames()) {
		pr_warn_ratelimited("xen/grant-table: max_grant_frames reached"
				    " cur=%u extra=%u limit=%u"
				    " gnttab_free_count=%u req_entries=%u\n",
				    cur, extra, gnttab_max_grant_frames(),
				    gnttab_free_count, req_entries);
		return -ENOSPC;
	}

	rc = gnttab_map(cur, cur + extra - 1);
	if (rc == 0)
		rc = grow_gnttab_list(extra);

	return rc;
}

int gnttab_init(void)
{
	int i;
	unsigned long max_nr_grant_frames, max_nr_grefs;
	unsigned int max_nr_glist_frames, nr_glist_frames;
	int ret;

	gnttab_request_version();
	max_nr_grant_frames = gnttab_max_grant_frames();
	max_nr_grefs = max_nr_grant_frames *
		       gnttab_interface->grefs_per_grant_frame;
	nr_grant_frames = 1;

	/* Determine the maximum number of frames required for the
	 * grant reference free list on the current hypervisor.
	 */
	max_nr_glist_frames = max_nr_grefs / RPP;

	gnttab_list = kmalloc_array(max_nr_glist_frames,
				    sizeof(grant_ref_t *),
				    GFP_KERNEL);
	if (gnttab_list == NULL)
		return -ENOMEM;

	nr_glist_frames = gnttab_frames(nr_grant_frames, RPP);
	for (i = 0; i < nr_glist_frames; i++) {
		gnttab_list[i] = (grant_ref_t *)__get_free_page(GFP_KERNEL);
		if (gnttab_list[i] == NULL) {
			ret = -ENOMEM;
			goto ini_nomem;
		}
	}

	gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
	if (!gnttab_free_bitmap) {
		ret = -ENOMEM;
		goto ini_nomem;
	}

	ret = arch_gnttab_init(max_nr_grant_frames,
			       nr_status_frames(max_nr_grant_frames));
	if (ret < 0)
		goto ini_nomem;

	if (gnttab_setup() < 0) {
		ret = -ENODEV;
		goto ini_nomem;
	}

	gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;

	gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
			gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);

	pr_info("Grant table initialized\n");
	return 0;

ini_nomem:
	for (i--; i >= 0; i--)
		free_page((unsigned long)gnttab_list[i]);
	kfree(gnttab_list);
	bitmap_free(gnttab_free_bitmap);
	return ret;
}
EXPORT_SYMBOL_GPL(gnttab_init);

static int __gnttab_init(void)
{
	if (!xen_domain())
		return -ENODEV;

	/* Delay grant-table initialization in the PV on HVM case */
	if (xen_hvm_domain() && !xen_pvh_domain())
		return 0;

	return gnttab_init();
}
/* Starts after core_initcall so that xen_pvh_gnttab_setup can be called
 * beforehand to initialize xen_auto_xlat_grant_frames. */
core_initcall_sync(__gnttab_init);