hyperv.c 77 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
7277827792780278127822783278427852786278727882789279027912792279327942795279627972798279928002801280228032804280528062807280828092810281128122813281428152816281728182819282028212822282328242825282628272828282928302831283228332834283528362837283828392840284128422843284428452846284728482849285028512852285328542855285628572858285928602861286228632864286528662867286828692870287128722873287428752876287728782879288028812882288328842885288628872888288928902891289228932894289528962897289828992900290129022903290429052906290729082909291029112912291329142915291629172918291929202921
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. * KVM Microsoft Hyper-V emulation
  4. *
  5. * derived from arch/x86/kvm/x86.c
  6. *
  7. * Copyright (C) 2006 Qumranet, Inc.
  8. * Copyright (C) 2008 Qumranet, Inc.
  9. * Copyright IBM Corporation, 2008
  10. * Copyright 2010 Red Hat, Inc. and/or its affiliates.
  11. * Copyright (C) 2015 Andrey Smetanin <asmetanin@virtuozzo.com>
  12. *
  13. * Authors:
  14. * Avi Kivity <avi@qumranet.com>
  15. * Yaniv Kamay <yaniv@qumranet.com>
  16. * Amit Shah <amit.shah@qumranet.com>
  17. * Ben-Ami Yassour <benami@il.ibm.com>
  18. * Andrey Smetanin <asmetanin@virtuozzo.com>
  19. */
  20. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  21. #include "x86.h"
  22. #include "lapic.h"
  23. #include "ioapic.h"
  24. #include "cpuid.h"
  25. #include "hyperv.h"
  26. #include "mmu.h"
  27. #include "xen.h"
  28. #include <linux/cpu.h>
  29. #include <linux/kvm_host.h>
  30. #include <linux/highmem.h>
  31. #include <linux/sched/cputime.h>
  32. #include <linux/spinlock.h>
  33. #include <linux/eventfd.h>
  34. #include <asm/apicdef.h>
  35. #include <asm/mshyperv.h>
  36. #include <trace/events/kvm.h>
  37. #include "trace.h"
  38. #include "irq.h"
  39. #include "fpu.h"
  40. #define KVM_HV_MAX_SPARSE_VCPU_SET_BITS DIV_ROUND_UP(KVM_MAX_VCPUS, HV_VCPUS_PER_SPARSE_BANK)
  41. /*
  42. * As per Hyper-V TLFS, extended hypercalls start from 0x8001
  43. * (HvExtCallQueryCapabilities). Response of this hypercalls is a 64 bit value
  44. * where each bit tells which extended hypercall is available besides
  45. * HvExtCallQueryCapabilities.
  46. *
  47. * 0x8001 - First extended hypercall, HvExtCallQueryCapabilities, no bit
  48. * assigned.
  49. *
  50. * 0x8002 - Bit 0
  51. * 0x8003 - Bit 1
  52. * ..
  53. * 0x8041 - Bit 63
  54. *
  55. * Therefore, HV_EXT_CALL_MAX = 0x8001 + 64
  56. */
  57. #define HV_EXT_CALL_MAX (HV_EXT_CALL_QUERY_CAPABILITIES + 64)
  58. static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
  59. bool vcpu_kick);
  60. static inline u64 synic_read_sint(struct kvm_vcpu_hv_synic *synic, int sint)
  61. {
  62. return atomic64_read(&synic->sint[sint]);
  63. }
  64. static inline int synic_get_sint_vector(u64 sint_value)
  65. {
  66. if (sint_value & HV_SYNIC_SINT_MASKED)
  67. return -1;
  68. return sint_value & HV_SYNIC_SINT_VECTOR_MASK;
  69. }
  70. static bool synic_has_vector_connected(struct kvm_vcpu_hv_synic *synic,
  71. int vector)
  72. {
  73. int i;
  74. for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  75. if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
  76. return true;
  77. }
  78. return false;
  79. }
  80. static bool synic_has_vector_auto_eoi(struct kvm_vcpu_hv_synic *synic,
  81. int vector)
  82. {
  83. int i;
  84. u64 sint_value;
  85. for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  86. sint_value = synic_read_sint(synic, i);
  87. if (synic_get_sint_vector(sint_value) == vector &&
  88. sint_value & HV_SYNIC_SINT_AUTO_EOI)
  89. return true;
  90. }
  91. return false;
  92. }
/*
 * Recompute the "connected" and "auto-EOI" bitmaps for one vector and,
 * if this vCPU's AutoEOI usage toggled, update the per-VM APICv inhibit
 * accounting under apicv_update_lock.
 */
static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
				int vector)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
	bool auto_eoi_old, auto_eoi_new;

	/* Vectors below the first valid SynIC vector are never tracked. */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
		return;

	if (synic_has_vector_connected(synic, vector))
		__set_bit(vector, synic->vec_bitmap);
	else
		__clear_bit(vector, synic->vec_bitmap);

	/* Snapshot "any AutoEOI vector in use" before and after the update. */
	auto_eoi_old = !bitmap_empty(synic->auto_eoi_bitmap, 256);

	if (synic_has_vector_auto_eoi(synic, vector))
		__set_bit(vector, synic->auto_eoi_bitmap);
	else
		__clear_bit(vector, synic->auto_eoi_bitmap);

	auto_eoi_new = !bitmap_empty(synic->auto_eoi_bitmap, 256);

	/* Nothing further to do unless AutoEOI usage toggled for this vCPU. */
	if (auto_eoi_old == auto_eoi_new)
		return;

	if (!enable_apicv)
		return;

	down_write(&vcpu->kvm->arch.apicv_update_lock);

	/* synic_auto_eoi_used counts vCPUs with at least one AutoEOI vector. */
	if (auto_eoi_new)
		hv->synic_auto_eoi_used++;
	else
		hv->synic_auto_eoi_used--;

	/*
	 * Inhibit APICv if any vCPU is using SynIC's AutoEOI, which relies on
	 * the hypervisor to manually inject IRQs.
	 */
	__kvm_set_or_clear_apicv_inhibit(vcpu->kvm,
					 APICV_INHIBIT_REASON_HYPERV,
					 !!hv->synic_auto_eoi_used);

	up_write(&vcpu->kvm->arch.apicv_update_lock);
}
/*
 * Handle a write to one SINT register.  Returns 0 on success, 1 to signal
 * that the write should #GP in the guest.
 */
static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
			  u64 data, bool host)
{
	int vector, old_vector;
	bool masked;

	vector = data & HV_SYNIC_SINT_VECTOR_MASK;
	masked = data & HV_SYNIC_SINT_MASKED;

	/*
	 * Valid vectors are 16-255, however, nested Hyper-V attempts to write
	 * default '0x10000' value on boot and this should not #GP. We need to
	 * allow zero-initing the register from host as well.
	 */
	if (vector < HV_SYNIC_FIRST_VALID_VECTOR && !host && !masked)
		return 1;
	/*
	 * Guest may configure multiple SINTs to use the same vector, so
	 * we maintain a bitmap of vectors handled by synic, and a
	 * bitmap of vectors with auto-eoi behavior. The bitmaps are
	 * updated here, and atomically queried on fast paths.
	 */
	old_vector = synic_read_sint(synic, sint) & HV_SYNIC_SINT_VECTOR_MASK;

	atomic64_set(&synic->sint[sint], data);

	/* Drop the previous vector from the bitmaps, then add the new one. */
	synic_update_vector(synic, old_vector);

	synic_update_vector(synic, vector);

	/* Load SynIC vectors into EOI exit bitmap */
	kvm_make_request(KVM_REQ_SCAN_IOAPIC, hv_synic_to_vcpu(synic));
	return 0;
}
  157. static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
  158. {
  159. struct kvm_vcpu *vcpu = NULL;
  160. unsigned long i;
  161. if (vpidx >= KVM_MAX_VCPUS)
  162. return NULL;
  163. vcpu = kvm_get_vcpu(kvm, vpidx);
  164. if (vcpu && kvm_hv_get_vpindex(vcpu) == vpidx)
  165. return vcpu;
  166. kvm_for_each_vcpu(i, vcpu, kvm)
  167. if (kvm_hv_get_vpindex(vcpu) == vpidx)
  168. return vcpu;
  169. return NULL;
  170. }
  171. static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
  172. {
  173. struct kvm_vcpu *vcpu;
  174. struct kvm_vcpu_hv_synic *synic;
  175. vcpu = get_vcpu_by_vpidx(kvm, vpidx);
  176. if (!vcpu || !to_hv_vcpu(vcpu))
  177. return NULL;
  178. synic = to_hv_synic(vcpu);
  179. return (synic->active) ? synic : NULL;
  180. }
/*
 * Called when the guest acknowledged (EOMed/EOIed) a SINT: retry delivery
 * of any stimer message queued on that SINT and ack the GSI routed to it.
 */
static void kvm_hv_notify_acked_sint(struct kvm_vcpu *vcpu, u32 sint)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	int gsi, idx;

	trace_kvm_hv_notify_acked_sint(vcpu->vcpu_id, sint);

	/* Try to deliver pending Hyper-V SynIC timers messages */
	for (idx = 0; idx < ARRAY_SIZE(hv_vcpu->stimer); idx++) {
		stimer = &hv_vcpu->stimer[idx];
		/* Only message-mode timers targeting this SINT are retried. */
		if (stimer->msg_pending && stimer->config.enable &&
		    !stimer->config.direct_mode &&
		    stimer->config.sintx == sint)
			stimer_mark_pending(stimer, false);
	}

	/* The SINT -> GSI map is protected by irq_srcu. */
	idx = srcu_read_lock(&kvm->irq_srcu);
	gsi = atomic_read(&synic->sint_to_gsi[sint]);
	if (gsi != -1)
		kvm_notify_acked_gsi(kvm, gsi);
	srcu_read_unlock(&kvm->irq_srcu, idx);
}
  203. static void synic_exit(struct kvm_vcpu_hv_synic *synic, u32 msr)
  204. {
  205. struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
  206. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  207. hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNIC;
  208. hv_vcpu->exit.u.synic.msr = msr;
  209. hv_vcpu->exit.u.synic.control = synic->control;
  210. hv_vcpu->exit.u.synic.evt_page = synic->evt_page;
  211. hv_vcpu->exit.u.synic.msg_page = synic->msg_page;
  212. kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
  213. }
/*
 * Handle writes to the SynIC MSR range.  Returns 0 on success, 1 to
 * signal #GP to the guest (or an error to the host).
 */
static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
			 u32 msr, u64 data, bool host)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	int ret;

	/* Inactive SynIC: only the host may write, and only zeroes. */
	if (!synic->active && (!host || data))
		return 1;

	trace_kvm_hv_synic_set_msr(vcpu->vcpu_id, msr, data, host);

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		synic->control = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SVERSION:
		/* SVERSION is read-only for the guest. */
		if (!host) {
			ret = 1;
			break;
		}
		synic->version = data;
		break;
	case HV_X64_MSR_SIEFP:
		/* Zero the event-flags page when the guest enables it. */
		if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->evt_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_SIMP:
		/* Zero the message page when the guest enables it. */
		if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
		    !synic->dont_zero_synic_pages)
			if (kvm_clear_guest(vcpu->kvm,
					    data & PAGE_MASK, PAGE_SIZE)) {
				ret = 1;
				break;
			}
		synic->msg_page = data;
		if (!host)
			synic_exit(synic, msr);
		break;
	case HV_X64_MSR_EOM: {
		int i;

		if (!synic->active)
			break;

		/* End-of-message: retry delivery on every SINT. */
		for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
			kvm_hv_notify_acked_sint(vcpu, i);
		break;
	}
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		ret = synic_set_sint(synic, msr - HV_X64_MSR_SINT0, data, host);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}
  277. static bool kvm_hv_is_syndbg_enabled(struct kvm_vcpu *vcpu)
  278. {
  279. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  280. return hv_vcpu->cpuid_cache.syndbg_cap_eax &
  281. HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
  282. }
  283. static int kvm_hv_syndbg_complete_userspace(struct kvm_vcpu *vcpu)
  284. {
  285. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  286. if (vcpu->run->hyperv.u.syndbg.msr == HV_X64_MSR_SYNDBG_CONTROL)
  287. hv->hv_syndbg.control.status =
  288. vcpu->run->hyperv.u.syndbg.status;
  289. return 1;
  290. }
  291. static void syndbg_exit(struct kvm_vcpu *vcpu, u32 msr)
  292. {
  293. struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);
  294. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  295. hv_vcpu->exit.type = KVM_EXIT_HYPERV_SYNDBG;
  296. hv_vcpu->exit.u.syndbg.msr = msr;
  297. hv_vcpu->exit.u.syndbg.control = syndbg->control.control;
  298. hv_vcpu->exit.u.syndbg.send_page = syndbg->control.send_page;
  299. hv_vcpu->exit.u.syndbg.recv_page = syndbg->control.recv_page;
  300. hv_vcpu->exit.u.syndbg.pending_page = syndbg->control.pending_page;
  301. vcpu->arch.complete_userspace_io =
  302. kvm_hv_syndbg_complete_userspace;
  303. kvm_make_request(KVM_REQ_HV_EXIT, vcpu);
  304. }
/*
 * Handle writes to the SynDbg MSR range.  Returns 0 on success, 1 when
 * the guest is not allowed to use SynDbg.  Unknown MSRs are ignored.
 */
static int syndbg_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
		return 1;

	trace_kvm_hv_syndbg_set_msr(vcpu->vcpu_id,
				    to_hv_vcpu(vcpu)->vp_index, msr, data);
	switch (msr) {
	case HV_X64_MSR_SYNDBG_CONTROL:
		syndbg->control.control = data;
		/* Guest writes to CONTROL are forwarded to userspace. */
		if (!host)
			syndbg_exit(vcpu, msr);
		break;
	case HV_X64_MSR_SYNDBG_STATUS:
		syndbg->control.status = data;
		break;
	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
		syndbg->control.send_page = data;
		break;
	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
		syndbg->control.recv_page = data;
		break;
	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		syndbg->control.pending_page = data;
		/* Guest writes to PENDING_BUFFER are forwarded to userspace. */
		if (!host)
			syndbg_exit(vcpu, msr);
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
		syndbg->options = data;
		break;
	default:
		break;
	}

	return 0;
}
/*
 * Handle reads from the SynDbg MSR range.  Returns 0 on success, 1 when
 * the guest is not allowed to use SynDbg.  For unknown MSRs *pdata is
 * left unchanged.
 */
static int syndbg_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
{
	struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

	if (!kvm_hv_is_syndbg_enabled(vcpu) && !host)
		return 1;

	switch (msr) {
	case HV_X64_MSR_SYNDBG_CONTROL:
		*pdata = syndbg->control.control;
		break;
	case HV_X64_MSR_SYNDBG_STATUS:
		*pdata = syndbg->control.status;
		break;
	case HV_X64_MSR_SYNDBG_SEND_BUFFER:
		*pdata = syndbg->control.send_page;
		break;
	case HV_X64_MSR_SYNDBG_RECV_BUFFER:
		*pdata = syndbg->control.recv_page;
		break;
	case HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		*pdata = syndbg->control.pending_page;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
		*pdata = syndbg->options;
		break;
	default:
		break;
	}

	trace_kvm_hv_syndbg_get_msr(vcpu->vcpu_id, kvm_hv_get_vpindex(vcpu), msr, *pdata);

	return 0;
}
/*
 * Handle reads from the SynIC MSR range.  Returns 0 on success, 1 to
 * signal #GP to the guest (inactive SynIC or unknown MSR).
 */
static int synic_get_msr(struct kvm_vcpu_hv_synic *synic, u32 msr, u64 *pdata,
			 bool host)
{
	int ret;

	if (!synic->active && !host)
		return 1;

	ret = 0;
	switch (msr) {
	case HV_X64_MSR_SCONTROL:
		*pdata = synic->control;
		break;
	case HV_X64_MSR_SVERSION:
		*pdata = synic->version;
		break;
	case HV_X64_MSR_SIEFP:
		*pdata = synic->evt_page;
		break;
	case HV_X64_MSR_SIMP:
		*pdata = synic->msg_page;
		break;
	case HV_X64_MSR_EOM:
		/* EOM is write-only; reads return 0. */
		*pdata = 0;
		break;
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		*pdata = atomic64_read(&synic->sint[msr - HV_X64_MSR_SINT0]);
		break;
	default:
		ret = 1;
		break;
	}
	return ret;
}
/*
 * Inject the interrupt configured on a SINT into this vCPU's local APIC.
 * Returns a negative errno on failure, otherwise the delivery result from
 * kvm_irq_delivery_to_apic().
 */
static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	struct kvm_lapic_irq irq;
	int ret, vector;

	/* SynIC interrupt injection requires an in-kernel local APIC. */
	if (KVM_BUG_ON(!lapic_in_kernel(vcpu), vcpu->kvm))
		return -EINVAL;

	if (sint >= ARRAY_SIZE(synic->sint))
		return -EINVAL;

	/* -ENOENT when the SINT is masked (no vector to deliver). */
	vector = synic_get_sint_vector(synic_read_sint(synic, sint));
	if (vector < 0)
		return -ENOENT;

	/* Fixed-mode, edge-like, self-targeted interrupt. */
	memset(&irq, 0, sizeof(irq));
	irq.shorthand = APIC_DEST_SELF;
	irq.dest_mode = APIC_DEST_PHYSICAL;
	irq.delivery_mode = APIC_DM_FIXED;
	irq.vector = vector;
	irq.level = 1;

	ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL);
	trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret);
	return ret;
}
  424. int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
  425. {
  426. struct kvm_vcpu_hv_synic *synic;
  427. synic = synic_get(kvm, vpidx);
  428. if (!synic)
  429. return -EINVAL;
  430. return synic_set_irq(synic, sint);
  431. }
  432. void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
  433. {
  434. struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);
  435. int i;
  436. trace_kvm_hv_synic_send_eoi(vcpu->vcpu_id, vector);
  437. for (i = 0; i < ARRAY_SIZE(synic->sint); i++)
  438. if (synic_get_sint_vector(synic_read_sint(synic, i)) == vector)
  439. kvm_hv_notify_acked_sint(vcpu, i);
  440. }
  441. static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
  442. {
  443. struct kvm_vcpu_hv_synic *synic;
  444. synic = synic_get(kvm, vpidx);
  445. if (!synic)
  446. return -EINVAL;
  447. if (sint >= ARRAY_SIZE(synic->sint_to_gsi))
  448. return -EINVAL;
  449. atomic_set(&synic->sint_to_gsi[sint], gsi);
  450. return 0;
  451. }
/*
 * Rebuild the SINT -> GSI mapping from the current IRQ routing table.
 * The table is dereferenced under irq_srcu / irq_lock, see the
 * srcu_dereference_check() below.
 */
void kvm_hv_irq_routing_update(struct kvm *kvm)
{
	struct kvm_irq_routing_table *irq_rt;
	struct kvm_kernel_irq_routing_entry *e;
	u32 gsi;

	irq_rt = srcu_dereference_check(kvm->irq_routing, &kvm->irq_srcu,
					lockdep_is_held(&kvm->irq_lock));

	/* Scan every GSI for Hyper-V SINT routing entries. */
	for (gsi = 0; gsi < irq_rt->nr_rt_entries; gsi++) {
		hlist_for_each_entry(e, &irq_rt->map[gsi], link) {
			if (e->type == KVM_IRQ_ROUTING_HV_SINT)
				kvm_hv_set_sint_gsi(kvm, e->hv_sint.vcpu,
						    e->hv_sint.sint, gsi);
		}
	}
}
  467. static void synic_init(struct kvm_vcpu_hv_synic *synic)
  468. {
  469. int i;
  470. memset(synic, 0, sizeof(*synic));
  471. synic->version = HV_SYNIC_VERSION_1;
  472. for (i = 0; i < ARRAY_SIZE(synic->sint); i++) {
  473. atomic64_set(&synic->sint[i], HV_SYNIC_SINT_MASKED);
  474. atomic_set(&synic->sint_to_gsi[i], -1);
  475. }
  476. }
/*
 * Return the Hyper-V reference counter value in 100ns units (note the
 * div-by-100 of the kvmclock nanosecond value on the fallback path).
 */
static u64 get_time_ref_counter(struct kvm *kvm)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	struct kvm_vcpu *vcpu;
	u64 tsc;

	/*
	 * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
	 * is broken, disabled or being updated.
	 */
	if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
		return div_u64(get_kvmclock_ns(kvm), 100);

	/* Mirror the guest's TSC-page formula: (tsc * scale) >> 64 + offset. */
	vcpu = kvm_get_vcpu(kvm, 0);
	tsc = kvm_read_l1_tsc(vcpu, rdtsc());
	return mul_u64_u64_shr(tsc, hv->tsc_ref.tsc_scale, 64)
		+ hv->tsc_ref.tsc_offset;
}
  493. static void stimer_mark_pending(struct kvm_vcpu_hv_stimer *stimer,
  494. bool vcpu_kick)
  495. {
  496. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  497. set_bit(stimer->index,
  498. to_hv_vcpu(vcpu)->stimer_pending_bitmap);
  499. kvm_make_request(KVM_REQ_HV_STIMER, vcpu);
  500. if (vcpu_kick)
  501. kvm_vcpu_kick(vcpu);
  502. }
/*
 * Stop a synthetic timer and clear all of its pending state.  The hrtimer
 * is cancelled first so its callback cannot re-set the pending bit after
 * we clear it.
 */
static void stimer_cleanup(struct kvm_vcpu_hv_stimer *stimer)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);

	trace_kvm_hv_stimer_cleanup(hv_stimer_to_vcpu(stimer)->vcpu_id,
				    stimer->index);

	hrtimer_cancel(&stimer->timer);
	clear_bit(stimer->index,
		  to_hv_vcpu(vcpu)->stimer_pending_bitmap);
	stimer->msg_pending = false;
	stimer->exp_time = 0;
}
  514. static enum hrtimer_restart stimer_timer_callback(struct hrtimer *timer)
  515. {
  516. struct kvm_vcpu_hv_stimer *stimer;
  517. stimer = container_of(timer, struct kvm_vcpu_hv_stimer, timer);
  518. trace_kvm_hv_stimer_callback(hv_stimer_to_vcpu(stimer)->vcpu_id,
  519. stimer->index);
  520. stimer_mark_pending(stimer, true);
  521. return HRTIMER_NORESTART;
  522. }
/*
 * stimer_start() assumptions:
 * a) stimer->count is not equal to 0
 * b) stimer->config has HV_STIMER_ENABLE flag
 */
static int stimer_start(struct kvm_vcpu_hv_stimer *stimer)
{
	u64 time_now;
	ktime_t ktime_now;

	/* time_now/count/exp_time are in 100ns reference-counter ticks. */
	time_now = get_time_ref_counter(hv_stimer_to_vcpu(stimer)->kvm);
	ktime_now = ktime_get();

	if (stimer->config.periodic) {
		if (stimer->exp_time) {
			if (time_now >= stimer->exp_time) {
				u64 remainder;

				/*
				 * Missed one or more periods: advance
				 * exp_time to the next on-phase expiry.
				 */
				div64_u64_rem(time_now - stimer->exp_time,
					      stimer->count, &remainder);
				stimer->exp_time =
					time_now + (stimer->count - remainder);
			}
		} else
			stimer->exp_time = time_now + stimer->count;

		trace_kvm_hv_stimer_start_periodic(
					hv_stimer_to_vcpu(stimer)->vcpu_id,
					stimer->index,
					time_now, stimer->exp_time);

		/* Multiply by 100 to convert ticks to nanoseconds. */
		hrtimer_start(&stimer->timer,
			      ktime_add_ns(ktime_now,
					   100 * (stimer->exp_time - time_now)),
			      HRTIMER_MODE_ABS);
		return 0;
	}
	stimer->exp_time = stimer->count;
	if (time_now >= stimer->count) {
		/*
		 * Expire timer according to Hypervisor Top-Level Functional
		 * specification v4(15.3.1):
		 * "If a one shot is enabled and the specified count is in
		 * the past, it will expire immediately."
		 */
		stimer_mark_pending(stimer, false);
		return 0;
	}

	trace_kvm_hv_stimer_start_one_shot(hv_stimer_to_vcpu(stimer)->vcpu_id,
					   stimer->index,
					   time_now, stimer->count);

	hrtimer_start(&stimer->timer,
		      ktime_add_ns(ktime_now, 100 * (stimer->count - time_now)),
		      HRTIMER_MODE_ABS);
	return 0;
}
/*
 * Handle a write to a stimer config MSR.  Returns 0 on success, 1 to
 * signal #GP to the guest.
 */
static int stimer_set_config(struct kvm_vcpu_hv_stimer *stimer, u64 config,
			     bool host)
{
	union hv_stimer_config new_config = {.as_uint64 = config},
		old_config = {.as_uint64 = stimer->config.as_uint64};
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);

	/* Inactive SynIC: only the host may write, and only zeroes. */
	if (!synic->active && (!host || config))
		return 1;

	/* Direct mode requires the STIMER_DIRECT CPUID feature when enforced. */
	if (unlikely(!host && hv_vcpu->enforce_cpuid && new_config.direct_mode &&
		     !(hv_vcpu->cpuid_cache.features_edx &
		       HV_STIMER_DIRECT_MODE_AVAILABLE)))
		return 1;

	trace_kvm_hv_stimer_set_config(hv_stimer_to_vcpu(stimer)->vcpu_id,
				       stimer->index, config, host);

	stimer_cleanup(stimer);

	/*
	 * A previously-enabled, message-mode timer with SINTx == 0 has
	 * nowhere to deliver; force it off.
	 */
	if (old_config.enable &&
	    !new_config.direct_mode && new_config.sintx == 0)
		new_config.enable = 0;
	stimer->config.as_uint64 = new_config.as_uint64;

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}
/*
 * Handle a write to a stimer count MSR.  Returns 0 on success, 1 to
 * signal #GP to the guest.
 */
static int stimer_set_count(struct kvm_vcpu_hv_stimer *stimer, u64 count,
			    bool host)
{
	struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
	struct kvm_vcpu_hv_synic *synic = to_hv_synic(vcpu);

	/* Inactive SynIC: only the host may write, and only zeroes. */
	if (!synic->active && (!host || count))
		return 1;

	trace_kvm_hv_stimer_set_count(hv_stimer_to_vcpu(stimer)->vcpu_id,
				      stimer->index, count, host);

	stimer_cleanup(stimer);
	stimer->count = count;
	if (!host) {
		/* Guest writes: 0 disables; non-zero may auto-enable. */
		if (stimer->count == 0)
			stimer->config.enable = 0;
		else if (stimer->config.auto_enable)
			stimer->config.enable = 1;
	}

	if (stimer->config.enable)
		stimer_mark_pending(stimer, false);

	return 0;
}
/* Read back the cached STIMERx_CONFIG MSR value; always succeeds. */
static int stimer_get_config(struct kvm_vcpu_hv_stimer *stimer, u64 *pconfig)
{
	*pconfig = stimer->config.as_uint64;
	return 0;
}
/* Read back the cached STIMERx_COUNT MSR value; always succeeds. */
static int stimer_get_count(struct kvm_vcpu_hv_stimer *stimer, u64 *pcount)
{
	*pcount = stimer->count;
	return 0;
}
/*
 * Post @src_msg into the SINT @sint slot of the guest's SynIC message page
 * and raise the corresponding interrupt.
 *
 * Returns 0 on success (or when the slot is busy and @no_retry is set),
 * -ENOENT if the message page is disabled, -EAGAIN if the slot is still
 * occupied (msg_pending has been set so the guest will signal EOM and the
 * caller may retry), -EFAULT if the SINT interrupt could not be injected,
 * or a negative error from the guest memory accessors.
 */
static int synic_deliver_msg(struct kvm_vcpu_hv_synic *synic, u32 sint,
			     struct hv_message *src_msg, bool no_retry)
{
	struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
	int msg_off = offsetof(struct hv_message_page, sint_message[sint]);
	gfn_t msg_page_gfn;
	struct hv_message_header hv_hdr;
	int r;

	if (!(synic->msg_page & HV_SYNIC_SIMP_ENABLE))
		return -ENOENT;

	msg_page_gfn = synic->msg_page >> PAGE_SHIFT;

	/*
	 * Strictly following the spec-mandated ordering would assume setting
	 * .msg_pending before checking .message_type.  However, this function
	 * is only called in vcpu context so the entire update is atomic from
	 * guest POV and thus the exact order here doesn't matter.
	 */
	r = kvm_vcpu_read_guest_page(vcpu, msg_page_gfn, &hv_hdr.message_type,
				     msg_off + offsetof(struct hv_message,
							header.message_type),
				     sizeof(hv_hdr.message_type));
	if (r < 0)
		return r;

	if (hv_hdr.message_type != HVMSG_NONE) {
		/* The slot still holds an unconsumed message. */
		if (no_retry)
			return 0;

		/* Ask the guest to signal EOM when the slot frees up. */
		hv_hdr.message_flags.msg_pending = 1;
		r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn,
					      &hv_hdr.message_flags,
					      msg_off +
					      offsetof(struct hv_message,
						       header.message_flags),
					      sizeof(hv_hdr.message_flags));
		if (r < 0)
			return r;
		return -EAGAIN;
	}

	/* Only copy the header plus the used part of the payload. */
	r = kvm_vcpu_write_guest_page(vcpu, msg_page_gfn, src_msg, msg_off,
				      sizeof(src_msg->header) +
				      src_msg->header.payload_size);
	if (r < 0)
		return r;

	r = synic_set_irq(synic, sint);
	if (r < 0)
		return r;
	/* synic_set_irq() returned 0 recipients: delivery failed. */
	if (r == 0)
		return -EFAULT;
	return 0;
}
  679. static int stimer_send_msg(struct kvm_vcpu_hv_stimer *stimer)
  680. {
  681. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  682. struct hv_message *msg = &stimer->msg;
  683. struct hv_timer_message_payload *payload =
  684. (struct hv_timer_message_payload *)&msg->u.payload;
  685. /*
  686. * To avoid piling up periodic ticks, don't retry message
  687. * delivery for them (within "lazy" lost ticks policy).
  688. */
  689. bool no_retry = stimer->config.periodic;
  690. payload->expiration_time = stimer->exp_time;
  691. payload->delivery_time = get_time_ref_counter(vcpu->kvm);
  692. return synic_deliver_msg(to_hv_synic(vcpu),
  693. stimer->config.sintx, msg,
  694. no_retry);
  695. }
  696. static int stimer_notify_direct(struct kvm_vcpu_hv_stimer *stimer)
  697. {
  698. struct kvm_vcpu *vcpu = hv_stimer_to_vcpu(stimer);
  699. struct kvm_lapic_irq irq = {
  700. .delivery_mode = APIC_DM_FIXED,
  701. .vector = stimer->config.apic_vector
  702. };
  703. if (lapic_in_kernel(vcpu))
  704. return !kvm_apic_set_irq(vcpu, &irq, NULL);
  705. return 0;
  706. }
  707. static void stimer_expiration(struct kvm_vcpu_hv_stimer *stimer)
  708. {
  709. int r, direct = stimer->config.direct_mode;
  710. stimer->msg_pending = true;
  711. if (!direct)
  712. r = stimer_send_msg(stimer);
  713. else
  714. r = stimer_notify_direct(stimer);
  715. trace_kvm_hv_stimer_expiration(hv_stimer_to_vcpu(stimer)->vcpu_id,
  716. stimer->index, direct, r);
  717. if (!r) {
  718. stimer->msg_pending = false;
  719. if (!(stimer->config.periodic))
  720. stimer->config.enable = 0;
  721. }
  722. }
/*
 * Process every synthetic timer flagged in stimer_pending_bitmap for
 * @vcpu: deliver expirations whose deadline has passed, then either
 * re-arm or tear down each timer.  Runs in vcpu context.
 */
void kvm_hv_process_stimers(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_vcpu_hv_stimer *stimer;
	u64 time_now, exp_time;
	int i;

	if (!hv_vcpu)
		return;

	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		if (test_and_clear_bit(i, hv_vcpu->stimer_pending_bitmap)) {
			stimer = &hv_vcpu->stimer[i];
			if (stimer->config.enable) {
				exp_time = stimer->exp_time;

				/* Deliver if the deadline has passed. */
				if (exp_time) {
					time_now =
						get_time_ref_counter(vcpu->kvm);
					if (time_now >= exp_time)
						stimer_expiration(stimer);
				}

				/*
				 * Re-arm if still enabled with a non-zero
				 * count and no message delivery is pending
				 * (retry happens first); otherwise tear the
				 * timer down.
				 */
				if ((stimer->config.enable) &&
				    stimer->count) {
					if (!stimer->msg_pending)
						stimer_start(stimer);
				} else
					stimer_cleanup(stimer);
			}
		}
}
  751. void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu)
  752. {
  753. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  754. int i;
  755. if (!hv_vcpu)
  756. return;
  757. for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
  758. stimer_cleanup(&hv_vcpu->stimer[i]);
  759. kfree(hv_vcpu);
  760. vcpu->arch.hyperv = NULL;
  761. }
  762. bool kvm_hv_assist_page_enabled(struct kvm_vcpu *vcpu)
  763. {
  764. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  765. if (!hv_vcpu)
  766. return false;
  767. if (!(hv_vcpu->hv_vapic & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE))
  768. return false;
  769. return vcpu->arch.pv_eoi.msr_val & KVM_MSR_ENABLED;
  770. }
  771. EXPORT_SYMBOL_GPL(kvm_hv_assist_page_enabled);
  772. int kvm_hv_get_assist_page(struct kvm_vcpu *vcpu)
  773. {
  774. struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
  775. if (!hv_vcpu || !kvm_hv_assist_page_enabled(vcpu))
  776. return -EFAULT;
  777. return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data,
  778. &hv_vcpu->vp_assist_page, sizeof(struct hv_vp_assist_page));
  779. }
  780. EXPORT_SYMBOL_GPL(kvm_hv_get_assist_page);
  781. static void stimer_prepare_msg(struct kvm_vcpu_hv_stimer *stimer)
  782. {
  783. struct hv_message *msg = &stimer->msg;
  784. struct hv_timer_message_payload *payload =
  785. (struct hv_timer_message_payload *)&msg->u.payload;
  786. memset(&msg->header, 0, sizeof(msg->header));
  787. msg->header.message_type = HVMSG_TIMER_EXPIRED;
  788. msg->header.payload_size = sizeof(*payload);
  789. payload->timer_index = stimer->index;
  790. payload->expiration_time = 0;
  791. payload->delivery_time = 0;
  792. }
/* Reset @stimer to a clean, disarmed state with index @timer_index. */
static void stimer_init(struct kvm_vcpu_hv_stimer *stimer, int timer_index)
{
	memset(stimer, 0, sizeof(*stimer));
	stimer->index = timer_index;
	hrtimer_init(&stimer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	stimer->timer.function = stimer_timer_callback;
	stimer_prepare_msg(stimer);
}
/*
 * Lazily allocate and initialize the per-vCPU Hyper-V context.
 * Idempotent: returns 0 immediately when the context already exists.
 *
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
int kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	int i;

	if (hv_vcpu)
		return 0;

	hv_vcpu = kzalloc(sizeof(struct kvm_vcpu_hv), GFP_KERNEL_ACCOUNT);
	if (!hv_vcpu)
		return -ENOMEM;

	vcpu->arch.hyperv = hv_vcpu;
	hv_vcpu->vcpu = vcpu;

	synic_init(&hv_vcpu->synic);

	bitmap_zero(hv_vcpu->stimer_pending_bitmap, HV_SYNIC_STIMER_COUNT);
	for (i = 0; i < ARRAY_SIZE(hv_vcpu->stimer); i++)
		stimer_init(&hv_vcpu->stimer[i], i);

	/* The VP index defaults to vcpu_idx; the host may change it later. */
	hv_vcpu->vp_index = vcpu->vcpu_idx;

	for (i = 0; i < HV_NR_TLB_FLUSH_FIFOS; i++) {
		INIT_KFIFO(hv_vcpu->tlb_flush_fifo[i].entries);
		spin_lock_init(&hv_vcpu->tlb_flush_fifo[i].write_lock);
	}

	return 0;
}
  823. int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
  824. {
  825. struct kvm_vcpu_hv_synic *synic;
  826. int r;
  827. r = kvm_hv_vcpu_init(vcpu);
  828. if (r)
  829. return r;
  830. synic = to_hv_synic(vcpu);
  831. synic->active = true;
  832. synic->dont_zero_synic_pages = dont_zero_synic_pages;
  833. synic->control = HV_SYNIC_CONTROL_ENABLE;
  834. return 0;
  835. }
  836. static bool kvm_hv_msr_partition_wide(u32 msr)
  837. {
  838. bool r = false;
  839. switch (msr) {
  840. case HV_X64_MSR_GUEST_OS_ID:
  841. case HV_X64_MSR_HYPERCALL:
  842. case HV_X64_MSR_REFERENCE_TSC:
  843. case HV_X64_MSR_TIME_REF_COUNT:
  844. case HV_X64_MSR_CRASH_CTL:
  845. case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
  846. case HV_X64_MSR_RESET:
  847. case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
  848. case HV_X64_MSR_TSC_EMULATION_CONTROL:
  849. case HV_X64_MSR_TSC_EMULATION_STATUS:
  850. case HV_X64_MSR_TSC_INVARIANT_CONTROL:
  851. case HV_X64_MSR_SYNDBG_OPTIONS:
  852. case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
  853. r = true;
  854. break;
  855. }
  856. return r;
  857. }
  858. static int kvm_hv_msr_get_crash_data(struct kvm *kvm, u32 index, u64 *pdata)
  859. {
  860. struct kvm_hv *hv = to_kvm_hv(kvm);
  861. size_t size = ARRAY_SIZE(hv->hv_crash_param);
  862. if (WARN_ON_ONCE(index >= size))
  863. return -EINVAL;
  864. *pdata = hv->hv_crash_param[array_index_nospec(index, size)];
  865. return 0;
  866. }
  867. static int kvm_hv_msr_get_crash_ctl(struct kvm *kvm, u64 *pdata)
  868. {
  869. struct kvm_hv *hv = to_kvm_hv(kvm);
  870. *pdata = hv->hv_crash_ctl;
  871. return 0;
  872. }
  873. static int kvm_hv_msr_set_crash_ctl(struct kvm *kvm, u64 data)
  874. {
  875. struct kvm_hv *hv = to_kvm_hv(kvm);
  876. hv->hv_crash_ctl = data & HV_CRASH_CTL_CRASH_NOTIFY;
  877. return 0;
  878. }
  879. static int kvm_hv_msr_set_crash_data(struct kvm *kvm, u32 index, u64 data)
  880. {
  881. struct kvm_hv *hv = to_kvm_hv(kvm);
  882. size_t size = ARRAY_SIZE(hv->hv_crash_param);
  883. if (WARN_ON_ONCE(index >= size))
  884. return -EINVAL;
  885. hv->hv_crash_param[array_index_nospec(index, size)] = data;
  886. return 0;
  887. }
  888. /*
  889. * The kvmclock and Hyper-V TSC page use similar formulas, and converting
  890. * between them is possible:
  891. *
  892. * kvmclock formula:
  893. * nsec = (ticks - tsc_timestamp) * tsc_to_system_mul * 2^(tsc_shift-32)
  894. * + system_time
  895. *
  896. * Hyper-V formula:
  897. * nsec/100 = ticks * scale / 2^64 + offset
  898. *
  899. * When tsc_timestamp = system_time = 0, offset is zero in the Hyper-V formula.
  900. * By dividing the kvmclock formula by 100 and equating what's left we get:
  901. * ticks * scale / 2^64 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
  902. * scale / 2^64 = tsc_to_system_mul * 2^(tsc_shift-32) / 100
  903. * scale = tsc_to_system_mul * 2^(32+tsc_shift) / 100
  904. *
  905. * Now expand the kvmclock formula and divide by 100:
  906. * nsec = ticks * tsc_to_system_mul * 2^(tsc_shift-32)
  907. * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32)
  908. * + system_time
  909. * nsec/100 = ticks * tsc_to_system_mul * 2^(tsc_shift-32) / 100
  910. * - tsc_timestamp * tsc_to_system_mul * 2^(tsc_shift-32) / 100
  911. * + system_time / 100
  912. *
  913. * Replace tsc_to_system_mul * 2^(tsc_shift-32) / 100 by scale / 2^64:
  914. * nsec/100 = ticks * scale / 2^64
  915. * - tsc_timestamp * scale / 2^64
  916. * + system_time / 100
  917. *
  918. * Equate with the Hyper-V formula so that ticks * scale / 2^64 cancels out:
  919. * offset = system_time / 100 - tsc_timestamp * scale / 2^64
  920. *
  921. * These two equivalencies are implemented in this function.
  922. */
/*
 * Convert kvmclock parameters (@hv_clock) into the Hyper-V TSC page
 * scale/offset pair per the derivation in the comment above.
 *
 * Returns false when conversion is impossible (unstable TSC, or the
 * scale would overflow 64 bits); the guest then falls back to the time
 * reference count MSR.
 */
static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
					struct ms_hyperv_tsc_page *tsc_ref)
{
	u64 max_mul;

	if (!(hv_clock->flags & PVCLOCK_TSC_STABLE_BIT))
		return false;

	/*
	 * check if scale would overflow, if so we use the time ref counter
	 *    tsc_to_system_mul * 2^(tsc_shift+32) / 100 >= 2^64
	 *    tsc_to_system_mul / 100 >= 2^(32-tsc_shift)
	 *    tsc_to_system_mul >= 100 * 2^(32-tsc_shift)
	 */
	max_mul = 100ull << (32 - hv_clock->tsc_shift);
	if (hv_clock->tsc_to_system_mul >= max_mul)
		return false;

	/*
	 * Otherwise compute the scale and offset according to the formulas
	 * derived above.
	 */
	tsc_ref->tsc_scale =
		mul_u64_u32_div(1ULL << (32 + hv_clock->tsc_shift),
				hv_clock->tsc_to_system_mul,
				100);

	/* offset = system_time / 100 - tsc_timestamp * scale / 2^64 */
	tsc_ref->tsc_offset = hv_clock->system_time;
	do_div(tsc_ref->tsc_offset, 100);
	tsc_ref->tsc_offset -=
		mul_u64_u64_shr(hv_clock->tsc_timestamp, tsc_ref->tsc_scale, 64);
	return true;
}
  952. /*
  953. * Don't touch TSC page values if the guest has opted for TSC emulation after
  954. * migration. KVM doesn't fully support reenlightenment notifications and TSC
  955. * access emulation and Hyper-V is known to expect the values in TSC page to
  956. * stay constant before TSC access emulation is disabled from guest side
  957. * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
  958. * frequency and guest visible TSC value across migration (and prevent it when
  959. * TSC scaling is unsupported).
  960. */
  961. static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
  962. {
  963. return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
  964. hv->hv_tsc_emulation_control;
  965. }
/*
 * (Re)populate the guest's Hyper-V reference TSC page from the current
 * kvmclock parameters.  Follows the TSC-page protocol: the sequence
 * field is forced to 0 while scale/offset are rewritten, so that a
 * racing guest falls back to the time reference count MSR.
 */
void kvm_hv_setup_tsc_page(struct kvm *kvm,
			   struct pvclock_vcpu_time_info *hv_clock)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	u32 tsc_seq;
	u64 gfn;

	BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
	BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);

	mutex_lock(&hv->hv_lock);

	/* Nothing to do when the page is broken, already set, or unset. */
	if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
	    hv->hv_tsc_page_status == HV_TSC_PAGE_SET ||
	    hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
		goto out_unlock;

	if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
		goto out_unlock;

	gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
	/*
	 * Because the TSC parameters only vary when there is a
	 * change in the master clock, do not bother with caching.
	 */
	if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
				    &tsc_seq, sizeof(tsc_seq))))
		goto out_err;

	if (tsc_seq && tsc_page_update_unsafe(hv)) {
		/*
		 * TSC access emulation is active: keep the guest-visible
		 * contents untouched and just cache them in hv->tsc_ref.
		 */
		if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
			goto out_err;

		hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
		goto out_unlock;
	}

	/*
	 * While we're computing and writing the parameters, force the
	 * guest to use the time reference count MSR.
	 */
	hv->tsc_ref.tsc_sequence = 0;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_err;

	if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
		goto out_err;

	/* Ensure sequence is zero before writing the rest of the struct. */
	smp_wmb();
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
		goto out_err;

	/*
	 * Now switch to the TSC page mechanism by writing the sequence.
	 */
	tsc_seq++;
	/* Skip the invalid (0xFFFFFFFF) and fallback (0) sequence values. */
	if (tsc_seq == 0xFFFFFFFF || tsc_seq == 0)
		tsc_seq = 1;

	/* Write the struct entirely before the non-zero sequence. */
	smp_wmb();

	hv->tsc_ref.tsc_sequence = tsc_seq;
	if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
			    &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
		goto out_err;

	hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
	goto out_unlock;

out_err:
	hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
out_unlock:
	mutex_unlock(&hv->hv_lock);
}
/*
 * Flag the TSC page for a rewrite after a host-side clock change (the
 * actual update happens in kvm_hv_setup_tsc_page()), unless updating it
 * is currently unsafe (see tsc_page_update_unsafe()).
 */
void kvm_hv_request_tsc_page_update(struct kvm *kvm)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);

	mutex_lock(&hv->hv_lock);

	if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET &&
	    !tsc_page_update_unsafe(hv))
		hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;

	mutex_unlock(&hv->hv_lock);
}
/*
 * With CPUID enforcement enabled, check that the guest was given the
 * CPUID feature/access bit guarding @msr.  Without enforcement every
 * access is allowed; unknown MSRs are denied.
 */
static bool hv_check_msr_access(struct kvm_vcpu_hv *hv_vcpu, u32 msr)
{
	if (!hv_vcpu->enforce_cpuid)
		return true;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
	case HV_X64_MSR_HYPERCALL:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_HYPERCALL_AVAILABLE;
	case HV_X64_MSR_VP_RUNTIME:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_VP_RUNTIME_AVAILABLE;
	case HV_X64_MSR_TIME_REF_COUNT:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_TIME_REF_COUNT_AVAILABLE;
	case HV_X64_MSR_VP_INDEX:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_VP_INDEX_AVAILABLE;
	case HV_X64_MSR_RESET:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_RESET_AVAILABLE;
	case HV_X64_MSR_REFERENCE_TSC:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_REFERENCE_TSC_AVAILABLE;
	/* SynIC MSRs */
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_SYNIC_AVAILABLE;
	/* Synthetic timer MSRs */
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG:
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_SYNTIMER_AVAILABLE;
	/* APIC access MSRs */
	case HV_X64_MSR_EOI:
	case HV_X64_MSR_ICR:
	case HV_X64_MSR_TPR:
	case HV_X64_MSR_VP_ASSIST_PAGE:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_MSR_APIC_ACCESS_AVAILABLE;
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_ACCESS_FREQUENCY_MSRS;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_ACCESS_REENLIGHTENMENT;
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
		return hv_vcpu->cpuid_cache.features_eax &
			HV_ACCESS_TSC_INVARIANT;
	/* Crash MSRs */
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
	case HV_X64_MSR_CRASH_CTL:
		return hv_vcpu->cpuid_cache.features_edx &
			HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
	/* Synthetic debugger MSRs */
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return hv_vcpu->cpuid_cache.features_edx &
			HV_FEATURE_DEBUG_MSRS_AVAILABLE;
	default:
		break;
	}

	return false;
}
#define KVM_HV_WIN2016_GUEST_ID 0x1040a00003839
#define KVM_HV_WIN2016_GUEST_ID_MASK (~GENMASK_ULL(23, 16)) /* mask out the service version */

/*
 * Hyper-V enabled Windows Server 2016 SMP VMs fail to boot in !XSAVES && XSAVEC
 * configuration.
 * Such configuration can result from, for example, AMD Erratum 1386 workaround.
 *
 * Print a notice so users aren't left wondering what's suddenly gone wrong.
 */
static void __kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = to_kvm_hv(kvm);

	/* Check again under the hv_lock. */
	if (hv->xsaves_xsavec_checked)
		return;

	/* Only guests reporting a Windows Server 2016 OS ID are affected. */
	if ((hv->hv_guest_os_id & KVM_HV_WIN2016_GUEST_ID_MASK) !=
	    KVM_HV_WIN2016_GUEST_ID)
		return;

	hv->xsaves_xsavec_checked = true;

	/* UP configurations aren't affected */
	if (atomic_read(&kvm->online_vcpus) < 2)
		return;

	/* Warn only for the problematic !XSAVES && XSAVEC combination. */
	if (guest_cpuid_has(vcpu, X86_FEATURE_XSAVES) ||
	    !guest_cpuid_has(vcpu, X86_FEATURE_XSAVEC))
		return;

	pr_notice_ratelimited("Booting SMP Windows KVM VM with !XSAVES && XSAVEC. "
			      "If it fails to boot try disabling XSAVEC in the VM config.\n");
}
  1139. void kvm_hv_xsaves_xsavec_maybe_warn(struct kvm_vcpu *vcpu)
  1140. {
  1141. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  1142. if (!vcpu->arch.hyperv_enabled ||
  1143. hv->xsaves_xsavec_checked)
  1144. return;
  1145. mutex_lock(&hv->hv_lock);
  1146. __kvm_hv_xsaves_xsavec_maybe_warn(vcpu);
  1147. mutex_unlock(&hv->hv_lock);
  1148. }
/*
 * Handle writes to partition-wide Hyper-V MSRs.  Called with hv->hv_lock
 * held (see kvm_hv_set_msr_common()).
 *
 * @host: true for host-initiated (userspace) accesses, which skip CPUID
 *	  checks and some guest-visible read-only restrictions.
 *
 * Returns 0 on success, 1 to make the caller inject #GP.
 */
static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
			     bool host)
{
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = to_kvm_hv(kvm);

	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		hv->hv_guest_os_id = data;
		/* setting guest os id to zero disables hypercall page */
		if (!hv->hv_guest_os_id)
			hv->hv_hypercall &= ~HV_X64_MSR_HYPERCALL_ENABLE;
		break;
	case HV_X64_MSR_HYPERCALL: {
		u8 instructions[9];
		int i = 0;
		u64 addr;

		/* if guest os id is not set hypercall should remain disabled */
		if (!hv->hv_guest_os_id)
			break;
		if (!(data & HV_X64_MSR_HYPERCALL_ENABLE)) {
			hv->hv_hypercall = data;
			break;
		}

		/*
		 * If Xen and Hyper-V hypercalls are both enabled, disambiguate
		 * the same way Xen itself does, by setting the bit 31 of EAX
		 * which is RsvdZ in the 32-bit Hyper-V hypercall ABI and just
		 * going to be clobbered on 64-bit.
		 */
		if (kvm_xen_hypercall_enabled(kvm)) {
			/* orl $0x80000000, %eax */
			instructions[i++] = 0x0d;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x00;
			instructions[i++] = 0x80;
		}

		/* vmcall/vmmcall */
		kvm_x86_call(patch_hypercall)(vcpu, instructions + i);
		i += 3;

		/* ret */
		((unsigned char *)instructions)[i++] = 0xc3;

		addr = data & HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK;
		if (kvm_vcpu_write_guest(vcpu, addr, instructions, i))
			return 1;
		hv->hv_hypercall = data;
		break;
	}
	case HV_X64_MSR_REFERENCE_TSC:
		hv->hv_tsc_page = data;
		if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
			if (!host)
				hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
			else
				hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
			/* Page contents are rewritten on the next clock update. */
			kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
		} else {
			hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
		}
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_set_crash_data(kvm,
						 msr - HV_X64_MSR_CRASH_P0,
						 data);
	case HV_X64_MSR_CRASH_CTL:
		if (host)
			return kvm_hv_msr_set_crash_ctl(kvm, data);

		if (data & HV_CRASH_CTL_CRASH_NOTIFY) {
			vcpu_debug(vcpu, "hv crash (0x%llx 0x%llx 0x%llx 0x%llx 0x%llx)\n",
				   hv->hv_crash_param[0],
				   hv->hv_crash_param[1],
				   hv->hv_crash_param[2],
				   hv->hv_crash_param[3],
				   hv->hv_crash_param[4]);

			/* Send notification about crash to user space */
			kvm_make_request(KVM_REQ_HV_CRASH, vcpu);
		}
		break;
	case HV_X64_MSR_RESET:
		if (data == 1) {
			vcpu_debug(vcpu, "hyper-v reset requested\n");
			kvm_make_request(KVM_REQ_HV_RESET, vcpu);
		}
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		hv->hv_reenlightenment_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		hv->hv_tsc_emulation_control = data;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		/* Guest writes of non-zero values are rejected. */
		if (data && !host)
			return 1;

		hv->hv_tsc_emulation_status = data;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
		/* Only bit 0 is supported */
		if (data & ~HV_EXPOSE_INVARIANT_TSC)
			return 1;

		/* The feature can't be disabled from the guest */
		if (!host && hv->hv_invtsc_control && !data)
			return 1;

		hv->hv_invtsc_control = data;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_set_msr(vcpu, msr, data, host);
	default:
		kvm_pr_unimpl_wrmsr(vcpu, msr, data);
		return 1;
	}
	return 0;
}
/* Calculate cpu time spent by current task in 100ns units */
static u64 current_task_runtime_100ns(void)
{
	u64 utime, stime;

	/* Adjusted user + system time of the current task, in nanoseconds. */
	task_cputime_adjusted(current, &utime, &stime);

	return div_u64(utime + stime, 100);
}
/*
 * Handle writes to per-vCPU Hyper-V MSRs.
 *
 * @host: true for host-initiated (userspace) accesses, which skip CPUID
 *	  checks and may write otherwise guest-read-only MSRs.
 *
 * Returns 0 on success, 1 to make the caller inject #GP.
 */
static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX: {
		struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
		u32 new_vp_index = (u32)data;

		/* Only the host may change the VP index. */
		if (!host || new_vp_index >= KVM_MAX_VCPUS)
			return 1;

		if (new_vp_index == hv_vcpu->vp_index)
			return 0;

		/*
		 * The VP index is initialized to vcpu_index by
		 * kvm_hv_vcpu_postcreate so they initially match.  Now the
		 * VP index is changing, adjust num_mismatched_vp_indexes if
		 * it now matches or no longer matches vcpu_idx.
		 */
		if (hv_vcpu->vp_index == vcpu->vcpu_idx)
			atomic_inc(&hv->num_mismatched_vp_indexes);
		else if (new_vp_index == vcpu->vcpu_idx)
			atomic_dec(&hv->num_mismatched_vp_indexes);

		hv_vcpu->vp_index = new_vp_index;
		break;
	}
	case HV_X64_MSR_VP_ASSIST_PAGE: {
		u64 gfn;
		unsigned long addr;

		if (!(data & HV_X64_MSR_VP_ASSIST_PAGE_ENABLE)) {
			/* Disabling the page also disables PV EOI. */
			hv_vcpu->hv_vapic = data;
			if (kvm_lapic_set_pv_eoi(vcpu, 0, 0))
				return 1;
			break;
		}

		gfn = data >> HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT;
		addr = kvm_vcpu_gfn_to_hva(vcpu, gfn);
		if (kvm_is_error_hva(addr))
			return 1;

		/*
		 * Clear apic_assist portion of struct hv_vp_assist_page
		 * only, there can be valuable data in the rest which needs
		 * to be preserved e.g. on migration.
		 */
		if (__put_user(0, (u32 __user *)addr))
			return 1;
		hv_vcpu->hv_vapic = data;
		kvm_vcpu_mark_page_dirty(vcpu, gfn);
		if (kvm_lapic_set_pv_eoi(vcpu,
					 gfn_to_gpa(gfn) | KVM_MSR_ENABLED,
					 sizeof(struct hv_vp_assist_page)))
			return 1;
		break;
	}
	/* APIC accesses proxied through Hyper-V MSRs. */
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_write(vcpu, APIC_EOI, data);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_ICR, data);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_write(vcpu, APIC_TASKPRI, data);
	case HV_X64_MSR_VP_RUNTIME:
		if (!host)
			return 1;
		/* Store as an offset from the task's current runtime. */
		hv_vcpu->runtime_offset = data - current_task_runtime_100ns();
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_set_msr(to_hv_synic(vcpu), msr, data, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		/* CONFIG/COUNT MSRs interleave, hence the divide by 2. */
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_set_config(to_hv_stimer(vcpu, timer_index),
					 data, host);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_set_count(to_hv_stimer(vcpu, timer_index),
					data, host);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
	case HV_X64_MSR_APIC_FREQUENCY:
		/* read-only, but still ignore it if host-initiated */
		if (!host)
			return 1;
		break;
	default:
		kvm_pr_unimpl_wrmsr(vcpu, msr, data);
		return 1;
	}

	return 0;
}
/*
 * Handle reads of partition-wide Hyper-V MSRs.  Called with hv->hv_lock
 * held (see kvm_hv_get_msr_common()).
 *
 * Returns 0 on success with the value in *@pdata, 1 to make the caller
 * inject #GP.
 */
static int kvm_hv_get_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			     bool host)
{
	u64 data = 0;
	struct kvm *kvm = vcpu->kvm;
	struct kvm_hv *hv = to_kvm_hv(kvm);

	if (unlikely(!host && !hv_check_msr_access(to_hv_vcpu(vcpu), msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_GUEST_OS_ID:
		data = hv->hv_guest_os_id;
		break;
	case HV_X64_MSR_HYPERCALL:
		data = hv->hv_hypercall;
		break;
	case HV_X64_MSR_TIME_REF_COUNT:
		data = get_time_ref_counter(kvm);
		break;
	case HV_X64_MSR_REFERENCE_TSC:
		data = hv->hv_tsc_page;
		break;
	case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
		return kvm_hv_msr_get_crash_data(kvm,
						 msr - HV_X64_MSR_CRASH_P0,
						 pdata);
	case HV_X64_MSR_CRASH_CTL:
		return kvm_hv_msr_get_crash_ctl(kvm, pdata);
	case HV_X64_MSR_RESET:
		/* Write-only from the guest POV; always reads as zero. */
		data = 0;
		break;
	case HV_X64_MSR_REENLIGHTENMENT_CONTROL:
		data = hv->hv_reenlightenment_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_CONTROL:
		data = hv->hv_tsc_emulation_control;
		break;
	case HV_X64_MSR_TSC_EMULATION_STATUS:
		data = hv->hv_tsc_emulation_status;
		break;
	case HV_X64_MSR_TSC_INVARIANT_CONTROL:
		data = hv->hv_invtsc_control;
		break;
	case HV_X64_MSR_SYNDBG_OPTIONS:
	case HV_X64_MSR_SYNDBG_CONTROL ... HV_X64_MSR_SYNDBG_PENDING_BUFFER:
		return syndbg_get_msr(vcpu, msr, pdata, host);
	default:
		kvm_pr_unimpl_rdmsr(vcpu, msr);
		return 1;
	}

	*pdata = data;
	return 0;
}
/*
 * Handle reads of per-vCPU Hyper-V MSRs.
 *
 * Returns 0 on success with the value in *@pdata, 1 to make the caller
 * inject #GP.
 */
static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata,
			  bool host)
{
	u64 data = 0;
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);

	if (unlikely(!host && !hv_check_msr_access(hv_vcpu, msr)))
		return 1;

	switch (msr) {
	case HV_X64_MSR_VP_INDEX:
		data = hv_vcpu->vp_index;
		break;
	/* APIC accesses proxied through Hyper-V MSRs. */
	case HV_X64_MSR_EOI:
		return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
	case HV_X64_MSR_ICR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_ICR, pdata);
	case HV_X64_MSR_TPR:
		return kvm_hv_vapic_msr_read(vcpu, APIC_TASKPRI, pdata);
	case HV_X64_MSR_VP_ASSIST_PAGE:
		data = hv_vcpu->hv_vapic;
		break;
	case HV_X64_MSR_VP_RUNTIME:
		data = current_task_runtime_100ns() + hv_vcpu->runtime_offset;
		break;
	case HV_X64_MSR_SCONTROL:
	case HV_X64_MSR_SVERSION:
	case HV_X64_MSR_SIEFP:
	case HV_X64_MSR_SIMP:
	case HV_X64_MSR_EOM:
	case HV_X64_MSR_SINT0 ... HV_X64_MSR_SINT15:
		return synic_get_msr(to_hv_synic(vcpu), msr, pdata, host);
	case HV_X64_MSR_STIMER0_CONFIG:
	case HV_X64_MSR_STIMER1_CONFIG:
	case HV_X64_MSR_STIMER2_CONFIG:
	case HV_X64_MSR_STIMER3_CONFIG: {
		/* CONFIG/COUNT MSRs interleave, hence the divide by 2. */
		int timer_index = (msr - HV_X64_MSR_STIMER0_CONFIG)/2;

		return stimer_get_config(to_hv_stimer(vcpu, timer_index),
					 pdata);
	}
	case HV_X64_MSR_STIMER0_COUNT:
	case HV_X64_MSR_STIMER1_COUNT:
	case HV_X64_MSR_STIMER2_COUNT:
	case HV_X64_MSR_STIMER3_COUNT: {
		int timer_index = (msr - HV_X64_MSR_STIMER0_COUNT)/2;

		return stimer_get_count(to_hv_stimer(vcpu, timer_index),
					pdata);
	}
	case HV_X64_MSR_TSC_FREQUENCY:
		data = (u64)vcpu->arch.virtual_tsc_khz * 1000;
		break;
	case HV_X64_MSR_APIC_FREQUENCY:
		data = div64_u64(1000000000ULL,
				 vcpu->kvm->arch.apic_bus_cycle_ns);
		break;
	default:
		kvm_pr_unimpl_rdmsr(vcpu, msr);
		return 1;
	}
	*pdata = data;
	return 0;
}
  1488. int kvm_hv_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
  1489. {
  1490. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  1491. if (!host && !vcpu->arch.hyperv_enabled)
  1492. return 1;
  1493. if (kvm_hv_vcpu_init(vcpu))
  1494. return 1;
  1495. if (kvm_hv_msr_partition_wide(msr)) {
  1496. int r;
  1497. mutex_lock(&hv->hv_lock);
  1498. r = kvm_hv_set_msr_pw(vcpu, msr, data, host);
  1499. mutex_unlock(&hv->hv_lock);
  1500. return r;
  1501. } else
  1502. return kvm_hv_set_msr(vcpu, msr, data, host);
  1503. }
  1504. int kvm_hv_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata, bool host)
  1505. {
  1506. struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
  1507. if (!host && !vcpu->arch.hyperv_enabled)
  1508. return 1;
  1509. if (kvm_hv_vcpu_init(vcpu))
  1510. return 1;
  1511. if (kvm_hv_msr_partition_wide(msr)) {
  1512. int r;
  1513. mutex_lock(&hv->hv_lock);
  1514. r = kvm_hv_get_msr_pw(vcpu, msr, pdata, host);
  1515. mutex_unlock(&hv->hv_lock);
  1516. return r;
  1517. } else
  1518. return kvm_hv_get_msr(vcpu, msr, pdata, host);
  1519. }
/*
 * Expand a Hyper-V sparse VP set (@valid_bank_mask + @sparse_banks) into a
 * bitmap of vCPU indexes (@vcpu_mask).
 *
 * Fast path: when every vCPU's VP index equals its vCPU index, the sparse
 * banks can be expanded directly into @vcpu_mask.  Slow path: expand into a
 * temporary VP-index bitmap and test each vCPU's VP index individually.
 */
static void sparse_set_to_vcpu_mask(struct kvm *kvm, u64 *sparse_banks,
				    u64 valid_bank_mask, unsigned long *vcpu_mask)
{
	struct kvm_hv *hv = to_kvm_hv(kvm);
	bool has_mismatch = atomic_read(&hv->num_mismatched_vp_indexes);
	u64 vp_bitmap[KVM_HV_MAX_SPARSE_VCPU_SET_BITS];
	struct kvm_vcpu *vcpu;
	int bank, sbank = 0;
	unsigned long i;
	u64 *bitmap;

	/* @vcpu_mask must be big enough to serve as the expansion target. */
	BUILD_BUG_ON(sizeof(vp_bitmap) >
		     sizeof(*vcpu_mask) * BITS_TO_LONGS(KVM_MAX_VCPUS));

	/*
	 * If vp_index == vcpu_idx for all vCPUs, fill vcpu_mask directly, else
	 * fill a temporary buffer and manually test each vCPU's VP index.
	 */
	if (likely(!has_mismatch))
		bitmap = (u64 *)vcpu_mask;
	else
		bitmap = vp_bitmap;

	/*
	 * Each set of 64 VPs is packed into sparse_banks, with valid_bank_mask
	 * having a '1' for each bank that exists in sparse_banks. Sets must
	 * be in ascending order, i.e. bank0..bankN.
	 */
	memset(bitmap, 0, sizeof(vp_bitmap));
	for_each_set_bit(bank, (unsigned long *)&valid_bank_mask,
			 KVM_HV_MAX_SPARSE_VCPU_SET_BITS)
		bitmap[bank] = sparse_banks[sbank++];

	if (likely(!has_mismatch))
		return;

	/* Slow path: translate VP indexes to vCPU indexes one by one. */
	bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);
	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (test_bit(kvm_hv_get_vpindex(vcpu), (unsigned long *)vp_bitmap))
			__set_bit(i, vcpu_mask);
	}
}
  1557. static bool hv_is_vp_in_sparse_set(u32 vp_id, u64 valid_bank_mask, u64 sparse_banks[])
  1558. {
  1559. int valid_bit_nr = vp_id / HV_VCPUS_PER_SPARSE_BANK;
  1560. unsigned long sbank;
  1561. if (!test_bit(valid_bit_nr, (unsigned long *)&valid_bank_mask))
  1562. return false;
  1563. /*
  1564. * The index into the sparse bank is the number of preceding bits in
  1565. * the valid mask. Optimize for VMs with <64 vCPUs by skipping the
  1566. * fancy math if there can't possibly be preceding bits.
  1567. */
  1568. if (valid_bit_nr)
  1569. sbank = hweight64(valid_bank_mask & GENMASK_ULL(valid_bit_nr - 1, 0));
  1570. else
  1571. sbank = 0;
  1572. return test_bit(vp_id % HV_VCPUS_PER_SPARSE_BANK,
  1573. (unsigned long *)&sparse_banks[sbank]);
  1574. }
/* Decoded state of an in-flight Hyper-V hypercall. */
struct kvm_hv_hcall {
	/* Hypercall input data */
	u64 param;	/* raw hypercall input value read from GPRs */
	u64 ingpa;	/* input GPA, or first parameter for 'fast' calls */
	u64 outgpa;	/* output GPA, or second parameter for 'fast' calls */
	u16 code;	/* hypercall code (low 16 bits of 'param') */
	u16 var_cnt;	/* variable header size, in 64-bit chunks */
	u16 rep_cnt;	/* total repetition count for rep hypercalls */
	u16 rep_idx;	/* starting repetition index */
	bool fast;	/* input passed in registers instead of memory */
	bool rep;	/* set when rep_cnt or rep_idx is non-zero */
	sse128_t xmm[HV_HYPERCALL_MAX_XMM_REGISTERS]; /* fast-call XMM input */

	/*
	 * Current read offset when KVM reads hypercall input data gradually,
	 * either offset in bytes from 'ingpa' for regular hypercalls or the
	 * number of already consumed 'XMM halves' for 'fast' hypercalls.
	 */
	union {
		gpa_t data_offset;
		int consumed_xmm_halves;
	};
};
/*
 * Read up to @cnt_cap 64-bit chunks of hypercall input into @data, either
 * from the remaining XMM register halves ('fast' hypercall) or from guest
 * memory at ingpa + data_offset (regular hypercall).
 *
 * Returns 0 on success, HV_STATUS_INVALID_HYPERCALL_INPUT when a fast
 * hypercall claims more data than the XMM registers can hold, or a negative
 * errno from kvm_read_guest().
 */
static int kvm_hv_get_hc_data(struct kvm *kvm, struct kvm_hv_hcall *hc,
			      u16 orig_cnt, u16 cnt_cap, u64 *data)
{
	/*
	 * Preserve the original count when ignoring entries via a "cap", KVM
	 * still needs to validate the guest input (though the non-XMM path
	 * punts on the checks).
	 */
	u16 cnt = min(orig_cnt, cnt_cap);
	int i, j;

	if (hc->fast) {
		/*
		 * Each XMM holds two sparse banks, but do not count halves that
		 * have already been consumed for hypercall parameters.
		 */
		if (orig_cnt > 2 * HV_HYPERCALL_MAX_XMM_REGISTERS - hc->consumed_xmm_halves)
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		for (i = 0; i < cnt; i++) {
			/* Odd halves live in the high 64 bits of the XMM. */
			j = i + hc->consumed_xmm_halves;
			if (j % 2)
				data[i] = sse128_hi(hc->xmm[j / 2]);
			else
				data[i] = sse128_lo(hc->xmm[j / 2]);
		}
		return 0;
	}

	return kvm_read_guest(kvm, hc->ingpa + hc->data_offset, data,
			      cnt * sizeof(*data));
}
  1626. static u64 kvm_get_sparse_vp_set(struct kvm *kvm, struct kvm_hv_hcall *hc,
  1627. u64 *sparse_banks)
  1628. {
  1629. if (hc->var_cnt > HV_MAX_SPARSE_VCPU_BANKS)
  1630. return -EINVAL;
  1631. /* Cap var_cnt to ignore banks that cannot contain a legal VP index. */
  1632. return kvm_hv_get_hc_data(kvm, hc, hc->var_cnt, KVM_HV_MAX_SPARSE_VCPU_SET_BITS,
  1633. sparse_banks);
  1634. }
  1635. static int kvm_hv_get_tlb_flush_entries(struct kvm *kvm, struct kvm_hv_hcall *hc, u64 entries[])
  1636. {
  1637. return kvm_hv_get_hc_data(kvm, hc, hc->rep_cnt, hc->rep_cnt, entries);
  1638. }
/*
 * Queue @count TLB-flush entries onto @tlb_flush_fifo for later processing
 * by the target vCPU; on overflow (or when no entry list was provided),
 * queue a single 'flush all' marker instead.
 */
static void hv_tlb_flush_enqueue(struct kvm_vcpu *vcpu,
				 struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo,
				 u64 *entries, int count)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	u64 flush_all_entry = KVM_HV_TLB_FLUSHALL_ENTRY;

	/* The target vCPU may have no Hyper-V context yet. */
	if (!hv_vcpu)
		return;

	spin_lock(&tlb_flush_fifo->write_lock);

	/*
	 * All entries should fit on the fifo leaving one free for 'flush all'
	 * entry in case another request comes in. In case there's not enough
	 * space, just put 'flush all' entry there.
	 */
	if (count && entries && count < kfifo_avail(&tlb_flush_fifo->entries)) {
		WARN_ON(kfifo_in(&tlb_flush_fifo->entries, entries, count) != count);
		goto out_unlock;
	}

	/*
	 * Note: full fifo always contains 'flush all' entry, no need to check the
	 * return value.
	 */
	kfifo_in(&tlb_flush_fifo->entries, &flush_all_entry, 1);

out_unlock:
	spin_unlock(&tlb_flush_fifo->write_lock);
}
/*
 * Drain this vCPU's Hyper-V TLB-flush fifo and flush each queued GVA range.
 *
 * Returns 0 when all entries were flushed individually, -ENOSPC when a
 * 'flush all' marker was found (the caller must fall back to a full TLB
 * flush), or -EINVAL when per-GVA flushing is unavailable (no TDP or no
 * Hyper-V context).
 */
int kvm_hv_vcpu_flush_tlb(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	u64 entries[KVM_HV_TLB_FLUSH_FIFO_SIZE];
	int i, j, count;
	gva_t gva;

	if (!tdp_enabled || !hv_vcpu)
		return -EINVAL;

	/* L1 and L2 have separate fifos; pick the one for the current mode. */
	tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(vcpu, is_guest_mode(vcpu));

	count = kfifo_out(&tlb_flush_fifo->entries, entries, KVM_HV_TLB_FLUSH_FIFO_SIZE);

	for (i = 0; i < count; i++) {
		if (entries[i] == KVM_HV_TLB_FLUSHALL_ENTRY)
			goto out_flush_all;

		/*
		 * Lower 12 bits of 'address' encode the number of additional
		 * pages to flush.
		 */
		gva = entries[i] & PAGE_MASK;
		for (j = 0; j < (entries[i] & ~PAGE_MASK) + 1; j++)
			kvm_x86_call(flush_tlb_gva)(vcpu, gva + j * PAGE_SIZE);

		++vcpu->stat.tlb_flush;
	}
	return 0;

out_flush_all:
	/* Discard any remaining queued entries; they are subsumed. */
	kfifo_reset_out(&tlb_flush_fifo->entries);

	/* Fall back to full flush. */
	return -ENOSPC;
}
/*
 * Handle HVCALL_FLUSH_VIRTUAL_ADDRESS_{SPACE,LIST}{,_EX}: decode the target
 * vCPU set, queue the requested flush entries on each target's fifo and kick
 * the targets with KVM_REQ_HV_TLB_FLUSH.
 *
 * Returns a Hyper-V status value with 'reps completed' folded in on success.
 */
static u64 kvm_hv_flush_tlb(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	u64 *sparse_banks = hv_vcpu->sparse_banks;
	struct kvm *kvm = vcpu->kvm;
	struct hv_tlb_flush_ex flush_ex;
	struct hv_tlb_flush flush;
	DECLARE_BITMAP(vcpu_mask, KVM_MAX_VCPUS);
	struct kvm_vcpu_hv_tlb_flush_fifo *tlb_flush_fifo;
	/*
	 * Normally, there can be no more than 'KVM_HV_TLB_FLUSH_FIFO_SIZE'
	 * entries on the TLB flush fifo. The last entry, however, needs to be
	 * always left free for 'flush all' entry which gets placed when
	 * there is not enough space to put all the requested entries.
	 */
	u64 __tlb_flush_entries[KVM_HV_TLB_FLUSH_FIFO_SIZE - 1];
	u64 *tlb_flush_entries;
	u64 valid_bank_mask;
	struct kvm_vcpu *v;
	unsigned long i;
	bool all_cpus;

	/*
	 * The Hyper-V TLFS doesn't allow more than HV_MAX_SPARSE_VCPU_BANKS
	 * sparse banks. Fail the build if KVM's max allowed number of
	 * vCPUs (>4096) exceeds this limit.
	 */
	BUILD_BUG_ON(KVM_HV_MAX_SPARSE_VCPU_SET_BITS > HV_MAX_SPARSE_VCPU_BANKS);

	/*
	 * 'Slow' hypercall's first parameter is the address in guest's memory
	 * where hypercall parameters are placed. This is either a GPA or a
	 * nested GPA when KVM is handling the call from L2 ('direct' TLB
	 * flush). Translate the address here so the memory can be uniformly
	 * read with kvm_read_guest().
	 */
	if (!hc->fast && is_guest_mode(vcpu)) {
		hc->ingpa = translate_nested_gpa(vcpu, hc->ingpa, 0, NULL);
		if (unlikely(hc->ingpa == INVALID_GPA))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST ||
	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE) {
		/* Non-EX variants: a single 64-bit processor mask. */
		if (hc->fast) {
			flush.address_space = hc->ingpa;
			flush.flags = hc->outgpa;
			flush.processor_mask = sse128_lo(hc->xmm[0]);
			hc->consumed_xmm_halves = 1;
		} else {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa,
						    &flush, sizeof(flush))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			hc->data_offset = sizeof(flush);
		}

		trace_kvm_hv_flush_tlb(flush.processor_mask,
				       flush.address_space, flush.flags,
				       is_guest_mode(vcpu));

		valid_bank_mask = BIT_ULL(0);
		sparse_banks[0] = flush.processor_mask;

		/*
		 * Work around possible WS2012 bug: it sends hypercalls
		 * with processor_mask = 0x0 and HV_FLUSH_ALL_PROCESSORS clear,
		 * while also expecting us to flush something and crashing if
		 * we don't. Let's treat processor_mask == 0 same as
		 * HV_FLUSH_ALL_PROCESSORS.
		 */
		all_cpus = (flush.flags & HV_FLUSH_ALL_PROCESSORS) ||
			flush.processor_mask == 0;
	} else {
		/* EX variants: a sparse VP set with a variable-sized header. */
		if (hc->fast) {
			flush_ex.address_space = hc->ingpa;
			flush_ex.flags = hc->outgpa;
			memcpy(&flush_ex.hv_vp_set,
			       &hc->xmm[0], sizeof(hc->xmm[0]));
			hc->consumed_xmm_halves = 2;
		} else {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &flush_ex,
						    sizeof(flush_ex))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			hc->data_offset = sizeof(flush_ex);
		}

		trace_kvm_hv_flush_tlb_ex(flush_ex.hv_vp_set.valid_bank_mask,
					  flush_ex.hv_vp_set.format,
					  flush_ex.address_space,
					  flush_ex.flags, is_guest_mode(vcpu));

		valid_bank_mask = flush_ex.hv_vp_set.valid_bank_mask;
		all_cpus = flush_ex.hv_vp_set.format !=
			HV_GENERIC_SET_SPARSE_4K;

		/* The variable header must match the declared bank count. */
		if (hc->var_cnt != hweight64(valid_bank_mask))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		if (!all_cpus) {
			/* Empty sparse set: nothing to flush. */
			if (!hc->var_cnt)
				goto ret_success;

			if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
		}

		/*
		 * Hyper-V TLFS doesn't explicitly forbid non-empty sparse vCPU
		 * banks (and, thus, non-zero 'var_cnt') for the 'all vCPUs'
		 * case (HV_GENERIC_SET_ALL). Always adjust data_offset and
		 * consumed_xmm_halves to make sure TLB flush entries are read
		 * from the correct offset.
		 */
		if (hc->fast)
			hc->consumed_xmm_halves += hc->var_cnt;
		else
			hc->data_offset += hc->var_cnt * sizeof(sparse_banks[0]);
	}

	/*
	 * SPACE variants and oversized LIST requests have no usable per-GVA
	 * entries; NULL entries make hv_tlb_flush_enqueue() queue 'flush all'.
	 */
	if (hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE ||
	    hc->code == HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX ||
	    hc->rep_cnt > ARRAY_SIZE(__tlb_flush_entries)) {
		tlb_flush_entries = NULL;
	} else {
		if (kvm_hv_get_tlb_flush_entries(kvm, hc, __tlb_flush_entries))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
		tlb_flush_entries = __tlb_flush_entries;
	}

	/*
	 * vcpu->arch.cr3 may not be up-to-date for running vCPUs so we can't
	 * analyze it here, flush TLB regardless of the specified address space.
	 */
	if (all_cpus && !is_guest_mode(vcpu)) {
		kvm_for_each_vcpu(i, v, kvm) {
			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
					     tlb_flush_entries, hc->rep_cnt);
		}

		kvm_make_all_cpus_request(kvm, KVM_REQ_HV_TLB_FLUSH);
	} else if (!is_guest_mode(vcpu)) {
		sparse_set_to_vcpu_mask(kvm, sparse_banks, valid_bank_mask, vcpu_mask);

		for_each_set_bit(i, vcpu_mask, KVM_MAX_VCPUS) {
			v = kvm_get_vcpu(kvm, i);
			if (!v)
				continue;
			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, false);
			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
					     tlb_flush_entries, hc->rep_cnt);
		}

		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
	} else {
		/* L2-originated flush: match targets by nested VM/VP ids. */
		struct kvm_vcpu_hv *hv_v;

		bitmap_zero(vcpu_mask, KVM_MAX_VCPUS);

		kvm_for_each_vcpu(i, v, kvm) {
			hv_v = to_hv_vcpu(v);

			/*
			 * The following check races with nested vCPUs entering/exiting
			 * and/or migrating between L1's vCPUs, however the only case when
			 * KVM *must* flush the TLB is when the target L2 vCPU keeps
			 * running on the same L1 vCPU from the moment of the request until
			 * kvm_hv_flush_tlb() returns. TLB is fully flushed in all other
			 * cases, e.g. when the target L2 vCPU migrates to a different L1
			 * vCPU or when the corresponding L1 vCPU temporary switches to a
			 * different L2 vCPU while the request is being processed.
			 */
			if (!hv_v || hv_v->nested.vm_id != hv_vcpu->nested.vm_id)
				continue;

			if (!all_cpus &&
			    !hv_is_vp_in_sparse_set(hv_v->nested.vp_id, valid_bank_mask,
						    sparse_banks))
				continue;

			__set_bit(i, vcpu_mask);
			tlb_flush_fifo = kvm_hv_get_tlb_flush_fifo(v, true);
			hv_tlb_flush_enqueue(v, tlb_flush_fifo,
					     tlb_flush_entries, hc->rep_cnt);
		}

		kvm_make_vcpus_request_mask(kvm, KVM_REQ_HV_TLB_FLUSH, vcpu_mask);
	}

ret_success:
	/* We always do full TLB flush, set 'Reps completed' = 'Rep Count' */
	return (u64)HV_STATUS_SUCCESS |
		((u64)hc->rep_cnt << HV_HYPERCALL_REP_COMP_OFFSET);
}
  1864. static void kvm_hv_send_ipi_to_many(struct kvm *kvm, u32 vector,
  1865. u64 *sparse_banks, u64 valid_bank_mask)
  1866. {
  1867. struct kvm_lapic_irq irq = {
  1868. .delivery_mode = APIC_DM_FIXED,
  1869. .vector = vector
  1870. };
  1871. struct kvm_vcpu *vcpu;
  1872. unsigned long i;
  1873. kvm_for_each_vcpu(i, vcpu, kvm) {
  1874. if (sparse_banks &&
  1875. !hv_is_vp_in_sparse_set(kvm_hv_get_vpindex(vcpu),
  1876. valid_bank_mask, sparse_banks))
  1877. continue;
  1878. /* We fail only when APIC is disabled */
  1879. kvm_apic_set_irq(vcpu, &irq, NULL);
  1880. }
  1881. }
/*
 * Handle HVCALL_SEND_IPI / HVCALL_SEND_IPI_EX: decode the target VP set and
 * deliver a fixed-mode IPI to the matching vCPUs.  Returns a Hyper-V status.
 */
static u64 kvm_hv_send_ipi(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	u64 *sparse_banks = hv_vcpu->sparse_banks;
	struct kvm *kvm = vcpu->kvm;
	struct hv_send_ipi_ex send_ipi_ex;
	struct hv_send_ipi send_ipi;
	u64 valid_bank_mask;
	u32 vector;
	bool all_cpus;

	/* Delivery goes through kvm_apic_set_irq(), needs in-kernel LAPIC. */
	if (!lapic_in_kernel(vcpu))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	if (hc->code == HVCALL_SEND_IPI) {
		/* Non-EX variant: a single 64-bit CPU mask. */
		if (!hc->fast) {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi,
						    sizeof(send_ipi))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = send_ipi.cpu_mask;
			vector = send_ipi.vector;
		} else {
			/* 'reserved' part of hv_send_ipi should be 0 */
			if (unlikely(hc->ingpa >> 32 != 0))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
			sparse_banks[0] = hc->outgpa;
			vector = (u32)hc->ingpa;
		}
		all_cpus = false;
		valid_bank_mask = BIT_ULL(0);

		trace_kvm_hv_send_ipi(vector, sparse_banks[0]);
	} else {
		if (!hc->fast) {
			if (unlikely(kvm_read_guest(kvm, hc->ingpa, &send_ipi_ex,
						    sizeof(send_ipi_ex))))
				return HV_STATUS_INVALID_HYPERCALL_INPUT;
		} else {
			send_ipi_ex.vector = (u32)hc->ingpa;
			send_ipi_ex.vp_set.format = hc->outgpa;
			send_ipi_ex.vp_set.valid_bank_mask = sse128_lo(hc->xmm[0]);
		}

		trace_kvm_hv_send_ipi_ex(send_ipi_ex.vector,
					 send_ipi_ex.vp_set.format,
					 send_ipi_ex.vp_set.valid_bank_mask);

		vector = send_ipi_ex.vector;
		valid_bank_mask = send_ipi_ex.vp_set.valid_bank_mask;
		all_cpus = send_ipi_ex.vp_set.format == HV_GENERIC_SET_ALL;

		/* The variable header must match the declared bank count. */
		if (hc->var_cnt != hweight64(valid_bank_mask))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;

		if (all_cpus)
			goto check_and_send_ipi;

		/* Empty sparse set: nothing to do. */
		if (!hc->var_cnt)
			goto ret_success;

		if (!hc->fast)
			hc->data_offset = offsetof(struct hv_send_ipi_ex,
						   vp_set.bank_contents);
		else
			hc->consumed_xmm_halves = 1;

		if (kvm_get_sparse_vp_set(kvm, hc, sparse_banks))
			return HV_STATUS_INVALID_HYPERCALL_INPUT;
	}

check_and_send_ipi:
	/* Reject vectors outside the permitted synthetic-IPI range. */
	if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR))
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	if (all_cpus)
		kvm_hv_send_ipi_to_many(kvm, vector, NULL, 0);
	else
		kvm_hv_send_ipi_to_many(kvm, vector, sparse_banks, valid_bank_mask);

ret_success:
	return HV_STATUS_SUCCESS;
}
/*
 * Refresh the cached Hyper-V CPUID leaves after userspace updates the
 * vCPU's CPUID, so later MSR/hypercall permission checks avoid repeated
 * CPUID lookups.  Clears the cache when Hyper-V is disabled.
 */
void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu, bool hyperv_enabled)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_cpuid_entry2 *entry;

	vcpu->arch.hyperv_enabled = hyperv_enabled;

	if (!hv_vcpu) {
		/*
		 * KVM should have already allocated kvm_vcpu_hv if Hyper-V is
		 * enabled in CPUID.
		 */
		WARN_ON_ONCE(vcpu->arch.hyperv_enabled);
		return;
	}

	memset(&hv_vcpu->cpuid_cache, 0, sizeof(hv_vcpu->cpuid_cache));

	if (!vcpu->arch.hyperv_enabled)
		return;

	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
	if (entry) {
		hv_vcpu->cpuid_cache.features_eax = entry->eax;
		hv_vcpu->cpuid_cache.features_ebx = entry->ebx;
		hv_vcpu->cpuid_cache.features_edx = entry->edx;
	}

	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
	if (entry) {
		hv_vcpu->cpuid_cache.enlightenments_eax = entry->eax;
		hv_vcpu->cpuid_cache.enlightenments_ebx = entry->ebx;
	}

	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
	if (entry)
		hv_vcpu->cpuid_cache.syndbg_cap_eax = entry->eax;

	entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_NESTED_FEATURES);
	if (entry) {
		hv_vcpu->cpuid_cache.nested_eax = entry->eax;
		hv_vcpu->cpuid_cache.nested_ebx = entry->ebx;
	}
}
  1987. int kvm_hv_set_enforce_cpuid(struct kvm_vcpu *vcpu, bool enforce)
  1988. {
  1989. struct kvm_vcpu_hv *hv_vcpu;
  1990. int ret = 0;
  1991. if (!to_hv_vcpu(vcpu)) {
  1992. if (enforce) {
  1993. ret = kvm_hv_vcpu_init(vcpu);
  1994. if (ret)
  1995. return ret;
  1996. } else {
  1997. return 0;
  1998. }
  1999. }
  2000. hv_vcpu = to_hv_vcpu(vcpu);
  2001. hv_vcpu->enforce_cpuid = enforce;
  2002. return ret;
  2003. }
  2004. static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result)
  2005. {
  2006. bool longmode;
  2007. longmode = is_64_bit_hypercall(vcpu);
  2008. if (longmode)
  2009. kvm_rax_write(vcpu, result);
  2010. else {
  2011. kvm_rdx_write(vcpu, result >> 32);
  2012. kvm_rax_write(vcpu, result & 0xffffffff);
  2013. }
  2014. }
/*
 * Finish a hypercall: write the result back, bump stats, skip the emulated
 * instruction and, for successful L2 TLB-flush hypercalls with a non-zero
 * TlbLockCount in the partition assist page, inject a synthetic VM-exit.
 *
 * Returns the value of kvm_skip_emulated_instruction().
 */
static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result)
{
	u32 tlb_lock_count = 0;
	int ret;

	/* Read TlbLockCount from the partition assist page for L2 flushes. */
	if (hv_result_success(result) && is_guest_mode(vcpu) &&
	    kvm_hv_is_tlb_flush_hcall(vcpu) &&
	    kvm_read_guest(vcpu->kvm, to_hv_vcpu(vcpu)->nested.pa_page_gpa,
			   &tlb_lock_count, sizeof(tlb_lock_count)))
		result = HV_STATUS_INVALID_HYPERCALL_INPUT;

	trace_kvm_hv_hypercall_done(result);
	kvm_hv_hypercall_set_result(vcpu, result);
	++vcpu->stat.hypercalls;

	ret = kvm_skip_emulated_instruction(vcpu);

	if (tlb_lock_count)
		kvm_x86_ops.nested_ops->hv_inject_synthetic_vmexit_post_tlb_flush(vcpu);

	return ret;
}
  2032. static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu)
  2033. {
  2034. return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result);
  2035. }
/*
 * Handle HVCALL_SIGNAL_EVENT: look up the eventfd registered for the
 * requested connection id and signal it.
 *
 * Returns a Hyper-V status; HV_STATUS_INVALID_PORT_ID lets the caller
 * forward unknown connection ids to userspace.
 */
static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, struct kvm_hv_hcall *hc)
{
	struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
	struct eventfd_ctx *eventfd;

	if (unlikely(!hc->fast)) {
		int ret;
		gpa_t gpa = hc->ingpa;

		/* Slow call: the parameter must be read from guest memory. */
		if ((gpa & (__alignof__(hc->ingpa) - 1)) ||
		    offset_in_page(gpa) + sizeof(hc->ingpa) > PAGE_SIZE)
			return HV_STATUS_INVALID_ALIGNMENT;

		ret = kvm_vcpu_read_guest(vcpu, gpa,
					  &hc->ingpa, sizeof(hc->ingpa));
		if (ret < 0)
			return HV_STATUS_INVALID_ALIGNMENT;
	}

	/*
	 * Per spec, bits 32-47 contain the extra "flag number". However, we
	 * have no use for it, and in all known usecases it is zero, so just
	 * report lookup failure if it isn't.
	 */
	if (hc->ingpa & 0xffff00000000ULL)
		return HV_STATUS_INVALID_PORT_ID;
	/* remaining bits are reserved-zero */
	if (hc->ingpa & ~KVM_HYPERV_CONN_ID_MASK)
		return HV_STATUS_INVALID_HYPERCALL_INPUT;

	/* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */
	rcu_read_lock();
	eventfd = idr_find(&hv->conn_to_evt, hc->ingpa);
	rcu_read_unlock();
	if (!eventfd)
		return HV_STATUS_INVALID_PORT_ID;

	eventfd_signal(eventfd);
	return HV_STATUS_SUCCESS;
}
  2070. static bool is_xmm_fast_hypercall(struct kvm_hv_hcall *hc)
  2071. {
  2072. switch (hc->code) {
  2073. case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
  2074. case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
  2075. case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
  2076. case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
  2077. case HVCALL_SEND_IPI_EX:
  2078. return true;
  2079. }
  2080. return false;
  2081. }
  2082. static void kvm_hv_hypercall_read_xmm(struct kvm_hv_hcall *hc)
  2083. {
  2084. int reg;
  2085. kvm_fpu_get();
  2086. for (reg = 0; reg < HV_HYPERCALL_MAX_XMM_REGISTERS; reg++)
  2087. _kvm_read_sse_reg(reg, &hc->xmm[reg]);
  2088. kvm_fpu_put();
  2089. }
/*
 * When 'enforce_cpuid' is set, check that the guest's cached Hyper-V CPUID
 * actually exposes the feature backing hypercall @code.  Returns true when
 * the hypercall is allowed to proceed.
 */
static bool hv_check_hypercall_access(struct kvm_vcpu_hv *hv_vcpu, u16 code)
{
	if (!hv_vcpu->enforce_cpuid)
		return true;

	switch (code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		/* Allowed only when enlightenments_ebx is meaningful (not 0/-1). */
		return hv_vcpu->cpuid_cache.enlightenments_ebx &&
			hv_vcpu->cpuid_cache.enlightenments_ebx != U32_MAX;
	case HVCALL_POST_MESSAGE:
		return hv_vcpu->cpuid_cache.features_ebx & HV_POST_MESSAGES;
	case HVCALL_SIGNAL_EVENT:
		return hv_vcpu->cpuid_cache.features_ebx & HV_SIGNAL_EVENTS;
	case HVCALL_POST_DEBUG_DATA:
	case HVCALL_RETRIEVE_DEBUG_DATA:
	case HVCALL_RESET_DEBUG_SESSION:
		/*
		 * Return 'true' when SynDBG is disabled so the resulting code
		 * will be HV_STATUS_INVALID_HYPERCALL_CODE.
		 */
		return !kvm_hv_is_syndbg_enabled(hv_vcpu->vcpu) ||
			hv_vcpu->cpuid_cache.features_ebx & HV_DEBUGGING;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		/* EX variants additionally require sparse-set support. */
		if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
		      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
			return false;
		fallthrough;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		return hv_vcpu->cpuid_cache.enlightenments_eax &
			HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
	case HVCALL_SEND_IPI_EX:
		if (!(hv_vcpu->cpuid_cache.enlightenments_eax &
		      HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED))
			return false;
		fallthrough;
	case HVCALL_SEND_IPI:
		return hv_vcpu->cpuid_cache.enlightenments_eax &
			HV_X64_CLUSTER_IPI_RECOMMENDED;
	case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX:
		return hv_vcpu->cpuid_cache.features_ebx &
			HV_ENABLE_EXTENDED_HYPERCALLS;
	default:
		break;
	}

	/* Unknown codes fall through; the dispatcher rejects them itself. */
	return true;
}
/*
 * Top-level Hyper-V hypercall dispatcher.  Decodes the hypercall input from
 * GPRs (and XMM registers for 'fast' calls), validates CPUID-based access,
 * and either handles the call in-kernel or exits to userspace.
 *
 * Returns 1 to resume the guest, 0 to exit to userspace.
 */
int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
{
	struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
	struct kvm_hv_hcall hc;
	u64 ret = HV_STATUS_SUCCESS;

	/*
	 * hypercall generates UD from non zero cpl and real mode
	 * per HYPER-V spec
	 */
	if (kvm_x86_call(get_cpl)(vcpu) != 0 || !is_protmode(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

#ifdef CONFIG_X86_64
	if (is_64_bit_hypercall(vcpu)) {
		hc.param = kvm_rcx_read(vcpu);
		hc.ingpa = kvm_rdx_read(vcpu);
		hc.outgpa = kvm_r8_read(vcpu);
	} else
#endif
	{
		/* 32-bit ABI: each 64-bit value is split across two GPRs. */
		hc.param = ((u64)kvm_rdx_read(vcpu) << 32) |
			   (kvm_rax_read(vcpu) & 0xffffffff);
		hc.ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
			   (kvm_rcx_read(vcpu) & 0xffffffff);
		hc.outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
			    (kvm_rsi_read(vcpu) & 0xffffffff);
	}

	/* Decode the hypercall input value bit fields. */
	hc.code = hc.param & 0xffff;
	hc.var_cnt = (hc.param & HV_HYPERCALL_VARHEAD_MASK) >> HV_HYPERCALL_VARHEAD_OFFSET;
	hc.fast = !!(hc.param & HV_HYPERCALL_FAST_BIT);
	hc.rep_cnt = (hc.param >> HV_HYPERCALL_REP_COMP_OFFSET) & 0xfff;
	hc.rep_idx = (hc.param >> HV_HYPERCALL_REP_START_OFFSET) & 0xfff;
	hc.rep = !!(hc.rep_cnt || hc.rep_idx);

	trace_kvm_hv_hypercall(hc.code, hc.fast, hc.var_cnt, hc.rep_cnt,
			       hc.rep_idx, hc.ingpa, hc.outgpa);

	if (unlikely(!hv_check_hypercall_access(hv_vcpu, hc.code))) {
		ret = HV_STATUS_ACCESS_DENIED;
		goto hypercall_complete;
	}

	if (unlikely(hc.param & HV_HYPERCALL_RSVD_MASK)) {
		ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
		goto hypercall_complete;
	}

	if (hc.fast && is_xmm_fast_hypercall(&hc)) {
		/* XMM input requires the corresponding CPUID feature bit. */
		if (unlikely(hv_vcpu->enforce_cpuid &&
			     !(hv_vcpu->cpuid_cache.features_edx &
			       HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE))) {
			kvm_queue_exception(vcpu, UD_VECTOR);
			return 1;
		}

		kvm_hv_hypercall_read_xmm(&hc);
	}

	switch (hc.code) {
	case HVCALL_NOTIFY_LONG_SPIN_WAIT:
		if (unlikely(hc.rep || hc.var_cnt)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		kvm_vcpu_on_spin(vcpu, true);
		break;
	case HVCALL_SIGNAL_EVENT:
		if (unlikely(hc.rep || hc.var_cnt)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hvcall_signal_event(vcpu, &hc);
		if (ret != HV_STATUS_INVALID_PORT_ID)
			break;
		fallthrough;	/* maybe userspace knows this conn_id */
	case HVCALL_POST_MESSAGE:
		/* don't bother userspace if it has no way to handle it */
		if (unlikely(hc.rep || hc.var_cnt || !to_hv_synic(vcpu)->active)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		goto hypercall_userspace_exit;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST:
		/* Non-EX variant must not carry a variable header. */
		if (unlikely(hc.var_cnt)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		fallthrough;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX:
		/* LIST variants are rep hypercalls starting at index 0. */
		if (unlikely(!hc.rep_cnt || hc.rep_idx)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, &hc);
		break;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE:
		if (unlikely(hc.var_cnt)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		fallthrough;
	case HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX:
		/* SPACE variants are not rep hypercalls. */
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_flush_tlb(vcpu, &hc);
		break;
	case HVCALL_SEND_IPI:
		if (unlikely(hc.var_cnt)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		fallthrough;
	case HVCALL_SEND_IPI_EX:
		if (unlikely(hc.rep)) {
			ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
			break;
		}
		ret = kvm_hv_send_ipi(vcpu, &hc);
		break;
	case HVCALL_POST_DEBUG_DATA:
	case HVCALL_RETRIEVE_DEBUG_DATA:
		if (unlikely(hc.fast)) {
			ret = HV_STATUS_INVALID_PARAMETER;
			break;
		}
		fallthrough;
	case HVCALL_RESET_DEBUG_SESSION: {
		struct kvm_hv_syndbg *syndbg = to_hv_syndbg(vcpu);

		if (!kvm_hv_is_syndbg_enabled(vcpu)) {
			ret = HV_STATUS_INVALID_HYPERCALL_CODE;
			break;
		}

		if (!(syndbg->options & HV_X64_SYNDBG_OPTION_USE_HCALLS)) {
			ret = HV_STATUS_OPERATION_DENIED;
			break;
		}
		goto hypercall_userspace_exit;
	}
	case HV_EXT_CALL_QUERY_CAPABILITIES ... HV_EXT_CALL_MAX:
		if (unlikely(hc.fast)) {
			ret = HV_STATUS_INVALID_PARAMETER;
			break;
		}
		goto hypercall_userspace_exit;
	default:
		ret = HV_STATUS_INVALID_HYPERCALL_CODE;
		break;
	}

hypercall_complete:
	return kvm_hv_hypercall_complete(vcpu, ret);

hypercall_userspace_exit:
	/* Punt the hypercall to userspace via KVM_EXIT_HYPERV_HCALL. */
	vcpu->run->exit_reason = KVM_EXIT_HYPERV;
	vcpu->run->hyperv.type = KVM_EXIT_HYPERV_HCALL;
	vcpu->run->hyperv.u.hcall.input = hc.param;
	vcpu->run->hyperv.u.hcall.params[0] = hc.ingpa;
	vcpu->run->hyperv.u.hcall.params[1] = hc.outgpa;
	vcpu->arch.complete_userspace_io = kvm_hv_hypercall_complete_userspace;
	return 0;
}
  2293. void kvm_hv_init_vm(struct kvm *kvm)
  2294. {
  2295. struct kvm_hv *hv = to_kvm_hv(kvm);
  2296. mutex_init(&hv->hv_lock);
  2297. idr_init(&hv->conn_to_evt);
  2298. }
  2299. void kvm_hv_destroy_vm(struct kvm *kvm)
  2300. {
  2301. struct kvm_hv *hv = to_kvm_hv(kvm);
  2302. struct eventfd_ctx *eventfd;
  2303. int i;
  2304. idr_for_each_entry(&hv->conn_to_evt, eventfd, i)
  2305. eventfd_ctx_put(eventfd);
  2306. idr_destroy(&hv->conn_to_evt);
  2307. }
  2308. static int kvm_hv_eventfd_assign(struct kvm *kvm, u32 conn_id, int fd)
  2309. {
  2310. struct kvm_hv *hv = to_kvm_hv(kvm);
  2311. struct eventfd_ctx *eventfd;
  2312. int ret;
  2313. eventfd = eventfd_ctx_fdget(fd);
  2314. if (IS_ERR(eventfd))
  2315. return PTR_ERR(eventfd);
  2316. mutex_lock(&hv->hv_lock);
  2317. ret = idr_alloc(&hv->conn_to_evt, eventfd, conn_id, conn_id + 1,
  2318. GFP_KERNEL_ACCOUNT);
  2319. mutex_unlock(&hv->hv_lock);
  2320. if (ret >= 0)
  2321. return 0;
  2322. if (ret == -ENOSPC)
  2323. ret = -EEXIST;
  2324. eventfd_ctx_put(eventfd);
  2325. return ret;
  2326. }
  2327. static int kvm_hv_eventfd_deassign(struct kvm *kvm, u32 conn_id)
  2328. {
  2329. struct kvm_hv *hv = to_kvm_hv(kvm);
  2330. struct eventfd_ctx *eventfd;
  2331. mutex_lock(&hv->hv_lock);
  2332. eventfd = idr_remove(&hv->conn_to_evt, conn_id);
  2333. mutex_unlock(&hv->hv_lock);
  2334. if (!eventfd)
  2335. return -ENOENT;
  2336. synchronize_srcu(&kvm->srcu);
  2337. eventfd_ctx_put(eventfd);
  2338. return 0;
  2339. }
  2340. int kvm_vm_ioctl_hv_eventfd(struct kvm *kvm, struct kvm_hyperv_eventfd *args)
  2341. {
  2342. if ((args->flags & ~KVM_HYPERV_EVENTFD_DEASSIGN) ||
  2343. (args->conn_id & ~KVM_HYPERV_CONN_ID_MASK))
  2344. return -EINVAL;
  2345. if (args->flags == KVM_HYPERV_EVENTFD_DEASSIGN)
  2346. return kvm_hv_eventfd_deassign(kvm, args->conn_id);
  2347. return kvm_hv_eventfd_assign(kvm, args->conn_id, args->fd);
  2348. }
  2349. int kvm_get_hv_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid2 *cpuid,
  2350. struct kvm_cpuid_entry2 __user *entries)
  2351. {
  2352. uint16_t evmcs_ver = 0;
  2353. struct kvm_cpuid_entry2 cpuid_entries[] = {
  2354. { .function = HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS },
  2355. { .function = HYPERV_CPUID_INTERFACE },
  2356. { .function = HYPERV_CPUID_VERSION },
  2357. { .function = HYPERV_CPUID_FEATURES },
  2358. { .function = HYPERV_CPUID_ENLIGHTMENT_INFO },
  2359. { .function = HYPERV_CPUID_IMPLEMENT_LIMITS },
  2360. { .function = HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS },
  2361. { .function = HYPERV_CPUID_SYNDBG_INTERFACE },
  2362. { .function = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES },
  2363. { .function = HYPERV_CPUID_NESTED_FEATURES },
  2364. };
  2365. int i, nent = ARRAY_SIZE(cpuid_entries);
  2366. if (kvm_x86_ops.nested_ops->get_evmcs_version)
  2367. evmcs_ver = kvm_x86_ops.nested_ops->get_evmcs_version(vcpu);
  2368. if (cpuid->nent < nent)
  2369. return -E2BIG;
  2370. if (cpuid->nent > nent)
  2371. cpuid->nent = nent;
  2372. for (i = 0; i < nent; i++) {
  2373. struct kvm_cpuid_entry2 *ent = &cpuid_entries[i];
  2374. u32 signature[3];
  2375. switch (ent->function) {
  2376. case HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS:
  2377. memcpy(signature, "Linux KVM Hv", 12);
  2378. ent->eax = HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES;
  2379. ent->ebx = signature[0];
  2380. ent->ecx = signature[1];
  2381. ent->edx = signature[2];
  2382. break;
  2383. case HYPERV_CPUID_INTERFACE:
  2384. ent->eax = HYPERV_CPUID_SIGNATURE_EAX;
  2385. break;
  2386. case HYPERV_CPUID_VERSION:
  2387. /*
  2388. * We implement some Hyper-V 2016 functions so let's use
  2389. * this version.
  2390. */
  2391. ent->eax = 0x00003839;
  2392. ent->ebx = 0x000A0000;
  2393. break;
  2394. case HYPERV_CPUID_FEATURES:
  2395. ent->eax |= HV_MSR_VP_RUNTIME_AVAILABLE;
  2396. ent->eax |= HV_MSR_TIME_REF_COUNT_AVAILABLE;
  2397. ent->eax |= HV_MSR_SYNIC_AVAILABLE;
  2398. ent->eax |= HV_MSR_SYNTIMER_AVAILABLE;
  2399. ent->eax |= HV_MSR_APIC_ACCESS_AVAILABLE;
  2400. ent->eax |= HV_MSR_HYPERCALL_AVAILABLE;
  2401. ent->eax |= HV_MSR_VP_INDEX_AVAILABLE;
  2402. ent->eax |= HV_MSR_RESET_AVAILABLE;
  2403. ent->eax |= HV_MSR_REFERENCE_TSC_AVAILABLE;
  2404. ent->eax |= HV_ACCESS_FREQUENCY_MSRS;
  2405. ent->eax |= HV_ACCESS_REENLIGHTENMENT;
  2406. ent->eax |= HV_ACCESS_TSC_INVARIANT;
  2407. ent->ebx |= HV_POST_MESSAGES;
  2408. ent->ebx |= HV_SIGNAL_EVENTS;
  2409. ent->ebx |= HV_ENABLE_EXTENDED_HYPERCALLS;
  2410. ent->edx |= HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE;
  2411. ent->edx |= HV_FEATURE_FREQUENCY_MSRS_AVAILABLE;
  2412. ent->edx |= HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE;
  2413. ent->ebx |= HV_DEBUGGING;
  2414. ent->edx |= HV_X64_GUEST_DEBUGGING_AVAILABLE;
  2415. ent->edx |= HV_FEATURE_DEBUG_MSRS_AVAILABLE;
  2416. ent->edx |= HV_FEATURE_EXT_GVA_RANGES_FLUSH;
  2417. /*
  2418. * Direct Synthetic timers only make sense with in-kernel
  2419. * LAPIC
  2420. */
  2421. if (!vcpu || lapic_in_kernel(vcpu))
  2422. ent->edx |= HV_STIMER_DIRECT_MODE_AVAILABLE;
  2423. break;
  2424. case HYPERV_CPUID_ENLIGHTMENT_INFO:
  2425. ent->eax |= HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED;
  2426. ent->eax |= HV_X64_APIC_ACCESS_RECOMMENDED;
  2427. ent->eax |= HV_X64_RELAXED_TIMING_RECOMMENDED;
  2428. if (!vcpu || lapic_in_kernel(vcpu))
  2429. ent->eax |= HV_X64_CLUSTER_IPI_RECOMMENDED;
  2430. ent->eax |= HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED;
  2431. if (evmcs_ver)
  2432. ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
  2433. if (!cpu_smt_possible())
  2434. ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
  2435. ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED;
  2436. /*
  2437. * Default number of spinlock retry attempts, matches
  2438. * HyperV 2016.
  2439. */
  2440. ent->ebx = 0x00000FFF;
  2441. break;
  2442. case HYPERV_CPUID_IMPLEMENT_LIMITS:
  2443. /* Maximum number of virtual processors */
  2444. ent->eax = KVM_MAX_VCPUS;
  2445. /*
  2446. * Maximum number of logical processors, matches
  2447. * HyperV 2016.
  2448. */
  2449. ent->ebx = 64;
  2450. break;
  2451. case HYPERV_CPUID_NESTED_FEATURES:
  2452. ent->eax = evmcs_ver;
  2453. ent->eax |= HV_X64_NESTED_DIRECT_FLUSH;
  2454. ent->eax |= HV_X64_NESTED_MSR_BITMAP;
  2455. ent->ebx |= HV_X64_NESTED_EVMCS1_PERF_GLOBAL_CTRL;
  2456. break;
  2457. case HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS:
  2458. memcpy(signature, "Linux KVM Hv", 12);
  2459. ent->eax = 0;
  2460. ent->ebx = signature[0];
  2461. ent->ecx = signature[1];
  2462. ent->edx = signature[2];
  2463. break;
  2464. case HYPERV_CPUID_SYNDBG_INTERFACE:
  2465. memcpy(signature, "VS#1\0\0\0\0\0\0\0\0", 12);
  2466. ent->eax = signature[0];
  2467. break;
  2468. case HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES:
  2469. ent->eax |= HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING;
  2470. break;
  2471. default:
  2472. break;
  2473. }
  2474. }
  2475. if (copy_to_user(entries, cpuid_entries,
  2476. nent * sizeof(struct kvm_cpuid_entry2)))
  2477. return -EFAULT;
  2478. return 0;
  2479. }