channel_mgmt.c

// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
#include <linux/sched/isolation.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel);

const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	  .allowed_in_isolated = false,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = true,
	},

	/* File copy */
	/*
	 * fcopy has always used a 16 KB ring buffer, which has worked
	 * well for many years.
	 */
	{ .pref_ring_size = 0x4000,
	  .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},

	/*
	 * Unknown GUID.
	 * A 64 KB ring buffer + 4 KB header should be sufficient for any
	 * Hyper-V device apart from HV_NIC and HV_SCSI. This entry avoids
	 * falling back to a much bigger (2 MB) ring size for unknown
	 * devices.
	 */
	{ .pref_ring_size = 0x11000,
	  .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	  .allowed_in_isolated = false,
	},
};
EXPORT_SYMBOL_GPL(vmbus_devs);

static const struct {
	guid_t guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID },
	{ HV_IMC_GUID },
};

/*
 * The rescinded channel may be blocked waiting for a response from the host;
 * take care of that.
 */
static void vmbus_rescind_cleanup(struct vmbus_channel *channel)
{
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
	channel->rescind = true;
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		if (msginfo->waiting_channel == channel) {
			complete(&msginfo->waitevent);
			break;
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

static bool is_unsupported_vmbus_devs(const guid_t *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (guid_equal(guid, &vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const guid_t *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return i;
	}
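
	/* No match in vmbus_devs[]; i == HV_UNKNOWN after the loop. */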
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @buf: Raw buffer channel data
 * @buflen: Length of the raw buffer channel data.
 * @fw_version: The framework versions we can support.
 * @fw_vercnt: The size of @fw_version.
 * @srv_version: The service versions we can support.
 * @srv_vercnt: The size of @srv_version.
 * @nego_fw_version: The selected framework version.
 * @nego_srv_version: The selected service version.
 *
 * Note: Versions are given in decreasing order.
 *
 * Set up and fill in default negotiate response message.
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp, u8 *buf,
			       u32 buflen, const int *fw_version, int fw_vercnt,
			       const int *srv_version, int srv_vercnt,
			       int *nego_fw_version, int *nego_srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i, j;
	bool found_match = false;
	struct icmsg_negotiate *negop;

	/* Check that there's enough space for icframe_vercnt, icmsg_vercnt */
	if (buflen < ICMSG_HDR + offsetof(struct icmsg_negotiate, reserved)) {
		pr_err_ratelimited("Invalid icmsg negotiate\n");
		return false;
	}

	icmsghdrp->icmsgsize = 0x10;
	negop = (struct icmsg_negotiate *)&buf[ICMSG_HDR];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/* Validate negop packet */
	if (icframe_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    icmsg_major > IC_VERSION_NEGOTIATION_MAX_VER_COUNT ||
	    ICMSG_NEGOTIATE_PKT_SIZE(icframe_major, icmsg_major) > buflen) {
		pr_err_ratelimited("Invalid icmsg negotiate - icframe_major: %u, icmsg_major: %u\n",
				   icframe_major, icmsg_major);
		goto fw_error;
	}

	/*
	 * Select the framework version number we will
	 * support.
	 */
	for (i = 0; i < fw_vercnt; i++) {
		fw_major = (fw_version[i] >> 16);
		fw_minor = (fw_version[i] & 0xFFFF);

		for (j = 0; j < negop->icframe_vercnt; j++) {
			if ((negop->icversion_data[j].major == fw_major) &&
			    (negop->icversion_data[j].minor == fw_minor)) {
				icframe_major = negop->icversion_data[j].major;
				icframe_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = 0; i < srv_vercnt; i++) {
		srv_major = (srv_version[i] >> 16);
		srv_minor = (srv_version[i] & 0xFFFF);

		for (j = negop->icframe_vercnt;
		     (j < negop->icframe_vercnt + negop->icmsg_vercnt);
		     j++) {
			if ((negop->icversion_data[j].major == srv_major) &&
			    (negop->icversion_data[j].minor == srv_minor)) {
				icmsg_major = negop->icversion_data[j].major;
				icmsg_minor = negop->icversion_data[j].minor;
				found_match = true;
				break;
			}
		}

		if (found_match)
			break;
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */
fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}
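
	/* Negotiated versions are reported packed as (major << 16) | minor. */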
	if (nego_fw_version)
		*nego_fw_version = (icframe_major << 16) | icframe_minor;

	if (nego_srv_version)
		*nego_srv_version = (icmsg_major << 16) | icmsg_minor;

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;

	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	spin_lock_init(&channel->sched_lock);
	init_completion(&channel->rescind_event);

	INIT_LIST_HEAD(&channel->sc_list);

	tasklet_init(&channel->callback_event,
		     vmbus_on_event, (unsigned long)channel);

	hv_ringbuffer_pre_init(channel);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	tasklet_kill(&channel->callback_event);
	vmbus_remove_channel_attr_group(channel);
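
	/*
	 * Drop the channel's kobject reference; once the refcount reaches
	 * zero, the channel is freed from the kobject release callback.
	 */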
	kobject_put(&channel->kobj);
}

void vmbus_channel_map_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	/*
	 * The mapping of the channel's relid is visible from the CPUs that
	 * execute vmbus_chan_sched() by the time that vmbus_chan_sched() will
	 * execute:
	 *
	 * (a) In the "normal (i.e., not resuming from hibernation)" path,
	 *     the full barrier in virt_store_mb() guarantees that the store
	 *     is propagated to all CPUs before the add_channel_work work
	 *     is queued.  In turn, add_channel_work is queued before the
	 *     channel's ring buffer is allocated/initialized and the
	 *     OPENCHANNEL message for the channel is sent in vmbus_open().
	 *     Hyper-V won't start sending the interrupts for the channel
	 *     before the OPENCHANNEL message is acked.  The memory barrier
	 *     in vmbus_chan_sched() -> sync_test_and_clear_bit() ensures
	 *     that vmbus_chan_sched() must find the channel's relid in
	 *     recv_int_page before retrieving the channel pointer from the
	 *     array of channels.
	 *
	 * (b) In the "resuming from hibernation" path, the virt_store_mb()
	 *     guarantees that the store is propagated to all CPUs before
	 *     the VMBus connection is marked as ready for the resume event
	 *     (cf. check_ready_for_resume_event()).  The interrupt handler
	 *     of the VMBus driver and vmbus_chan_sched() can not run before
	 *     vmbus_bus_resume() has completed execution (cf. resume_noirq).
	 */
	virt_store_mb(
		vmbus_connection.channels[channel->offermsg.child_relid],
		channel);
}

void vmbus_channel_unmap_relid(struct vmbus_channel *channel)
{
	if (WARN_ON(channel->offermsg.child_relid >= MAX_CHANNEL_RELIDS))
		return;
	WRITE_ONCE(
		vmbus_connection.channels[channel->offermsg.child_relid],
		NULL);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;
	int ret;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	ret = vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
			     true);

	trace_vmbus_release_relid(&msg, ret);
}

void hv_process_channel_removal(struct vmbus_channel *channel)
{
	lockdep_assert_held(&vmbus_connection.channel_mutex);
	BUG_ON(!channel->rescind);

	/*
	 * hv_process_channel_removal() could find INVALID_RELID only for
	 * hv_sock channels.  See the inline comments in vmbus_onoffer().
	 */
	WARN_ON(channel->offermsg.child_relid == INVALID_RELID &&
		!is_hvsock_channel(channel));

	/*
	 * Upon suspend, an in-use hv_sock channel is removed from the array of
	 * channels and the relid is invalidated.  After hibernation, when the
	 * user-space application destroys the channel, it's unnecessary and
	 * unsafe to remove the channel from the array of channels.  See also
	 * the inline comments before the call of vmbus_release_relid() below.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_channel_unmap_relid(channel);
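
	/*
	 * A primary channel is linked on vmbus_connection.chn_list; a
	 * sub-channel is linked on its primary channel's sc_list.
	 */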
	if (channel->primary_channel == NULL)
		list_del(&channel->listentry);
	else
		list_del(&channel->sc_list);

	/*
	 * If this is a "perf" channel, update the hv_numa_map[] masks so that
	 * init_vp_index() can (re-)use the CPU.
	 */
	if (hv_is_perf_channel(channel))
		hv_clear_allocated_cpu(channel->target_cpu);

	/*
	 * Upon suspend, an in-use hv_sock channel is marked as "rescinded" and
	 * the relid is invalidated; after hibernation, when the user-space app
	 * destroys the channel, the relid is INVALID_RELID, and in this case
	 * it's unnecessary and unsafe to release the old relid, since the same
	 * relid can refer to a completely different channel now.
	 */
	if (channel->offermsg.child_relid != INVALID_RELID)
		vmbus_release_relid(channel->offermsg.child_relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
				 listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/* Note: the function can run concurrently for primary/sub channels. */
static void vmbus_add_channel_work(struct work_struct *work)
{
	struct vmbus_channel *newchannel =
		container_of(work, struct vmbus_channel, add_channel_work);
	struct vmbus_channel *primary_channel = newchannel->primary_channel;
	int ret;

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (primary_channel != NULL) {
		/* newchannel is a sub-channel. */
		struct hv_device *dev = primary_channel->device_obj;

		if (vmbus_add_channel_kobj(dev, newchannel))
			goto err_deq_chan;

		if (primary_channel->sc_creation_callback != NULL)
			primary_channel->sc_creation_callback(newchannel);

		newchannel->probe_done = true;
		return;
	}

	/*
	 * Start the process of binding the primary channel to the driver
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = newchannel->device_id;
	/*
	 * Add the new device to the bus.  This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 *
	 * If vmbus_device_register() fails, the 'device_obj' is freed in
	 * vmbus_device_release() as called by device_unregister() in the
	 * error path of vmbus_device_register().  In the outside error
	 * path, there's no need to free it.
	 */
	ret = vmbus_device_register(newchannel->device_obj);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
		       newchannel->offermsg.child_relid);
		goto err_deq_chan;
	}

	newchannel->probe_done = true;
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);

	/*
	 * We need to set the flag, otherwise
	 * vmbus_onoffer_rescind() can be blocked.
	 */
	newchannel->probe_done = true;

	if (primary_channel == NULL)
		list_del(&newchannel->listentry);
	else
		list_del(&newchannel->sc_list);

	/* vmbus_process_offer() has mapped the channel. */
	vmbus_channel_unmap_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);

	vmbus_release_relid(newchannel->offermsg.child_relid);

	free_channel(newchannel);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	struct workqueue_struct *wq;
	bool fnew = true;

	/*
	 * Synchronize vmbus_process_offer() and CPU hotplugging:
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_process_offer()]	[Hot removal of the CPU]
	 *
	 * CPUS_READ_LOCK		CPUS_WRITE_LOCK
	 * LOAD cpu_online_mask		SEARCH chn_list
	 * STORE target_cpu		LOAD target_cpu
	 * INSERT chn_list		STORE cpu_online_mask
	 * CPUS_READ_UNLOCK		CPUS_WRITE_UNLOCK
	 *
	 * Forbids: CPU1's LOAD from *not* seeing CPU2's STORE &&
	 *	    CPU2's SEARCH from *not* seeing CPU1's INSERT
	 *
	 * Forbids: CPU2's SEARCH from seeing CPU1's INSERT &&
	 *	    CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	cpus_read_lock();

	/*
	 * Serializes the modifications of the chn_list list as well as
	 * the accesses to next_numa_node_id in init_vp_index().
	 */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (guid_equal(&channel->offermsg.offer.if_type,
			       &newchannel->offermsg.offer.if_type) &&
		    guid_equal(&channel->offermsg.offer.if_instance,
			       &newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			newchannel->primary_channel = channel;
			break;
		}
	}

	init_vp_index(newchannel);

	/* Remember the channels that should be cleaned up upon suspend. */
	if (is_hvsock_channel(newchannel) || is_sub_channel(newchannel))
		atomic_inc(&vmbus_connection.nr_chan_close_on_suspend);

	/*
	 * Now that we have acquired the channel_mutex,
	 * we can release the potentially racing rescind thread.
	 */
	atomic_dec(&vmbus_connection.offer_in_progress);

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
	} else {
		/*
		 * Check to see if this is a valid sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index == 0) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			cpus_read_unlock();
			/*
			 * Don't call free_channel(), because newchannel->kobj
			 * is not initialized yet.
			 */
			kfree(newchannel);
			WARN_ON_ONCE(1);
			return;
		}
		/*
		 * Process the sub-channel.
		 */
		list_add_tail(&newchannel->sc_list, &channel->sc_list);
	}

	vmbus_channel_map_relid(newchannel);

	mutex_unlock(&vmbus_connection.channel_mutex);
	cpus_read_unlock();

	/*
	 * vmbus_process_offer() mustn't call channel->sc_creation_callback()
	 * directly for sub-channels, because sc_creation_callback() ->
	 * vmbus_open() may never get the host's response to the
	 * OPEN_CHANNEL message (the host may rescind a channel at any time,
	 * e.g. in the case of hot removing a NIC), and vmbus_onoffer_rescind()
	 * may not wake up the vmbus_open() as it's blocked due to a non-zero
	 * vmbus_connection.offer_in_progress, and finally we have a deadlock.
	 *
	 * The above is also true for primary channels, if the related device
	 * drivers use sync probing mode by default.
	 *
	 * And, usually the handling of primary channels and sub-channels can
	 * depend on each other, so we should offload them to different
	 * workqueues to avoid possible deadlock, e.g. in sync-probing mode,
	 * NIC1's netvsc_subchan_work() can race with NIC2's netvsc_probe() ->
	 * rtnl_lock(), and causes deadlock: the former gets the rtnl_lock
	 * and waits for all the sub-channels to appear, but the latter
	 * can't get the rtnl_lock and this blocks the handling of
	 * sub-channels.
	 */
	INIT_WORK(&newchannel->add_channel_work, vmbus_add_channel_work);
	wq = fnew ? vmbus_connection.handle_primary_chan_wq :
		    vmbus_connection.handle_sub_chan_wq;
	queue_work(wq, &newchannel->add_channel_work);
}

/*
 * Check if the given CPU is used by other channels of the same device.
 * It should only be called by init_vp_index().
 */
static bool hv_cpuself_used(u32 cpu, struct vmbus_channel *chn)
{
	struct vmbus_channel *primary = chn->primary_channel;
	struct vmbus_channel *sc;

	lockdep_assert_held(&vmbus_connection.channel_mutex);

	if (!primary)
		return false;

	if (primary->target_cpu == cpu)
		return true;

	list_for_each_entry(sc, &primary->sc_list, sc_list)
		if (sc != chn && sc->target_cpu == cpu)
			return true;

	return false;
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * We can statically distribute the incoming channel interrupt load
 * by binding a channel to a VCPU.
 *
 * For non-performance critical channels we assign the VMBUS_CONNECT_CPU.
 * Performance critical channels will be distributed evenly among all
 * the available NUMA nodes.  Once the node is assigned, we will assign
 * the CPU based on a simple round robin scheme.
 */
static void init_vp_index(struct vmbus_channel *channel)
{
	bool perf_chn = hv_is_perf_channel(channel);
	u32 i, ncpu = num_online_cpus();
	cpumask_var_t available_mask;
	struct cpumask *allocated_mask;
	const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
	u32 target_cpu;
	int numa_node;

	if (!perf_chn ||
	    !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
	    cpumask_empty(hk_mask)) {
		/*
		 * Bind the channel to VMBUS_CONNECT_CPU if the channel is
		 * not a performance critical channel, if alloc_cpumask_var()
		 * failed, or if all the CPUs are isolated.
		 */
		channel->target_cpu = VMBUS_CONNECT_CPU;
		if (perf_chn)
			hv_set_allocated_cpu(VMBUS_CONNECT_CPU);
		return;
	}
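
	/*
	 * Try up to ncpu + 1 candidate CPUs: past the first ncpu attempts,
	 * a CPU already used by another channel of this device is accepted
	 * rather than searching forever (see the break condition below).
	 */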
	for (i = 1; i <= ncpu + 1; i++) {
		while (true) {
			numa_node = next_numa_node_id++;
			if (numa_node == nr_node_ids) {
				next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(numa_node)))
				continue;
			break;
		}
		allocated_mask = &hv_context.hv_numa_map[numa_node];

retry:
		cpumask_xor(available_mask, allocated_mask,
			    cpumask_of_node(numa_node));
		cpumask_and(available_mask, available_mask, hk_mask);
		if (cpumask_empty(available_mask)) {
			/*
			 * We have cycled through all the CPUs in the node;
			 * reset the allocated map.
			 */
			cpumask_clear(allocated_mask);
			goto retry;
		}

		target_cpu = cpumask_first(available_mask);
		cpumask_set_cpu(target_cpu, allocated_mask);
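
		/*
		 * Take the candidate CPU if this channel's index is at least
		 * ncpu, if the attempts are exhausted (i > ncpu), or if no
		 * other channel of this device is bound to this CPU.
		 */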
		if (channel->offermsg.offer.sub_channel_index >= ncpu ||
		    i > ncpu || !hv_cpuself_used(target_cpu, channel))
			break;
	}

	channel->target_cpu = target_cpu;

	free_cpumask_var(available_mask);
}

#define UNLOAD_DELAY_UNIT_MS	10		/* 10 milliseconds */
#define UNLOAD_WAIT_MS		(100*1000)	/* 100 seconds */
#define UNLOAD_WAIT_LOOPS	(UNLOAD_WAIT_MS/UNLOAD_DELAY_UNIT_MS)
#define UNLOAD_MSG_MS		(5*1000)	/* Every 5 seconds */
#define UNLOAD_MSG_LOOPS	(UNLOAD_MSG_MS/UNLOAD_DELAY_UNIT_MS)
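/* That is, 10000 polls of 10 ms each (100 s total), with a notice every 500 polls (5 s). */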

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type, i;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version.  When
	 * we're crashing on a different CPU let's hope that the IRQ handler on
	 * the CPU which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event.  If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 *
	 * Wait up to 100 seconds since an Azure host must write back any dirty
	 * data in its disk cache before the VMbus UNLOAD request will
	 * complete.  This flushing has been empirically observed to take up
	 * to 50 seconds in cases with a lot of dirty data, so allow additional
	 * leeway and for inaccuracies in mdelay().  But eventually time out so
	 * that the panic path can't get hung forever in case the response
	 * message isn't seen.
	 */
	for (i = 1; i <= UNLOAD_WAIT_LOOPS; i++) {
		if (completion_done(&vmbus_connection.unload_event))
			goto completed;

		for_each_present_cpu(cpu) {
			struct hv_per_cpu_context *hv_cpu
				= per_cpu_ptr(hv_context.cpu_context, cpu);

			/*
			 * In a CoCo VM the synic_message_page is not allocated
			 * in hv_synic_alloc().  Instead it is set/cleared in
			 * hv_synic_enable_regs() and hv_synic_disable_regs()
			 * such that it is set only when the CPU is online.  If
			 * not all present CPUs are online, the message page
			 * might be NULL, so skip such CPUs.
			 */
			page_addr = hv_cpu->synic_message_page;
			if (!page_addr)
				continue;

			msg = (struct hv_message *)page_addr
				+ VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		/*
		 * Give a notice periodically so someone watching the
		 * serial output won't think it is completely hung.
		 */
		if (!(i % UNLOAD_MSG_LOOPS))
			pr_notice("Waiting for VMBus UNLOAD to complete\n");

		mdelay(UNLOAD_DELAY_UNIT_MS);
	}
	pr_err("Continuing even though VMBus UNLOAD did not complete\n");

completed:
	/*
	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
	 * maybe-pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_present_cpu(cpu) {
		struct hv_per_cpu_context *hv_cpu
			= per_cpu_ptr(hv_context.cpu_context, cpu);

		page_addr = hv_cpu->synic_message_page;
		if (!page_addr)
			continue;

		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 *
	 * NB.  A malicious or compromised Hyper-V could send a spurious
	 * message of type CHANNELMSG_UNLOAD_RESPONSE, and trigger a call
	 * of the complete() below.  Make sure that unload_event has been
	 * initialized by the time this complete() is executed.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	if (xchg(&vmbus_connection.conn_state, DISCONNECTED) == DISCONNECTED)
		return;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	reinit_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash, and a crash can
	 * happen in interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

static void check_ready_for_resume_event(void)
{
	/*
	 * If all the old primary channels have been fixed up, then it's safe
	 * to resume.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_fixup_on_resume))
		complete(&vmbus_connection.ready_for_resume_event);
}

static void vmbus_setup_channel_state(struct vmbus_channel *channel,
				      struct vmbus_channel_offer_channel *offer)
{
	/*
	 * Setup state for signalling the host.
	 */
	channel->sig_event = VMBUS_EVENT_CONNECTION_ID;

	channel->is_dedicated_interrupt =
		(offer->is_dedicated_interrupt != 0);
	channel->sig_event = offer->connection_id;

	memcpy(&channel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	channel->monitor_grp = (u8)offer->monitorid / 32;
	channel->monitor_bit = (u8)offer->monitorid % 32;
	channel->device_id = hv_get_dev_type(channel);
}

/*
 * find_primary_channel_by_offer - Get the channel object given the new offer.
 * This is only used in the resume path of hibernation.
 */
static struct vmbus_channel *
find_primary_channel_by_offer(const struct vmbus_channel_offer_channel *offer)
{
	struct vmbus_channel *channel = NULL, *iter;
	const guid_t *inst1, *inst2;

	/* Ignore sub-channel offers. */
	if (offer->offer.sub_channel_index != 0)
		return NULL;

	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(iter, &vmbus_connection.chn_list, listentry) {
		inst1 = &iter->offermsg.offer.if_instance;
		inst2 = &offer->offer.if_instance;

		if (guid_equal(inst1, inst2)) {
			channel = iter;
			break;
		}
	}

	mutex_unlock(&vmbus_connection.channel_mutex);

	return channel;
}

static bool vmbus_is_valid_offer(const struct vmbus_channel_offer_channel *offer)
{
	const guid_t *guid = &offer->offer.if_type;
	u16 i;

	if (!hv_is_isolation_supported())
		return true;

	if (is_hvsock_offer(offer))
		return true;

	for (i = 0; i < ARRAY_SIZE(vmbus_devs); i++) {
		if (guid_equal(guid, &vmbus_devs[i].guid))
			return vmbus_devs[i].allowed_in_isolated;
	}
	return false;
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *oldchannel, *newchannel;
	size_t offer_sz;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	trace_vmbus_onoffer(offer);

	if (!vmbus_is_valid_offer(offer)) {
		pr_err_ratelimited("Invalid offer %d from the host supporting isolation\n",
				   offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		return;
	}

	oldchannel = find_primary_channel_by_offer(offer);

	if (oldchannel != NULL) {
		/*
		 * We're resuming from hibernation: all the sub-channel and
		 * hv_sock channels we had before the hibernation should have
		 * been cleaned up, and now we must be seeing a re-offered
		 * primary channel that we had before the hibernation.
		 */

		/*
		 * { Initially: channel relid = INVALID_RELID,
		 *		channels[valid_relid] = NULL }
		 *
		 * CPU1					CPU2
		 *
		 * [vmbus_onoffer()]			[vmbus_device_release()]
		 *
		 * LOCK channel_mutex			LOCK channel_mutex
		 * STORE channel relid = valid_relid	LOAD r1 = channel relid
		 * MAP_RELID channel			if (r1 != INVALID_RELID)
		 * UNLOCK channel_mutex			  UNMAP_RELID channel
		 *					UNLOCK channel_mutex
		 *
		 * Forbids: r1 == valid_relid &&
		 *		channels[valid_relid] == channel
		 *
		 * Note.  r1 can be INVALID_RELID only for an hv_sock channel.
		 * None of the hv_sock channels which were present before the
		 * suspend are re-offered upon the resume.  See the WARN_ON()
		 * in hv_process_channel_removal().
		 */
		mutex_lock(&vmbus_connection.channel_mutex);

		atomic_dec(&vmbus_connection.offer_in_progress);

		WARN_ON(oldchannel->offermsg.child_relid != INVALID_RELID);
		/* Fix up the relid. */
		oldchannel->offermsg.child_relid = offer->child_relid;

		offer_sz = sizeof(*offer);
		if (memcmp(offer, &oldchannel->offermsg, offer_sz) != 0) {
			/*
			 * This is not an error, since the host can also change
			 * the other field(s) of the offer, e.g. on WS RS5
			 * (Build 17763), the offer->connection_id of the
			 * Mellanox VF vmbus device can change when the host
			 * reoffers the device upon resume.
			 */
			pr_debug("vmbus offer changed: relid=%d\n",
				 offer->child_relid);

			print_hex_dump_debug("Old vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     &oldchannel->offermsg, offer_sz,
					     false);
			print_hex_dump_debug("New vmbus offer: ",
					     DUMP_PREFIX_OFFSET, 16, 4,
					     offer, offer_sz, false);

			/* Fix up the old channel. */
			vmbus_setup_channel_state(oldchannel, offer);
		}

		/* Add the channel back to the array of channels. */
		vmbus_channel_map_relid(oldchannel);
		check_ready_for_resume_event();

		mutex_unlock(&vmbus_connection.channel_mutex);
		return;
	}

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		vmbus_release_relid(offer->child_relid);
		atomic_dec(&vmbus_connection.offer_in_progress);
		pr_err("Unable to allocate channel object\n");
		return;
	}

	vmbus_setup_channel_state(newchannel, offer);

	vmbus_process_offer(newchannel);
}

static void check_ready_for_suspend_event(void)
{
	/*
	 * If all the sub-channels or hv_sock channels have been cleaned up,
	 * then it's safe to suspend.
	 */
	if (atomic_dec_and_test(&vmbus_connection.nr_chan_close_on_suspend))
		complete(&vmbus_connection.ready_for_suspend_event);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	struct device *dev;
	bool clean_up_chan_for_suspend;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	trace_vmbus_onoffer_rescind(rescind);

	/*
	 * The offer msg and the corresponding rescind msg
	 * from the host are guaranteed to be ordered -
	 * offer comes in first and then the rescind.
	 * Since we process these events in work elements,
	 * and with preemption, we may end up processing
	 * the events out of order.  We rely on the synchronization
	 * provided by offer_in_progress and by channel_mutex for
	 * ordering these events:
	 *
	 * { Initially: offer_in_progress = 1 }
	 *
	 * CPU1				CPU2
	 *
	 * [vmbus_onoffer()]		[vmbus_onoffer_rescind()]
	 *
	 * LOCK channel_mutex		WAIT_ON offer_in_progress == 0
	 * DECREMENT offer_in_progress	LOCK channel_mutex
	 * STORE channels[]		LOAD channels[]
	 * UNLOCK channel_mutex		UNLOCK channel_mutex
	 *
	 * Forbids: CPU2's LOAD from *not* seeing CPU1's STORE
	 */
	while (atomic_read(&vmbus_connection.offer_in_progress) != 0) {
		/*
		 * Wait here while any channel offer is still being
		 * processed.
		 */
		msleep(1);
	}
	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);
	if (channel != NULL) {
		/*
		 * Guarantee that no other instance of vmbus_onoffer_rescind()
		 * has got a reference to the channel object.  Synchronize on
		 * &vmbus_connection.channel_mutex.
		 */
		if (channel->rescind_ref) {
			mutex_unlock(&vmbus_connection.channel_mutex);
			return;
		}
		channel->rescind_ref = true;
	}
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (channel == NULL) {
		/*
		 * We failed in processing the offer message;
		 * we would have cleaned up the relid in that
		 * failure path.
		 */
		return;
	}

	clean_up_chan_for_suspend = is_hvsock_channel(channel) ||
				    is_sub_channel(channel);
	/*
	 * Before setting channel->rescind in vmbus_rescind_cleanup(), we
	 * should make sure the channel callback is not running any more.
	 */
	vmbus_reset_channel_cb(channel);

	/*
	 * Now wait for offer handling to complete.
	 */
	vmbus_rescind_cleanup(channel);
	while (READ_ONCE(channel->probe_done) == false) {
		/*
		 * Wait here until the probe of the channel has
		 * completed.
		 */
		msleep(1);
	}

	/*
	 * At this point, the rescind handling can proceed safely.
	 */

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);

			if (clean_up_chan_for_suspend)
				check_ready_for_suspend_event();

			return;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else if (channel->primary_channel != NULL) {
		/*
		 * Sub-channel is being rescinded.  Following is the channel
		 * close sequence when initiated from the driver (refer to
		 * vmbus_close() for details):
		 * 1. Close all sub-channels first
		 * 2. Then close the primary channel.
		 */
		mutex_lock(&vmbus_connection.channel_mutex);
		if (channel->state == CHANNEL_OPEN_STATE) {
			/*
			 * The channel is currently not open;
			 * it is safe for us to cleanup the channel.
			 */
			hv_process_channel_removal(channel);
		} else {
			complete(&channel->rescind_event);
		}
		mutex_unlock(&vmbus_connection.channel_mutex);
	}

	/* The "channel" may have been freed.  Do not access it any longer. */

	if (clean_up_chan_for_suspend)
		check_ready_for_suspend_event();
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	BUG_ON(!is_hvsock_channel(channel));

	/* We always get a rescind msg when a connection is closed. */
	while (!READ_ONCE(channel->probe_done) || !READ_ONCE(channel->rescind))
		msleep(1);

	vmbus_device_unregister(channel->device_obj);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	trace_vmbus_onopen_result(result);

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
				(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	trace_vmbus_ongpadl_created(gpadlcreated);

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
				(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onmodifychannel_response - Modify Channel response handler.
 *
 * This is invoked when we received a response to our channel modify request.
 * Find the matching request, copy the response and signal the requesting thread.
 */
static void vmbus_onmodifychannel_response(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_modifychannel_response *response;
	struct vmbus_channel_msginfo *msginfo;
	unsigned long flags;

	response = (struct vmbus_channel_modifychannel_response *)hdr;

	trace_vmbus_onmodifychannel_response(response);

	/*
	 * Find the modify msg, copy the response and signal/unblock the wait event.
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, msglistentry) {
		struct vmbus_channel_message_header *responseheader =
				(struct vmbus_channel_message_header *)msginfo->msg;

		if (responseheader->msgtype == CHANNELMSG_MODIFYCHANNEL) {
			struct vmbus_channel_modifychannel *modifymsg;

			modifymsg = (struct vmbus_channel_modifychannel *)msginfo->msg;
			if (modifymsg->child_relid == response->child_relid) {
				memcpy(&msginfo->response.modify_response, response,
				       sizeof(*response));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	trace_vmbus_ongpadl_torndown(gpadl_torndown);

	/*
	 * Find the teardown msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
				(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;

	trace_vmbus_onversion_response(version_response);

	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			       version_response,
			       sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
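/*
 * Each entry is { msgtype, handler_type, handler, min payload length }.
 * The handler_type field (cf. struct vmbus_channel_message_table_entry in
 * hyperv_vmbus.h) selects how the handler is invoked: 0 = blocking
 * (deferred to a work queue by vmbus_on_msg_dpc()), 1 = non-blocking
 * (called directly from the message DPC).
 */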
const struct vmbus_channel_message_table_entry
channel_message_table[CHANNELMSG_COUNT] = {
	{ CHANNELMSG_INVALID,			0, NULL, 0},
	{ CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer,
		sizeof(struct vmbus_channel_offer_channel)},
	{ CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind,
		sizeof(struct vmbus_channel_rescind_offer) },
	{ CHANNELMSG_REQUESTOFFERS,		0, NULL, 0},
	{ CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered, 0},
	{ CHANNELMSG_OPENCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_OPENCHANNEL_RESULT,	1, vmbus_onopen_result,
		sizeof(struct vmbus_channel_open_result)},
	{ CHANNELMSG_CLOSECHANNEL,		0, NULL, 0},
	{ CHANNELMSG_GPADL_HEADER,		0, NULL, 0},
	{ CHANNELMSG_GPADL_BODY,		0, NULL, 0},
	{ CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created,
		sizeof(struct vmbus_channel_gpadl_created)},
	{ CHANNELMSG_GPADL_TEARDOWN,		0, NULL, 0},
	{ CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown,
		sizeof(struct vmbus_channel_gpadl_torndown) },
	{ CHANNELMSG_RELID_RELEASED,		0, NULL, 0},
	{ CHANNELMSG_INITIATE_CONTACT,		0, NULL, 0},
	{ CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response,
		sizeof(struct vmbus_channel_version_response)},
	{ CHANNELMSG_UNLOAD,			0, NULL, 0},
	{ CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response, 0},
	{ CHANNELMSG_18,			0, NULL, 0},
	{ CHANNELMSG_19,			0, NULL, 0},
	{ CHANNELMSG_20,			0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_REQUEST,	0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL,		0, NULL, 0},
	{ CHANNELMSG_TL_CONNECT_RESULT,		0, NULL, 0},
	{ CHANNELMSG_MODIFYCHANNEL_RESPONSE,	1, vmbus_onmodifychannel_response,
		sizeof(struct vmbus_channel_modifychannel_response)},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(struct vmbus_channel_message_header *hdr)
{
	trace_vmbus_on_message(hdr);

	/*
	 * vmbus_on_msg_dpc() makes sure the hdr->msgtype here can not go
	 * out of bound and the message_handler pointer can not be NULL.
	 */
	channel_message_table[hdr->msgtype].message_handler(hdr);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kzalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
			     true);

	trace_vmbus_request_offers(ret);

	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);
		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				  void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
				    void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);