/* ring_buffer.c */
  1. // SPDX-License-Identifier: GPL-2.0-only
  2. /*
  3. *
  4. * Copyright (c) 2009, Microsoft Corporation.
  5. *
  6. * Authors:
  7. * Haiyang Zhang <haiyangz@microsoft.com>
  8. * Hank Janssen <hjanssen@microsoft.com>
  9. * K. Y. Srinivasan <kys@microsoft.com>
  10. */
  11. #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  12. #include <linux/kernel.h>
  13. #include <linux/mm.h>
  14. #include <linux/hyperv.h>
  15. #include <linux/uio.h>
  16. #include <linux/vmalloc.h>
  17. #include <linux/slab.h>
  18. #include <linux/prefetch.h>
  19. #include <linux/io.h>
  20. #include <asm/mshyperv.h>
  21. #include "hyperv_vmbus.h"
  22. #define VMBUS_PKT_TRAILER 8
  23. /*
  24. * When we write to the ring buffer, check if the host needs to
  25. * be signaled. Here is the details of this protocol:
  26. *
  27. * 1. The host guarantees that while it is draining the
  28. * ring buffer, it will set the interrupt_mask to
  29. * indicate it does not need to be interrupted when
  30. * new data is placed.
  31. *
  32. * 2. The host guarantees that it will completely drain
  33. * the ring buffer before exiting the read loop. Further,
  34. * once the ring buffer is empty, it will clear the
  35. * interrupt_mask and re-check to see if new data has
  36. * arrived.
  37. *
  38. * KYS: Oct. 30, 2016:
  39. * It looks like Windows hosts have logic to deal with DOS attacks that
  40. * can be triggered if it receives interrupts when it is not expecting
  41. * the interrupt. The host expects interrupts only when the ring
  42. * transitions from empty to non-empty (or full to non full on the guest
  43. * to host ring).
  44. * So, base the signaling decision solely on the ring state until the
  45. * host logic is fixed.
  46. */
/*
 * hv_signal_on_write() - decide whether to interrupt the host after a write.
 * @old_write: the outbound write_index as it was *before* the packet was
 *	       copied in (i.e. the offset of the packet just written)
 * @channel:   channel whose outbound (guest->host) ring was written
 *
 * Per the protocol described above, the host is signaled only when the ring
 * transitioned from empty to non-empty, which is exactly the case where the
 * pre-write write_index equals the host's read_index.  Increments the
 * intr_out_empty statistic when a signal is sent.
 */
static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel)
{
	struct hv_ring_buffer_info *rbi = &channel->outbound;

	/*
	 * Full barrier: the write_index update must be visible before we
	 * sample interrupt_mask, pairing with the host's drain sequence.
	 */
	virt_mb();
	if (READ_ONCE(rbi->ring_buffer->interrupt_mask))
		return;

	/* check interrupt_mask before read_index */
	virt_rmb();

	/*
	 * This is the only case we need to signal when the
	 * ring transitions from being empty to non-empty.
	 */
	if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) {
		++channel->intr_out_empty;
		vmbus_setevent(channel);
	}
}
  64. /* Get the next write location for the specified ring buffer. */
  65. static inline u32
  66. hv_get_next_write_location(struct hv_ring_buffer_info *ring_info)
  67. {
  68. u32 next = ring_info->ring_buffer->write_index;
  69. return next;
  70. }
  71. /* Set the next write location for the specified ring buffer. */
  72. static inline void
  73. hv_set_next_write_location(struct hv_ring_buffer_info *ring_info,
  74. u32 next_write_location)
  75. {
  76. ring_info->ring_buffer->write_index = next_write_location;
  77. }
  78. /* Get the size of the ring buffer. */
  79. static inline u32
  80. hv_get_ring_buffersize(const struct hv_ring_buffer_info *ring_info)
  81. {
  82. return ring_info->ring_datasize;
  83. }
  84. /* Get the read and write indices as u64 of the specified ring buffer. */
  85. static inline u64
  86. hv_get_ring_bufferindices(struct hv_ring_buffer_info *ring_info)
  87. {
  88. return (u64)ring_info->ring_buffer->write_index << 32;
  89. }
  90. /*
  91. * Helper routine to copy from source to ring buffer.
  92. * Assume there is enough room. Handles wrap-around in dest case only!!
  93. */
  94. static u32 hv_copyto_ringbuffer(
  95. struct hv_ring_buffer_info *ring_info,
  96. u32 start_write_offset,
  97. const void *src,
  98. u32 srclen)
  99. {
  100. void *ring_buffer = hv_get_ring_buffer(ring_info);
  101. u32 ring_buffer_size = hv_get_ring_buffersize(ring_info);
  102. memcpy(ring_buffer + start_write_offset, src, srclen);
  103. start_write_offset += srclen;
  104. if (start_write_offset >= ring_buffer_size)
  105. start_write_offset -= ring_buffer_size;
  106. return start_write_offset;
  107. }
  108. /*
  109. *
  110. * hv_get_ringbuffer_availbytes()
  111. *
  112. * Get number of bytes available to read and to write to
  113. * for the specified ring buffer
  114. */
  115. static void
  116. hv_get_ringbuffer_availbytes(const struct hv_ring_buffer_info *rbi,
  117. u32 *read, u32 *write)
  118. {
  119. u32 read_loc, write_loc, dsize;
  120. /* Capture the read/write indices before they changed */
  121. read_loc = READ_ONCE(rbi->ring_buffer->read_index);
  122. write_loc = READ_ONCE(rbi->ring_buffer->write_index);
  123. dsize = rbi->ring_datasize;
  124. *write = write_loc >= read_loc ? dsize - (write_loc - read_loc) :
  125. read_loc - write_loc;
  126. *read = dsize - *write;
  127. }
  128. /* Get various debug metrics for the specified ring buffer. */
  129. int hv_ringbuffer_get_debuginfo(struct hv_ring_buffer_info *ring_info,
  130. struct hv_ring_buffer_debug_info *debug_info)
  131. {
  132. u32 bytes_avail_towrite;
  133. u32 bytes_avail_toread;
  134. mutex_lock(&ring_info->ring_buffer_mutex);
  135. if (!ring_info->ring_buffer) {
  136. mutex_unlock(&ring_info->ring_buffer_mutex);
  137. return -EINVAL;
  138. }
  139. hv_get_ringbuffer_availbytes(ring_info,
  140. &bytes_avail_toread,
  141. &bytes_avail_towrite);
  142. debug_info->bytes_avail_toread = bytes_avail_toread;
  143. debug_info->bytes_avail_towrite = bytes_avail_towrite;
  144. debug_info->current_read_index = ring_info->ring_buffer->read_index;
  145. debug_info->current_write_index = ring_info->ring_buffer->write_index;
  146. debug_info->current_interrupt_mask
  147. = ring_info->ring_buffer->interrupt_mask;
  148. mutex_unlock(&ring_info->ring_buffer_mutex);
  149. return 0;
  150. }
  151. EXPORT_SYMBOL_GPL(hv_ringbuffer_get_debuginfo);
  152. /* Initialize a channel's ring buffer info mutex locks */
  153. void hv_ringbuffer_pre_init(struct vmbus_channel *channel)
  154. {
  155. mutex_init(&channel->inbound.ring_buffer_mutex);
  156. mutex_init(&channel->outbound.ring_buffer_mutex);
  157. }
  158. /* Initialize the ring buffer. */
  159. int hv_ringbuffer_init(struct hv_ring_buffer_info *ring_info,
  160. struct page *pages, u32 page_cnt, u32 max_pkt_size)
  161. {
  162. struct page **pages_wraparound;
  163. int i;
  164. BUILD_BUG_ON((sizeof(struct hv_ring_buffer) != PAGE_SIZE));
  165. /*
  166. * First page holds struct hv_ring_buffer, do wraparound mapping for
  167. * the rest.
  168. */
  169. pages_wraparound = kcalloc(page_cnt * 2 - 1,
  170. sizeof(struct page *),
  171. GFP_KERNEL);
  172. if (!pages_wraparound)
  173. return -ENOMEM;
  174. pages_wraparound[0] = pages;
  175. for (i = 0; i < 2 * (page_cnt - 1); i++)
  176. pages_wraparound[i + 1] =
  177. &pages[i % (page_cnt - 1) + 1];
  178. ring_info->ring_buffer = (struct hv_ring_buffer *)
  179. vmap(pages_wraparound, page_cnt * 2 - 1, VM_MAP,
  180. pgprot_decrypted(PAGE_KERNEL));
  181. kfree(pages_wraparound);
  182. if (!ring_info->ring_buffer)
  183. return -ENOMEM;
  184. /*
  185. * Ensure the header page is zero'ed since
  186. * encryption status may have changed.
  187. */
  188. memset(ring_info->ring_buffer, 0, HV_HYP_PAGE_SIZE);
  189. ring_info->ring_buffer->read_index =
  190. ring_info->ring_buffer->write_index = 0;
  191. /* Set the feature bit for enabling flow control. */
  192. ring_info->ring_buffer->feature_bits.value = 1;
  193. ring_info->ring_size = page_cnt << PAGE_SHIFT;
  194. ring_info->ring_size_div10_reciprocal =
  195. reciprocal_value(ring_info->ring_size / 10);
  196. ring_info->ring_datasize = ring_info->ring_size -
  197. sizeof(struct hv_ring_buffer);
  198. ring_info->priv_read_index = 0;
  199. /* Initialize buffer that holds copies of incoming packets */
  200. if (max_pkt_size) {
  201. ring_info->pkt_buffer = kzalloc(max_pkt_size, GFP_KERNEL);
  202. if (!ring_info->pkt_buffer)
  203. return -ENOMEM;
  204. ring_info->pkt_buffer_size = max_pkt_size;
  205. }
  206. spin_lock_init(&ring_info->ring_lock);
  207. return 0;
  208. }
  209. /* Cleanup the ring buffer. */
  210. void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info)
  211. {
  212. mutex_lock(&ring_info->ring_buffer_mutex);
  213. vunmap(ring_info->ring_buffer);
  214. ring_info->ring_buffer = NULL;
  215. mutex_unlock(&ring_info->ring_buffer_mutex);
  216. kfree(ring_info->pkt_buffer);
  217. ring_info->pkt_buffer = NULL;
  218. ring_info->pkt_buffer_size = 0;
  219. }
  220. /*
  221. * Check if the ring buffer spinlock is available to take or not; used on
  222. * atomic contexts, like panic path (see the Hyper-V framebuffer driver).
  223. */
  224. bool hv_ringbuffer_spinlock_busy(struct vmbus_channel *channel)
  225. {
  226. struct hv_ring_buffer_info *rinfo = &channel->outbound;
  227. return spin_is_locked(&rinfo->ring_lock);
  228. }
  229. EXPORT_SYMBOL_GPL(hv_ringbuffer_spinlock_busy);
/*
 * hv_ringbuffer_write() - copy one packet into the guest->host ring buffer.
 * @channel:   channel whose outbound ring is written
 * @kv_list:   scatter list describing the packet; kv_list[0].iov_base must
 *	       point at the struct vmpacket_descriptor header
 * @kv_count:  number of entries in @kv_list
 * @requestid: caller-supplied transaction id, used directly when no rqstor
 *	       id is allocated
 * @trans_id:  optional out-param receiving the transaction id actually
 *	       stored in the descriptor
 *
 * Returns 0 on success, -EAGAIN when the ring is full or a request id
 * cannot be allocated, -ENODEV when the channel has been rescinded.
 */
int hv_ringbuffer_write(struct vmbus_channel *channel,
			const struct kvec *kv_list, u32 kv_count,
			u64 requestid, u64 *trans_id)
{
	int i;
	u32 bytes_avail_towrite;
	/* start at sizeof(u64): room for the prev_indices trailer below */
	u32 totalbytes_towrite = sizeof(u64);
	u32 next_write_location;
	u32 old_write;
	u64 prev_indices;
	unsigned long flags;
	struct hv_ring_buffer_info *outring_info = &channel->outbound;
	struct vmpacket_descriptor *desc = kv_list[0].iov_base;
	u64 __trans_id, rqst_id = VMBUS_NO_RQSTOR;

	if (channel->rescind)
		return -ENODEV;

	for (i = 0; i < kv_count; i++)
		totalbytes_towrite += kv_list[i].iov_len;

	spin_lock_irqsave(&outring_info->ring_lock, flags);

	bytes_avail_towrite = hv_get_bytes_to_write(outring_info);

	/*
	 * If there is only room for the packet, assume it is full.
	 * Otherwise, the next time around, we think the ring buffer
	 * is empty since the read index == write index.
	 */
	if (bytes_avail_towrite <= totalbytes_towrite) {
		++channel->out_full_total;

		if (!channel->out_full_flag) {
			++channel->out_full_first;
			channel->out_full_flag = true;
		}

		spin_unlock_irqrestore(&outring_info->ring_lock, flags);
		return -EAGAIN;
	}

	channel->out_full_flag = false;

	/* Write to the ring buffer */
	next_write_location = hv_get_next_write_location(outring_info);

	/* remember the packet's start offset for signaling and trans_id fixup */
	old_write = next_write_location;

	for (i = 0; i < kv_count; i++) {
		next_write_location = hv_copyto_ringbuffer(outring_info,
						     next_write_location,
						     kv_list[i].iov_base,
						     kv_list[i].iov_len);
	}

	/*
	 * Allocate the request ID after the data has been copied into the
	 * ring buffer. Once this request ID is allocated, the completion
	 * path could find the data and free it.
	 */
	if (desc->flags == VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED) {
		if (channel->next_request_id_callback != NULL) {
			rqst_id = channel->next_request_id_callback(channel, requestid);
			if (rqst_id == VMBUS_RQST_ERROR) {
				spin_unlock_irqrestore(&outring_info->ring_lock, flags);
				return -EAGAIN;
			}
		}
	}
	/* Patch the trans_id directly in the copy already in the ring */
	desc = hv_get_ring_buffer(outring_info) + old_write;
	__trans_id = (rqst_id == VMBUS_NO_RQSTOR) ? requestid : rqst_id;
	/*
	 * Ensure the compiler doesn't generate code that reads the value of
	 * the transaction ID from the ring buffer, which is shared with the
	 * Hyper-V host and subject to being changed at any time.
	 */
	WRITE_ONCE(desc->trans_id, __trans_id);
	if (trans_id)
		*trans_id = __trans_id;

	/* Set previous packet start */
	prev_indices = hv_get_ring_bufferindices(outring_info);

	next_write_location = hv_copyto_ringbuffer(outring_info,
					     next_write_location,
					     &prev_indices,
					     sizeof(u64));

	/* Issue a full memory barrier before updating the write index */
	virt_mb();

	/* Now, update the write location */
	hv_set_next_write_location(outring_info, next_write_location);

	spin_unlock_irqrestore(&outring_info->ring_lock, flags);

	hv_signal_on_write(old_write, channel);

	if (channel->rescind) {
		if (rqst_id != VMBUS_NO_RQSTOR) {
			/* Reclaim request ID to avoid leak of IDs */
			if (channel->request_addr_callback != NULL)
				channel->request_addr_callback(channel, rqst_id);
		}
		return -ENODEV;
	}

	return 0;
}
/*
 * hv_ringbuffer_read() - read one packet from the host->guest ring buffer.
 * @channel:           channel whose inbound ring is read
 * @buffer:            destination for the packet payload
 * @buflen:            size of @buffer in bytes; must be non-zero
 * @buffer_actual_len: out: length of the packet (set even on -ENOBUFS,
 *		       and set to 0 when the ring is empty)
 * @requestid:         out: the packet's transaction id
 * @raw:               when true, copy the packet including its descriptor
 *		       header; when false, copy only the payload
 *
 * Returns 0 on success or when the ring is empty (callers distinguish the
 * empty case via *buffer_actual_len == 0), -EINVAL for a zero @buflen,
 * -ENOBUFS when @buffer is too small for the packet.
 */
int hv_ringbuffer_read(struct vmbus_channel *channel,
		       void *buffer, u32 buflen, u32 *buffer_actual_len,
		       u64 *requestid, bool raw)
{
	struct vmpacket_descriptor *desc;
	u32 packetlen, offset;

	if (unlikely(buflen == 0))
		return -EINVAL;

	*buffer_actual_len = 0;
	*requestid = 0;

	/* Make sure there is something to read */
	desc = hv_pkt_iter_first(channel);
	if (desc == NULL) {
		/*
		 * No error is set when there is even no header, drivers are
		 * supposed to analyze buffer_actual_len.
		 */
		return 0;
	}

	/* raw mode keeps the descriptor header in the returned data */
	offset = raw ? 0 : (desc->offset8 << 3);
	packetlen = (desc->len8 << 3) - offset;
	*buffer_actual_len = packetlen;
	*requestid = desc->trans_id;

	if (unlikely(packetlen > buflen))
		return -ENOBUFS;

	/* since ring is double mapped, only one copy is necessary */
	memcpy(buffer, (const char *)desc + offset, packetlen);

	/* Advance ring index to next packet descriptor */
	__hv_pkt_iter_next(channel, desc);

	/* Notify host of update */
	hv_pkt_iter_close(channel);

	return 0;
}
  354. /*
  355. * Determine number of bytes available in ring buffer after
  356. * the current iterator (priv_read_index) location.
  357. *
  358. * This is similar to hv_get_bytes_to_read but with private
  359. * read index instead.
  360. */
  361. static u32 hv_pkt_iter_avail(const struct hv_ring_buffer_info *rbi)
  362. {
  363. u32 priv_read_loc = rbi->priv_read_index;
  364. u32 write_loc;
  365. /*
  366. * The Hyper-V host writes the packet data, then uses
  367. * store_release() to update the write_index. Use load_acquire()
  368. * here to prevent loads of the packet data from being re-ordered
  369. * before the read of the write_index and potentially getting
  370. * stale data.
  371. */
  372. write_loc = virt_load_acquire(&rbi->ring_buffer->write_index);
  373. if (write_loc >= priv_read_loc)
  374. return write_loc - priv_read_loc;
  375. else
  376. return (rbi->ring_datasize - priv_read_loc) + write_loc;
  377. }
  378. /*
  379. * Get first vmbus packet from ring buffer after read_index
  380. *
  381. * If ring buffer is empty, returns NULL and no other action needed.
  382. */
  383. struct vmpacket_descriptor *hv_pkt_iter_first(struct vmbus_channel *channel)
  384. {
  385. struct hv_ring_buffer_info *rbi = &channel->inbound;
  386. struct vmpacket_descriptor *desc, *desc_copy;
  387. u32 bytes_avail, pkt_len, pkt_offset;
  388. hv_debug_delay_test(channel, MESSAGE_DELAY);
  389. bytes_avail = hv_pkt_iter_avail(rbi);
  390. if (bytes_avail < sizeof(struct vmpacket_descriptor))
  391. return NULL;
  392. bytes_avail = min(rbi->pkt_buffer_size, bytes_avail);
  393. desc = (struct vmpacket_descriptor *)(hv_get_ring_buffer(rbi) + rbi->priv_read_index);
  394. /*
  395. * Ensure the compiler does not use references to incoming Hyper-V values (which
  396. * could change at any moment) when reading local variables later in the code
  397. */
  398. pkt_len = READ_ONCE(desc->len8) << 3;
  399. pkt_offset = READ_ONCE(desc->offset8) << 3;
  400. /*
  401. * If pkt_len is invalid, set it to the smaller of hv_pkt_iter_avail() and
  402. * rbi->pkt_buffer_size
  403. */
  404. if (pkt_len < sizeof(struct vmpacket_descriptor) || pkt_len > bytes_avail)
  405. pkt_len = bytes_avail;
  406. /*
  407. * If pkt_offset is invalid, arbitrarily set it to
  408. * the size of vmpacket_descriptor
  409. */
  410. if (pkt_offset < sizeof(struct vmpacket_descriptor) || pkt_offset > pkt_len)
  411. pkt_offset = sizeof(struct vmpacket_descriptor);
  412. /* Copy the Hyper-V packet out of the ring buffer */
  413. desc_copy = (struct vmpacket_descriptor *)rbi->pkt_buffer;
  414. memcpy(desc_copy, desc, pkt_len);
  415. /*
  416. * Hyper-V could still change len8 and offset8 after the earlier read.
  417. * Ensure that desc_copy has legal values for len8 and offset8 that
  418. * are consistent with the copy we just made
  419. */
  420. desc_copy->len8 = pkt_len >> 3;
  421. desc_copy->offset8 = pkt_offset >> 3;
  422. return desc_copy;
  423. }
  424. EXPORT_SYMBOL_GPL(hv_pkt_iter_first);
  425. /*
  426. * Get next vmbus packet from ring buffer.
  427. *
  428. * Advances the current location (priv_read_index) and checks for more
  429. * data. If the end of the ring buffer is reached, then return NULL.
  430. */
  431. struct vmpacket_descriptor *
  432. __hv_pkt_iter_next(struct vmbus_channel *channel,
  433. const struct vmpacket_descriptor *desc)
  434. {
  435. struct hv_ring_buffer_info *rbi = &channel->inbound;
  436. u32 packetlen = desc->len8 << 3;
  437. u32 dsize = rbi->ring_datasize;
  438. hv_debug_delay_test(channel, MESSAGE_DELAY);
  439. /* bump offset to next potential packet */
  440. rbi->priv_read_index += packetlen + VMBUS_PKT_TRAILER;
  441. if (rbi->priv_read_index >= dsize)
  442. rbi->priv_read_index -= dsize;
  443. /* more data? */
  444. return hv_pkt_iter_first(channel);
  445. }
  446. EXPORT_SYMBOL_GPL(__hv_pkt_iter_next);
  447. /* How many bytes were read in this iterator cycle */
  448. static u32 hv_pkt_iter_bytes_read(const struct hv_ring_buffer_info *rbi,
  449. u32 start_read_index)
  450. {
  451. if (rbi->priv_read_index >= start_read_index)
  452. return rbi->priv_read_index - start_read_index;
  453. else
  454. return rbi->ring_datasize - start_read_index +
  455. rbi->priv_read_index;
  456. }
  457. /*
  458. * Update host ring buffer after iterating over packets. If the host has
  459. * stopped queuing new entries because it found the ring buffer full, and
  460. * sufficient space is being freed up, signal the host. But be careful to
  461. * only signal the host when necessary, both for performance reasons and
  462. * because Hyper-V protects itself by throttling guests that signal
  463. * inappropriately.
  464. *
  465. * Determining when to signal is tricky. There are three key data inputs
  466. * that must be handled in this order to avoid race conditions:
  467. *
  468. * 1. Update the read_index
  469. * 2. Read the pending_send_sz
  470. * 3. Read the current write_index
  471. *
  472. * The interrupt_mask is not used to determine when to signal. The
  473. * interrupt_mask is used only on the guest->host ring buffer when
  474. * sending requests to the host. The host does not use it on the host->
  475. * guest ring buffer to indicate whether it should be signaled.
  476. */
  477. void hv_pkt_iter_close(struct vmbus_channel *channel)
  478. {
  479. struct hv_ring_buffer_info *rbi = &channel->inbound;
  480. u32 curr_write_sz, pending_sz, bytes_read, start_read_index;
  481. /*
  482. * Make sure all reads are done before we update the read index since
  483. * the writer may start writing to the read area once the read index
  484. * is updated.
  485. */
  486. virt_rmb();
  487. start_read_index = rbi->ring_buffer->read_index;
  488. rbi->ring_buffer->read_index = rbi->priv_read_index;
  489. /*
  490. * Older versions of Hyper-V (before WS2102 and Win8) do not
  491. * implement pending_send_sz and simply poll if the host->guest
  492. * ring buffer is full. No signaling is needed or expected.
  493. */
  494. if (!rbi->ring_buffer->feature_bits.feat_pending_send_sz)
  495. return;
  496. /*
  497. * Issue a full memory barrier before making the signaling decision.
  498. * If reading pending_send_sz were to be reordered and happen
  499. * before we commit the new read_index, a race could occur. If the
  500. * host were to set the pending_send_sz after we have sampled
  501. * pending_send_sz, and the ring buffer blocks before we commit the
  502. * read index, we could miss sending the interrupt. Issue a full
  503. * memory barrier to address this.
  504. */
  505. virt_mb();
  506. /*
  507. * If the pending_send_sz is zero, then the ring buffer is not
  508. * blocked and there is no need to signal. This is far by the
  509. * most common case, so exit quickly for best performance.
  510. */
  511. pending_sz = READ_ONCE(rbi->ring_buffer->pending_send_sz);
  512. if (!pending_sz)
  513. return;
  514. /*
  515. * Ensure the read of write_index in hv_get_bytes_to_write()
  516. * happens after the read of pending_send_sz.
  517. */
  518. virt_rmb();
  519. curr_write_sz = hv_get_bytes_to_write(rbi);
  520. bytes_read = hv_pkt_iter_bytes_read(rbi, start_read_index);
  521. /*
  522. * We want to signal the host only if we're transitioning
  523. * from a "not enough free space" state to a "enough free
  524. * space" state. For example, it's possible that this function
  525. * could run and free up enough space to signal the host, and then
  526. * run again and free up additional space before the host has a
  527. * chance to clear the pending_send_sz. The 2nd invocation would
  528. * be a null transition from "enough free space" to "enough free
  529. * space", which doesn't warrant a signal.
  530. *
  531. * Exactly filling the ring buffer is treated as "not enough
  532. * space". The ring buffer always must have at least one byte
  533. * empty so the empty and full conditions are distinguishable.
  534. * hv_get_bytes_to_write() doesn't fully tell the truth in
  535. * this regard.
  536. *
  537. * So first check if we were in the "enough free space" state
  538. * before we began the iteration. If so, the host was not
  539. * blocked, and there's no need to signal.
  540. */
  541. if (curr_write_sz - bytes_read > pending_sz)
  542. return;
  543. /*
  544. * Similarly, if the new state is "not enough space", then
  545. * there's no need to signal.
  546. */
  547. if (curr_write_sz <= pending_sz)
  548. return;
  549. ++channel->intr_in_full;
  550. vmbus_setevent(channel);
  551. }
  552. EXPORT_SYMBOL_GPL(hv_pkt_iter_close);