Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Drivers: hv: vmbus: On write cleanup the logic to interrupt the host

Signal the host when we determine the host is to be signaled.
The current code determines the need to signal in the ringbuffer
code and actually issues the signal elsewhere. This can result
in the host viewing this interrupt as spurious since the host may also
poll the channel. Make the necessary adjustments.

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by

K. Y. Srinivasan and committed by
Greg Kroah-Hartman
1f6ee4e7 74198eb4

+35 -101
+10 -89
drivers/hv/channel.c
··· 39 39 * vmbus_setevent- Trigger an event notification on the specified 40 40 * channel. 41 41 */ 42 - static void vmbus_setevent(struct vmbus_channel *channel) 42 + void vmbus_setevent(struct vmbus_channel *channel) 43 43 { 44 44 struct hv_monitor_page *monitorpage; 45 45 ··· 65 65 vmbus_set_event(channel); 66 66 } 67 67 } 68 + EXPORT_SYMBOL_GPL(vmbus_setevent); 68 69 69 70 /* 70 71 * vmbus_open - Open the specified channel. ··· 636 635 u32 packetlen_aligned = ALIGN(packetlen, sizeof(u64)); 637 636 struct kvec bufferlist[3]; 638 637 u64 aligned_data = 0; 639 - int ret; 640 - bool signal = false; 641 638 bool lock = channel->acquire_ring_lock; 642 639 int num_vecs = ((bufferlen != 0) ? 3 : 1); 643 640 ··· 655 656 bufferlist[2].iov_base = &aligned_data; 656 657 bufferlist[2].iov_len = (packetlen_aligned - packetlen); 657 658 658 - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, num_vecs, 659 - &signal, lock, channel->signal_policy); 659 + return hv_ringbuffer_write(channel, bufferlist, num_vecs, 660 + lock, kick_q); 660 661 661 - /* 662 - * Signalling the host is conditional on many factors: 663 - * 1. The ring state changed from being empty to non-empty. 664 - * This is tracked by the variable "signal". 665 - * 2. The variable kick_q tracks if more data will be placed 666 - * on the ring. We will not signal if more data is 667 - * to be placed. 668 - * 669 - * Based on the channel signal state, we will decide 670 - * which signaling policy will be applied. 671 - * 672 - * If we cannot write to the ring-buffer; signal the host 673 - * even if we may not have written anything. This is a rare 674 - * enough condition that it should not matter. 675 - * NOTE: in this case, the hvsock channel is an exception, because 676 - * it looks the host side's hvsock implementation has a throttling 677 - * mechanism which can hurt the performance otherwise. 678 - * 679 - * KYS: Oct. 
30, 2016: 680 - * It looks like Windows hosts have logic to deal with DOS attacks that 681 - * can be triggered if it receives interrupts when it is not expecting 682 - * the interrupt. The host expects interrupts only when the ring 683 - * transitions from empty to non-empty (or full to non full on the guest 684 - * to host ring). 685 - * So, base the signaling decision solely on the ring state until the 686 - * host logic is fixed. 687 - */ 688 - 689 - if (((ret == 0) && signal)) 690 - vmbus_setevent(channel); 691 - 692 - return ret; 693 662 } 694 663 EXPORT_SYMBOL(vmbus_sendpacket_ctl); 695 664 ··· 698 731 u32 flags, 699 732 bool kick_q) 700 733 { 701 - int ret; 702 734 int i; 703 735 struct vmbus_channel_packet_page_buffer desc; 704 736 u32 descsize; ··· 705 739 u32 packetlen_aligned; 706 740 struct kvec bufferlist[3]; 707 741 u64 aligned_data = 0; 708 - bool signal = false; 709 742 bool lock = channel->acquire_ring_lock; 710 743 711 744 if (pagecount > MAX_PAGE_BUFFER_COUNT) ··· 742 777 bufferlist[2].iov_base = &aligned_data; 743 778 bufferlist[2].iov_len = (packetlen_aligned - packetlen); 744 779 745 - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, 746 - &signal, lock, channel->signal_policy); 747 - 748 - /* 749 - * Signalling the host is conditional on many factors: 750 - * 1. The ring state changed from being empty to non-empty. 751 - * This is tracked by the variable "signal". 752 - * 2. The variable kick_q tracks if more data will be placed 753 - * on the ring. We will not signal if more data is 754 - * to be placed. 755 - * 756 - * Based on the channel signal state, we will decide 757 - * which signaling policy will be applied. 758 - * 759 - * If we cannot write to the ring-buffer; signal the host 760 - * even if we may not have written anything. This is a rare 761 - * enough condition that it should not matter. 762 - * 763 - * KYS: Oct. 
30, 2016: 764 - * It looks like Windows hosts have logic to deal with DOS attacks that 765 - * can be triggered if it receives interrupts when it is not expecting 766 - * the interrupt. The host expects interrupts only when the ring 767 - * transitions from empty to non-empty (or full to non full on the guest 768 - * to host ring). 769 - * So, base the signaling decision solely on the ring state until the 770 - * host logic is fixed. 771 - */ 772 - 773 - if (((ret == 0) && signal)) 774 - vmbus_setevent(channel); 775 - 776 - return ret; 780 + return hv_ringbuffer_write(channel, bufferlist, 3, 781 + lock, kick_q); 777 782 } 778 783 EXPORT_SYMBOL_GPL(vmbus_sendpacket_pagebuffer_ctl); 779 784 ··· 774 839 u32 desc_size, 775 840 void *buffer, u32 bufferlen, u64 requestid) 776 841 { 777 - int ret; 778 842 u32 packetlen; 779 843 u32 packetlen_aligned; 780 844 struct kvec bufferlist[3]; 781 845 u64 aligned_data = 0; 782 - bool signal = false; 783 846 bool lock = channel->acquire_ring_lock; 784 847 785 848 packetlen = desc_size + bufferlen; ··· 798 865 bufferlist[2].iov_base = &aligned_data; 799 866 bufferlist[2].iov_len = (packetlen_aligned - packetlen); 800 867 801 - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, 802 - &signal, lock, channel->signal_policy); 803 - 804 - if (ret == 0 && signal) 805 - vmbus_setevent(channel); 806 - 807 - return ret; 868 + return hv_ringbuffer_write(channel, bufferlist, 3, 869 + lock, true); 808 870 } 809 871 EXPORT_SYMBOL_GPL(vmbus_sendpacket_mpb_desc); 810 872 ··· 811 883 struct hv_multipage_buffer *multi_pagebuffer, 812 884 void *buffer, u32 bufferlen, u64 requestid) 813 885 { 814 - int ret; 815 886 struct vmbus_channel_packet_multipage_buffer desc; 816 887 u32 descsize; 817 888 u32 packetlen; 818 889 u32 packetlen_aligned; 819 890 struct kvec bufferlist[3]; 820 891 u64 aligned_data = 0; 821 - bool signal = false; 822 892 bool lock = channel->acquire_ring_lock; 823 893 u32 pfncount = 
NUM_PAGES_SPANNED(multi_pagebuffer->offset, 824 894 multi_pagebuffer->len); ··· 856 930 bufferlist[2].iov_base = &aligned_data; 857 931 bufferlist[2].iov_len = (packetlen_aligned - packetlen); 858 932 859 - ret = hv_ringbuffer_write(&channel->outbound, bufferlist, 3, 860 - &signal, lock, channel->signal_policy); 861 - 862 - if (ret == 0 && signal) 863 - vmbus_setevent(channel); 864 - 865 - return ret; 933 + return hv_ringbuffer_write(channel, bufferlist, 3, 934 + lock, true); 866 935 } 867 936 EXPORT_SYMBOL_GPL(vmbus_sendpacket_multipagebuffer); 868 937
+3 -3
drivers/hv/hyperv_vmbus.h
··· 527 527 528 528 void hv_ringbuffer_cleanup(struct hv_ring_buffer_info *ring_info); 529 529 530 - int hv_ringbuffer_write(struct hv_ring_buffer_info *ring_info, 530 + int hv_ringbuffer_write(struct vmbus_channel *channel, 531 531 struct kvec *kv_list, 532 - u32 kv_count, bool *signal, bool lock, 533 - enum hv_signal_policy policy); 532 + u32 kv_count, bool lock, 533 + bool kick_q); 534 534 535 535 int hv_ringbuffer_read(struct hv_ring_buffer_info *inring_info, 536 536 void *buffer, u32 buflen, u32 *buffer_actual_len,
+21 -9
drivers/hv/ring_buffer.c
··· 66 66 * once the ring buffer is empty, it will clear the 67 67 * interrupt_mask and re-check to see if new data has 68 68 * arrived. 69 + * 70 + * KYS: Oct. 30, 2016: 71 + * It looks like Windows hosts have logic to deal with DOS attacks that 72 + * can be triggered if it receives interrupts when it is not expecting 73 + * the interrupt. The host expects interrupts only when the ring 74 + * transitions from empty to non-empty (or full to non full on the guest 75 + * to host ring). 76 + * So, base the signaling decision solely on the ring state until the 77 + * host logic is fixed. 69 78 */ 70 79 71 - static bool hv_need_to_signal(u32 old_write, struct hv_ring_buffer_info *rbi, 72 - enum hv_signal_policy policy) 80 + static void hv_signal_on_write(u32 old_write, struct vmbus_channel *channel, 81 + bool kick_q) 73 82 { 83 + struct hv_ring_buffer_info *rbi = &channel->outbound; 84 + 74 85 virt_mb(); 75 86 if (READ_ONCE(rbi->ring_buffer->interrupt_mask)) 76 - return false; 87 + return; 77 88 78 89 /* check interrupt_mask before read_index */ 79 90 virt_rmb(); ··· 93 82 * ring transitions from being empty to non-empty. 94 83 */ 95 84 if (old_write == READ_ONCE(rbi->ring_buffer->read_index)) 96 - return true; 85 + vmbus_setevent(channel); 97 86 98 - return false; 87 + return; 99 88 } 100 89 101 90 /* Get the next write location for the specified ring buffer. */ ··· 284 273 } 285 274 286 275 /* Write to the ring buffer. 
*/ 287 - int hv_ringbuffer_write(struct hv_ring_buffer_info *outring_info, 288 - struct kvec *kv_list, u32 kv_count, bool *signal, bool lock, 289 - enum hv_signal_policy policy) 276 + int hv_ringbuffer_write(struct vmbus_channel *channel, 277 + struct kvec *kv_list, u32 kv_count, bool lock, 278 + bool kick_q) 290 279 { 291 280 int i = 0; 292 281 u32 bytes_avail_towrite; ··· 296 285 u32 old_write; 297 286 u64 prev_indices = 0; 298 287 unsigned long flags = 0; 288 + struct hv_ring_buffer_info *outring_info = &channel->outbound; 299 289 300 290 for (i = 0; i < kv_count; i++) 301 291 totalbytes_towrite += kv_list[i].iov_len; ··· 349 337 if (lock) 350 338 spin_unlock_irqrestore(&outring_info->ring_lock, flags); 351 339 352 - *signal = hv_need_to_signal(old_write, outring_info, policy); 340 + hv_signal_on_write(old_write, channel, kick_q); 353 341 return 0; 354 342 } 355 343
+1
include/linux/hyperv.h
··· 1454 1454 1455 1455 void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); 1456 1456 1457 + void vmbus_setevent(struct vmbus_channel *channel); 1457 1458 /* 1458 1459 * Negotiated version with the Host. 1459 1460 */