Drivers: hv: vmbus: Fix bugs in rescind handling

This patch addresses the following bugs in the current rescind handling code:

1. Fixes a race condition in which hv_process_channel_removal() may be invoked
on an already-freed channel.

2. Prevents an indefinite wait when rescinding sub-channels by correctly setting
the probe_done state.

I would like to thank Dexuan for patiently reviewing earlier versions of this
patch and identifying many of the issues fixed here.

Greg, please apply this to 4.14-final.

Fixes: 54a66265d675 ("Drivers: hv: vmbus: Fix rescind handling")

Signed-off-by: K. Y. Srinivasan <kys@microsoft.com>
Reviewed-by: Dexuan Cui <decui@microsoft.com>
Cc: stable@vger.kernel.org # (4.13 and above)
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>

authored by K. Y. Srinivasan and committed by Greg Kroah-Hartman 192b2d78 688cb678

Changed files
+23 -25
drivers
include
linux
+3 -3
drivers/hv/channel.c
··· 640 640 */ 641 641 return; 642 642 } 643 + mutex_lock(&vmbus_connection.channel_mutex); 643 644 /* 644 645 * Close all the sub-channels first and then close the 645 646 * primary channel. ··· 649 648 cur_channel = list_entry(cur, struct vmbus_channel, sc_list); 650 649 vmbus_close_internal(cur_channel); 651 650 if (cur_channel->rescind) { 652 - mutex_lock(&vmbus_connection.channel_mutex); 653 - hv_process_channel_removal(cur_channel, 651 + hv_process_channel_removal( 654 652 cur_channel->offermsg.child_relid); 655 - mutex_unlock(&vmbus_connection.channel_mutex); 656 653 } 657 654 } 658 655 /* 659 656 * Now close the primary. 660 657 */ 661 658 vmbus_close_internal(channel); 659 + mutex_unlock(&vmbus_connection.channel_mutex); 662 660 } 663 661 EXPORT_SYMBOL_GPL(vmbus_close); 664 662
+18 -19
drivers/hv/channel_mgmt.c
··· 159 159 160 160 161 161 spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags); 162 - 162 + channel->rescind = true; 163 163 list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list, 164 164 msglistentry) { 165 165 ··· 381 381 true); 382 382 } 383 383 384 - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid) 384 + void hv_process_channel_removal(u32 relid) 385 385 { 386 386 unsigned long flags; 387 - struct vmbus_channel *primary_channel; 387 + struct vmbus_channel *primary_channel, *channel; 388 388 389 - BUG_ON(!channel->rescind); 390 389 BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex)); 391 390 391 + /* 392 + * Make sure channel is valid as we may have raced. 393 + */ 394 + channel = relid2channel(relid); 395 + if (!channel) 396 + return; 397 + 398 + BUG_ON(!channel->rescind); 392 399 if (channel->target_cpu != get_cpu()) { 393 400 put_cpu(); 394 401 smp_call_function_single(channel->target_cpu, ··· 522 515 if (!fnew) { 523 516 if (channel->sc_creation_callback != NULL) 524 517 channel->sc_creation_callback(newchannel); 518 + newchannel->probe_done = true; 525 519 return; 526 520 } 527 521 ··· 842 834 { 843 835 struct vmbus_channel_rescind_offer *rescind; 844 836 struct vmbus_channel *channel; 845 - unsigned long flags; 846 837 struct device *dev; 847 838 848 839 rescind = (struct vmbus_channel_rescind_offer *)hdr; ··· 880 873 return; 881 874 } 882 875 883 - spin_lock_irqsave(&channel->lock, flags); 884 - channel->rescind = true; 885 - spin_unlock_irqrestore(&channel->lock, flags); 886 - 887 - /* 888 - * Now that we have posted the rescind state, perform 889 - * rescind related cleanup. 890 - */ 891 - vmbus_rescind_cleanup(channel); 892 - 893 876 /* 894 877 * Now wait for offer handling to complete. 
895 878 */ ··· 898 901 if (channel->device_obj) { 899 902 if (channel->chn_rescind_callback) { 900 903 channel->chn_rescind_callback(channel); 904 + vmbus_rescind_cleanup(channel); 901 905 return; 902 906 } 903 907 /* ··· 907 909 */ 908 910 dev = get_device(&channel->device_obj->device); 909 911 if (dev) { 912 + vmbus_rescind_cleanup(channel); 910 913 vmbus_device_unregister(channel->device_obj); 911 914 put_device(dev); 912 915 } ··· 920 921 * 1. Close all sub-channels first 921 922 * 2. Then close the primary channel. 922 923 */ 924 + mutex_lock(&vmbus_connection.channel_mutex); 925 + vmbus_rescind_cleanup(channel); 923 926 if (channel->state == CHANNEL_OPEN_STATE) { 924 927 /* 925 928 * The channel is currently not open; 926 929 * it is safe for us to cleanup the channel. 927 930 */ 928 - mutex_lock(&vmbus_connection.channel_mutex); 929 - hv_process_channel_removal(channel, 930 - channel->offermsg.child_relid); 931 - mutex_unlock(&vmbus_connection.channel_mutex); 931 + hv_process_channel_removal(rescind->child_relid); 932 932 } 933 + mutex_unlock(&vmbus_connection.channel_mutex); 933 934 } 934 935 } 935 936
+1 -2
drivers/hv/vmbus_drv.c
··· 768 768 struct vmbus_channel *channel = hv_dev->channel; 769 769 770 770 mutex_lock(&vmbus_connection.channel_mutex); 771 - hv_process_channel_removal(channel, 772 - channel->offermsg.child_relid); 771 + hv_process_channel_removal(channel->offermsg.child_relid); 773 772 mutex_unlock(&vmbus_connection.channel_mutex); 774 773 kfree(hv_dev); 775 774
+1 -1
include/linux/hyperv.h
··· 1403 1403 const int *srv_version, int srv_vercnt, 1404 1404 int *nego_fw_version, int *nego_srv_version); 1405 1405 1406 - void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid); 1406 + void hv_process_channel_removal(u32 relid); 1407 1407 1408 1408 void vmbus_setevent(struct vmbus_channel *channel); 1409 1409 /*