[IA64] fix possible XPC deadlock when disconnecting

This patch eliminates a potential deadlock when XPC disconnects a
channel to a partition that has gone down. The deadlock can occur if at
least one of the kthreads created by XPC to make callouts to the
channel's registerer is detained in the registerer and will not return
to XPC until some registerer request occurs on the now-downed
partition. The potential for deadlock is removed by ensuring that a
kthread is always available to make the channel-disconnecting callout
to the registerer.
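
In outline, xpc_disconnect_channel() now spawns a dedicated kthread for
the callout when there are no idle kthreads left to wake, condensed
here from the xpc_channel.c hunk below (locking and surrounding context
elided):

	/* wake all idle kthreads so they can exit */
	if (atomic_read(&ch->kthreads_idle) > 0) {
		wake_up_all(&ch->idle_wq);

	} else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
		/* all assigned kthreads may be detained in the
		 * registerer; start one that ignores the channel's
		 * disconnecting state so the callout is still made */
		xpc_create_kthreads(ch, 1, 1);
	}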

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by Dean Nelson and committed by Tony Luck a460ef8d 1cf24bdb

+54 -27
+10 -5
arch/ia64/sn/kernel/xpc_channel.c
@@ -632,7 +632,7 @@
 		ch->number, ch->partid);
 
 	spin_unlock_irqrestore(&ch->lock, *irq_flags);
-	xpc_create_kthreads(ch, 1);
+	xpc_create_kthreads(ch, 1, 0);
 	spin_lock_irqsave(&ch->lock, *irq_flags);
 }
 
@@ -754,12 +754,12 @@
 
 	/* make sure all activity has settled down first */
 
-	if (atomic_read(&ch->references) > 0 ||
-			((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
+	if (atomic_read(&ch->kthreads_assigned) > 0 ||
+			atomic_read(&ch->references) > 0) {
 		return;
 	}
-	DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
+	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+		!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
 
 	if (part->act_state == XPC_P_DEACTIVATING) {
 		/* can't proceed until the other side disengages from us */
@@ -1651,6 +1651,11 @@
 	/* wake all idle kthreads so they can exit */
 	if (atomic_read(&ch->kthreads_idle) > 0) {
 		wake_up_all(&ch->idle_wq);
+
+	} else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+		/* start a kthread that will do the xpcDisconnecting callout */
+		xpc_create_kthreads(ch, 1, 1);
 	}
 
 	/* wake those waiting to allocate an entry from the local msg queue */
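
The reworked settle-down test in xpc_process_disconnect() is worth
calling out: teardown now also waits for every assigned kthread to
drain, and the flag combination that used to gate the early return is
demoted to an assertion, since by the time both counts reach zero the
disconnecting callout must already have been made. Condensed from the
hunk above:

	/* make sure all activity has settled down first */

	if (atomic_read(&ch->kthreads_assigned) > 0 ||
			atomic_read(&ch->references) > 0) {
		return;
	}
	DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
		!(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));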
+43 -21
arch/ia64/sn/kernel/xpc_main.c
@@ -681,7 +681,7 @@
 	dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
 		needed, ch->partid, ch->number);
 
-	xpc_create_kthreads(ch, needed);
+	xpc_create_kthreads(ch, needed, 0);
 }
 
 
@@ -775,25 +775,27 @@
 		xpc_kthread_waitmsgs(part, ch);
 	}
 
-	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-		spin_lock_irqsave(&ch->lock, irq_flags);
-		if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-				!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
-			ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
-			spin_unlock_irqrestore(&ch->lock, irq_flags);
+	/* let registerer know that connection is disconnecting */
 
-			xpc_disconnect_callout(ch, xpcDisconnecting);
-
-			spin_lock_irqsave(&ch->lock, irq_flags);
-			ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
-		}
-		spin_unlock_irqrestore(&ch->lock, irq_flags);
+	spin_lock_irqsave(&ch->lock, irq_flags);
+	if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+			!(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+		ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
+		spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+		xpc_disconnect_callout(ch, xpcDisconnecting);
+
+		spin_lock_irqsave(&ch->lock, irq_flags);
+		ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+	}
+	spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+	if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
 		if (atomic_dec_return(&part->nchannels_engaged) == 0) {
 			xpc_mark_partition_disengaged(part);
 			xpc_IPI_send_disengage(part);
 		}
 	}
-
 
 	xpc_msgqueue_deref(ch);
 
@@ -820,7 +818,8 @@
  * partition.
  */
 void
-xpc_create_kthreads(struct xpc_channel *ch, int needed)
+xpc_create_kthreads(struct xpc_channel *ch, int needed,
+			int ignore_disconnecting)
 {
 	unsigned long irq_flags;
 	pid_t pid;
@@ -836,16 +833,38 @@
 		 * kthread. That kthread is responsible for doing the
 		 * counterpart to the following before it exits.
 		 */
+		if (ignore_disconnecting) {
+			if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
+				/* kthreads assigned had gone to zero */
+				BUG_ON(!(ch->flags &
+					XPC_C_DISCONNECTINGCALLOUT_MADE));
+				break;
+			}
+
+		} else if (ch->flags & XPC_C_DISCONNECTING) {
+			break;
+
+		} else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
+			if (atomic_inc_return(&part->nchannels_engaged) == 1)
+				xpc_mark_partition_engaged(part);
+		}
 		(void) xpc_part_ref(part);
 		xpc_msgqueue_ref(ch);
-		if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
-		    atomic_inc_return(&part->nchannels_engaged) == 1) {
-			xpc_mark_partition_engaged(part);
-		}
 
 		pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
 		if (pid < 0) {
 			/* the fork failed */
+
+			/*
+			 * NOTE: if (ignore_disconnecting &&
+			 * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
+			 * then we'll deadlock if all other kthreads assigned
+			 * to this channel are blocked in the channel's
+			 * registerer, because the only thing that will unblock
+			 * them is the xpcDisconnecting callout that this
+			 * failed kernel_thread would have made.
+			 */
+
 			if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
 			    atomic_dec_return(&part->nchannels_engaged) == 0) {
 				xpc_mark_partition_disengaged(part);
@@ -882,9 +857,6 @@
 			 * Flag this as an error only if we have an
 			 * insufficient #of kthreads for the channel
 			 * to function.
-			 *
-			 * No xpc_msgqueue_ref() is needed here since
-			 * the channel mgr is doing this.
 			 */
 			spin_lock_irqsave(&ch->lock, irq_flags);
 			XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
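
The ignore_disconnecting path leans on the kernel's
atomic_inc_not_zero(): the new callout kthread is accounted for only if
kthreads_assigned has not already dropped to zero, so it can never
resurrect engagement accounting that has already been torn down. As a
rough user-space sketch of that primitive in C11 atomics (illustrative
only, not the kernel's implementation):

	#include <stdatomic.h>
	#include <stdbool.h>

	/* increment *v only if it is currently non-zero; returns false
	 * once the count has reached zero, in which case the caller
	 * (here, xpc_create_kthreads()) must not assign a new kthread */
	static bool inc_not_zero(atomic_int *v)
	{
		int old = atomic_load(v);

		while (old != 0) {
			/* on failure 'old' is refreshed with the current
			 * value and the zero check is repeated */
			if (atomic_compare_exchange_weak(v, &old, old + 1))
				return true;
		}
		return false;
	}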
+1 -1
include/asm-ia64/sn/xpc.h
@@ -673,7 +673,7 @@
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
 extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
-extern void xpc_create_kthreads(struct xpc_channel *, int);
+extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
 
 