[IA64] fix possible XPC deadlock when disconnecting

This patch eliminates a potential deadlock that can occur when XPC
disconnects a channel to a partition that has gone down. The deadlock arises
if at least one of the kthreads created by XPC to make callouts to the
channel's registerer is detained in the registerer and will not return to XPC
until some registerer request occurs on the now-downed partition. The
potential for deadlock is removed by ensuring that a kthread is always
available to make the channel-disconnecting callout to the registerer.

Signed-off-by: Dean Nelson <dcn@sgi.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>

Authored by Dean Nelson and committed by Tony Luck (a460ef8d, 1cf24bdb)
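
The crux of the fix is that the xpcDisconnecting callout can no longer be starved: each exiting kthread tries to claim and make the callout *before* giving up its "assigned" slot, and if no kthread remains to do so, one is created just for that purpose. Below is a minimal, self-contained userspace sketch of that exactly-once, callout-before-decrement ordering; it is illustrative only, and all names (channel, worker_exit, disconnect_callout) are hypothetical, not XPC's API.

/*
 * Minimal userspace sketch (not XPC code): an exiting worker claims and
 * makes the "disconnecting" callout before dropping its assigned count,
 * so the callout cannot be starved by workers stuck in the registerer.
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct channel {
        pthread_mutex_t lock;
        bool connected_callout_made;
        bool disconnecting_callout;     /* callout has been claimed */
        int  kthreads_assigned;
};

static void disconnect_callout(struct channel *ch)
{
        printf("registerer notified: disconnecting\n");
}

/* Called by every worker as it exits. */
static void worker_exit(struct channel *ch)
{
        pthread_mutex_lock(&ch->lock);
        if (ch->connected_callout_made && !ch->disconnecting_callout) {
                ch->disconnecting_callout = true;       /* claim the callout */
                pthread_mutex_unlock(&ch->lock);

                disconnect_callout(ch);         /* made without the lock held */

                pthread_mutex_lock(&ch->lock);
        }
        ch->kthreads_assigned--;        /* drop our slot only after the callout */
        pthread_mutex_unlock(&ch->lock);
}

int main(void)
{
        struct channel ch = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .connected_callout_made = true,
                .kthreads_assigned = 1,
        };

        worker_exit(&ch);       /* prints the notification exactly once */
        return 0;
}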

+54 -27
+10 -5
arch/ia64/sn/kernel/xpc_channel.c
···
                ch->number, ch->partid);
 
        spin_unlock_irqrestore(&ch->lock, *irq_flags);
-       xpc_create_kthreads(ch, 1);
+       xpc_create_kthreads(ch, 1, 0);
        spin_lock_irqsave(&ch->lock, *irq_flags);
 }
 
···
 
        /* make sure all activity has settled down first */
 
-       if (atomic_read(&ch->references) > 0 ||
-                       ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-                       !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE))) {
+       if (atomic_read(&ch->kthreads_assigned) > 0 ||
+                       atomic_read(&ch->references) > 0) {
                return;
        }
-       DBUG_ON(atomic_read(&ch->kthreads_assigned) != 0);
+       DBUG_ON((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+               !(ch->flags & XPC_C_DISCONNECTINGCALLOUT_MADE));
 
        if (part->act_state == XPC_P_DEACTIVATING) {
                /* can't proceed until the other side disengages from us */
···
        /* wake all idle kthreads so they can exit */
        if (atomic_read(&ch->kthreads_idle) > 0) {
                wake_up_all(&ch->idle_wq);
+
+       } else if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+                       !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+               /* start a kthread that will do the xpcDisconnecting callout */
+               xpc_create_kthreads(ch, 1, 1);
        }
 
        /* wake those waiting to allocate an entry from the local msg queue */
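
In xpc_disconnect_channel() above, when no idle kthread is available to pick up the work, a kthread is now created whose sole job is to deliver the xpcDisconnecting callout. A rough userspace analogue of that "spawn a notifier when nobody else can notify" pattern is sketched below; the names (spawn_notifier, notifier_fn, idle_workers) are hypothetical and not part of XPC.

/*
 * Illustrative sketch only (not XPC's API): if no idle worker exists to
 * make the disconnecting callout, spawn one thread whose only job is to
 * make it.
 */
#include <pthread.h>
#include <stdio.h>

static void *notifier_fn(void *arg)
{
        /* this thread exists solely to deliver the notification */
        printf("registerer notified: disconnecting\n");
        return NULL;
}

static int spawn_notifier(void)
{
        pthread_t t;
        int rc = pthread_create(&t, NULL, notifier_fn, NULL);

        if (rc == 0)
                pthread_detach(t);      /* fire and forget, like a kthread */
        return rc;
}

int main(void)
{
        int idle_workers = 0;   /* pretend every worker is stuck elsewhere */

        if (idle_workers == 0 && spawn_notifier() != 0)
                fprintf(stderr, "spawn_notifier failed\n");

        pthread_exit(NULL);     /* let the detached notifier finish */
}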
+43 -21
arch/ia64/sn/kernel/xpc_main.c
···
        dev_dbg(xpc_chan, "create %d new kthreads, partid=%d, channel=%d\n",
                needed, ch->partid, ch->number);
 
-       xpc_create_kthreads(ch, needed);
+       xpc_create_kthreads(ch, needed, 0);
 }
 
 
···
                xpc_kthread_waitmsgs(part, ch);
        }
 
-       if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
-               spin_lock_irqsave(&ch->lock, irq_flags);
-               if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
-                               !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
-                       ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
-                       spin_unlock_irqrestore(&ch->lock, irq_flags);
+       /* let registerer know that connection is disconnecting */
 
-                       xpc_disconnect_callout(ch, xpcDisconnecting);
-
-                       spin_lock_irqsave(&ch->lock, irq_flags);
-                       ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
-               }
+       spin_lock_irqsave(&ch->lock, irq_flags);
+       if ((ch->flags & XPC_C_CONNECTEDCALLOUT_MADE) &&
+                       !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) {
+               ch->flags |= XPC_C_DISCONNECTINGCALLOUT;
                spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+               xpc_disconnect_callout(ch, xpcDisconnecting);
+
+               spin_lock_irqsave(&ch->lock, irq_flags);
+               ch->flags |= XPC_C_DISCONNECTINGCALLOUT_MADE;
+       }
+       spin_unlock_irqrestore(&ch->lock, irq_flags);
+
+       if (atomic_dec_return(&ch->kthreads_assigned) == 0) {
                if (atomic_dec_return(&part->nchannels_engaged) == 0) {
                        xpc_mark_partition_disengaged(part);
                        xpc_IPI_send_disengage(part);
                }
        }
-
 
        xpc_msgqueue_deref(ch);
 
···
  * partition.
  */
 void
-xpc_create_kthreads(struct xpc_channel *ch, int needed)
+xpc_create_kthreads(struct xpc_channel *ch, int needed,
+                       int ignore_disconnecting)
 {
        unsigned long irq_flags;
        pid_t pid;
···
                 * kthread. That kthread is responsible for doing the
                 * counterpart to the following before it exits.
                 */
+               if (ignore_disconnecting) {
+                       if (!atomic_inc_not_zero(&ch->kthreads_assigned)) {
+                               /* kthreads assigned had gone to zero */
+                               BUG_ON(!(ch->flags &
+                                       XPC_C_DISCONNECTINGCALLOUT_MADE));
+                               break;
+                       }
+
+               } else if (ch->flags & XPC_C_DISCONNECTING) {
+                       break;
+
+               } else if (atomic_inc_return(&ch->kthreads_assigned) == 1) {
+                       if (atomic_inc_return(&part->nchannels_engaged) == 1)
+                               xpc_mark_partition_engaged(part);
+               }
                (void) xpc_part_ref(part);
                xpc_msgqueue_ref(ch);
-               if (atomic_inc_return(&ch->kthreads_assigned) == 1 &&
-                               atomic_inc_return(&part->nchannels_engaged) == 1) {
-                       xpc_mark_partition_engaged(part);
-               }
 
                pid = kernel_thread(xpc_daemonize_kthread, (void *) args, 0);
                if (pid < 0) {
                        /* the fork failed */
+
+                       /*
+                        * NOTE: if (ignore_disconnecting &&
+                        * !(ch->flags & XPC_C_DISCONNECTINGCALLOUT)) is true,
+                        * then we'll deadlock if all other kthreads assigned
+                        * to this channel are blocked in the channel's
+                        * registerer, because the only thing that will unblock
+                        * them is the xpcDisconnecting callout that this
+                        * failed kernel_thread would have made.
+                        */
+
                        if (atomic_dec_return(&ch->kthreads_assigned) == 0 &&
                            atomic_dec_return(&part->nchannels_engaged) == 0) {
                                xpc_mark_partition_disengaged(part);
···
                         * Flag this as an error only if we have an
                         * insufficient #of kthreads for the channel
                         * to function.
-                        *
-                        * No xpc_msgqueue_ref() is needed here since
-                        * the channel mgr is doing this.
                         */
                        spin_lock_irqsave(&ch->lock, irq_flags);
                        XPC_DISCONNECT_CHANNEL(ch, xpcLackOfResources,
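
When xpc_create_kthreads() is called with ignore_disconnecting set, the dedicated callout thread takes its kthreads_assigned slot with atomic_inc_not_zero(), so it only piggybacks on an existing assignment and never resurrects a count that has already drained to zero (in which case an exiting kthread has the callout covered). The following is a userspace sketch of the inc-not-zero idea using C11 atomics, with a hypothetical inc_not_zero() helper; it is not the kernel's implementation.

/*
 * Sketch of the atomic_inc_not_zero() idea using C11 atomics (userspace
 * illustration only): take a reference only if the count has not already
 * dropped to zero.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Increment *v unless it is zero; return true if the increment happened. */
static bool inc_not_zero(atomic_int *v)
{
        int old = atomic_load(v);

        while (old != 0) {
                if (atomic_compare_exchange_weak(v, &old, old + 1))
                        return true;
                /* old was reloaded by the failed CAS; retry */
        }
        return false;
}

int main(void)
{
        atomic_int kthreads_assigned = 1;
        atomic_int drained = 0;

        /* piggyback on an existing assignment: succeeds while count > 0 */
        printf("%d\n", inc_not_zero(&kthreads_assigned));      /* prints 1 */

        /* once the last worker has gone, do not resurrect the count */
        printf("%d\n", inc_not_zero(&drained));                 /* prints 0 */
        return 0;
}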
+1 -1
include/asm-ia64/sn/xpc.h
···
 extern void xpc_dropped_IPI_check(struct xpc_partition *);
 extern void xpc_activate_partition(struct xpc_partition *);
 extern void xpc_activate_kthreads(struct xpc_channel *, int);
-extern void xpc_create_kthreads(struct xpc_channel *, int);
+extern void xpc_create_kthreads(struct xpc_channel *, int, int);
 extern void xpc_disconnect_wait(int);
 
 