[SCSI] iscsi class: regression - fix races with state manipulation and blocking/unblocking

For qla4xxx, we could be starting a session, but some error (network,
target, IO from a device that got started, etc.) could cause the session
to fail, and currently the block/unblock and state manipulation could race
with each other. This patch just has those operations done in the
single-threaded iscsi eh work queue, so that they are serialized.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: James Bottomley <James.Bottomley@HansenPartnership.com>

authored by Mike Christie and committed by James Bottomley 45ab33b6 024f801f

+50 -28
+48 -28
drivers/scsi/scsi_transport_iscsi.c
··· 373 scsi_target_unblock(&session->dev); 374 } 375 376 - static void __iscsi_unblock_session(struct iscsi_cls_session *session) 377 { 378 - if (!cancel_delayed_work(&session->recovery_work)) 379 - flush_workqueue(iscsi_eh_timer_workq); 380 - scsi_target_unblock(&session->dev); 381 - } 382 - 383 - void iscsi_unblock_session(struct iscsi_cls_session *session) 384 - { 385 struct Scsi_Host *shost = iscsi_session_to_shost(session); 386 struct iscsi_host *ihost = shost->shost_data; 387 unsigned long flags; 388 389 spin_lock_irqsave(&session->lock, flags); 390 session->state = ISCSI_SESSION_LOGGED_IN; 391 spin_unlock_irqrestore(&session->lock, flags); 392 - 393 - __iscsi_unblock_session(session); 394 /* 395 * Only do kernel scanning if the driver is properly hooked into 396 * the async scanning code (drivers like iscsi_tcp do login and ··· 402 atomic_inc(&ihost->nr_scans); 403 } 404 } 405 EXPORT_SYMBOL_GPL(iscsi_unblock_session); 406 407 - void iscsi_block_session(struct iscsi_cls_session *session) 408 { 409 unsigned long flags; 410 411 spin_lock_irqsave(&session->lock, flags); 412 session->state = ISCSI_SESSION_FAILED; 413 spin_unlock_irqrestore(&session->lock, flags); 414 - 415 scsi_target_block(&session->dev); 416 queue_delayed_work(iscsi_eh_timer_workq, &session->recovery_work, 417 session->recovery_tmo * HZ); 418 } 419 EXPORT_SYMBOL_GPL(iscsi_block_session); 420 ··· 487 INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout); 488 INIT_LIST_HEAD(&session->host_list); 489 INIT_LIST_HEAD(&session->sess_list); 490 INIT_WORK(&session->unbind_work, __iscsi_unbind_session); 491 INIT_WORK(&session->scan_work, iscsi_scan_session); 492 spin_lock_init(&session->lock); ··· 601 list_del(&session->sess_list); 602 spin_unlock_irqrestore(&sesslock, flags); 603 604 /* 605 * If we are blocked let commands flow again. The lld or iscsi 606 * layer should set up the queuecommand to fail commands. 
607 */ 608 spin_lock_irqsave(&session->lock, flags); 609 session->state = ISCSI_SESSION_FREE; 610 spin_unlock_irqrestore(&session->lock, flags); 611 - __iscsi_unblock_session(session); 612 - __iscsi_unbind_session(&session->unbind_work); 613 614 - /* flush running scans */ 615 flush_workqueue(ihost->scan_workq); 616 - /* 617 - * If the session dropped while removing devices then we need to make 618 - * sure it is not blocked 619 - */ 620 - if (!cancel_delayed_work(&session->recovery_work)) 621 - flush_workqueue(iscsi_eh_timer_workq); 622 623 /* hw iscsi may not have removed all connections from session */ 624 err = device_for_each_child(&session->dev, NULL, ··· 829 830 void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error) 831 { 832 - struct iscsi_cls_session *session = iscsi_conn_to_session(conn); 833 struct nlmsghdr *nlh; 834 struct sk_buff *skb; 835 struct iscsi_uevent *ev; 836 struct iscsi_internal *priv; 837 int len = NLMSG_SPACE(sizeof(*ev)); 838 - unsigned long flags; 839 840 priv = iscsi_if_transport_lookup(conn->transport); 841 if (!priv) 842 return; 843 - 844 - spin_lock_irqsave(&session->lock, flags); 845 - if (session->state == ISCSI_SESSION_LOGGED_IN) 846 - session->state = ISCSI_SESSION_FAILED; 847 - spin_unlock_irqrestore(&session->lock, flags); 848 849 skb = alloc_skb(len, GFP_ATOMIC); 850 if (!skb) {
··· 373 scsi_target_unblock(&session->dev); 374 } 375 376 + static void __iscsi_unblock_session(struct work_struct *work) 377 { 378 + struct iscsi_cls_session *session = 379 + container_of(work, struct iscsi_cls_session, 380 + unblock_work); 381 struct Scsi_Host *shost = iscsi_session_to_shost(session); 382 struct iscsi_host *ihost = shost->shost_data; 383 unsigned long flags; 384 385 + /* 386 + * The recovery and unblock work get run from the same workqueue, 387 + * so try to cancel it if it was going to run after this unblock. 388 + */ 389 + cancel_delayed_work(&session->recovery_work); 390 spin_lock_irqsave(&session->lock, flags); 391 session->state = ISCSI_SESSION_LOGGED_IN; 392 spin_unlock_irqrestore(&session->lock, flags); 393 + /* start IO */ 394 + scsi_target_unblock(&session->dev); 395 /* 396 * Only do kernel scanning if the driver is properly hooked into 397 * the async scanning code (drivers like iscsi_tcp do login and ··· 401 atomic_inc(&ihost->nr_scans); 402 } 403 } 404 + 405 + /** 406 + * iscsi_unblock_session - set a session as logged in and start IO. 407 + * @session: iscsi session 408 + * 409 + * Mark a session as ready to accept IO. 
410 + */ 411 + void iscsi_unblock_session(struct iscsi_cls_session *session) 412 + { 413 + queue_work(iscsi_eh_timer_workq, &session->unblock_work); 414 + /* 415 + * make sure all the events have completed before tell the driver 416 + * it is safe 417 + */ 418 + flush_workqueue(iscsi_eh_timer_workq); 419 + } 420 EXPORT_SYMBOL_GPL(iscsi_unblock_session); 421 422 + static void __iscsi_block_session(struct work_struct *work) 423 { 424 + struct iscsi_cls_session *session = 425 + container_of(work, struct iscsi_cls_session, 426 + block_work); 427 unsigned long flags; 428 429 spin_lock_irqsave(&session->lock, flags); 430 session->state = ISCSI_SESSION_FAILED; 431 spin_unlock_irqrestore(&session->lock, flags); 432 scsi_target_block(&session->dev); 433 queue_delayed_work(iscsi_eh_timer_workq, &session->recovery_work, 434 session->recovery_tmo * HZ); 435 + } 436 + 437 + void iscsi_block_session(struct iscsi_cls_session *session) 438 + { 439 + queue_work(iscsi_eh_timer_workq, &session->block_work); 440 } 441 EXPORT_SYMBOL_GPL(iscsi_block_session); 442 ··· 463 INIT_DELAYED_WORK(&session->recovery_work, session_recovery_timedout); 464 INIT_LIST_HEAD(&session->host_list); 465 INIT_LIST_HEAD(&session->sess_list); 466 + INIT_WORK(&session->unblock_work, __iscsi_unblock_session); 467 + INIT_WORK(&session->block_work, __iscsi_block_session); 468 INIT_WORK(&session->unbind_work, __iscsi_unbind_session); 469 INIT_WORK(&session->scan_work, iscsi_scan_session); 470 spin_lock_init(&session->lock); ··· 575 list_del(&session->sess_list); 576 spin_unlock_irqrestore(&sesslock, flags); 577 578 + /* make sure there are no blocks/unblocks queued */ 579 + flush_workqueue(iscsi_eh_timer_workq); 580 + /* make sure the timedout callout is not running */ 581 + if (!cancel_delayed_work(&session->recovery_work)) 582 + flush_workqueue(iscsi_eh_timer_workq); 583 /* 584 * If we are blocked let commands flow again. The lld or iscsi 585 * layer should set up the queuecommand to fail commands. 
586 + * We assume that LLD will not be calling block/unblock while 587 + * removing the session. 588 */ 589 spin_lock_irqsave(&session->lock, flags); 590 session->state = ISCSI_SESSION_FREE; 591 spin_unlock_irqrestore(&session->lock, flags); 592 593 + scsi_target_unblock(&session->dev); 594 + /* flush running scans then delete devices */ 595 flush_workqueue(ihost->scan_workq); 596 + __iscsi_unbind_session(&session->unbind_work); 597 598 /* hw iscsi may not have removed all connections from session */ 599 err = device_for_each_child(&session->dev, NULL, ··· 802 803 void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error) 804 { 805 struct nlmsghdr *nlh; 806 struct sk_buff *skb; 807 struct iscsi_uevent *ev; 808 struct iscsi_internal *priv; 809 int len = NLMSG_SPACE(sizeof(*ev)); 810 811 priv = iscsi_if_transport_lookup(conn->transport); 812 if (!priv) 813 return; 814 815 skb = alloc_skb(len, GFP_ATOMIC); 816 if (!skb) {
+2
include/scsi/scsi_transport_iscsi.h
··· 177 struct list_head host_list; 178 struct iscsi_transport *transport; 179 spinlock_t lock; 180 struct work_struct scan_work; 181 struct work_struct unbind_work; 182
··· 177 struct list_head host_list; 178 struct iscsi_transport *transport; 179 spinlock_t lock; 180 + struct work_struct block_work; 181 + struct work_struct unblock_work; 182 struct work_struct scan_work; 183 struct work_struct unbind_work; 184