[PATCH] s390: deadlock in appldata

The system might hang when using appldata_mem with high I/O traffic and a
large number of devices. The spinlocks bdev_lock and swaplock are acquired
via calls to si_meminfo() and si_swapinfo() from a tasklet, i.e., interrupt
context, which can lead to a deadlock. Replace tasklet with work queue.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by Gerald Schaefer and committed by Linus Torvalds f26d583e 595bf2aa

+45 -35
+41 -31
arch/s390/appldata/appldata_base.c
··· 28 28 //#include <linux/kernel_stat.h> 29 29 #include <linux/notifier.h> 30 30 #include <linux/cpu.h> 31 + #include <linux/workqueue.h> 31 32 32 33 #include "appldata.h" 33 34 ··· 134 133 static int appldata_timer_active; 135 134 136 135 /* 137 - * Tasklet 136 + * Work queue 138 137 */ 139 - static struct tasklet_struct appldata_tasklet_struct; 138 + static struct workqueue_struct *appldata_wq; 139 + static void appldata_work_fn(void *data); 140 + static DECLARE_WORK(appldata_work, appldata_work_fn, NULL); 141 + 140 142 141 143 /* 142 144 * Ops list ··· 148 144 static LIST_HEAD(appldata_ops_list); 149 145 150 146 151 - /************************* timer, tasklet, DIAG ******************************/ 147 + /*************************** timer, work, DIAG *******************************/ 152 148 /* 153 149 * appldata_timer_function() 154 150 * 155 - * schedule tasklet and reschedule timer 151 + * schedule work and reschedule timer 156 152 */ 157 153 static void appldata_timer_function(unsigned long data, struct pt_regs *regs) 158 154 { ··· 161 157 atomic_read(&appldata_expire_count)); 162 158 if (atomic_dec_and_test(&appldata_expire_count)) { 163 159 atomic_set(&appldata_expire_count, num_online_cpus()); 164 - tasklet_schedule((struct tasklet_struct *) data); 160 + queue_work(appldata_wq, (struct work_struct *) data); 165 161 } 166 162 } 167 163 168 164 /* 169 - * appldata_tasklet_function() 165 + * appldata_work_fn() 170 166 * 171 167 * call data gathering function for each (active) module 172 168 */ 173 - static void appldata_tasklet_function(unsigned long data) 169 + static void appldata_work_fn(void *data) 174 170 { 175 171 struct list_head *lh; 176 172 struct appldata_ops *ops; 177 173 int i; 178 174 179 - P_DEBUG(" -= Tasklet =-\n"); 175 + P_DEBUG(" -= Work Queue =-\n"); 180 176 i = 0; 181 177 spin_lock(&appldata_ops_lock); 182 178 list_for_each(lh, &appldata_ops_list) { ··· 235 231 : "=d" (ry) : "d" (&(appldata_parameter_list)) : "cc"); 236 232 return (int) 
ry; 237 233 } 238 - /********************** timer, tasklet, DIAG <END> ***************************/ 234 + /************************ timer, work, DIAG <END> ****************************/ 239 235 240 236 241 237 /****************************** /proc stuff **********************************/ ··· 415 411 struct list_head *lh; 416 412 417 413 found = 0; 418 - spin_lock_bh(&appldata_ops_lock); 414 + spin_lock(&appldata_ops_lock); 419 415 list_for_each(lh, &appldata_ops_list) { 420 416 tmp_ops = list_entry(lh, struct appldata_ops, list); 421 417 if (&tmp_ops->ctl_table[2] == ctl) { ··· 423 419 } 424 420 } 425 421 if (!found) { 426 - spin_unlock_bh(&appldata_ops_lock); 422 + spin_unlock(&appldata_ops_lock); 427 423 return -ENODEV; 428 424 } 429 425 ops = ctl->data; 430 426 if (!try_module_get(ops->owner)) { // protect this function 431 - spin_unlock_bh(&appldata_ops_lock); 427 + spin_unlock(&appldata_ops_lock); 432 428 return -ENODEV; 433 429 } 434 - spin_unlock_bh(&appldata_ops_lock); 430 + spin_unlock(&appldata_ops_lock); 435 431 436 432 if (!*lenp || *ppos) { 437 433 *lenp = 0; ··· 455 451 return -EFAULT; 456 452 } 457 453 458 - spin_lock_bh(&appldata_ops_lock); 454 + spin_lock(&appldata_ops_lock); 459 455 if ((buf[0] == '1') && (ops->active == 0)) { 460 - if (!try_module_get(ops->owner)) { // protect tasklet 461 - spin_unlock_bh(&appldata_ops_lock); 456 + // protect work queue callback 457 + if (!try_module_get(ops->owner)) { 458 + spin_unlock(&appldata_ops_lock); 462 459 module_put(ops->owner); 463 460 return -ENODEV; 464 461 } ··· 490 485 } 491 486 module_put(ops->owner); 492 487 } 493 - spin_unlock_bh(&appldata_ops_lock); 488 + spin_unlock(&appldata_ops_lock); 494 489 out: 495 490 *lenp = len; 496 491 *ppos += len; ··· 534 529 } 535 530 memset(ops->ctl_table, 0, 4*sizeof(struct ctl_table)); 536 531 537 - spin_lock_bh(&appldata_ops_lock); 532 + spin_lock(&appldata_ops_lock); 538 533 list_for_each(lh, &appldata_ops_list) { 539 534 tmp_ops = list_entry(lh, struct 
appldata_ops, list); 540 535 P_DEBUG("register_ops loop: %i) name = %s, ctl = %i\n", ··· 546 541 APPLDATA_PROC_NAME_LENGTH) == 0) { 547 542 P_ERROR("Name \"%s\" already registered!\n", ops->name); 548 543 kfree(ops->ctl_table); 549 - spin_unlock_bh(&appldata_ops_lock); 544 + spin_unlock(&appldata_ops_lock); 550 545 return -EBUSY; 551 546 } 552 547 if (tmp_ops->ctl_nr == ops->ctl_nr) { 553 548 P_ERROR("ctl_nr %i already registered!\n", ops->ctl_nr); 554 549 kfree(ops->ctl_table); 555 - spin_unlock_bh(&appldata_ops_lock); 550 + spin_unlock(&appldata_ops_lock); 556 551 return -EBUSY; 557 552 } 558 553 } 559 554 list_add(&ops->list, &appldata_ops_list); 560 - spin_unlock_bh(&appldata_ops_lock); 555 + spin_unlock(&appldata_ops_lock); 561 556 562 557 ops->ctl_table[0].ctl_name = CTL_APPLDATA; 563 558 ops->ctl_table[0].procname = appldata_proc_name; ··· 588 583 */ 589 584 void appldata_unregister_ops(struct appldata_ops *ops) 590 585 { 591 - spin_lock_bh(&appldata_ops_lock); 586 + spin_lock(&appldata_ops_lock); 592 587 unregister_sysctl_table(ops->sysctl_header); 593 588 list_del(&ops->list); 594 589 kfree(ops->ctl_table); 595 590 ops->ctl_table = NULL; 596 - spin_unlock_bh(&appldata_ops_lock); 591 + spin_unlock(&appldata_ops_lock); 597 592 P_INFO("%s-ops unregistered!\n", ops->name); 598 593 } 599 594 /********************** module-ops management <END> **************************/ ··· 607 602 init_virt_timer(&per_cpu(appldata_timer, cpu)); 608 603 per_cpu(appldata_timer, cpu).function = appldata_timer_function; 609 604 per_cpu(appldata_timer, cpu).data = (unsigned long) 610 - &appldata_tasklet_struct; 605 + &appldata_work; 611 606 atomic_inc(&appldata_expire_count); 612 607 spin_lock(&appldata_timer_lock); 613 608 __appldata_vtimer_setup(APPLDATA_MOD_TIMER); ··· 620 615 del_virt_timer(&per_cpu(appldata_timer, cpu)); 621 616 if (atomic_dec_and_test(&appldata_expire_count)) { 622 617 atomic_set(&appldata_expire_count, num_online_cpus()); 623 - 
tasklet_schedule(&appldata_tasklet_struct); 618 + queue_work(appldata_wq, &appldata_work); 624 619 } 625 620 spin_lock(&appldata_timer_lock); 626 621 __appldata_vtimer_setup(APPLDATA_MOD_TIMER); ··· 653 648 /* 654 649 * appldata_init() 655 650 * 656 - * init timer and tasklet, register /proc entries 651 + * init timer, register /proc entries 657 652 */ 658 653 static int __init appldata_init(void) 659 654 { ··· 661 656 662 657 P_DEBUG("sizeof(parameter_list) = %lu\n", 663 658 sizeof(struct appldata_parameter_list)); 659 + 660 + appldata_wq = create_singlethread_workqueue("appldata"); 661 + if (!appldata_wq) { 662 + P_ERROR("Could not create work queue\n"); 663 + return -ENOMEM; 664 + } 664 665 665 666 for_each_online_cpu(i) 666 667 appldata_online_cpu(i); ··· 681 670 appldata_table[1].de->owner = THIS_MODULE; 682 671 #endif 683 672 684 - tasklet_init(&appldata_tasklet_struct, appldata_tasklet_function, 0); 685 673 P_DEBUG("Base interface initialized.\n"); 686 674 return 0; 687 675 } ··· 688 678 /* 689 679 * appldata_exit() 690 680 * 691 - * stop timer and tasklet, unregister /proc entries 681 + * stop timer, unregister /proc entries 692 682 */ 693 683 static void __exit appldata_exit(void) 694 684 { ··· 700 690 /* 701 691 * ops list should be empty, but just in case something went wrong... 702 692 */ 703 - spin_lock_bh(&appldata_ops_lock); 693 + spin_lock(&appldata_ops_lock); 704 694 list_for_each(lh, &appldata_ops_list) { 705 695 ops = list_entry(lh, struct appldata_ops, list); 706 696 rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, ··· 710 700 "return code: %d\n", ops->name, rc); 711 701 } 712 702 } 713 - spin_unlock_bh(&appldata_ops_lock); 703 + spin_unlock(&appldata_ops_lock); 714 704 715 705 for_each_online_cpu(i) 716 706 appldata_offline_cpu(i); ··· 719 709 720 710 unregister_sysctl_table(appldata_sysctl_header); 721 711 722 - tasklet_kill(&appldata_tasklet_struct); 712 + destroy_workqueue(appldata_wq); 723 713 P_DEBUG("... 
module unloaded!\n"); 724 714 } 725 715 /**************************** init / exit <END> ******************************/
+1 -1
arch/s390/appldata/appldata_mem.c
··· 68 68 u64 pgmajfault; /* page faults (major only) */ 69 69 // <-- New in 2.6 70 70 71 - } appldata_mem_data; 71 + } __attribute__((packed)) appldata_mem_data; 72 72 73 73 74 74 static inline void appldata_debug_print(struct appldata_mem_data *mem_data)
+1 -1
arch/s390/appldata/appldata_net_sum.c
··· 57 57 u64 rx_dropped; /* no space in linux buffers */ 58 58 u64 tx_dropped; /* no space available in linux */ 59 59 u64 collisions; /* collisions while transmitting */ 60 - } appldata_net_sum_data; 60 + } __attribute__((packed)) appldata_net_sum_data; 61 61 62 62 63 63 static inline void appldata_print_debug(struct appldata_net_sum_data *net_data)
+2 -2
arch/s390/appldata/appldata_os.c
··· 49 49 u32 per_cpu_softirq; /* ... spent in softirqs */ 50 50 u32 per_cpu_iowait; /* ... spent while waiting for I/O */ 51 51 // <-- New in 2.6 52 - }; 52 + } __attribute__((packed)); 53 53 54 54 struct appldata_os_data { 55 55 u64 timestamp; ··· 75 75 76 76 /* per cpu data */ 77 77 struct appldata_os_per_cpu os_cpu[0]; 78 - }; 78 + } __attribute__((packed)); 79 79 80 80 static struct appldata_os_data *appldata_os_data; 81 81