[PATCH] s390: deadlock in appldata

The system might hang when using appldata_mem with high I/O traffic and a
large number of devices. The spinlocks bdev_lock and swaplock are acquired
via calls to si_meminfo() and si_swapinfo() from a tasklet, i.e. interrupt
context, which can lead to a deadlock. Replace tasklet with work queue.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Gerald Schaefer and committed by Linus Torvalds (commits f26d583e, 595bf2aa).

+45 -35
+41 -31
arch/s390/appldata/appldata_base.c
··· 28 //#include <linux/kernel_stat.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 32 #include "appldata.h" 33 ··· 134 static int appldata_timer_active; 135 136 /* 137 - * Tasklet 138 */ 139 - static struct tasklet_struct appldata_tasklet_struct; 140 141 /* 142 * Ops list ··· 148 static LIST_HEAD(appldata_ops_list); 149 150 151 - /************************* timer, tasklet, DIAG ******************************/ 152 /* 153 * appldata_timer_function() 154 * 155 - * schedule tasklet and reschedule timer 156 */ 157 static void appldata_timer_function(unsigned long data, struct pt_regs *regs) 158 { ··· 161 atomic_read(&appldata_expire_count)); 162 if (atomic_dec_and_test(&appldata_expire_count)) { 163 atomic_set(&appldata_expire_count, num_online_cpus()); 164 - tasklet_schedule((struct tasklet_struct *) data); 165 } 166 } 167 168 /* 169 - * appldata_tasklet_function() 170 * 171 * call data gathering function for each (active) module 172 */ 173 - static void appldata_tasklet_function(unsigned long data) 174 { 175 struct list_head *lh; 176 struct appldata_ops *ops; 177 int i; 178 179 - P_DEBUG(" -= Tasklet =-\n"); 180 i = 0; 181 spin_lock(&appldata_ops_lock); 182 list_for_each(lh, &appldata_ops_list) { ··· 235 : "=d" (ry) : "d" (&(appldata_parameter_list)) : "cc"); 236 return (int) ry; 237 } 238 - /********************** timer, tasklet, DIAG <END> ***************************/ 239 240 241 /****************************** /proc stuff **********************************/ ··· 415 struct list_head *lh; 416 417 found = 0; 418 - spin_lock_bh(&appldata_ops_lock); 419 list_for_each(lh, &appldata_ops_list) { 420 tmp_ops = list_entry(lh, struct appldata_ops, list); 421 if (&tmp_ops->ctl_table[2] == ctl) { ··· 423 } 424 } 425 if (!found) { 426 - spin_unlock_bh(&appldata_ops_lock); 427 return -ENODEV; 428 } 429 ops = ctl->data; 430 if (!try_module_get(ops->owner)) { // protect this function 431 - spin_unlock_bh(&appldata_ops_lock); 432 return -ENODEV; 433 } 434 - 
spin_unlock_bh(&appldata_ops_lock); 435 436 if (!*lenp || *ppos) { 437 *lenp = 0; ··· 455 return -EFAULT; 456 } 457 458 - spin_lock_bh(&appldata_ops_lock); 459 if ((buf[0] == '1') && (ops->active == 0)) { 460 - if (!try_module_get(ops->owner)) { // protect tasklet 461 - spin_unlock_bh(&appldata_ops_lock); 462 module_put(ops->owner); 463 return -ENODEV; 464 } ··· 490 } 491 module_put(ops->owner); 492 } 493 - spin_unlock_bh(&appldata_ops_lock); 494 out: 495 *lenp = len; 496 *ppos += len; ··· 534 } 535 memset(ops->ctl_table, 0, 4*sizeof(struct ctl_table)); 536 537 - spin_lock_bh(&appldata_ops_lock); 538 list_for_each(lh, &appldata_ops_list) { 539 tmp_ops = list_entry(lh, struct appldata_ops, list); 540 P_DEBUG("register_ops loop: %i) name = %s, ctl = %i\n", ··· 546 APPLDATA_PROC_NAME_LENGTH) == 0) { 547 P_ERROR("Name \"%s\" already registered!\n", ops->name); 548 kfree(ops->ctl_table); 549 - spin_unlock_bh(&appldata_ops_lock); 550 return -EBUSY; 551 } 552 if (tmp_ops->ctl_nr == ops->ctl_nr) { 553 P_ERROR("ctl_nr %i already registered!\n", ops->ctl_nr); 554 kfree(ops->ctl_table); 555 - spin_unlock_bh(&appldata_ops_lock); 556 return -EBUSY; 557 } 558 } 559 list_add(&ops->list, &appldata_ops_list); 560 - spin_unlock_bh(&appldata_ops_lock); 561 562 ops->ctl_table[0].ctl_name = CTL_APPLDATA; 563 ops->ctl_table[0].procname = appldata_proc_name; ··· 588 */ 589 void appldata_unregister_ops(struct appldata_ops *ops) 590 { 591 - spin_lock_bh(&appldata_ops_lock); 592 unregister_sysctl_table(ops->sysctl_header); 593 list_del(&ops->list); 594 kfree(ops->ctl_table); 595 ops->ctl_table = NULL; 596 - spin_unlock_bh(&appldata_ops_lock); 597 P_INFO("%s-ops unregistered!\n", ops->name); 598 } 599 /********************** module-ops management <END> **************************/ ··· 607 init_virt_timer(&per_cpu(appldata_timer, cpu)); 608 per_cpu(appldata_timer, cpu).function = appldata_timer_function; 609 per_cpu(appldata_timer, cpu).data = (unsigned long) 610 - &appldata_tasklet_struct; 
611 atomic_inc(&appldata_expire_count); 612 spin_lock(&appldata_timer_lock); 613 __appldata_vtimer_setup(APPLDATA_MOD_TIMER); ··· 620 del_virt_timer(&per_cpu(appldata_timer, cpu)); 621 if (atomic_dec_and_test(&appldata_expire_count)) { 622 atomic_set(&appldata_expire_count, num_online_cpus()); 623 - tasklet_schedule(&appldata_tasklet_struct); 624 } 625 spin_lock(&appldata_timer_lock); 626 __appldata_vtimer_setup(APPLDATA_MOD_TIMER); ··· 653 /* 654 * appldata_init() 655 * 656 - * init timer and tasklet, register /proc entries 657 */ 658 static int __init appldata_init(void) 659 { ··· 661 662 P_DEBUG("sizeof(parameter_list) = %lu\n", 663 sizeof(struct appldata_parameter_list)); 664 665 for_each_online_cpu(i) 666 appldata_online_cpu(i); ··· 681 appldata_table[1].de->owner = THIS_MODULE; 682 #endif 683 684 - tasklet_init(&appldata_tasklet_struct, appldata_tasklet_function, 0); 685 P_DEBUG("Base interface initialized.\n"); 686 return 0; 687 } ··· 688 /* 689 * appldata_exit() 690 * 691 - * stop timer and tasklet, unregister /proc entries 692 */ 693 static void __exit appldata_exit(void) 694 { ··· 700 /* 701 * ops list should be empty, but just in case something went wrong... 702 */ 703 - spin_lock_bh(&appldata_ops_lock); 704 list_for_each(lh, &appldata_ops_list) { 705 ops = list_entry(lh, struct appldata_ops, list); 706 rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, ··· 710 "return code: %d\n", ops->name, rc); 711 } 712 } 713 - spin_unlock_bh(&appldata_ops_lock); 714 715 for_each_online_cpu(i) 716 appldata_offline_cpu(i); ··· 719 720 unregister_sysctl_table(appldata_sysctl_header); 721 722 - tasklet_kill(&appldata_tasklet_struct); 723 P_DEBUG("... module unloaded!\n"); 724 } 725 /**************************** init / exit <END> ******************************/
··· 28 //#include <linux/kernel_stat.h> 29 #include <linux/notifier.h> 30 #include <linux/cpu.h> 31 + #include <linux/workqueue.h> 32 33 #include "appldata.h" 34 ··· 133 static int appldata_timer_active; 134 135 /* 136 + * Work queue 137 */ 138 + static struct workqueue_struct *appldata_wq; 139 + static void appldata_work_fn(void *data); 140 + static DECLARE_WORK(appldata_work, appldata_work_fn, NULL); 141 + 142 143 /* 144 * Ops list ··· 144 static LIST_HEAD(appldata_ops_list); 145 146 147 + /*************************** timer, work, DIAG *******************************/ 148 /* 149 * appldata_timer_function() 150 * 151 + * schedule work and reschedule timer 152 */ 153 static void appldata_timer_function(unsigned long data, struct pt_regs *regs) 154 { ··· 157 atomic_read(&appldata_expire_count)); 158 if (atomic_dec_and_test(&appldata_expire_count)) { 159 atomic_set(&appldata_expire_count, num_online_cpus()); 160 + queue_work(appldata_wq, (struct work_struct *) data); 161 } 162 } 163 164 /* 165 + * appldata_work_fn() 166 * 167 * call data gathering function for each (active) module 168 */ 169 + static void appldata_work_fn(void *data) 170 { 171 struct list_head *lh; 172 struct appldata_ops *ops; 173 int i; 174 175 + P_DEBUG(" -= Work Queue =-\n"); 176 i = 0; 177 spin_lock(&appldata_ops_lock); 178 list_for_each(lh, &appldata_ops_list) { ··· 231 : "=d" (ry) : "d" (&(appldata_parameter_list)) : "cc"); 232 return (int) ry; 233 } 234 + /************************ timer, work, DIAG <END> ****************************/ 235 236 237 /****************************** /proc stuff **********************************/ ··· 411 struct list_head *lh; 412 413 found = 0; 414 + spin_lock(&appldata_ops_lock); 415 list_for_each(lh, &appldata_ops_list) { 416 tmp_ops = list_entry(lh, struct appldata_ops, list); 417 if (&tmp_ops->ctl_table[2] == ctl) { ··· 419 } 420 } 421 if (!found) { 422 + spin_unlock(&appldata_ops_lock); 423 return -ENODEV; 424 } 425 ops = ctl->data; 426 if 
(!try_module_get(ops->owner)) { // protect this function 427 + spin_unlock(&appldata_ops_lock); 428 return -ENODEV; 429 } 430 + spin_unlock(&appldata_ops_lock); 431 432 if (!*lenp || *ppos) { 433 *lenp = 0; ··· 451 return -EFAULT; 452 } 453 454 + spin_lock(&appldata_ops_lock); 455 if ((buf[0] == '1') && (ops->active == 0)) { 456 + // protect work queue callback 457 + if (!try_module_get(ops->owner)) { 458 + spin_unlock(&appldata_ops_lock); 459 module_put(ops->owner); 460 return -ENODEV; 461 } ··· 485 } 486 module_put(ops->owner); 487 } 488 + spin_unlock(&appldata_ops_lock); 489 out: 490 *lenp = len; 491 *ppos += len; ··· 529 } 530 memset(ops->ctl_table, 0, 4*sizeof(struct ctl_table)); 531 532 + spin_lock(&appldata_ops_lock); 533 list_for_each(lh, &appldata_ops_list) { 534 tmp_ops = list_entry(lh, struct appldata_ops, list); 535 P_DEBUG("register_ops loop: %i) name = %s, ctl = %i\n", ··· 541 APPLDATA_PROC_NAME_LENGTH) == 0) { 542 P_ERROR("Name \"%s\" already registered!\n", ops->name); 543 kfree(ops->ctl_table); 544 + spin_unlock(&appldata_ops_lock); 545 return -EBUSY; 546 } 547 if (tmp_ops->ctl_nr == ops->ctl_nr) { 548 P_ERROR("ctl_nr %i already registered!\n", ops->ctl_nr); 549 kfree(ops->ctl_table); 550 + spin_unlock(&appldata_ops_lock); 551 return -EBUSY; 552 } 553 } 554 list_add(&ops->list, &appldata_ops_list); 555 + spin_unlock(&appldata_ops_lock); 556 557 ops->ctl_table[0].ctl_name = CTL_APPLDATA; 558 ops->ctl_table[0].procname = appldata_proc_name; ··· 583 */ 584 void appldata_unregister_ops(struct appldata_ops *ops) 585 { 586 + spin_lock(&appldata_ops_lock); 587 unregister_sysctl_table(ops->sysctl_header); 588 list_del(&ops->list); 589 kfree(ops->ctl_table); 590 ops->ctl_table = NULL; 591 + spin_unlock(&appldata_ops_lock); 592 P_INFO("%s-ops unregistered!\n", ops->name); 593 } 594 /********************** module-ops management <END> **************************/ ··· 602 init_virt_timer(&per_cpu(appldata_timer, cpu)); 603 per_cpu(appldata_timer, cpu).function = 
appldata_timer_function; 604 per_cpu(appldata_timer, cpu).data = (unsigned long) 605 + &appldata_work; 606 atomic_inc(&appldata_expire_count); 607 spin_lock(&appldata_timer_lock); 608 __appldata_vtimer_setup(APPLDATA_MOD_TIMER); ··· 615 del_virt_timer(&per_cpu(appldata_timer, cpu)); 616 if (atomic_dec_and_test(&appldata_expire_count)) { 617 atomic_set(&appldata_expire_count, num_online_cpus()); 618 + queue_work(appldata_wq, &appldata_work); 619 } 620 spin_lock(&appldata_timer_lock); 621 __appldata_vtimer_setup(APPLDATA_MOD_TIMER); ··· 648 /* 649 * appldata_init() 650 * 651 + * init timer, register /proc entries 652 */ 653 static int __init appldata_init(void) 654 { ··· 656 657 P_DEBUG("sizeof(parameter_list) = %lu\n", 658 sizeof(struct appldata_parameter_list)); 659 + 660 + appldata_wq = create_singlethread_workqueue("appldata"); 661 + if (!appldata_wq) { 662 + P_ERROR("Could not create work queue\n"); 663 + return -ENOMEM; 664 + } 665 666 for_each_online_cpu(i) 667 appldata_online_cpu(i); ··· 670 appldata_table[1].de->owner = THIS_MODULE; 671 #endif 672 673 P_DEBUG("Base interface initialized.\n"); 674 return 0; 675 } ··· 678 /* 679 * appldata_exit() 680 * 681 + * stop timer, unregister /proc entries 682 */ 683 static void __exit appldata_exit(void) 684 { ··· 690 /* 691 * ops list should be empty, but just in case something went wrong... 692 */ 693 + spin_lock(&appldata_ops_lock); 694 list_for_each(lh, &appldata_ops_list) { 695 ops = list_entry(lh, struct appldata_ops, list); 696 rc = appldata_diag(ops->record_nr, APPLDATA_STOP_REC, ··· 700 "return code: %d\n", ops->name, rc); 701 } 702 } 703 + spin_unlock(&appldata_ops_lock); 704 705 for_each_online_cpu(i) 706 appldata_offline_cpu(i); ··· 709 710 unregister_sysctl_table(appldata_sysctl_header); 711 712 + destroy_workqueue(appldata_wq); 713 P_DEBUG("... module unloaded!\n"); 714 } 715 /**************************** init / exit <END> ******************************/
+1 -1
arch/s390/appldata/appldata_mem.c
··· 68 u64 pgmajfault; /* page faults (major only) */ 69 // <-- New in 2.6 70 71 - } appldata_mem_data; 72 73 74 static inline void appldata_debug_print(struct appldata_mem_data *mem_data)
··· 68 u64 pgmajfault; /* page faults (major only) */ 69 // <-- New in 2.6 70 71 + } __attribute__((packed)) appldata_mem_data; 72 73 74 static inline void appldata_debug_print(struct appldata_mem_data *mem_data)
+1 -1
arch/s390/appldata/appldata_net_sum.c
··· 57 u64 rx_dropped; /* no space in linux buffers */ 58 u64 tx_dropped; /* no space available in linux */ 59 u64 collisions; /* collisions while transmitting */ 60 - } appldata_net_sum_data; 61 62 63 static inline void appldata_print_debug(struct appldata_net_sum_data *net_data)
··· 57 u64 rx_dropped; /* no space in linux buffers */ 58 u64 tx_dropped; /* no space available in linux */ 59 u64 collisions; /* collisions while transmitting */ 60 + } __attribute__((packed)) appldata_net_sum_data; 61 62 63 static inline void appldata_print_debug(struct appldata_net_sum_data *net_data)
+2 -2
arch/s390/appldata/appldata_os.c
··· 49 u32 per_cpu_softirq; /* ... spent in softirqs */ 50 u32 per_cpu_iowait; /* ... spent while waiting for I/O */ 51 // <-- New in 2.6 52 - }; 53 54 struct appldata_os_data { 55 u64 timestamp; ··· 75 76 /* per cpu data */ 77 struct appldata_os_per_cpu os_cpu[0]; 78 - }; 79 80 static struct appldata_os_data *appldata_os_data; 81
··· 49 u32 per_cpu_softirq; /* ... spent in softirqs */ 50 u32 per_cpu_iowait; /* ... spent while waiting for I/O */ 51 // <-- New in 2.6 52 + } __attribute__((packed)); 53 54 struct appldata_os_data { 55 u64 timestamp; ··· 75 76 /* per cpu data */ 77 struct appldata_os_per_cpu os_cpu[0]; 78 + } __attribute__((packed)); 79 80 static struct appldata_os_data *appldata_os_data; 81