Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] s390: fix endless retry loop in tape driver

If a tape device is assigned to another host, the interrupt for the assign
operation comes back with deferred condition code 1. Under some conditions
this can lead to an endless loop of retries. In the deferred condition code
handling, check whether the current request is still in IO and prevent retries
when the request has already been cancelled.

Signed-off-by: Michael Holzheu <holzheu@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

authored by

Michael Holzheu and committed by
Linus Torvalds
5f384338 4cd190a7

+35 -13
+1
drivers/s390/char/tape.h
··· 250 250 extern int tape_do_io(struct tape_device *, struct tape_request *); 251 251 extern int tape_do_io_async(struct tape_device *, struct tape_request *); 252 252 extern int tape_do_io_interruptible(struct tape_device *, struct tape_request *); 253 + extern int tape_cancel_io(struct tape_device *, struct tape_request *); 253 254 void tape_hotplug_event(struct tape_device *, int major, int action); 254 255 255 256 static inline int
+27 -5
drivers/s390/char/tape_core.c
··· 761 761 */ 762 762 if (request->status == TAPE_REQUEST_IN_IO) 763 763 return; 764 + /* 765 + * Request has already been stopped. We have to wait until 766 + * the request is removed from the queue in the interrupt 767 + * handling. 768 + */ 769 + if (request->status == TAPE_REQUEST_DONE) 770 + return; 764 771 765 772 /* 766 773 * We wanted to cancel the request but the common I/O layer ··· 1031 1024 } 1032 1025 1033 1026 /* 1027 + * Stop running ccw. 1028 + */ 1029 + int 1030 + tape_cancel_io(struct tape_device *device, struct tape_request *request) 1031 + { 1032 + int rc; 1033 + 1034 + spin_lock_irq(get_ccwdev_lock(device->cdev)); 1035 + rc = __tape_cancel_io(device, request); 1036 + spin_unlock_irq(get_ccwdev_lock(device->cdev)); 1037 + return rc; 1038 + } 1039 + 1040 + /* 1034 1041 * Tape interrupt routine, called from the ccw_device layer 1035 1042 */ 1036 1043 static void ··· 1089 1068 * error might still apply. So we just schedule the request to be 1090 1069 * started later. 1091 1070 */ 1092 - if (irb->scsw.cc != 0 && (irb->scsw.fctl & SCSW_FCTL_START_FUNC)) { 1093 - PRINT_WARN("(%s): deferred cc=%i. restaring\n", 1094 - cdev->dev.bus_id, 1095 - irb->scsw.cc); 1071 + if (irb->scsw.cc != 0 && (irb->scsw.fctl & SCSW_FCTL_START_FUNC) && 1072 + (request->status == TAPE_REQUEST_IN_IO)) { 1073 + DBF_EVENT(3,"(%08x): deferred cc=%i, fctl=%i. restarting\n", 1074 + device->cdev_id, irb->scsw.cc, irb->scsw.fctl); 1096 1075 request->status = TAPE_REQUEST_QUEUED; 1097 - schedule_work(&device->tape_dnr); 1076 + schedule_delayed_work(&device->tape_dnr, HZ); 1098 1077 return; 1099 1078 } 1100 1079 ··· 1308 1287 EXPORT_SYMBOL(tape_do_io); 1309 1288 EXPORT_SYMBOL(tape_do_io_async); 1310 1289 EXPORT_SYMBOL(tape_do_io_interruptible); 1290 + EXPORT_SYMBOL(tape_cancel_io); 1311 1291 EXPORT_SYMBOL(tape_mtop);
+7 -8
drivers/s390/char/tape_std.c
··· 37 37 { 38 38 struct tape_request * request; 39 39 struct tape_device * device; 40 + int rc; 40 41 41 42 request = (struct tape_request *) data; 42 43 if ((device = request->device) == NULL) 43 44 BUG(); 44 45 45 - spin_lock_irq(get_ccwdev_lock(device->cdev)); 46 - if (request->callback != NULL) { 47 - DBF_EVENT(3, "%08x: Assignment timeout. Device busy.\n", 46 + DBF_EVENT(3, "%08x: Assignment timeout. Device busy.\n", 48 47 device->cdev_id); 49 - PRINT_ERR("%s: Assignment timeout. Device busy.\n", 50 - device->cdev->dev.bus_id); 51 - ccw_device_clear(device->cdev, (long) request); 52 - } 53 - spin_unlock_irq(get_ccwdev_lock(device->cdev)); 48 + rc = tape_cancel_io(device, request); 49 + if(rc) 50 + PRINT_ERR("(%s): Assign timeout: Cancel failed with rc = %i\n", 51 + device->cdev->dev.bus_id, rc); 52 + 54 53 } 55 54 56 55 int