Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[SCSI] sg: fixes for large page_size

This sg driver patch addresses the problem with larger
page sizes reported by Brian King in this post:
http://marc.theaimsgroup.com/?l=linux-scsi&m=115867718623631&w=2
Some other related matters are also addressed. Some of these
prevent oopses when SG_SCATTER_SZ or scatter_elem_sz is
set to an inappropriate value.

The scatter_elem_sz has been tested up to 4 MB, which should
make the largest data transfer with one SCSI command, 32 MB
less one block, achievable with a relatively small number
of elements in the scatter-gather list.

ChangeLog:
- add scatter_elem_sz boot time parameter and sysfs module
parameter that is initialized to SG_SCATTER_SZ
- the driver will then adjust scatter_elem_sz to be
max(the given scatter_elem_sz, PAGE_SIZE).
It will also round it up, if necessary, to be a power
of two.
- clean up sg.h header, correct bad URLs and some statements
that are no longer valid
- make the def_reserved_size sysfs module attribute writable

Signed-off-by: Douglas Gilbert <dougg@torque.net>
Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com>

authored by

Douglas Gilbert and committed by
James Bottomley
6460e75a 8aee918a

+60 -52
+40 -13
drivers/scsi/sg.c
··· 60 60 61 61 #ifdef CONFIG_SCSI_PROC_FS 62 62 #include <linux/proc_fs.h> 63 - static char *sg_version_date = "20060818"; 63 + static char *sg_version_date = "20060920"; 64 64 65 65 static int sg_proc_init(void); 66 66 static void sg_proc_cleanup(void); ··· 93 93 the kernel (i.e. it is not a module).] */ 94 94 static int def_reserved_size = -1; /* picks up init parameter */ 95 95 static int sg_allow_dio = SG_ALLOW_DIO_DEF; 96 + 97 + static int scatter_elem_sz = SG_SCATTER_SZ; 98 + static int scatter_elem_sz_prev = SG_SCATTER_SZ; 96 99 97 100 #define SG_SECTOR_SZ 512 98 101 #define SG_SECTOR_MSK (SG_SECTOR_SZ - 1) ··· 1540 1537 msleep(10); /* dirty detach so delay device destruction */ 1541 1538 } 1542 1539 1543 - /* Set 'perm' (4th argument) to 0 to disable module_param's definition 1544 - * of sysfs parameters (which module_param doesn't yet support). 1545 - * Sysfs parameters defined explicitly below. 1546 - */ 1547 - module_param_named(def_reserved_size, def_reserved_size, int, S_IRUGO); 1540 + module_param_named(scatter_elem_sz, scatter_elem_sz, int, S_IRUGO | S_IWUSR); 1541 + module_param_named(def_reserved_size, def_reserved_size, int, 1542 + S_IRUGO | S_IWUSR); 1548 1543 module_param_named(allow_dio, sg_allow_dio, int, S_IRUGO | S_IWUSR); 1549 1544 1550 1545 MODULE_AUTHOR("Douglas Gilbert"); ··· 1551 1550 MODULE_VERSION(SG_VERSION_STR); 1552 1551 MODULE_ALIAS_CHARDEV_MAJOR(SCSI_GENERIC_MAJOR); 1553 1552 1553 + MODULE_PARM_DESC(scatter_elem_sz, "scatter gather element " 1554 + "size (default: max(SG_SCATTER_SZ, PAGE_SIZE))"); 1554 1555 MODULE_PARM_DESC(def_reserved_size, "size of buffer reserved for each fd"); 1555 1556 MODULE_PARM_DESC(allow_dio, "allow direct I/O (default: 0 (disallow))"); 1556 1557 ··· 1561 1558 { 1562 1559 int rc; 1563 1560 1561 + if (scatter_elem_sz < PAGE_SIZE) { 1562 + scatter_elem_sz = PAGE_SIZE; 1563 + scatter_elem_sz_prev = scatter_elem_sz; 1564 + } 1564 1565 if (def_reserved_size >= 0) 1565 1566 sg_big_buff = def_reserved_size; 
1567 + else 1568 + def_reserved_size = sg_big_buff; 1566 1569 1567 1570 rc = register_chrdev_region(MKDEV(SCSI_GENERIC_MAJOR, 0), 1568 1571 SG_MAX_DEVS, "sg"); ··· 1851 1842 if (mx_sc_elems < 0) 1852 1843 return mx_sc_elems; /* most likely -ENOMEM */ 1853 1844 1845 + num = scatter_elem_sz; 1846 + if (unlikely(num != scatter_elem_sz_prev)) { 1847 + if (num < PAGE_SIZE) { 1848 + scatter_elem_sz = PAGE_SIZE; 1849 + scatter_elem_sz_prev = PAGE_SIZE; 1850 + } else 1851 + scatter_elem_sz_prev = num; 1852 + } 1854 1853 for (k = 0, sg = schp->buffer, rem_sz = blk_size; 1855 1854 (rem_sz > 0) && (k < mx_sc_elems); 1856 1855 ++k, rem_sz -= ret_sz, ++sg) { 1857 1856 1858 - num = (rem_sz > SG_SCATTER_SZ) ? SG_SCATTER_SZ : rem_sz; 1857 + num = (rem_sz > scatter_elem_sz_prev) ? 1858 + scatter_elem_sz_prev : rem_sz; 1859 1859 p = sg_page_malloc(num, sfp->low_dma, &ret_sz); 1860 1860 if (!p) 1861 1861 return -ENOMEM; 1862 1862 1863 + if (num == scatter_elem_sz_prev) { 1864 + if (unlikely(ret_sz > scatter_elem_sz_prev)) { 1865 + scatter_elem_sz = ret_sz; 1866 + scatter_elem_sz_prev = ret_sz; 1867 + } 1868 + } 1863 1869 sg->page = p; 1864 1870 sg->length = ret_sz; 1865 1871 ··· 2365 2341 } 2366 2342 write_unlock_irqrestore(&sg_dev_arr_lock, iflags); 2367 2343 SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: sfp=0x%p\n", sfp)); 2344 + if (unlikely(sg_big_buff != def_reserved_size)) 2345 + sg_big_buff = def_reserved_size; 2346 + 2368 2347 sg_build_reserve(sfp, sg_big_buff); 2369 2348 SCSI_LOG_TIMEOUT(3, printk("sg_add_sfp: bufflen=%d, k_use_sg=%d\n", 2370 2349 sfp->reserve.bufflen, sfp->reserve.k_use_sg)); ··· 2464 2437 return srp ? 
1 : 0; 2465 2438 } 2466 2439 2467 - /* If retSzp==NULL want exact size or fail */ 2440 + /* The size fetched (value output via retSzp) set when non-NULL return */ 2468 2441 static struct page * 2469 2442 sg_page_malloc(int rqSz, int lowDma, int *retSzp) 2470 2443 { 2471 2444 struct page *resp = NULL; 2472 2445 gfp_t page_mask; 2473 2446 int order, a_size; 2474 - int resSz = rqSz; 2447 + int resSz; 2475 2448 2476 - if (rqSz <= 0) 2449 + if ((rqSz <= 0) || (NULL == retSzp)) 2477 2450 return resp; 2478 2451 2479 2452 if (lowDma) ··· 2483 2456 2484 2457 for (order = 0, a_size = PAGE_SIZE; a_size < rqSz; 2485 2458 order++, a_size <<= 1) ; 2459 + resSz = a_size; /* rounded up if necessary */ 2486 2460 resp = alloc_pages(page_mask, order); 2487 - while ((!resp) && order && retSzp) { 2461 + while ((!resp) && order) { 2488 2462 --order; 2489 2463 a_size >>= 1; /* divide by 2, until PAGE_SIZE */ 2490 2464 resp = alloc_pages(page_mask, order); /* try half */ ··· 2494 2466 if (resp) { 2495 2467 if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) 2496 2468 memset(page_address(resp), 0, resSz); 2497 - if (retSzp) 2498 - *retSzp = resSz; 2469 + *retSzp = resSz; 2499 2470 } 2500 2471 return resp; 2501 2472 }
+20 -39
include/scsi/sg.h
··· 11 11 Original driver (sg.h): 12 12 * Copyright (C) 1992 Lawrence Foard 13 13 Version 2 and 3 extensions to driver: 14 - * Copyright (C) 1998 - 2003 Douglas Gilbert 14 + * Copyright (C) 1998 - 2006 Douglas Gilbert 15 15 16 - Version: 3.5.29 (20030529) 17 - This version is for 2.5 series kernels. 18 - 19 - Changes since 3.5.28 (20030308) 20 - - fix bug introduced in version 3.1.24 (last segment of sgat list) 21 - Changes since 3.5.27 (20020812) 22 - - remove procfs entries: hosts, host_hdr + host_strs (now in sysfs) 23 - - add sysfs sg driver params: def_reserved_size, allow_dio, version 24 - - new boot option: "sg_allow_dio" and module parameter: "allow_dio" 25 - - multiple internal changes due to scsi subsystem rework 26 - Changes since 3.5.26 (20020708) 27 - - re-add direct IO using Kai Makisara's work 28 - - re-tab to 8, start using C99-isms 29 - - simplify memory management 30 - Changes since 3.5.25 (20020504) 31 - - driverfs additions 32 - - copy_to/from_user() fixes [William Stinson] 33 - - disable kiobufs support 16 + Version: 3.5.34 (20060920) 17 + This version is for 2.6 series kernels. 34 18 35 19 For a full changelog see http://www.torque.net/sg 36 20 ··· 24 40 2.1.40 2.2.20 25 41 3.0.x optional version 3 sg driver for 2.2 series 26 42 3.1.17++ 2.4.0++ 27 - 3.5.23++ 2.5.0++ 43 + 3.5.30++ 2.6.0++ 28 44 29 45 Major new features in SG 3.x driver (cf SG 2.x drivers) 30 46 - SG_IO ioctl() combines function if write() and read() ··· 35 51 data into kernel buffers and then use the CPU to copy the data into the 36 52 user space (vice versa for writes). That is called "indirect" IO due to 37 53 the double handling of data. There are two methods offered to remove the 38 - redundant copy: 1) direct IO which uses the kernel kiobuf mechanism and 39 - 2) using the mmap() system call to map the reserve buffer (this driver has 40 - one reserve buffer per fd) into the user space. Both have their advantages. 
54 + redundant copy: 1) direct IO and 2) using the mmap() system call to map 55 + the reserve buffer (this driver has one reserve buffer per fd) into the 56 + user space. Both have their advantages. 41 57 In terms of absolute speed mmap() is faster. If speed is not a concern, 42 58 indirect IO should be fine. Read the documentation for more information. 43 59 44 - ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' may be 45 - needed. That pseudo file's content is defaulted to 0. ** 60 + ** N.B. To use direct IO 'echo 1 > /proc/scsi/sg/allow_dio' or 61 + 'echo 1 > /sys/module/sg/parameters/allow_dio' is needed. 62 + That attribute is 0 by default. ** 46 63 47 64 Historical note: this SCSI pass-through driver has been known as "sg" for 48 65 a decade. In broader kernel discussions "sg" is used to refer to scatter ··· 57 72 http://www.torque.net/sg/p/sg_v3_ho.html 58 73 This is a rendering from DocBook source [change the extension to "sgml" 59 74 or "xml"]. There are renderings in "ps", "pdf", "rtf" and "txt" (soon). 75 + The SG_IO ioctl is now found in other parts kernel (e.g. the block layer). 76 + For more information see http://www.torque.net/sg/sg_io.html 60 77 61 78 The older, version 2 documents discuss the original sg interface in detail: 62 79 http://www.torque.net/sg/p/scsi-generic.txt 63 80 http://www.torque.net/sg/p/scsi-generic_long.txt 64 - A version of this document (potentially out of date) may also be found in 65 - the kernel source tree, probably at: 66 - Documentation/scsi/scsi-generic.txt . 81 + Also available: <kernel_source>/Documentation/scsi/scsi-generic.txt 67 82 68 83 Utility and test programs are available at the sg web site. They are 69 - bundled as sg_utils (for the lk 2.2 series) and sg3_utils (for the 70 - lk 2.4 series). 
71 - 72 - There is a HOWTO on the Linux SCSI subsystem in the lk 2.4 series at: 73 - http://www.linuxdoc.org/HOWTO/SCSI-2.4-HOWTO 84 + packaged as sg3_utils (for the lk 2.4 and 2.6 series) and sg_utils 85 + (for the lk 2.2 series). 74 86 */ 75 87 76 88 ··· 220 238 #define SG_GET_ACCESS_COUNT 0x2289 221 239 222 240 223 - #define SG_SCATTER_SZ (8 * 4096) /* PAGE_SIZE not available to user */ 241 + #define SG_SCATTER_SZ (8 * 4096) 224 242 /* Largest size (in bytes) a single scatter-gather list element can have. 225 - The value must be a power of 2 and <= (PAGE_SIZE * 32) [131072 bytes on 226 - i386]. The minimum value is PAGE_SIZE. If scatter-gather not supported 227 - by adapter then this value is the largest data block that can be 228 - read/written by a single scsi command. The user can find the value of 229 - PAGE_SIZE by calling getpagesize() defined in unistd.h . */ 243 + The value used by the driver is 'max(SG_SCATTER_SZ, PAGE_SIZE)'. 244 + This value should be a power of 2 (and may be rounded up internally). 245 + If scatter-gather is not supported by adapter then this value is the 246 + largest data block that can be read/written by a single scsi command. */ 230 247 231 248 #define SG_DEFAULT_RETRIES 0 232 249