Merge branch 'ioat-md-accel-for-linus' of git://lost.foo-projects.org/~dwillia2/git/iop

* 'ioat-md-accel-for-linus' of git://lost.foo-projects.org/~dwillia2/git/iop: (28 commits)
ioatdma: add the unisys "i/oat" pci vendor/device id
ARM: Add drivers/dma to arch/arm/Kconfig
iop3xx: surface the iop3xx DMA and AAU units to the iop-adma driver
iop13xx: surface the iop13xx adma units to the iop-adma driver
dmaengine: driver for the iop32x, iop33x, and iop13xx raid engines
md: remove raid5 compute_block and compute_parity5
md: handle_stripe5 - request io processing in raid5_run_ops
md: handle_stripe5 - add request/completion logic for async expand ops
md: handle_stripe5 - add request/completion logic for async read ops
md: handle_stripe5 - add request/completion logic for async check ops
md: handle_stripe5 - add request/completion logic for async compute ops
md: handle_stripe5 - add request/completion logic for async write ops
md: common infrastructure for running operations with raid5_run_ops
md: raid5_run_ops - run stripe operations outside sh->lock
raid5: replace custom debug PRINTKs with standard pr_debug
raid5: refactor handle_stripe5 and handle_stripe6 (v3)
async_tx: add the async_tx api
xor: make 'xor_blocks' a library routine for use with async_tx
dmaengine: make clients responsible for managing channels
dmaengine: refactor dmaengine around dma_async_tx_descriptor
...

+7367 -1753
+6
Documentation/networking/ip-sysctl.txt
··· 433 433 not receive a window scaling option from them. 434 434 Default: 0 435 435 436 + tcp_dma_copybreak - INTEGER 437 + Lower limit, in bytes, of the size of socket reads that will be 438 + offloaded to a DMA copy engine, if one is present in the system 439 + and CONFIG_NET_DMA is enabled. 440 + Default: 4096 441 + 436 442 CIPSOv4 Variables: 437 443 438 444 cipso_cache_enable - BOOLEAN
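
The knob documented above is a plain integer sysctl; assuming it sits under net.ipv4 alongside the other variables described in this file, it can be tuned at run time, for example:

    sysctl -w net.ipv4.tcp_dma_copybreak=8192

Reads smaller than the limit are copied by the CPU as before; only reads at or above it are handed to a DMA copy engine, and only when CONFIG_NET_DMA is enabled.
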
+2
arch/arm/Kconfig
··· 1042 1042 1043 1043 source "drivers/rtc/Kconfig" 1044 1044 1045 + source "drivers/dma/Kconfig" 1046 + 1045 1047 endmenu 1046 1048 1047 1049 source "fs/Kconfig"
+214 -3
arch/arm/mach-iop13xx/setup.c
··· 25 25 #include <asm/hardware.h> 26 26 #include <asm/irq.h> 27 27 #include <asm/io.h> 28 + #include <asm/hardware/iop_adma.h> 28 29 29 30 #define IOP13XX_UART_XTAL 33334000 30 31 #define IOP13XX_SETUP_DEBUG 0 ··· 237 236 } 238 237 #endif 239 238 239 + /* ADMA Channels */ 240 + static struct resource iop13xx_adma_0_resources[] = { 241 + [0] = { 242 + .start = IOP13XX_ADMA_PHYS_BASE(0), 243 + .end = IOP13XX_ADMA_UPPER_PA(0), 244 + .flags = IORESOURCE_MEM, 245 + }, 246 + [1] = { 247 + .start = IRQ_IOP13XX_ADMA0_EOT, 248 + .end = IRQ_IOP13XX_ADMA0_EOT, 249 + .flags = IORESOURCE_IRQ 250 + }, 251 + [2] = { 252 + .start = IRQ_IOP13XX_ADMA0_EOC, 253 + .end = IRQ_IOP13XX_ADMA0_EOC, 254 + .flags = IORESOURCE_IRQ 255 + }, 256 + [3] = { 257 + .start = IRQ_IOP13XX_ADMA0_ERR, 258 + .end = IRQ_IOP13XX_ADMA0_ERR, 259 + .flags = IORESOURCE_IRQ 260 + } 261 + }; 262 + 263 + static struct resource iop13xx_adma_1_resources[] = { 264 + [0] = { 265 + .start = IOP13XX_ADMA_PHYS_BASE(1), 266 + .end = IOP13XX_ADMA_UPPER_PA(1), 267 + .flags = IORESOURCE_MEM, 268 + }, 269 + [1] = { 270 + .start = IRQ_IOP13XX_ADMA1_EOT, 271 + .end = IRQ_IOP13XX_ADMA1_EOT, 272 + .flags = IORESOURCE_IRQ 273 + }, 274 + [2] = { 275 + .start = IRQ_IOP13XX_ADMA1_EOC, 276 + .end = IRQ_IOP13XX_ADMA1_EOC, 277 + .flags = IORESOURCE_IRQ 278 + }, 279 + [3] = { 280 + .start = IRQ_IOP13XX_ADMA1_ERR, 281 + .end = IRQ_IOP13XX_ADMA1_ERR, 282 + .flags = IORESOURCE_IRQ 283 + } 284 + }; 285 + 286 + static struct resource iop13xx_adma_2_resources[] = { 287 + [0] = { 288 + .start = IOP13XX_ADMA_PHYS_BASE(2), 289 + .end = IOP13XX_ADMA_UPPER_PA(2), 290 + .flags = IORESOURCE_MEM, 291 + }, 292 + [1] = { 293 + .start = IRQ_IOP13XX_ADMA2_EOT, 294 + .end = IRQ_IOP13XX_ADMA2_EOT, 295 + .flags = IORESOURCE_IRQ 296 + }, 297 + [2] = { 298 + .start = IRQ_IOP13XX_ADMA2_EOC, 299 + .end = IRQ_IOP13XX_ADMA2_EOC, 300 + .flags = IORESOURCE_IRQ 301 + }, 302 + [3] = { 303 + .start = IRQ_IOP13XX_ADMA2_ERR, 304 + .end = IRQ_IOP13XX_ADMA2_ERR, 305 + .flags = IORESOURCE_IRQ 306 + } 307 + }; 308 + 309 + static u64 iop13xx_adma_dmamask = DMA_64BIT_MASK; 310 + static struct iop_adma_platform_data iop13xx_adma_0_data = { 311 + .hw_id = 0, 312 + .pool_size = PAGE_SIZE, 313 + }; 314 + 315 + static struct iop_adma_platform_data iop13xx_adma_1_data = { 316 + .hw_id = 1, 317 + .pool_size = PAGE_SIZE, 318 + }; 319 + 320 + static struct iop_adma_platform_data iop13xx_adma_2_data = { 321 + .hw_id = 2, 322 + .pool_size = PAGE_SIZE, 323 + }; 324 + 325 + /* The ids are fixed up later in iop13xx_platform_init */ 326 + static struct platform_device iop13xx_adma_0_channel = { 327 + .name = "iop-adma", 328 + .id = 0, 329 + .num_resources = 4, 330 + .resource = iop13xx_adma_0_resources, 331 + .dev = { 332 + .dma_mask = &iop13xx_adma_dmamask, 333 + .coherent_dma_mask = DMA_64BIT_MASK, 334 + .platform_data = (void *) &iop13xx_adma_0_data, 335 + }, 336 + }; 337 + 338 + static struct platform_device iop13xx_adma_1_channel = { 339 + .name = "iop-adma", 340 + .id = 0, 341 + .num_resources = 4, 342 + .resource = iop13xx_adma_1_resources, 343 + .dev = { 344 + .dma_mask = &iop13xx_adma_dmamask, 345 + .coherent_dma_mask = DMA_64BIT_MASK, 346 + .platform_data = (void *) &iop13xx_adma_1_data, 347 + }, 348 + }; 349 + 350 + static struct platform_device iop13xx_adma_2_channel = { 351 + .name = "iop-adma", 352 + .id = 0, 353 + .num_resources = 4, 354 + .resource = iop13xx_adma_2_resources, 355 + .dev = { 356 + .dma_mask = &iop13xx_adma_dmamask, 357 + .coherent_dma_mask = DMA_64BIT_MASK, 358 + .platform_data = 
(void *) &iop13xx_adma_2_data, 359 + }, 360 + }; 361 + 240 362 void __init iop13xx_map_io(void) 241 363 { 242 364 /* Initialize the Static Page Table maps */ 243 365 iotable_init(iop13xx_std_desc, ARRAY_SIZE(iop13xx_std_desc)); 244 366 } 245 367 246 - static int init_uart = 0; 247 - static int init_i2c = 0; 368 + static int init_uart; 369 + static int init_i2c; 370 + static int init_adma; 248 371 249 372 void __init iop13xx_platform_init(void) 250 373 { 251 374 int i; 252 - u32 uart_idx, i2c_idx, plat_idx; 375 + u32 uart_idx, i2c_idx, adma_idx, plat_idx; 253 376 struct platform_device *iop13xx_devices[IQ81340_MAX_PLAT_DEVICES]; 254 377 255 378 /* set the bases so we can read the device id */ ··· 419 294 } 420 295 } 421 296 297 + if (init_adma == IOP13XX_INIT_ADMA_DEFAULT) { 298 + init_adma |= IOP13XX_INIT_ADMA_0; 299 + init_adma |= IOP13XX_INIT_ADMA_1; 300 + init_adma |= IOP13XX_INIT_ADMA_2; 301 + } 302 + 422 303 plat_idx = 0; 423 304 uart_idx = 0; 424 305 i2c_idx = 0; ··· 459 328 iop13xx_i2c_2_controller.id = i2c_idx++; 460 329 iop13xx_devices[plat_idx++] = 461 330 &iop13xx_i2c_2_controller; 331 + break; 332 + } 333 + } 334 + 335 + /* initialize adma channel ids and capabilities */ 336 + adma_idx = 0; 337 + for (i = 0; i < IQ81340_NUM_ADMA; i++) { 338 + struct iop_adma_platform_data *plat_data; 339 + if ((init_adma & (1 << i)) && IOP13XX_SETUP_DEBUG) 340 + printk(KERN_INFO 341 + "Adding adma%d to platform device list\n", i); 342 + switch (init_adma & (1 << i)) { 343 + case IOP13XX_INIT_ADMA_0: 344 + iop13xx_adma_0_channel.id = adma_idx++; 345 + iop13xx_devices[plat_idx++] = &iop13xx_adma_0_channel; 346 + plat_data = &iop13xx_adma_0_data; 347 + dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); 348 + dma_cap_set(DMA_XOR, plat_data->cap_mask); 349 + dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); 350 + dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); 351 + dma_cap_set(DMA_MEMSET, plat_data->cap_mask); 352 + dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); 353 + dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); 354 + break; 355 + case IOP13XX_INIT_ADMA_1: 356 + iop13xx_adma_1_channel.id = adma_idx++; 357 + iop13xx_devices[plat_idx++] = &iop13xx_adma_1_channel; 358 + plat_data = &iop13xx_adma_1_data; 359 + dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); 360 + dma_cap_set(DMA_XOR, plat_data->cap_mask); 361 + dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); 362 + dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); 363 + dma_cap_set(DMA_MEMSET, plat_data->cap_mask); 364 + dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); 365 + dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); 366 + break; 367 + case IOP13XX_INIT_ADMA_2: 368 + iop13xx_adma_2_channel.id = adma_idx++; 369 + iop13xx_devices[plat_idx++] = &iop13xx_adma_2_channel; 370 + plat_data = &iop13xx_adma_2_data; 371 + dma_cap_set(DMA_MEMCPY, plat_data->cap_mask); 372 + dma_cap_set(DMA_XOR, plat_data->cap_mask); 373 + dma_cap_set(DMA_DUAL_XOR, plat_data->cap_mask); 374 + dma_cap_set(DMA_ZERO_SUM, plat_data->cap_mask); 375 + dma_cap_set(DMA_MEMSET, plat_data->cap_mask); 376 + dma_cap_set(DMA_MEMCPY_CRC32C, plat_data->cap_mask); 377 + dma_cap_set(DMA_INTERRUPT, plat_data->cap_mask); 378 + dma_cap_set(DMA_PQ_XOR, plat_data->cap_mask); 379 + dma_cap_set(DMA_PQ_UPDATE, plat_data->cap_mask); 380 + dma_cap_set(DMA_PQ_ZERO_SUM, plat_data->cap_mask); 462 381 break; 463 382 } 464 383 } ··· 580 399 return 1; 581 400 } 582 401 402 + static int __init iop13xx_init_adma_setup(char *str) 403 + { 404 + if (str) { 405 + while (*str != '\0') { 406 + switch (*str) { 407 
+ case '0': 408 + init_adma |= IOP13XX_INIT_ADMA_0; 409 + break; 410 + case '1': 411 + init_adma |= IOP13XX_INIT_ADMA_1; 412 + break; 413 + case '2': 414 + init_adma |= IOP13XX_INIT_ADMA_2; 415 + break; 416 + case ',': 417 + case '=': 418 + break; 419 + default: 420 + PRINTK("\"iop13xx_init_adma\" malformed" 421 + " at character: \'%c\'", *str); 422 + *(str + 1) = '\0'; 423 + init_adma = IOP13XX_INIT_ADMA_DEFAULT; 424 + } 425 + str++; 426 + } 427 + } 428 + return 1; 429 + } 430 + 431 + __setup("iop13xx_init_adma", iop13xx_init_adma_setup); 583 432 __setup("iop13xx_init_uart", iop13xx_init_uart_setup); 584 433 __setup("iop13xx_init_i2c", iop13xx_init_i2c_setup);
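
For reference, the iop13xx_init_adma parser added above scans its argument character by character ('0', '1', '2' select channels; ',' and '=' are ignored), mirroring the existing iop13xx_init_uart and iop13xx_init_i2c options. An illustrative boot line that enables only the first two ADMA channels:

    iop13xx_init_adma=0,1

If the option is omitted, init_adma stays at IOP13XX_INIT_ADMA_DEFAULT and the branch in iop13xx_platform_init() shown above enables all three channels.
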
+2
arch/arm/mach-iop32x/glantank.c
··· 180 180 platform_device_register(&iop3xx_i2c1_device); 181 181 platform_device_register(&glantank_flash_device); 182 182 platform_device_register(&glantank_serial_device); 183 + platform_device_register(&iop3xx_dma_0_channel); 184 + platform_device_register(&iop3xx_dma_1_channel); 183 185 184 186 pm_power_off = glantank_power_off; 185 187 }
+5
arch/arm/mach-iop32x/iq31244.c
··· 298 298 platform_device_register(&iop3xx_i2c1_device); 299 299 platform_device_register(&iq31244_flash_device); 300 300 platform_device_register(&iq31244_serial_device); 301 + platform_device_register(&iop3xx_dma_0_channel); 302 + platform_device_register(&iop3xx_dma_1_channel); 301 303 302 304 if (is_ep80219()) 303 305 pm_power_off = ep80219_power_off; 306 + 307 + if (!is_80219()) 308 + platform_device_register(&iop3xx_aau_channel); 304 309 } 305 310 306 311 static int __init force_ep80219_setup(char *str)
+3
arch/arm/mach-iop32x/iq80321.c
··· 181 181 platform_device_register(&iop3xx_i2c1_device); 182 182 platform_device_register(&iq80321_flash_device); 183 183 platform_device_register(&iq80321_serial_device); 184 + platform_device_register(&iop3xx_dma_0_channel); 185 + platform_device_register(&iop3xx_dma_1_channel); 186 + platform_device_register(&iop3xx_aau_channel); 184 187 } 185 188 186 189 MACHINE_START(IQ80321, "Intel IQ80321")
+2
arch/arm/mach-iop32x/n2100.c
··· 245 245 platform_device_register(&iop3xx_i2c0_device); 246 246 platform_device_register(&n2100_flash_device); 247 247 platform_device_register(&n2100_serial_device); 248 + platform_device_register(&iop3xx_dma_0_channel); 249 + platform_device_register(&iop3xx_dma_1_channel); 248 250 249 251 pm_power_off = n2100_power_off; 250 252
+3
arch/arm/mach-iop33x/iq80331.c
··· 136 136 platform_device_register(&iop33x_uart0_device); 137 137 platform_device_register(&iop33x_uart1_device); 138 138 platform_device_register(&iq80331_flash_device); 139 + platform_device_register(&iop3xx_dma_0_channel); 140 + platform_device_register(&iop3xx_dma_1_channel); 141 + platform_device_register(&iop3xx_aau_channel); 139 142 } 140 143 141 144 MACHINE_START(IQ80331, "Intel IQ80331")
+3
arch/arm/mach-iop33x/iq80332.c
··· 136 136 platform_device_register(&iop33x_uart0_device); 137 137 platform_device_register(&iop33x_uart1_device); 138 138 platform_device_register(&iq80332_flash_device); 139 + platform_device_register(&iop3xx_dma_0_channel); 140 + platform_device_register(&iop3xx_dma_1_channel); 141 + platform_device_register(&iop3xx_aau_channel); 139 142 } 140 143 141 144 MACHINE_START(IQ80332, "Intel IQ80332")
+2
arch/arm/plat-iop/Makefile
··· 12 12 obj-$(CONFIG_ARCH_IOP32X) += time.o 13 13 obj-$(CONFIG_ARCH_IOP32X) += io.o 14 14 obj-$(CONFIG_ARCH_IOP32X) += cp6.o 15 + obj-$(CONFIG_ARCH_IOP32X) += adma.o 15 16 16 17 # IOP33X 17 18 obj-$(CONFIG_ARCH_IOP33X) += gpio.o ··· 22 21 obj-$(CONFIG_ARCH_IOP33X) += time.o 23 22 obj-$(CONFIG_ARCH_IOP33X) += io.o 24 23 obj-$(CONFIG_ARCH_IOP33X) += cp6.o 24 + obj-$(CONFIG_ARCH_IOP33X) += adma.o 25 25 26 26 # IOP13XX 27 27 obj-$(CONFIG_ARCH_IOP13XX) += cp6.o
+209
arch/arm/plat-iop/adma.c
··· 1 + /* 2 + * platform device definitions for the iop3xx dma/xor engines 3 + * Copyright © 2006, Intel Corporation. 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms and conditions of the GNU General Public License, 7 + * version 2, as published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope it will be useful, but WITHOUT 10 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 + * more details. 13 + * 14 + * You should have received a copy of the GNU General Public License along with 15 + * this program; if not, write to the Free Software Foundation, Inc., 16 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 17 + * 18 + */ 19 + #include <linux/platform_device.h> 20 + #include <asm/hardware/iop3xx.h> 21 + #include <linux/dma-mapping.h> 22 + #include <asm/arch/adma.h> 23 + #include <asm/hardware/iop_adma.h> 24 + 25 + #ifdef CONFIG_ARCH_IOP32X 26 + #define IRQ_DMA0_EOT IRQ_IOP32X_DMA0_EOT 27 + #define IRQ_DMA0_EOC IRQ_IOP32X_DMA0_EOC 28 + #define IRQ_DMA0_ERR IRQ_IOP32X_DMA0_ERR 29 + 30 + #define IRQ_DMA1_EOT IRQ_IOP32X_DMA1_EOT 31 + #define IRQ_DMA1_EOC IRQ_IOP32X_DMA1_EOC 32 + #define IRQ_DMA1_ERR IRQ_IOP32X_DMA1_ERR 33 + 34 + #define IRQ_AA_EOT IRQ_IOP32X_AA_EOT 35 + #define IRQ_AA_EOC IRQ_IOP32X_AA_EOC 36 + #define IRQ_AA_ERR IRQ_IOP32X_AA_ERR 37 + #endif 38 + #ifdef CONFIG_ARCH_IOP33X 39 + #define IRQ_DMA0_EOT IRQ_IOP33X_DMA0_EOT 40 + #define IRQ_DMA0_EOC IRQ_IOP33X_DMA0_EOC 41 + #define IRQ_DMA0_ERR IRQ_IOP33X_DMA0_ERR 42 + 43 + #define IRQ_DMA1_EOT IRQ_IOP33X_DMA1_EOT 44 + #define IRQ_DMA1_EOC IRQ_IOP33X_DMA1_EOC 45 + #define IRQ_DMA1_ERR IRQ_IOP33X_DMA1_ERR 46 + 47 + #define IRQ_AA_EOT IRQ_IOP33X_AA_EOT 48 + #define IRQ_AA_EOC IRQ_IOP33X_AA_EOC 49 + #define IRQ_AA_ERR IRQ_IOP33X_AA_ERR 50 + #endif 51 + /* AAU and DMA Channels */ 52 + static struct resource iop3xx_dma_0_resources[] = { 53 + [0] = { 54 + .start = IOP3XX_DMA_PHYS_BASE(0), 55 + .end = IOP3XX_DMA_UPPER_PA(0), 56 + .flags = IORESOURCE_MEM, 57 + }, 58 + [1] = { 59 + .start = IRQ_DMA0_EOT, 60 + .end = IRQ_DMA0_EOT, 61 + .flags = IORESOURCE_IRQ 62 + }, 63 + [2] = { 64 + .start = IRQ_DMA0_EOC, 65 + .end = IRQ_DMA0_EOC, 66 + .flags = IORESOURCE_IRQ 67 + }, 68 + [3] = { 69 + .start = IRQ_DMA0_ERR, 70 + .end = IRQ_DMA0_ERR, 71 + .flags = IORESOURCE_IRQ 72 + } 73 + }; 74 + 75 + static struct resource iop3xx_dma_1_resources[] = { 76 + [0] = { 77 + .start = IOP3XX_DMA_PHYS_BASE(1), 78 + .end = IOP3XX_DMA_UPPER_PA(1), 79 + .flags = IORESOURCE_MEM, 80 + }, 81 + [1] = { 82 + .start = IRQ_DMA1_EOT, 83 + .end = IRQ_DMA1_EOT, 84 + .flags = IORESOURCE_IRQ 85 + }, 86 + [2] = { 87 + .start = IRQ_DMA1_EOC, 88 + .end = IRQ_DMA1_EOC, 89 + .flags = IORESOURCE_IRQ 90 + }, 91 + [3] = { 92 + .start = IRQ_DMA1_ERR, 93 + .end = IRQ_DMA1_ERR, 94 + .flags = IORESOURCE_IRQ 95 + } 96 + }; 97 + 98 + 99 + static struct resource iop3xx_aau_resources[] = { 100 + [0] = { 101 + .start = IOP3XX_AAU_PHYS_BASE, 102 + .end = IOP3XX_AAU_UPPER_PA, 103 + .flags = IORESOURCE_MEM, 104 + }, 105 + [1] = { 106 + .start = IRQ_AA_EOT, 107 + .end = IRQ_AA_EOT, 108 + .flags = IORESOURCE_IRQ 109 + }, 110 + [2] = { 111 + .start = IRQ_AA_EOC, 112 + .end = IRQ_AA_EOC, 113 + .flags = IORESOURCE_IRQ 114 + }, 115 + [3] = { 116 + .start = IRQ_AA_ERR, 117 + .end = IRQ_AA_ERR, 118 + .flags = IORESOURCE_IRQ 119 + } 120 + }; 121 + 122 + static u64 iop3xx_adma_dmamask = 
DMA_32BIT_MASK; 123 + 124 + static struct iop_adma_platform_data iop3xx_dma_0_data = { 125 + .hw_id = DMA0_ID, 126 + .pool_size = PAGE_SIZE, 127 + }; 128 + 129 + static struct iop_adma_platform_data iop3xx_dma_1_data = { 130 + .hw_id = DMA1_ID, 131 + .pool_size = PAGE_SIZE, 132 + }; 133 + 134 + static struct iop_adma_platform_data iop3xx_aau_data = { 135 + .hw_id = AAU_ID, 136 + .pool_size = 3 * PAGE_SIZE, 137 + }; 138 + 139 + struct platform_device iop3xx_dma_0_channel = { 140 + .name = "iop-adma", 141 + .id = 0, 142 + .num_resources = 4, 143 + .resource = iop3xx_dma_0_resources, 144 + .dev = { 145 + .dma_mask = &iop3xx_adma_dmamask, 146 + .coherent_dma_mask = DMA_64BIT_MASK, 147 + .platform_data = (void *) &iop3xx_dma_0_data, 148 + }, 149 + }; 150 + 151 + struct platform_device iop3xx_dma_1_channel = { 152 + .name = "iop-adma", 153 + .id = 1, 154 + .num_resources = 4, 155 + .resource = iop3xx_dma_1_resources, 156 + .dev = { 157 + .dma_mask = &iop3xx_adma_dmamask, 158 + .coherent_dma_mask = DMA_64BIT_MASK, 159 + .platform_data = (void *) &iop3xx_dma_1_data, 160 + }, 161 + }; 162 + 163 + struct platform_device iop3xx_aau_channel = { 164 + .name = "iop-adma", 165 + .id = 2, 166 + .num_resources = 4, 167 + .resource = iop3xx_aau_resources, 168 + .dev = { 169 + .dma_mask = &iop3xx_adma_dmamask, 170 + .coherent_dma_mask = DMA_64BIT_MASK, 171 + .platform_data = (void *) &iop3xx_aau_data, 172 + }, 173 + }; 174 + 175 + static int __init iop3xx_adma_cap_init(void) 176 + { 177 + #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */ 178 + dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); 179 + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); 180 + #else 181 + dma_cap_set(DMA_MEMCPY, iop3xx_dma_0_data.cap_mask); 182 + dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_0_data.cap_mask); 183 + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_0_data.cap_mask); 184 + #endif 185 + 186 + #ifdef CONFIG_ARCH_IOP32X /* the 32x DMA does not perform CRC32C */ 187 + dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); 188 + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); 189 + #else 190 + dma_cap_set(DMA_MEMCPY, iop3xx_dma_1_data.cap_mask); 191 + dma_cap_set(DMA_MEMCPY_CRC32C, iop3xx_dma_1_data.cap_mask); 192 + dma_cap_set(DMA_INTERRUPT, iop3xx_dma_1_data.cap_mask); 193 + #endif 194 + 195 + #ifdef CONFIG_ARCH_IOP32X /* the 32x AAU does not perform zero sum */ 196 + dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); 197 + dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); 198 + dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); 199 + #else 200 + dma_cap_set(DMA_XOR, iop3xx_aau_data.cap_mask); 201 + dma_cap_set(DMA_ZERO_SUM, iop3xx_aau_data.cap_mask); 202 + dma_cap_set(DMA_MEMSET, iop3xx_aau_data.cap_mask); 203 + dma_cap_set(DMA_INTERRUPT, iop3xx_aau_data.cap_mask); 204 + #endif 205 + 206 + return 0; 207 + } 208 + 209 + arch_initcall(iop3xx_adma_cap_init);
+11 -1
crypto/Kconfig
··· 1 1 # 2 + # Generic algorithms support 3 + # 4 + config XOR_BLOCKS 5 + tristate 6 + 7 + # 8 + # async_tx api: hardware offloaded memory transfer/transform support 9 + # 10 + source "crypto/async_tx/Kconfig" 11 + 12 + # 2 13 # Cryptographic API Configuration 3 14 # 4 - 5 15 menu "Cryptographic options" 6 16 7 17 config CRYPTO
+6
crypto/Makefile
··· 50 50 obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o 51 51 52 52 obj-$(CONFIG_CRYPTO_TEST) += tcrypt.o 53 + 54 + # 55 + # generic algorithms and the async_tx api 56 + # 57 + obj-$(CONFIG_XOR_BLOCKS) += xor.o 58 + obj-$(CONFIG_ASYNC_CORE) += async_tx/
+16
crypto/async_tx/Kconfig
··· 1 + config ASYNC_CORE 2 + tristate 3 + 4 + config ASYNC_MEMCPY 5 + tristate 6 + select ASYNC_CORE 7 + 8 + config ASYNC_XOR 9 + tristate 10 + select ASYNC_CORE 11 + select XOR_BLOCKS 12 + 13 + config ASYNC_MEMSET 14 + tristate 15 + select ASYNC_CORE 16 +
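
None of these symbols carries a prompt, so they are never switched on directly from menuconfig; a subsystem that wants the helpers is expected to select them. An illustrative fragment for a hypothetical consumer (not part of this merge):

    config EXAMPLE_OFFLOAD_USER
            tristate "Example async_tx consumer"
            select ASYNC_MEMCPY
            select ASYNC_XOR

Selecting ASYNC_XOR pulls in ASYNC_CORE and XOR_BLOCKS, so the synchronous xor_blocks() fallback is always available to the consumer.
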
+4
crypto/async_tx/Makefile
··· 1 + obj-$(CONFIG_ASYNC_CORE) += async_tx.o 2 + obj-$(CONFIG_ASYNC_MEMCPY) += async_memcpy.o 3 + obj-$(CONFIG_ASYNC_MEMSET) += async_memset.o 4 + obj-$(CONFIG_ASYNC_XOR) += async_xor.o
+131
crypto/async_tx/async_memcpy.c
··· 1 + /* 2 + * copy offload engine support 3 + * 4 + * Copyright © 2006, Intel Corporation. 5 + * 6 + * Dan Williams <dan.j.williams@intel.com> 7 + * 8 + * with architecture considerations by: 9 + * Neil Brown <neilb@suse.de> 10 + * Jeff Garzik <jeff@garzik.org> 11 + * 12 + * This program is free software; you can redistribute it and/or modify it 13 + * under the terms and conditions of the GNU General Public License, 14 + * version 2, as published by the Free Software Foundation. 15 + * 16 + * This program is distributed in the hope it will be useful, but WITHOUT 17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 19 + * more details. 20 + * 21 + * You should have received a copy of the GNU General Public License along with 22 + * this program; if not, write to the Free Software Foundation, Inc., 23 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 24 + * 25 + */ 26 + #include <linux/kernel.h> 27 + #include <linux/highmem.h> 28 + #include <linux/mm.h> 29 + #include <linux/dma-mapping.h> 30 + #include <linux/async_tx.h> 31 + 32 + /** 33 + * async_memcpy - attempt to copy memory with a dma engine. 34 + * @dest: destination page 35 + * @src: src page 36 + * @offset: offset in pages to start transaction 37 + * @len: length in bytes 38 + * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK, 39 + * ASYNC_TX_KMAP_SRC, ASYNC_TX_KMAP_DST 40 + * @depend_tx: memcpy depends on the result of this transaction 41 + * @cb_fn: function to call when the memcpy completes 42 + * @cb_param: parameter to pass to the callback routine 43 + */ 44 + struct dma_async_tx_descriptor * 45 + async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 46 + unsigned int src_offset, size_t len, enum async_tx_flags flags, 47 + struct dma_async_tx_descriptor *depend_tx, 48 + dma_async_tx_callback cb_fn, void *cb_param) 49 + { 50 + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMCPY); 51 + struct dma_device *device = chan ? chan->device : NULL; 52 + int int_en = cb_fn ? 1 : 0; 53 + struct dma_async_tx_descriptor *tx = device ? 54 + device->device_prep_dma_memcpy(chan, len, 55 + int_en) : NULL; 56 + 57 + if (tx) { /* run the memcpy asynchronously */ 58 + dma_addr_t addr; 59 + enum dma_data_direction dir; 60 + 61 + pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); 62 + 63 + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 64 + DMA_NONE : DMA_FROM_DEVICE; 65 + 66 + addr = dma_map_page(device->dev, dest, dest_offset, len, dir); 67 + tx->tx_set_dest(addr, tx, 0); 68 + 69 + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 
70 + DMA_NONE : DMA_TO_DEVICE; 71 + 72 + addr = dma_map_page(device->dev, src, src_offset, len, dir); 73 + tx->tx_set_src(addr, tx, 0); 74 + 75 + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 76 + } else { /* run the memcpy synchronously */ 77 + void *dest_buf, *src_buf; 78 + pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); 79 + 80 + /* wait for any prerequisite operations */ 81 + if (depend_tx) { 82 + /* if ack is already set then we cannot be sure 83 + * we are referring to the correct operation 84 + */ 85 + BUG_ON(depend_tx->ack); 86 + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 87 + panic("%s: DMA_ERROR waiting for depend_tx\n", 88 + __FUNCTION__); 89 + } 90 + 91 + if (flags & ASYNC_TX_KMAP_DST) 92 + dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; 93 + else 94 + dest_buf = page_address(dest) + dest_offset; 95 + 96 + if (flags & ASYNC_TX_KMAP_SRC) 97 + src_buf = kmap_atomic(src, KM_USER0) + src_offset; 98 + else 99 + src_buf = page_address(src) + src_offset; 100 + 101 + memcpy(dest_buf, src_buf, len); 102 + 103 + if (flags & ASYNC_TX_KMAP_DST) 104 + kunmap_atomic(dest_buf, KM_USER0); 105 + 106 + if (flags & ASYNC_TX_KMAP_SRC) 107 + kunmap_atomic(src_buf, KM_USER0); 108 + 109 + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); 110 + } 111 + 112 + return tx; 113 + } 114 + EXPORT_SYMBOL_GPL(async_memcpy); 115 + 116 + static int __init async_memcpy_init(void) 117 + { 118 + return 0; 119 + } 120 + 121 + static void __exit async_memcpy_exit(void) 122 + { 123 + do { } while (0); 124 + } 125 + 126 + module_init(async_memcpy_init); 127 + module_exit(async_memcpy_exit); 128 + 129 + MODULE_AUTHOR("Intel Corporation"); 130 + MODULE_DESCRIPTION("asynchronous memcpy api"); 131 + MODULE_LICENSE("GPL");
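
A hedged usage sketch (the helper below is illustrative, not part of the patch): the same call either lands on a DMA_MEMCPY-capable channel or quietly degrades to a CPU memcpy, and the caller only supplies pages, offsets, flags and an optional completion callback.

    /* illustrative consumer of async_memcpy(), not from this merge */
    #include <linux/async_tx.h>
    #include <linux/completion.h>
    #include <linux/mm.h>

    static void copy_done(void *arg)
    {
            complete(arg);  /* caller-supplied struct completion */
    }

    static void copy_page_async(struct page *dst, struct page *src,
                                struct completion *done)
    {
            /* ASYNC_TX_ACK: no later operation will depend on this tx */
            async_memcpy(dst, src, 0, 0, PAGE_SIZE, ASYNC_TX_ACK,
                         NULL /* no dependency */, copy_done, done);
    }

The descriptor returned by async_memcpy() can instead be kept and passed as depend_tx to a later operation, as the memset and xor sketches further down do.
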
+109
crypto/async_tx/async_memset.c
··· 1 + /* 2 + * memory fill offload engine support 3 + * 4 + * Copyright © 2006, Intel Corporation. 5 + * 6 + * Dan Williams <dan.j.williams@intel.com> 7 + * 8 + * with architecture considerations by: 9 + * Neil Brown <neilb@suse.de> 10 + * Jeff Garzik <jeff@garzik.org> 11 + * 12 + * This program is free software; you can redistribute it and/or modify it 13 + * under the terms and conditions of the GNU General Public License, 14 + * version 2, as published by the Free Software Foundation. 15 + * 16 + * This program is distributed in the hope it will be useful, but WITHOUT 17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 19 + * more details. 20 + * 21 + * You should have received a copy of the GNU General Public License along with 22 + * this program; if not, write to the Free Software Foundation, Inc., 23 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 24 + * 25 + */ 26 + #include <linux/kernel.h> 27 + #include <linux/interrupt.h> 28 + #include <linux/mm.h> 29 + #include <linux/dma-mapping.h> 30 + #include <linux/async_tx.h> 31 + 32 + /** 33 + * async_memset - attempt to fill memory with a dma engine. 34 + * @dest: destination page 35 + * @val: fill value 36 + * @offset: offset in pages to start transaction 37 + * @len: length in bytes 38 + * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 39 + * @depend_tx: memset depends on the result of this transaction 40 + * @cb_fn: function to call when the memcpy completes 41 + * @cb_param: parameter to pass to the callback routine 42 + */ 43 + struct dma_async_tx_descriptor * 44 + async_memset(struct page *dest, int val, unsigned int offset, 45 + size_t len, enum async_tx_flags flags, 46 + struct dma_async_tx_descriptor *depend_tx, 47 + dma_async_tx_callback cb_fn, void *cb_param) 48 + { 49 + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_MEMSET); 50 + struct dma_device *device = chan ? chan->device : NULL; 51 + int int_en = cb_fn ? 1 : 0; 52 + struct dma_async_tx_descriptor *tx = device ? 53 + device->device_prep_dma_memset(chan, val, len, 54 + int_en) : NULL; 55 + 56 + if (tx) { /* run the memset asynchronously */ 57 + dma_addr_t dma_addr; 58 + enum dma_data_direction dir; 59 + 60 + pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); 61 + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 
62 + DMA_NONE : DMA_FROM_DEVICE; 63 + 64 + dma_addr = dma_map_page(device->dev, dest, offset, len, dir); 65 + tx->tx_set_dest(dma_addr, tx, 0); 66 + 67 + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 68 + } else { /* run the memset synchronously */ 69 + void *dest_buf; 70 + pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); 71 + 72 + dest_buf = (void *) (((char *) page_address(dest)) + offset); 73 + 74 + /* wait for any prerequisite operations */ 75 + if (depend_tx) { 76 + /* if ack is already set then we cannot be sure 77 + * we are referring to the correct operation 78 + */ 79 + BUG_ON(depend_tx->ack); 80 + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 81 + panic("%s: DMA_ERROR waiting for depend_tx\n", 82 + __FUNCTION__); 83 + } 84 + 85 + memset(dest_buf, val, len); 86 + 87 + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); 88 + } 89 + 90 + return tx; 91 + } 92 + EXPORT_SYMBOL_GPL(async_memset); 93 + 94 + static int __init async_memset_init(void) 95 + { 96 + return 0; 97 + } 98 + 99 + static void __exit async_memset_exit(void) 100 + { 101 + do { } while (0); 102 + } 103 + 104 + module_init(async_memset_init); 105 + module_exit(async_memset_exit); 106 + 107 + MODULE_AUTHOR("Intel Corporation"); 108 + MODULE_DESCRIPTION("asynchronous memset api"); 109 + MODULE_LICENSE("GPL");
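
In the same hedged spirit, a chaining sketch (illustrative, not from the patch): zero a page only after a prerequisite transaction has completed.

    /* illustrative: clear a page once 'prereq' (e.g. an earlier
     * async_memcpy descriptor) has run; not part of this merge */
    #include <linux/async_tx.h>
    #include <linux/mm.h>

    static struct dma_async_tx_descriptor *
    zero_page_after(struct page *page, struct dma_async_tx_descriptor *prereq)
    {
            /* ASYNC_TX_DEP_ACK: ack 'prereq' once the dependency is recorded */
            return async_memset(page, 0, 0, PAGE_SIZE, ASYNC_TX_DEP_ACK,
                                prereq, NULL, NULL);
    }

Note that prereq must not itself have been submitted with ASYNC_TX_ACK; an acknowledged descriptor cannot be depended on (see the BUG_ON(depend_tx->ack) checks above).
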
+497
crypto/async_tx/async_tx.c
··· 1 + /* 2 + * core routines for the asynchronous memory transfer/transform api 3 + * 4 + * Copyright © 2006, Intel Corporation. 5 + * 6 + * Dan Williams <dan.j.williams@intel.com> 7 + * 8 + * with architecture considerations by: 9 + * Neil Brown <neilb@suse.de> 10 + * Jeff Garzik <jeff@garzik.org> 11 + * 12 + * This program is free software; you can redistribute it and/or modify it 13 + * under the terms and conditions of the GNU General Public License, 14 + * version 2, as published by the Free Software Foundation. 15 + * 16 + * This program is distributed in the hope it will be useful, but WITHOUT 17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 19 + * more details. 20 + * 21 + * You should have received a copy of the GNU General Public License along with 22 + * this program; if not, write to the Free Software Foundation, Inc., 23 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 24 + * 25 + */ 26 + #include <linux/kernel.h> 27 + #include <linux/async_tx.h> 28 + 29 + #ifdef CONFIG_DMA_ENGINE 30 + static enum dma_state_client 31 + dma_channel_add_remove(struct dma_client *client, 32 + struct dma_chan *chan, enum dma_state state); 33 + 34 + static struct dma_client async_tx_dma = { 35 + .event_callback = dma_channel_add_remove, 36 + /* .cap_mask == 0 defaults to all channels */ 37 + }; 38 + 39 + /** 40 + * dma_cap_mask_all - enable iteration over all operation types 41 + */ 42 + static dma_cap_mask_t dma_cap_mask_all; 43 + 44 + /** 45 + * chan_ref_percpu - tracks channel allocations per core/opertion 46 + */ 47 + struct chan_ref_percpu { 48 + struct dma_chan_ref *ref; 49 + }; 50 + 51 + static int channel_table_initialized; 52 + static struct chan_ref_percpu *channel_table[DMA_TX_TYPE_END]; 53 + 54 + /** 55 + * async_tx_lock - protect modification of async_tx_master_list and serialize 56 + * rebalance operations 57 + */ 58 + static spinlock_t async_tx_lock; 59 + 60 + static struct list_head 61 + async_tx_master_list = LIST_HEAD_INIT(async_tx_master_list); 62 + 63 + /* async_tx_issue_pending_all - start all transactions on all channels */ 64 + void async_tx_issue_pending_all(void) 65 + { 66 + struct dma_chan_ref *ref; 67 + 68 + rcu_read_lock(); 69 + list_for_each_entry_rcu(ref, &async_tx_master_list, node) 70 + ref->chan->device->device_issue_pending(ref->chan); 71 + rcu_read_unlock(); 72 + } 73 + EXPORT_SYMBOL_GPL(async_tx_issue_pending_all); 74 + 75 + /* dma_wait_for_async_tx - spin wait for a transcation to complete 76 + * @tx: transaction to wait on 77 + */ 78 + enum dma_status 79 + dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) 80 + { 81 + enum dma_status status; 82 + struct dma_async_tx_descriptor *iter; 83 + 84 + if (!tx) 85 + return DMA_SUCCESS; 86 + 87 + /* poll through the dependency chain, return when tx is complete */ 88 + do { 89 + iter = tx; 90 + while (iter->cookie == -EBUSY) 91 + iter = iter->parent; 92 + 93 + status = dma_sync_wait(iter->chan, iter->cookie); 94 + } while (status == DMA_IN_PROGRESS || (iter != tx)); 95 + 96 + return status; 97 + } 98 + EXPORT_SYMBOL_GPL(dma_wait_for_async_tx); 99 + 100 + /* async_tx_run_dependencies - helper routine for dma drivers to process 101 + * (start) dependent operations on their target channel 102 + * @tx: transaction with dependencies 103 + */ 104 + void 105 + async_tx_run_dependencies(struct dma_async_tx_descriptor *tx) 106 + { 107 + struct dma_async_tx_descriptor *dep_tx, *_dep_tx; 108 + struct 
dma_device *dev; 109 + struct dma_chan *chan; 110 + 111 + list_for_each_entry_safe(dep_tx, _dep_tx, &tx->depend_list, 112 + depend_node) { 113 + chan = dep_tx->chan; 114 + dev = chan->device; 115 + /* we can't depend on ourselves */ 116 + BUG_ON(chan == tx->chan); 117 + list_del(&dep_tx->depend_node); 118 + tx->tx_submit(dep_tx); 119 + 120 + /* we need to poke the engine as client code does not 121 + * know about dependency submission events 122 + */ 123 + dev->device_issue_pending(chan); 124 + } 125 + } 126 + EXPORT_SYMBOL_GPL(async_tx_run_dependencies); 127 + 128 + static void 129 + free_dma_chan_ref(struct rcu_head *rcu) 130 + { 131 + struct dma_chan_ref *ref; 132 + ref = container_of(rcu, struct dma_chan_ref, rcu); 133 + kfree(ref); 134 + } 135 + 136 + static void 137 + init_dma_chan_ref(struct dma_chan_ref *ref, struct dma_chan *chan) 138 + { 139 + INIT_LIST_HEAD(&ref->node); 140 + INIT_RCU_HEAD(&ref->rcu); 141 + ref->chan = chan; 142 + atomic_set(&ref->count, 0); 143 + } 144 + 145 + /** 146 + * get_chan_ref_by_cap - returns the nth channel of the given capability 147 + * defaults to returning the channel with the desired capability and the 148 + * lowest reference count if the index can not be satisfied 149 + * @cap: capability to match 150 + * @index: nth channel desired, passing -1 has the effect of forcing the 151 + * default return value 152 + */ 153 + static struct dma_chan_ref * 154 + get_chan_ref_by_cap(enum dma_transaction_type cap, int index) 155 + { 156 + struct dma_chan_ref *ret_ref = NULL, *min_ref = NULL, *ref; 157 + 158 + rcu_read_lock(); 159 + list_for_each_entry_rcu(ref, &async_tx_master_list, node) 160 + if (dma_has_cap(cap, ref->chan->device->cap_mask)) { 161 + if (!min_ref) 162 + min_ref = ref; 163 + else if (atomic_read(&ref->count) < 164 + atomic_read(&min_ref->count)) 165 + min_ref = ref; 166 + 167 + if (index-- == 0) { 168 + ret_ref = ref; 169 + break; 170 + } 171 + } 172 + rcu_read_unlock(); 173 + 174 + if (!ret_ref) 175 + ret_ref = min_ref; 176 + 177 + if (ret_ref) 178 + atomic_inc(&ret_ref->count); 179 + 180 + return ret_ref; 181 + } 182 + 183 + /** 184 + * async_tx_rebalance - redistribute the available channels, optimize 185 + * for cpu isolation in the SMP case, and opertaion isolation in the 186 + * uniprocessor case 187 + */ 188 + static void async_tx_rebalance(void) 189 + { 190 + int cpu, cap, cpu_idx = 0; 191 + unsigned long flags; 192 + 193 + if (!channel_table_initialized) 194 + return; 195 + 196 + spin_lock_irqsave(&async_tx_lock, flags); 197 + 198 + /* undo the last distribution */ 199 + for_each_dma_cap_mask(cap, dma_cap_mask_all) 200 + for_each_possible_cpu(cpu) { 201 + struct dma_chan_ref *ref = 202 + per_cpu_ptr(channel_table[cap], cpu)->ref; 203 + if (ref) { 204 + atomic_set(&ref->count, 0); 205 + per_cpu_ptr(channel_table[cap], cpu)->ref = 206 + NULL; 207 + } 208 + } 209 + 210 + for_each_dma_cap_mask(cap, dma_cap_mask_all) 211 + for_each_online_cpu(cpu) { 212 + struct dma_chan_ref *new; 213 + if (NR_CPUS > 1) 214 + new = get_chan_ref_by_cap(cap, cpu_idx++); 215 + else 216 + new = get_chan_ref_by_cap(cap, -1); 217 + 218 + per_cpu_ptr(channel_table[cap], cpu)->ref = new; 219 + } 220 + 221 + spin_unlock_irqrestore(&async_tx_lock, flags); 222 + } 223 + 224 + static enum dma_state_client 225 + dma_channel_add_remove(struct dma_client *client, 226 + struct dma_chan *chan, enum dma_state state) 227 + { 228 + unsigned long found, flags; 229 + struct dma_chan_ref *master_ref, *ref; 230 + enum dma_state_client ack = DMA_DUP; /* default: take no action 
*/ 231 + 232 + switch (state) { 233 + case DMA_RESOURCE_AVAILABLE: 234 + found = 0; 235 + rcu_read_lock(); 236 + list_for_each_entry_rcu(ref, &async_tx_master_list, node) 237 + if (ref->chan == chan) { 238 + found = 1; 239 + break; 240 + } 241 + rcu_read_unlock(); 242 + 243 + pr_debug("async_tx: dma resource available [%s]\n", 244 + found ? "old" : "new"); 245 + 246 + if (!found) 247 + ack = DMA_ACK; 248 + else 249 + break; 250 + 251 + /* add the channel to the generic management list */ 252 + master_ref = kmalloc(sizeof(*master_ref), GFP_KERNEL); 253 + if (master_ref) { 254 + /* keep a reference until async_tx is unloaded */ 255 + dma_chan_get(chan); 256 + init_dma_chan_ref(master_ref, chan); 257 + spin_lock_irqsave(&async_tx_lock, flags); 258 + list_add_tail_rcu(&master_ref->node, 259 + &async_tx_master_list); 260 + spin_unlock_irqrestore(&async_tx_lock, 261 + flags); 262 + } else { 263 + printk(KERN_WARNING "async_tx: unable to create" 264 + " new master entry in response to" 265 + " a DMA_RESOURCE_ADDED event" 266 + " (-ENOMEM)\n"); 267 + return 0; 268 + } 269 + 270 + async_tx_rebalance(); 271 + break; 272 + case DMA_RESOURCE_REMOVED: 273 + found = 0; 274 + spin_lock_irqsave(&async_tx_lock, flags); 275 + list_for_each_entry_rcu(ref, &async_tx_master_list, node) 276 + if (ref->chan == chan) { 277 + /* permit backing devices to go away */ 278 + dma_chan_put(ref->chan); 279 + list_del_rcu(&ref->node); 280 + call_rcu(&ref->rcu, free_dma_chan_ref); 281 + found = 1; 282 + break; 283 + } 284 + spin_unlock_irqrestore(&async_tx_lock, flags); 285 + 286 + pr_debug("async_tx: dma resource removed [%s]\n", 287 + found ? "ours" : "not ours"); 288 + 289 + if (found) 290 + ack = DMA_ACK; 291 + else 292 + break; 293 + 294 + async_tx_rebalance(); 295 + break; 296 + case DMA_RESOURCE_SUSPEND: 297 + case DMA_RESOURCE_RESUME: 298 + printk(KERN_WARNING "async_tx: does not support dma channel" 299 + " suspend/resume\n"); 300 + break; 301 + default: 302 + BUG(); 303 + } 304 + 305 + return ack; 306 + } 307 + 308 + static int __init 309 + async_tx_init(void) 310 + { 311 + enum dma_transaction_type cap; 312 + 313 + spin_lock_init(&async_tx_lock); 314 + bitmap_fill(dma_cap_mask_all.bits, DMA_TX_TYPE_END); 315 + 316 + /* an interrupt will never be an explicit operation type. 
317 + * clearing this bit prevents allocation to a slot in 'channel_table' 318 + */ 319 + clear_bit(DMA_INTERRUPT, dma_cap_mask_all.bits); 320 + 321 + for_each_dma_cap_mask(cap, dma_cap_mask_all) { 322 + channel_table[cap] = alloc_percpu(struct chan_ref_percpu); 323 + if (!channel_table[cap]) 324 + goto err; 325 + } 326 + 327 + channel_table_initialized = 1; 328 + dma_async_client_register(&async_tx_dma); 329 + dma_async_client_chan_request(&async_tx_dma); 330 + 331 + printk(KERN_INFO "async_tx: api initialized (async)\n"); 332 + 333 + return 0; 334 + err: 335 + printk(KERN_ERR "async_tx: initialization failure\n"); 336 + 337 + while (--cap >= 0) 338 + free_percpu(channel_table[cap]); 339 + 340 + return 1; 341 + } 342 + 343 + static void __exit async_tx_exit(void) 344 + { 345 + enum dma_transaction_type cap; 346 + 347 + channel_table_initialized = 0; 348 + 349 + for_each_dma_cap_mask(cap, dma_cap_mask_all) 350 + if (channel_table[cap]) 351 + free_percpu(channel_table[cap]); 352 + 353 + dma_async_client_unregister(&async_tx_dma); 354 + } 355 + 356 + /** 357 + * async_tx_find_channel - find a channel to carry out the operation or let 358 + * the transaction execute synchronously 359 + * @depend_tx: transaction dependency 360 + * @tx_type: transaction type 361 + */ 362 + struct dma_chan * 363 + async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 364 + enum dma_transaction_type tx_type) 365 + { 366 + /* see if we can keep the chain on one channel */ 367 + if (depend_tx && 368 + dma_has_cap(tx_type, depend_tx->chan->device->cap_mask)) 369 + return depend_tx->chan; 370 + else if (likely(channel_table_initialized)) { 371 + struct dma_chan_ref *ref; 372 + int cpu = get_cpu(); 373 + ref = per_cpu_ptr(channel_table[tx_type], cpu)->ref; 374 + put_cpu(); 375 + return ref ? 
ref->chan : NULL; 376 + } else 377 + return NULL; 378 + } 379 + EXPORT_SYMBOL_GPL(async_tx_find_channel); 380 + #else 381 + static int __init async_tx_init(void) 382 + { 383 + printk(KERN_INFO "async_tx: api initialized (sync-only)\n"); 384 + return 0; 385 + } 386 + 387 + static void __exit async_tx_exit(void) 388 + { 389 + do { } while (0); 390 + } 391 + #endif 392 + 393 + void 394 + async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 395 + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 396 + dma_async_tx_callback cb_fn, void *cb_param) 397 + { 398 + tx->callback = cb_fn; 399 + tx->callback_param = cb_param; 400 + 401 + /* set this new tx to run after depend_tx if: 402 + * 1/ a dependency exists (depend_tx is !NULL) 403 + * 2/ the tx can not be submitted to the current channel 404 + */ 405 + if (depend_tx && depend_tx->chan != chan) { 406 + /* if ack is already set then we cannot be sure 407 + * we are referring to the correct operation 408 + */ 409 + BUG_ON(depend_tx->ack); 410 + 411 + tx->parent = depend_tx; 412 + spin_lock_bh(&depend_tx->lock); 413 + list_add_tail(&tx->depend_node, &depend_tx->depend_list); 414 + if (depend_tx->cookie == 0) { 415 + struct dma_chan *dep_chan = depend_tx->chan; 416 + struct dma_device *dep_dev = dep_chan->device; 417 + dep_dev->device_dependency_added(dep_chan); 418 + } 419 + spin_unlock_bh(&depend_tx->lock); 420 + 421 + /* schedule an interrupt to trigger the channel switch */ 422 + async_trigger_callback(ASYNC_TX_ACK, depend_tx, NULL, NULL); 423 + } else { 424 + tx->parent = NULL; 425 + tx->tx_submit(tx); 426 + } 427 + 428 + if (flags & ASYNC_TX_ACK) 429 + async_tx_ack(tx); 430 + 431 + if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) 432 + async_tx_ack(depend_tx); 433 + } 434 + EXPORT_SYMBOL_GPL(async_tx_submit); 435 + 436 + /** 437 + * async_trigger_callback - schedules the callback function to be run after 438 + * any dependent operations have been completed. 439 + * @flags: ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 440 + * @depend_tx: 'callback' requires the completion of this transaction 441 + * @cb_fn: function to call after depend_tx completes 442 + * @cb_param: parameter to pass to the callback routine 443 + */ 444 + struct dma_async_tx_descriptor * 445 + async_trigger_callback(enum async_tx_flags flags, 446 + struct dma_async_tx_descriptor *depend_tx, 447 + dma_async_tx_callback cb_fn, void *cb_param) 448 + { 449 + struct dma_chan *chan; 450 + struct dma_device *device; 451 + struct dma_async_tx_descriptor *tx; 452 + 453 + if (depend_tx) { 454 + chan = depend_tx->chan; 455 + device = chan->device; 456 + 457 + /* see if we can schedule an interrupt 458 + * otherwise poll for completion 459 + */ 460 + if (device && !dma_has_cap(DMA_INTERRUPT, device->cap_mask)) 461 + device = NULL; 462 + 463 + tx = device ? 
device->device_prep_dma_interrupt(chan) : NULL; 464 + } else 465 + tx = NULL; 466 + 467 + if (tx) { 468 + pr_debug("%s: (async)\n", __FUNCTION__); 469 + 470 + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 471 + } else { 472 + pr_debug("%s: (sync)\n", __FUNCTION__); 473 + 474 + /* wait for any prerequisite operations */ 475 + if (depend_tx) { 476 + /* if ack is already set then we cannot be sure 477 + * we are referring to the correct operation 478 + */ 479 + BUG_ON(depend_tx->ack); 480 + if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) 481 + panic("%s: DMA_ERROR waiting for depend_tx\n", 482 + __FUNCTION__); 483 + } 484 + 485 + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); 486 + } 487 + 488 + return tx; 489 + } 490 + EXPORT_SYMBOL_GPL(async_trigger_callback); 491 + 492 + module_init(async_tx_init); 493 + module_exit(async_tx_exit); 494 + 495 + MODULE_AUTHOR("Intel Corporation"); 496 + MODULE_DESCRIPTION("Asynchronous Bulk Memory Transactions API"); 497 + MODULE_LICENSE("GPL");
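
Tying the core entry points together, a sketch under the assumption of a typical client (the function names are made up): work is chained through depend_tx, async_trigger_callback() schedules a notification once the chain drains, and nothing starts until the channels are kicked.

    /* illustrative client code, not part of this merge */
    #include <linux/async_tx.h>
    #include <linux/mm.h>

    static void chain_done(void *ctx)
    {
            /* runs after the copy below and anything it depended on */
    }

    static void copy_then_notify(struct page *dst, struct page *src, void *ctx)
    {
            struct dma_async_tx_descriptor *tx;

            /* no ASYNC_TX_ACK here: the callback below depends on this tx */
            tx = async_memcpy(dst, src, 0, 0, PAGE_SIZE, 0, NULL, NULL, NULL);
            async_trigger_callback(ASYNC_TX_ACK | ASYNC_TX_DEP_ACK, tx,
                                   chain_done, ctx);

            /* start everything that has been queued, on every channel */
            async_tx_issue_pending_all();
    }
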
+327
crypto/async_tx/async_xor.c
··· 1 + /* 2 + * xor offload engine api 3 + * 4 + * Copyright © 2006, Intel Corporation. 5 + * 6 + * Dan Williams <dan.j.williams@intel.com> 7 + * 8 + * with architecture considerations by: 9 + * Neil Brown <neilb@suse.de> 10 + * Jeff Garzik <jeff@garzik.org> 11 + * 12 + * This program is free software; you can redistribute it and/or modify it 13 + * under the terms and conditions of the GNU General Public License, 14 + * version 2, as published by the Free Software Foundation. 15 + * 16 + * This program is distributed in the hope it will be useful, but WITHOUT 17 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 18 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 19 + * more details. 20 + * 21 + * You should have received a copy of the GNU General Public License along with 22 + * this program; if not, write to the Free Software Foundation, Inc., 23 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 24 + * 25 + */ 26 + #include <linux/kernel.h> 27 + #include <linux/interrupt.h> 28 + #include <linux/mm.h> 29 + #include <linux/dma-mapping.h> 30 + #include <linux/raid/xor.h> 31 + #include <linux/async_tx.h> 32 + 33 + static void 34 + do_async_xor(struct dma_async_tx_descriptor *tx, struct dma_device *device, 35 + struct dma_chan *chan, struct page *dest, struct page **src_list, 36 + unsigned int offset, unsigned int src_cnt, size_t len, 37 + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 38 + dma_async_tx_callback cb_fn, void *cb_param) 39 + { 40 + dma_addr_t dma_addr; 41 + enum dma_data_direction dir; 42 + int i; 43 + 44 + pr_debug("%s: len: %zu\n", __FUNCTION__, len); 45 + 46 + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 47 + DMA_NONE : DMA_FROM_DEVICE; 48 + 49 + dma_addr = dma_map_page(device->dev, dest, offset, len, dir); 50 + tx->tx_set_dest(dma_addr, tx, 0); 51 + 52 + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 53 + DMA_NONE : DMA_TO_DEVICE; 54 + 55 + for (i = 0; i < src_cnt; i++) { 56 + dma_addr = dma_map_page(device->dev, src_list[i], 57 + offset, len, dir); 58 + tx->tx_set_src(dma_addr, tx, i); 59 + } 60 + 61 + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 62 + } 63 + 64 + static void 65 + do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, 66 + unsigned int src_cnt, size_t len, enum async_tx_flags flags, 67 + struct dma_async_tx_descriptor *depend_tx, 68 + dma_async_tx_callback cb_fn, void *cb_param) 69 + { 70 + void *_dest; 71 + int i; 72 + 73 + pr_debug("%s: len: %zu\n", __FUNCTION__, len); 74 + 75 + /* reuse the 'src_list' array to convert to buffer pointers */ 76 + for (i = 0; i < src_cnt; i++) 77 + src_list[i] = (struct page *) 78 + (page_address(src_list[i]) + offset); 79 + 80 + /* set destination address */ 81 + _dest = page_address(dest) + offset; 82 + 83 + if (flags & ASYNC_TX_XOR_ZERO_DST) 84 + memset(_dest, 0, len); 85 + 86 + xor_blocks(src_cnt, len, _dest, 87 + (void **) src_list); 88 + 89 + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); 90 + } 91 + 92 + /** 93 + * async_xor - attempt to xor a set of blocks with a dma engine. 94 + * xor_blocks always uses the dest as a source so the ASYNC_TX_XOR_ZERO_DST 95 + * flag must be set to not include dest data in the calculation. The 96 + * assumption with dma eninges is that they only use the destination 97 + * buffer as a source when it is explicity specified in the source list. 
98 + * @dest: destination page 99 + * @src_list: array of source pages (if the dest is also a source it must be 100 + * at index zero). The contents of this array may be overwritten. 101 + * @offset: offset in pages to start transaction 102 + * @src_cnt: number of source pages 103 + * @len: length in bytes 104 + * @flags: ASYNC_TX_XOR_ZERO_DST, ASYNC_TX_XOR_DROP_DEST, 105 + * ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 106 + * @depend_tx: xor depends on the result of this transaction. 107 + * @cb_fn: function to call when the xor completes 108 + * @cb_param: parameter to pass to the callback routine 109 + */ 110 + struct dma_async_tx_descriptor * 111 + async_xor(struct page *dest, struct page **src_list, unsigned int offset, 112 + int src_cnt, size_t len, enum async_tx_flags flags, 113 + struct dma_async_tx_descriptor *depend_tx, 114 + dma_async_tx_callback cb_fn, void *cb_param) 115 + { 116 + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR); 117 + struct dma_device *device = chan ? chan->device : NULL; 118 + struct dma_async_tx_descriptor *tx = NULL; 119 + dma_async_tx_callback _cb_fn; 120 + void *_cb_param; 121 + unsigned long local_flags; 122 + int xor_src_cnt; 123 + int i = 0, src_off = 0, int_en; 124 + 125 + BUG_ON(src_cnt <= 1); 126 + 127 + while (src_cnt) { 128 + local_flags = flags; 129 + if (device) { /* run the xor asynchronously */ 130 + xor_src_cnt = min(src_cnt, device->max_xor); 131 + /* if we are submitting additional xors 132 + * only set the callback on the last transaction 133 + */ 134 + if (src_cnt > xor_src_cnt) { 135 + local_flags &= ~ASYNC_TX_ACK; 136 + _cb_fn = NULL; 137 + _cb_param = NULL; 138 + } else { 139 + _cb_fn = cb_fn; 140 + _cb_param = cb_param; 141 + } 142 + 143 + int_en = _cb_fn ? 1 : 0; 144 + 145 + tx = device->device_prep_dma_xor( 146 + chan, xor_src_cnt, len, int_en); 147 + 148 + if (tx) { 149 + do_async_xor(tx, device, chan, dest, 150 + &src_list[src_off], offset, xor_src_cnt, len, 151 + local_flags, depend_tx, _cb_fn, 152 + _cb_param); 153 + } else /* fall through */ 154 + goto xor_sync; 155 + } else { /* run the xor synchronously */ 156 + xor_sync: 157 + /* in the sync case the dest is an implied source 158 + * (assumes the dest is at the src_off index) 159 + */ 160 + if (flags & ASYNC_TX_XOR_DROP_DST) { 161 + src_cnt--; 162 + src_off++; 163 + } 164 + 165 + /* process up to 'MAX_XOR_BLOCKS' sources */ 166 + xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); 167 + 168 + /* if we are submitting additional xors 169 + * only set the callback on the last transaction 170 + */ 171 + if (src_cnt > xor_src_cnt) { 172 + local_flags &= ~ASYNC_TX_ACK; 173 + _cb_fn = NULL; 174 + _cb_param = NULL; 175 + } else { 176 + _cb_fn = cb_fn; 177 + _cb_param = cb_param; 178 + } 179 + 180 + /* wait for any prerequisite operations */ 181 + if (depend_tx) { 182 + /* if ack is already set then we cannot be sure 183 + * we are referring to the correct operation 184 + */ 185 + BUG_ON(depend_tx->ack); 186 + if (dma_wait_for_async_tx(depend_tx) == 187 + DMA_ERROR) 188 + panic("%s: DMA_ERROR waiting for " 189 + "depend_tx\n", 190 + __FUNCTION__); 191 + } 192 + 193 + do_sync_xor(dest, &src_list[src_off], offset, 194 + xor_src_cnt, len, local_flags, depend_tx, 195 + _cb_fn, _cb_param); 196 + } 197 + 198 + /* the previous tx is hidden from the client, 199 + * so ack it 200 + */ 201 + if (i && depend_tx) 202 + async_tx_ack(depend_tx); 203 + 204 + depend_tx = tx; 205 + 206 + if (src_cnt > xor_src_cnt) { 207 + /* drop completed sources */ 208 + src_cnt -= 
xor_src_cnt; 209 + src_off += xor_src_cnt; 210 + 211 + /* unconditionally preserve the destination */ 212 + flags &= ~ASYNC_TX_XOR_ZERO_DST; 213 + 214 + /* use the intermediate result a source, but remember 215 + * it's dropped, because it's implied, in the sync case 216 + */ 217 + src_list[--src_off] = dest; 218 + src_cnt++; 219 + flags |= ASYNC_TX_XOR_DROP_DST; 220 + } else 221 + src_cnt = 0; 222 + i++; 223 + } 224 + 225 + return tx; 226 + } 227 + EXPORT_SYMBOL_GPL(async_xor); 228 + 229 + static int page_is_zero(struct page *p, unsigned int offset, size_t len) 230 + { 231 + char *a = page_address(p) + offset; 232 + return ((*(u32 *) a) == 0 && 233 + memcmp(a, a + 4, len - 4) == 0); 234 + } 235 + 236 + /** 237 + * async_xor_zero_sum - attempt a xor parity check with a dma engine. 238 + * @dest: destination page used if the xor is performed synchronously 239 + * @src_list: array of source pages. The dest page must be listed as a source 240 + * at index zero. The contents of this array may be overwritten. 241 + * @offset: offset in pages to start transaction 242 + * @src_cnt: number of source pages 243 + * @len: length in bytes 244 + * @result: 0 if sum == 0 else non-zero 245 + * @flags: ASYNC_TX_ASSUME_COHERENT, ASYNC_TX_ACK, ASYNC_TX_DEP_ACK 246 + * @depend_tx: xor depends on the result of this transaction. 247 + * @cb_fn: function to call when the xor completes 248 + * @cb_param: parameter to pass to the callback routine 249 + */ 250 + struct dma_async_tx_descriptor * 251 + async_xor_zero_sum(struct page *dest, struct page **src_list, 252 + unsigned int offset, int src_cnt, size_t len, 253 + u32 *result, enum async_tx_flags flags, 254 + struct dma_async_tx_descriptor *depend_tx, 255 + dma_async_tx_callback cb_fn, void *cb_param) 256 + { 257 + struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_ZERO_SUM); 258 + struct dma_device *device = chan ? chan->device : NULL; 259 + int int_en = cb_fn ? 1 : 0; 260 + struct dma_async_tx_descriptor *tx = device ? 261 + device->device_prep_dma_zero_sum(chan, src_cnt, len, result, 262 + int_en) : NULL; 263 + int i; 264 + 265 + BUG_ON(src_cnt <= 1); 266 + 267 + if (tx) { 268 + dma_addr_t dma_addr; 269 + enum dma_data_direction dir; 270 + 271 + pr_debug("%s: (async) len: %zu\n", __FUNCTION__, len); 272 + 273 + dir = (flags & ASYNC_TX_ASSUME_COHERENT) ? 274 + DMA_NONE : DMA_TO_DEVICE; 275 + 276 + for (i = 0; i < src_cnt; i++) { 277 + dma_addr = dma_map_page(device->dev, src_list[i], 278 + offset, len, dir); 279 + tx->tx_set_src(dma_addr, tx, i); 280 + } 281 + 282 + async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); 283 + } else { 284 + unsigned long xor_flags = flags; 285 + 286 + pr_debug("%s: (sync) len: %zu\n", __FUNCTION__, len); 287 + 288 + xor_flags |= ASYNC_TX_XOR_DROP_DST; 289 + xor_flags &= ~ASYNC_TX_ACK; 290 + 291 + tx = async_xor(dest, src_list, offset, src_cnt, len, xor_flags, 292 + depend_tx, NULL, NULL); 293 + 294 + if (tx) { 295 + if (dma_wait_for_async_tx(tx) == DMA_ERROR) 296 + panic("%s: DMA_ERROR waiting for tx\n", 297 + __FUNCTION__); 298 + async_tx_ack(tx); 299 + } 300 + 301 + *result = page_is_zero(dest, offset, len) ? 
0 : 1; 302 + 303 + tx = NULL; 304 + 305 + async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); 306 + } 307 + 308 + return tx; 309 + } 310 + EXPORT_SYMBOL_GPL(async_xor_zero_sum); 311 + 312 + static int __init async_xor_init(void) 313 + { 314 + return 0; 315 + } 316 + 317 + static void __exit async_xor_exit(void) 318 + { 319 + do { } while (0); 320 + } 321 + 322 + module_init(async_xor_init); 323 + module_exit(async_xor_exit); 324 + 325 + MODULE_AUTHOR("Intel Corporation"); 326 + MODULE_DESCRIPTION("asynchronous xor/xor-zero-sum api"); 327 + MODULE_LICENSE("GPL");
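
A hedged RAID5-flavoured sketch (raid5_run_ops in the md commits above is the real consumer; the helpers here are illustrative only): compute parity as the xor of the data blocks, and verify it with the zero-sum variant.

    /* illustrative parity helpers, not from this merge */
    #include <linux/async_tx.h>
    #include <linux/mm.h>

    static struct dma_async_tx_descriptor *
    compute_parity(struct page *parity, struct page **blocks, int count,
                   dma_async_tx_callback done, void *stripe)
    {
            /* ASYNC_TX_XOR_ZERO_DST: stale parity must not enter the sum */
            return async_xor(parity, blocks, 0, count, PAGE_SIZE,
                             ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK,
                             NULL, done, stripe);
    }

    static struct dma_async_tx_descriptor *
    check_parity(struct page **blocks, int count, u32 *result,
                 dma_async_tx_callback done, void *stripe)
    {
            /* blocks[0] is the parity block; *result ends up 0 if it is
             * consistent with the data blocks */
            return async_xor_zero_sum(blocks[0], blocks, 0, count, PAGE_SIZE,
                                      result, ASYNC_TX_ACK, NULL, done, stripe);
    }
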
+10 -2
drivers/dma/Kconfig
··· 8 8 config DMA_ENGINE 9 9 bool "Support for DMA engines" 10 10 ---help--- 11 - DMA engines offload copy operations from the CPU to dedicated 12 - hardware, allowing the copies to happen asynchronously. 11 + DMA engines offload bulk memory operations from the CPU to dedicated 12 + hardware, allowing the operations to happen asynchronously. 13 13 14 14 comment "DMA Clients" 15 15 ··· 31 31 default m 32 32 ---help--- 33 33 Enable support for the Intel(R) I/OAT DMA engine. 34 + 35 + config INTEL_IOP_ADMA 36 + tristate "Intel IOP ADMA support" 37 + depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX) 38 + select ASYNC_CORE 39 + default m 40 + ---help--- 41 + Enable support for the Intel(R) IOP Series RAID engines. 34 42 35 43 endmenu
+1
drivers/dma/Makefile
··· 1 1 obj-$(CONFIG_DMA_ENGINE) += dmaengine.o 2 2 obj-$(CONFIG_NET_DMA) += iovlock.o 3 3 obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o 4 + obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+314 -109
drivers/dma/dmaengine.c
··· 37 37 * Each device has a channels list, which runs unlocked but is never modified 38 38 * once the device is registered, it's just setup by the driver. 39 39 * 40 - * Each client has a channels list, it's only modified under the client->lock 41 - * and in an RCU callback, so it's safe to read under rcu_read_lock(). 40 + * Each client is responsible for keeping track of the channels it uses. See 41 + * the definition of dma_event_callback in dmaengine.h. 42 42 * 43 43 * Each device has a kref, which is initialized to 1 when the device is 44 - * registered. A kref_put is done for each class_device registered. When the 44 + * registered. A kref_get is done for each class_device registered. When the 45 45 * class_device is released, the coresponding kref_put is done in the release 46 46 * method. Every time one of the device's channels is allocated to a client, 47 47 * a kref_get occurs. When the channel is freed, the coresponding kref_put ··· 51 51 * references to finish. 52 52 * 53 53 * Each channel has an open-coded implementation of Rusty Russell's "bigref," 54 - * with a kref and a per_cpu local_t. A single reference is set when on an 55 - * ADDED event, and removed with a REMOVE event. Net DMA client takes an 56 - * extra reference per outstanding transaction. The relase function does a 57 - * kref_put on the device. -ChrisL 54 + * with a kref and a per_cpu local_t. A dma_chan_get is called when a client 55 + * signals that it wants to use a channel, and dma_chan_put is called when 56 + * a channel is removed or a client using it is unregesitered. A client can 57 + * take extra references per outstanding transaction, as is the case with 58 + * the NET DMA client. The release function does a kref_put on the device. 59 + * -ChrisL, DanW 58 60 */ 59 61 60 62 #include <linux/init.h> 61 63 #include <linux/module.h> 64 + #include <linux/mm.h> 62 65 #include <linux/device.h> 63 66 #include <linux/dmaengine.h> 64 67 #include <linux/hardirq.h> ··· 69 66 #include <linux/percpu.h> 70 67 #include <linux/rcupdate.h> 71 68 #include <linux/mutex.h> 69 + #include <linux/jiffies.h> 72 70 73 71 static DEFINE_MUTEX(dma_list_mutex); 74 72 static LIST_HEAD(dma_device_list); ··· 104 100 static ssize_t show_in_use(struct class_device *cd, char *buf) 105 101 { 106 102 struct dma_chan *chan = container_of(cd, struct dma_chan, class_dev); 103 + int in_use = 0; 107 104 108 - return sprintf(buf, "%d\n", (chan->client ? 1 : 0)); 105 + if (unlikely(chan->slow_ref) && 106 + atomic_read(&chan->refcount.refcount) > 1) 107 + in_use = 1; 108 + else { 109 + if (local_read(&(per_cpu_ptr(chan->local, 110 + get_cpu())->refcount)) > 0) 111 + in_use = 1; 112 + put_cpu(); 113 + } 114 + 115 + return sprintf(buf, "%d\n", in_use); 109 116 } 110 117 111 118 static struct class_device_attribute dma_class_attrs[] = { ··· 142 127 143 128 /* --- client and device registration --- */ 144 129 130 + #define dma_chan_satisfies_mask(chan, mask) \ 131 + __dma_chan_satisfies_mask((chan), &(mask)) 132 + static int 133 + __dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want) 134 + { 135 + dma_cap_mask_t has; 136 + 137 + bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits, 138 + DMA_TX_TYPE_END); 139 + return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); 140 + } 141 + 145 142 /** 146 - * dma_client_chan_alloc - try to allocate a channel to a client 143 + * dma_client_chan_alloc - try to allocate channels to a client 147 144 * @client: &dma_client 148 145 * 149 146 * Called with dma_list_mutex held. 
150 147 */ 151 - static struct dma_chan *dma_client_chan_alloc(struct dma_client *client) 148 + static void dma_client_chan_alloc(struct dma_client *client) 152 149 { 153 150 struct dma_device *device; 154 151 struct dma_chan *chan; 155 - unsigned long flags; 156 152 int desc; /* allocated descriptor count */ 153 + enum dma_state_client ack; 157 154 158 - /* Find a channel, any DMA engine will do */ 159 - list_for_each_entry(device, &dma_device_list, global_node) { 155 + /* Find a channel */ 156 + list_for_each_entry(device, &dma_device_list, global_node) 160 157 list_for_each_entry(chan, &device->channels, device_node) { 161 - if (chan->client) 158 + if (!dma_chan_satisfies_mask(chan, client->cap_mask)) 162 159 continue; 163 160 164 161 desc = chan->device->device_alloc_chan_resources(chan); 165 162 if (desc >= 0) { 166 - kref_get(&device->refcount); 167 - kref_init(&chan->refcount); 168 - chan->slow_ref = 0; 169 - INIT_RCU_HEAD(&chan->rcu); 170 - chan->client = client; 171 - spin_lock_irqsave(&client->lock, flags); 172 - list_add_tail_rcu(&chan->client_node, 173 - &client->channels); 174 - spin_unlock_irqrestore(&client->lock, flags); 175 - return chan; 163 + ack = client->event_callback(client, 164 + chan, 165 + DMA_RESOURCE_AVAILABLE); 166 + 167 + /* we are done once this client rejects 168 + * an available resource 169 + */ 170 + if (ack == DMA_ACK) { 171 + dma_chan_get(chan); 172 + kref_get(&device->refcount); 173 + } else if (ack == DMA_NAK) 174 + return; 176 175 } 177 176 } 178 - } 179 - 180 - return NULL; 181 177 } 178 + 179 + enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) 180 + { 181 + enum dma_status status; 182 + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); 183 + 184 + dma_async_issue_pending(chan); 185 + do { 186 + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); 187 + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { 188 + printk(KERN_ERR "dma_sync_wait_timeout!\n"); 189 + return DMA_ERROR; 190 + } 191 + } while (status == DMA_IN_PROGRESS); 192 + 193 + return status; 194 + } 195 + EXPORT_SYMBOL(dma_sync_wait); 182 196 183 197 /** 184 198 * dma_chan_cleanup - release a DMA channel's resources ··· 217 173 { 218 174 struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); 219 175 chan->device->device_free_chan_resources(chan); 220 - chan->client = NULL; 221 176 kref_put(&chan->device->refcount, dma_async_device_cleanup); 222 177 } 223 178 EXPORT_SYMBOL(dma_chan_cleanup); ··· 232 189 kref_put(&chan->refcount, dma_chan_cleanup); 233 190 } 234 191 235 - static void dma_client_chan_free(struct dma_chan *chan) 192 + static void dma_chan_release(struct dma_chan *chan) 236 193 { 237 194 atomic_add(0x7FFFFFFF, &chan->refcount.refcount); 238 195 chan->slow_ref = 1; ··· 240 197 } 241 198 242 199 /** 243 - * dma_chans_rebalance - reallocate channels to clients 244 - * 245 - * When the number of DMA channel in the system changes, 246 - * channels need to be rebalanced among clients. 
200 + * dma_chans_notify_available - broadcast available channels to the clients 247 201 */ 248 - static void dma_chans_rebalance(void) 202 + static void dma_clients_notify_available(void) 249 203 { 250 204 struct dma_client *client; 251 - struct dma_chan *chan; 252 - unsigned long flags; 205 + 206 + mutex_lock(&dma_list_mutex); 207 + 208 + list_for_each_entry(client, &dma_client_list, global_node) 209 + dma_client_chan_alloc(client); 210 + 211 + mutex_unlock(&dma_list_mutex); 212 + } 213 + 214 + /** 215 + * dma_chans_notify_available - tell the clients that a channel is going away 216 + * @chan: channel on its way out 217 + */ 218 + static void dma_clients_notify_removed(struct dma_chan *chan) 219 + { 220 + struct dma_client *client; 221 + enum dma_state_client ack; 253 222 254 223 mutex_lock(&dma_list_mutex); 255 224 256 225 list_for_each_entry(client, &dma_client_list, global_node) { 257 - while (client->chans_desired > client->chan_count) { 258 - chan = dma_client_chan_alloc(client); 259 - if (!chan) 260 - break; 261 - client->chan_count++; 262 - client->event_callback(client, 263 - chan, 264 - DMA_RESOURCE_ADDED); 265 - } 266 - while (client->chans_desired < client->chan_count) { 267 - spin_lock_irqsave(&client->lock, flags); 268 - chan = list_entry(client->channels.next, 269 - struct dma_chan, 270 - client_node); 271 - list_del_rcu(&chan->client_node); 272 - spin_unlock_irqrestore(&client->lock, flags); 273 - client->chan_count--; 274 - client->event_callback(client, 275 - chan, 276 - DMA_RESOURCE_REMOVED); 277 - dma_client_chan_free(chan); 226 + ack = client->event_callback(client, chan, 227 + DMA_RESOURCE_REMOVED); 228 + 229 + /* client was holding resources for this channel so 230 + * free it 231 + */ 232 + if (ack == DMA_ACK) { 233 + dma_chan_put(chan); 234 + kref_put(&chan->device->refcount, 235 + dma_async_device_cleanup); 278 236 } 279 237 } 280 238 ··· 283 239 } 284 240 285 241 /** 286 - * dma_async_client_register - allocate and register a &dma_client 287 - * @event_callback: callback for notification of channel addition/removal 242 + * dma_async_client_register - register a &dma_client 243 + * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask' 288 244 */ 289 - struct dma_client *dma_async_client_register(dma_event_callback event_callback) 245 + void dma_async_client_register(struct dma_client *client) 290 246 { 291 - struct dma_client *client; 292 - 293 - client = kzalloc(sizeof(*client), GFP_KERNEL); 294 - if (!client) 295 - return NULL; 296 - 297 - INIT_LIST_HEAD(&client->channels); 298 - spin_lock_init(&client->lock); 299 - client->chans_desired = 0; 300 - client->chan_count = 0; 301 - client->event_callback = event_callback; 302 - 303 247 mutex_lock(&dma_list_mutex); 304 248 list_add_tail(&client->global_node, &dma_client_list); 305 249 mutex_unlock(&dma_list_mutex); 306 - 307 - return client; 308 250 } 309 251 EXPORT_SYMBOL(dma_async_client_register); 310 252 ··· 302 272 */ 303 273 void dma_async_client_unregister(struct dma_client *client) 304 274 { 275 + struct dma_device *device; 305 276 struct dma_chan *chan; 277 + enum dma_state_client ack; 306 278 307 279 if (!client) 308 280 return; 309 281 310 - rcu_read_lock(); 311 - list_for_each_entry_rcu(chan, &client->channels, client_node) 312 - dma_client_chan_free(chan); 313 - rcu_read_unlock(); 314 - 315 282 mutex_lock(&dma_list_mutex); 283 + /* free all channels the client is holding */ 284 + list_for_each_entry(device, &dma_device_list, global_node) 285 + list_for_each_entry(chan, 
&device->channels, device_node) { 286 + ack = client->event_callback(client, chan, 287 + DMA_RESOURCE_REMOVED); 288 + 289 + if (ack == DMA_ACK) { 290 + dma_chan_put(chan); 291 + kref_put(&chan->device->refcount, 292 + dma_async_device_cleanup); 293 + } 294 + } 295 + 316 296 list_del(&client->global_node); 317 297 mutex_unlock(&dma_list_mutex); 318 - 319 - kfree(client); 320 - dma_chans_rebalance(); 321 298 } 322 299 EXPORT_SYMBOL(dma_async_client_unregister); 323 300 324 301 /** 325 - * dma_async_client_chan_request - request DMA channels 326 - * @client: &dma_client 327 - * @number: count of DMA channels requested 328 - * 329 - * Clients call dma_async_client_chan_request() to specify how many 330 - * DMA channels they need, 0 to free all currently allocated. 331 - * The resulting allocations/frees are indicated to the client via the 332 - * event callback. 302 + * dma_async_client_chan_request - send all available channels to the 303 + * client that satisfy the capability mask 304 + * @client - requester 333 305 */ 334 - void dma_async_client_chan_request(struct dma_client *client, 335 - unsigned int number) 306 + void dma_async_client_chan_request(struct dma_client *client) 336 307 { 337 - client->chans_desired = number; 338 - dma_chans_rebalance(); 308 + mutex_lock(&dma_list_mutex); 309 + dma_client_chan_alloc(client); 310 + mutex_unlock(&dma_list_mutex); 339 311 } 340 312 EXPORT_SYMBOL(dma_async_client_chan_request); 341 313 ··· 348 316 int dma_async_device_register(struct dma_device *device) 349 317 { 350 318 static int id; 351 - int chancnt = 0; 319 + int chancnt = 0, rc; 352 320 struct dma_chan* chan; 353 321 354 322 if (!device) 355 323 return -ENODEV; 324 + 325 + /* validate device routines */ 326 + BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) && 327 + !device->device_prep_dma_memcpy); 328 + BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && 329 + !device->device_prep_dma_xor); 330 + BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && 331 + !device->device_prep_dma_zero_sum); 332 + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && 333 + !device->device_prep_dma_memset); 334 + BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && 335 + !device->device_prep_dma_interrupt); 336 + 337 + BUG_ON(!device->device_alloc_chan_resources); 338 + BUG_ON(!device->device_free_chan_resources); 339 + BUG_ON(!device->device_dependency_added); 340 + BUG_ON(!device->device_is_tx_complete); 341 + BUG_ON(!device->device_issue_pending); 342 + BUG_ON(!device->dev); 356 343 357 344 init_completion(&device->done); 358 345 kref_init(&device->refcount); ··· 389 338 snprintf(chan->class_dev.class_id, BUS_ID_SIZE, "dma%dchan%d", 390 339 device->dev_id, chan->chan_id); 391 340 341 + rc = class_device_register(&chan->class_dev); 342 + if (rc) { 343 + chancnt--; 344 + free_percpu(chan->local); 345 + chan->local = NULL; 346 + goto err_out; 347 + } 348 + 392 349 kref_get(&device->refcount); 393 - class_device_register(&chan->class_dev); 350 + kref_init(&chan->refcount); 351 + chan->slow_ref = 0; 352 + INIT_RCU_HEAD(&chan->rcu); 394 353 } 395 354 396 355 mutex_lock(&dma_list_mutex); 397 356 list_add_tail(&device->global_node, &dma_device_list); 398 357 mutex_unlock(&dma_list_mutex); 399 358 400 - dma_chans_rebalance(); 359 + dma_clients_notify_available(); 401 360 402 361 return 0; 362 + 363 + err_out: 364 + list_for_each_entry(chan, &device->channels, device_node) { 365 + if (chan->local == NULL) 366 + continue; 367 + kref_put(&device->refcount, dma_async_device_cleanup); 368 + 
class_device_unregister(&chan->class_dev); 369 + chancnt--; 370 + free_percpu(chan->local); 371 + } 372 + return rc; 403 373 } 404 374 EXPORT_SYMBOL(dma_async_device_register); 405 375 ··· 443 371 void dma_async_device_unregister(struct dma_device *device) 444 372 { 445 373 struct dma_chan *chan; 446 - unsigned long flags; 447 374 448 375 mutex_lock(&dma_list_mutex); 449 376 list_del(&device->global_node); 450 377 mutex_unlock(&dma_list_mutex); 451 378 452 379 list_for_each_entry(chan, &device->channels, device_node) { 453 - if (chan->client) { 454 - spin_lock_irqsave(&chan->client->lock, flags); 455 - list_del(&chan->client_node); 456 - chan->client->chan_count--; 457 - spin_unlock_irqrestore(&chan->client->lock, flags); 458 - chan->client->event_callback(chan->client, 459 - chan, 460 - DMA_RESOURCE_REMOVED); 461 - dma_client_chan_free(chan); 462 - } 380 + dma_clients_notify_removed(chan); 463 381 class_device_unregister(&chan->class_dev); 382 + dma_chan_release(chan); 464 383 } 465 - dma_chans_rebalance(); 466 384 467 385 kref_put(&device->refcount, dma_async_device_cleanup); 468 386 wait_for_completion(&device->done); 469 387 } 470 388 EXPORT_SYMBOL(dma_async_device_unregister); 389 + 390 + /** 391 + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses 392 + * @chan: DMA channel to offload copy to 393 + * @dest: destination address (virtual) 394 + * @src: source address (virtual) 395 + * @len: length 396 + * 397 + * Both @dest and @src must be mappable to a bus address according to the 398 + * DMA mapping API rules for streaming mappings. 399 + * Both @dest and @src must stay memory resident (kernel memory or locked 400 + * user space pages). 401 + */ 402 + dma_cookie_t 403 + dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, 404 + void *src, size_t len) 405 + { 406 + struct dma_device *dev = chan->device; 407 + struct dma_async_tx_descriptor *tx; 408 + dma_addr_t addr; 409 + dma_cookie_t cookie; 410 + int cpu; 411 + 412 + tx = dev->device_prep_dma_memcpy(chan, len, 0); 413 + if (!tx) 414 + return -ENOMEM; 415 + 416 + tx->ack = 1; 417 + tx->callback = NULL; 418 + addr = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); 419 + tx->tx_set_src(addr, tx, 0); 420 + addr = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); 421 + tx->tx_set_dest(addr, tx, 0); 422 + cookie = tx->tx_submit(tx); 423 + 424 + cpu = get_cpu(); 425 + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 426 + per_cpu_ptr(chan->local, cpu)->memcpy_count++; 427 + put_cpu(); 428 + 429 + return cookie; 430 + } 431 + EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); 432 + 433 + /** 434 + * dma_async_memcpy_buf_to_pg - offloaded copy from address to page 435 + * @chan: DMA channel to offload copy to 436 + * @page: destination page 437 + * @offset: offset in page to copy to 438 + * @kdata: source address (virtual) 439 + * @len: length 440 + * 441 + * Both @page/@offset and @kdata must be mappable to a bus address according 442 + * to the DMA mapping API rules for streaming mappings. 
443 + * Both @page/@offset and @kdata must stay memory resident (kernel memory or 444 + * locked user space pages) 445 + */ 446 + dma_cookie_t 447 + dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, 448 + unsigned int offset, void *kdata, size_t len) 449 + { 450 + struct dma_device *dev = chan->device; 451 + struct dma_async_tx_descriptor *tx; 452 + dma_addr_t addr; 453 + dma_cookie_t cookie; 454 + int cpu; 455 + 456 + tx = dev->device_prep_dma_memcpy(chan, len, 0); 457 + if (!tx) 458 + return -ENOMEM; 459 + 460 + tx->ack = 1; 461 + tx->callback = NULL; 462 + addr = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); 463 + tx->tx_set_src(addr, tx, 0); 464 + addr = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); 465 + tx->tx_set_dest(addr, tx, 0); 466 + cookie = tx->tx_submit(tx); 467 + 468 + cpu = get_cpu(); 469 + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 470 + per_cpu_ptr(chan->local, cpu)->memcpy_count++; 471 + put_cpu(); 472 + 473 + return cookie; 474 + } 475 + EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); 476 + 477 + /** 478 + * dma_async_memcpy_pg_to_pg - offloaded copy from page to page 479 + * @chan: DMA channel to offload copy to 480 + * @dest_pg: destination page 481 + * @dest_off: offset in page to copy to 482 + * @src_pg: source page 483 + * @src_off: offset in page to copy from 484 + * @len: length 485 + * 486 + * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus 487 + * address according to the DMA mapping API rules for streaming mappings. 488 + * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident 489 + * (kernel memory or locked user space pages). 490 + */ 491 + dma_cookie_t 492 + dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, 493 + unsigned int dest_off, struct page *src_pg, unsigned int src_off, 494 + size_t len) 495 + { 496 + struct dma_device *dev = chan->device; 497 + struct dma_async_tx_descriptor *tx; 498 + dma_addr_t addr; 499 + dma_cookie_t cookie; 500 + int cpu; 501 + 502 + tx = dev->device_prep_dma_memcpy(chan, len, 0); 503 + if (!tx) 504 + return -ENOMEM; 505 + 506 + tx->ack = 1; 507 + tx->callback = NULL; 508 + addr = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); 509 + tx->tx_set_src(addr, tx, 0); 510 + addr = dma_map_page(dev->dev, dest_pg, dest_off, len, DMA_FROM_DEVICE); 511 + tx->tx_set_dest(addr, tx, 0); 512 + cookie = tx->tx_submit(tx); 513 + 514 + cpu = get_cpu(); 515 + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 516 + per_cpu_ptr(chan->local, cpu)->memcpy_count++; 517 + put_cpu(); 518 + 519 + return cookie; 520 + } 521 + EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); 522 + 523 + void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, 524 + struct dma_chan *chan) 525 + { 526 + tx->chan = chan; 527 + spin_lock_init(&tx->lock); 528 + INIT_LIST_HEAD(&tx->depend_node); 529 + INIT_LIST_HEAD(&tx->depend_list); 530 + } 531 + EXPORT_SYMBOL(dma_async_tx_descriptor_init); 471 532 472 533 static int __init dma_bus_init(void) 473 534 {
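The dmaengine.c changes above replace the old "request N channels" interface with a capability/callback model: a client embeds a struct dma_client, fills in the capabilities it needs, and accepts or rejects channels as the core offers them. Below is a minimal sketch of a client under the new model. It assumes the struct dma_client fields and enum values referenced in the hunks above (event_callback, cap_mask, DMA_RESOURCE_AVAILABLE/REMOVED, DMA_ACK/DMA_NAK) are declared in dmaengine.h; the exact callback prototype (taken here as an enum dma_state event argument) should be checked against that header. All names prefixed example_ are hypothetical, and a real client must also track which channels it accepted.

#include <linux/module.h>
#include <linux/dmaengine.h>

static enum dma_state_client
example_event(struct dma_client *client, struct dma_chan *chan,
	      enum dma_state state)
{
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		/* accept: the core then does dma_chan_get() on the channel */
		return DMA_ACK;
	case DMA_RESOURCE_REMOVED:
		/* we had accepted it, so acknowledge and let the core
		 * dma_chan_put() the channel */
		return DMA_ACK;
	default:
		/* decline anything this sketch does not handle */
		return DMA_NAK;
	}
}

static struct dma_client example_client = {
	.event_callback = example_event,
};

static int __init example_client_init(void)
{
	dma_cap_set(DMA_MEMCPY, example_client.cap_mask);
	dma_async_client_register(&example_client);
	/* have the core offer every registered channel that satisfies
	 * cap_mask; acceptance happens in the callback above */
	dma_async_client_chan_request(&example_client);
	return 0;
}

static void __exit example_client_exit(void)
{
	dma_async_client_unregister(&example_client);
}

module_init(example_client_init);
module_exit(example_client_exit);
MODULE_LICENSE("GPL");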
+178 -191
drivers/dma/ioatdma.c
··· 32 32 #include <linux/delay.h> 33 33 #include <linux/dma-mapping.h> 34 34 #include "ioatdma.h" 35 - #include "ioatdma_io.h" 36 35 #include "ioatdma_registers.h" 37 36 #include "ioatdma_hw.h" 38 37 39 38 #define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) 40 39 #define to_ioat_device(dev) container_of(dev, struct ioat_device, common) 41 40 #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) 41 + #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) 42 42 43 43 /* internal functions */ 44 44 static int __devinit ioat_probe(struct pci_dev *pdev, const struct pci_device_id *ent); 45 + static void ioat_shutdown(struct pci_dev *pdev); 45 46 static void __devexit ioat_remove(struct pci_dev *pdev); 46 47 47 48 static int enumerate_dma_channels(struct ioat_device *device) ··· 52 51 int i; 53 52 struct ioat_dma_chan *ioat_chan; 54 53 55 - device->common.chancnt = ioatdma_read8(device, IOAT_CHANCNT_OFFSET); 56 - xfercap_scale = ioatdma_read8(device, IOAT_XFERCAP_OFFSET); 54 + device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); 55 + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); 57 56 xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); 58 57 59 58 for (i = 0; i < device->common.chancnt; i++) { ··· 72 71 INIT_LIST_HEAD(&ioat_chan->used_desc); 73 72 /* This should be made common somewhere in dmaengine.c */ 74 73 ioat_chan->common.device = &device->common; 75 - ioat_chan->common.client = NULL; 76 74 list_add_tail(&ioat_chan->common.device_node, 77 75 &device->common.channels); 78 76 } 79 77 return device->common.chancnt; 78 + } 79 + 80 + static void 81 + ioat_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index) 82 + { 83 + struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); 84 + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); 85 + 86 + pci_unmap_addr_set(desc, src, addr); 87 + 88 + list_for_each_entry(iter, &desc->async_tx.tx_list, node) { 89 + iter->hw->src_addr = addr; 90 + addr += ioat_chan->xfercap; 91 + } 92 + 93 + } 94 + 95 + static void 96 + ioat_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, int index) 97 + { 98 + struct ioat_desc_sw *iter, *desc = tx_to_ioat_desc(tx); 99 + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); 100 + 101 + pci_unmap_addr_set(desc, dst, addr); 102 + 103 + list_for_each_entry(iter, &desc->async_tx.tx_list, node) { 104 + iter->hw->dst_addr = addr; 105 + addr += ioat_chan->xfercap; 106 + } 107 + } 108 + 109 + static dma_cookie_t 110 + ioat_tx_submit(struct dma_async_tx_descriptor *tx) 111 + { 112 + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); 113 + struct ioat_desc_sw *desc = tx_to_ioat_desc(tx); 114 + int append = 0; 115 + dma_cookie_t cookie; 116 + struct ioat_desc_sw *group_start; 117 + 118 + group_start = list_entry(desc->async_tx.tx_list.next, 119 + struct ioat_desc_sw, node); 120 + spin_lock_bh(&ioat_chan->desc_lock); 121 + /* cookie incr and addition to used_list must be atomic */ 122 + cookie = ioat_chan->common.cookie; 123 + cookie++; 124 + if (cookie < 0) 125 + cookie = 1; 126 + ioat_chan->common.cookie = desc->async_tx.cookie = cookie; 127 + 128 + /* write address into NextDescriptor field of last desc in chain */ 129 + to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = 130 + group_start->async_tx.phys; 131 + list_splice_init(&desc->async_tx.tx_list, ioat_chan->used_desc.prev); 132 + 133 + ioat_chan->pending += desc->tx_cnt; 134 + if (ioat_chan->pending >= 4) { 135 + append = 1; 
136 + ioat_chan->pending = 0; 137 + } 138 + spin_unlock_bh(&ioat_chan->desc_lock); 139 + 140 + if (append) 141 + writeb(IOAT_CHANCMD_APPEND, 142 + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); 143 + 144 + return cookie; 80 145 } 81 146 82 147 static struct ioat_desc_sw *ioat_dma_alloc_descriptor( ··· 166 99 } 167 100 168 101 memset(desc, 0, sizeof(*desc)); 102 + dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); 103 + desc_sw->async_tx.tx_set_src = ioat_set_src; 104 + desc_sw->async_tx.tx_set_dest = ioat_set_dest; 105 + desc_sw->async_tx.tx_submit = ioat_tx_submit; 106 + INIT_LIST_HEAD(&desc_sw->async_tx.tx_list); 169 107 desc_sw->hw = desc; 170 - desc_sw->phys = phys; 108 + desc_sw->async_tx.phys = phys; 171 109 172 110 return desc_sw; 173 111 } ··· 195 123 * In-use bit automatically set by reading chanctrl 196 124 * If 0, we got it, if 1, someone else did 197 125 */ 198 - chanctrl = ioatdma_chan_read16(ioat_chan, IOAT_CHANCTRL_OFFSET); 126 + chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); 199 127 if (chanctrl & IOAT_CHANCTRL_CHANNEL_IN_USE) 200 128 return -EBUSY; 201 129 ··· 204 132 IOAT_CHANCTRL_ERR_INT_EN | 205 133 IOAT_CHANCTRL_ANY_ERR_ABORT_EN | 206 134 IOAT_CHANCTRL_ERR_COMPLETION_EN; 207 - ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); 135 + writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); 208 136 209 - chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET); 137 + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); 210 138 if (chanerr) { 211 139 printk("IOAT: CHANERR = %x, clearing\n", chanerr); 212 - ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET, chanerr); 140 + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); 213 141 } 214 142 215 143 /* Allocate descriptors */ ··· 233 161 &ioat_chan->completion_addr); 234 162 memset(ioat_chan->completion_virt, 0, 235 163 sizeof(*ioat_chan->completion_virt)); 236 - ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_LOW, 237 - ((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF); 238 - ioatdma_chan_write32(ioat_chan, IOAT_CHANCMP_OFFSET_HIGH, 239 - ((u64) ioat_chan->completion_addr) >> 32); 164 + writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, 165 + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); 166 + writel(((u64) ioat_chan->completion_addr) >> 32, 167 + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); 240 168 241 169 ioat_start_null_desc(ioat_chan); 242 170 return i; ··· 254 182 255 183 ioat_dma_memcpy_cleanup(ioat_chan); 256 184 257 - ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_RESET); 185 + writeb(IOAT_CHANCMD_RESET, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); 258 186 259 187 spin_lock_bh(&ioat_chan->desc_lock); 260 188 list_for_each_entry_safe(desc, _desc, &ioat_chan->used_desc, node) { 261 189 in_use_descs++; 262 190 list_del(&desc->node); 263 - pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys); 191 + pci_pool_free(ioat_device->dma_pool, desc->hw, 192 + desc->async_tx.phys); 264 193 kfree(desc); 265 194 } 266 195 list_for_each_entry_safe(desc, _desc, &ioat_chan->free_desc, node) { 267 196 list_del(&desc->node); 268 - pci_pool_free(ioat_device->dma_pool, desc->hw, desc->phys); 197 + pci_pool_free(ioat_device->dma_pool, desc->hw, 198 + desc->async_tx.phys); 269 199 kfree(desc); 270 200 } 271 201 spin_unlock_bh(&ioat_chan->desc_lock); ··· 284 210 ioat_chan->last_completion = ioat_chan->completion_addr = 0; 285 211 286 212 /* Tell hw the chan is free */ 287 - chanctrl = ioatdma_chan_read16(ioat_chan, 
IOAT_CHANCTRL_OFFSET); 213 + chanctrl = readw(ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); 288 214 chanctrl &= ~IOAT_CHANCTRL_CHANNEL_IN_USE; 289 - ioatdma_chan_write16(ioat_chan, IOAT_CHANCTRL_OFFSET, chanctrl); 215 + writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); 290 216 } 291 217 292 - /** 293 - * do_ioat_dma_memcpy - actual function that initiates a IOAT DMA transaction 294 - * @ioat_chan: IOAT DMA channel handle 295 - * @dest: DMA destination address 296 - * @src: DMA source address 297 - * @len: transaction length in bytes 298 - */ 299 - 300 - static dma_cookie_t do_ioat_dma_memcpy(struct ioat_dma_chan *ioat_chan, 301 - dma_addr_t dest, 302 - dma_addr_t src, 303 - size_t len) 218 + static struct dma_async_tx_descriptor * 219 + ioat_dma_prep_memcpy(struct dma_chan *chan, size_t len, int int_en) 304 220 { 305 - struct ioat_desc_sw *first; 306 - struct ioat_desc_sw *prev; 307 - struct ioat_desc_sw *new; 308 - dma_cookie_t cookie; 221 + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 222 + struct ioat_desc_sw *first, *prev, *new; 309 223 LIST_HEAD(new_chain); 310 224 u32 copy; 311 225 size_t orig_len; 312 - dma_addr_t orig_src, orig_dst; 313 - unsigned int desc_count = 0; 314 - unsigned int append = 0; 315 - 316 - if (!ioat_chan || !dest || !src) 317 - return -EFAULT; 226 + int desc_count = 0; 318 227 319 228 if (!len) 320 - return ioat_chan->common.cookie; 229 + return NULL; 321 230 322 231 orig_len = len; 323 - orig_src = src; 324 - orig_dst = dest; 325 232 326 233 first = NULL; 327 234 prev = NULL; 328 235 329 236 spin_lock_bh(&ioat_chan->desc_lock); 330 - 331 237 while (len) { 332 238 if (!list_empty(&ioat_chan->free_desc)) { 333 239 new = to_ioat_desc(ioat_chan->free_desc.next); ··· 324 270 325 271 new->hw->size = copy; 326 272 new->hw->ctl = 0; 327 - new->hw->src_addr = src; 328 - new->hw->dst_addr = dest; 329 - new->cookie = 0; 273 + new->async_tx.cookie = 0; 274 + new->async_tx.ack = 1; 330 275 331 276 /* chain together the physical address list for the HW */ 332 277 if (!first) 333 278 first = new; 334 279 else 335 - prev->hw->next = (u64) new->phys; 280 + prev->hw->next = (u64) new->async_tx.phys; 336 281 337 282 prev = new; 338 - 339 283 len -= copy; 340 - dest += copy; 341 - src += copy; 342 - 343 284 list_add_tail(&new->node, &new_chain); 344 285 desc_count++; 345 286 } 287 + 288 + list_splice(&new_chain, &new->async_tx.tx_list); 289 + 346 290 new->hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; 347 291 new->hw->next = 0; 292 + new->tx_cnt = desc_count; 293 + new->async_tx.ack = 0; /* client is in control of this ack */ 294 + new->async_tx.cookie = -EBUSY; 348 295 349 - /* cookie incr and addition to used_list must be atomic */ 350 - 351 - cookie = ioat_chan->common.cookie; 352 - cookie++; 353 - if (cookie < 0) 354 - cookie = 1; 355 - ioat_chan->common.cookie = new->cookie = cookie; 356 - 357 - pci_unmap_addr_set(new, src, orig_src); 358 - pci_unmap_addr_set(new, dst, orig_dst); 359 296 pci_unmap_len_set(new, src_len, orig_len); 360 297 pci_unmap_len_set(new, dst_len, orig_len); 361 - 362 - /* write address into NextDescriptor field of last desc in chain */ 363 - to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = first->phys; 364 - list_splice_init(&new_chain, ioat_chan->used_desc.prev); 365 - 366 - ioat_chan->pending += desc_count; 367 - if (ioat_chan->pending >= 20) { 368 - append = 1; 369 - ioat_chan->pending = 0; 370 - } 371 - 372 298 spin_unlock_bh(&ioat_chan->desc_lock); 373 299 374 - if (append) 375 - ioatdma_chan_write8(ioat_chan, 376 - 
IOAT_CHANCMD_OFFSET, 377 - IOAT_CHANCMD_APPEND); 378 - return cookie; 300 + return new ? &new->async_tx : NULL; 379 301 } 380 302 381 - /** 382 - * ioat_dma_memcpy_buf_to_buf - wrapper that takes src & dest bufs 383 - * @chan: IOAT DMA channel handle 384 - * @dest: DMA destination address 385 - * @src: DMA source address 386 - * @len: transaction length in bytes 387 - */ 388 - 389 - static dma_cookie_t ioat_dma_memcpy_buf_to_buf(struct dma_chan *chan, 390 - void *dest, 391 - void *src, 392 - size_t len) 393 - { 394 - dma_addr_t dest_addr; 395 - dma_addr_t src_addr; 396 - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 397 - 398 - dest_addr = pci_map_single(ioat_chan->device->pdev, 399 - dest, len, PCI_DMA_FROMDEVICE); 400 - src_addr = pci_map_single(ioat_chan->device->pdev, 401 - src, len, PCI_DMA_TODEVICE); 402 - 403 - return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); 404 - } 405 - 406 - /** 407 - * ioat_dma_memcpy_buf_to_pg - wrapper, copying from a buf to a page 408 - * @chan: IOAT DMA channel handle 409 - * @page: pointer to the page to copy to 410 - * @offset: offset into that page 411 - * @src: DMA source address 412 - * @len: transaction length in bytes 413 - */ 414 - 415 - static dma_cookie_t ioat_dma_memcpy_buf_to_pg(struct dma_chan *chan, 416 - struct page *page, 417 - unsigned int offset, 418 - void *src, 419 - size_t len) 420 - { 421 - dma_addr_t dest_addr; 422 - dma_addr_t src_addr; 423 - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 424 - 425 - dest_addr = pci_map_page(ioat_chan->device->pdev, 426 - page, offset, len, PCI_DMA_FROMDEVICE); 427 - src_addr = pci_map_single(ioat_chan->device->pdev, 428 - src, len, PCI_DMA_TODEVICE); 429 - 430 - return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); 431 - } 432 - 433 - /** 434 - * ioat_dma_memcpy_pg_to_pg - wrapper, copying between two pages 435 - * @chan: IOAT DMA channel handle 436 - * @dest_pg: pointer to the page to copy to 437 - * @dest_off: offset into that page 438 - * @src_pg: pointer to the page to copy from 439 - * @src_off: offset into that page 440 - * @len: transaction length in bytes. This is guaranteed not to make a copy 441 - * across a page boundary. 
442 - */ 443 - 444 - static dma_cookie_t ioat_dma_memcpy_pg_to_pg(struct dma_chan *chan, 445 - struct page *dest_pg, 446 - unsigned int dest_off, 447 - struct page *src_pg, 448 - unsigned int src_off, 449 - size_t len) 450 - { 451 - dma_addr_t dest_addr; 452 - dma_addr_t src_addr; 453 - struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 454 - 455 - dest_addr = pci_map_page(ioat_chan->device->pdev, 456 - dest_pg, dest_off, len, PCI_DMA_FROMDEVICE); 457 - src_addr = pci_map_page(ioat_chan->device->pdev, 458 - src_pg, src_off, len, PCI_DMA_TODEVICE); 459 - 460 - return do_ioat_dma_memcpy(ioat_chan, dest_addr, src_addr, len); 461 - } 462 303 463 304 /** 464 305 * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended descriptors to hw ··· 366 417 367 418 if (ioat_chan->pending != 0) { 368 419 ioat_chan->pending = 0; 369 - ioatdma_chan_write8(ioat_chan, 370 - IOAT_CHANCMD_OFFSET, 371 - IOAT_CHANCMD_APPEND); 420 + writeb(IOAT_CHANCMD_APPEND, 421 + ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); 372 422 } 373 423 } 374 424 ··· 397 449 if ((chan->completion_virt->full & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == 398 450 IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { 399 451 printk("IOAT: Channel halted, chanerr = %x\n", 400 - ioatdma_chan_read32(chan, IOAT_CHANERR_OFFSET)); 452 + readl(chan->reg_base + IOAT_CHANERR_OFFSET)); 401 453 402 454 /* TODO do something to salvage the situation */ 403 455 } ··· 415 467 * exceeding xfercap, perhaps. If so, only the last one will 416 468 * have a cookie, and require unmapping. 417 469 */ 418 - if (desc->cookie) { 419 - cookie = desc->cookie; 470 + if (desc->async_tx.cookie) { 471 + cookie = desc->async_tx.cookie; 420 472 421 473 /* yes we are unmapping both _page and _single alloc'd 422 474 regions with unmap_page. Is this *really* that bad? ··· 431 483 PCI_DMA_TODEVICE); 432 484 } 433 485 434 - if (desc->phys != phys_complete) { 435 - /* a completed entry, but not the last, so cleanup */ 436 - list_del(&desc->node); 437 - list_add_tail(&desc->node, &chan->free_desc); 486 + if (desc->async_tx.phys != phys_complete) { 487 + /* a completed entry, but not the last, so cleanup 488 + * if the client is done with the descriptor 489 + */ 490 + if (desc->async_tx.ack) { 491 + list_del(&desc->node); 492 + list_add_tail(&desc->node, &chan->free_desc); 493 + } else 494 + desc->async_tx.cookie = 0; 438 495 } else { 439 496 /* last used desc. Do not remove, so we can append from 440 497 it, but don't look at it next time, either */ 441 - desc->cookie = 0; 498 + desc->async_tx.cookie = 0; 442 499 443 500 /* TODO check status bits? 
*/ 444 501 break; ··· 457 504 chan->completed_cookie = cookie; 458 505 459 506 spin_unlock(&chan->cleanup_lock); 507 + } 508 + 509 + static void ioat_dma_dependency_added(struct dma_chan *chan) 510 + { 511 + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); 512 + spin_lock_bh(&ioat_chan->desc_lock); 513 + if (ioat_chan->pending == 0) { 514 + spin_unlock_bh(&ioat_chan->desc_lock); 515 + ioat_dma_memcpy_cleanup(ioat_chan); 516 + } else 517 + spin_unlock_bh(&ioat_chan->desc_lock); 460 518 } 461 519 462 520 /** ··· 517 553 518 554 static struct pci_device_id ioat_pci_tbl[] = { 519 555 { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, 556 + { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, 557 + PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, 520 558 { 0, } 521 559 }; 522 560 ··· 526 560 .name = "ioatdma", 527 561 .id_table = ioat_pci_tbl, 528 562 .probe = ioat_probe, 563 + .shutdown = ioat_shutdown, 529 564 .remove = __devexit_p(ioat_remove), 530 565 }; 531 566 ··· 536 569 unsigned long attnstatus; 537 570 u8 intrctrl; 538 571 539 - intrctrl = ioatdma_read8(instance, IOAT_INTRCTRL_OFFSET); 572 + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); 540 573 541 574 if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) 542 575 return IRQ_NONE; 543 576 544 577 if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { 545 - ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl); 578 + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); 546 579 return IRQ_NONE; 547 580 } 548 581 549 - attnstatus = ioatdma_read32(instance, IOAT_ATTNSTATUS_OFFSET); 582 + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); 550 583 551 584 printk(KERN_ERR "ioatdma error: interrupt! status %lx\n", attnstatus); 552 585 553 - ioatdma_write8(instance, IOAT_INTRCTRL_OFFSET, intrctrl); 586 + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); 554 587 return IRQ_HANDLED; 555 588 } 556 589 ··· 574 607 575 608 desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; 576 609 desc->hw->next = 0; 610 + desc->async_tx.ack = 1; 577 611 578 612 list_add_tail(&desc->node, &ioat_chan->used_desc); 579 613 spin_unlock_bh(&ioat_chan->desc_lock); 580 614 581 - #if (BITS_PER_LONG == 64) 582 - ioatdma_chan_write64(ioat_chan, IOAT_CHAINADDR_OFFSET, desc->phys); 583 - #else 584 - ioatdma_chan_write32(ioat_chan, 585 - IOAT_CHAINADDR_OFFSET_LOW, 586 - (u32) desc->phys); 587 - ioatdma_chan_write32(ioat_chan, IOAT_CHAINADDR_OFFSET_HIGH, 0); 588 - #endif 589 - ioatdma_chan_write8(ioat_chan, IOAT_CHANCMD_OFFSET, IOAT_CHANCMD_START); 615 + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, 616 + ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_LOW); 617 + writel(((u64) desc->async_tx.phys) >> 32, 618 + ioat_chan->reg_base + IOAT_CHAINADDR_OFFSET_HIGH); 619 + 620 + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + IOAT_CHANCMD_OFFSET); 590 621 } 591 622 592 623 /* ··· 598 633 u8 *src; 599 634 u8 *dest; 600 635 struct dma_chan *dma_chan; 636 + struct dma_async_tx_descriptor *tx; 637 + dma_addr_t addr; 601 638 dma_cookie_t cookie; 602 639 int err = 0; 603 640 ··· 625 658 goto out; 626 659 } 627 660 628 - cookie = ioat_dma_memcpy_buf_to_buf(dma_chan, dest, src, IOAT_TEST_SIZE); 661 + tx = ioat_dma_prep_memcpy(dma_chan, IOAT_TEST_SIZE, 0); 662 + async_tx_ack(tx); 663 + addr = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, 664 + DMA_TO_DEVICE); 665 + ioat_set_src(addr, tx, 0); 666 + addr = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, 667 + DMA_FROM_DEVICE); 668 + ioat_set_dest(addr, tx, 0); 669 + cookie = ioat_tx_submit(tx); 629 670 
ioat_dma_memcpy_issue_pending(dma_chan); 630 671 msleep(1); 631 672 ··· 723 748 724 749 device->reg_base = reg_base; 725 750 726 - ioatdma_write8(device, IOAT_INTRCTRL_OFFSET, IOAT_INTRCTRL_MASTER_INT_EN); 751 + writeb(IOAT_INTRCTRL_MASTER_INT_EN, device->reg_base + IOAT_INTRCTRL_OFFSET); 727 752 pci_set_master(pdev); 728 753 729 754 INIT_LIST_HEAD(&device->common.channels); 730 755 enumerate_dma_channels(device); 731 756 757 + dma_cap_set(DMA_MEMCPY, device->common.cap_mask); 732 758 device->common.device_alloc_chan_resources = ioat_dma_alloc_chan_resources; 733 759 device->common.device_free_chan_resources = ioat_dma_free_chan_resources; 734 - device->common.device_memcpy_buf_to_buf = ioat_dma_memcpy_buf_to_buf; 735 - device->common.device_memcpy_buf_to_pg = ioat_dma_memcpy_buf_to_pg; 736 - device->common.device_memcpy_pg_to_pg = ioat_dma_memcpy_pg_to_pg; 737 - device->common.device_memcpy_complete = ioat_dma_is_complete; 738 - device->common.device_memcpy_issue_pending = ioat_dma_memcpy_issue_pending; 760 + device->common.device_prep_dma_memcpy = ioat_dma_prep_memcpy; 761 + device->common.device_is_tx_complete = ioat_dma_is_complete; 762 + device->common.device_issue_pending = ioat_dma_memcpy_issue_pending; 763 + device->common.device_dependency_added = ioat_dma_dependency_added; 764 + device->common.dev = &pdev->dev; 739 765 printk(KERN_INFO "Intel(R) I/OAT DMA Engine found, %d channels\n", 740 766 device->common.chancnt); 741 767 ··· 763 787 err_set_dma_mask: 764 788 pci_disable_device(pdev); 765 789 err_enable_device: 790 + 791 + printk(KERN_ERR "Intel(R) I/OAT DMA Engine initialization failed\n"); 792 + 766 793 return err; 794 + } 795 + 796 + static void ioat_shutdown(struct pci_dev *pdev) 797 + { 798 + struct ioat_device *device; 799 + device = pci_get_drvdata(pdev); 800 + 801 + dma_async_device_unregister(&device->common); 767 802 } 768 803 769 804 static void __devexit ioat_remove(struct pci_dev *pdev) ··· 805 818 } 806 819 807 820 /* MODULE API */ 808 - MODULE_VERSION("1.7"); 821 + MODULE_VERSION("1.9"); 809 822 MODULE_LICENSE("GPL"); 810 823 MODULE_AUTHOR("Intel Corporation"); 811 824
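With ioatdma now advertising DMA_MEMCPY through the generic descriptor interface, a consumer does not call driver entry points directly; it goes through the dmaengine helpers added in this series. The sketch below shows one offloaded copy followed by a synchronous wait, using only dma_async_memcpy_buf_to_buf() and dma_sync_wait() from the dmaengine.c hunks above; example_sync_copy() and its arguments are hypothetical.

#include <linux/errno.h>
#include <linux/dmaengine.h>

/* Sketch: 'chan' is a channel this client previously ACKed; 'dst' and 'src'
 * are kernel-resident buffers at least 'len' bytes long. */
static int example_sync_copy(struct dma_chan *chan, void *dst, void *src,
			     size_t len)
{
	dma_cookie_t cookie;

	/* maps the buffers, builds and submits one memcpy descriptor */
	cookie = dma_async_memcpy_buf_to_buf(chan, dst, src, len);
	if (cookie < 0)
		return -ENOMEM;

	/* dma_sync_wait() (added above) issues pending descriptors and
	 * polls the cookie until completion or its internal timeout */
	if (dma_sync_wait(chan, cookie) == DMA_ERROR)
		return -EIO;

	return 0;
}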
+6 -10
drivers/dma/ioatdma.h
··· 30 30 31 31 #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 32 32 33 - extern struct list_head dma_device_list; 34 - extern struct list_head dma_client_list; 35 - 36 33 /** 37 34 * struct ioat_device - internal representation of a IOAT device 38 35 * @pdev: PCI-Express device ··· 102 105 /** 103 106 * struct ioat_desc_sw - wrapper around hardware descriptor 104 107 * @hw: hardware DMA descriptor 105 - * @node: 106 - * @cookie: 107 - * @phys: 108 + * @node: this descriptor will either be on the free list, 109 + * or attached to a transaction list (async_tx.tx_list) 110 + * @tx_cnt: number of descriptors required to complete the transaction 111 + * @async_tx: the generic software descriptor for all engines 108 112 */ 109 - 110 113 struct ioat_desc_sw { 111 114 struct ioat_dma_descriptor *hw; 112 115 struct list_head node; 113 - dma_cookie_t cookie; 114 - dma_addr_t phys; 116 + int tx_cnt; 115 117 DECLARE_PCI_UNMAP_ADDR(src) 116 118 DECLARE_PCI_UNMAP_LEN(src_len) 117 119 DECLARE_PCI_UNMAP_ADDR(dst) 118 120 DECLARE_PCI_UNMAP_LEN(dst_len) 121 + struct dma_async_tx_descriptor async_tx; 119 122 }; 120 123 121 124 #endif /* IOATDMA_H */ 122 -
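The key structural change in this header is that the driver-private descriptor now embeds the generic struct dma_async_tx_descriptor, and ioatdma.c recovers its wrapper from a generic pointer with container_of() (the tx_to_ioat_desc() macro). For readers unfamiliar with the idiom, here is a stripped-down illustration using hypothetical example_ names:

#include <linux/kernel.h>	/* container_of() */
#include <linux/dmaengine.h>

/* Hypothetical driver-private descriptor, mirroring ioat_desc_sw above. */
struct example_desc {
	void *hw;				 /* engine-specific descriptor */
	struct dma_async_tx_descriptor async_tx; /* generic part, embedded */
};

static inline struct example_desc *
tx_to_example_desc(struct dma_async_tx_descriptor *tx)
{
	/* walk back from the embedded member to its containing struct */
	return container_of(tx, struct example_desc, async_tx);
}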
-118
drivers/dma/ioatdma_io.h
··· 1 - /* 2 - * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. 3 - * 4 - * This program is free software; you can redistribute it and/or modify it 5 - * under the terms of the GNU General Public License as published by the Free 6 - * Software Foundation; either version 2 of the License, or (at your option) 7 - * any later version. 8 - * 9 - * This program is distributed in the hope that it will be useful, but WITHOUT 10 - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 - * more details. 13 - * 14 - * You should have received a copy of the GNU General Public License along with 15 - * this program; if not, write to the Free Software Foundation, Inc., 59 16 - * Temple Place - Suite 330, Boston, MA 02111-1307, USA. 17 - * 18 - * The full GNU General Public License is included in this distribution in the 19 - * file called COPYING. 20 - */ 21 - #ifndef IOATDMA_IO_H 22 - #define IOATDMA_IO_H 23 - 24 - #include <asm/io.h> 25 - 26 - /* 27 - * device and per-channel MMIO register read and write functions 28 - * this is a lot of anoying inline functions, but it's typesafe 29 - */ 30 - 31 - static inline u8 ioatdma_read8(struct ioat_device *device, 32 - unsigned int offset) 33 - { 34 - return readb(device->reg_base + offset); 35 - } 36 - 37 - static inline u16 ioatdma_read16(struct ioat_device *device, 38 - unsigned int offset) 39 - { 40 - return readw(device->reg_base + offset); 41 - } 42 - 43 - static inline u32 ioatdma_read32(struct ioat_device *device, 44 - unsigned int offset) 45 - { 46 - return readl(device->reg_base + offset); 47 - } 48 - 49 - static inline void ioatdma_write8(struct ioat_device *device, 50 - unsigned int offset, u8 value) 51 - { 52 - writeb(value, device->reg_base + offset); 53 - } 54 - 55 - static inline void ioatdma_write16(struct ioat_device *device, 56 - unsigned int offset, u16 value) 57 - { 58 - writew(value, device->reg_base + offset); 59 - } 60 - 61 - static inline void ioatdma_write32(struct ioat_device *device, 62 - unsigned int offset, u32 value) 63 - { 64 - writel(value, device->reg_base + offset); 65 - } 66 - 67 - static inline u8 ioatdma_chan_read8(struct ioat_dma_chan *chan, 68 - unsigned int offset) 69 - { 70 - return readb(chan->reg_base + offset); 71 - } 72 - 73 - static inline u16 ioatdma_chan_read16(struct ioat_dma_chan *chan, 74 - unsigned int offset) 75 - { 76 - return readw(chan->reg_base + offset); 77 - } 78 - 79 - static inline u32 ioatdma_chan_read32(struct ioat_dma_chan *chan, 80 - unsigned int offset) 81 - { 82 - return readl(chan->reg_base + offset); 83 - } 84 - 85 - static inline void ioatdma_chan_write8(struct ioat_dma_chan *chan, 86 - unsigned int offset, u8 value) 87 - { 88 - writeb(value, chan->reg_base + offset); 89 - } 90 - 91 - static inline void ioatdma_chan_write16(struct ioat_dma_chan *chan, 92 - unsigned int offset, u16 value) 93 - { 94 - writew(value, chan->reg_base + offset); 95 - } 96 - 97 - static inline void ioatdma_chan_write32(struct ioat_dma_chan *chan, 98 - unsigned int offset, u32 value) 99 - { 100 - writel(value, chan->reg_base + offset); 101 - } 102 - 103 - #if (BITS_PER_LONG == 64) 104 - static inline u64 ioatdma_chan_read64(struct ioat_dma_chan *chan, 105 - unsigned int offset) 106 - { 107 - return readq(chan->reg_base + offset); 108 - } 109 - 110 - static inline void ioatdma_chan_write64(struct ioat_dma_chan *chan, 111 - unsigned int offset, u64 value) 112 - { 113 - writeq(value, chan->reg_base + 
offset); 114 - } 115 - #endif 116 - 117 - #endif /* IOATDMA_IO_H */ 118 -
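The deleted header only wrapped the standard MMIO accessors; the converted driver now calls readb/readw/readl and friends directly on the mapped register base. Below is a small hypothetical helper contrasting the two styles, assuming the usual ioatdma.h/ioatdma_registers.h definitions of struct ioat_dma_chan and IOAT_CHANERR_OFFSET; the register sequence itself is taken from the hunks above.

#include <asm/io.h>
#include "ioatdma.h"
#include "ioatdma_registers.h"

/* Hypothetical helper showing the new direct MMIO style; the old wrapper
 * calls it replaces are kept as comments. */
static u32 example_clear_chanerr(struct ioat_dma_chan *ioat_chan)
{
	u32 chanerr;

	/* was: chanerr = ioatdma_chan_read32(ioat_chan, IOAT_CHANERR_OFFSET); */
	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
	if (chanerr)
		/* was: ioatdma_chan_write32(ioat_chan, IOAT_CHANERR_OFFSET,
		 *	chanerr); */
		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);

	return chanerr;
}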
+1467
drivers/dma/iop-adma.c
··· 1 + /* 2 + * offload engine driver for the Intel Xscale series of i/o processors 3 + * Copyright © 2006, Intel Corporation. 4 + * 5 + * This program is free software; you can redistribute it and/or modify it 6 + * under the terms and conditions of the GNU General Public License, 7 + * version 2, as published by the Free Software Foundation. 8 + * 9 + * This program is distributed in the hope it will be useful, but WITHOUT 10 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 11 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 12 + * more details. 13 + * 14 + * You should have received a copy of the GNU General Public License along with 15 + * this program; if not, write to the Free Software Foundation, Inc., 16 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 17 + * 18 + */ 19 + 20 + /* 21 + * This driver supports the asynchrounous DMA copy and RAID engines available 22 + * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) 23 + */ 24 + 25 + #include <linux/init.h> 26 + #include <linux/module.h> 27 + #include <linux/async_tx.h> 28 + #include <linux/delay.h> 29 + #include <linux/dma-mapping.h> 30 + #include <linux/spinlock.h> 31 + #include <linux/interrupt.h> 32 + #include <linux/platform_device.h> 33 + #include <linux/memory.h> 34 + #include <linux/ioport.h> 35 + 36 + #include <asm/arch/adma.h> 37 + 38 + #define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common) 39 + #define to_iop_adma_device(dev) \ 40 + container_of(dev, struct iop_adma_device, common) 41 + #define tx_to_iop_adma_slot(tx) \ 42 + container_of(tx, struct iop_adma_desc_slot, async_tx) 43 + 44 + /** 45 + * iop_adma_free_slots - flags descriptor slots for reuse 46 + * @slot: Slot to free 47 + * Caller must hold &iop_chan->lock while calling this function 48 + */ 49 + static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) 50 + { 51 + int stride = slot->slots_per_op; 52 + 53 + while (stride--) { 54 + slot->slots_per_op = 0; 55 + slot = list_entry(slot->slot_node.next, 56 + struct iop_adma_desc_slot, 57 + slot_node); 58 + } 59 + } 60 + 61 + static dma_cookie_t 62 + iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, 63 + struct iop_adma_chan *iop_chan, dma_cookie_t cookie) 64 + { 65 + BUG_ON(desc->async_tx.cookie < 0); 66 + spin_lock_bh(&desc->async_tx.lock); 67 + if (desc->async_tx.cookie > 0) { 68 + cookie = desc->async_tx.cookie; 69 + desc->async_tx.cookie = 0; 70 + 71 + /* call the callback (must not sleep or submit new 72 + * operations to this channel) 73 + */ 74 + if (desc->async_tx.callback) 75 + desc->async_tx.callback( 76 + desc->async_tx.callback_param); 77 + 78 + /* unmap dma addresses 79 + * (unmap_single vs unmap_page?) 
80 + */ 81 + if (desc->group_head && desc->unmap_len) { 82 + struct iop_adma_desc_slot *unmap = desc->group_head; 83 + struct device *dev = 84 + &iop_chan->device->pdev->dev; 85 + u32 len = unmap->unmap_len; 86 + u32 src_cnt = unmap->unmap_src_cnt; 87 + dma_addr_t addr = iop_desc_get_dest_addr(unmap, 88 + iop_chan); 89 + 90 + dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); 91 + while (src_cnt--) { 92 + addr = iop_desc_get_src_addr(unmap, 93 + iop_chan, 94 + src_cnt); 95 + dma_unmap_page(dev, addr, len, 96 + DMA_TO_DEVICE); 97 + } 98 + desc->group_head = NULL; 99 + } 100 + } 101 + 102 + /* run dependent operations */ 103 + async_tx_run_dependencies(&desc->async_tx); 104 + spin_unlock_bh(&desc->async_tx.lock); 105 + 106 + return cookie; 107 + } 108 + 109 + static int 110 + iop_adma_clean_slot(struct iop_adma_desc_slot *desc, 111 + struct iop_adma_chan *iop_chan) 112 + { 113 + /* the client is allowed to attach dependent operations 114 + * until 'ack' is set 115 + */ 116 + if (!desc->async_tx.ack) 117 + return 0; 118 + 119 + /* leave the last descriptor in the chain 120 + * so we can append to it 121 + */ 122 + if (desc->chain_node.next == &iop_chan->chain) 123 + return 1; 124 + 125 + dev_dbg(iop_chan->device->common.dev, 126 + "\tfree slot: %d slots_per_op: %d\n", 127 + desc->idx, desc->slots_per_op); 128 + 129 + list_del(&desc->chain_node); 130 + iop_adma_free_slots(desc); 131 + 132 + return 0; 133 + } 134 + 135 + static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) 136 + { 137 + struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; 138 + dma_cookie_t cookie = 0; 139 + u32 current_desc = iop_chan_get_current_descriptor(iop_chan); 140 + int busy = iop_chan_is_busy(iop_chan); 141 + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; 142 + 143 + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); 144 + /* free completed slots from the chain starting with 145 + * the oldest descriptor 146 + */ 147 + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, 148 + chain_node) { 149 + pr_debug("\tcookie: %d slot: %d busy: %d " 150 + "this_desc: %#x next_desc: %#x ack: %d\n", 151 + iter->async_tx.cookie, iter->idx, busy, 152 + iter->async_tx.phys, iop_desc_get_next_desc(iter), 153 + iter->async_tx.ack); 154 + prefetch(_iter); 155 + prefetch(&_iter->async_tx); 156 + 157 + /* do not advance past the current descriptor loaded into the 158 + * hardware channel, subsequent descriptors are either in 159 + * process or have not been submitted 160 + */ 161 + if (seen_current) 162 + break; 163 + 164 + /* stop the search if we reach the current descriptor and the 165 + * channel is busy, or if it appears that the current descriptor 166 + * needs to be re-read (i.e. 
has been appended to) 167 + */ 168 + if (iter->async_tx.phys == current_desc) { 169 + BUG_ON(seen_current++); 170 + if (busy || iop_desc_get_next_desc(iter)) 171 + break; 172 + } 173 + 174 + /* detect the start of a group transaction */ 175 + if (!slot_cnt && !slots_per_op) { 176 + slot_cnt = iter->slot_cnt; 177 + slots_per_op = iter->slots_per_op; 178 + if (slot_cnt <= slots_per_op) { 179 + slot_cnt = 0; 180 + slots_per_op = 0; 181 + } 182 + } 183 + 184 + if (slot_cnt) { 185 + pr_debug("\tgroup++\n"); 186 + if (!grp_start) 187 + grp_start = iter; 188 + slot_cnt -= slots_per_op; 189 + } 190 + 191 + /* all the members of a group are complete */ 192 + if (slots_per_op != 0 && slot_cnt == 0) { 193 + struct iop_adma_desc_slot *grp_iter, *_grp_iter; 194 + int end_of_chain = 0; 195 + pr_debug("\tgroup end\n"); 196 + 197 + /* collect the total results */ 198 + if (grp_start->xor_check_result) { 199 + u32 zero_sum_result = 0; 200 + slot_cnt = grp_start->slot_cnt; 201 + grp_iter = grp_start; 202 + 203 + list_for_each_entry_from(grp_iter, 204 + &iop_chan->chain, chain_node) { 205 + zero_sum_result |= 206 + iop_desc_get_zero_result(grp_iter); 207 + pr_debug("\titer%d result: %d\n", 208 + grp_iter->idx, zero_sum_result); 209 + slot_cnt -= slots_per_op; 210 + if (slot_cnt == 0) 211 + break; 212 + } 213 + pr_debug("\tgrp_start->xor_check_result: %p\n", 214 + grp_start->xor_check_result); 215 + *grp_start->xor_check_result = zero_sum_result; 216 + } 217 + 218 + /* clean up the group */ 219 + slot_cnt = grp_start->slot_cnt; 220 + grp_iter = grp_start; 221 + list_for_each_entry_safe_from(grp_iter, _grp_iter, 222 + &iop_chan->chain, chain_node) { 223 + cookie = iop_adma_run_tx_complete_actions( 224 + grp_iter, iop_chan, cookie); 225 + 226 + slot_cnt -= slots_per_op; 227 + end_of_chain = iop_adma_clean_slot(grp_iter, 228 + iop_chan); 229 + 230 + if (slot_cnt == 0 || end_of_chain) 231 + break; 232 + } 233 + 234 + /* the group should be complete at this point */ 235 + BUG_ON(slot_cnt); 236 + 237 + slots_per_op = 0; 238 + grp_start = NULL; 239 + if (end_of_chain) 240 + break; 241 + else 242 + continue; 243 + } else if (slots_per_op) /* wait for group completion */ 244 + continue; 245 + 246 + /* write back zero sum results (single descriptor case) */ 247 + if (iter->xor_check_result && iter->async_tx.cookie) 248 + *iter->xor_check_result = 249 + iop_desc_get_zero_result(iter); 250 + 251 + cookie = iop_adma_run_tx_complete_actions( 252 + iter, iop_chan, cookie); 253 + 254 + if (iop_adma_clean_slot(iter, iop_chan)) 255 + break; 256 + } 257 + 258 + BUG_ON(!seen_current); 259 + 260 + iop_chan_idle(busy, iop_chan); 261 + 262 + if (cookie > 0) { 263 + iop_chan->completed_cookie = cookie; 264 + pr_debug("\tcompleted cookie %d\n", cookie); 265 + } 266 + } 267 + 268 + static void 269 + iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) 270 + { 271 + spin_lock_bh(&iop_chan->lock); 272 + __iop_adma_slot_cleanup(iop_chan); 273 + spin_unlock_bh(&iop_chan->lock); 274 + } 275 + 276 + static void iop_adma_tasklet(unsigned long data) 277 + { 278 + struct iop_adma_chan *chan = (struct iop_adma_chan *) data; 279 + __iop_adma_slot_cleanup(chan); 280 + } 281 + 282 + static struct iop_adma_desc_slot * 283 + iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, 284 + int slots_per_op) 285 + { 286 + struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; 287 + struct list_head chain = LIST_HEAD_INIT(chain); 288 + int slots_found, retry = 0; 289 + 290 + /* start search from the last allocated descrtiptor 291 + * 
if a contiguous allocation can not be found start searching 292 + * from the beginning of the list 293 + */ 294 + retry: 295 + slots_found = 0; 296 + if (retry == 0) 297 + iter = iop_chan->last_used; 298 + else 299 + iter = list_entry(&iop_chan->all_slots, 300 + struct iop_adma_desc_slot, 301 + slot_node); 302 + 303 + list_for_each_entry_safe_continue( 304 + iter, _iter, &iop_chan->all_slots, slot_node) { 305 + prefetch(_iter); 306 + prefetch(&_iter->async_tx); 307 + if (iter->slots_per_op) { 308 + /* give up after finding the first busy slot 309 + * on the second pass through the list 310 + */ 311 + if (retry) 312 + break; 313 + 314 + slots_found = 0; 315 + continue; 316 + } 317 + 318 + /* start the allocation if the slot is correctly aligned */ 319 + if (!slots_found++) { 320 + if (iop_desc_is_aligned(iter, slots_per_op)) 321 + alloc_start = iter; 322 + else { 323 + slots_found = 0; 324 + continue; 325 + } 326 + } 327 + 328 + if (slots_found == num_slots) { 329 + struct iop_adma_desc_slot *alloc_tail = NULL; 330 + struct iop_adma_desc_slot *last_used = NULL; 331 + iter = alloc_start; 332 + while (num_slots) { 333 + int i; 334 + dev_dbg(iop_chan->device->common.dev, 335 + "allocated slot: %d " 336 + "(desc %p phys: %#x) slots_per_op %d\n", 337 + iter->idx, iter->hw_desc, 338 + iter->async_tx.phys, slots_per_op); 339 + 340 + /* pre-ack all but the last descriptor */ 341 + if (num_slots != slots_per_op) 342 + iter->async_tx.ack = 1; 343 + else 344 + iter->async_tx.ack = 0; 345 + 346 + list_add_tail(&iter->chain_node, &chain); 347 + alloc_tail = iter; 348 + iter->async_tx.cookie = 0; 349 + iter->slot_cnt = num_slots; 350 + iter->xor_check_result = NULL; 351 + for (i = 0; i < slots_per_op; i++) { 352 + iter->slots_per_op = slots_per_op - i; 353 + last_used = iter; 354 + iter = list_entry(iter->slot_node.next, 355 + struct iop_adma_desc_slot, 356 + slot_node); 357 + } 358 + num_slots -= slots_per_op; 359 + } 360 + alloc_tail->group_head = alloc_start; 361 + alloc_tail->async_tx.cookie = -EBUSY; 362 + list_splice(&chain, &alloc_tail->async_tx.tx_list); 363 + iop_chan->last_used = last_used; 364 + iop_desc_clear_next_desc(alloc_start); 365 + iop_desc_clear_next_desc(alloc_tail); 366 + return alloc_tail; 367 + } 368 + } 369 + if (!retry++) 370 + goto retry; 371 + 372 + /* try to free some slots if the allocation fails */ 373 + tasklet_schedule(&iop_chan->irq_tasklet); 374 + 375 + return NULL; 376 + } 377 + 378 + static dma_cookie_t 379 + iop_desc_assign_cookie(struct iop_adma_chan *iop_chan, 380 + struct iop_adma_desc_slot *desc) 381 + { 382 + dma_cookie_t cookie = iop_chan->common.cookie; 383 + cookie++; 384 + if (cookie < 0) 385 + cookie = 1; 386 + iop_chan->common.cookie = desc->async_tx.cookie = cookie; 387 + return cookie; 388 + } 389 + 390 + static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) 391 + { 392 + dev_dbg(iop_chan->device->common.dev, "pending: %d\n", 393 + iop_chan->pending); 394 + 395 + if (iop_chan->pending >= IOP_ADMA_THRESHOLD) { 396 + iop_chan->pending = 0; 397 + iop_chan_append(iop_chan); 398 + } 399 + } 400 + 401 + static dma_cookie_t 402 + iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) 403 + { 404 + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); 405 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); 406 + struct iop_adma_desc_slot *grp_start, *old_chain_tail; 407 + int slot_cnt; 408 + int slots_per_op; 409 + dma_cookie_t cookie; 410 + 411 + grp_start = sw_desc->group_head; 412 + slot_cnt = grp_start->slot_cnt; 413 + 
slots_per_op = grp_start->slots_per_op; 414 + 415 + spin_lock_bh(&iop_chan->lock); 416 + cookie = iop_desc_assign_cookie(iop_chan, sw_desc); 417 + 418 + old_chain_tail = list_entry(iop_chan->chain.prev, 419 + struct iop_adma_desc_slot, chain_node); 420 + list_splice_init(&sw_desc->async_tx.tx_list, 421 + &old_chain_tail->chain_node); 422 + 423 + /* fix up the hardware chain */ 424 + iop_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys); 425 + 426 + /* 1/ don't add pre-chained descriptors 427 + * 2/ dummy read to flush next_desc write 428 + */ 429 + BUG_ON(iop_desc_get_next_desc(sw_desc)); 430 + 431 + /* increment the pending count by the number of slots 432 + * memcpy operations have a 1:1 (slot:operation) relation 433 + * other operations are heavier and will pop the threshold 434 + * more often. 435 + */ 436 + iop_chan->pending += slot_cnt; 437 + iop_adma_check_threshold(iop_chan); 438 + spin_unlock_bh(&iop_chan->lock); 439 + 440 + dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", 441 + __FUNCTION__, sw_desc->async_tx.cookie, sw_desc->idx); 442 + 443 + return cookie; 444 + } 445 + 446 + static void 447 + iop_adma_set_dest(dma_addr_t addr, struct dma_async_tx_descriptor *tx, 448 + int index) 449 + { 450 + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); 451 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); 452 + 453 + /* to do: support transfers lengths > IOP_ADMA_MAX_BYTE_COUNT */ 454 + iop_desc_set_dest_addr(sw_desc->group_head, iop_chan, addr); 455 + } 456 + 457 + static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); 458 + static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); 459 + 460 + /* returns the number of allocated descriptors */ 461 + static int iop_adma_alloc_chan_resources(struct dma_chan *chan) 462 + { 463 + char *hw_desc; 464 + int idx; 465 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 466 + struct iop_adma_desc_slot *slot = NULL; 467 + int init = iop_chan->slots_allocated ? 
0 : 1; 468 + struct iop_adma_platform_data *plat_data = 469 + iop_chan->device->pdev->dev.platform_data; 470 + int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; 471 + 472 + /* Allocate descriptor slots */ 473 + do { 474 + idx = iop_chan->slots_allocated; 475 + if (idx == num_descs_in_pool) 476 + break; 477 + 478 + slot = kzalloc(sizeof(*slot), GFP_KERNEL); 479 + if (!slot) { 480 + printk(KERN_INFO "IOP ADMA Channel only initialized" 481 + " %d descriptor slots", idx); 482 + break; 483 + } 484 + hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; 485 + slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; 486 + 487 + dma_async_tx_descriptor_init(&slot->async_tx, chan); 488 + slot->async_tx.tx_submit = iop_adma_tx_submit; 489 + slot->async_tx.tx_set_dest = iop_adma_set_dest; 490 + INIT_LIST_HEAD(&slot->chain_node); 491 + INIT_LIST_HEAD(&slot->slot_node); 492 + INIT_LIST_HEAD(&slot->async_tx.tx_list); 493 + hw_desc = (char *) iop_chan->device->dma_desc_pool; 494 + slot->async_tx.phys = 495 + (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; 496 + slot->idx = idx; 497 + 498 + spin_lock_bh(&iop_chan->lock); 499 + iop_chan->slots_allocated++; 500 + list_add_tail(&slot->slot_node, &iop_chan->all_slots); 501 + spin_unlock_bh(&iop_chan->lock); 502 + } while (iop_chan->slots_allocated < num_descs_in_pool); 503 + 504 + if (idx && !iop_chan->last_used) 505 + iop_chan->last_used = list_entry(iop_chan->all_slots.next, 506 + struct iop_adma_desc_slot, 507 + slot_node); 508 + 509 + dev_dbg(iop_chan->device->common.dev, 510 + "allocated %d descriptor slots last_used: %p\n", 511 + iop_chan->slots_allocated, iop_chan->last_used); 512 + 513 + /* initialize the channel and the chain with a null operation */ 514 + if (init) { 515 + if (dma_has_cap(DMA_MEMCPY, 516 + iop_chan->device->common.cap_mask)) 517 + iop_chan_start_null_memcpy(iop_chan); 518 + else if (dma_has_cap(DMA_XOR, 519 + iop_chan->device->common.cap_mask)) 520 + iop_chan_start_null_xor(iop_chan); 521 + else 522 + BUG(); 523 + } 524 + 525 + return (idx > 0) ? idx : -ENOMEM; 526 + } 527 + 528 + static struct dma_async_tx_descriptor * 529 + iop_adma_prep_dma_interrupt(struct dma_chan *chan) 530 + { 531 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 532 + struct iop_adma_desc_slot *sw_desc, *grp_start; 533 + int slot_cnt, slots_per_op; 534 + 535 + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); 536 + 537 + spin_lock_bh(&iop_chan->lock); 538 + slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); 539 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 540 + if (sw_desc) { 541 + grp_start = sw_desc->group_head; 542 + iop_desc_init_interrupt(grp_start, iop_chan); 543 + grp_start->unmap_len = 0; 544 + } 545 + spin_unlock_bh(&iop_chan->lock); 546 + 547 + return sw_desc ? 
&sw_desc->async_tx : NULL; 548 + } 549 + 550 + static void 551 + iop_adma_memcpy_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, 552 + int index) 553 + { 554 + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); 555 + struct iop_adma_desc_slot *grp_start = sw_desc->group_head; 556 + 557 + iop_desc_set_memcpy_src_addr(grp_start, addr); 558 + } 559 + 560 + static struct dma_async_tx_descriptor * 561 + iop_adma_prep_dma_memcpy(struct dma_chan *chan, size_t len, int int_en) 562 + { 563 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 564 + struct iop_adma_desc_slot *sw_desc, *grp_start; 565 + int slot_cnt, slots_per_op; 566 + 567 + if (unlikely(!len)) 568 + return NULL; 569 + BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); 570 + 571 + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", 572 + __FUNCTION__, len); 573 + 574 + spin_lock_bh(&iop_chan->lock); 575 + slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); 576 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 577 + if (sw_desc) { 578 + grp_start = sw_desc->group_head; 579 + iop_desc_init_memcpy(grp_start, int_en); 580 + iop_desc_set_byte_count(grp_start, iop_chan, len); 581 + sw_desc->unmap_src_cnt = 1; 582 + sw_desc->unmap_len = len; 583 + sw_desc->async_tx.tx_set_src = iop_adma_memcpy_set_src; 584 + } 585 + spin_unlock_bh(&iop_chan->lock); 586 + 587 + return sw_desc ? &sw_desc->async_tx : NULL; 588 + } 589 + 590 + static struct dma_async_tx_descriptor * 591 + iop_adma_prep_dma_memset(struct dma_chan *chan, int value, size_t len, 592 + int int_en) 593 + { 594 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 595 + struct iop_adma_desc_slot *sw_desc, *grp_start; 596 + int slot_cnt, slots_per_op; 597 + 598 + if (unlikely(!len)) 599 + return NULL; 600 + BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); 601 + 602 + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", 603 + __FUNCTION__, len); 604 + 605 + spin_lock_bh(&iop_chan->lock); 606 + slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op); 607 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 608 + if (sw_desc) { 609 + grp_start = sw_desc->group_head; 610 + iop_desc_init_memset(grp_start, int_en); 611 + iop_desc_set_byte_count(grp_start, iop_chan, len); 612 + iop_desc_set_block_fill_val(grp_start, value); 613 + sw_desc->unmap_src_cnt = 1; 614 + sw_desc->unmap_len = len; 615 + } 616 + spin_unlock_bh(&iop_chan->lock); 617 + 618 + return sw_desc ? 
&sw_desc->async_tx : NULL; 619 + } 620 + 621 + static void 622 + iop_adma_xor_set_src(dma_addr_t addr, struct dma_async_tx_descriptor *tx, 623 + int index) 624 + { 625 + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); 626 + struct iop_adma_desc_slot *grp_start = sw_desc->group_head; 627 + 628 + iop_desc_set_xor_src_addr(grp_start, index, addr); 629 + } 630 + 631 + static struct dma_async_tx_descriptor * 632 + iop_adma_prep_dma_xor(struct dma_chan *chan, unsigned int src_cnt, size_t len, 633 + int int_en) 634 + { 635 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 636 + struct iop_adma_desc_slot *sw_desc, *grp_start; 637 + int slot_cnt, slots_per_op; 638 + 639 + if (unlikely(!len)) 640 + return NULL; 641 + BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); 642 + 643 + dev_dbg(iop_chan->device->common.dev, 644 + "%s src_cnt: %d len: %u int_en: %d\n", 645 + __FUNCTION__, src_cnt, len, int_en); 646 + 647 + spin_lock_bh(&iop_chan->lock); 648 + slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); 649 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 650 + if (sw_desc) { 651 + grp_start = sw_desc->group_head; 652 + iop_desc_init_xor(grp_start, src_cnt, int_en); 653 + iop_desc_set_byte_count(grp_start, iop_chan, len); 654 + sw_desc->unmap_src_cnt = src_cnt; 655 + sw_desc->unmap_len = len; 656 + sw_desc->async_tx.tx_set_src = iop_adma_xor_set_src; 657 + } 658 + spin_unlock_bh(&iop_chan->lock); 659 + 660 + return sw_desc ? &sw_desc->async_tx : NULL; 661 + } 662 + 663 + static void 664 + iop_adma_xor_zero_sum_set_src(dma_addr_t addr, 665 + struct dma_async_tx_descriptor *tx, 666 + int index) 667 + { 668 + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); 669 + struct iop_adma_desc_slot *grp_start = sw_desc->group_head; 670 + 671 + iop_desc_set_zero_sum_src_addr(grp_start, index, addr); 672 + } 673 + 674 + static struct dma_async_tx_descriptor * 675 + iop_adma_prep_dma_zero_sum(struct dma_chan *chan, unsigned int src_cnt, 676 + size_t len, u32 *result, int int_en) 677 + { 678 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 679 + struct iop_adma_desc_slot *sw_desc, *grp_start; 680 + int slot_cnt, slots_per_op; 681 + 682 + if (unlikely(!len)) 683 + return NULL; 684 + 685 + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", 686 + __FUNCTION__, src_cnt, len); 687 + 688 + spin_lock_bh(&iop_chan->lock); 689 + slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); 690 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 691 + if (sw_desc) { 692 + grp_start = sw_desc->group_head; 693 + iop_desc_init_zero_sum(grp_start, src_cnt, int_en); 694 + iop_desc_set_zero_sum_byte_count(grp_start, len); 695 + grp_start->xor_check_result = result; 696 + pr_debug("\t%s: grp_start->xor_check_result: %p\n", 697 + __FUNCTION__, grp_start->xor_check_result); 698 + sw_desc->unmap_src_cnt = src_cnt; 699 + sw_desc->unmap_len = len; 700 + sw_desc->async_tx.tx_set_src = iop_adma_xor_zero_sum_set_src; 701 + } 702 + spin_unlock_bh(&iop_chan->lock); 703 + 704 + return sw_desc ? 
&sw_desc->async_tx : NULL; 705 + } 706 + 707 + static void iop_adma_dependency_added(struct dma_chan *chan) 708 + { 709 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 710 + tasklet_schedule(&iop_chan->irq_tasklet); 711 + } 712 + 713 + static void iop_adma_free_chan_resources(struct dma_chan *chan) 714 + { 715 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 716 + struct iop_adma_desc_slot *iter, *_iter; 717 + int in_use_descs = 0; 718 + 719 + iop_adma_slot_cleanup(iop_chan); 720 + 721 + spin_lock_bh(&iop_chan->lock); 722 + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, 723 + chain_node) { 724 + in_use_descs++; 725 + list_del(&iter->chain_node); 726 + } 727 + list_for_each_entry_safe_reverse( 728 + iter, _iter, &iop_chan->all_slots, slot_node) { 729 + list_del(&iter->slot_node); 730 + kfree(iter); 731 + iop_chan->slots_allocated--; 732 + } 733 + iop_chan->last_used = NULL; 734 + 735 + dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", 736 + __FUNCTION__, iop_chan->slots_allocated); 737 + spin_unlock_bh(&iop_chan->lock); 738 + 739 + /* one is ok since we left it on there on purpose */ 740 + if (in_use_descs > 1) 741 + printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", 742 + in_use_descs - 1); 743 + } 744 + 745 + /** 746 + * iop_adma_is_complete - poll the status of an ADMA transaction 747 + * @chan: ADMA channel handle 748 + * @cookie: ADMA transaction identifier 749 + */ 750 + static enum dma_status iop_adma_is_complete(struct dma_chan *chan, 751 + dma_cookie_t cookie, 752 + dma_cookie_t *done, 753 + dma_cookie_t *used) 754 + { 755 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 756 + dma_cookie_t last_used; 757 + dma_cookie_t last_complete; 758 + enum dma_status ret; 759 + 760 + last_used = chan->cookie; 761 + last_complete = iop_chan->completed_cookie; 762 + 763 + if (done) 764 + *done = last_complete; 765 + if (used) 766 + *used = last_used; 767 + 768 + ret = dma_async_is_complete(cookie, last_complete, last_used); 769 + if (ret == DMA_SUCCESS) 770 + return ret; 771 + 772 + iop_adma_slot_cleanup(iop_chan); 773 + 774 + last_used = chan->cookie; 775 + last_complete = iop_chan->completed_cookie; 776 + 777 + if (done) 778 + *done = last_complete; 779 + if (used) 780 + *used = last_used; 781 + 782 + return dma_async_is_complete(cookie, last_complete, last_used); 783 + } 784 + 785 + static irqreturn_t iop_adma_eot_handler(int irq, void *data) 786 + { 787 + struct iop_adma_chan *chan = data; 788 + 789 + dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__); 790 + 791 + tasklet_schedule(&chan->irq_tasklet); 792 + 793 + iop_adma_device_clear_eot_status(chan); 794 + 795 + return IRQ_HANDLED; 796 + } 797 + 798 + static irqreturn_t iop_adma_eoc_handler(int irq, void *data) 799 + { 800 + struct iop_adma_chan *chan = data; 801 + 802 + dev_dbg(chan->device->common.dev, "%s\n", __FUNCTION__); 803 + 804 + tasklet_schedule(&chan->irq_tasklet); 805 + 806 + iop_adma_device_clear_eoc_status(chan); 807 + 808 + return IRQ_HANDLED; 809 + } 810 + 811 + static irqreturn_t iop_adma_err_handler(int irq, void *data) 812 + { 813 + struct iop_adma_chan *chan = data; 814 + unsigned long status = iop_chan_get_status(chan); 815 + 816 + dev_printk(KERN_ERR, chan->device->common.dev, 817 + "error ( %s%s%s%s%s%s%s)\n", 818 + iop_is_err_int_parity(status, chan) ? "int_parity " : "", 819 + iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", 820 + iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", 821 + iop_is_err_int_mabort(status, chan) ? 
"int_mabort " : "", 822 + iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", 823 + iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", 824 + iop_is_err_split_tx(status, chan) ? "split_tx " : ""); 825 + 826 + iop_adma_device_clear_err_status(chan); 827 + 828 + BUG(); 829 + 830 + return IRQ_HANDLED; 831 + } 832 + 833 + static void iop_adma_issue_pending(struct dma_chan *chan) 834 + { 835 + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); 836 + 837 + if (iop_chan->pending) { 838 + iop_chan->pending = 0; 839 + iop_chan_append(iop_chan); 840 + } 841 + } 842 + 843 + /* 844 + * Perform a transaction to verify the HW works. 845 + */ 846 + #define IOP_ADMA_TEST_SIZE 2000 847 + 848 + static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) 849 + { 850 + int i; 851 + void *src, *dest; 852 + dma_addr_t src_dma, dest_dma; 853 + struct dma_chan *dma_chan; 854 + dma_cookie_t cookie; 855 + struct dma_async_tx_descriptor *tx; 856 + int err = 0; 857 + struct iop_adma_chan *iop_chan; 858 + 859 + dev_dbg(device->common.dev, "%s\n", __FUNCTION__); 860 + 861 + src = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL); 862 + if (!src) 863 + return -ENOMEM; 864 + dest = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, GFP_KERNEL); 865 + if (!dest) { 866 + kfree(src); 867 + return -ENOMEM; 868 + } 869 + 870 + /* Fill in src buffer */ 871 + for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) 872 + ((u8 *) src)[i] = (u8)i; 873 + 874 + memset(dest, 0, IOP_ADMA_TEST_SIZE); 875 + 876 + /* Start copy, using first DMA channel */ 877 + dma_chan = container_of(device->common.channels.next, 878 + struct dma_chan, 879 + device_node); 880 + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { 881 + err = -ENODEV; 882 + goto out; 883 + } 884 + 885 + tx = iop_adma_prep_dma_memcpy(dma_chan, IOP_ADMA_TEST_SIZE, 1); 886 + dest_dma = dma_map_single(dma_chan->device->dev, dest, 887 + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); 888 + iop_adma_set_dest(dest_dma, tx, 0); 889 + src_dma = dma_map_single(dma_chan->device->dev, src, 890 + IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); 891 + iop_adma_memcpy_set_src(src_dma, tx, 0); 892 + 893 + cookie = iop_adma_tx_submit(tx); 894 + iop_adma_issue_pending(dma_chan); 895 + async_tx_ack(tx); 896 + msleep(1); 897 + 898 + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != 899 + DMA_SUCCESS) { 900 + dev_printk(KERN_ERR, dma_chan->device->dev, 901 + "Self-test copy timed out, disabling\n"); 902 + err = -ENODEV; 903 + goto free_resources; 904 + } 905 + 906 + iop_chan = to_iop_adma_chan(dma_chan); 907 + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, 908 + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); 909 + if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { 910 + dev_printk(KERN_ERR, dma_chan->device->dev, 911 + "Self-test copy failed compare, disabling\n"); 912 + err = -ENODEV; 913 + goto free_resources; 914 + } 915 + 916 + free_resources: 917 + iop_adma_free_chan_resources(dma_chan); 918 + out: 919 + kfree(src); 920 + kfree(dest); 921 + return err; 922 + } 923 + 924 + #define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ 925 + static int __devinit 926 + iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) 927 + { 928 + int i, src_idx; 929 + struct page *dest; 930 + struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; 931 + struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; 932 + dma_addr_t dma_addr, dest_dma; 933 + struct dma_async_tx_descriptor *tx; 934 + struct dma_chan *dma_chan; 935 + dma_cookie_t cookie; 936 + u8 cmp_byte = 0; 937 + u32 cmp_word; 938 + u32 
zero_sum_result; 939 + int err = 0; 940 + struct iop_adma_chan *iop_chan; 941 + 942 + dev_dbg(device->common.dev, "%s\n", __FUNCTION__); 943 + 944 + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { 945 + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); 946 + if (!xor_srcs[src_idx]) 947 + while (src_idx--) { 948 + __free_page(xor_srcs[src_idx]); 949 + return -ENOMEM; 950 + } 951 + } 952 + 953 + dest = alloc_page(GFP_KERNEL); 954 + if (!dest) 955 + while (src_idx--) { 956 + __free_page(xor_srcs[src_idx]); 957 + return -ENOMEM; 958 + } 959 + 960 + /* Fill in src buffers */ 961 + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { 962 + u8 *ptr = page_address(xor_srcs[src_idx]); 963 + for (i = 0; i < PAGE_SIZE; i++) 964 + ptr[i] = (1 << src_idx); 965 + } 966 + 967 + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) 968 + cmp_byte ^= (u8) (1 << src_idx); 969 + 970 + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | 971 + (cmp_byte << 8) | cmp_byte; 972 + 973 + memset(page_address(dest), 0, PAGE_SIZE); 974 + 975 + dma_chan = container_of(device->common.channels.next, 976 + struct dma_chan, 977 + device_node); 978 + if (iop_adma_alloc_chan_resources(dma_chan) < 1) { 979 + err = -ENODEV; 980 + goto out; 981 + } 982 + 983 + /* test xor */ 984 + tx = iop_adma_prep_dma_xor(dma_chan, IOP_ADMA_NUM_SRC_TEST, 985 + PAGE_SIZE, 1); 986 + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, 987 + PAGE_SIZE, DMA_FROM_DEVICE); 988 + iop_adma_set_dest(dest_dma, tx, 0); 989 + 990 + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) { 991 + dma_addr = dma_map_page(dma_chan->device->dev, xor_srcs[i], 0, 992 + PAGE_SIZE, DMA_TO_DEVICE); 993 + iop_adma_xor_set_src(dma_addr, tx, i); 994 + } 995 + 996 + cookie = iop_adma_tx_submit(tx); 997 + iop_adma_issue_pending(dma_chan); 998 + async_tx_ack(tx); 999 + msleep(8); 1000 + 1001 + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != 1002 + DMA_SUCCESS) { 1003 + dev_printk(KERN_ERR, dma_chan->device->dev, 1004 + "Self-test xor timed out, disabling\n"); 1005 + err = -ENODEV; 1006 + goto free_resources; 1007 + } 1008 + 1009 + iop_chan = to_iop_adma_chan(dma_chan); 1010 + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, 1011 + PAGE_SIZE, DMA_FROM_DEVICE); 1012 + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { 1013 + u32 *ptr = page_address(dest); 1014 + if (ptr[i] != cmp_word) { 1015 + dev_printk(KERN_ERR, dma_chan->device->dev, 1016 + "Self-test xor failed compare, disabling\n"); 1017 + err = -ENODEV; 1018 + goto free_resources; 1019 + } 1020 + } 1021 + dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, 1022 + PAGE_SIZE, DMA_TO_DEVICE); 1023 + 1024 + /* skip zero sum if the capability is not present */ 1025 + if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) 1026 + goto free_resources; 1027 + 1028 + /* zero sum the sources with the destintation page */ 1029 + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) 1030 + zero_sum_srcs[i] = xor_srcs[i]; 1031 + zero_sum_srcs[i] = dest; 1032 + 1033 + zero_sum_result = 1; 1034 + 1035 + tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1, 1036 + PAGE_SIZE, &zero_sum_result, 1); 1037 + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) { 1038 + dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 1039 + 0, PAGE_SIZE, DMA_TO_DEVICE); 1040 + iop_adma_xor_zero_sum_set_src(dma_addr, tx, i); 1041 + } 1042 + 1043 + cookie = iop_adma_tx_submit(tx); 1044 + iop_adma_issue_pending(dma_chan); 1045 + async_tx_ack(tx); 1046 + msleep(8); 1047 + 1048 + 
if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 1049 + dev_printk(KERN_ERR, dma_chan->device->dev, 1050 + "Self-test zero sum timed out, disabling\n"); 1051 + err = -ENODEV; 1052 + goto free_resources; 1053 + } 1054 + 1055 + if (zero_sum_result != 0) { 1056 + dev_printk(KERN_ERR, dma_chan->device->dev, 1057 + "Self-test zero sum failed compare, disabling\n"); 1058 + err = -ENODEV; 1059 + goto free_resources; 1060 + } 1061 + 1062 + /* test memset */ 1063 + tx = iop_adma_prep_dma_memset(dma_chan, 0, PAGE_SIZE, 1); 1064 + dma_addr = dma_map_page(dma_chan->device->dev, dest, 0, 1065 + PAGE_SIZE, DMA_FROM_DEVICE); 1066 + iop_adma_set_dest(dma_addr, tx, 0); 1067 + 1068 + cookie = iop_adma_tx_submit(tx); 1069 + iop_adma_issue_pending(dma_chan); 1070 + async_tx_ack(tx); 1071 + msleep(8); 1072 + 1073 + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 1074 + dev_printk(KERN_ERR, dma_chan->device->dev, 1075 + "Self-test memset timed out, disabling\n"); 1076 + err = -ENODEV; 1077 + goto free_resources; 1078 + } 1079 + 1080 + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { 1081 + u32 *ptr = page_address(dest); 1082 + if (ptr[i]) { 1083 + dev_printk(KERN_ERR, dma_chan->device->dev, 1084 + "Self-test memset failed compare, disabling\n"); 1085 + err = -ENODEV; 1086 + goto free_resources; 1087 + } 1088 + } 1089 + 1090 + /* test for non-zero parity sum */ 1091 + zero_sum_result = 0; 1092 + tx = iop_adma_prep_dma_zero_sum(dma_chan, IOP_ADMA_NUM_SRC_TEST + 1, 1093 + PAGE_SIZE, &zero_sum_result, 1); 1094 + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) { 1095 + dma_addr = dma_map_page(dma_chan->device->dev, zero_sum_srcs[i], 1096 + 0, PAGE_SIZE, DMA_TO_DEVICE); 1097 + iop_adma_xor_zero_sum_set_src(dma_addr, tx, i); 1098 + } 1099 + 1100 + cookie = iop_adma_tx_submit(tx); 1101 + iop_adma_issue_pending(dma_chan); 1102 + async_tx_ack(tx); 1103 + msleep(8); 1104 + 1105 + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { 1106 + dev_printk(KERN_ERR, dma_chan->device->dev, 1107 + "Self-test non-zero sum timed out, disabling\n"); 1108 + err = -ENODEV; 1109 + goto free_resources; 1110 + } 1111 + 1112 + if (zero_sum_result != 1) { 1113 + dev_printk(KERN_ERR, dma_chan->device->dev, 1114 + "Self-test non-zero sum failed compare, disabling\n"); 1115 + err = -ENODEV; 1116 + goto free_resources; 1117 + } 1118 + 1119 + free_resources: 1120 + iop_adma_free_chan_resources(dma_chan); 1121 + out: 1122 + src_idx = IOP_ADMA_NUM_SRC_TEST; 1123 + while (src_idx--) 1124 + __free_page(xor_srcs[src_idx]); 1125 + __free_page(dest); 1126 + return err; 1127 + } 1128 + 1129 + static int __devexit iop_adma_remove(struct platform_device *dev) 1130 + { 1131 + struct iop_adma_device *device = platform_get_drvdata(dev); 1132 + struct dma_chan *chan, *_chan; 1133 + struct iop_adma_chan *iop_chan; 1134 + int i; 1135 + struct iop_adma_platform_data *plat_data = dev->dev.platform_data; 1136 + 1137 + dma_async_device_unregister(&device->common); 1138 + 1139 + for (i = 0; i < 3; i++) { 1140 + unsigned int irq; 1141 + irq = platform_get_irq(dev, i); 1142 + free_irq(irq, device); 1143 + } 1144 + 1145 + dma_free_coherent(&dev->dev, plat_data->pool_size, 1146 + device->dma_desc_pool_virt, device->dma_desc_pool); 1147 + 1148 + do { 1149 + struct resource *res; 1150 + res = platform_get_resource(dev, IORESOURCE_MEM, 0); 1151 + release_mem_region(res->start, res->end - res->start); 1152 + } while (0); 1153 + 1154 + list_for_each_entry_safe(chan, _chan, &device->common.channels, 1155 + 
device_node) { 1156 + iop_chan = to_iop_adma_chan(chan); 1157 + list_del(&chan->device_node); 1158 + kfree(iop_chan); 1159 + } 1160 + kfree(device); 1161 + 1162 + return 0; 1163 + } 1164 + 1165 + static int __devinit iop_adma_probe(struct platform_device *pdev) 1166 + { 1167 + struct resource *res; 1168 + int ret = 0, i; 1169 + struct iop_adma_device *adev; 1170 + struct iop_adma_chan *iop_chan; 1171 + struct dma_device *dma_dev; 1172 + struct iop_adma_platform_data *plat_data = pdev->dev.platform_data; 1173 + 1174 + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); 1175 + if (!res) 1176 + return -ENODEV; 1177 + 1178 + if (!devm_request_mem_region(&pdev->dev, res->start, 1179 + res->end - res->start, pdev->name)) 1180 + return -EBUSY; 1181 + 1182 + adev = kzalloc(sizeof(*adev), GFP_KERNEL); 1183 + if (!adev) 1184 + return -ENOMEM; 1185 + dma_dev = &adev->common; 1186 + 1187 + /* allocate coherent memory for hardware descriptors 1188 + * note: writecombine gives slightly better performance, but 1189 + * requires that we explicitly flush the writes 1190 + */ 1191 + if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, 1192 + plat_data->pool_size, 1193 + &adev->dma_desc_pool, 1194 + GFP_KERNEL)) == NULL) { 1195 + ret = -ENOMEM; 1196 + goto err_free_adev; 1197 + } 1198 + 1199 + dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n", 1200 + __FUNCTION__, adev->dma_desc_pool_virt, 1201 + (void *) adev->dma_desc_pool); 1202 + 1203 + adev->id = plat_data->hw_id; 1204 + 1205 + /* discover transaction capabilites from the platform data */ 1206 + dma_dev->cap_mask = plat_data->cap_mask; 1207 + 1208 + adev->pdev = pdev; 1209 + platform_set_drvdata(pdev, adev); 1210 + 1211 + INIT_LIST_HEAD(&dma_dev->channels); 1212 + 1213 + /* set base routines */ 1214 + dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; 1215 + dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; 1216 + dma_dev->device_is_tx_complete = iop_adma_is_complete; 1217 + dma_dev->device_issue_pending = iop_adma_issue_pending; 1218 + dma_dev->device_dependency_added = iop_adma_dependency_added; 1219 + dma_dev->dev = &pdev->dev; 1220 + 1221 + /* set prep routines based on capability */ 1222 + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) 1223 + dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; 1224 + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) 1225 + dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset; 1226 + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { 1227 + dma_dev->max_xor = iop_adma_get_max_xor(); 1228 + dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; 1229 + } 1230 + if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) 1231 + dma_dev->device_prep_dma_zero_sum = 1232 + iop_adma_prep_dma_zero_sum; 1233 + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) 1234 + dma_dev->device_prep_dma_interrupt = 1235 + iop_adma_prep_dma_interrupt; 1236 + 1237 + iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); 1238 + if (!iop_chan) { 1239 + ret = -ENOMEM; 1240 + goto err_free_dma; 1241 + } 1242 + iop_chan->device = adev; 1243 + 1244 + iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, 1245 + res->end - res->start); 1246 + if (!iop_chan->mmr_base) { 1247 + ret = -ENOMEM; 1248 + goto err_free_iop_chan; 1249 + } 1250 + tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long) 1251 + iop_chan); 1252 + 1253 + /* clear errors before enabling interrupts */ 1254 + iop_adma_device_clear_err_status(iop_chan); 1255 + 1256 + for (i = 0; i < 3; i++) { 1257 
+ irq_handler_t handler[] = { iop_adma_eot_handler, 1258 + iop_adma_eoc_handler, 1259 + iop_adma_err_handler }; 1260 + int irq = platform_get_irq(pdev, i); 1261 + if (irq < 0) { 1262 + ret = -ENXIO; 1263 + goto err_free_iop_chan; 1264 + } else { 1265 + ret = devm_request_irq(&pdev->dev, irq, 1266 + handler[i], 0, pdev->name, iop_chan); 1267 + if (ret) 1268 + goto err_free_iop_chan; 1269 + } 1270 + } 1271 + 1272 + spin_lock_init(&iop_chan->lock); 1273 + init_timer(&iop_chan->cleanup_watchdog); 1274 + iop_chan->cleanup_watchdog.data = (unsigned long) iop_chan; 1275 + iop_chan->cleanup_watchdog.function = iop_adma_tasklet; 1276 + INIT_LIST_HEAD(&iop_chan->chain); 1277 + INIT_LIST_HEAD(&iop_chan->all_slots); 1278 + INIT_RCU_HEAD(&iop_chan->common.rcu); 1279 + iop_chan->common.device = dma_dev; 1280 + list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); 1281 + 1282 + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { 1283 + ret = iop_adma_memcpy_self_test(adev); 1284 + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); 1285 + if (ret) 1286 + goto err_free_iop_chan; 1287 + } 1288 + 1289 + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || 1290 + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { 1291 + ret = iop_adma_xor_zero_sum_self_test(adev); 1292 + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); 1293 + if (ret) 1294 + goto err_free_iop_chan; 1295 + } 1296 + 1297 + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " 1298 + "( %s%s%s%s%s%s%s%s%s%s)\n", 1299 + dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", 1300 + dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", 1301 + dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", 1302 + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", 1303 + dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", 1304 + dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", 1305 + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", 1306 + dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", 1307 + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", 1308 + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? 
"intr " : ""); 1309 + 1310 + dma_async_device_register(dma_dev); 1311 + goto out; 1312 + 1313 + err_free_iop_chan: 1314 + kfree(iop_chan); 1315 + err_free_dma: 1316 + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, 1317 + adev->dma_desc_pool_virt, adev->dma_desc_pool); 1318 + err_free_adev: 1319 + kfree(adev); 1320 + out: 1321 + return ret; 1322 + } 1323 + 1324 + static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) 1325 + { 1326 + struct iop_adma_desc_slot *sw_desc, *grp_start; 1327 + dma_cookie_t cookie; 1328 + int slot_cnt, slots_per_op; 1329 + 1330 + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); 1331 + 1332 + spin_lock_bh(&iop_chan->lock); 1333 + slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); 1334 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 1335 + if (sw_desc) { 1336 + grp_start = sw_desc->group_head; 1337 + 1338 + list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); 1339 + sw_desc->async_tx.ack = 1; 1340 + iop_desc_init_memcpy(grp_start, 0); 1341 + iop_desc_set_byte_count(grp_start, iop_chan, 0); 1342 + iop_desc_set_dest_addr(grp_start, iop_chan, 0); 1343 + iop_desc_set_memcpy_src_addr(grp_start, 0); 1344 + 1345 + cookie = iop_chan->common.cookie; 1346 + cookie++; 1347 + if (cookie <= 1) 1348 + cookie = 2; 1349 + 1350 + /* initialize the completed cookie to be less than 1351 + * the most recently used cookie 1352 + */ 1353 + iop_chan->completed_cookie = cookie - 1; 1354 + iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; 1355 + 1356 + /* channel should not be busy */ 1357 + BUG_ON(iop_chan_is_busy(iop_chan)); 1358 + 1359 + /* clear any prior error-status bits */ 1360 + iop_adma_device_clear_err_status(iop_chan); 1361 + 1362 + /* disable operation */ 1363 + iop_chan_disable(iop_chan); 1364 + 1365 + /* set the descriptor address */ 1366 + iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); 1367 + 1368 + /* 1/ don't add pre-chained descriptors 1369 + * 2/ dummy read to flush next_desc write 1370 + */ 1371 + BUG_ON(iop_desc_get_next_desc(sw_desc)); 1372 + 1373 + /* run the descriptor */ 1374 + iop_chan_enable(iop_chan); 1375 + } else 1376 + dev_printk(KERN_ERR, iop_chan->device->common.dev, 1377 + "failed to allocate null descriptor\n"); 1378 + spin_unlock_bh(&iop_chan->lock); 1379 + } 1380 + 1381 + static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) 1382 + { 1383 + struct iop_adma_desc_slot *sw_desc, *grp_start; 1384 + dma_cookie_t cookie; 1385 + int slot_cnt, slots_per_op; 1386 + 1387 + dev_dbg(iop_chan->device->common.dev, "%s\n", __FUNCTION__); 1388 + 1389 + spin_lock_bh(&iop_chan->lock); 1390 + slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); 1391 + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); 1392 + if (sw_desc) { 1393 + grp_start = sw_desc->group_head; 1394 + list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); 1395 + sw_desc->async_tx.ack = 1; 1396 + iop_desc_init_null_xor(grp_start, 2, 0); 1397 + iop_desc_set_byte_count(grp_start, iop_chan, 0); 1398 + iop_desc_set_dest_addr(grp_start, iop_chan, 0); 1399 + iop_desc_set_xor_src_addr(grp_start, 0, 0); 1400 + iop_desc_set_xor_src_addr(grp_start, 1, 0); 1401 + 1402 + cookie = iop_chan->common.cookie; 1403 + cookie++; 1404 + if (cookie <= 1) 1405 + cookie = 2; 1406 + 1407 + /* initialize the completed cookie to be less than 1408 + * the most recently used cookie 1409 + */ 1410 + iop_chan->completed_cookie = cookie - 1; 1411 + iop_chan->common.cookie = 
sw_desc->async_tx.cookie = cookie; 1412 + 1413 + /* channel should not be busy */ 1414 + BUG_ON(iop_chan_is_busy(iop_chan)); 1415 + 1416 + /* clear any prior error-status bits */ 1417 + iop_adma_device_clear_err_status(iop_chan); 1418 + 1419 + /* disable operation */ 1420 + iop_chan_disable(iop_chan); 1421 + 1422 + /* set the descriptor address */ 1423 + iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); 1424 + 1425 + /* 1/ don't add pre-chained descriptors 1426 + * 2/ dummy read to flush next_desc write 1427 + */ 1428 + BUG_ON(iop_desc_get_next_desc(sw_desc)); 1429 + 1430 + /* run the descriptor */ 1431 + iop_chan_enable(iop_chan); 1432 + } else 1433 + dev_printk(KERN_ERR, iop_chan->device->common.dev, 1434 + "failed to allocate null descriptor\n"); 1435 + spin_unlock_bh(&iop_chan->lock); 1436 + } 1437 + 1438 + static struct platform_driver iop_adma_driver = { 1439 + .probe = iop_adma_probe, 1440 + .remove = iop_adma_remove, 1441 + .driver = { 1442 + .owner = THIS_MODULE, 1443 + .name = "iop-adma", 1444 + }, 1445 + }; 1446 + 1447 + static int __init iop_adma_init (void) 1448 + { 1449 + /* it's currently unsafe to unload this module */ 1450 + /* if forced, worst case is that rmmod hangs */ 1451 + __unsafe(THIS_MODULE); 1452 + 1453 + return platform_driver_register(&iop_adma_driver); 1454 + } 1455 + 1456 + static void __exit iop_adma_exit (void) 1457 + { 1458 + platform_driver_unregister(&iop_adma_driver); 1459 + return; 1460 + } 1461 + 1462 + module_init(iop_adma_init); 1463 + module_exit(iop_adma_exit); 1464 + 1465 + MODULE_AUTHOR("Intel Corporation"); 1466 + MODULE_DESCRIPTION("IOP ADMA Engine Driver"); 1467 + MODULE_LICENSE("GPL");
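For reference, the path a dmaengine client takes to reach the hooks registered in iop_adma_probe() (device_prep_dma_memcpy, the descriptor's tx_set_dest/tx_set_src/tx_submit, device_issue_pending, device_is_tx_complete) mirrors the driver's own memcpy self-test above. A minimal sketch, not part of the patch itself; the channel, DMA addresses and length are assumed to be set up and mapped by the caller:

    #include <linux/dmaengine.h>
    #include <linux/async_tx.h>
    #include <linux/delay.h>

    /* sketch: push one memcpy through the generic dmaengine hooks */
    static int example_adma_memcpy(struct dma_chan *chan, dma_addr_t dest,
                                   dma_addr_t src, size_t len)
    {
            struct dma_async_tx_descriptor *tx;
            dma_cookie_t cookie;

            tx = chan->device->device_prep_dma_memcpy(chan, len, 1 /* int_en */);
            if (!tx)
                    return -ENOMEM;

            tx->tx_set_dest(dest, tx, 0);     /* -> iop_adma_set_dest() */
            tx->tx_set_src(src, tx, 0);       /* -> iop_adma_memcpy_set_src() */
            cookie = tx->tx_submit(tx);       /* -> iop_adma_tx_submit() */
            chan->device->device_issue_pending(chan);
            async_tx_ack(tx);
            msleep(1);

            if (chan->device->device_is_tx_complete(chan, cookie, NULL, NULL)
                            != DMA_SUCCESS)
                    return -ENODEV;           /* same failure policy as the self-test */

            return 0;
    }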
+2
drivers/md/Kconfig
··· 109 109 config MD_RAID456
110 110 tristate "RAID-4/RAID-5/RAID-6 mode"
111 111 depends on BLK_DEV_MD
112 + select ASYNC_MEMCPY
113 + select ASYNC_XOR
112 114 ---help---
113 115 A RAID-5 set of N drives with a capacity of C MB per drive provides
114 116 the capacity of C * (N - 1) MB, and protects against a failure
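The two new selects pull in the async_tx helpers that the raid5.c changes later in this patch are built on. As a reference, a minimal sketch of the dependency-chaining pattern those helpers expose, using the same signatures and flags that appear in the raid5.c hunks below; the page, count and callback names here are hypothetical:

    #include <linux/async_tx.h>

    /* copy fresh data into one source page, then recompute parity;
     * ASYNC_TX_DEP_ACK chains the xor behind the copy and acks it */
    static void example_copy_then_xor(struct page *parity, struct page **srcs,
                                      int src_cnt, struct page *new_data,
                                      dma_async_tx_callback done, void *ctx)
    {
            struct dma_async_tx_descriptor *tx;

            tx = async_memcpy(srcs[0], new_data, 0, 0, PAGE_SIZE,
                              0, NULL, NULL, NULL);
            async_xor(parity, srcs, 0, src_cnt, PAGE_SIZE,
                      ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | ASYNC_TX_XOR_ZERO_DST,
                      tx, done, ctx);
    }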
+2 -2
drivers/md/Makefile
··· 18 18 hostprogs-y := mktables
19 19
20 20 # Note: link order is important. All raid personalities
21 - # and xor.o must come before md.o, as they each initialise
21 + # and must come before md.o, as they each initialise
22 22 # themselves, and md.o may use the personalities when it
23 23 # auto-initialised.
24 24
··· 26 26 obj-$(CONFIG_MD_RAID0) += raid0.o
27 27 obj-$(CONFIG_MD_RAID1) += raid1.o
28 28 obj-$(CONFIG_MD_RAID10) += raid10.o
29 - obj-$(CONFIG_MD_RAID456) += raid456.o xor.o
29 + obj-$(CONFIG_MD_RAID456) += raid456.o
30 30 obj-$(CONFIG_MD_MULTIPATH) += multipath.o
31 31 obj-$(CONFIG_MD_FAULTY) += faulty.o
32 32 obj-$(CONFIG_BLK_DEV_MD) += md-mod.o
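With xor.o gone from drivers/md, raid5 now calls the xor_blocks() library routine, whose destination is an explicit argument instead of the first entry of the pointer array (compare the check_xor() and compute_block_1() changes in the raid5.c hunk below). A small illustrative call with made-up buffer names:

    #include <linux/raid/xor.h>   /* declares xor_blocks() */

    /* dest ^= a ^ b; the source count no longer includes the destination */
    static void example_xor_two(void *dest, void *a, void *b, unsigned int bytes)
    {
            void *srcs[2] = { a, b };

            /* previously: xor_block(3, bytes, ptr) with ptr[0] == dest */
            xor_blocks(2, bytes, dest, srcs);
    }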
+1 -1
drivers/md/md.c
··· 5814 5814 }
5815 5815 }
5816 5816
5817 - module_init(md_init)
5817 + subsys_initcall(md_init);
5818 5818 module_exit(md_exit)
5819 5819
5820 5820 static int get_ro(char *buffer, struct kernel_param *kp)
+1676 -1043
drivers/md/raid5.c
··· 52 52 #include "raid6.h" 53 53 54 54 #include <linux/raid/bitmap.h> 55 + #include <linux/async_tx.h> 55 56 56 57 /* 57 58 * Stripe cache ··· 81 80 /* 82 81 * The following can be used to debug the driver 83 82 */ 84 - #define RAID5_DEBUG 0 85 83 #define RAID5_PARANOIA 1 86 84 #if RAID5_PARANOIA && defined(CONFIG_SMP) 87 85 # define CHECK_DEVLOCK() assert_spin_locked(&conf->device_lock) ··· 88 88 # define CHECK_DEVLOCK() 89 89 #endif 90 90 91 - #define PRINTK(x...) ((void)(RAID5_DEBUG && printk(x))) 92 - #if RAID5_DEBUG 91 + #ifdef DEBUG 93 92 #define inline 94 93 #define __inline__ 95 94 #endif ··· 103 104 disk++; 104 105 return (disk < raid_disks) ? disk : 0; 105 106 } 107 + 108 + static void return_io(struct bio *return_bi) 109 + { 110 + struct bio *bi = return_bi; 111 + while (bi) { 112 + int bytes = bi->bi_size; 113 + 114 + return_bi = bi->bi_next; 115 + bi->bi_next = NULL; 116 + bi->bi_size = 0; 117 + bi->bi_end_io(bi, bytes, 118 + test_bit(BIO_UPTODATE, &bi->bi_flags) 119 + ? 0 : -EIO); 120 + bi = return_bi; 121 + } 122 + } 123 + 106 124 static void print_raid5_conf (raid5_conf_t *conf); 107 125 108 126 static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) ··· 141 125 } 142 126 md_wakeup_thread(conf->mddev->thread); 143 127 } else { 128 + BUG_ON(sh->ops.pending); 144 129 if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 145 130 atomic_dec(&conf->preread_active_stripes); 146 131 if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) ··· 169 152 170 153 static inline void remove_hash(struct stripe_head *sh) 171 154 { 172 - PRINTK("remove_hash(), stripe %llu\n", (unsigned long long)sh->sector); 155 + pr_debug("remove_hash(), stripe %llu\n", 156 + (unsigned long long)sh->sector); 173 157 174 158 hlist_del_init(&sh->hash); 175 159 } ··· 179 161 { 180 162 struct hlist_head *hp = stripe_hash(conf, sh->sector); 181 163 182 - PRINTK("insert_hash(), stripe %llu\n", (unsigned long long)sh->sector); 164 + pr_debug("insert_hash(), stripe %llu\n", 165 + (unsigned long long)sh->sector); 183 166 184 167 CHECK_DEVLOCK(); 185 168 hlist_add_head(&sh->hash, hp); ··· 243 224 244 225 BUG_ON(atomic_read(&sh->count) != 0); 245 226 BUG_ON(test_bit(STRIPE_HANDLE, &sh->state)); 246 - 227 + BUG_ON(sh->ops.pending || sh->ops.ack || sh->ops.complete); 228 + 247 229 CHECK_DEVLOCK(); 248 - PRINTK("init_stripe called, stripe %llu\n", 230 + pr_debug("init_stripe called, stripe %llu\n", 249 231 (unsigned long long)sh->sector); 250 232 251 233 remove_hash(sh); ··· 260 240 for (i = sh->disks; i--; ) { 261 241 struct r5dev *dev = &sh->dev[i]; 262 242 263 - if (dev->toread || dev->towrite || dev->written || 243 + if (dev->toread || dev->read || dev->towrite || dev->written || 264 244 test_bit(R5_LOCKED, &dev->flags)) { 265 - printk("sector=%llx i=%d %p %p %p %d\n", 245 + printk(KERN_ERR "sector=%llx i=%d %p %p %p %p %d\n", 266 246 (unsigned long long)sh->sector, i, dev->toread, 267 - dev->towrite, dev->written, 247 + dev->read, dev->towrite, dev->written, 268 248 test_bit(R5_LOCKED, &dev->flags)); 269 249 BUG(); 270 250 } ··· 280 260 struct hlist_node *hn; 281 261 282 262 CHECK_DEVLOCK(); 283 - PRINTK("__find_stripe, sector %llu\n", (unsigned long long)sector); 263 + pr_debug("__find_stripe, sector %llu\n", (unsigned long long)sector); 284 264 hlist_for_each_entry(sh, hn, stripe_hash(conf, sector), hash) 285 265 if (sh->sector == sector && sh->disks == disks) 286 266 return sh; 287 - PRINTK("__stripe %llu not in cache\n", (unsigned long long)sector); 267 + pr_debug("__stripe %llu 
not in cache\n", (unsigned long long)sector); 288 268 return NULL; 289 269 } 290 270 ··· 296 276 { 297 277 struct stripe_head *sh; 298 278 299 - PRINTK("get_stripe, sector %llu\n", (unsigned long long)sector); 279 + pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); 300 280 301 281 spin_lock_irq(&conf->device_lock); 302 282 ··· 342 322 343 323 spin_unlock_irq(&conf->device_lock); 344 324 return sh; 325 + } 326 + 327 + /* test_and_ack_op() ensures that we only dequeue an operation once */ 328 + #define test_and_ack_op(op, pend) \ 329 + do { \ 330 + if (test_bit(op, &sh->ops.pending) && \ 331 + !test_bit(op, &sh->ops.complete)) { \ 332 + if (test_and_set_bit(op, &sh->ops.ack)) \ 333 + clear_bit(op, &pend); \ 334 + else \ 335 + ack++; \ 336 + } else \ 337 + clear_bit(op, &pend); \ 338 + } while (0) 339 + 340 + /* find new work to run, do not resubmit work that is already 341 + * in flight 342 + */ 343 + static unsigned long get_stripe_work(struct stripe_head *sh) 344 + { 345 + unsigned long pending; 346 + int ack = 0; 347 + 348 + pending = sh->ops.pending; 349 + 350 + test_and_ack_op(STRIPE_OP_BIOFILL, pending); 351 + test_and_ack_op(STRIPE_OP_COMPUTE_BLK, pending); 352 + test_and_ack_op(STRIPE_OP_PREXOR, pending); 353 + test_and_ack_op(STRIPE_OP_BIODRAIN, pending); 354 + test_and_ack_op(STRIPE_OP_POSTXOR, pending); 355 + test_and_ack_op(STRIPE_OP_CHECK, pending); 356 + if (test_and_clear_bit(STRIPE_OP_IO, &sh->ops.pending)) 357 + ack++; 358 + 359 + sh->ops.count -= ack; 360 + BUG_ON(sh->ops.count < 0); 361 + 362 + return pending; 363 + } 364 + 365 + static int 366 + raid5_end_read_request(struct bio *bi, unsigned int bytes_done, int error); 367 + static int 368 + raid5_end_write_request (struct bio *bi, unsigned int bytes_done, int error); 369 + 370 + static void ops_run_io(struct stripe_head *sh) 371 + { 372 + raid5_conf_t *conf = sh->raid_conf; 373 + int i, disks = sh->disks; 374 + 375 + might_sleep(); 376 + 377 + for (i = disks; i--; ) { 378 + int rw; 379 + struct bio *bi; 380 + mdk_rdev_t *rdev; 381 + if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) 382 + rw = WRITE; 383 + else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) 384 + rw = READ; 385 + else 386 + continue; 387 + 388 + bi = &sh->dev[i].req; 389 + 390 + bi->bi_rw = rw; 391 + if (rw == WRITE) 392 + bi->bi_end_io = raid5_end_write_request; 393 + else 394 + bi->bi_end_io = raid5_end_read_request; 395 + 396 + rcu_read_lock(); 397 + rdev = rcu_dereference(conf->disks[i].rdev); 398 + if (rdev && test_bit(Faulty, &rdev->flags)) 399 + rdev = NULL; 400 + if (rdev) 401 + atomic_inc(&rdev->nr_pending); 402 + rcu_read_unlock(); 403 + 404 + if (rdev) { 405 + if (test_bit(STRIPE_SYNCING, &sh->state) || 406 + test_bit(STRIPE_EXPAND_SOURCE, &sh->state) || 407 + test_bit(STRIPE_EXPAND_READY, &sh->state)) 408 + md_sync_acct(rdev->bdev, STRIPE_SECTORS); 409 + 410 + bi->bi_bdev = rdev->bdev; 411 + pr_debug("%s: for %llu schedule op %ld on disc %d\n", 412 + __FUNCTION__, (unsigned long long)sh->sector, 413 + bi->bi_rw, i); 414 + atomic_inc(&sh->count); 415 + bi->bi_sector = sh->sector + rdev->data_offset; 416 + bi->bi_flags = 1 << BIO_UPTODATE; 417 + bi->bi_vcnt = 1; 418 + bi->bi_max_vecs = 1; 419 + bi->bi_idx = 0; 420 + bi->bi_io_vec = &sh->dev[i].vec; 421 + bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 422 + bi->bi_io_vec[0].bv_offset = 0; 423 + bi->bi_size = STRIPE_SIZE; 424 + bi->bi_next = NULL; 425 + if (rw == WRITE && 426 + test_bit(R5_ReWrite, &sh->dev[i].flags)) 427 + atomic_add(STRIPE_SECTORS, 428 + 
&rdev->corrected_errors); 429 + generic_make_request(bi); 430 + } else { 431 + if (rw == WRITE) 432 + set_bit(STRIPE_DEGRADED, &sh->state); 433 + pr_debug("skip op %ld on disc %d for sector %llu\n", 434 + bi->bi_rw, i, (unsigned long long)sh->sector); 435 + clear_bit(R5_LOCKED, &sh->dev[i].flags); 436 + set_bit(STRIPE_HANDLE, &sh->state); 437 + } 438 + } 439 + } 440 + 441 + static struct dma_async_tx_descriptor * 442 + async_copy_data(int frombio, struct bio *bio, struct page *page, 443 + sector_t sector, struct dma_async_tx_descriptor *tx) 444 + { 445 + struct bio_vec *bvl; 446 + struct page *bio_page; 447 + int i; 448 + int page_offset; 449 + 450 + if (bio->bi_sector >= sector) 451 + page_offset = (signed)(bio->bi_sector - sector) * 512; 452 + else 453 + page_offset = (signed)(sector - bio->bi_sector) * -512; 454 + bio_for_each_segment(bvl, bio, i) { 455 + int len = bio_iovec_idx(bio, i)->bv_len; 456 + int clen; 457 + int b_offset = 0; 458 + 459 + if (page_offset < 0) { 460 + b_offset = -page_offset; 461 + page_offset += b_offset; 462 + len -= b_offset; 463 + } 464 + 465 + if (len > 0 && page_offset + len > STRIPE_SIZE) 466 + clen = STRIPE_SIZE - page_offset; 467 + else 468 + clen = len; 469 + 470 + if (clen > 0) { 471 + b_offset += bio_iovec_idx(bio, i)->bv_offset; 472 + bio_page = bio_iovec_idx(bio, i)->bv_page; 473 + if (frombio) 474 + tx = async_memcpy(page, bio_page, page_offset, 475 + b_offset, clen, 476 + ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_SRC, 477 + tx, NULL, NULL); 478 + else 479 + tx = async_memcpy(bio_page, page, b_offset, 480 + page_offset, clen, 481 + ASYNC_TX_DEP_ACK | ASYNC_TX_KMAP_DST, 482 + tx, NULL, NULL); 483 + } 484 + if (clen < len) /* hit end of page */ 485 + break; 486 + page_offset += len; 487 + } 488 + 489 + return tx; 490 + } 491 + 492 + static void ops_complete_biofill(void *stripe_head_ref) 493 + { 494 + struct stripe_head *sh = stripe_head_ref; 495 + struct bio *return_bi = NULL; 496 + raid5_conf_t *conf = sh->raid_conf; 497 + int i, more_to_read = 0; 498 + 499 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 500 + (unsigned long long)sh->sector); 501 + 502 + /* clear completed biofills */ 503 + for (i = sh->disks; i--; ) { 504 + struct r5dev *dev = &sh->dev[i]; 505 + /* check if this stripe has new incoming reads */ 506 + if (dev->toread) 507 + more_to_read++; 508 + 509 + /* acknowledge completion of a biofill operation */ 510 + /* and check if we need to reply to a read request 511 + */ 512 + if (test_bit(R5_Wantfill, &dev->flags) && !dev->toread) { 513 + struct bio *rbi, *rbi2; 514 + clear_bit(R5_Wantfill, &dev->flags); 515 + 516 + /* The access to dev->read is outside of the 517 + * spin_lock_irq(&conf->device_lock), but is protected 518 + * by the STRIPE_OP_BIOFILL pending bit 519 + */ 520 + BUG_ON(!dev->read); 521 + rbi = dev->read; 522 + dev->read = NULL; 523 + while (rbi && rbi->bi_sector < 524 + dev->sector + STRIPE_SECTORS) { 525 + rbi2 = r5_next_bio(rbi, dev->sector); 526 + spin_lock_irq(&conf->device_lock); 527 + if (--rbi->bi_phys_segments == 0) { 528 + rbi->bi_next = return_bi; 529 + return_bi = rbi; 530 + } 531 + spin_unlock_irq(&conf->device_lock); 532 + rbi = rbi2; 533 + } 534 + } 535 + } 536 + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.ack); 537 + clear_bit(STRIPE_OP_BIOFILL, &sh->ops.pending); 538 + 539 + return_io(return_bi); 540 + 541 + if (more_to_read) 542 + set_bit(STRIPE_HANDLE, &sh->state); 543 + release_stripe(sh); 544 + } 545 + 546 + static void ops_run_biofill(struct stripe_head *sh) 547 + { 548 + struct dma_async_tx_descriptor *tx = NULL; 
549 + raid5_conf_t *conf = sh->raid_conf; 550 + int i; 551 + 552 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 553 + (unsigned long long)sh->sector); 554 + 555 + for (i = sh->disks; i--; ) { 556 + struct r5dev *dev = &sh->dev[i]; 557 + if (test_bit(R5_Wantfill, &dev->flags)) { 558 + struct bio *rbi; 559 + spin_lock_irq(&conf->device_lock); 560 + dev->read = rbi = dev->toread; 561 + dev->toread = NULL; 562 + spin_unlock_irq(&conf->device_lock); 563 + while (rbi && rbi->bi_sector < 564 + dev->sector + STRIPE_SECTORS) { 565 + tx = async_copy_data(0, rbi, dev->page, 566 + dev->sector, tx); 567 + rbi = r5_next_bio(rbi, dev->sector); 568 + } 569 + } 570 + } 571 + 572 + atomic_inc(&sh->count); 573 + async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 574 + ops_complete_biofill, sh); 575 + } 576 + 577 + static void ops_complete_compute5(void *stripe_head_ref) 578 + { 579 + struct stripe_head *sh = stripe_head_ref; 580 + int target = sh->ops.target; 581 + struct r5dev *tgt = &sh->dev[target]; 582 + 583 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 584 + (unsigned long long)sh->sector); 585 + 586 + set_bit(R5_UPTODATE, &tgt->flags); 587 + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 588 + clear_bit(R5_Wantcompute, &tgt->flags); 589 + set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); 590 + set_bit(STRIPE_HANDLE, &sh->state); 591 + release_stripe(sh); 592 + } 593 + 594 + static struct dma_async_tx_descriptor * 595 + ops_run_compute5(struct stripe_head *sh, unsigned long pending) 596 + { 597 + /* kernel stack size limits the total number of disks */ 598 + int disks = sh->disks; 599 + struct page *xor_srcs[disks]; 600 + int target = sh->ops.target; 601 + struct r5dev *tgt = &sh->dev[target]; 602 + struct page *xor_dest = tgt->page; 603 + int count = 0; 604 + struct dma_async_tx_descriptor *tx; 605 + int i; 606 + 607 + pr_debug("%s: stripe %llu block: %d\n", 608 + __FUNCTION__, (unsigned long long)sh->sector, target); 609 + BUG_ON(!test_bit(R5_Wantcompute, &tgt->flags)); 610 + 611 + for (i = disks; i--; ) 612 + if (i != target) 613 + xor_srcs[count++] = sh->dev[i].page; 614 + 615 + atomic_inc(&sh->count); 616 + 617 + if (unlikely(count == 1)) 618 + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 619 + 0, NULL, ops_complete_compute5, sh); 620 + else 621 + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 622 + ASYNC_TX_XOR_ZERO_DST, NULL, 623 + ops_complete_compute5, sh); 624 + 625 + /* ack now if postxor is not set to be run */ 626 + if (tx && !test_bit(STRIPE_OP_POSTXOR, &pending)) 627 + async_tx_ack(tx); 628 + 629 + return tx; 630 + } 631 + 632 + static void ops_complete_prexor(void *stripe_head_ref) 633 + { 634 + struct stripe_head *sh = stripe_head_ref; 635 + 636 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 637 + (unsigned long long)sh->sector); 638 + 639 + set_bit(STRIPE_OP_PREXOR, &sh->ops.complete); 640 + } 641 + 642 + static struct dma_async_tx_descriptor * 643 + ops_run_prexor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 644 + { 645 + /* kernel stack size limits the total number of disks */ 646 + int disks = sh->disks; 647 + struct page *xor_srcs[disks]; 648 + int count = 0, pd_idx = sh->pd_idx, i; 649 + 650 + /* existing parity data subtracted */ 651 + struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 652 + 653 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 654 + (unsigned long long)sh->sector); 655 + 656 + for (i = disks; i--; ) { 657 + struct r5dev *dev = &sh->dev[i]; 658 + /* Only process blocks that are known to be uptodate */ 
659 + if (dev->towrite && test_bit(R5_Wantprexor, &dev->flags)) 660 + xor_srcs[count++] = dev->page; 661 + } 662 + 663 + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 664 + ASYNC_TX_DEP_ACK | ASYNC_TX_XOR_DROP_DST, tx, 665 + ops_complete_prexor, sh); 666 + 667 + return tx; 668 + } 669 + 670 + static struct dma_async_tx_descriptor * 671 + ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 672 + { 673 + int disks = sh->disks; 674 + int pd_idx = sh->pd_idx, i; 675 + 676 + /* check if prexor is active which means only process blocks 677 + * that are part of a read-modify-write (Wantprexor) 678 + */ 679 + int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 680 + 681 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 682 + (unsigned long long)sh->sector); 683 + 684 + for (i = disks; i--; ) { 685 + struct r5dev *dev = &sh->dev[i]; 686 + struct bio *chosen; 687 + int towrite; 688 + 689 + towrite = 0; 690 + if (prexor) { /* rmw */ 691 + if (dev->towrite && 692 + test_bit(R5_Wantprexor, &dev->flags)) 693 + towrite = 1; 694 + } else { /* rcw */ 695 + if (i != pd_idx && dev->towrite && 696 + test_bit(R5_LOCKED, &dev->flags)) 697 + towrite = 1; 698 + } 699 + 700 + if (towrite) { 701 + struct bio *wbi; 702 + 703 + spin_lock(&sh->lock); 704 + chosen = dev->towrite; 705 + dev->towrite = NULL; 706 + BUG_ON(dev->written); 707 + wbi = dev->written = chosen; 708 + spin_unlock(&sh->lock); 709 + 710 + while (wbi && wbi->bi_sector < 711 + dev->sector + STRIPE_SECTORS) { 712 + tx = async_copy_data(1, wbi, dev->page, 713 + dev->sector, tx); 714 + wbi = r5_next_bio(wbi, dev->sector); 715 + } 716 + } 717 + } 718 + 719 + return tx; 720 + } 721 + 722 + static void ops_complete_postxor(void *stripe_head_ref) 723 + { 724 + struct stripe_head *sh = stripe_head_ref; 725 + 726 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 727 + (unsigned long long)sh->sector); 728 + 729 + set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); 730 + set_bit(STRIPE_HANDLE, &sh->state); 731 + release_stripe(sh); 732 + } 733 + 734 + static void ops_complete_write(void *stripe_head_ref) 735 + { 736 + struct stripe_head *sh = stripe_head_ref; 737 + int disks = sh->disks, i, pd_idx = sh->pd_idx; 738 + 739 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 740 + (unsigned long long)sh->sector); 741 + 742 + for (i = disks; i--; ) { 743 + struct r5dev *dev = &sh->dev[i]; 744 + if (dev->written || i == pd_idx) 745 + set_bit(R5_UPTODATE, &dev->flags); 746 + } 747 + 748 + set_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); 749 + set_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); 750 + 751 + set_bit(STRIPE_HANDLE, &sh->state); 752 + release_stripe(sh); 753 + } 754 + 755 + static void 756 + ops_run_postxor(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) 757 + { 758 + /* kernel stack size limits the total number of disks */ 759 + int disks = sh->disks; 760 + struct page *xor_srcs[disks]; 761 + 762 + int count = 0, pd_idx = sh->pd_idx, i; 763 + struct page *xor_dest; 764 + int prexor = test_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 765 + unsigned long flags; 766 + dma_async_tx_callback callback; 767 + 768 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 769 + (unsigned long long)sh->sector); 770 + 771 + /* check if prexor is active which means only process blocks 772 + * that are part of a read-modify-write (written) 773 + */ 774 + if (prexor) { 775 + xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 776 + for (i = disks; i--; ) { 777 + struct r5dev *dev = &sh->dev[i]; 778 + if (dev->written) 779 + xor_srcs[count++] = 
dev->page; 780 + } 781 + } else { 782 + xor_dest = sh->dev[pd_idx].page; 783 + for (i = disks; i--; ) { 784 + struct r5dev *dev = &sh->dev[i]; 785 + if (i != pd_idx) 786 + xor_srcs[count++] = dev->page; 787 + } 788 + } 789 + 790 + /* check whether this postxor is part of a write */ 791 + callback = test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending) ? 792 + ops_complete_write : ops_complete_postxor; 793 + 794 + /* 1/ if we prexor'd then the dest is reused as a source 795 + * 2/ if we did not prexor then we are redoing the parity 796 + * set ASYNC_TX_XOR_DROP_DST and ASYNC_TX_XOR_ZERO_DST 797 + * for the synchronous xor case 798 + */ 799 + flags = ASYNC_TX_DEP_ACK | ASYNC_TX_ACK | 800 + (prexor ? ASYNC_TX_XOR_DROP_DST : ASYNC_TX_XOR_ZERO_DST); 801 + 802 + atomic_inc(&sh->count); 803 + 804 + if (unlikely(count == 1)) { 805 + flags &= ~(ASYNC_TX_XOR_DROP_DST | ASYNC_TX_XOR_ZERO_DST); 806 + tx = async_memcpy(xor_dest, xor_srcs[0], 0, 0, STRIPE_SIZE, 807 + flags, tx, callback, sh); 808 + } else 809 + tx = async_xor(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 810 + flags, tx, callback, sh); 811 + } 812 + 813 + static void ops_complete_check(void *stripe_head_ref) 814 + { 815 + struct stripe_head *sh = stripe_head_ref; 816 + int pd_idx = sh->pd_idx; 817 + 818 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 819 + (unsigned long long)sh->sector); 820 + 821 + if (test_and_clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending) && 822 + sh->ops.zero_sum_result == 0) 823 + set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 824 + 825 + set_bit(STRIPE_OP_CHECK, &sh->ops.complete); 826 + set_bit(STRIPE_HANDLE, &sh->state); 827 + release_stripe(sh); 828 + } 829 + 830 + static void ops_run_check(struct stripe_head *sh) 831 + { 832 + /* kernel stack size limits the total number of disks */ 833 + int disks = sh->disks; 834 + struct page *xor_srcs[disks]; 835 + struct dma_async_tx_descriptor *tx; 836 + 837 + int count = 0, pd_idx = sh->pd_idx, i; 838 + struct page *xor_dest = xor_srcs[count++] = sh->dev[pd_idx].page; 839 + 840 + pr_debug("%s: stripe %llu\n", __FUNCTION__, 841 + (unsigned long long)sh->sector); 842 + 843 + for (i = disks; i--; ) { 844 + struct r5dev *dev = &sh->dev[i]; 845 + if (i != pd_idx) 846 + xor_srcs[count++] = dev->page; 847 + } 848 + 849 + tx = async_xor_zero_sum(xor_dest, xor_srcs, 0, count, STRIPE_SIZE, 850 + &sh->ops.zero_sum_result, 0, NULL, NULL, NULL); 851 + 852 + if (tx) 853 + set_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); 854 + else 855 + clear_bit(STRIPE_OP_MOD_DMA_CHECK, &sh->ops.pending); 856 + 857 + atomic_inc(&sh->count); 858 + tx = async_trigger_callback(ASYNC_TX_DEP_ACK | ASYNC_TX_ACK, tx, 859 + ops_complete_check, sh); 860 + } 861 + 862 + static void raid5_run_ops(struct stripe_head *sh, unsigned long pending) 863 + { 864 + int overlap_clear = 0, i, disks = sh->disks; 865 + struct dma_async_tx_descriptor *tx = NULL; 866 + 867 + if (test_bit(STRIPE_OP_BIOFILL, &pending)) { 868 + ops_run_biofill(sh); 869 + overlap_clear++; 870 + } 871 + 872 + if (test_bit(STRIPE_OP_COMPUTE_BLK, &pending)) 873 + tx = ops_run_compute5(sh, pending); 874 + 875 + if (test_bit(STRIPE_OP_PREXOR, &pending)) 876 + tx = ops_run_prexor(sh, tx); 877 + 878 + if (test_bit(STRIPE_OP_BIODRAIN, &pending)) { 879 + tx = ops_run_biodrain(sh, tx); 880 + overlap_clear++; 881 + } 882 + 883 + if (test_bit(STRIPE_OP_POSTXOR, &pending)) 884 + ops_run_postxor(sh, tx); 885 + 886 + if (test_bit(STRIPE_OP_CHECK, &pending)) 887 + ops_run_check(sh); 888 + 889 + if (test_bit(STRIPE_OP_IO, &pending)) 890 + ops_run_io(sh); 891 + 892 
+ if (overlap_clear) 893 + for (i = disks; i--; ) { 894 + struct r5dev *dev = &sh->dev[i]; 895 + if (test_and_clear_bit(R5_Overlap, &dev->flags)) 896 + wake_up(&sh->raid_conf->wait_for_overlap); 897 + } 345 898 } 346 899 347 900 static int grow_one_stripe(raid5_conf_t *conf) ··· 1130 537 if (bi == &sh->dev[i].req) 1131 538 break; 1132 539 1133 - PRINTK("end_read_request %llu/%d, count: %d, uptodate %d.\n", 1134 - (unsigned long long)sh->sector, i, atomic_read(&sh->count), 540 + pr_debug("end_read_request %llu/%d, count: %d, uptodate %d.\n", 541 + (unsigned long long)sh->sector, i, atomic_read(&sh->count), 1135 542 uptodate); 1136 543 if (i == disks) { 1137 544 BUG(); ··· 1206 613 if (bi == &sh->dev[i].req) 1207 614 break; 1208 615 1209 - PRINTK("end_write_request %llu/%d, count %d, uptodate: %d.\n", 616 + pr_debug("end_write_request %llu/%d, count %d, uptodate: %d.\n", 1210 617 (unsigned long long)sh->sector, i, atomic_read(&sh->count), 1211 618 uptodate); 1212 619 if (i == disks) { ··· 1251 658 { 1252 659 char b[BDEVNAME_SIZE]; 1253 660 raid5_conf_t *conf = (raid5_conf_t *) mddev->private; 1254 - PRINTK("raid5: error called\n"); 661 + pr_debug("raid5: error called\n"); 1255 662 1256 663 if (!test_bit(Faulty, &rdev->flags)) { 1257 664 set_bit(MD_CHANGE_DEVS, &mddev->flags); ··· 1509 916 } 1510 917 } 1511 918 1512 - #define check_xor() do { \ 1513 - if (count == MAX_XOR_BLOCKS) { \ 1514 - xor_block(count, STRIPE_SIZE, ptr); \ 1515 - count = 1; \ 1516 - } \ 919 + #define check_xor() do { \ 920 + if (count == MAX_XOR_BLOCKS) { \ 921 + xor_blocks(count, STRIPE_SIZE, dest, ptr);\ 922 + count = 0; \ 923 + } \ 1517 924 } while(0) 1518 - 1519 - 1520 - static void compute_block(struct stripe_head *sh, int dd_idx) 1521 - { 1522 - int i, count, disks = sh->disks; 1523 - void *ptr[MAX_XOR_BLOCKS], *p; 1524 - 1525 - PRINTK("compute_block, stripe %llu, idx %d\n", 1526 - (unsigned long long)sh->sector, dd_idx); 1527 - 1528 - ptr[0] = page_address(sh->dev[dd_idx].page); 1529 - memset(ptr[0], 0, STRIPE_SIZE); 1530 - count = 1; 1531 - for (i = disks ; i--; ) { 1532 - if (i == dd_idx) 1533 - continue; 1534 - p = page_address(sh->dev[i].page); 1535 - if (test_bit(R5_UPTODATE, &sh->dev[i].flags)) 1536 - ptr[count++] = p; 1537 - else 1538 - printk(KERN_ERR "compute_block() %d, stripe %llu, %d" 1539 - " not present\n", dd_idx, 1540 - (unsigned long long)sh->sector, i); 1541 - 1542 - check_xor(); 1543 - } 1544 - if (count != 1) 1545 - xor_block(count, STRIPE_SIZE, ptr); 1546 - set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); 1547 - } 1548 - 1549 - static void compute_parity5(struct stripe_head *sh, int method) 1550 - { 1551 - raid5_conf_t *conf = sh->raid_conf; 1552 - int i, pd_idx = sh->pd_idx, disks = sh->disks, count; 1553 - void *ptr[MAX_XOR_BLOCKS]; 1554 - struct bio *chosen; 1555 - 1556 - PRINTK("compute_parity5, stripe %llu, method %d\n", 1557 - (unsigned long long)sh->sector, method); 1558 - 1559 - count = 1; 1560 - ptr[0] = page_address(sh->dev[pd_idx].page); 1561 - switch(method) { 1562 - case READ_MODIFY_WRITE: 1563 - BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags)); 1564 - for (i=disks ; i-- ;) { 1565 - if (i==pd_idx) 1566 - continue; 1567 - if (sh->dev[i].towrite && 1568 - test_bit(R5_UPTODATE, &sh->dev[i].flags)) { 1569 - ptr[count++] = page_address(sh->dev[i].page); 1570 - chosen = sh->dev[i].towrite; 1571 - sh->dev[i].towrite = NULL; 1572 - 1573 - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 1574 - wake_up(&conf->wait_for_overlap); 1575 - 1576 - BUG_ON(sh->dev[i].written); 1577 - 
sh->dev[i].written = chosen; 1578 - check_xor(); 1579 - } 1580 - } 1581 - break; 1582 - case RECONSTRUCT_WRITE: 1583 - memset(ptr[0], 0, STRIPE_SIZE); 1584 - for (i= disks; i-- ;) 1585 - if (i!=pd_idx && sh->dev[i].towrite) { 1586 - chosen = sh->dev[i].towrite; 1587 - sh->dev[i].towrite = NULL; 1588 - 1589 - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 1590 - wake_up(&conf->wait_for_overlap); 1591 - 1592 - BUG_ON(sh->dev[i].written); 1593 - sh->dev[i].written = chosen; 1594 - } 1595 - break; 1596 - case CHECK_PARITY: 1597 - break; 1598 - } 1599 - if (count>1) { 1600 - xor_block(count, STRIPE_SIZE, ptr); 1601 - count = 1; 1602 - } 1603 - 1604 - for (i = disks; i--;) 1605 - if (sh->dev[i].written) { 1606 - sector_t sector = sh->dev[i].sector; 1607 - struct bio *wbi = sh->dev[i].written; 1608 - while (wbi && wbi->bi_sector < sector + STRIPE_SECTORS) { 1609 - copy_data(1, wbi, sh->dev[i].page, sector); 1610 - wbi = r5_next_bio(wbi, sector); 1611 - } 1612 - 1613 - set_bit(R5_LOCKED, &sh->dev[i].flags); 1614 - set_bit(R5_UPTODATE, &sh->dev[i].flags); 1615 - } 1616 - 1617 - switch(method) { 1618 - case RECONSTRUCT_WRITE: 1619 - case CHECK_PARITY: 1620 - for (i=disks; i--;) 1621 - if (i != pd_idx) { 1622 - ptr[count++] = page_address(sh->dev[i].page); 1623 - check_xor(); 1624 - } 1625 - break; 1626 - case READ_MODIFY_WRITE: 1627 - for (i = disks; i--;) 1628 - if (sh->dev[i].written) { 1629 - ptr[count++] = page_address(sh->dev[i].page); 1630 - check_xor(); 1631 - } 1632 - } 1633 - if (count != 1) 1634 - xor_block(count, STRIPE_SIZE, ptr); 1635 - 1636 - if (method != CHECK_PARITY) { 1637 - set_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 1638 - set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 1639 - } else 1640 - clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 1641 - } 1642 925 1643 926 static void compute_parity6(struct stripe_head *sh, int method) 1644 927 { ··· 1527 1058 qd_idx = raid6_next_disk(pd_idx, disks); 1528 1059 d0_idx = raid6_next_disk(qd_idx, disks); 1529 1060 1530 - PRINTK("compute_parity, stripe %llu, method %d\n", 1061 + pr_debug("compute_parity, stripe %llu, method %d\n", 1531 1062 (unsigned long long)sh->sector, method); 1532 1063 1533 1064 switch(method) { ··· 1601 1132 static void compute_block_1(struct stripe_head *sh, int dd_idx, int nozero) 1602 1133 { 1603 1134 int i, count, disks = sh->disks; 1604 - void *ptr[MAX_XOR_BLOCKS], *p; 1135 + void *ptr[MAX_XOR_BLOCKS], *dest, *p; 1605 1136 int pd_idx = sh->pd_idx; 1606 1137 int qd_idx = raid6_next_disk(pd_idx, disks); 1607 1138 1608 - PRINTK("compute_block_1, stripe %llu, idx %d\n", 1139 + pr_debug("compute_block_1, stripe %llu, idx %d\n", 1609 1140 (unsigned long long)sh->sector, dd_idx); 1610 1141 1611 1142 if ( dd_idx == qd_idx ) { 1612 1143 /* We're actually computing the Q drive */ 1613 1144 compute_parity6(sh, UPDATE_PARITY); 1614 1145 } else { 1615 - ptr[0] = page_address(sh->dev[dd_idx].page); 1616 - if (!nozero) memset(ptr[0], 0, STRIPE_SIZE); 1617 - count = 1; 1146 + dest = page_address(sh->dev[dd_idx].page); 1147 + if (!nozero) memset(dest, 0, STRIPE_SIZE); 1148 + count = 0; 1618 1149 for (i = disks ; i--; ) { 1619 1150 if (i == dd_idx || i == qd_idx) 1620 1151 continue; ··· 1628 1159 1629 1160 check_xor(); 1630 1161 } 1631 - if (count != 1) 1632 - xor_block(count, STRIPE_SIZE, ptr); 1162 + if (count) 1163 + xor_blocks(count, STRIPE_SIZE, dest, ptr); 1633 1164 if (!nozero) set_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); 1634 1165 else clear_bit(R5_UPTODATE, &sh->dev[dd_idx].flags); 1635 1166 } ··· 1652 1183 
BUG_ON(faila == failb); 1653 1184 if ( failb < faila ) { int tmp = faila; faila = failb; failb = tmp; } 1654 1185 1655 - PRINTK("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", 1186 + pr_debug("compute_block_2, stripe %llu, idx %d,%d (%d,%d)\n", 1656 1187 (unsigned long long)sh->sector, dd_idx1, dd_idx2, faila, failb); 1657 1188 1658 1189 if ( failb == disks-1 ) { ··· 1698 1229 } 1699 1230 } 1700 1231 1232 + static int 1233 + handle_write_operations5(struct stripe_head *sh, int rcw, int expand) 1234 + { 1235 + int i, pd_idx = sh->pd_idx, disks = sh->disks; 1236 + int locked = 0; 1701 1237 1238 + if (rcw) { 1239 + /* if we are not expanding this is a proper write request, and 1240 + * there will be bios with new data to be drained into the 1241 + * stripe cache 1242 + */ 1243 + if (!expand) { 1244 + set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); 1245 + sh->ops.count++; 1246 + } 1247 + 1248 + set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); 1249 + sh->ops.count++; 1250 + 1251 + for (i = disks; i--; ) { 1252 + struct r5dev *dev = &sh->dev[i]; 1253 + 1254 + if (dev->towrite) { 1255 + set_bit(R5_LOCKED, &dev->flags); 1256 + if (!expand) 1257 + clear_bit(R5_UPTODATE, &dev->flags); 1258 + locked++; 1259 + } 1260 + } 1261 + } else { 1262 + BUG_ON(!(test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags) || 1263 + test_bit(R5_Wantcompute, &sh->dev[pd_idx].flags))); 1264 + 1265 + set_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 1266 + set_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); 1267 + set_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); 1268 + 1269 + sh->ops.count += 3; 1270 + 1271 + for (i = disks; i--; ) { 1272 + struct r5dev *dev = &sh->dev[i]; 1273 + if (i == pd_idx) 1274 + continue; 1275 + 1276 + /* For a read-modify write there may be blocks that are 1277 + * locked for reading while others are ready to be 1278 + * written so we distinguish these blocks by the 1279 + * R5_Wantprexor bit 1280 + */ 1281 + if (dev->towrite && 1282 + (test_bit(R5_UPTODATE, &dev->flags) || 1283 + test_bit(R5_Wantcompute, &dev->flags))) { 1284 + set_bit(R5_Wantprexor, &dev->flags); 1285 + set_bit(R5_LOCKED, &dev->flags); 1286 + clear_bit(R5_UPTODATE, &dev->flags); 1287 + locked++; 1288 + } 1289 + } 1290 + } 1291 + 1292 + /* keep the parity disk locked while asynchronous operations 1293 + * are in flight 1294 + */ 1295 + set_bit(R5_LOCKED, &sh->dev[pd_idx].flags); 1296 + clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); 1297 + locked++; 1298 + 1299 + pr_debug("%s: stripe %llu locked: %d pending: %lx\n", 1300 + __FUNCTION__, (unsigned long long)sh->sector, 1301 + locked, sh->ops.pending); 1302 + 1303 + return locked; 1304 + } 1702 1305 1703 1306 /* 1704 1307 * Each stripe/dev can have one or more bion attached. 
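
The hunk above only requests work: handle_write_operations5 marks the operations a stripe needs (prexor for read-modify-write, biodrain, postxor) as bits in sh->ops.pending while sh->lock is held and locks the affected blocks; the bits are later snapshotted by get_stripe_work() and turned into async_tx calls by raid5_run_ops() outside the lock. Below is a minimal userspace sketch of that request/execute split; it ignores the expand case, and the names (struct stripe, OP_*, request_write_ops, run_ops) are illustrative only, not the kernel's.

	#include <stdio.h>

	enum { OP_PREXOR = 1 << 0, OP_BIODRAIN = 1 << 1, OP_POSTXOR = 1 << 2 };

	struct stripe {
		unsigned long pending;	/* ops requested but not yet started */
		int count;		/* number of requested ops */
	};

	/* request phase: runs with the stripe "lock" held, only sets flags */
	static void request_write_ops(struct stripe *s, int rcw)
	{
		if (!rcw) {
			s->pending |= OP_PREXOR;	/* rmw: subtract old data first */
			s->count++;
		}
		s->pending |= OP_BIODRAIN;	/* copy new bio data into the cache */
		s->pending |= OP_POSTXOR;	/* xor new data into the parity block */
		s->count += 2;
	}

	/* execute phase: runs outside the lock on a snapshot of the pending bits */
	static void run_ops(unsigned long pending)
	{
		if (pending & OP_PREXOR)
			printf("prexor\n");
		if (pending & OP_BIODRAIN)
			printf("biodrain\n");
		if (pending & OP_POSTXOR)
			printf("postxor\n");
	}

	int main(void)
	{
		struct stripe s = { 0, 0 };
		unsigned long work;

		request_write_ops(&s, 0);	/* take the read-modify-write path */

		work = s.pending;		/* analogue of get_stripe_work() */
		s.pending = 0;
		s.count = 0;

		run_ops(work);
		return 0;
	}
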
··· 1783 1242 raid5_conf_t *conf = sh->raid_conf; 1784 1243 int firstwrite=0; 1785 1244 1786 - PRINTK("adding bh b#%llu to stripe s#%llu\n", 1245 + pr_debug("adding bh b#%llu to stripe s#%llu\n", 1787 1246 (unsigned long long)bi->bi_sector, 1788 1247 (unsigned long long)sh->sector); 1789 1248 ··· 1812 1271 spin_unlock_irq(&conf->device_lock); 1813 1272 spin_unlock(&sh->lock); 1814 1273 1815 - PRINTK("added bi b#%llu to stripe s#%llu, disk %d.\n", 1274 + pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n", 1816 1275 (unsigned long long)bi->bi_sector, 1817 1276 (unsigned long long)sh->sector, dd_idx); 1818 1277 ··· 1867 1326 return pd_idx; 1868 1327 } 1869 1328 1329 + static void 1330 + handle_requests_to_failed_array(raid5_conf_t *conf, struct stripe_head *sh, 1331 + struct stripe_head_state *s, int disks, 1332 + struct bio **return_bi) 1333 + { 1334 + int i; 1335 + for (i = disks; i--; ) { 1336 + struct bio *bi; 1337 + int bitmap_end = 0; 1338 + 1339 + if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 1340 + mdk_rdev_t *rdev; 1341 + rcu_read_lock(); 1342 + rdev = rcu_dereference(conf->disks[i].rdev); 1343 + if (rdev && test_bit(In_sync, &rdev->flags)) 1344 + /* multiple read failures in one stripe */ 1345 + md_error(conf->mddev, rdev); 1346 + rcu_read_unlock(); 1347 + } 1348 + spin_lock_irq(&conf->device_lock); 1349 + /* fail all writes first */ 1350 + bi = sh->dev[i].towrite; 1351 + sh->dev[i].towrite = NULL; 1352 + if (bi) { 1353 + s->to_write--; 1354 + bitmap_end = 1; 1355 + } 1356 + 1357 + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 1358 + wake_up(&conf->wait_for_overlap); 1359 + 1360 + while (bi && bi->bi_sector < 1361 + sh->dev[i].sector + STRIPE_SECTORS) { 1362 + struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 1363 + clear_bit(BIO_UPTODATE, &bi->bi_flags); 1364 + if (--bi->bi_phys_segments == 0) { 1365 + md_write_end(conf->mddev); 1366 + bi->bi_next = *return_bi; 1367 + *return_bi = bi; 1368 + } 1369 + bi = nextbi; 1370 + } 1371 + /* and fail all 'written' */ 1372 + bi = sh->dev[i].written; 1373 + sh->dev[i].written = NULL; 1374 + if (bi) bitmap_end = 1; 1375 + while (bi && bi->bi_sector < 1376 + sh->dev[i].sector + STRIPE_SECTORS) { 1377 + struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 1378 + clear_bit(BIO_UPTODATE, &bi->bi_flags); 1379 + if (--bi->bi_phys_segments == 0) { 1380 + md_write_end(conf->mddev); 1381 + bi->bi_next = *return_bi; 1382 + *return_bi = bi; 1383 + } 1384 + bi = bi2; 1385 + } 1386 + 1387 + /* fail any reads if this device is non-operational and 1388 + * the data has not reached the cache yet. 
1389 + */ 1390 + if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && 1391 + (!test_bit(R5_Insync, &sh->dev[i].flags) || 1392 + test_bit(R5_ReadError, &sh->dev[i].flags))) { 1393 + bi = sh->dev[i].toread; 1394 + sh->dev[i].toread = NULL; 1395 + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 1396 + wake_up(&conf->wait_for_overlap); 1397 + if (bi) s->to_read--; 1398 + while (bi && bi->bi_sector < 1399 + sh->dev[i].sector + STRIPE_SECTORS) { 1400 + struct bio *nextbi = 1401 + r5_next_bio(bi, sh->dev[i].sector); 1402 + clear_bit(BIO_UPTODATE, &bi->bi_flags); 1403 + if (--bi->bi_phys_segments == 0) { 1404 + bi->bi_next = *return_bi; 1405 + *return_bi = bi; 1406 + } 1407 + bi = nextbi; 1408 + } 1409 + } 1410 + spin_unlock_irq(&conf->device_lock); 1411 + if (bitmap_end) 1412 + bitmap_endwrite(conf->mddev->bitmap, sh->sector, 1413 + STRIPE_SECTORS, 0, 0); 1414 + } 1415 + 1416 + } 1417 + 1418 + /* __handle_issuing_new_read_requests5 - returns 0 if there are no more disks 1419 + * to process 1420 + */ 1421 + static int __handle_issuing_new_read_requests5(struct stripe_head *sh, 1422 + struct stripe_head_state *s, int disk_idx, int disks) 1423 + { 1424 + struct r5dev *dev = &sh->dev[disk_idx]; 1425 + struct r5dev *failed_dev = &sh->dev[s->failed_num]; 1426 + 1427 + /* don't schedule compute operations or reads on the parity block while 1428 + * a check is in flight 1429 + */ 1430 + if ((disk_idx == sh->pd_idx) && 1431 + test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) 1432 + return ~0; 1433 + 1434 + /* is the data in this block needed, and can we get it? */ 1435 + if (!test_bit(R5_LOCKED, &dev->flags) && 1436 + !test_bit(R5_UPTODATE, &dev->flags) && (dev->toread || 1437 + (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || 1438 + s->syncing || s->expanding || (s->failed && 1439 + (failed_dev->toread || (failed_dev->towrite && 1440 + !test_bit(R5_OVERWRITE, &failed_dev->flags) 1441 + ))))) { 1442 + /* 1/ We would like to get this block, possibly by computing it, 1443 + * but we might not be able to. 1444 + * 1445 + * 2/ Since parity check operations potentially make the parity 1446 + * block !uptodate it will need to be refreshed before any 1447 + * compute operations on data disks are scheduled. 1448 + * 1449 + * 3/ We hold off parity block re-reads until check operations 1450 + * have quiesced. 1451 + */ 1452 + if ((s->uptodate == disks - 1) && 1453 + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { 1454 + set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 1455 + set_bit(R5_Wantcompute, &dev->flags); 1456 + sh->ops.target = disk_idx; 1457 + s->req_compute = 1; 1458 + sh->ops.count++; 1459 + /* Careful: from this point on 'uptodate' is in the eye 1460 + * of raid5_run_ops which services 'compute' operations 1461 + * before writes. R5_Wantcompute flags a block that will 1462 + * be R5_UPTODATE by the time it is needed for a 1463 + * subsequent operation. 
1464 + */ 1465 + s->uptodate++; 1466 + return 0; /* uptodate + compute == disks */ 1467 + } else if ((s->uptodate < disks - 1) && 1468 + test_bit(R5_Insync, &dev->flags)) { 1469 + /* Note: we hold off compute operations while checks are 1470 + * in flight, but we still prefer 'compute' over 'read' 1471 + * hence we only read if (uptodate < * disks-1) 1472 + */ 1473 + set_bit(R5_LOCKED, &dev->flags); 1474 + set_bit(R5_Wantread, &dev->flags); 1475 + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) 1476 + sh->ops.count++; 1477 + s->locked++; 1478 + pr_debug("Reading block %d (sync=%d)\n", disk_idx, 1479 + s->syncing); 1480 + } 1481 + } 1482 + 1483 + return ~0; 1484 + } 1485 + 1486 + static void handle_issuing_new_read_requests5(struct stripe_head *sh, 1487 + struct stripe_head_state *s, int disks) 1488 + { 1489 + int i; 1490 + 1491 + /* Clear completed compute operations. Parity recovery 1492 + * (STRIPE_OP_MOD_REPAIR_PD) implies a write-back which is handled 1493 + * later on in this routine 1494 + */ 1495 + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && 1496 + !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { 1497 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); 1498 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); 1499 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 1500 + } 1501 + 1502 + /* look for blocks to read/compute, skip this if a compute 1503 + * is already in flight, or if the stripe contents are in the 1504 + * midst of changing due to a write 1505 + */ 1506 + if (!test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && 1507 + !test_bit(STRIPE_OP_PREXOR, &sh->ops.pending) && 1508 + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { 1509 + for (i = disks; i--; ) 1510 + if (__handle_issuing_new_read_requests5( 1511 + sh, s, i, disks) == 0) 1512 + break; 1513 + } 1514 + set_bit(STRIPE_HANDLE, &sh->state); 1515 + } 1516 + 1517 + static void handle_issuing_new_read_requests6(struct stripe_head *sh, 1518 + struct stripe_head_state *s, struct r6_state *r6s, 1519 + int disks) 1520 + { 1521 + int i; 1522 + for (i = disks; i--; ) { 1523 + struct r5dev *dev = &sh->dev[i]; 1524 + if (!test_bit(R5_LOCKED, &dev->flags) && 1525 + !test_bit(R5_UPTODATE, &dev->flags) && 1526 + (dev->toread || (dev->towrite && 1527 + !test_bit(R5_OVERWRITE, &dev->flags)) || 1528 + s->syncing || s->expanding || 1529 + (s->failed >= 1 && 1530 + (sh->dev[r6s->failed_num[0]].toread || 1531 + s->to_write)) || 1532 + (s->failed >= 2 && 1533 + (sh->dev[r6s->failed_num[1]].toread || 1534 + s->to_write)))) { 1535 + /* we would like to get this block, possibly 1536 + * by computing it, but we might not be able to 1537 + */ 1538 + if (s->uptodate == disks-1) { 1539 + pr_debug("Computing stripe %llu block %d\n", 1540 + (unsigned long long)sh->sector, i); 1541 + compute_block_1(sh, i, 0); 1542 + s->uptodate++; 1543 + } else if ( s->uptodate == disks-2 && s->failed >= 2 ) { 1544 + /* Computing 2-failure is *very* expensive; only 1545 + * do it if failed >= 2 1546 + */ 1547 + int other; 1548 + for (other = disks; other--; ) { 1549 + if (other == i) 1550 + continue; 1551 + if (!test_bit(R5_UPTODATE, 1552 + &sh->dev[other].flags)) 1553 + break; 1554 + } 1555 + BUG_ON(other < 0); 1556 + pr_debug("Computing stripe %llu blocks %d,%d\n", 1557 + (unsigned long long)sh->sector, 1558 + i, other); 1559 + compute_block_2(sh, i, other); 1560 + s->uptodate += 2; 1561 + } else if (test_bit(R5_Insync, &dev->flags)) { 1562 + set_bit(R5_LOCKED, &dev->flags); 1563 + set_bit(R5_Wantread, &dev->flags); 1564 + 
s->locked++; 1565 + pr_debug("Reading block %d (sync=%d)\n", 1566 + i, s->syncing); 1567 + } 1568 + } 1569 + } 1570 + set_bit(STRIPE_HANDLE, &sh->state); 1571 + } 1572 + 1573 + 1574 + /* handle_completed_write_requests 1575 + * any written block on an uptodate or failed drive can be returned. 1576 + * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but 1577 + * never LOCKED, so we don't need to test 'failed' directly. 1578 + */ 1579 + static void handle_completed_write_requests(raid5_conf_t *conf, 1580 + struct stripe_head *sh, int disks, struct bio **return_bi) 1581 + { 1582 + int i; 1583 + struct r5dev *dev; 1584 + 1585 + for (i = disks; i--; ) 1586 + if (sh->dev[i].written) { 1587 + dev = &sh->dev[i]; 1588 + if (!test_bit(R5_LOCKED, &dev->flags) && 1589 + test_bit(R5_UPTODATE, &dev->flags)) { 1590 + /* We can return any write requests */ 1591 + struct bio *wbi, *wbi2; 1592 + int bitmap_end = 0; 1593 + pr_debug("Return write for disc %d\n", i); 1594 + spin_lock_irq(&conf->device_lock); 1595 + wbi = dev->written; 1596 + dev->written = NULL; 1597 + while (wbi && wbi->bi_sector < 1598 + dev->sector + STRIPE_SECTORS) { 1599 + wbi2 = r5_next_bio(wbi, dev->sector); 1600 + if (--wbi->bi_phys_segments == 0) { 1601 + md_write_end(conf->mddev); 1602 + wbi->bi_next = *return_bi; 1603 + *return_bi = wbi; 1604 + } 1605 + wbi = wbi2; 1606 + } 1607 + if (dev->towrite == NULL) 1608 + bitmap_end = 1; 1609 + spin_unlock_irq(&conf->device_lock); 1610 + if (bitmap_end) 1611 + bitmap_endwrite(conf->mddev->bitmap, 1612 + sh->sector, 1613 + STRIPE_SECTORS, 1614 + !test_bit(STRIPE_DEGRADED, &sh->state), 1615 + 0); 1616 + } 1617 + } 1618 + } 1619 + 1620 + static void handle_issuing_new_write_requests5(raid5_conf_t *conf, 1621 + struct stripe_head *sh, struct stripe_head_state *s, int disks) 1622 + { 1623 + int rmw = 0, rcw = 0, i; 1624 + for (i = disks; i--; ) { 1625 + /* would I have to read this buffer for read_modify_write */ 1626 + struct r5dev *dev = &sh->dev[i]; 1627 + if ((dev->towrite || i == sh->pd_idx) && 1628 + !test_bit(R5_LOCKED, &dev->flags) && 1629 + !(test_bit(R5_UPTODATE, &dev->flags) || 1630 + test_bit(R5_Wantcompute, &dev->flags))) { 1631 + if (test_bit(R5_Insync, &dev->flags)) 1632 + rmw++; 1633 + else 1634 + rmw += 2*disks; /* cannot read it */ 1635 + } 1636 + /* Would I have to read this buffer for reconstruct_write */ 1637 + if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && 1638 + !test_bit(R5_LOCKED, &dev->flags) && 1639 + !(test_bit(R5_UPTODATE, &dev->flags) || 1640 + test_bit(R5_Wantcompute, &dev->flags))) { 1641 + if (test_bit(R5_Insync, &dev->flags)) rcw++; 1642 + else 1643 + rcw += 2*disks; 1644 + } 1645 + } 1646 + pr_debug("for sector %llu, rmw=%d rcw=%d\n", 1647 + (unsigned long long)sh->sector, rmw, rcw); 1648 + set_bit(STRIPE_HANDLE, &sh->state); 1649 + if (rmw < rcw && rmw > 0) 1650 + /* prefer read-modify-write, but need to get some data */ 1651 + for (i = disks; i--; ) { 1652 + struct r5dev *dev = &sh->dev[i]; 1653 + if ((dev->towrite || i == sh->pd_idx) && 1654 + !test_bit(R5_LOCKED, &dev->flags) && 1655 + !(test_bit(R5_UPTODATE, &dev->flags) || 1656 + test_bit(R5_Wantcompute, &dev->flags)) && 1657 + test_bit(R5_Insync, &dev->flags)) { 1658 + if ( 1659 + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 1660 + pr_debug("Read_old block " 1661 + "%d for r-m-w\n", i); 1662 + set_bit(R5_LOCKED, &dev->flags); 1663 + set_bit(R5_Wantread, &dev->flags); 1664 + if (!test_and_set_bit( 1665 + STRIPE_OP_IO, &sh->ops.pending)) 1666 + sh->ops.count++; 1667 + 
s->locked++; 1668 + } else { 1669 + set_bit(STRIPE_DELAYED, &sh->state); 1670 + set_bit(STRIPE_HANDLE, &sh->state); 1671 + } 1672 + } 1673 + } 1674 + if (rcw <= rmw && rcw > 0) 1675 + /* want reconstruct write, but need to get some data */ 1676 + for (i = disks; i--; ) { 1677 + struct r5dev *dev = &sh->dev[i]; 1678 + if (!test_bit(R5_OVERWRITE, &dev->flags) && 1679 + i != sh->pd_idx && 1680 + !test_bit(R5_LOCKED, &dev->flags) && 1681 + !(test_bit(R5_UPTODATE, &dev->flags) || 1682 + test_bit(R5_Wantcompute, &dev->flags)) && 1683 + test_bit(R5_Insync, &dev->flags)) { 1684 + if ( 1685 + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 1686 + pr_debug("Read_old block " 1687 + "%d for Reconstruct\n", i); 1688 + set_bit(R5_LOCKED, &dev->flags); 1689 + set_bit(R5_Wantread, &dev->flags); 1690 + if (!test_and_set_bit( 1691 + STRIPE_OP_IO, &sh->ops.pending)) 1692 + sh->ops.count++; 1693 + s->locked++; 1694 + } else { 1695 + set_bit(STRIPE_DELAYED, &sh->state); 1696 + set_bit(STRIPE_HANDLE, &sh->state); 1697 + } 1698 + } 1699 + } 1700 + /* now if nothing is locked, and if we have enough data, 1701 + * we can start a write request 1702 + */ 1703 + /* since handle_stripe can be called at any time we need to handle the 1704 + * case where a compute block operation has been submitted and then a 1705 + * subsequent call wants to start a write request. raid5_run_ops only 1706 + * handles the case where compute block and postxor are requested 1707 + * simultaneously. If this is not the case then new writes need to be 1708 + * held off until the compute completes. 1709 + */ 1710 + if ((s->req_compute || 1711 + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) && 1712 + (s->locked == 0 && (rcw == 0 || rmw == 0) && 1713 + !test_bit(STRIPE_BIT_DELAY, &sh->state))) 1714 + s->locked += handle_write_operations5(sh, rcw == 0, 0); 1715 + } 1716 + 1717 + static void handle_issuing_new_write_requests6(raid5_conf_t *conf, 1718 + struct stripe_head *sh, struct stripe_head_state *s, 1719 + struct r6_state *r6s, int disks) 1720 + { 1721 + int rcw = 0, must_compute = 0, pd_idx = sh->pd_idx, i; 1722 + int qd_idx = r6s->qd_idx; 1723 + for (i = disks; i--; ) { 1724 + struct r5dev *dev = &sh->dev[i]; 1725 + /* Would I have to read this buffer for reconstruct_write */ 1726 + if (!test_bit(R5_OVERWRITE, &dev->flags) 1727 + && i != pd_idx && i != qd_idx 1728 + && (!test_bit(R5_LOCKED, &dev->flags) 1729 + ) && 1730 + !test_bit(R5_UPTODATE, &dev->flags)) { 1731 + if (test_bit(R5_Insync, &dev->flags)) rcw++; 1732 + else { 1733 + pr_debug("raid6: must_compute: " 1734 + "disk %d flags=%#lx\n", i, dev->flags); 1735 + must_compute++; 1736 + } 1737 + } 1738 + } 1739 + pr_debug("for sector %llu, rcw=%d, must_compute=%d\n", 1740 + (unsigned long long)sh->sector, rcw, must_compute); 1741 + set_bit(STRIPE_HANDLE, &sh->state); 1742 + 1743 + if (rcw > 0) 1744 + /* want reconstruct write, but need to get some data */ 1745 + for (i = disks; i--; ) { 1746 + struct r5dev *dev = &sh->dev[i]; 1747 + if (!test_bit(R5_OVERWRITE, &dev->flags) 1748 + && !(s->failed == 0 && (i == pd_idx || i == qd_idx)) 1749 + && !test_bit(R5_LOCKED, &dev->flags) && 1750 + !test_bit(R5_UPTODATE, &dev->flags) && 1751 + test_bit(R5_Insync, &dev->flags)) { 1752 + if ( 1753 + test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 1754 + pr_debug("Read_old stripe %llu " 1755 + "block %d for Reconstruct\n", 1756 + (unsigned long long)sh->sector, i); 1757 + set_bit(R5_LOCKED, &dev->flags); 1758 + set_bit(R5_Wantread, &dev->flags); 1759 + s->locked++; 1760 + } else { 1761 + 
pr_debug("Request delayed stripe %llu " 1762 + "block %d for Reconstruct\n", 1763 + (unsigned long long)sh->sector, i); 1764 + set_bit(STRIPE_DELAYED, &sh->state); 1765 + set_bit(STRIPE_HANDLE, &sh->state); 1766 + } 1767 + } 1768 + } 1769 + /* now if nothing is locked, and if we have enough data, we can start a 1770 + * write request 1771 + */ 1772 + if (s->locked == 0 && rcw == 0 && 1773 + !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 1774 + if (must_compute > 0) { 1775 + /* We have failed blocks and need to compute them */ 1776 + switch (s->failed) { 1777 + case 0: 1778 + BUG(); 1779 + case 1: 1780 + compute_block_1(sh, r6s->failed_num[0], 0); 1781 + break; 1782 + case 2: 1783 + compute_block_2(sh, r6s->failed_num[0], 1784 + r6s->failed_num[1]); 1785 + break; 1786 + default: /* This request should have been failed? */ 1787 + BUG(); 1788 + } 1789 + } 1790 + 1791 + pr_debug("Computing parity for stripe %llu\n", 1792 + (unsigned long long)sh->sector); 1793 + compute_parity6(sh, RECONSTRUCT_WRITE); 1794 + /* now every locked buffer is ready to be written */ 1795 + for (i = disks; i--; ) 1796 + if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { 1797 + pr_debug("Writing stripe %llu block %d\n", 1798 + (unsigned long long)sh->sector, i); 1799 + s->locked++; 1800 + set_bit(R5_Wantwrite, &sh->dev[i].flags); 1801 + } 1802 + /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ 1803 + set_bit(STRIPE_INSYNC, &sh->state); 1804 + 1805 + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 1806 + atomic_dec(&conf->preread_active_stripes); 1807 + if (atomic_read(&conf->preread_active_stripes) < 1808 + IO_THRESHOLD) 1809 + md_wakeup_thread(conf->mddev->thread); 1810 + } 1811 + } 1812 + } 1813 + 1814 + static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, 1815 + struct stripe_head_state *s, int disks) 1816 + { 1817 + set_bit(STRIPE_HANDLE, &sh->state); 1818 + /* Take one of the following actions: 1819 + * 1/ start a check parity operation if (uptodate == disks) 1820 + * 2/ finish a check parity operation and act on the result 1821 + * 3/ skip to the writeback section if we previously 1822 + * initiated a recovery operation 1823 + */ 1824 + if (s->failed == 0 && 1825 + !test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { 1826 + if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { 1827 + BUG_ON(s->uptodate != disks); 1828 + clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags); 1829 + sh->ops.count++; 1830 + s->uptodate--; 1831 + } else if ( 1832 + test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) { 1833 + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); 1834 + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); 1835 + 1836 + if (sh->ops.zero_sum_result == 0) 1837 + /* parity is correct (on disc, 1838 + * not in buffer any more) 1839 + */ 1840 + set_bit(STRIPE_INSYNC, &sh->state); 1841 + else { 1842 + conf->mddev->resync_mismatches += 1843 + STRIPE_SECTORS; 1844 + if (test_bit( 1845 + MD_RECOVERY_CHECK, &conf->mddev->recovery)) 1846 + /* don't try to repair!! 
*/ 1847 + set_bit(STRIPE_INSYNC, &sh->state); 1848 + else { 1849 + set_bit(STRIPE_OP_COMPUTE_BLK, 1850 + &sh->ops.pending); 1851 + set_bit(STRIPE_OP_MOD_REPAIR_PD, 1852 + &sh->ops.pending); 1853 + set_bit(R5_Wantcompute, 1854 + &sh->dev[sh->pd_idx].flags); 1855 + sh->ops.target = sh->pd_idx; 1856 + sh->ops.count++; 1857 + s->uptodate++; 1858 + } 1859 + } 1860 + } 1861 + } 1862 + 1863 + /* check if we can clear a parity disk reconstruct */ 1864 + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && 1865 + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { 1866 + 1867 + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); 1868 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); 1869 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); 1870 + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); 1871 + } 1872 + 1873 + /* Wait for check parity and compute block operations to complete 1874 + * before write-back 1875 + */ 1876 + if (!test_bit(STRIPE_INSYNC, &sh->state) && 1877 + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && 1878 + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { 1879 + struct r5dev *dev; 1880 + /* either failed parity check, or recovery is happening */ 1881 + if (s->failed == 0) 1882 + s->failed_num = sh->pd_idx; 1883 + dev = &sh->dev[s->failed_num]; 1884 + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); 1885 + BUG_ON(s->uptodate != disks); 1886 + 1887 + set_bit(R5_LOCKED, &dev->flags); 1888 + set_bit(R5_Wantwrite, &dev->flags); 1889 + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) 1890 + sh->ops.count++; 1891 + 1892 + clear_bit(STRIPE_DEGRADED, &sh->state); 1893 + s->locked++; 1894 + set_bit(STRIPE_INSYNC, &sh->state); 1895 + } 1896 + } 1897 + 1898 + 1899 + static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, 1900 + struct stripe_head_state *s, 1901 + struct r6_state *r6s, struct page *tmp_page, 1902 + int disks) 1903 + { 1904 + int update_p = 0, update_q = 0; 1905 + struct r5dev *dev; 1906 + int pd_idx = sh->pd_idx; 1907 + int qd_idx = r6s->qd_idx; 1908 + 1909 + set_bit(STRIPE_HANDLE, &sh->state); 1910 + 1911 + BUG_ON(s->failed > 2); 1912 + BUG_ON(s->uptodate < disks); 1913 + /* Want to check and possibly repair P and Q. 1914 + * However there could be one 'failed' device, in which 1915 + * case we can only check one of them, possibly using the 1916 + * other to generate missing data 1917 + */ 1918 + 1919 + /* If !tmp_page, we cannot do the calculations, 1920 + * but as we have set STRIPE_HANDLE, we will soon be called 1921 + * by stripe_handle with a tmp_page - just wait until then. 1922 + */ 1923 + if (tmp_page) { 1924 + if (s->failed == r6s->q_failed) { 1925 + /* The only possible failed device holds 'Q', so it 1926 + * makes sense to check P (If anything else were failed, 1927 + * we would have used P to recreate it). 
1928 + */ 1929 + compute_block_1(sh, pd_idx, 1); 1930 + if (!page_is_zero(sh->dev[pd_idx].page)) { 1931 + compute_block_1(sh, pd_idx, 0); 1932 + update_p = 1; 1933 + } 1934 + } 1935 + if (!r6s->q_failed && s->failed < 2) { 1936 + /* q is not failed, and we didn't use it to generate 1937 + * anything, so it makes sense to check it 1938 + */ 1939 + memcpy(page_address(tmp_page), 1940 + page_address(sh->dev[qd_idx].page), 1941 + STRIPE_SIZE); 1942 + compute_parity6(sh, UPDATE_PARITY); 1943 + if (memcmp(page_address(tmp_page), 1944 + page_address(sh->dev[qd_idx].page), 1945 + STRIPE_SIZE) != 0) { 1946 + clear_bit(STRIPE_INSYNC, &sh->state); 1947 + update_q = 1; 1948 + } 1949 + } 1950 + if (update_p || update_q) { 1951 + conf->mddev->resync_mismatches += STRIPE_SECTORS; 1952 + if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 1953 + /* don't try to repair!! */ 1954 + update_p = update_q = 0; 1955 + } 1956 + 1957 + /* now write out any block on a failed drive, 1958 + * or P or Q if they need it 1959 + */ 1960 + 1961 + if (s->failed == 2) { 1962 + dev = &sh->dev[r6s->failed_num[1]]; 1963 + s->locked++; 1964 + set_bit(R5_LOCKED, &dev->flags); 1965 + set_bit(R5_Wantwrite, &dev->flags); 1966 + } 1967 + if (s->failed >= 1) { 1968 + dev = &sh->dev[r6s->failed_num[0]]; 1969 + s->locked++; 1970 + set_bit(R5_LOCKED, &dev->flags); 1971 + set_bit(R5_Wantwrite, &dev->flags); 1972 + } 1973 + 1974 + if (update_p) { 1975 + dev = &sh->dev[pd_idx]; 1976 + s->locked++; 1977 + set_bit(R5_LOCKED, &dev->flags); 1978 + set_bit(R5_Wantwrite, &dev->flags); 1979 + } 1980 + if (update_q) { 1981 + dev = &sh->dev[qd_idx]; 1982 + s->locked++; 1983 + set_bit(R5_LOCKED, &dev->flags); 1984 + set_bit(R5_Wantwrite, &dev->flags); 1985 + } 1986 + clear_bit(STRIPE_DEGRADED, &sh->state); 1987 + 1988 + set_bit(STRIPE_INSYNC, &sh->state); 1989 + } 1990 + } 1991 + 1992 + static void handle_stripe_expansion(raid5_conf_t *conf, struct stripe_head *sh, 1993 + struct r6_state *r6s) 1994 + { 1995 + int i; 1996 + 1997 + /* We have read all the blocks in this stripe and now we need to 1998 + * copy some of them into a target stripe for expand. 1999 + */ 2000 + struct dma_async_tx_descriptor *tx = NULL; 2001 + clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2002 + for (i = 0; i < sh->disks; i++) 2003 + if (i != sh->pd_idx && (r6s && i != r6s->qd_idx)) { 2004 + int dd_idx, pd_idx, j; 2005 + struct stripe_head *sh2; 2006 + 2007 + sector_t bn = compute_blocknr(sh, i); 2008 + sector_t s = raid5_compute_sector(bn, conf->raid_disks, 2009 + conf->raid_disks - 2010 + conf->max_degraded, &dd_idx, 2011 + &pd_idx, conf); 2012 + sh2 = get_active_stripe(conf, s, conf->raid_disks, 2013 + pd_idx, 1); 2014 + if (sh2 == NULL) 2015 + /* so far only the early blocks of this stripe 2016 + * have been requested. 
When later blocks 2017 + * get requested, we will try again 2018 + */ 2019 + continue; 2020 + if (!test_bit(STRIPE_EXPANDING, &sh2->state) || 2021 + test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { 2022 + /* must have already done this block */ 2023 + release_stripe(sh2); 2024 + continue; 2025 + } 2026 + 2027 + /* place all the copies on one channel */ 2028 + tx = async_memcpy(sh2->dev[dd_idx].page, 2029 + sh->dev[i].page, 0, 0, STRIPE_SIZE, 2030 + ASYNC_TX_DEP_ACK, tx, NULL, NULL); 2031 + 2032 + set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); 2033 + set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2034 + for (j = 0; j < conf->raid_disks; j++) 2035 + if (j != sh2->pd_idx && 2036 + (r6s && j != r6s->qd_idx) && 2037 + !test_bit(R5_Expanded, &sh2->dev[j].flags)) 2038 + break; 2039 + if (j == conf->raid_disks) { 2040 + set_bit(STRIPE_EXPAND_READY, &sh2->state); 2041 + set_bit(STRIPE_HANDLE, &sh2->state); 2042 + } 2043 + release_stripe(sh2); 2044 + 2045 + /* done submitting copies, wait for them to complete */ 2046 + if (i + 1 >= sh->disks) { 2047 + async_tx_ack(tx); 2048 + dma_wait_for_async_tx(tx); 2049 + } 2050 + } 2051 + } 1870 2052 1871 2053 /* 1872 2054 * handle_stripe - do things to a stripe. ··· 2603 1339 * schedule a write of some buffers 2604 1340 * return confirmation of parity correctness 2605 1341 * 2606 - * Parity calculations are done inside the stripe lock 2607 1342 * buffers are taken off read_list or write_list, and bh_cache buffers 2608 1343 * get BH_Lock set before the stripe lock is released. 2609 1344 * 2610 1345 */ 2611 - 1346 + 2612 1347 static void handle_stripe5(struct stripe_head *sh) 2613 1348 { 2614 1349 raid5_conf_t *conf = sh->raid_conf; 2615 - int disks = sh->disks; 2616 - struct bio *return_bi= NULL; 2617 - struct bio *bi; 2618 - int i; 2619 - int syncing, expanding, expanded; 2620 - int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; 2621 - int non_overwrite = 0; 2622 - int failed_num=0; 1350 + int disks = sh->disks, i; 1351 + struct bio *return_bi = NULL; 1352 + struct stripe_head_state s; 2623 1353 struct r5dev *dev; 1354 + unsigned long pending = 0; 2624 1355 2625 - PRINTK("handling stripe %llu, cnt=%d, pd_idx=%d\n", 2626 - (unsigned long long)sh->sector, atomic_read(&sh->count), 2627 - sh->pd_idx); 1356 + memset(&s, 0, sizeof(s)); 1357 + pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " 1358 + "ops=%lx:%lx:%lx\n", (unsigned long long)sh->sector, sh->state, 1359 + atomic_read(&sh->count), sh->pd_idx, 1360 + sh->ops.pending, sh->ops.ack, sh->ops.complete); 2628 1361 2629 1362 spin_lock(&sh->lock); 2630 1363 clear_bit(STRIPE_HANDLE, &sh->state); 2631 1364 clear_bit(STRIPE_DELAYED, &sh->state); 2632 1365 2633 - syncing = test_bit(STRIPE_SYNCING, &sh->state); 2634 - expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2635 - expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 1366 + s.syncing = test_bit(STRIPE_SYNCING, &sh->state); 1367 + s.expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); 1368 + s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 2636 1369 /* Now to look around and see what can be done */ 2637 1370 2638 1371 rcu_read_lock(); 2639 1372 for (i=disks; i--; ) { 2640 1373 mdk_rdev_t *rdev; 2641 - dev = &sh->dev[i]; 1374 + struct r5dev *dev = &sh->dev[i]; 2642 1375 clear_bit(R5_Insync, &dev->flags); 2643 1376 2644 - PRINTK("check %d: state 0x%lx read %p write %p written %p\n", 2645 - i, dev->flags, dev->toread, dev->towrite, dev->written); 2646 - /* maybe we can reply to a read */ 2647 - if 
(test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 2648 - struct bio *rbi, *rbi2; 2649 - PRINTK("Return read for disc %d\n", i); 2650 - spin_lock_irq(&conf->device_lock); 2651 - rbi = dev->toread; 2652 - dev->toread = NULL; 2653 - if (test_and_clear_bit(R5_Overlap, &dev->flags)) 2654 - wake_up(&conf->wait_for_overlap); 2655 - spin_unlock_irq(&conf->device_lock); 2656 - while (rbi && rbi->bi_sector < dev->sector + STRIPE_SECTORS) { 2657 - copy_data(0, rbi, dev->page, dev->sector); 2658 - rbi2 = r5_next_bio(rbi, dev->sector); 2659 - spin_lock_irq(&conf->device_lock); 2660 - if (--rbi->bi_phys_segments == 0) { 2661 - rbi->bi_next = return_bi; 2662 - return_bi = rbi; 2663 - } 2664 - spin_unlock_irq(&conf->device_lock); 2665 - rbi = rbi2; 2666 - } 2667 - } 1377 + pr_debug("check %d: state 0x%lx toread %p read %p write %p " 1378 + "written %p\n", i, dev->flags, dev->toread, dev->read, 1379 + dev->towrite, dev->written); 1380 + 1381 + /* maybe we can request a biofill operation 1382 + * 1383 + * new wantfill requests are only permitted while 1384 + * STRIPE_OP_BIOFILL is clear 1385 + */ 1386 + if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread && 1387 + !test_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) 1388 + set_bit(R5_Wantfill, &dev->flags); 2668 1389 2669 1390 /* now count some things */ 2670 - if (test_bit(R5_LOCKED, &dev->flags)) locked++; 2671 - if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; 1391 + if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 1392 + if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 1393 + if (test_bit(R5_Wantcompute, &dev->flags)) s.compute++; 2672 1394 2673 - 2674 - if (dev->toread) to_read++; 1395 + if (test_bit(R5_Wantfill, &dev->flags)) 1396 + s.to_fill++; 1397 + else if (dev->toread) 1398 + s.to_read++; 2675 1399 if (dev->towrite) { 2676 - to_write++; 1400 + s.to_write++; 2677 1401 if (!test_bit(R5_OVERWRITE, &dev->flags)) 2678 - non_overwrite++; 1402 + s.non_overwrite++; 2679 1403 } 2680 - if (dev->written) written++; 1404 + if (dev->written) 1405 + s.written++; 2681 1406 rdev = rcu_dereference(conf->disks[i].rdev); 2682 1407 if (!rdev || !test_bit(In_sync, &rdev->flags)) { 2683 1408 /* The ReadError flag will just be confusing now */ ··· 2675 1422 } 2676 1423 if (!rdev || !test_bit(In_sync, &rdev->flags) 2677 1424 || test_bit(R5_ReadError, &dev->flags)) { 2678 - failed++; 2679 - failed_num = i; 1425 + s.failed++; 1426 + s.failed_num = i; 2680 1427 } else 2681 1428 set_bit(R5_Insync, &dev->flags); 2682 1429 } 2683 1430 rcu_read_unlock(); 2684 - PRINTK("locked=%d uptodate=%d to_read=%d" 1431 + 1432 + if (s.to_fill && !test_and_set_bit(STRIPE_OP_BIOFILL, &sh->ops.pending)) 1433 + sh->ops.count++; 1434 + 1435 + pr_debug("locked=%d uptodate=%d to_read=%d" 2685 1436 " to_write=%d failed=%d failed_num=%d\n", 2686 - locked, uptodate, to_read, to_write, failed, failed_num); 1437 + s.locked, s.uptodate, s.to_read, s.to_write, 1438 + s.failed, s.failed_num); 2687 1439 /* check if the array has lost two devices and, if so, some requests might 2688 1440 * need to be failed 2689 1441 */ 2690 - if (failed > 1 && to_read+to_write+written) { 2691 - for (i=disks; i--; ) { 2692 - int bitmap_end = 0; 2693 - 2694 - if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 2695 - mdk_rdev_t *rdev; 2696 - rcu_read_lock(); 2697 - rdev = rcu_dereference(conf->disks[i].rdev); 2698 - if (rdev && test_bit(In_sync, &rdev->flags)) 2699 - /* multiple read failures in one stripe */ 2700 - md_error(conf->mddev, rdev); 2701 - rcu_read_unlock(); 2702 - } 2703 - 2704 - 
spin_lock_irq(&conf->device_lock); 2705 - /* fail all writes first */ 2706 - bi = sh->dev[i].towrite; 2707 - sh->dev[i].towrite = NULL; 2708 - if (bi) { to_write--; bitmap_end = 1; } 2709 - 2710 - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 2711 - wake_up(&conf->wait_for_overlap); 2712 - 2713 - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ 2714 - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 2715 - clear_bit(BIO_UPTODATE, &bi->bi_flags); 2716 - if (--bi->bi_phys_segments == 0) { 2717 - md_write_end(conf->mddev); 2718 - bi->bi_next = return_bi; 2719 - return_bi = bi; 2720 - } 2721 - bi = nextbi; 2722 - } 2723 - /* and fail all 'written' */ 2724 - bi = sh->dev[i].written; 2725 - sh->dev[i].written = NULL; 2726 - if (bi) bitmap_end = 1; 2727 - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { 2728 - struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 2729 - clear_bit(BIO_UPTODATE, &bi->bi_flags); 2730 - if (--bi->bi_phys_segments == 0) { 2731 - md_write_end(conf->mddev); 2732 - bi->bi_next = return_bi; 2733 - return_bi = bi; 2734 - } 2735 - bi = bi2; 2736 - } 2737 - 2738 - /* fail any reads if this device is non-operational */ 2739 - if (!test_bit(R5_Insync, &sh->dev[i].flags) || 2740 - test_bit(R5_ReadError, &sh->dev[i].flags)) { 2741 - bi = sh->dev[i].toread; 2742 - sh->dev[i].toread = NULL; 2743 - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 2744 - wake_up(&conf->wait_for_overlap); 2745 - if (bi) to_read--; 2746 - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ 2747 - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 2748 - clear_bit(BIO_UPTODATE, &bi->bi_flags); 2749 - if (--bi->bi_phys_segments == 0) { 2750 - bi->bi_next = return_bi; 2751 - return_bi = bi; 2752 - } 2753 - bi = nextbi; 2754 - } 2755 - } 2756 - spin_unlock_irq(&conf->device_lock); 2757 - if (bitmap_end) 2758 - bitmap_endwrite(conf->mddev->bitmap, sh->sector, 2759 - STRIPE_SECTORS, 0, 0); 2760 - } 2761 - } 2762 - if (failed > 1 && syncing) { 1442 + if (s.failed > 1 && s.to_read+s.to_write+s.written) 1443 + handle_requests_to_failed_array(conf, sh, &s, disks, 1444 + &return_bi); 1445 + if (s.failed > 1 && s.syncing) { 2763 1446 md_done_sync(conf->mddev, STRIPE_SECTORS,0); 2764 1447 clear_bit(STRIPE_SYNCING, &sh->state); 2765 - syncing = 0; 1448 + s.syncing = 0; 2766 1449 } 2767 1450 2768 1451 /* might be able to return some write requests if the parity block 2769 1452 * is safe, or on a failed drive 2770 1453 */ 2771 1454 dev = &sh->dev[sh->pd_idx]; 2772 - if ( written && 2773 - ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) && 2774 - test_bit(R5_UPTODATE, &dev->flags)) 2775 - || (failed == 1 && failed_num == sh->pd_idx)) 2776 - ) { 2777 - /* any written block on an uptodate or failed drive can be returned. 2778 - * Note that if we 'wrote' to a failed drive, it will be UPTODATE, but 2779 - * never LOCKED, so we don't need to test 'failed' directly. 
2780 - */ 2781 - for (i=disks; i--; ) 2782 - if (sh->dev[i].written) { 2783 - dev = &sh->dev[i]; 2784 - if (!test_bit(R5_LOCKED, &dev->flags) && 2785 - test_bit(R5_UPTODATE, &dev->flags) ) { 2786 - /* We can return any write requests */ 2787 - struct bio *wbi, *wbi2; 2788 - int bitmap_end = 0; 2789 - PRINTK("Return write for disc %d\n", i); 2790 - spin_lock_irq(&conf->device_lock); 2791 - wbi = dev->written; 2792 - dev->written = NULL; 2793 - while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { 2794 - wbi2 = r5_next_bio(wbi, dev->sector); 2795 - if (--wbi->bi_phys_segments == 0) { 2796 - md_write_end(conf->mddev); 2797 - wbi->bi_next = return_bi; 2798 - return_bi = wbi; 2799 - } 2800 - wbi = wbi2; 2801 - } 2802 - if (dev->towrite == NULL) 2803 - bitmap_end = 1; 2804 - spin_unlock_irq(&conf->device_lock); 2805 - if (bitmap_end) 2806 - bitmap_endwrite(conf->mddev->bitmap, sh->sector, 2807 - STRIPE_SECTORS, 2808 - !test_bit(STRIPE_DEGRADED, &sh->state), 0); 2809 - } 2810 - } 2811 - } 1455 + if ( s.written && 1456 + ((test_bit(R5_Insync, &dev->flags) && 1457 + !test_bit(R5_LOCKED, &dev->flags) && 1458 + test_bit(R5_UPTODATE, &dev->flags)) || 1459 + (s.failed == 1 && s.failed_num == sh->pd_idx))) 1460 + handle_completed_write_requests(conf, sh, disks, &return_bi); 2812 1461 2813 1462 /* Now we might consider reading some blocks, either to check/generate 2814 1463 * parity, or to satisfy requests 2815 1464 * or to load a block that is being partially written. 2816 1465 */ 2817 - if (to_read || non_overwrite || (syncing && (uptodate < disks)) || expanding) { 2818 - for (i=disks; i--;) { 2819 - dev = &sh->dev[i]; 2820 - if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && 2821 - (dev->toread || 2822 - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || 2823 - syncing || 2824 - expanding || 2825 - (failed && (sh->dev[failed_num].toread || 2826 - (sh->dev[failed_num].towrite && !test_bit(R5_OVERWRITE, &sh->dev[failed_num].flags)))) 2827 - ) 2828 - ) { 2829 - /* we would like to get this block, possibly 2830 - * by computing it, but we might not be able to 2831 - */ 2832 - if (uptodate == disks-1) { 2833 - PRINTK("Computing block %d\n", i); 2834 - compute_block(sh, i); 2835 - uptodate++; 2836 - } else if (test_bit(R5_Insync, &dev->flags)) { 2837 - set_bit(R5_LOCKED, &dev->flags); 2838 - set_bit(R5_Wantread, &dev->flags); 2839 - locked++; 2840 - PRINTK("Reading block %d (sync=%d)\n", 2841 - i, syncing); 2842 - } 2843 - } 2844 - } 2845 - set_bit(STRIPE_HANDLE, &sh->state); 1466 + if (s.to_read || s.non_overwrite || 1467 + (s.syncing && (s.uptodate + s.compute < disks)) || s.expanding || 1468 + test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) 1469 + handle_issuing_new_read_requests5(sh, &s, disks); 1470 + 1471 + /* Now we check to see if any write operations have recently 1472 + * completed 1473 + */ 1474 + 1475 + /* leave prexor set until postxor is done, allows us to distinguish 1476 + * a rmw from a rcw during biodrain 1477 + */ 1478 + if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && 1479 + test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { 1480 + 1481 + clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); 1482 + clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); 1483 + clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); 1484 + 1485 + for (i = disks; i--; ) 1486 + clear_bit(R5_Wantprexor, &sh->dev[i].flags); 2846 1487 } 2847 1488 2848 - /* now to consider writing and what else, if anything should be read */ 2849 - if (to_write) { 2850 - int rmw=0, rcw=0; 2851 - for 
(i=disks ; i--;) { 2852 - /* would I have to read this buffer for read_modify_write */ 1489 + /* if only POSTXOR is set then this is an 'expand' postxor */ 1490 + if (test_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete) && 1491 + test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { 1492 + 1493 + clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.complete); 1494 + clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.ack); 1495 + clear_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending); 1496 + 1497 + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); 1498 + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); 1499 + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); 1500 + 1501 + /* All the 'written' buffers and the parity block are ready to 1502 + * be written back to disk 1503 + */ 1504 + BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags)); 1505 + for (i = disks; i--; ) { 2853 1506 dev = &sh->dev[i]; 2854 - if ((dev->towrite || i == sh->pd_idx) && 2855 - (!test_bit(R5_LOCKED, &dev->flags) 2856 - ) && 2857 - !test_bit(R5_UPTODATE, &dev->flags)) { 2858 - if (test_bit(R5_Insync, &dev->flags) 2859 - /* && !(!mddev->insync && i == sh->pd_idx) */ 2860 - ) 2861 - rmw++; 2862 - else rmw += 2*disks; /* cannot read it */ 2863 - } 2864 - /* Would I have to read this buffer for reconstruct_write */ 2865 - if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && 2866 - (!test_bit(R5_LOCKED, &dev->flags) 2867 - ) && 2868 - !test_bit(R5_UPTODATE, &dev->flags)) { 2869 - if (test_bit(R5_Insync, &dev->flags)) rcw++; 2870 - else rcw += 2*disks; 1507 + if (test_bit(R5_LOCKED, &dev->flags) && 1508 + (i == sh->pd_idx || dev->written)) { 1509 + pr_debug("Writing block %d\n", i); 1510 + set_bit(R5_Wantwrite, &dev->flags); 1511 + if (!test_and_set_bit( 1512 + STRIPE_OP_IO, &sh->ops.pending)) 1513 + sh->ops.count++; 1514 + if (!test_bit(R5_Insync, &dev->flags) || 1515 + (i == sh->pd_idx && s.failed == 0)) 1516 + set_bit(STRIPE_INSYNC, &sh->state); 2871 1517 } 2872 1518 } 2873 - PRINTK("for sector %llu, rmw=%d rcw=%d\n", 2874 - (unsigned long long)sh->sector, rmw, rcw); 2875 - set_bit(STRIPE_HANDLE, &sh->state); 2876 - if (rmw < rcw && rmw > 0) 2877 - /* prefer read-modify-write, but need to get some data */ 2878 - for (i=disks; i--;) { 2879 - dev = &sh->dev[i]; 2880 - if ((dev->towrite || i == sh->pd_idx) && 2881 - !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && 2882 - test_bit(R5_Insync, &dev->flags)) { 2883 - if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 2884 - { 2885 - PRINTK("Read_old block %d for r-m-w\n", i); 2886 - set_bit(R5_LOCKED, &dev->flags); 2887 - set_bit(R5_Wantread, &dev->flags); 2888 - locked++; 2889 - } else { 2890 - set_bit(STRIPE_DELAYED, &sh->state); 2891 - set_bit(STRIPE_HANDLE, &sh->state); 2892 - } 2893 - } 2894 - } 2895 - if (rcw <= rmw && rcw > 0) 2896 - /* want reconstruct write, but need to get some data */ 2897 - for (i=disks; i--;) { 2898 - dev = &sh->dev[i]; 2899 - if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx && 2900 - !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && 2901 - test_bit(R5_Insync, &dev->flags)) { 2902 - if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 2903 - { 2904 - PRINTK("Read_old block %d for Reconstruct\n", i); 2905 - set_bit(R5_LOCKED, &dev->flags); 2906 - set_bit(R5_Wantread, &dev->flags); 2907 - locked++; 2908 - } else { 2909 - set_bit(STRIPE_DELAYED, &sh->state); 2910 - set_bit(STRIPE_HANDLE, &sh->state); 2911 - } 2912 - } 2913 - } 2914 - /* now if nothing is locked, and if we have enough data, we can start a write request */ 2915 - 
if (locked == 0 && (rcw == 0 ||rmw == 0) && 2916 - !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 2917 - PRINTK("Computing parity...\n"); 2918 - compute_parity5(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); 2919 - /* now every locked buffer is ready to be written */ 2920 - for (i=disks; i--;) 2921 - if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { 2922 - PRINTK("Writing block %d\n", i); 2923 - locked++; 2924 - set_bit(R5_Wantwrite, &sh->dev[i].flags); 2925 - if (!test_bit(R5_Insync, &sh->dev[i].flags) 2926 - || (i==sh->pd_idx && failed == 0)) 2927 - set_bit(STRIPE_INSYNC, &sh->state); 2928 - } 2929 - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 2930 - atomic_dec(&conf->preread_active_stripes); 2931 - if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) 2932 - md_wakeup_thread(conf->mddev->thread); 2933 - } 1519 + if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 1520 + atomic_dec(&conf->preread_active_stripes); 1521 + if (atomic_read(&conf->preread_active_stripes) < 1522 + IO_THRESHOLD) 1523 + md_wakeup_thread(conf->mddev->thread); 2934 1524 } 2935 1525 } 1526 + 1527 + /* Now to consider new write requests and what else, if anything 1528 + * should be read. We do not handle new writes when: 1529 + * 1/ A 'write' operation (copy+xor) is already in flight. 1530 + * 2/ A 'check' operation is in flight, as it may clobber the parity 1531 + * block. 1532 + */ 1533 + if (s.to_write && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending) && 1534 + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) 1535 + handle_issuing_new_write_requests5(conf, sh, &s, disks); 2936 1536 2937 1537 /* maybe we need to check and possibly fix the parity for this stripe 2938 - * Any reads will already have been scheduled, so we just see if enough data 2939 - * is available 1538 + * Any reads will already have been scheduled, so we just see if enough 1539 + * data is available. The parity check is held off while parity 1540 + * dependent operations are in flight. 2940 1541 */ 2941 - if (syncing && locked == 0 && 2942 - !test_bit(STRIPE_INSYNC, &sh->state)) { 2943 - set_bit(STRIPE_HANDLE, &sh->state); 2944 - if (failed == 0) { 2945 - BUG_ON(uptodate != disks); 2946 - compute_parity5(sh, CHECK_PARITY); 2947 - uptodate--; 2948 - if (page_is_zero(sh->dev[sh->pd_idx].page)) { 2949 - /* parity is correct (on disc, not in buffer any more) */ 2950 - set_bit(STRIPE_INSYNC, &sh->state); 2951 - } else { 2952 - conf->mddev->resync_mismatches += STRIPE_SECTORS; 2953 - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 2954 - /* don't try to repair!! 
*/ 2955 - set_bit(STRIPE_INSYNC, &sh->state); 2956 - else { 2957 - compute_block(sh, sh->pd_idx); 2958 - uptodate++; 2959 - } 2960 - } 2961 - } 2962 - if (!test_bit(STRIPE_INSYNC, &sh->state)) { 2963 - /* either failed parity check, or recovery is happening */ 2964 - if (failed==0) 2965 - failed_num = sh->pd_idx; 2966 - dev = &sh->dev[failed_num]; 2967 - BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); 2968 - BUG_ON(uptodate != disks); 1542 + if ((s.syncing && s.locked == 0 && 1543 + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending) && 1544 + !test_bit(STRIPE_INSYNC, &sh->state)) || 1545 + test_bit(STRIPE_OP_CHECK, &sh->ops.pending) || 1546 + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) 1547 + handle_parity_checks5(conf, sh, &s, disks); 2969 1548 2970 - set_bit(R5_LOCKED, &dev->flags); 2971 - set_bit(R5_Wantwrite, &dev->flags); 2972 - clear_bit(STRIPE_DEGRADED, &sh->state); 2973 - locked++; 2974 - set_bit(STRIPE_INSYNC, &sh->state); 2975 - } 2976 - } 2977 - if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 1549 + if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 2978 1550 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 2979 1551 clear_bit(STRIPE_SYNCING, &sh->state); 2980 1552 } ··· 2807 1729 /* If the failed drive is just a ReadError, then we might need to progress 2808 1730 * the repair/check process 2809 1731 */ 2810 - if (failed == 1 && ! conf->mddev->ro && 2811 - test_bit(R5_ReadError, &sh->dev[failed_num].flags) 2812 - && !test_bit(R5_LOCKED, &sh->dev[failed_num].flags) 2813 - && test_bit(R5_UPTODATE, &sh->dev[failed_num].flags) 1732 + if (s.failed == 1 && !conf->mddev->ro && 1733 + test_bit(R5_ReadError, &sh->dev[s.failed_num].flags) 1734 + && !test_bit(R5_LOCKED, &sh->dev[s.failed_num].flags) 1735 + && test_bit(R5_UPTODATE, &sh->dev[s.failed_num].flags) 2814 1736 ) { 2815 - dev = &sh->dev[failed_num]; 1737 + dev = &sh->dev[s.failed_num]; 2816 1738 if (!test_bit(R5_ReWrite, &dev->flags)) { 2817 1739 set_bit(R5_Wantwrite, &dev->flags); 1740 + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) 1741 + sh->ops.count++; 2818 1742 set_bit(R5_ReWrite, &dev->flags); 2819 1743 set_bit(R5_LOCKED, &dev->flags); 2820 - locked++; 1744 + s.locked++; 2821 1745 } else { 2822 1746 /* let's read it back */ 2823 1747 set_bit(R5_Wantread, &dev->flags); 1748 + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) 1749 + sh->ops.count++; 2824 1750 set_bit(R5_LOCKED, &dev->flags); 2825 - locked++; 1751 + s.locked++; 2826 1752 } 2827 1753 } 2828 1754 2829 - if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 1755 + /* Finish postxor operations initiated by the expansion 1756 + * process 1757 + */ 1758 + if (test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete) && 1759 + !test_bit(STRIPE_OP_BIODRAIN, &sh->ops.pending)) { 1760 + 1761 + clear_bit(STRIPE_EXPANDING, &sh->state); 1762 + 1763 + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.pending); 1764 + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.ack); 1765 + clear_bit(STRIPE_OP_POSTXOR, &sh->ops.complete); 1766 + 1767 + for (i = conf->raid_disks; i--; ) { 1768 + set_bit(R5_Wantwrite, &sh->dev[i].flags); 1769 + if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) 1770 + sh->ops.count++; 1771 + } 1772 + } 1773 + 1774 + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state) && 1775 + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { 2830 1776 /* Need to write out all blocks after computing parity */ 2831 1777 sh->disks = conf->raid_disks; 2832 - sh->pd_idx = stripe_to_pdidx(sh->sector, conf, conf->raid_disks); 2833 - 
compute_parity5(sh, RECONSTRUCT_WRITE); 2834 - for (i= conf->raid_disks; i--;) { 2835 - set_bit(R5_LOCKED, &sh->dev[i].flags); 2836 - locked++; 2837 - set_bit(R5_Wantwrite, &sh->dev[i].flags); 2838 - } 2839 - clear_bit(STRIPE_EXPANDING, &sh->state); 2840 - } else if (expanded) { 1778 + sh->pd_idx = stripe_to_pdidx(sh->sector, conf, 1779 + conf->raid_disks); 1780 + s.locked += handle_write_operations5(sh, 0, 1); 1781 + } else if (s.expanded && 1782 + !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { 2841 1783 clear_bit(STRIPE_EXPAND_READY, &sh->state); 2842 1784 atomic_dec(&conf->reshape_stripes); 2843 1785 wake_up(&conf->wait_for_overlap); 2844 1786 md_done_sync(conf->mddev, STRIPE_SECTORS, 1); 2845 1787 } 2846 1788 2847 - if (expanding && locked == 0) { 2848 - /* We have read all the blocks in this stripe and now we need to 2849 - * copy some of them into a target stripe for expand. 2850 - */ 2851 - clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2852 - for (i=0; i< sh->disks; i++) 2853 - if (i != sh->pd_idx) { 2854 - int dd_idx, pd_idx, j; 2855 - struct stripe_head *sh2; 1789 + if (s.expanding && s.locked == 0) 1790 + handle_stripe_expansion(conf, sh, NULL); 2856 1791 2857 - sector_t bn = compute_blocknr(sh, i); 2858 - sector_t s = raid5_compute_sector(bn, conf->raid_disks, 2859 - conf->raid_disks-1, 2860 - &dd_idx, &pd_idx, conf); 2861 - sh2 = get_active_stripe(conf, s, conf->raid_disks, pd_idx, 1); 2862 - if (sh2 == NULL) 2863 - /* so far only the early blocks of this stripe 2864 - * have been requested. When later blocks 2865 - * get requested, we will try again 2866 - */ 2867 - continue; 2868 - if(!test_bit(STRIPE_EXPANDING, &sh2->state) || 2869 - test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { 2870 - /* must have already done this block */ 2871 - release_stripe(sh2); 2872 - continue; 2873 - } 2874 - memcpy(page_address(sh2->dev[dd_idx].page), 2875 - page_address(sh->dev[i].page), 2876 - STRIPE_SIZE); 2877 - set_bit(R5_Expanded, &sh2->dev[dd_idx].flags); 2878 - set_bit(R5_UPTODATE, &sh2->dev[dd_idx].flags); 2879 - for (j=0; j<conf->raid_disks; j++) 2880 - if (j != sh2->pd_idx && 2881 - !test_bit(R5_Expanded, &sh2->dev[j].flags)) 2882 - break; 2883 - if (j == conf->raid_disks) { 2884 - set_bit(STRIPE_EXPAND_READY, &sh2->state); 2885 - set_bit(STRIPE_HANDLE, &sh2->state); 2886 - } 2887 - release_stripe(sh2); 2888 - } 2889 - } 1792 + if (sh->ops.count) 1793 + pending = get_stripe_work(sh); 2890 1794 2891 1795 spin_unlock(&sh->lock); 2892 1796 2893 - while ((bi=return_bi)) { 2894 - int bytes = bi->bi_size; 1797 + if (pending) 1798 + raid5_run_ops(sh, pending); 2895 1799 2896 - return_bi = bi->bi_next; 2897 - bi->bi_next = NULL; 2898 - bi->bi_size = 0; 2899 - bi->bi_end_io(bi, bytes, 2900 - test_bit(BIO_UPTODATE, &bi->bi_flags) 2901 - ? 
0 : -EIO); 2902 - } 2903 - for (i=disks; i-- ;) { 2904 - int rw; 2905 - struct bio *bi; 2906 - mdk_rdev_t *rdev; 2907 - if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) 2908 - rw = WRITE; 2909 - else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) 2910 - rw = READ; 2911 - else 2912 - continue; 2913 - 2914 - bi = &sh->dev[i].req; 2915 - 2916 - bi->bi_rw = rw; 2917 - if (rw == WRITE) 2918 - bi->bi_end_io = raid5_end_write_request; 2919 - else 2920 - bi->bi_end_io = raid5_end_read_request; 2921 - 2922 - rcu_read_lock(); 2923 - rdev = rcu_dereference(conf->disks[i].rdev); 2924 - if (rdev && test_bit(Faulty, &rdev->flags)) 2925 - rdev = NULL; 2926 - if (rdev) 2927 - atomic_inc(&rdev->nr_pending); 2928 - rcu_read_unlock(); 2929 - 2930 - if (rdev) { 2931 - if (syncing || expanding || expanded) 2932 - md_sync_acct(rdev->bdev, STRIPE_SECTORS); 1800 + return_io(return_bi); 2933 1801 2934 - bi->bi_bdev = rdev->bdev; 2935 - PRINTK("for %llu schedule op %ld on disc %d\n", 2936 - (unsigned long long)sh->sector, bi->bi_rw, i); 2937 - atomic_inc(&sh->count); 2938 - bi->bi_sector = sh->sector + rdev->data_offset; 2939 - bi->bi_flags = 1 << BIO_UPTODATE; 2940 - bi->bi_vcnt = 1; 2941 - bi->bi_max_vecs = 1; 2942 - bi->bi_idx = 0; 2943 - bi->bi_io_vec = &sh->dev[i].vec; 2944 - bi->bi_io_vec[0].bv_len = STRIPE_SIZE; 2945 - bi->bi_io_vec[0].bv_offset = 0; 2946 - bi->bi_size = STRIPE_SIZE; 2947 - bi->bi_next = NULL; 2948 - if (rw == WRITE && 2949 - test_bit(R5_ReWrite, &sh->dev[i].flags)) 2950 - atomic_add(STRIPE_SECTORS, &rdev->corrected_errors); 2951 - generic_make_request(bi); 2952 - } else { 2953 - if (rw == WRITE) 2954 - set_bit(STRIPE_DEGRADED, &sh->state); 2955 - PRINTK("skip op %ld on disc %d for sector %llu\n", 2956 - bi->bi_rw, i, (unsigned long long)sh->sector); 2957 - clear_bit(R5_LOCKED, &sh->dev[i].flags); 2958 - set_bit(STRIPE_HANDLE, &sh->state); 2959 - } 2960 - } 2961 1802 } 2962 1803 2963 1804 static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) 2964 1805 { 2965 1806 raid6_conf_t *conf = sh->raid_conf; 2966 1807 int disks = sh->disks; 2967 - struct bio *return_bi= NULL; 2968 - struct bio *bi; 2969 - int i; 2970 - int syncing, expanding, expanded; 2971 - int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; 2972 - int non_overwrite = 0; 2973 - int failed_num[2] = {0, 0}; 1808 + struct bio *return_bi = NULL; 1809 + int i, pd_idx = sh->pd_idx; 1810 + struct stripe_head_state s; 1811 + struct r6_state r6s; 2974 1812 struct r5dev *dev, *pdev, *qdev; 2975 - int pd_idx = sh->pd_idx; 2976 - int qd_idx = raid6_next_disk(pd_idx, disks); 2977 - int p_failed, q_failed; 2978 1813 2979 - PRINTK("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d, qd_idx=%d\n", 2980 - (unsigned long long)sh->sector, sh->state, atomic_read(&sh->count), 2981 - pd_idx, qd_idx); 1814 + r6s.qd_idx = raid6_next_disk(pd_idx, disks); 1815 + pr_debug("handling stripe %llu, state=%#lx cnt=%d, " 1816 + "pd_idx=%d, qd_idx=%d\n", 1817 + (unsigned long long)sh->sector, sh->state, 1818 + atomic_read(&sh->count), pd_idx, r6s.qd_idx); 1819 + memset(&s, 0, sizeof(s)); 2982 1820 2983 1821 spin_lock(&sh->lock); 2984 1822 clear_bit(STRIPE_HANDLE, &sh->state); 2985 1823 clear_bit(STRIPE_DELAYED, &sh->state); 2986 1824 2987 - syncing = test_bit(STRIPE_SYNCING, &sh->state); 2988 - expanding = test_bit(STRIPE_EXPAND_SOURCE, &sh->state); 2989 - expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 1825 + s.syncing = test_bit(STRIPE_SYNCING, &sh->state); 1826 + s.expanding = 
test_bit(STRIPE_EXPAND_SOURCE, &sh->state); 1827 + s.expanded = test_bit(STRIPE_EXPAND_READY, &sh->state); 2990 1828 /* Now to look around and see what can be done */ 2991 1829 2992 1830 rcu_read_lock(); ··· 2911 1917 dev = &sh->dev[i]; 2912 1918 clear_bit(R5_Insync, &dev->flags); 2913 1919 2914 - PRINTK("check %d: state 0x%lx read %p write %p written %p\n", 1920 + pr_debug("check %d: state 0x%lx read %p write %p written %p\n", 2915 1921 i, dev->flags, dev->toread, dev->towrite, dev->written); 2916 1922 /* maybe we can reply to a read */ 2917 1923 if (test_bit(R5_UPTODATE, &dev->flags) && dev->toread) { 2918 1924 struct bio *rbi, *rbi2; 2919 - PRINTK("Return read for disc %d\n", i); 1925 + pr_debug("Return read for disc %d\n", i); 2920 1926 spin_lock_irq(&conf->device_lock); 2921 1927 rbi = dev->toread; 2922 1928 dev->toread = NULL; ··· 2937 1943 } 2938 1944 2939 1945 /* now count some things */ 2940 - if (test_bit(R5_LOCKED, &dev->flags)) locked++; 2941 - if (test_bit(R5_UPTODATE, &dev->flags)) uptodate++; 1946 + if (test_bit(R5_LOCKED, &dev->flags)) s.locked++; 1947 + if (test_bit(R5_UPTODATE, &dev->flags)) s.uptodate++; 2942 1948 2943 1949 2944 - if (dev->toread) to_read++; 1950 + if (dev->toread) 1951 + s.to_read++; 2945 1952 if (dev->towrite) { 2946 - to_write++; 1953 + s.to_write++; 2947 1954 if (!test_bit(R5_OVERWRITE, &dev->flags)) 2948 - non_overwrite++; 1955 + s.non_overwrite++; 2949 1956 } 2950 - if (dev->written) written++; 1957 + if (dev->written) 1958 + s.written++; 2951 1959 rdev = rcu_dereference(conf->disks[i].rdev); 2952 1960 if (!rdev || !test_bit(In_sync, &rdev->flags)) { 2953 1961 /* The ReadError flag will just be confusing now */ ··· 2958 1962 } 2959 1963 if (!rdev || !test_bit(In_sync, &rdev->flags) 2960 1964 || test_bit(R5_ReadError, &dev->flags)) { 2961 - if ( failed < 2 ) 2962 - failed_num[failed] = i; 2963 - failed++; 1965 + if (s.failed < 2) 1966 + r6s.failed_num[s.failed] = i; 1967 + s.failed++; 2964 1968 } else 2965 1969 set_bit(R5_Insync, &dev->flags); 2966 1970 } 2967 1971 rcu_read_unlock(); 2968 - PRINTK("locked=%d uptodate=%d to_read=%d" 1972 + pr_debug("locked=%d uptodate=%d to_read=%d" 2969 1973 " to_write=%d failed=%d failed_num=%d,%d\n", 2970 - locked, uptodate, to_read, to_write, failed, 2971 - failed_num[0], failed_num[1]); 2972 - /* check if the array has lost >2 devices and, if so, some requests might 2973 - * need to be failed 1974 + s.locked, s.uptodate, s.to_read, s.to_write, s.failed, 1975 + r6s.failed_num[0], r6s.failed_num[1]); 1976 + /* check if the array has lost >2 devices and, if so, some requests 1977 + * might need to be failed 2974 1978 */ 2975 - if (failed > 2 && to_read+to_write+written) { 2976 - for (i=disks; i--; ) { 2977 - int bitmap_end = 0; 2978 - 2979 - if (test_bit(R5_ReadError, &sh->dev[i].flags)) { 2980 - mdk_rdev_t *rdev; 2981 - rcu_read_lock(); 2982 - rdev = rcu_dereference(conf->disks[i].rdev); 2983 - if (rdev && test_bit(In_sync, &rdev->flags)) 2984 - /* multiple read failures in one stripe */ 2985 - md_error(conf->mddev, rdev); 2986 - rcu_read_unlock(); 2987 - } 2988 - 2989 - spin_lock_irq(&conf->device_lock); 2990 - /* fail all writes first */ 2991 - bi = sh->dev[i].towrite; 2992 - sh->dev[i].towrite = NULL; 2993 - if (bi) { to_write--; bitmap_end = 1; } 2994 - 2995 - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 2996 - wake_up(&conf->wait_for_overlap); 2997 - 2998 - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ 2999 - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 3000 - 
clear_bit(BIO_UPTODATE, &bi->bi_flags); 3001 - if (--bi->bi_phys_segments == 0) { 3002 - md_write_end(conf->mddev); 3003 - bi->bi_next = return_bi; 3004 - return_bi = bi; 3005 - } 3006 - bi = nextbi; 3007 - } 3008 - /* and fail all 'written' */ 3009 - bi = sh->dev[i].written; 3010 - sh->dev[i].written = NULL; 3011 - if (bi) bitmap_end = 1; 3012 - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS) { 3013 - struct bio *bi2 = r5_next_bio(bi, sh->dev[i].sector); 3014 - clear_bit(BIO_UPTODATE, &bi->bi_flags); 3015 - if (--bi->bi_phys_segments == 0) { 3016 - md_write_end(conf->mddev); 3017 - bi->bi_next = return_bi; 3018 - return_bi = bi; 3019 - } 3020 - bi = bi2; 3021 - } 3022 - 3023 - /* fail any reads if this device is non-operational */ 3024 - if (!test_bit(R5_Insync, &sh->dev[i].flags) || 3025 - test_bit(R5_ReadError, &sh->dev[i].flags)) { 3026 - bi = sh->dev[i].toread; 3027 - sh->dev[i].toread = NULL; 3028 - if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) 3029 - wake_up(&conf->wait_for_overlap); 3030 - if (bi) to_read--; 3031 - while (bi && bi->bi_sector < sh->dev[i].sector + STRIPE_SECTORS){ 3032 - struct bio *nextbi = r5_next_bio(bi, sh->dev[i].sector); 3033 - clear_bit(BIO_UPTODATE, &bi->bi_flags); 3034 - if (--bi->bi_phys_segments == 0) { 3035 - bi->bi_next = return_bi; 3036 - return_bi = bi; 3037 - } 3038 - bi = nextbi; 3039 - } 3040 - } 3041 - spin_unlock_irq(&conf->device_lock); 3042 - if (bitmap_end) 3043 - bitmap_endwrite(conf->mddev->bitmap, sh->sector, 3044 - STRIPE_SECTORS, 0, 0); 3045 - } 3046 - } 3047 - if (failed > 2 && syncing) { 1979 + if (s.failed > 2 && s.to_read+s.to_write+s.written) 1980 + handle_requests_to_failed_array(conf, sh, &s, disks, 1981 + &return_bi); 1982 + if (s.failed > 2 && s.syncing) { 3048 1983 md_done_sync(conf->mddev, STRIPE_SECTORS,0); 3049 1984 clear_bit(STRIPE_SYNCING, &sh->state); 3050 - syncing = 0; 1985 + s.syncing = 0; 3051 1986 } 3052 1987 3053 1988 /* ··· 2986 2059 * are safe, or on a failed drive 2987 2060 */ 2988 2061 pdev = &sh->dev[pd_idx]; 2989 - p_failed = (failed >= 1 && failed_num[0] == pd_idx) 2990 - || (failed >= 2 && failed_num[1] == pd_idx); 2991 - qdev = &sh->dev[qd_idx]; 2992 - q_failed = (failed >= 1 && failed_num[0] == qd_idx) 2993 - || (failed >= 2 && failed_num[1] == qd_idx); 2062 + r6s.p_failed = (s.failed >= 1 && r6s.failed_num[0] == pd_idx) 2063 + || (s.failed >= 2 && r6s.failed_num[1] == pd_idx); 2064 + qdev = &sh->dev[r6s.qd_idx]; 2065 + r6s.q_failed = (s.failed >= 1 && r6s.failed_num[0] == r6s.qd_idx) 2066 + || (s.failed >= 2 && r6s.failed_num[1] == r6s.qd_idx); 2994 2067 2995 - if ( written && 2996 - ( p_failed || ((test_bit(R5_Insync, &pdev->flags) 2068 + if ( s.written && 2069 + ( r6s.p_failed || ((test_bit(R5_Insync, &pdev->flags) 2997 2070 && !test_bit(R5_LOCKED, &pdev->flags) 2998 - && test_bit(R5_UPTODATE, &pdev->flags))) ) && 2999 - ( q_failed || ((test_bit(R5_Insync, &qdev->flags) 2071 + && test_bit(R5_UPTODATE, &pdev->flags)))) && 2072 + ( r6s.q_failed || ((test_bit(R5_Insync, &qdev->flags) 3000 2073 && !test_bit(R5_LOCKED, &qdev->flags) 3001 - && test_bit(R5_UPTODATE, &qdev->flags))) ) ) { 3002 - /* any written block on an uptodate or failed drive can be 3003 - * returned. Note that if we 'wrote' to a failed drive, 3004 - * it will be UPTODATE, but never LOCKED, so we don't need 3005 - * to test 'failed' directly. 
3006 - */ 3007 - for (i=disks; i--; ) 3008 - if (sh->dev[i].written) { 3009 - dev = &sh->dev[i]; 3010 - if (!test_bit(R5_LOCKED, &dev->flags) && 3011 - test_bit(R5_UPTODATE, &dev->flags) ) { 3012 - /* We can return any write requests */ 3013 - int bitmap_end = 0; 3014 - struct bio *wbi, *wbi2; 3015 - PRINTK("Return write for stripe %llu disc %d\n", 3016 - (unsigned long long)sh->sector, i); 3017 - spin_lock_irq(&conf->device_lock); 3018 - wbi = dev->written; 3019 - dev->written = NULL; 3020 - while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { 3021 - wbi2 = r5_next_bio(wbi, dev->sector); 3022 - if (--wbi->bi_phys_segments == 0) { 3023 - md_write_end(conf->mddev); 3024 - wbi->bi_next = return_bi; 3025 - return_bi = wbi; 3026 - } 3027 - wbi = wbi2; 3028 - } 3029 - if (dev->towrite == NULL) 3030 - bitmap_end = 1; 3031 - spin_unlock_irq(&conf->device_lock); 3032 - if (bitmap_end) 3033 - bitmap_endwrite(conf->mddev->bitmap, sh->sector, 3034 - STRIPE_SECTORS, 3035 - !test_bit(STRIPE_DEGRADED, &sh->state), 0); 3036 - } 3037 - } 3038 - } 2074 + && test_bit(R5_UPTODATE, &qdev->flags))))) 2075 + handle_completed_write_requests(conf, sh, disks, &return_bi); 3039 2076 3040 2077 /* Now we might consider reading some blocks, either to check/generate 3041 2078 * parity, or to satisfy requests 3042 2079 * or to load a block that is being partially written. 3043 2080 */ 3044 - if (to_read || non_overwrite || (to_write && failed) || 3045 - (syncing && (uptodate < disks)) || expanding) { 3046 - for (i=disks; i--;) { 3047 - dev = &sh->dev[i]; 3048 - if (!test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && 3049 - (dev->toread || 3050 - (dev->towrite && !test_bit(R5_OVERWRITE, &dev->flags)) || 3051 - syncing || 3052 - expanding || 3053 - (failed >= 1 && (sh->dev[failed_num[0]].toread || to_write)) || 3054 - (failed >= 2 && (sh->dev[failed_num[1]].toread || to_write)) 3055 - ) 3056 - ) { 3057 - /* we would like to get this block, possibly 3058 - * by computing it, but we might not be able to 3059 - */ 3060 - if (uptodate == disks-1) { 3061 - PRINTK("Computing stripe %llu block %d\n", 3062 - (unsigned long long)sh->sector, i); 3063 - compute_block_1(sh, i, 0); 3064 - uptodate++; 3065 - } else if ( uptodate == disks-2 && failed >= 2 ) { 3066 - /* Computing 2-failure is *very* expensive; only do it if failed >= 2 */ 3067 - int other; 3068 - for (other=disks; other--;) { 3069 - if ( other == i ) 3070 - continue; 3071 - if ( !test_bit(R5_UPTODATE, &sh->dev[other].flags) ) 3072 - break; 3073 - } 3074 - BUG_ON(other < 0); 3075 - PRINTK("Computing stripe %llu blocks %d,%d\n", 3076 - (unsigned long long)sh->sector, i, other); 3077 - compute_block_2(sh, i, other); 3078 - uptodate += 2; 3079 - } else if (test_bit(R5_Insync, &dev->flags)) { 3080 - set_bit(R5_LOCKED, &dev->flags); 3081 - set_bit(R5_Wantread, &dev->flags); 3082 - locked++; 3083 - PRINTK("Reading block %d (sync=%d)\n", 3084 - i, syncing); 3085 - } 3086 - } 3087 - } 3088 - set_bit(STRIPE_HANDLE, &sh->state); 3089 - } 2081 + if (s.to_read || s.non_overwrite || (s.to_write && s.failed) || 2082 + (s.syncing && (s.uptodate < disks)) || s.expanding) 2083 + handle_issuing_new_read_requests6(sh, &s, &r6s, disks); 3090 2084 3091 2085 /* now to consider writing and what else, if anything should be read */ 3092 - if (to_write) { 3093 - int rcw=0, must_compute=0; 3094 - for (i=disks ; i--;) { 3095 - dev = &sh->dev[i]; 3096 - /* Would I have to read this buffer for reconstruct_write */ 3097 - if (!test_bit(R5_OVERWRITE, &dev->flags) 3098 - 
&& i != pd_idx && i != qd_idx 3099 - && (!test_bit(R5_LOCKED, &dev->flags) 3100 - ) && 3101 - !test_bit(R5_UPTODATE, &dev->flags)) { 3102 - if (test_bit(R5_Insync, &dev->flags)) rcw++; 3103 - else { 3104 - PRINTK("raid6: must_compute: disk %d flags=%#lx\n", i, dev->flags); 3105 - must_compute++; 3106 - } 3107 - } 3108 - } 3109 - PRINTK("for sector %llu, rcw=%d, must_compute=%d\n", 3110 - (unsigned long long)sh->sector, rcw, must_compute); 3111 - set_bit(STRIPE_HANDLE, &sh->state); 3112 - 3113 - if (rcw > 0) 3114 - /* want reconstruct write, but need to get some data */ 3115 - for (i=disks; i--;) { 3116 - dev = &sh->dev[i]; 3117 - if (!test_bit(R5_OVERWRITE, &dev->flags) 3118 - && !(failed == 0 && (i == pd_idx || i == qd_idx)) 3119 - && !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) && 3120 - test_bit(R5_Insync, &dev->flags)) { 3121 - if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) 3122 - { 3123 - PRINTK("Read_old stripe %llu block %d for Reconstruct\n", 3124 - (unsigned long long)sh->sector, i); 3125 - set_bit(R5_LOCKED, &dev->flags); 3126 - set_bit(R5_Wantread, &dev->flags); 3127 - locked++; 3128 - } else { 3129 - PRINTK("Request delayed stripe %llu block %d for Reconstruct\n", 3130 - (unsigned long long)sh->sector, i); 3131 - set_bit(STRIPE_DELAYED, &sh->state); 3132 - set_bit(STRIPE_HANDLE, &sh->state); 3133 - } 3134 - } 3135 - } 3136 - /* now if nothing is locked, and if we have enough data, we can start a write request */ 3137 - if (locked == 0 && rcw == 0 && 3138 - !test_bit(STRIPE_BIT_DELAY, &sh->state)) { 3139 - if ( must_compute > 0 ) { 3140 - /* We have failed blocks and need to compute them */ 3141 - switch ( failed ) { 3142 - case 0: BUG(); 3143 - case 1: compute_block_1(sh, failed_num[0], 0); break; 3144 - case 2: compute_block_2(sh, failed_num[0], failed_num[1]); break; 3145 - default: BUG(); /* This request should have been failed? 
*/ 3146 - } 3147 - } 3148 - 3149 - PRINTK("Computing parity for stripe %llu\n", (unsigned long long)sh->sector); 3150 - compute_parity6(sh, RECONSTRUCT_WRITE); 3151 - /* now every locked buffer is ready to be written */ 3152 - for (i=disks; i--;) 3153 - if (test_bit(R5_LOCKED, &sh->dev[i].flags)) { 3154 - PRINTK("Writing stripe %llu block %d\n", 3155 - (unsigned long long)sh->sector, i); 3156 - locked++; 3157 - set_bit(R5_Wantwrite, &sh->dev[i].flags); 3158 - } 3159 - /* after a RECONSTRUCT_WRITE, the stripe MUST be in-sync */ 3160 - set_bit(STRIPE_INSYNC, &sh->state); 3161 - 3162 - if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) { 3163 - atomic_dec(&conf->preread_active_stripes); 3164 - if (atomic_read(&conf->preread_active_stripes) < IO_THRESHOLD) 3165 - md_wakeup_thread(conf->mddev->thread); 3166 - } 3167 - } 3168 - } 2086 + if (s.to_write) 2087 + handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks); 3169 2088 3170 2089 /* maybe we need to check and possibly fix the parity for this stripe 3171 - * Any reads will already have been scheduled, so we just see if enough data 3172 - * is available 2090 + * Any reads will already have been scheduled, so we just see if enough 2091 + * data is available 3173 2092 */ 3174 - if (syncing && locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) { 3175 - int update_p = 0, update_q = 0; 3176 - struct r5dev *dev; 2093 + if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) 2094 + handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); 3177 2095 3178 - set_bit(STRIPE_HANDLE, &sh->state); 3179 - 3180 - BUG_ON(failed>2); 3181 - BUG_ON(uptodate < disks); 3182 - /* Want to check and possibly repair P and Q. 3183 - * However there could be one 'failed' device, in which 3184 - * case we can only check one of them, possibly using the 3185 - * other to generate missing data 3186 - */ 3187 - 3188 - /* If !tmp_page, we cannot do the calculations, 3189 - * but as we have set STRIPE_HANDLE, we will soon be called 3190 - * by stripe_handle with a tmp_page - just wait until then. 3191 - */ 3192 - if (tmp_page) { 3193 - if (failed == q_failed) { 3194 - /* The only possible failed device holds 'Q', so it makes 3195 - * sense to check P (If anything else were failed, we would 3196 - * have used P to recreate it). 3197 - */ 3198 - compute_block_1(sh, pd_idx, 1); 3199 - if (!page_is_zero(sh->dev[pd_idx].page)) { 3200 - compute_block_1(sh,pd_idx,0); 3201 - update_p = 1; 3202 - } 3203 - } 3204 - if (!q_failed && failed < 2) { 3205 - /* q is not failed, and we didn't use it to generate 3206 - * anything, so it makes sense to check it 3207 - */ 3208 - memcpy(page_address(tmp_page), 3209 - page_address(sh->dev[qd_idx].page), 3210 - STRIPE_SIZE); 3211 - compute_parity6(sh, UPDATE_PARITY); 3212 - if (memcmp(page_address(tmp_page), 3213 - page_address(sh->dev[qd_idx].page), 3214 - STRIPE_SIZE)!= 0) { 3215 - clear_bit(STRIPE_INSYNC, &sh->state); 3216 - update_q = 1; 3217 - } 3218 - } 3219 - if (update_p || update_q) { 3220 - conf->mddev->resync_mismatches += STRIPE_SECTORS; 3221 - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) 3222 - /* don't try to repair!! 
*/ 3223 - update_p = update_q = 0; 3224 - } 3225 - 3226 - /* now write out any block on a failed drive, 3227 - * or P or Q if they need it 3228 - */ 3229 - 3230 - if (failed == 2) { 3231 - dev = &sh->dev[failed_num[1]]; 3232 - locked++; 3233 - set_bit(R5_LOCKED, &dev->flags); 3234 - set_bit(R5_Wantwrite, &dev->flags); 3235 - } 3236 - if (failed >= 1) { 3237 - dev = &sh->dev[failed_num[0]]; 3238 - locked++; 3239 - set_bit(R5_LOCKED, &dev->flags); 3240 - set_bit(R5_Wantwrite, &dev->flags); 3241 - } 3242 - 3243 - if (update_p) { 3244 - dev = &sh->dev[pd_idx]; 3245 - locked ++; 3246 - set_bit(R5_LOCKED, &dev->flags); 3247 - set_bit(R5_Wantwrite, &dev->flags); 3248 - } 3249 - if (update_q) { 3250 - dev = &sh->dev[qd_idx]; 3251 - locked++; 3252 - set_bit(R5_LOCKED, &dev->flags); 3253 - set_bit(R5_Wantwrite, &dev->flags); 3254 - } 3255 - clear_bit(STRIPE_DEGRADED, &sh->state); 3256 - 3257 - set_bit(STRIPE_INSYNC, &sh->state); 3258 - } 3259 - } 3260 - 3261 - if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 2096 + if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { 3262 2097 md_done_sync(conf->mddev, STRIPE_SECTORS,1); 3263 2098 clear_bit(STRIPE_SYNCING, &sh->state); 3264 2099 } ··· 3028 2339 /* If the failed drives are just a ReadError, then we might need 3029 2340 * to progress the repair/check process 3030 2341 */ 3031 - if (failed <= 2 && ! conf->mddev->ro) 3032 - for (i=0; i<failed;i++) { 3033 - dev = &sh->dev[failed_num[i]]; 2342 + if (s.failed <= 2 && !conf->mddev->ro) 2343 + for (i = 0; i < s.failed; i++) { 2344 + dev = &sh->dev[r6s.failed_num[i]]; 3034 2345 if (test_bit(R5_ReadError, &dev->flags) 3035 2346 && !test_bit(R5_LOCKED, &dev->flags) 3036 2347 && test_bit(R5_UPTODATE, &dev->flags) ··· 3047 2358 } 3048 2359 } 3049 2360 3050 - if (expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 2361 + if (s.expanded && test_bit(STRIPE_EXPANDING, &sh->state)) { 3051 2362 /* Need to write out all blocks after computing P&Q */ 3052 2363 sh->disks = conf->raid_disks; 3053 2364 sh->pd_idx = stripe_to_pdidx(sh->sector, conf, ··· 3055 2366 compute_parity6(sh, RECONSTRUCT_WRITE); 3056 2367 for (i = conf->raid_disks ; i-- ; ) { 3057 2368 set_bit(R5_LOCKED, &sh->dev[i].flags); 3058 - locked++; 2369 + s.locked++; 3059 2370 set_bit(R5_Wantwrite, &sh->dev[i].flags); 3060 2371 } 3061 2372 clear_bit(STRIPE_EXPANDING, &sh->state); 3062 - } else if (expanded) { 2373 + } else if (s.expanded) { 3063 2374 clear_bit(STRIPE_EXPAND_READY, &sh->state); 3064 2375 atomic_dec(&conf->reshape_stripes); 3065 2376 wake_up(&conf->wait_for_overlap); 3066 2377 md_done_sync(conf->mddev, STRIPE_SECTORS, 1); 3067 2378 } 3068 2379 3069 - if (expanding && locked == 0) { 3070 - /* We have read all the blocks in this stripe and now we need to 3071 - * copy some of them into a target stripe for expand. 3072 - */ 3073 - clear_bit(STRIPE_EXPAND_SOURCE, &sh->state); 3074 - for (i = 0; i < sh->disks ; i++) 3075 - if (i != pd_idx && i != qd_idx) { 3076 - int dd_idx2, pd_idx2, j; 3077 - struct stripe_head *sh2; 3078 - 3079 - sector_t bn = compute_blocknr(sh, i); 3080 - sector_t s = raid5_compute_sector( 3081 - bn, conf->raid_disks, 3082 - conf->raid_disks - conf->max_degraded, 3083 - &dd_idx2, &pd_idx2, conf); 3084 - sh2 = get_active_stripe(conf, s, 3085 - conf->raid_disks, 3086 - pd_idx2, 1); 3087 - if (sh2 == NULL) 3088 - /* so for only the early blocks of 3089 - * this stripe have been requests. 
3090 - * When later blocks get requests, we 3091 - * will try again 3092 - */ 3093 - continue; 3094 - if (!test_bit(STRIPE_EXPANDING, &sh2->state) || 3095 - test_bit(R5_Expanded, 3096 - &sh2->dev[dd_idx2].flags)) { 3097 - /* must have already done this block */ 3098 - release_stripe(sh2); 3099 - continue; 3100 - } 3101 - memcpy(page_address(sh2->dev[dd_idx2].page), 3102 - page_address(sh->dev[i].page), 3103 - STRIPE_SIZE); 3104 - set_bit(R5_Expanded, &sh2->dev[dd_idx2].flags); 3105 - set_bit(R5_UPTODATE, &sh2->dev[dd_idx2].flags); 3106 - for (j = 0 ; j < conf->raid_disks ; j++) 3107 - if (j != sh2->pd_idx && 3108 - j != raid6_next_disk(sh2->pd_idx, 3109 - sh2->disks) && 3110 - !test_bit(R5_Expanded, 3111 - &sh2->dev[j].flags)) 3112 - break; 3113 - if (j == conf->raid_disks) { 3114 - set_bit(STRIPE_EXPAND_READY, 3115 - &sh2->state); 3116 - set_bit(STRIPE_HANDLE, &sh2->state); 3117 - } 3118 - release_stripe(sh2); 3119 - } 3120 - } 2380 + if (s.expanding && s.locked == 0) 2381 + handle_stripe_expansion(conf, sh, &r6s); 3121 2382 3122 2383 spin_unlock(&sh->lock); 3123 2384 3124 - while ((bi=return_bi)) { 3125 - int bytes = bi->bi_size; 2385 + return_io(return_bi); 3126 2386 3127 - return_bi = bi->bi_next; 3128 - bi->bi_next = NULL; 3129 - bi->bi_size = 0; 3130 - bi->bi_end_io(bi, bytes, 3131 - test_bit(BIO_UPTODATE, &bi->bi_flags) 3132 - ? 0 : -EIO); 3133 - } 3134 2387 for (i=disks; i-- ;) { 3135 2388 int rw; 3136 2389 struct bio *bi; ··· 3101 2470 rcu_read_unlock(); 3102 2471 3103 2472 if (rdev) { 3104 - if (syncing || expanding || expanded) 2473 + if (s.syncing || s.expanding || s.expanded) 3105 2474 md_sync_acct(rdev->bdev, STRIPE_SECTORS); 3106 2475 3107 2476 bi->bi_bdev = rdev->bdev; 3108 - PRINTK("for %llu schedule op %ld on disc %d\n", 2477 + pr_debug("for %llu schedule op %ld on disc %d\n", 3109 2478 (unsigned long long)sh->sector, bi->bi_rw, i); 3110 2479 atomic_inc(&sh->count); 3111 2480 bi->bi_sector = sh->sector + rdev->data_offset; ··· 3125 2494 } else { 3126 2495 if (rw == WRITE) 3127 2496 set_bit(STRIPE_DEGRADED, &sh->state); 3128 - PRINTK("skip op %ld on disc %d for sector %llu\n", 2497 + pr_debug("skip op %ld on disc %d for sector %llu\n", 3129 2498 bi->bi_rw, i, (unsigned long long)sh->sector); 3130 2499 clear_bit(R5_LOCKED, &sh->dev[i].flags); 3131 2500 set_bit(STRIPE_HANDLE, &sh->state); ··· 3369 2738 } 3370 2739 3371 2740 3372 - PRINTK("raid5_align_endio : io error...handing IO for a retry\n"); 2741 + pr_debug("raid5_align_endio : io error...handing IO for a retry\n"); 3373 2742 3374 2743 add_bio_to_retry(raid_bi, conf); 3375 2744 return 0; ··· 3407 2776 mdk_rdev_t *rdev; 3408 2777 3409 2778 if (!in_chunk_boundary(mddev, raid_bio)) { 3410 - PRINTK("chunk_aligned_read : non aligned\n"); 2779 + pr_debug("chunk_aligned_read : non aligned\n"); 3411 2780 return 0; 3412 2781 } 3413 2782 /* ··· 3531 2900 3532 2901 new_sector = raid5_compute_sector(logical_sector, disks, data_disks, 3533 2902 &dd_idx, &pd_idx, conf); 3534 - PRINTK("raid5: make_request, sector %llu logical %llu\n", 2903 + pr_debug("raid5: make_request, sector %llu logical %llu\n", 3535 2904 (unsigned long long)new_sector, 3536 2905 (unsigned long long)logical_sector); 3537 2906 ··· 3904 3273 raid5_conf_t *conf = mddev_to_conf(mddev); 3905 3274 int handled; 3906 3275 3907 - PRINTK("+++ raid5d active\n"); 3276 + pr_debug("+++ raid5d active\n"); 3908 3277 3909 3278 md_check_recovery(mddev); 3910 3279 ··· 3939 3308 handled++; 3940 3309 } 3941 3310 3942 - if (list_empty(&conf->handle_list)) 3311 + if 
(list_empty(&conf->handle_list)) { 3312 + async_tx_issue_pending_all(); 3943 3313 break; 3314 + } 3944 3315 3945 3316 first = conf->handle_list.next; 3946 3317 sh = list_entry(first, struct stripe_head, lru); ··· 3958 3325 3959 3326 spin_lock_irq(&conf->device_lock); 3960 3327 } 3961 - PRINTK("%d stripes handled\n", handled); 3328 + pr_debug("%d stripes handled\n", handled); 3962 3329 3963 3330 spin_unlock_irq(&conf->device_lock); 3964 3331 3965 3332 unplug_slaves(mddev); 3966 3333 3967 - PRINTK("--- raid5d inactive\n"); 3334 + pr_debug("--- raid5d inactive\n"); 3968 3335 } 3969 3336 3970 3337 static ssize_t ··· 4140 3507 atomic_set(&conf->preread_active_stripes, 0); 4141 3508 atomic_set(&conf->active_aligned_reads, 0); 4142 3509 4143 - PRINTK("raid5: run(%s) called.\n", mdname(mddev)); 3510 + pr_debug("raid5: run(%s) called.\n", mdname(mddev)); 4144 3511 4145 3512 ITERATE_RDEV(mddev,rdev,tmp) { 4146 3513 raid_disk = rdev->raid_disk; ··· 4323 3690 return 0; 4324 3691 } 4325 3692 4326 - #if RAID5_DEBUG 3693 + #ifdef DEBUG 4327 3694 static void print_sh (struct seq_file *seq, struct stripe_head *sh) 4328 3695 { 4329 3696 int i; ··· 4370 3737 conf->disks[i].rdev && 4371 3738 test_bit(In_sync, &conf->disks[i].rdev->flags) ? "U" : "_"); 4372 3739 seq_printf (seq, "]"); 4373 - #if RAID5_DEBUG 3740 + #ifdef DEBUG 4374 3741 seq_printf (seq, "\n"); 4375 3742 printall(seq, conf); 4376 3743 #endif
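The handle_stripe5()/handle_stripe6() hunks above fold the per-stripe local counters (locked, uptodate, to_read, to_write, failed, ...) into a single struct stripe_head_state, with a small r6_state side structure for the raid6-only bits. The definitions below are a sketch reconstructed purely from the field accesses visible in this diff; the real declarations live in the raid5 header touched elsewhere in this series and may carry additional members:

	struct stripe_head_state {
		/* per-invocation summary of the stripe, filled in under sh->lock */
		int syncing, expanding, expanded;
		int locked, uptodate, to_read, to_write, failed, written;
		int non_overwrite;
		int failed_num;
	};

	struct r6_state {
		/* raid6 additionally tracks the second parity disk and up to two failures */
		int p_failed, q_failed, qd_idx, failed_num[2];
	};

Likewise, the open-coded completion loop over return_bi becomes a return_io() call; a minimal helper equivalent to the removed loop would be:

	static void return_io(struct bio *return_bi)
	{
		struct bio *bi;

		/* complete each bio on the singly linked list built via bi_next */
		while ((bi = return_bi)) {
			int bytes = bi->bi_size;

			return_bi = bi->bi_next;
			bi->bi_next = NULL;
			bi->bi_size = 0;
			bi->bi_end_io(bi, bytes,
				test_bit(BIO_UPTODATE, &bi->bi_flags) ? 0 : -EIO);
		}
	}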
+28 -27
drivers/md/xor.c → crypto/xor.c
··· 26 26 static struct xor_block_template *active_template; 27 27 28 28 void 29 - xor_block(unsigned int count, unsigned int bytes, void **ptr) 29 + xor_blocks(unsigned int src_count, unsigned int bytes, void *dest, void **srcs) 30 30 { 31 - unsigned long *p0, *p1, *p2, *p3, *p4; 31 + unsigned long *p1, *p2, *p3, *p4; 32 32 33 - p0 = (unsigned long *) ptr[0]; 34 - p1 = (unsigned long *) ptr[1]; 35 - if (count == 2) { 36 - active_template->do_2(bytes, p0, p1); 33 + p1 = (unsigned long *) srcs[0]; 34 + if (src_count == 1) { 35 + active_template->do_2(bytes, dest, p1); 37 36 return; 38 37 } 39 38 40 - p2 = (unsigned long *) ptr[2]; 41 - if (count == 3) { 42 - active_template->do_3(bytes, p0, p1, p2); 39 + p2 = (unsigned long *) srcs[1]; 40 + if (src_count == 2) { 41 + active_template->do_3(bytes, dest, p1, p2); 43 42 return; 44 43 } 45 44 46 - p3 = (unsigned long *) ptr[3]; 47 - if (count == 4) { 48 - active_template->do_4(bytes, p0, p1, p2, p3); 45 + p3 = (unsigned long *) srcs[2]; 46 + if (src_count == 3) { 47 + active_template->do_4(bytes, dest, p1, p2, p3); 49 48 return; 50 49 } 51 50 52 - p4 = (unsigned long *) ptr[4]; 53 - active_template->do_5(bytes, p0, p1, p2, p3, p4); 51 + p4 = (unsigned long *) srcs[3]; 52 + active_template->do_5(bytes, dest, p1, p2, p3, p4); 54 53 } 54 + EXPORT_SYMBOL(xor_blocks); 55 55 56 56 /* Set of all registered templates. */ 57 57 static struct xor_block_template *template_list; ··· 78 78 now = jiffies; 79 79 count = 0; 80 80 while (jiffies == now) { 81 - mb(); 81 + mb(); /* prevent loop optimzation */ 82 82 tmpl->do_2(BENCH_SIZE, b1, b2); 83 83 mb(); 84 84 count++; ··· 91 91 speed = max * (HZ * BENCH_SIZE / 1024); 92 92 tmpl->speed = speed; 93 93 94 - printk(" %-10s: %5d.%03d MB/sec\n", tmpl->name, 94 + printk(KERN_INFO " %-10s: %5d.%03d MB/sec\n", tmpl->name, 95 95 speed / 1000, speed % 1000); 96 96 } 97 97 98 - static int 99 - calibrate_xor_block(void) 98 + static int __init 99 + calibrate_xor_blocks(void) 100 100 { 101 101 void *b1, *b2; 102 102 struct xor_block_template *f, *fastest; 103 103 104 104 b1 = (void *) __get_free_pages(GFP_KERNEL, 2); 105 - if (! b1) { 106 - printk("raid5: Yikes! No memory available.\n"); 105 + if (!b1) { 106 + printk(KERN_WARNING "xor: Yikes! 
No memory available.\n"); 107 107 return -ENOMEM; 108 108 } 109 109 b2 = b1 + 2*PAGE_SIZE + BENCH_SIZE; 110 110 111 111 /* 112 - * If this arch/cpu has a short-circuited selection, don't loop through all 113 - * the possible functions, just test the best one 112 + * If this arch/cpu has a short-circuited selection, don't loop through 113 + * all the possible functions, just test the best one 114 114 */ 115 115 116 116 fastest = NULL; ··· 122 122 #define xor_speed(templ) do_xor_speed((templ), b1, b2) 123 123 124 124 if (fastest) { 125 - printk(KERN_INFO "raid5: automatically using best checksumming function: %s\n", 125 + printk(KERN_INFO "xor: automatically using best " 126 + "checksumming function: %s\n", 126 127 fastest->name); 127 128 xor_speed(fastest); 128 129 } else { 129 - printk(KERN_INFO "raid5: measuring checksumming speed\n"); 130 + printk(KERN_INFO "xor: measuring software checksum speed\n"); 130 131 XOR_TRY_TEMPLATES; 131 132 fastest = template_list; 132 133 for (f = fastest; f; f = f->next) ··· 135 134 fastest = f; 136 135 } 137 136 138 - printk("raid5: using function: %s (%d.%03d MB/sec)\n", 137 + printk(KERN_INFO "xor: using function: %s (%d.%03d MB/sec)\n", 139 138 fastest->name, fastest->speed / 1000, fastest->speed % 1000); 140 139 141 140 #undef xor_speed ··· 148 147 149 148 static __exit void xor_exit(void) { } 150 149 151 - EXPORT_SYMBOL(xor_block); 152 150 MODULE_LICENSE("GPL"); 153 151 154 - module_init(calibrate_xor_block); 152 + /* when built-in xor.o must initialize before drivers/md/md.o */ 153 + core_initcall(calibrate_xor_blocks); 155 154 module_exit(xor_exit);
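With the rename from xor_block() to xor_blocks(), the destination buffer is passed explicitly and the count covers sources only, instead of the destination riding along as ptr[0]. A short usage sketch under the new signature (p1, p2 and dest are hypothetical struct page pointers, not taken from this patch):

	/* accumulate two source pages into dest: dest ^= src0 ^ src1 */
	void *srcs[2] = { page_address(p1), page_address(p2) };

	xor_blocks(2, PAGE_SIZE, page_address(dest), srcs);

The switch from module_init() to core_initcall() ensures the calibration runs before drivers/md/md.o initializes when both are built in, as the new comment notes.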
+544
include/asm-arm/arch-iop13xx/adma.h
··· 1 + /* 2 + * Copyright(c) 2006, Intel Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + * 13 + * You should have received a copy of the GNU General Public License along with 14 + * this program; if not, write to the Free Software Foundation, Inc., 15 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 16 + * 17 + */ 18 + #ifndef _ADMA_H 19 + #define _ADMA_H 20 + #include <linux/types.h> 21 + #include <linux/io.h> 22 + #include <asm/hardware.h> 23 + #include <asm/hardware/iop_adma.h> 24 + 25 + #define ADMA_ACCR(chan) (chan->mmr_base + 0x0) 26 + #define ADMA_ACSR(chan) (chan->mmr_base + 0x4) 27 + #define ADMA_ADAR(chan) (chan->mmr_base + 0x8) 28 + #define ADMA_IIPCR(chan) (chan->mmr_base + 0x18) 29 + #define ADMA_IIPAR(chan) (chan->mmr_base + 0x1c) 30 + #define ADMA_IIPUAR(chan) (chan->mmr_base + 0x20) 31 + #define ADMA_ANDAR(chan) (chan->mmr_base + 0x24) 32 + #define ADMA_ADCR(chan) (chan->mmr_base + 0x28) 33 + #define ADMA_CARMD(chan) (chan->mmr_base + 0x2c) 34 + #define ADMA_ABCR(chan) (chan->mmr_base + 0x30) 35 + #define ADMA_DLADR(chan) (chan->mmr_base + 0x34) 36 + #define ADMA_DUADR(chan) (chan->mmr_base + 0x38) 37 + #define ADMA_SLAR(src, chan) (chan->mmr_base + (0x3c + (src << 3))) 38 + #define ADMA_SUAR(src, chan) (chan->mmr_base + (0x40 + (src << 3))) 39 + 40 + struct iop13xx_adma_src { 41 + u32 src_addr; 42 + union { 43 + u32 upper_src_addr; 44 + struct { 45 + unsigned int pq_upper_src_addr:24; 46 + unsigned int pq_dmlt:8; 47 + }; 48 + }; 49 + }; 50 + 51 + struct iop13xx_adma_desc_ctrl { 52 + unsigned int int_en:1; 53 + unsigned int xfer_dir:2; 54 + unsigned int src_select:4; 55 + unsigned int zero_result:1; 56 + unsigned int block_fill_en:1; 57 + unsigned int crc_gen_en:1; 58 + unsigned int crc_xfer_dis:1; 59 + unsigned int crc_seed_fetch_dis:1; 60 + unsigned int status_write_back_en:1; 61 + unsigned int endian_swap_en:1; 62 + unsigned int reserved0:2; 63 + unsigned int pq_update_xfer_en:1; 64 + unsigned int dual_xor_en:1; 65 + unsigned int pq_xfer_en:1; 66 + unsigned int p_xfer_dis:1; 67 + unsigned int reserved1:10; 68 + unsigned int relax_order_en:1; 69 + unsigned int no_snoop_en:1; 70 + }; 71 + 72 + struct iop13xx_adma_byte_count { 73 + unsigned int byte_count:24; 74 + unsigned int host_if:3; 75 + unsigned int reserved:2; 76 + unsigned int zero_result_err_q:1; 77 + unsigned int zero_result_err:1; 78 + unsigned int tx_complete:1; 79 + }; 80 + 81 + struct iop13xx_adma_desc_hw { 82 + u32 next_desc; 83 + union { 84 + u32 desc_ctrl; 85 + struct iop13xx_adma_desc_ctrl desc_ctrl_field; 86 + }; 87 + union { 88 + u32 crc_addr; 89 + u32 block_fill_data; 90 + u32 q_dest_addr; 91 + }; 92 + union { 93 + u32 byte_count; 94 + struct iop13xx_adma_byte_count byte_count_field; 95 + }; 96 + union { 97 + u32 dest_addr; 98 + u32 p_dest_addr; 99 + }; 100 + union { 101 + u32 upper_dest_addr; 102 + u32 pq_upper_dest_addr; 103 + }; 104 + struct iop13xx_adma_src src[1]; 105 + }; 106 + 107 + struct iop13xx_adma_desc_dual_xor { 108 + u32 next_desc; 109 + u32 desc_ctrl; 110 + u32 reserved; 111 + u32 byte_count; 112 + u32 h_dest_addr; 113 + u32 
h_upper_dest_addr; 114 + u32 src0_addr; 115 + u32 upper_src0_addr; 116 + u32 src1_addr; 117 + u32 upper_src1_addr; 118 + u32 h_src_addr; 119 + u32 h_upper_src_addr; 120 + u32 d_src_addr; 121 + u32 d_upper_src_addr; 122 + u32 d_dest_addr; 123 + u32 d_upper_dest_addr; 124 + }; 125 + 126 + struct iop13xx_adma_desc_pq_update { 127 + u32 next_desc; 128 + u32 desc_ctrl; 129 + u32 reserved; 130 + u32 byte_count; 131 + u32 p_dest_addr; 132 + u32 p_upper_dest_addr; 133 + u32 src0_addr; 134 + u32 upper_src0_addr; 135 + u32 src1_addr; 136 + u32 upper_src1_addr; 137 + u32 p_src_addr; 138 + u32 p_upper_src_addr; 139 + u32 q_src_addr; 140 + struct { 141 + unsigned int q_upper_src_addr:24; 142 + unsigned int q_dmlt:8; 143 + }; 144 + u32 q_dest_addr; 145 + u32 q_upper_dest_addr; 146 + }; 147 + 148 + static inline int iop_adma_get_max_xor(void) 149 + { 150 + return 16; 151 + } 152 + 153 + static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) 154 + { 155 + return __raw_readl(ADMA_ADAR(chan)); 156 + } 157 + 158 + static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, 159 + u32 next_desc_addr) 160 + { 161 + __raw_writel(next_desc_addr, ADMA_ANDAR(chan)); 162 + } 163 + 164 + #define ADMA_STATUS_BUSY (1 << 13) 165 + 166 + static inline char iop_chan_is_busy(struct iop_adma_chan *chan) 167 + { 168 + if (__raw_readl(ADMA_ACSR(chan)) & 169 + ADMA_STATUS_BUSY) 170 + return 1; 171 + else 172 + return 0; 173 + } 174 + 175 + static inline int 176 + iop_chan_get_desc_align(struct iop_adma_chan *chan, int num_slots) 177 + { 178 + return 1; 179 + } 180 + #define iop_desc_is_aligned(x, y) 1 181 + 182 + static inline int 183 + iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) 184 + { 185 + *slots_per_op = 1; 186 + return 1; 187 + } 188 + 189 + #define iop_chan_interrupt_slot_count(s, c) iop_chan_memcpy_slot_count(0, s) 190 + 191 + static inline int 192 + iop_chan_memset_slot_count(size_t len, int *slots_per_op) 193 + { 194 + *slots_per_op = 1; 195 + return 1; 196 + } 197 + 198 + static inline int 199 + iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) 200 + { 201 + int num_slots; 202 + /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1 203 + * (1 source => 8 bytes) (1 slot => 32 bytes) 204 + */ 205 + num_slots = 1 + (((src_cnt - 1) << 3) >> 5); 206 + if (((src_cnt - 1) << 3) & 0x1f) 207 + num_slots++; 208 + 209 + *slots_per_op = num_slots; 210 + 211 + return num_slots; 212 + } 213 + 214 + #define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) 215 + #define IOP_ADMA_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT 216 + #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT 217 + #define IOP_ADMA_XOR_MAX_BYTE_COUNT ADMA_MAX_BYTE_COUNT 218 + #define iop_chan_zero_sum_slot_count(l, s, o) iop_chan_xor_slot_count(l, s, o) 219 + 220 + static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, 221 + struct iop_adma_chan *chan) 222 + { 223 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 224 + return hw_desc->dest_addr; 225 + } 226 + 227 + static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, 228 + struct iop_adma_chan *chan) 229 + { 230 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 231 + return hw_desc->byte_count_field.byte_count; 232 + } 233 + 234 + static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, 235 + struct iop_adma_chan *chan, 236 + int src_idx) 237 + { 238 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 239 + return hw_desc->src[src_idx].src_addr; 240 + } 241 + 242 + 
static inline u32 iop_desc_get_src_count(struct iop_adma_desc_slot *desc, 243 + struct iop_adma_chan *chan) 244 + { 245 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 246 + return hw_desc->desc_ctrl_field.src_select + 1; 247 + } 248 + 249 + static inline void 250 + iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en) 251 + { 252 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 253 + union { 254 + u32 value; 255 + struct iop13xx_adma_desc_ctrl field; 256 + } u_desc_ctrl; 257 + 258 + u_desc_ctrl.value = 0; 259 + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ 260 + u_desc_ctrl.field.int_en = int_en; 261 + hw_desc->desc_ctrl = u_desc_ctrl.value; 262 + hw_desc->crc_addr = 0; 263 + } 264 + 265 + static inline void 266 + iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en) 267 + { 268 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 269 + union { 270 + u32 value; 271 + struct iop13xx_adma_desc_ctrl field; 272 + } u_desc_ctrl; 273 + 274 + u_desc_ctrl.value = 0; 275 + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ 276 + u_desc_ctrl.field.block_fill_en = 1; 277 + u_desc_ctrl.field.int_en = int_en; 278 + hw_desc->desc_ctrl = u_desc_ctrl.value; 279 + hw_desc->crc_addr = 0; 280 + } 281 + 282 + /* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */ 283 + static inline void 284 + iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) 285 + { 286 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 287 + union { 288 + u32 value; 289 + struct iop13xx_adma_desc_ctrl field; 290 + } u_desc_ctrl; 291 + 292 + u_desc_ctrl.value = 0; 293 + u_desc_ctrl.field.src_select = src_cnt - 1; 294 + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ 295 + u_desc_ctrl.field.int_en = int_en; 296 + hw_desc->desc_ctrl = u_desc_ctrl.value; 297 + hw_desc->crc_addr = 0; 298 + 299 + } 300 + #define iop_desc_init_null_xor(d, s, i) iop_desc_init_xor(d, s, i) 301 + 302 + /* to do: support buffers larger than ADMA_MAX_BYTE_COUNT */ 303 + static inline int 304 + iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) 305 + { 306 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 307 + union { 308 + u32 value; 309 + struct iop13xx_adma_desc_ctrl field; 310 + } u_desc_ctrl; 311 + 312 + u_desc_ctrl.value = 0; 313 + u_desc_ctrl.field.src_select = src_cnt - 1; 314 + u_desc_ctrl.field.xfer_dir = 3; /* local to internal bus */ 315 + u_desc_ctrl.field.zero_result = 1; 316 + u_desc_ctrl.field.status_write_back_en = 1; 317 + u_desc_ctrl.field.int_en = int_en; 318 + hw_desc->desc_ctrl = u_desc_ctrl.value; 319 + hw_desc->crc_addr = 0; 320 + 321 + return 1; 322 + } 323 + 324 + static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, 325 + struct iop_adma_chan *chan, 326 + u32 byte_count) 327 + { 328 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 329 + hw_desc->byte_count = byte_count; 330 + } 331 + 332 + static inline void 333 + iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) 334 + { 335 + int slots_per_op = desc->slots_per_op; 336 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; 337 + int i = 0; 338 + 339 + if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { 340 + hw_desc->byte_count = len; 341 + } else { 342 + do { 343 + iter = iop_hw_desc_slot_idx(hw_desc, i); 344 + iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; 345 + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; 346 + i += slots_per_op; 347 + } while (len > 
IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); 348 + 349 + if (len) { 350 + iter = iop_hw_desc_slot_idx(hw_desc, i); 351 + iter->byte_count = len; 352 + } 353 + } 354 + } 355 + 356 + 357 + static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, 358 + struct iop_adma_chan *chan, 359 + dma_addr_t addr) 360 + { 361 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 362 + hw_desc->dest_addr = addr; 363 + hw_desc->upper_dest_addr = 0; 364 + } 365 + 366 + static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, 367 + dma_addr_t addr) 368 + { 369 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 370 + hw_desc->src[0].src_addr = addr; 371 + hw_desc->src[0].upper_src_addr = 0; 372 + } 373 + 374 + static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, 375 + int src_idx, dma_addr_t addr) 376 + { 377 + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; 378 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc, *iter; 379 + int i = 0; 380 + 381 + do { 382 + iter = iop_hw_desc_slot_idx(hw_desc, i); 383 + iter->src[src_idx].src_addr = addr; 384 + iter->src[src_idx].upper_src_addr = 0; 385 + slot_cnt -= slots_per_op; 386 + if (slot_cnt) { 387 + i += slots_per_op; 388 + addr += IOP_ADMA_XOR_MAX_BYTE_COUNT; 389 + } 390 + } while (slot_cnt); 391 + } 392 + 393 + static inline void 394 + iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, 395 + struct iop_adma_chan *chan) 396 + { 397 + iop_desc_init_memcpy(desc, 1); 398 + iop_desc_set_byte_count(desc, chan, 0); 399 + iop_desc_set_dest_addr(desc, chan, 0); 400 + iop_desc_set_memcpy_src_addr(desc, 0); 401 + } 402 + 403 + #define iop_desc_set_zero_sum_src_addr iop_desc_set_xor_src_addr 404 + 405 + static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, 406 + u32 next_desc_addr) 407 + { 408 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 409 + BUG_ON(hw_desc->next_desc); 410 + hw_desc->next_desc = next_desc_addr; 411 + } 412 + 413 + static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) 414 + { 415 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 416 + return hw_desc->next_desc; 417 + } 418 + 419 + static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) 420 + { 421 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 422 + hw_desc->next_desc = 0; 423 + } 424 + 425 + static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, 426 + u32 val) 427 + { 428 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 429 + hw_desc->block_fill_data = val; 430 + } 431 + 432 + static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) 433 + { 434 + struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; 435 + struct iop13xx_adma_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; 436 + struct iop13xx_adma_byte_count byte_count = hw_desc->byte_count_field; 437 + 438 + BUG_ON(!(byte_count.tx_complete && desc_ctrl.zero_result)); 439 + 440 + if (desc_ctrl.pq_xfer_en) 441 + return byte_count.zero_result_err_q; 442 + else 443 + return byte_count.zero_result_err; 444 + } 445 + 446 + static inline void iop_chan_append(struct iop_adma_chan *chan) 447 + { 448 + u32 adma_accr; 449 + 450 + adma_accr = __raw_readl(ADMA_ACCR(chan)); 451 + adma_accr |= 0x2; 452 + __raw_writel(adma_accr, ADMA_ACCR(chan)); 453 + } 454 + 455 + static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan) 456 + { 457 + do { } while (0); 458 + } 459 + 460 + static inline u32 iop_chan_get_status(struct iop_adma_chan 
*chan) 461 + { 462 + return __raw_readl(ADMA_ACSR(chan)); 463 + } 464 + 465 + static inline void iop_chan_disable(struct iop_adma_chan *chan) 466 + { 467 + u32 adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan)); 468 + adma_chan_ctrl &= ~0x1; 469 + __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan)); 470 + } 471 + 472 + static inline void iop_chan_enable(struct iop_adma_chan *chan) 473 + { 474 + u32 adma_chan_ctrl; 475 + 476 + adma_chan_ctrl = __raw_readl(ADMA_ACCR(chan)); 477 + adma_chan_ctrl |= 0x1; 478 + __raw_writel(adma_chan_ctrl, ADMA_ACCR(chan)); 479 + } 480 + 481 + static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) 482 + { 483 + u32 status = __raw_readl(ADMA_ACSR(chan)); 484 + status &= (1 << 12); 485 + __raw_writel(status, ADMA_ACSR(chan)); 486 + } 487 + 488 + static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) 489 + { 490 + u32 status = __raw_readl(ADMA_ACSR(chan)); 491 + status &= (1 << 11); 492 + __raw_writel(status, ADMA_ACSR(chan)); 493 + } 494 + 495 + static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) 496 + { 497 + u32 status = __raw_readl(ADMA_ACSR(chan)); 498 + status &= (1 << 9) | (1 << 5) | (1 << 4) | (1 << 3); 499 + __raw_writel(status, ADMA_ACSR(chan)); 500 + } 501 + 502 + static inline int 503 + iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) 504 + { 505 + return test_bit(9, &status); 506 + } 507 + 508 + static inline int 509 + iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) 510 + { 511 + return test_bit(5, &status); 512 + } 513 + 514 + static inline int 515 + iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) 516 + { 517 + return test_bit(4, &status); 518 + } 519 + 520 + static inline int 521 + iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) 522 + { 523 + return test_bit(3, &status); 524 + } 525 + 526 + static inline int 527 + iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) 528 + { 529 + return 0; 530 + } 531 + 532 + static inline int 533 + iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) 534 + { 535 + return 0; 536 + } 537 + 538 + static inline int 539 + iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) 540 + { 541 + return 0; 542 + } 543 + 544 + #endif /* _ADMA_H */
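Everything in this header is a thin inline wrapper around the channel's memory-mapped registers (ADMA_ACCR/ACSR/ANDAR and friends). Purely as an illustration of how the helpers compose, and not the control flow of the iop-adma driver itself, a caller holding a prepared descriptor chain at bus address desc_phys (an assumed value) could do:

	/* hand the chain to an idle channel, start it, and wait for it to drain */
	if (!iop_chan_is_busy(chan))
		iop_chan_set_next_descriptor(chan, desc_phys);
	iop_chan_enable(chan);
	while (iop_chan_is_busy(chan))
		cpu_relax();
	iop_adma_device_clear_eoc_status(chan);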
+16 -22
include/asm-arm/arch-iop13xx/iop13xx.h
··· 166 166 #define IOP13XX_INIT_I2C_1 (1 << 1) 167 167 #define IOP13XX_INIT_I2C_2 (1 << 2) 168 168 169 - #define IQ81340_NUM_UART 2 170 - #define IQ81340_NUM_I2C 3 171 - #define IQ81340_NUM_PHYS_MAP_FLASH 1 172 - #define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART +\ 173 - IQ81340_NUM_I2C +\ 174 - IQ81340_NUM_PHYS_MAP_FLASH) 169 + /* ADMA selection flags */ 170 + /* INIT_ADMA_DEFAULT = Rely on CONFIG_IOP13XX_ADMA* */ 171 + #define IOP13XX_INIT_ADMA_DEFAULT (0) 172 + #define IOP13XX_INIT_ADMA_0 (1 << 0) 173 + #define IOP13XX_INIT_ADMA_1 (1 << 1) 174 + #define IOP13XX_INIT_ADMA_2 (1 << 2) 175 + 176 + /* Platform devices */ 177 + #define IQ81340_NUM_UART 2 178 + #define IQ81340_NUM_I2C 3 179 + #define IQ81340_NUM_PHYS_MAP_FLASH 1 180 + #define IQ81340_NUM_ADMA 3 181 + #define IQ81340_MAX_PLAT_DEVICES (IQ81340_NUM_UART + \ 182 + IQ81340_NUM_I2C + \ 183 + IQ81340_NUM_PHYS_MAP_FLASH + \ 184 + IQ81340_NUM_ADMA) 175 185 176 186 /*========================== PMMR offsets for key registers ============*/ 177 187 #define IOP13XX_ATU0_PMMR_OFFSET 0x00048000 ··· 454 444 /*==============================ADMA UNITS===============================*/ 455 445 #define IOP13XX_ADMA_PHYS_BASE(chan) IOP13XX_REG_ADDR32_PHYS((chan << 9)) 456 446 #define IOP13XX_ADMA_UPPER_PA(chan) (IOP13XX_ADMA_PHYS_BASE(chan) + 0xc0) 457 - #define IOP13XX_ADMA_OFFSET(chan, ofs) IOP13XX_REG_ADDR32((chan << 9) + (ofs)) 458 - 459 - #define IOP13XX_ADMA_ACCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x0) 460 - #define IOP13XX_ADMA_ACSR(chan) IOP13XX_ADMA_OFFSET(chan, 0x4) 461 - #define IOP13XX_ADMA_ADAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x8) 462 - #define IOP13XX_ADMA_IIPCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x18) 463 - #define IOP13XX_ADMA_IIPAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x1c) 464 - #define IOP13XX_ADMA_IIPUAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x20) 465 - #define IOP13XX_ADMA_ANDAR(chan) IOP13XX_ADMA_OFFSET(chan, 0x24) 466 - #define IOP13XX_ADMA_ADCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x28) 467 - #define IOP13XX_ADMA_CARMD(chan) IOP13XX_ADMA_OFFSET(chan, 0x2c) 468 - #define IOP13XX_ADMA_ABCR(chan) IOP13XX_ADMA_OFFSET(chan, 0x30) 469 - #define IOP13XX_ADMA_DLADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x34) 470 - #define IOP13XX_ADMA_DUADR(chan) IOP13XX_ADMA_OFFSET(chan, 0x38) 471 - #define IOP13XX_ADMA_SLAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x3c + (src <<3)) 472 - #define IOP13XX_ADMA_SUAR(src, chan) IOP13XX_ADMA_OFFSET(chan, 0x40 + (src <<3)) 473 447 474 448 /*==============================XSI BRIDGE===============================*/ 475 449 #define IOP13XX_XBG_BECSR IOP13XX_REG_ADDR32(0x178c)
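The IOP13XX_INIT_ADMA_* definitions follow the same pattern as the UART/I2C selection flags above them: one bit per unit, with 0 (IOP13XX_INIT_ADMA_DEFAULT) meaning "fall back to the CONFIG_IOP13XX_ADMA* defaults". As a purely hypothetical fragment (the variable and whatever consumes it are assumptions, not part of this patch), a board could restrict itself to two of the three channels with:

	/* expose only ADMA channels 0 and 2 on this board */
	unsigned long adma_mask = IOP13XX_INIT_ADMA_0 | IOP13XX_INIT_ADMA_2;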
+5
include/asm-arm/arch-iop32x/adma.h
··· 1 + #ifndef IOP32X_ADMA_H 2 + #define IOP32X_ADMA_H 3 + #include <asm/hardware/iop3xx-adma.h> 4 + #endif 5 +
+5
include/asm-arm/arch-iop33x/adma.h
··· 1 + #ifndef IOP33X_ADMA_H 2 + #define IOP33X_ADMA_H 3 + #include <asm/hardware/iop3xx-adma.h> 4 + #endif 5 +
+892
include/asm-arm/hardware/iop3xx-adma.h
··· 1 + /* 2 + * Copyright © 2006, Intel Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + * 13 + * You should have received a copy of the GNU General Public License along with 14 + * this program; if not, write to the Free Software Foundation, Inc., 15 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 16 + * 17 + */ 18 + #ifndef _ADMA_H 19 + #define _ADMA_H 20 + #include <linux/types.h> 21 + #include <linux/io.h> 22 + #include <asm/hardware.h> 23 + #include <asm/hardware/iop_adma.h> 24 + 25 + /* Memory copy units */ 26 + #define DMA_CCR(chan) (chan->mmr_base + 0x0) 27 + #define DMA_CSR(chan) (chan->mmr_base + 0x4) 28 + #define DMA_DAR(chan) (chan->mmr_base + 0xc) 29 + #define DMA_NDAR(chan) (chan->mmr_base + 0x10) 30 + #define DMA_PADR(chan) (chan->mmr_base + 0x14) 31 + #define DMA_PUADR(chan) (chan->mmr_base + 0x18) 32 + #define DMA_LADR(chan) (chan->mmr_base + 0x1c) 33 + #define DMA_BCR(chan) (chan->mmr_base + 0x20) 34 + #define DMA_DCR(chan) (chan->mmr_base + 0x24) 35 + 36 + /* Application accelerator unit */ 37 + #define AAU_ACR(chan) (chan->mmr_base + 0x0) 38 + #define AAU_ASR(chan) (chan->mmr_base + 0x4) 39 + #define AAU_ADAR(chan) (chan->mmr_base + 0x8) 40 + #define AAU_ANDAR(chan) (chan->mmr_base + 0xc) 41 + #define AAU_SAR(src, chan) (chan->mmr_base + (0x10 + ((src) << 2))) 42 + #define AAU_DAR(chan) (chan->mmr_base + 0x20) 43 + #define AAU_ABCR(chan) (chan->mmr_base + 0x24) 44 + #define AAU_ADCR(chan) (chan->mmr_base + 0x28) 45 + #define AAU_SAR_EDCR(src_edc) (chan->mmr_base + (0x02c + ((src_edc-4) << 2))) 46 + #define AAU_EDCR0_IDX 8 47 + #define AAU_EDCR1_IDX 17 48 + #define AAU_EDCR2_IDX 26 49 + 50 + #define DMA0_ID 0 51 + #define DMA1_ID 1 52 + #define AAU_ID 2 53 + 54 + struct iop3xx_aau_desc_ctrl { 55 + unsigned int int_en:1; 56 + unsigned int blk1_cmd_ctrl:3; 57 + unsigned int blk2_cmd_ctrl:3; 58 + unsigned int blk3_cmd_ctrl:3; 59 + unsigned int blk4_cmd_ctrl:3; 60 + unsigned int blk5_cmd_ctrl:3; 61 + unsigned int blk6_cmd_ctrl:3; 62 + unsigned int blk7_cmd_ctrl:3; 63 + unsigned int blk8_cmd_ctrl:3; 64 + unsigned int blk_ctrl:2; 65 + unsigned int dual_xor_en:1; 66 + unsigned int tx_complete:1; 67 + unsigned int zero_result_err:1; 68 + unsigned int zero_result_en:1; 69 + unsigned int dest_write_en:1; 70 + }; 71 + 72 + struct iop3xx_aau_e_desc_ctrl { 73 + unsigned int reserved:1; 74 + unsigned int blk1_cmd_ctrl:3; 75 + unsigned int blk2_cmd_ctrl:3; 76 + unsigned int blk3_cmd_ctrl:3; 77 + unsigned int blk4_cmd_ctrl:3; 78 + unsigned int blk5_cmd_ctrl:3; 79 + unsigned int blk6_cmd_ctrl:3; 80 + unsigned int blk7_cmd_ctrl:3; 81 + unsigned int blk8_cmd_ctrl:3; 82 + unsigned int reserved2:7; 83 + }; 84 + 85 + struct iop3xx_dma_desc_ctrl { 86 + unsigned int pci_transaction:4; 87 + unsigned int int_en:1; 88 + unsigned int dac_cycle_en:1; 89 + unsigned int mem_to_mem_en:1; 90 + unsigned int crc_data_tx_en:1; 91 + unsigned int crc_gen_en:1; 92 + unsigned int crc_seed_dis:1; 93 + unsigned int reserved:21; 94 + unsigned int crc_tx_complete:1; 95 + }; 96 + 97 + struct iop3xx_desc_dma { 98 + u32 next_desc; 99 + union { 100 + u32 
pci_src_addr; 101 + u32 pci_dest_addr; 102 + u32 src_addr; 103 + }; 104 + union { 105 + u32 upper_pci_src_addr; 106 + u32 upper_pci_dest_addr; 107 + }; 108 + union { 109 + u32 local_pci_src_addr; 110 + u32 local_pci_dest_addr; 111 + u32 dest_addr; 112 + }; 113 + u32 byte_count; 114 + union { 115 + u32 desc_ctrl; 116 + struct iop3xx_dma_desc_ctrl desc_ctrl_field; 117 + }; 118 + u32 crc_addr; 119 + }; 120 + 121 + struct iop3xx_desc_aau { 122 + u32 next_desc; 123 + u32 src[4]; 124 + u32 dest_addr; 125 + u32 byte_count; 126 + union { 127 + u32 desc_ctrl; 128 + struct iop3xx_aau_desc_ctrl desc_ctrl_field; 129 + }; 130 + union { 131 + u32 src_addr; 132 + u32 e_desc_ctrl; 133 + struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; 134 + } src_edc[31]; 135 + }; 136 + 137 + struct iop3xx_aau_gfmr { 138 + unsigned int gfmr1:8; 139 + unsigned int gfmr2:8; 140 + unsigned int gfmr3:8; 141 + unsigned int gfmr4:8; 142 + }; 143 + 144 + struct iop3xx_desc_pq_xor { 145 + u32 next_desc; 146 + u32 src[3]; 147 + union { 148 + u32 data_mult1; 149 + struct iop3xx_aau_gfmr data_mult1_field; 150 + }; 151 + u32 dest_addr; 152 + u32 byte_count; 153 + union { 154 + u32 desc_ctrl; 155 + struct iop3xx_aau_desc_ctrl desc_ctrl_field; 156 + }; 157 + union { 158 + u32 src_addr; 159 + u32 e_desc_ctrl; 160 + struct iop3xx_aau_e_desc_ctrl e_desc_ctrl_field; 161 + u32 data_multiplier; 162 + struct iop3xx_aau_gfmr data_mult_field; 163 + u32 reserved; 164 + } src_edc_gfmr[19]; 165 + }; 166 + 167 + struct iop3xx_desc_dual_xor { 168 + u32 next_desc; 169 + u32 src0_addr; 170 + u32 src1_addr; 171 + u32 h_src_addr; 172 + u32 d_src_addr; 173 + u32 h_dest_addr; 174 + u32 byte_count; 175 + union { 176 + u32 desc_ctrl; 177 + struct iop3xx_aau_desc_ctrl desc_ctrl_field; 178 + }; 179 + u32 d_dest_addr; 180 + }; 181 + 182 + union iop3xx_desc { 183 + struct iop3xx_desc_aau *aau; 184 + struct iop3xx_desc_dma *dma; 185 + struct iop3xx_desc_pq_xor *pq_xor; 186 + struct iop3xx_desc_dual_xor *dual_xor; 187 + void *ptr; 188 + }; 189 + 190 + static inline int iop_adma_get_max_xor(void) 191 + { 192 + return 32; 193 + } 194 + 195 + static inline u32 iop_chan_get_current_descriptor(struct iop_adma_chan *chan) 196 + { 197 + int id = chan->device->id; 198 + 199 + switch (id) { 200 + case DMA0_ID: 201 + case DMA1_ID: 202 + return __raw_readl(DMA_DAR(chan)); 203 + case AAU_ID: 204 + return __raw_readl(AAU_ADAR(chan)); 205 + default: 206 + BUG(); 207 + } 208 + return 0; 209 + } 210 + 211 + static inline void iop_chan_set_next_descriptor(struct iop_adma_chan *chan, 212 + u32 next_desc_addr) 213 + { 214 + int id = chan->device->id; 215 + 216 + switch (id) { 217 + case DMA0_ID: 218 + case DMA1_ID: 219 + __raw_writel(next_desc_addr, DMA_NDAR(chan)); 220 + break; 221 + case AAU_ID: 222 + __raw_writel(next_desc_addr, AAU_ANDAR(chan)); 223 + break; 224 + } 225 + 226 + } 227 + 228 + #define IOP_ADMA_STATUS_BUSY (1 << 10) 229 + #define IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT (1024) 230 + #define IOP_ADMA_XOR_MAX_BYTE_COUNT (16 * 1024 * 1024) 231 + #define IOP_ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) 232 + 233 + static inline int iop_chan_is_busy(struct iop_adma_chan *chan) 234 + { 235 + u32 status = __raw_readl(DMA_CSR(chan)); 236 + return (status & IOP_ADMA_STATUS_BUSY) ? 1 : 0; 237 + } 238 + 239 + static inline int iop_desc_is_aligned(struct iop_adma_desc_slot *desc, 240 + int num_slots) 241 + { 242 + /* num_slots will only ever be 1, 2, 4, or 8 */ 243 + return (desc->idx & (num_slots - 1)) ? 0 : 1; 244 + } 245 + 246 + /* to do: support large (i.e. 
> hw max) buffer sizes */ 247 + static inline int iop_chan_memcpy_slot_count(size_t len, int *slots_per_op) 248 + { 249 + *slots_per_op = 1; 250 + return 1; 251 + } 252 + 253 + /* to do: support large (i.e. > hw max) buffer sizes */ 254 + static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) 255 + { 256 + *slots_per_op = 1; 257 + return 1; 258 + } 259 + 260 + static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, 261 + int *slots_per_op) 262 + { 263 + const static int slot_count_table[] = { 0, 264 + 1, 1, 1, 1, /* 01 - 04 */ 265 + 2, 2, 2, 2, /* 05 - 08 */ 266 + 4, 4, 4, 4, /* 09 - 12 */ 267 + 4, 4, 4, 4, /* 13 - 16 */ 268 + 8, 8, 8, 8, /* 17 - 20 */ 269 + 8, 8, 8, 8, /* 21 - 24 */ 270 + 8, 8, 8, 8, /* 25 - 28 */ 271 + 8, 8, 8, 8, /* 29 - 32 */ 272 + }; 273 + *slots_per_op = slot_count_table[src_cnt]; 274 + return *slots_per_op; 275 + } 276 + 277 + static inline int 278 + iop_chan_interrupt_slot_count(int *slots_per_op, struct iop_adma_chan *chan) 279 + { 280 + switch (chan->device->id) { 281 + case DMA0_ID: 282 + case DMA1_ID: 283 + return iop_chan_memcpy_slot_count(0, slots_per_op); 284 + case AAU_ID: 285 + return iop3xx_aau_xor_slot_count(0, 2, slots_per_op); 286 + default: 287 + BUG(); 288 + } 289 + return 0; 290 + } 291 + 292 + static inline int iop_chan_xor_slot_count(size_t len, int src_cnt, 293 + int *slots_per_op) 294 + { 295 + int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); 296 + 297 + if (len <= IOP_ADMA_XOR_MAX_BYTE_COUNT) 298 + return slot_cnt; 299 + 300 + len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; 301 + while (len > IOP_ADMA_XOR_MAX_BYTE_COUNT) { 302 + len -= IOP_ADMA_XOR_MAX_BYTE_COUNT; 303 + slot_cnt += *slots_per_op; 304 + } 305 + 306 + if (len) 307 + slot_cnt += *slots_per_op; 308 + 309 + return slot_cnt; 310 + } 311 + 312 + /* zero sum on iop3xx is limited to 1k at a time so it requires multiple 313 + * descriptors 314 + */ 315 + static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, 316 + int *slots_per_op) 317 + { 318 + int slot_cnt = iop3xx_aau_xor_slot_count(len, src_cnt, slots_per_op); 319 + 320 + if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) 321 + return slot_cnt; 322 + 323 + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; 324 + while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { 325 + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; 326 + slot_cnt += *slots_per_op; 327 + } 328 + 329 + if (len) 330 + slot_cnt += *slots_per_op; 331 + 332 + return slot_cnt; 333 + } 334 + 335 + static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, 336 + struct iop_adma_chan *chan) 337 + { 338 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 339 + 340 + switch (chan->device->id) { 341 + case DMA0_ID: 342 + case DMA1_ID: 343 + return hw_desc.dma->dest_addr; 344 + case AAU_ID: 345 + return hw_desc.aau->dest_addr; 346 + default: 347 + BUG(); 348 + } 349 + return 0; 350 + } 351 + 352 + static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, 353 + struct iop_adma_chan *chan) 354 + { 355 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 356 + 357 + switch (chan->device->id) { 358 + case DMA0_ID: 359 + case DMA1_ID: 360 + return hw_desc.dma->byte_count; 361 + case AAU_ID: 362 + return hw_desc.aau->byte_count; 363 + default: 364 + BUG(); 365 + } 366 + return 0; 367 + } 368 + 369 + /* translate the src_idx to a descriptor word index */ 370 + static inline int __desc_idx(int src_idx) 371 + { 372 + const static int desc_idx_table[] = { 0, 0, 0, 0, 373 + 0, 1, 2, 3, 374 + 5, 6, 7, 8, 375 + 9, 10, 
11, 12, 376 + 14, 15, 16, 17, 377 + 18, 19, 20, 21, 378 + 23, 24, 25, 26, 379 + 27, 28, 29, 30, 380 + }; 381 + 382 + return desc_idx_table[src_idx]; 383 + } 384 + 385 + static inline u32 iop_desc_get_src_addr(struct iop_adma_desc_slot *desc, 386 + struct iop_adma_chan *chan, 387 + int src_idx) 388 + { 389 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 390 + 391 + switch (chan->device->id) { 392 + case DMA0_ID: 393 + case DMA1_ID: 394 + return hw_desc.dma->src_addr; 395 + case AAU_ID: 396 + break; 397 + default: 398 + BUG(); 399 + } 400 + 401 + if (src_idx < 4) 402 + return hw_desc.aau->src[src_idx]; 403 + else 404 + return hw_desc.aau->src_edc[__desc_idx(src_idx)].src_addr; 405 + } 406 + 407 + static inline void iop3xx_aau_desc_set_src_addr(struct iop3xx_desc_aau *hw_desc, 408 + int src_idx, dma_addr_t addr) 409 + { 410 + if (src_idx < 4) 411 + hw_desc->src[src_idx] = addr; 412 + else 413 + hw_desc->src_edc[__desc_idx(src_idx)].src_addr = addr; 414 + } 415 + 416 + static inline void 417 + iop_desc_init_memcpy(struct iop_adma_desc_slot *desc, int int_en) 418 + { 419 + struct iop3xx_desc_dma *hw_desc = desc->hw_desc; 420 + union { 421 + u32 value; 422 + struct iop3xx_dma_desc_ctrl field; 423 + } u_desc_ctrl; 424 + 425 + u_desc_ctrl.value = 0; 426 + u_desc_ctrl.field.mem_to_mem_en = 1; 427 + u_desc_ctrl.field.pci_transaction = 0xe; /* memory read block */ 428 + u_desc_ctrl.field.int_en = int_en; 429 + hw_desc->desc_ctrl = u_desc_ctrl.value; 430 + hw_desc->upper_pci_src_addr = 0; 431 + hw_desc->crc_addr = 0; 432 + } 433 + 434 + static inline void 435 + iop_desc_init_memset(struct iop_adma_desc_slot *desc, int int_en) 436 + { 437 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; 438 + union { 439 + u32 value; 440 + struct iop3xx_aau_desc_ctrl field; 441 + } u_desc_ctrl; 442 + 443 + u_desc_ctrl.value = 0; 444 + u_desc_ctrl.field.blk1_cmd_ctrl = 0x2; /* memory block fill */ 445 + u_desc_ctrl.field.dest_write_en = 1; 446 + u_desc_ctrl.field.int_en = int_en; 447 + hw_desc->desc_ctrl = u_desc_ctrl.value; 448 + } 449 + 450 + static inline u32 451 + iop3xx_desc_init_xor(struct iop3xx_desc_aau *hw_desc, int src_cnt, int int_en) 452 + { 453 + int i, shift; 454 + u32 edcr; 455 + union { 456 + u32 value; 457 + struct iop3xx_aau_desc_ctrl field; 458 + } u_desc_ctrl; 459 + 460 + u_desc_ctrl.value = 0; 461 + switch (src_cnt) { 462 + case 25 ... 32: 463 + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ 464 + edcr = 0; 465 + shift = 1; 466 + for (i = 24; i < src_cnt; i++) { 467 + edcr |= (1 << shift); 468 + shift += 3; 469 + } 470 + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = edcr; 471 + src_cnt = 24; 472 + /* fall through */ 473 + case 17 ... 24: 474 + if (!u_desc_ctrl.field.blk_ctrl) { 475 + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; 476 + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ 477 + } 478 + edcr = 0; 479 + shift = 1; 480 + for (i = 16; i < src_cnt; i++) { 481 + edcr |= (1 << shift); 482 + shift += 3; 483 + } 484 + hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = edcr; 485 + src_cnt = 16; 486 + /* fall through */ 487 + case 9 ... 16: 488 + if (!u_desc_ctrl.field.blk_ctrl) 489 + u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ 490 + edcr = 0; 491 + shift = 1; 492 + for (i = 8; i < src_cnt; i++) { 493 + edcr |= (1 << shift); 494 + shift += 3; 495 + } 496 + hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = edcr; 497 + src_cnt = 8; 498 + /* fall through */ 499 + case 2 ... 
8: 500 + shift = 1; 501 + for (i = 0; i < src_cnt; i++) { 502 + u_desc_ctrl.value |= (1 << shift); 503 + shift += 3; 504 + } 505 + 506 + if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) 507 + u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ 508 + } 509 + 510 + u_desc_ctrl.field.dest_write_en = 1; 511 + u_desc_ctrl.field.blk1_cmd_ctrl = 0x7; /* direct fill */ 512 + u_desc_ctrl.field.int_en = int_en; 513 + hw_desc->desc_ctrl = u_desc_ctrl.value; 514 + 515 + return u_desc_ctrl.value; 516 + } 517 + 518 + static inline void 519 + iop_desc_init_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) 520 + { 521 + iop3xx_desc_init_xor(desc->hw_desc, src_cnt, int_en); 522 + } 523 + 524 + /* return the number of operations */ 525 + static inline int 526 + iop_desc_init_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) 527 + { 528 + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; 529 + struct iop3xx_desc_aau *hw_desc, *prev_hw_desc, *iter; 530 + union { 531 + u32 value; 532 + struct iop3xx_aau_desc_ctrl field; 533 + } u_desc_ctrl; 534 + int i, j; 535 + 536 + hw_desc = desc->hw_desc; 537 + 538 + for (i = 0, j = 0; (slot_cnt -= slots_per_op) >= 0; 539 + i += slots_per_op, j++) { 540 + iter = iop_hw_desc_slot_idx(hw_desc, i); 541 + u_desc_ctrl.value = iop3xx_desc_init_xor(iter, src_cnt, int_en); 542 + u_desc_ctrl.field.dest_write_en = 0; 543 + u_desc_ctrl.field.zero_result_en = 1; 544 + u_desc_ctrl.field.int_en = int_en; 545 + iter->desc_ctrl = u_desc_ctrl.value; 546 + 547 + /* for the subsequent descriptors preserve the store queue 548 + * and chain them together 549 + */ 550 + if (i) { 551 + prev_hw_desc = 552 + iop_hw_desc_slot_idx(hw_desc, i - slots_per_op); 553 + prev_hw_desc->next_desc = 554 + (u32) (desc->async_tx.phys + (i << 5)); 555 + } 556 + } 557 + 558 + return j; 559 + } 560 + 561 + static inline void 562 + iop_desc_init_null_xor(struct iop_adma_desc_slot *desc, int src_cnt, int int_en) 563 + { 564 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; 565 + union { 566 + u32 value; 567 + struct iop3xx_aau_desc_ctrl field; 568 + } u_desc_ctrl; 569 + 570 + u_desc_ctrl.value = 0; 571 + switch (src_cnt) { 572 + case 25 ... 32: 573 + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ 574 + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; 575 + /* fall through */ 576 + case 17 ... 24: 577 + if (!u_desc_ctrl.field.blk_ctrl) { 578 + hw_desc->src_edc[AAU_EDCR2_IDX].e_desc_ctrl = 0; 579 + u_desc_ctrl.field.blk_ctrl = 0x3; /* use EDCR[2:0] */ 580 + } 581 + hw_desc->src_edc[AAU_EDCR1_IDX].e_desc_ctrl = 0; 582 + /* fall through */ 583 + case 9 ... 16: 584 + if (!u_desc_ctrl.field.blk_ctrl) 585 + u_desc_ctrl.field.blk_ctrl = 0x2; /* use EDCR0 */ 586 + hw_desc->src_edc[AAU_EDCR0_IDX].e_desc_ctrl = 0; 587 + /* fall through */ 588 + case 1 ... 
8: 589 + if (!u_desc_ctrl.field.blk_ctrl && src_cnt > 4) 590 + u_desc_ctrl.field.blk_ctrl = 0x1; /* use mini-desc */ 591 + } 592 + 593 + u_desc_ctrl.field.dest_write_en = 0; 594 + u_desc_ctrl.field.int_en = int_en; 595 + hw_desc->desc_ctrl = u_desc_ctrl.value; 596 + } 597 + 598 + static inline void iop_desc_set_byte_count(struct iop_adma_desc_slot *desc, 599 + struct iop_adma_chan *chan, 600 + u32 byte_count) 601 + { 602 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 603 + 604 + switch (chan->device->id) { 605 + case DMA0_ID: 606 + case DMA1_ID: 607 + hw_desc.dma->byte_count = byte_count; 608 + break; 609 + case AAU_ID: 610 + hw_desc.aau->byte_count = byte_count; 611 + break; 612 + default: 613 + BUG(); 614 + } 615 + } 616 + 617 + static inline void 618 + iop_desc_init_interrupt(struct iop_adma_desc_slot *desc, 619 + struct iop_adma_chan *chan) 620 + { 621 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 622 + 623 + switch (chan->device->id) { 624 + case DMA0_ID: 625 + case DMA1_ID: 626 + iop_desc_init_memcpy(desc, 1); 627 + hw_desc.dma->byte_count = 0; 628 + hw_desc.dma->dest_addr = 0; 629 + hw_desc.dma->src_addr = 0; 630 + break; 631 + case AAU_ID: 632 + iop_desc_init_null_xor(desc, 2, 1); 633 + hw_desc.aau->byte_count = 0; 634 + hw_desc.aau->dest_addr = 0; 635 + hw_desc.aau->src[0] = 0; 636 + hw_desc.aau->src[1] = 0; 637 + break; 638 + default: 639 + BUG(); 640 + } 641 + } 642 + 643 + static inline void 644 + iop_desc_set_zero_sum_byte_count(struct iop_adma_desc_slot *desc, u32 len) 645 + { 646 + int slots_per_op = desc->slots_per_op; 647 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; 648 + int i = 0; 649 + 650 + if (len <= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { 651 + hw_desc->byte_count = len; 652 + } else { 653 + do { 654 + iter = iop_hw_desc_slot_idx(hw_desc, i); 655 + iter->byte_count = IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; 656 + len -= IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT; 657 + i += slots_per_op; 658 + } while (len > IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT); 659 + 660 + if (len) { 661 + iter = iop_hw_desc_slot_idx(hw_desc, i); 662 + iter->byte_count = len; 663 + } 664 + } 665 + } 666 + 667 + static inline void iop_desc_set_dest_addr(struct iop_adma_desc_slot *desc, 668 + struct iop_adma_chan *chan, 669 + dma_addr_t addr) 670 + { 671 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 672 + 673 + switch (chan->device->id) { 674 + case DMA0_ID: 675 + case DMA1_ID: 676 + hw_desc.dma->dest_addr = addr; 677 + break; 678 + case AAU_ID: 679 + hw_desc.aau->dest_addr = addr; 680 + break; 681 + default: 682 + BUG(); 683 + } 684 + } 685 + 686 + static inline void iop_desc_set_memcpy_src_addr(struct iop_adma_desc_slot *desc, 687 + dma_addr_t addr) 688 + { 689 + struct iop3xx_desc_dma *hw_desc = desc->hw_desc; 690 + hw_desc->src_addr = addr; 691 + } 692 + 693 + static inline void 694 + iop_desc_set_zero_sum_src_addr(struct iop_adma_desc_slot *desc, int src_idx, 695 + dma_addr_t addr) 696 + { 697 + 698 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc, *iter; 699 + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; 700 + int i; 701 + 702 + for (i = 0; (slot_cnt -= slots_per_op) >= 0; 703 + i += slots_per_op, addr += IOP_ADMA_ZERO_SUM_MAX_BYTE_COUNT) { 704 + iter = iop_hw_desc_slot_idx(hw_desc, i); 705 + iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); 706 + } 707 + } 708 + 709 + static inline void iop_desc_set_xor_src_addr(struct iop_adma_desc_slot *desc, 710 + int src_idx, dma_addr_t addr) 711 + { 712 + 713 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc, 
*iter; 714 + int slot_cnt = desc->slot_cnt, slots_per_op = desc->slots_per_op; 715 + int i; 716 + 717 + for (i = 0; (slot_cnt -= slots_per_op) >= 0; 718 + i += slots_per_op, addr += IOP_ADMA_XOR_MAX_BYTE_COUNT) { 719 + iter = iop_hw_desc_slot_idx(hw_desc, i); 720 + iop3xx_aau_desc_set_src_addr(iter, src_idx, addr); 721 + } 722 + } 723 + 724 + static inline void iop_desc_set_next_desc(struct iop_adma_desc_slot *desc, 725 + u32 next_desc_addr) 726 + { 727 + /* hw_desc->next_desc is the same location for all channels */ 728 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 729 + BUG_ON(hw_desc.dma->next_desc); 730 + hw_desc.dma->next_desc = next_desc_addr; 731 + } 732 + 733 + static inline u32 iop_desc_get_next_desc(struct iop_adma_desc_slot *desc) 734 + { 735 + /* hw_desc->next_desc is the same location for all channels */ 736 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 737 + return hw_desc.dma->next_desc; 738 + } 739 + 740 + static inline void iop_desc_clear_next_desc(struct iop_adma_desc_slot *desc) 741 + { 742 + /* hw_desc->next_desc is the same location for all channels */ 743 + union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; 744 + hw_desc.dma->next_desc = 0; 745 + } 746 + 747 + static inline void iop_desc_set_block_fill_val(struct iop_adma_desc_slot *desc, 748 + u32 val) 749 + { 750 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; 751 + hw_desc->src[0] = val; 752 + } 753 + 754 + static inline int iop_desc_get_zero_result(struct iop_adma_desc_slot *desc) 755 + { 756 + struct iop3xx_desc_aau *hw_desc = desc->hw_desc; 757 + struct iop3xx_aau_desc_ctrl desc_ctrl = hw_desc->desc_ctrl_field; 758 + 759 + BUG_ON(!(desc_ctrl.tx_complete && desc_ctrl.zero_result_en)); 760 + return desc_ctrl.zero_result_err; 761 + } 762 + 763 + static inline void iop_chan_append(struct iop_adma_chan *chan) 764 + { 765 + u32 dma_chan_ctrl; 766 + /* workaround dropped interrupts on 3xx */ 767 + mod_timer(&chan->cleanup_watchdog, jiffies + msecs_to_jiffies(3)); 768 + 769 + dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); 770 + dma_chan_ctrl |= 0x2; 771 + __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); 772 + } 773 + 774 + static inline void iop_chan_idle(int busy, struct iop_adma_chan *chan) 775 + { 776 + if (!busy) 777 + del_timer(&chan->cleanup_watchdog); 778 + } 779 + 780 + static inline u32 iop_chan_get_status(struct iop_adma_chan *chan) 781 + { 782 + return __raw_readl(DMA_CSR(chan)); 783 + } 784 + 785 + static inline void iop_chan_disable(struct iop_adma_chan *chan) 786 + { 787 + u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); 788 + dma_chan_ctrl &= ~1; 789 + __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); 790 + } 791 + 792 + static inline void iop_chan_enable(struct iop_adma_chan *chan) 793 + { 794 + u32 dma_chan_ctrl = __raw_readl(DMA_CCR(chan)); 795 + 796 + dma_chan_ctrl |= 1; 797 + __raw_writel(dma_chan_ctrl, DMA_CCR(chan)); 798 + } 799 + 800 + static inline void iop_adma_device_clear_eot_status(struct iop_adma_chan *chan) 801 + { 802 + u32 status = __raw_readl(DMA_CSR(chan)); 803 + status &= (1 << 9); 804 + __raw_writel(status, DMA_CSR(chan)); 805 + } 806 + 807 + static inline void iop_adma_device_clear_eoc_status(struct iop_adma_chan *chan) 808 + { 809 + u32 status = __raw_readl(DMA_CSR(chan)); 810 + status &= (1 << 8); 811 + __raw_writel(status, DMA_CSR(chan)); 812 + } 813 + 814 + static inline void iop_adma_device_clear_err_status(struct iop_adma_chan *chan) 815 + { 816 + u32 status = __raw_readl(DMA_CSR(chan)); 817 + 818 + switch (chan->device->id) { 819 + case DMA0_ID: 820 + case 
DMA1_ID: 821 + status &= (1 << 5) | (1 << 3) | (1 << 2) | (1 << 1); 822 + break; 823 + case AAU_ID: 824 + status &= (1 << 5); 825 + break; 826 + default: 827 + BUG(); 828 + } 829 + 830 + __raw_writel(status, DMA_CSR(chan)); 831 + } 832 + 833 + static inline int 834 + iop_is_err_int_parity(unsigned long status, struct iop_adma_chan *chan) 835 + { 836 + return 0; 837 + } 838 + 839 + static inline int 840 + iop_is_err_mcu_abort(unsigned long status, struct iop_adma_chan *chan) 841 + { 842 + return 0; 843 + } 844 + 845 + static inline int 846 + iop_is_err_int_tabort(unsigned long status, struct iop_adma_chan *chan) 847 + { 848 + return 0; 849 + } 850 + 851 + static inline int 852 + iop_is_err_int_mabort(unsigned long status, struct iop_adma_chan *chan) 853 + { 854 + return test_bit(5, &status); 855 + } 856 + 857 + static inline int 858 + iop_is_err_pci_tabort(unsigned long status, struct iop_adma_chan *chan) 859 + { 860 + switch (chan->device->id) { 861 + case DMA0_ID: 862 + case DMA1_ID: 863 + return test_bit(2, &status); 864 + default: 865 + return 0; 866 + } 867 + } 868 + 869 + static inline int 870 + iop_is_err_pci_mabort(unsigned long status, struct iop_adma_chan *chan) 871 + { 872 + switch (chan->device->id) { 873 + case DMA0_ID: 874 + case DMA1_ID: 875 + return test_bit(3, &status); 876 + default: 877 + return 0; 878 + } 879 + } 880 + 881 + static inline int 882 + iop_is_err_split_tx(unsigned long status, struct iop_adma_chan *chan) 883 + { 884 + switch (chan->device->id) { 885 + case DMA0_ID: 886 + case DMA1_ID: 887 + return test_bit(1, &status); 888 + default: 889 + return 0; 890 + } 891 + } 892 + #endif /* _ADMA_H */
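The slot accounting in iop_chan_xor_slot_count() above is easy to sanity-check by hand. Below is a minimal, standalone restatement in user-space C; the slot table and the 16 MB per-descriptor limit are copied from the header, while the function and program names are illustrative only:

#include <stdio.h>
#include <stddef.h>

#define XOR_MAX_BYTE_COUNT (16 * 1024 * 1024)	/* IOP_ADMA_XOR_MAX_BYTE_COUNT */

static int xor_slot_count(size_t len, int src_cnt, int *slots_per_op)
{
	static const int slot_count_table[] = { 0,
		1, 1, 1, 1,	2, 2, 2, 2,
		4, 4, 4, 4,	4, 4, 4, 4,
		8, 8, 8, 8,	8, 8, 8, 8,
		8, 8, 8, 8,	8, 8, 8, 8,
	};
	int slot_cnt;

	*slots_per_op = slot_count_table[src_cnt];
	slot_cnt = *slots_per_op;

	if (len <= XOR_MAX_BYTE_COUNT)
		return slot_cnt;

	len -= XOR_MAX_BYTE_COUNT;
	while (len > XOR_MAX_BYTE_COUNT) {
		len -= XOR_MAX_BYTE_COUNT;
		slot_cnt += *slots_per_op;
	}
	if (len)
		slot_cnt += *slots_per_op;

	return slot_cnt;
}

int main(void)
{
	int slots_per_op;
	int slots = xor_slot_count(40 * 1024 * 1024, 5, &slots_per_op);

	/* a 40 MB, 5-source xor: 3 descriptors of 2 slots each */
	printf("%d slots, %d per operation\n", slots, slots_per_op);	/* 6, 2 */
	return 0;
}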
+8 -60
include/asm-arm/hardware/iop3xx.h
··· 144 144 #define IOP3XX_IAR (volatile u32 *)IOP3XX_REG_ADDR(0x0380) 145 145 146 146 /* DMA Controller */ 147 - #define IOP3XX_DMA0_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0400) 148 - #define IOP3XX_DMA0_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0404) 149 - #define IOP3XX_DMA0_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x040c) 150 - #define IOP3XX_DMA0_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0410) 151 - #define IOP3XX_DMA0_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0414) 152 - #define IOP3XX_DMA0_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0418) 153 - #define IOP3XX_DMA0_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x041c) 154 - #define IOP3XX_DMA0_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0420) 155 - #define IOP3XX_DMA0_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0424) 156 - #define IOP3XX_DMA1_CCR (volatile u32 *)IOP3XX_REG_ADDR(0x0440) 157 - #define IOP3XX_DMA1_CSR (volatile u32 *)IOP3XX_REG_ADDR(0x0444) 158 - #define IOP3XX_DMA1_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x044c) 159 - #define IOP3XX_DMA1_NDAR (volatile u32 *)IOP3XX_REG_ADDR(0x0450) 160 - #define IOP3XX_DMA1_PADR (volatile u32 *)IOP3XX_REG_ADDR(0x0454) 161 - #define IOP3XX_DMA1_PUADR (volatile u32 *)IOP3XX_REG_ADDR(0x0458) 162 - #define IOP3XX_DMA1_LADR (volatile u32 *)IOP3XX_REG_ADDR(0x045c) 163 - #define IOP3XX_DMA1_BCR (volatile u32 *)IOP3XX_REG_ADDR(0x0460) 164 - #define IOP3XX_DMA1_DCR (volatile u32 *)IOP3XX_REG_ADDR(0x0464) 147 + #define IOP3XX_DMA_PHYS_BASE(chan) (IOP3XX_PERIPHERAL_PHYS_BASE + \ 148 + (0x400 + (chan << 6))) 149 + #define IOP3XX_DMA_UPPER_PA(chan) (IOP3XX_DMA_PHYS_BASE(chan) + 0x27) 165 150 166 151 /* Peripheral bus interface */ 167 152 #define IOP3XX_PBCR (volatile u32 *)IOP3XX_REG_ADDR(0x0680) ··· 195 210 #define IOP_TMR_RATIO_1_1 0x00 196 211 197 212 /* Application accelerator unit */ 198 - #define IOP3XX_AAU_ACR (volatile u32 *)IOP3XX_REG_ADDR(0x0800) 199 - #define IOP3XX_AAU_ASR (volatile u32 *)IOP3XX_REG_ADDR(0x0804) 200 - #define IOP3XX_AAU_ADAR (volatile u32 *)IOP3XX_REG_ADDR(0x0808) 201 - #define IOP3XX_AAU_ANDAR (volatile u32 *)IOP3XX_REG_ADDR(0x080c) 202 - #define IOP3XX_AAU_SAR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0810) 203 - #define IOP3XX_AAU_SAR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0814) 204 - #define IOP3XX_AAU_SAR3 (volatile u32 *)IOP3XX_REG_ADDR(0x0818) 205 - #define IOP3XX_AAU_SAR4 (volatile u32 *)IOP3XX_REG_ADDR(0x081c) 206 - #define IOP3XX_AAU_DAR (volatile u32 *)IOP3XX_REG_ADDR(0x0820) 207 - #define IOP3XX_AAU_ABCR (volatile u32 *)IOP3XX_REG_ADDR(0x0824) 208 - #define IOP3XX_AAU_ADCR (volatile u32 *)IOP3XX_REG_ADDR(0x0828) 209 - #define IOP3XX_AAU_SAR5 (volatile u32 *)IOP3XX_REG_ADDR(0x082c) 210 - #define IOP3XX_AAU_SAR6 (volatile u32 *)IOP3XX_REG_ADDR(0x0830) 211 - #define IOP3XX_AAU_SAR7 (volatile u32 *)IOP3XX_REG_ADDR(0x0834) 212 - #define IOP3XX_AAU_SAR8 (volatile u32 *)IOP3XX_REG_ADDR(0x0838) 213 - #define IOP3XX_AAU_EDCR0 (volatile u32 *)IOP3XX_REG_ADDR(0x083c) 214 - #define IOP3XX_AAU_SAR9 (volatile u32 *)IOP3XX_REG_ADDR(0x0840) 215 - #define IOP3XX_AAU_SAR10 (volatile u32 *)IOP3XX_REG_ADDR(0x0844) 216 - #define IOP3XX_AAU_SAR11 (volatile u32 *)IOP3XX_REG_ADDR(0x0848) 217 - #define IOP3XX_AAU_SAR12 (volatile u32 *)IOP3XX_REG_ADDR(0x084c) 218 - #define IOP3XX_AAU_SAR13 (volatile u32 *)IOP3XX_REG_ADDR(0x0850) 219 - #define IOP3XX_AAU_SAR14 (volatile u32 *)IOP3XX_REG_ADDR(0x0854) 220 - #define IOP3XX_AAU_SAR15 (volatile u32 *)IOP3XX_REG_ADDR(0x0858) 221 - #define IOP3XX_AAU_SAR16 (volatile u32 *)IOP3XX_REG_ADDR(0x085c) 222 - #define IOP3XX_AAU_EDCR1 (volatile u32 *)IOP3XX_REG_ADDR(0x0860) 223 - #define IOP3XX_AAU_SAR17 
(volatile u32 *)IOP3XX_REG_ADDR(0x0864) 224 - #define IOP3XX_AAU_SAR18 (volatile u32 *)IOP3XX_REG_ADDR(0x0868) 225 - #define IOP3XX_AAU_SAR19 (volatile u32 *)IOP3XX_REG_ADDR(0x086c) 226 - #define IOP3XX_AAU_SAR20 (volatile u32 *)IOP3XX_REG_ADDR(0x0870) 227 - #define IOP3XX_AAU_SAR21 (volatile u32 *)IOP3XX_REG_ADDR(0x0874) 228 - #define IOP3XX_AAU_SAR22 (volatile u32 *)IOP3XX_REG_ADDR(0x0878) 229 - #define IOP3XX_AAU_SAR23 (volatile u32 *)IOP3XX_REG_ADDR(0x087c) 230 - #define IOP3XX_AAU_SAR24 (volatile u32 *)IOP3XX_REG_ADDR(0x0880) 231 - #define IOP3XX_AAU_EDCR2 (volatile u32 *)IOP3XX_REG_ADDR(0x0884) 232 - #define IOP3XX_AAU_SAR25 (volatile u32 *)IOP3XX_REG_ADDR(0x0888) 233 - #define IOP3XX_AAU_SAR26 (volatile u32 *)IOP3XX_REG_ADDR(0x088c) 234 - #define IOP3XX_AAU_SAR27 (volatile u32 *)IOP3XX_REG_ADDR(0x0890) 235 - #define IOP3XX_AAU_SAR28 (volatile u32 *)IOP3XX_REG_ADDR(0x0894) 236 - #define IOP3XX_AAU_SAR29 (volatile u32 *)IOP3XX_REG_ADDR(0x0898) 237 - #define IOP3XX_AAU_SAR30 (volatile u32 *)IOP3XX_REG_ADDR(0x089c) 238 - #define IOP3XX_AAU_SAR31 (volatile u32 *)IOP3XX_REG_ADDR(0x08a0) 239 - #define IOP3XX_AAU_SAR32 (volatile u32 *)IOP3XX_REG_ADDR(0x08a4) 213 + #define IOP3XX_AAU_PHYS_BASE (IOP3XX_PERIPHERAL_PHYS_BASE + 0x800) 214 + #define IOP3XX_AAU_UPPER_PA (IOP3XX_AAU_PHYS_BASE + 0xa7) 240 215 241 216 /* I2C bus interface unit */ 242 217 #define IOP3XX_ICR0 (volatile u32 *)IOP3XX_REG_ADDR(0x1680) ··· 274 329 asm volatile("mcr p6, 0, %0, c6, c1, 0" : : "r" (val)); 275 330 } 276 331 332 + extern struct platform_device iop3xx_dma_0_channel; 333 + extern struct platform_device iop3xx_dma_1_channel; 334 + extern struct platform_device iop3xx_aau_channel; 277 335 extern struct platform_device iop3xx_i2c0_device; 278 336 extern struct platform_device iop3xx_i2c1_device; 279 337
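The removed per-register defines are not lost information: the new window macros span exactly the same register ranges. Writing base as shorthand for IOP3XX_PERIPHERAL_PHYS_BASE, the arithmetic works out as follows (illustrative note, not part of the patch):

/*
 * IOP3XX_DMA_PHYS_BASE(0) == base + 0x400	(old IOP3XX_DMA0_CCR)
 * IOP3XX_DMA_UPPER_PA(0)  == base + 0x427	(last byte of IOP3XX_DMA0_DCR)
 * IOP3XX_DMA_PHYS_BASE(1) == base + 0x440	(old IOP3XX_DMA1_CCR)
 * IOP3XX_DMA_UPPER_PA(1)  == base + 0x467	(last byte of IOP3XX_DMA1_DCR)
 * IOP3XX_AAU_PHYS_BASE    == base + 0x800	(old IOP3XX_AAU_ACR)
 * IOP3XX_AAU_UPPER_PA     == base + 0x8a7	(last byte of IOP3XX_AAU_SAR32)
 */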
+118
include/asm-arm/hardware/iop_adma.h
··· 1 + /* 2 + * Copyright © 2006, Intel Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + * 13 + * You should have received a copy of the GNU General Public License along with 14 + * this program; if not, write to the Free Software Foundation, Inc., 15 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 16 + * 17 + */ 18 + #ifndef IOP_ADMA_H 19 + #define IOP_ADMA_H 20 + #include <linux/types.h> 21 + #include <linux/dmaengine.h> 22 + #include <linux/interrupt.h> 23 + 24 + #define IOP_ADMA_SLOT_SIZE 32 25 + #define IOP_ADMA_THRESHOLD 4 26 + 27 + /** 28 + * struct iop_adma_device - internal representation of an ADMA device 29 + * @pdev: Platform device 30 + * @id: HW ADMA Device selector 31 + * @dma_desc_pool: base of DMA descriptor region (DMA address) 32 + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) 33 + * @common: embedded struct dma_device 34 + */ 35 + struct iop_adma_device { 36 + struct platform_device *pdev; 37 + int id; 38 + dma_addr_t dma_desc_pool; 39 + void *dma_desc_pool_virt; 40 + struct dma_device common; 41 + }; 42 + 43 + /** 44 + * struct iop_adma_chan - internal representation of an ADMA device 45 + * @pending: allows batching of hardware operations 46 + * @completed_cookie: identifier for the most recently completed operation 47 + * @lock: serializes enqueue/dequeue operations to the slot pool 48 + * @mmr_base: memory mapped register base 49 + * @chain: device chain view of the descriptors 50 + * @device: parent device 51 + * @common: common dmaengine channel object members 52 + * @last_used: place holder for allocation to continue from where it left off 53 + * @all_slots: complete domain of slots usable by the channel 54 + * @cleanup_watchdog: workaround missed interrupts on iop3xx 55 + * @slots_allocated: records the actual size of the descriptor slot pool 56 + * @irq_tasklet: bottom half where iop_adma_slot_cleanup runs 57 + */ 58 + struct iop_adma_chan { 59 + int pending; 60 + dma_cookie_t completed_cookie; 61 + spinlock_t lock; /* protects the descriptor slot pool */ 62 + void __iomem *mmr_base; 63 + struct list_head chain; 64 + struct iop_adma_device *device; 65 + struct dma_chan common; 66 + struct iop_adma_desc_slot *last_used; 67 + struct list_head all_slots; 68 + struct timer_list cleanup_watchdog; 69 + int slots_allocated; 70 + struct tasklet_struct irq_tasklet; 71 + }; 72 + 73 + /** 74 + * struct iop_adma_desc_slot - IOP-ADMA software descriptor 75 + * @slot_node: node on the iop_adma_chan.all_slots list 76 + * @chain_node: node on the op_adma_chan.chain list 77 + * @hw_desc: virtual address of the hardware descriptor chain 78 + * @phys: hardware address of the hardware descriptor chain 79 + * @group_head: first operation in a transaction 80 + * @slot_cnt: total slots used in an transaction (group of operations) 81 + * @slots_per_op: number of slots per operation 82 + * @idx: pool index 83 + * @unmap_src_cnt: number of xor sources 84 + * @unmap_len: transaction bytecount 85 + * @async_tx: support for the async_tx api 86 + * @group_list: list of slots that make up a multi-descriptor 
transaction 87 + * for example transfer lengths larger than the supported hw max 88 + * @xor_check_result: result of zero sum 89 + * @crc32_result: result crc calculation 90 + */ 91 + struct iop_adma_desc_slot { 92 + struct list_head slot_node; 93 + struct list_head chain_node; 94 + void *hw_desc; 95 + struct iop_adma_desc_slot *group_head; 96 + u16 slot_cnt; 97 + u16 slots_per_op; 98 + u16 idx; 99 + u16 unmap_src_cnt; 100 + size_t unmap_len; 101 + struct dma_async_tx_descriptor async_tx; 102 + union { 103 + u32 *xor_check_result; 104 + u32 *crc32_result; 105 + }; 106 + }; 107 + 108 + struct iop_adma_platform_data { 109 + int hw_id; 110 + dma_cap_mask_t cap_mask; 111 + size_t pool_size; 112 + }; 113 + 114 + #define to_iop_sw_desc(addr_hw_desc) \ 115 + container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc) 116 + #define iop_hw_desc_slot_idx(hw_desc, idx) \ 117 + ( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) ) 118 + #endif
+156
include/linux/async_tx.h
··· 1 + /* 2 + * Copyright © 2006, Intel Corporation. 3 + * 4 + * This program is free software; you can redistribute it and/or modify it 5 + * under the terms and conditions of the GNU General Public License, 6 + * version 2, as published by the Free Software Foundation. 7 + * 8 + * This program is distributed in the hope it will be useful, but WITHOUT 9 + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 10 + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for 11 + * more details. 12 + * 13 + * You should have received a copy of the GNU General Public License along with 14 + * this program; if not, write to the Free Software Foundation, Inc., 15 + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 16 + * 17 + */ 18 + #ifndef _ASYNC_TX_H_ 19 + #define _ASYNC_TX_H_ 20 + #include <linux/dmaengine.h> 21 + #include <linux/spinlock.h> 22 + #include <linux/interrupt.h> 23 + 24 + /** 25 + * dma_chan_ref - object used to manage dma channels received from the 26 + * dmaengine core. 27 + * @chan - the channel being tracked 28 + * @node - node for the channel to be placed on async_tx_master_list 29 + * @rcu - for list_del_rcu 30 + * @count - number of times this channel is listed in the pool 31 + * (for channels with multiple capabiities) 32 + */ 33 + struct dma_chan_ref { 34 + struct dma_chan *chan; 35 + struct list_head node; 36 + struct rcu_head rcu; 37 + atomic_t count; 38 + }; 39 + 40 + /** 41 + * async_tx_flags - modifiers for the async_* calls 42 + * @ASYNC_TX_XOR_ZERO_DST: this flag must be used for xor operations where the 43 + * the destination address is not a source. The asynchronous case handles this 44 + * implicitly, the synchronous case needs to zero the destination block. 45 + * @ASYNC_TX_XOR_DROP_DST: this flag must be used if the destination address is 46 + * also one of the source addresses. In the synchronous case the destination 47 + * address is an implied source, whereas the asynchronous case it must be listed 48 + * as a source. The destination address must be the first address in the source 49 + * array. 50 + * @ASYNC_TX_ASSUME_COHERENT: skip cache maintenance operations 51 + * @ASYNC_TX_ACK: immediately ack the descriptor, precludes setting up a 52 + * dependency chain 53 + * @ASYNC_TX_DEP_ACK: ack the dependency descriptor. Useful for chaining. 
54 + * @ASYNC_TX_KMAP_SRC: if the transaction is to be performed synchronously 55 + * take an atomic mapping (KM_USER0) on the source page(s) 56 + * @ASYNC_TX_KMAP_DST: if the transaction is to be performed synchronously 57 + * take an atomic mapping (KM_USER0) on the dest page(s) 58 + */ 59 + enum async_tx_flags { 60 + ASYNC_TX_XOR_ZERO_DST = (1 << 0), 61 + ASYNC_TX_XOR_DROP_DST = (1 << 1), 62 + ASYNC_TX_ASSUME_COHERENT = (1 << 2), 63 + ASYNC_TX_ACK = (1 << 3), 64 + ASYNC_TX_DEP_ACK = (1 << 4), 65 + ASYNC_TX_KMAP_SRC = (1 << 5), 66 + ASYNC_TX_KMAP_DST = (1 << 6), 67 + }; 68 + 69 + #ifdef CONFIG_DMA_ENGINE 70 + void async_tx_issue_pending_all(void); 71 + enum dma_status dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx); 72 + void async_tx_run_dependencies(struct dma_async_tx_descriptor *tx); 73 + struct dma_chan * 74 + async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 75 + enum dma_transaction_type tx_type); 76 + #else 77 + static inline void async_tx_issue_pending_all(void) 78 + { 79 + do { } while (0); 80 + } 81 + 82 + static inline enum dma_status 83 + dma_wait_for_async_tx(struct dma_async_tx_descriptor *tx) 84 + { 85 + return DMA_SUCCESS; 86 + } 87 + 88 + static inline void 89 + async_tx_run_dependencies(struct dma_async_tx_descriptor *tx, 90 + struct dma_chan *host_chan) 91 + { 92 + do { } while (0); 93 + } 94 + 95 + static inline struct dma_chan * 96 + async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, 97 + enum dma_transaction_type tx_type) 98 + { 99 + return NULL; 100 + } 101 + #endif 102 + 103 + /** 104 + * async_tx_sync_epilog - actions to take if an operation is run synchronously 105 + * @flags: async_tx flags 106 + * @depend_tx: transaction depends on depend_tx 107 + * @cb_fn: function to call when the transaction completes 108 + * @cb_fn_param: parameter to pass to the callback routine 109 + */ 110 + static inline void 111 + async_tx_sync_epilog(unsigned long flags, 112 + struct dma_async_tx_descriptor *depend_tx, 113 + dma_async_tx_callback cb_fn, void *cb_fn_param) 114 + { 115 + if (cb_fn) 116 + cb_fn(cb_fn_param); 117 + 118 + if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) 119 + async_tx_ack(depend_tx); 120 + } 121 + 122 + void 123 + async_tx_submit(struct dma_chan *chan, struct dma_async_tx_descriptor *tx, 124 + enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, 125 + dma_async_tx_callback cb_fn, void *cb_fn_param); 126 + 127 + struct dma_async_tx_descriptor * 128 + async_xor(struct page *dest, struct page **src_list, unsigned int offset, 129 + int src_cnt, size_t len, enum async_tx_flags flags, 130 + struct dma_async_tx_descriptor *depend_tx, 131 + dma_async_tx_callback cb_fn, void *cb_fn_param); 132 + 133 + struct dma_async_tx_descriptor * 134 + async_xor_zero_sum(struct page *dest, struct page **src_list, 135 + unsigned int offset, int src_cnt, size_t len, 136 + u32 *result, enum async_tx_flags flags, 137 + struct dma_async_tx_descriptor *depend_tx, 138 + dma_async_tx_callback cb_fn, void *cb_fn_param); 139 + 140 + struct dma_async_tx_descriptor * 141 + async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, 142 + unsigned int src_offset, size_t len, enum async_tx_flags flags, 143 + struct dma_async_tx_descriptor *depend_tx, 144 + dma_async_tx_callback cb_fn, void *cb_fn_param); 145 + 146 + struct dma_async_tx_descriptor * 147 + async_memset(struct page *dest, int val, unsigned int offset, 148 + size_t len, enum async_tx_flags flags, 149 + struct dma_async_tx_descriptor *depend_tx, 150 + 
dma_async_tx_callback cb_fn, void *cb_fn_param); 151 + 152 + struct dma_async_tx_descriptor * 153 + async_trigger_callback(enum async_tx_flags flags, 154 + struct dma_async_tx_descriptor *depend_tx, 155 + dma_async_tx_callback cb_fn, void *cb_fn_param); 156 + #endif /* _ASYNC_TX_H_ */
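As a rough illustration of how a client strings these calls together (not taken from this patch set; the names, the completion-based wait, and the lack of error handling are all illustrative), a copy can be chained into an xor by passing the returned descriptor as depend_tx and letting ASYNC_TX_DEP_ACK retire it:

#include <linux/async_tx.h>
#include <linux/completion.h>

static void chain_done(void *ctx)
{
	complete(ctx);		/* wake up whoever is waiting on the chain */
}

/* copy 'copy_src' into the first source page, then xor all sources into
 * 'dest' once the copy has completed
 */
static void copy_then_xor(struct page *dest, struct page *copy_src,
			  struct page **xor_srcs, int src_cnt, size_t len,
			  struct completion *done)
{
	struct dma_async_tx_descriptor *tx;

	/* step 1: no dependency, no callback */
	tx = async_memcpy(xor_srcs[0], copy_src, 0, 0, len,
			  0, NULL, NULL, NULL);

	/* step 2: depends on the copy; ASYNC_TX_XOR_ZERO_DST because dest
	 * is not one of the sources, ASYNC_TX_DEP_ACK to retire the memcpy
	 * descriptor, ASYNC_TX_ACK because nothing depends on the xor
	 */
	tx = async_xor(dest, xor_srcs, 0, src_cnt, len,
		       ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK | ASYNC_TX_DEP_ACK,
		       tx, chain_done, done);

	async_tx_issue_pending_all();
}

When no channel offers the needed capability the api falls back to a synchronous path (see async_tx_sync_epilog above), so the same call sequence works with or without an offload engine.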
+191 -118
include/linux/dmaengine.h
··· 21 21 #ifndef DMAENGINE_H 22 22 #define DMAENGINE_H 23 23 24 - #ifdef CONFIG_DMA_ENGINE 25 - 26 24 #include <linux/device.h> 27 25 #include <linux/uio.h> 28 26 #include <linux/kref.h> 29 27 #include <linux/completion.h> 30 28 #include <linux/rcupdate.h> 29 + #include <linux/dma-mapping.h> 31 30 32 31 /** 33 - * enum dma_event - resource PNP/power managment events 32 + * enum dma_state - resource PNP/power managment state 34 33 * @DMA_RESOURCE_SUSPEND: DMA device going into low power state 35 34 * @DMA_RESOURCE_RESUME: DMA device returning to full power 36 - * @DMA_RESOURCE_ADDED: DMA device added to the system 35 + * @DMA_RESOURCE_AVAILABLE: DMA device available to the system 37 36 * @DMA_RESOURCE_REMOVED: DMA device removed from the system 38 37 */ 39 - enum dma_event { 38 + enum dma_state { 40 39 DMA_RESOURCE_SUSPEND, 41 40 DMA_RESOURCE_RESUME, 42 - DMA_RESOURCE_ADDED, 41 + DMA_RESOURCE_AVAILABLE, 43 42 DMA_RESOURCE_REMOVED, 43 + }; 44 + 45 + /** 46 + * enum dma_state_client - state of the channel in the client 47 + * @DMA_ACK: client would like to use, or was using this channel 48 + * @DMA_DUP: client has already seen this channel, or is not using this channel 49 + * @DMA_NAK: client does not want to see any more channels 50 + */ 51 + enum dma_state_client { 52 + DMA_ACK, 53 + DMA_DUP, 54 + DMA_NAK, 44 55 }; 45 56 46 57 /** ··· 76 65 }; 77 66 78 67 /** 68 + * enum dma_transaction_type - DMA transaction types/indexes 69 + */ 70 + enum dma_transaction_type { 71 + DMA_MEMCPY, 72 + DMA_XOR, 73 + DMA_PQ_XOR, 74 + DMA_DUAL_XOR, 75 + DMA_PQ_UPDATE, 76 + DMA_ZERO_SUM, 77 + DMA_PQ_ZERO_SUM, 78 + DMA_MEMSET, 79 + DMA_MEMCPY_CRC32C, 80 + DMA_INTERRUPT, 81 + }; 82 + 83 + /* last transaction type for creation of the capabilities mask */ 84 + #define DMA_TX_TYPE_END (DMA_INTERRUPT + 1) 85 + 86 + /** 87 + * dma_cap_mask_t - capabilities bitmap modeled after cpumask_t. 88 + * See linux/cpumask.h 89 + */ 90 + typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; 91 + 92 + /** 79 93 * struct dma_chan_percpu - the per-CPU part of struct dma_chan 80 94 * @refcount: local_t used for open-coded "bigref" counting 81 95 * @memcpy_count: transaction counter ··· 116 80 117 81 /** 118 82 * struct dma_chan - devices supply DMA channels, clients use them 119 - * @client: ptr to the client user of this chan, will be %NULL when unused 120 83 * @device: ptr to the dma device who supplies this channel, always !%NULL 121 84 * @cookie: last cookie value returned to client 122 85 * @chan_id: channel ID for sysfs ··· 123 88 * @refcount: kref, used in "bigref" slow-mode 124 89 * @slow_ref: indicates that the DMA channel is free 125 90 * @rcu: the DMA channel's RCU head 126 - * @client_node: used to add this to the client chan list 127 91 * @device_node: used to add this to the device chan list 128 92 * @local: per-cpu pointer to a struct dma_chan_percpu 129 93 */ 130 94 struct dma_chan { 131 - struct dma_client *client; 132 95 struct dma_device *device; 133 96 dma_cookie_t cookie; 134 97 ··· 138 105 int slow_ref; 139 106 struct rcu_head rcu; 140 107 141 - struct list_head client_node; 142 108 struct list_head device_node; 143 109 struct dma_chan_percpu *local; 144 110 }; 111 + 145 112 146 113 void dma_chan_cleanup(struct kref *kref); 147 114 ··· 167 134 168 135 /* 169 136 * typedef dma_event_callback - function pointer to a DMA event callback 137 + * For each channel added to the system this routine is called for each client. 
138 + * If the client would like to use the channel it returns '1' to signal (ack) 139 + * the dmaengine core to take out a reference on the channel and its 140 + * corresponding device. A client must not 'ack' an available channel more 141 + * than once. When a channel is removed all clients are notified. If a client 142 + * is using the channel it must 'ack' the removal. A client must not 'ack' a 143 + * removed channel more than once. 144 + * @client - 'this' pointer for the client context 145 + * @chan - channel to be acted upon 146 + * @state - available or removed 170 147 */ 171 - typedef void (*dma_event_callback) (struct dma_client *client, 172 - struct dma_chan *chan, enum dma_event event); 148 + struct dma_client; 149 + typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, 150 + struct dma_chan *chan, enum dma_state state); 173 151 174 152 /** 175 153 * struct dma_client - info on the entity making use of DMA services 176 154 * @event_callback: func ptr to call when something happens 177 - * @chan_count: number of chans allocated 178 - * @chans_desired: number of chans requested. Can be +/- chan_count 179 - * @lock: protects access to the channels list 180 - * @channels: the list of DMA channels allocated 155 + * @cap_mask: only return channels that satisfy the requested capabilities 156 + * a value of zero corresponds to any capability 181 157 * @global_node: list_head for global dma_client_list 182 158 */ 183 159 struct dma_client { 184 160 dma_event_callback event_callback; 185 - unsigned int chan_count; 186 - unsigned int chans_desired; 187 - 188 - spinlock_t lock; 189 - struct list_head channels; 161 + dma_cap_mask_t cap_mask; 190 162 struct list_head global_node; 163 + }; 164 + 165 + typedef void (*dma_async_tx_callback)(void *dma_async_param); 166 + /** 167 + * struct dma_async_tx_descriptor - async transaction descriptor 168 + * ---dma generic offload fields--- 169 + * @cookie: tracking cookie for this transaction, set to -EBUSY if 170 + * this tx is sitting on a dependency list 171 + * @ack: the descriptor can not be reused until the client acknowledges 172 + * receipt, i.e. 
has has a chance to establish any dependency chains 173 + * @phys: physical address of the descriptor 174 + * @tx_list: driver common field for operations that require multiple 175 + * descriptors 176 + * @chan: target channel for this operation 177 + * @tx_submit: set the prepared descriptor(s) to be executed by the engine 178 + * @tx_set_dest: set a destination address in a hardware descriptor 179 + * @tx_set_src: set a source address in a hardware descriptor 180 + * @callback: routine to call after this operation is complete 181 + * @callback_param: general parameter to pass to the callback routine 182 + * ---async_tx api specific fields--- 183 + * @depend_list: at completion this list of transactions are submitted 184 + * @depend_node: allow this transaction to be executed after another 185 + * transaction has completed, possibly on another channel 186 + * @parent: pointer to the next level up in the dependency chain 187 + * @lock: protect the dependency list 188 + */ 189 + struct dma_async_tx_descriptor { 190 + dma_cookie_t cookie; 191 + int ack; 192 + dma_addr_t phys; 193 + struct list_head tx_list; 194 + struct dma_chan *chan; 195 + dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); 196 + void (*tx_set_dest)(dma_addr_t addr, 197 + struct dma_async_tx_descriptor *tx, int index); 198 + void (*tx_set_src)(dma_addr_t addr, 199 + struct dma_async_tx_descriptor *tx, int index); 200 + dma_async_tx_callback callback; 201 + void *callback_param; 202 + struct list_head depend_list; 203 + struct list_head depend_node; 204 + struct dma_async_tx_descriptor *parent; 205 + spinlock_t lock; 191 206 }; 192 207 193 208 /** ··· 243 162 * @chancnt: how many DMA channels are supported 244 163 * @channels: the list of struct dma_chan 245 164 * @global_node: list_head for global dma_device_list 165 + * @cap_mask: one or more dma_capability flags 166 + * @max_xor: maximum number of xor sources, 0 if no capability 246 167 * @refcount: reference count 247 168 * @done: IO completion struct 248 169 * @dev_id: unique device ID 170 + * @dev: struct device reference for dma mapping api 249 171 * @device_alloc_chan_resources: allocate resources and return the 250 172 * number of allocated descriptors 251 173 * @device_free_chan_resources: release DMA channel's resources 252 - * @device_memcpy_buf_to_buf: memcpy buf pointer to buf pointer 253 - * @device_memcpy_buf_to_pg: memcpy buf pointer to struct page 254 - * @device_memcpy_pg_to_pg: memcpy struct page/offset to struct page/offset 255 - * @device_memcpy_complete: poll the status of an IOAT DMA transaction 256 - * @device_memcpy_issue_pending: push appended descriptors to hardware 174 + * @device_prep_dma_memcpy: prepares a memcpy operation 175 + * @device_prep_dma_xor: prepares a xor operation 176 + * @device_prep_dma_zero_sum: prepares a zero_sum operation 177 + * @device_prep_dma_memset: prepares a memset operation 178 + * @device_prep_dma_interrupt: prepares an end of chain interrupt operation 179 + * @device_dependency_added: async_tx notifies the channel about new deps 180 + * @device_issue_pending: push pending transactions to hardware 257 181 */ 258 182 struct dma_device { 259 183 260 184 unsigned int chancnt; 261 185 struct list_head channels; 262 186 struct list_head global_node; 187 + dma_cap_mask_t cap_mask; 188 + int max_xor; 263 189 264 190 struct kref refcount; 265 191 struct completion done; 266 192 267 193 int dev_id; 194 + struct device *dev; 268 195 269 196 int (*device_alloc_chan_resources)(struct dma_chan *chan); 270 197 void 
(*device_free_chan_resources)(struct dma_chan *chan); 271 - dma_cookie_t (*device_memcpy_buf_to_buf)(struct dma_chan *chan, 272 - void *dest, void *src, size_t len); 273 - dma_cookie_t (*device_memcpy_buf_to_pg)(struct dma_chan *chan, 274 - struct page *page, unsigned int offset, void *kdata, 275 - size_t len); 276 - dma_cookie_t (*device_memcpy_pg_to_pg)(struct dma_chan *chan, 277 - struct page *dest_pg, unsigned int dest_off, 278 - struct page *src_pg, unsigned int src_off, size_t len); 279 - enum dma_status (*device_memcpy_complete)(struct dma_chan *chan, 198 + 199 + struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( 200 + struct dma_chan *chan, size_t len, int int_en); 201 + struct dma_async_tx_descriptor *(*device_prep_dma_xor)( 202 + struct dma_chan *chan, unsigned int src_cnt, size_t len, 203 + int int_en); 204 + struct dma_async_tx_descriptor *(*device_prep_dma_zero_sum)( 205 + struct dma_chan *chan, unsigned int src_cnt, size_t len, 206 + u32 *result, int int_en); 207 + struct dma_async_tx_descriptor *(*device_prep_dma_memset)( 208 + struct dma_chan *chan, int value, size_t len, int int_en); 209 + struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( 210 + struct dma_chan *chan); 211 + 212 + void (*device_dependency_added)(struct dma_chan *chan); 213 + enum dma_status (*device_is_tx_complete)(struct dma_chan *chan, 280 214 dma_cookie_t cookie, dma_cookie_t *last, 281 215 dma_cookie_t *used); 282 - void (*device_memcpy_issue_pending)(struct dma_chan *chan); 216 + void (*device_issue_pending)(struct dma_chan *chan); 283 217 }; 284 218 285 219 /* --- public DMA engine API --- */ 286 220 287 - struct dma_client *dma_async_client_register(dma_event_callback event_callback); 221 + void dma_async_client_register(struct dma_client *client); 288 222 void dma_async_client_unregister(struct dma_client *client); 289 - void dma_async_client_chan_request(struct dma_client *client, 290 - unsigned int number); 291 - 292 - /** 293 - * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses 294 - * @chan: DMA channel to offload copy to 295 - * @dest: destination address (virtual) 296 - * @src: source address (virtual) 297 - * @len: length 298 - * 299 - * Both @dest and @src must be mappable to a bus address according to the 300 - * DMA mapping API rules for streaming mappings. 301 - * Both @dest and @src must stay memory resident (kernel memory or locked 302 - * user space pages). 303 - */ 304 - static inline dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, 305 - void *dest, void *src, size_t len) 306 - { 307 - int cpu = get_cpu(); 308 - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 309 - per_cpu_ptr(chan->local, cpu)->memcpy_count++; 310 - put_cpu(); 311 - 312 - return chan->device->device_memcpy_buf_to_buf(chan, dest, src, len); 313 - } 314 - 315 - /** 316 - * dma_async_memcpy_buf_to_pg - offloaded copy from address to page 317 - * @chan: DMA channel to offload copy to 318 - * @page: destination page 319 - * @offset: offset in page to copy to 320 - * @kdata: source address (virtual) 321 - * @len: length 322 - * 323 - * Both @page/@offset and @kdata must be mappable to a bus address according 324 - * to the DMA mapping API rules for streaming mappings. 
325 - * Both @page/@offset and @kdata must stay memory resident (kernel memory or 326 - * locked user space pages) 327 - */ 328 - static inline dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, 329 - struct page *page, unsigned int offset, void *kdata, size_t len) 330 - { 331 - int cpu = get_cpu(); 332 - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 333 - per_cpu_ptr(chan->local, cpu)->memcpy_count++; 334 - put_cpu(); 335 - 336 - return chan->device->device_memcpy_buf_to_pg(chan, page, offset, 337 - kdata, len); 338 - } 339 - 340 - /** 341 - * dma_async_memcpy_pg_to_pg - offloaded copy from page to page 342 - * @chan: DMA channel to offload copy to 343 - * @dest_pg: destination page 344 - * @dest_off: offset in page to copy to 345 - * @src_pg: source page 346 - * @src_off: offset in page to copy from 347 - * @len: length 348 - * 349 - * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus 350 - * address according to the DMA mapping API rules for streaming mappings. 351 - * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident 352 - * (kernel memory or locked user space pages). 353 - */ 354 - static inline dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, 223 + void dma_async_client_chan_request(struct dma_client *client); 224 + dma_cookie_t dma_async_memcpy_buf_to_buf(struct dma_chan *chan, 225 + void *dest, void *src, size_t len); 226 + dma_cookie_t dma_async_memcpy_buf_to_pg(struct dma_chan *chan, 227 + struct page *page, unsigned int offset, void *kdata, size_t len); 228 + dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, 355 229 struct page *dest_pg, unsigned int dest_off, struct page *src_pg, 356 - unsigned int src_off, size_t len) 357 - { 358 - int cpu = get_cpu(); 359 - per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; 360 - per_cpu_ptr(chan->local, cpu)->memcpy_count++; 361 - put_cpu(); 230 + unsigned int src_off, size_t len); 231 + void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, 232 + struct dma_chan *chan); 362 233 363 - return chan->device->device_memcpy_pg_to_pg(chan, dest_pg, dest_off, 364 - src_pg, src_off, len); 234 + static inline void 235 + async_tx_ack(struct dma_async_tx_descriptor *tx) 236 + { 237 + tx->ack = 1; 365 238 } 366 239 240 + #define first_dma_cap(mask) __first_dma_cap(&(mask)) 241 + static inline int __first_dma_cap(const dma_cap_mask_t *srcp) 242 + { 243 + return min_t(int, DMA_TX_TYPE_END, 244 + find_first_bit(srcp->bits, DMA_TX_TYPE_END)); 245 + } 246 + 247 + #define next_dma_cap(n, mask) __next_dma_cap((n), &(mask)) 248 + static inline int __next_dma_cap(int n, const dma_cap_mask_t *srcp) 249 + { 250 + return min_t(int, DMA_TX_TYPE_END, 251 + find_next_bit(srcp->bits, DMA_TX_TYPE_END, n+1)); 252 + } 253 + 254 + #define dma_cap_set(tx, mask) __dma_cap_set((tx), &(mask)) 255 + static inline void 256 + __dma_cap_set(enum dma_transaction_type tx_type, dma_cap_mask_t *dstp) 257 + { 258 + set_bit(tx_type, dstp->bits); 259 + } 260 + 261 + #define dma_has_cap(tx, mask) __dma_has_cap((tx), &(mask)) 262 + static inline int 263 + __dma_has_cap(enum dma_transaction_type tx_type, dma_cap_mask_t *srcp) 264 + { 265 + return test_bit(tx_type, srcp->bits); 266 + } 267 + 268 + #define for_each_dma_cap_mask(cap, mask) \ 269 + for ((cap) = first_dma_cap(mask); \ 270 + (cap) < DMA_TX_TYPE_END; \ 271 + (cap) = next_dma_cap((cap), (mask))) 272 + 367 273 /** 368 - * dma_async_memcpy_issue_pending - flush pending copies to HW 274 + * dma_async_issue_pending - 
flush pending transactions to HW 369 275 * @chan: target DMA channel 370 276 * 371 277 * This allows drivers to push copies to HW in batches, 372 278 * reducing MMIO writes where possible. 373 279 */ 374 - static inline void dma_async_memcpy_issue_pending(struct dma_chan *chan) 280 + static inline void dma_async_issue_pending(struct dma_chan *chan) 375 281 { 376 - return chan->device->device_memcpy_issue_pending(chan); 282 + return chan->device->device_issue_pending(chan); 377 283 } 378 284 285 + #define dma_async_memcpy_issue_pending(chan) dma_async_issue_pending(chan) 286 + 379 287 /** 380 - * dma_async_memcpy_complete - poll for transaction completion 288 + * dma_async_is_tx_complete - poll for transaction completion 381 289 * @chan: DMA channel 382 290 * @cookie: transaction identifier to check status of 383 291 * @last: returns last completed cookie, can be NULL ··· 376 306 * internal state and can be used with dma_async_is_complete() to check 377 307 * the status of multiple cookies without re-checking hardware state. 378 308 */ 379 - static inline enum dma_status dma_async_memcpy_complete(struct dma_chan *chan, 309 + static inline enum dma_status dma_async_is_tx_complete(struct dma_chan *chan, 380 310 dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used) 381 311 { 382 - return chan->device->device_memcpy_complete(chan, cookie, last, used); 312 + return chan->device->device_is_tx_complete(chan, cookie, last, used); 383 313 } 314 + 315 + #define dma_async_memcpy_complete(chan, cookie, last, used)\ 316 + dma_async_is_tx_complete(chan, cookie, last, used) 384 317 385 318 /** 386 319 * dma_async_is_complete - test a cookie against chan state ··· 407 334 return DMA_IN_PROGRESS; 408 335 } 409 336 337 + enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie); 410 338 411 339 /* --- DMA device --- */ 412 340 ··· 436 362 struct dma_pinned_list *pinned_list, struct page *page, 437 363 unsigned int offset, size_t len); 438 364 439 - #endif /* CONFIG_DMA_ENGINE */ 440 365 #endif /* DMAENGINE_H */
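The comment block above describes the new per-channel ack protocol; the net_dma client in net/core/dev.c further down is the in-tree user. A bare-bones client that grabs a single memcpy-capable channel might look roughly like this (entirely illustrative, including every name):

#include <linux/init.h>
#include <linux/module.h>
#include <linux/dmaengine.h>

static struct dma_chan *my_chan;

static enum dma_state_client
my_event(struct dma_client *client, struct dma_chan *chan,
	 enum dma_state state)
{
	switch (state) {
	case DMA_RESOURCE_AVAILABLE:
		if (my_chan)
			return DMA_DUP;	/* already have a channel */
		my_chan = chan;
		return DMA_ACK;		/* core takes a reference for us */
	case DMA_RESOURCE_REMOVED:
		if (chan != my_chan)
			return DMA_DUP;
		my_chan = NULL;
		return DMA_ACK;		/* acknowledge the removal */
	default:			/* ignore suspend/resume here */
		return DMA_DUP;
	}
}

static struct dma_client my_client = {
	.event_callback = my_event,
};

static int __init my_client_init(void)
{
	dma_cap_set(DMA_MEMCPY, my_client.cap_mask);
	dma_async_client_register(&my_client);
	dma_async_client_chan_request(&my_client);
	return 0;
}
module_init(my_client_init);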
+3
include/linux/pci_ids.h
··· 479 479 #define PCI_DEVICE_ID_IBM_ICOM_V2_ONE_PORT_RVX_ONE_PORT_MDM_PCIE 0x0361 480 480 #define PCI_DEVICE_ID_IBM_ICOM_FOUR_PORT_MODEL 0x252 481 481 482 + #define PCI_VENDOR_ID_UNISYS 0x1018 483 + #define PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR 0x001C 484 + 482 485 #define PCI_VENDOR_ID_COMPEX2 0x101a /* pci.ids says "AT&T GIS (NCR)" */ 483 486 #define PCI_DEVICE_ID_COMPEX2_100VG 0x0005 484 487
+94 -3
include/linux/raid/raid5.h
··· 116 116 * attach a request to an active stripe (add_stripe_bh()) 117 117 * lockdev attach-buffer unlockdev 118 118 * handle a stripe (handle_stripe()) 119 - * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io 119 + * lockstripe clrSTRIPE_HANDLE ... 120 + * (lockdev check-buffers unlockdev) .. 121 + * change-state .. 122 + * record io/ops needed unlockstripe schedule io/ops 120 123 * release an active stripe (release_stripe()) 121 124 * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev 122 125 * 123 126 * The refcount counts each thread that have activated the stripe, 124 127 * plus raid5d if it is handling it, plus one for each active request 125 - * on a cached buffer. 128 + * on a cached buffer, and plus one if the stripe is undergoing stripe 129 + * operations. 130 + * 131 + * Stripe operations are performed outside the stripe lock, 132 + * the stripe operations are: 133 + * -copying data between the stripe cache and user application buffers 134 + * -computing blocks to save a disk access, or to recover a missing block 135 + * -updating the parity on a write operation (reconstruct write and 136 + * read-modify-write) 137 + * -checking parity correctness 138 + * -running i/o to disk 139 + * These operations are carried out by raid5_run_ops which uses the async_tx 140 + * api to (optionally) offload operations to dedicated hardware engines. 141 + * When requesting an operation handle_stripe sets the pending bit for the 142 + * operation and increments the count. raid5_run_ops is then run whenever 143 + * the count is non-zero. 144 + * There are some critical dependencies between the operations that prevent some 145 + * from being requested while another is in flight. 146 + * 1/ Parity check operations destroy the in cache version of the parity block, 147 + * so we prevent parity dependent operations like writes and compute_blocks 148 + * from starting while a check is in progress. Some dma engines can perform 149 + * the check without damaging the parity block, in these cases the parity 150 + * block is re-marked up to date (assuming the check was successful) and is 151 + * not re-read from disk. 152 + * 2/ When a write operation is requested we immediately lock the affected 153 + * blocks, and mark them as not up to date. This causes new read requests 154 + * to be held off, as well as parity checks and compute block operations. 155 + * 3/ Once a compute block operation has been requested handle_stripe treats 156 + * that block as if it is up to date. raid5_run_ops guaruntees that any 157 + * operation that is dependent on the compute block result is initiated after 158 + * the compute block completes. 
126 159 */ 127 160 128 161 struct stripe_head { ··· 169 136 spinlock_t lock; 170 137 int bm_seq; /* sequence number for bitmap flushes */ 171 138 int disks; /* disks in stripe */ 139 + /* stripe_operations 140 + * @pending - pending ops flags (set for request->issue->complete) 141 + * @ack - submitted ops flags (set for issue->complete) 142 + * @complete - completed ops flags (set for complete) 143 + * @target - STRIPE_OP_COMPUTE_BLK target 144 + * @count - raid5_runs_ops is set to run when this is non-zero 145 + */ 146 + struct stripe_operations { 147 + unsigned long pending; 148 + unsigned long ack; 149 + unsigned long complete; 150 + int target; 151 + int count; 152 + u32 zero_sum_result; 153 + } ops; 172 154 struct r5dev { 173 155 struct bio req; 174 156 struct bio_vec vec; 175 157 struct page *page; 176 - struct bio *toread, *towrite, *written; 158 + struct bio *toread, *read, *towrite, *written; 177 159 sector_t sector; /* sector of this page */ 178 160 unsigned long flags; 179 161 } dev[1]; /* allocated with extra space depending of RAID geometry */ 180 162 }; 163 + 164 + /* stripe_head_state - collects and tracks the dynamic state of a stripe_head 165 + * for handle_stripe. It is only valid under spin_lock(sh->lock); 166 + */ 167 + struct stripe_head_state { 168 + int syncing, expanding, expanded; 169 + int locked, uptodate, to_read, to_write, failed, written; 170 + int to_fill, compute, req_compute, non_overwrite; 171 + int failed_num; 172 + }; 173 + 174 + /* r6_state - extra state data only relevant to r6 */ 175 + struct r6_state { 176 + int p_failed, q_failed, qd_idx, failed_num[2]; 177 + }; 178 + 181 179 /* Flags */ 182 180 #define R5_UPTODATE 0 /* page contains current data */ 183 181 #define R5_LOCKED 1 /* IO has been submitted on "req" */ ··· 222 158 #define R5_ReWrite 9 /* have tried to over-write the readerror */ 223 159 224 160 #define R5_Expanded 10 /* This block now has post-expand data */ 161 + #define R5_Wantcompute 11 /* compute_block in progress treat as 162 + * uptodate 163 + */ 164 + #define R5_Wantfill 12 /* dev->toread contains a bio that needs 165 + * filling 166 + */ 167 + #define R5_Wantprexor 13 /* distinguish blocks ready for rmw from 168 + * other "towrites" 169 + */ 225 170 /* 226 171 * Write method 227 172 */ ··· 252 179 #define STRIPE_EXPANDING 9 253 180 #define STRIPE_EXPAND_SOURCE 10 254 181 #define STRIPE_EXPAND_READY 11 182 + /* 183 + * Operations flags (in issue order) 184 + */ 185 + #define STRIPE_OP_BIOFILL 0 186 + #define STRIPE_OP_COMPUTE_BLK 1 187 + #define STRIPE_OP_PREXOR 2 188 + #define STRIPE_OP_BIODRAIN 3 189 + #define STRIPE_OP_POSTXOR 4 190 + #define STRIPE_OP_CHECK 5 191 + #define STRIPE_OP_IO 6 192 + 193 + /* modifiers to the base operations 194 + * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back 195 + * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check 196 + */ 197 + #define STRIPE_OP_MOD_REPAIR_PD 7 198 + #define STRIPE_OP_MOD_DMA_CHECK 8 199 + 255 200 /* 256 201 * Plugging: 257 202 *
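The request/run convention described in the header comment boils down to a small amount of bookkeeping. A simplified sketch of what "requesting" an operation means (illustrative only; the real logic, including locking and the decision of which ops to request, lives in handle_stripe in drivers/md/raid5.c):

#include <linux/raid/raid5.h>

static void request_stripe_op(struct stripe_head *sh, int op)
{
	set_bit(op, &sh->ops.pending);
	sh->ops.count++;		/* raid5_run_ops runs while this is non-zero */
	set_bit(STRIPE_HANDLE, &sh->state);
}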
+3 -2
include/linux/raid/xor.h
··· 3 3 4 4 #include <linux/raid/md.h> 5 5 6 - #define MAX_XOR_BLOCKS 5 6 + #define MAX_XOR_BLOCKS 4 7 7 8 - extern void xor_block(unsigned int count, unsigned int bytes, void **ptr); 8 + extern void xor_blocks(unsigned int count, unsigned int bytes, 9 + void *dest, void **srcs); 9 10 10 11 struct xor_block_template { 11 12 struct xor_block_template *next;
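The xor.h change narrows MAX_XOR_BLOCKS to 4 and gives xor_blocks an explicit destination separate from the source list, so a caller with more sources has to feed them in batches. The sketch below models that batching against a plain byte-wise fallback; do_xor_blocks and xor_many are stand-ins written for illustration, not the kernel routines.

#include <string.h>

#define MAX_XOR_BLOCKS 4

/* Portable stand-in with the same shape as the new xor_blocks():
 * XOR each source buffer into the destination buffer. */
static void do_xor_blocks(unsigned int count, unsigned int bytes,
                          void *dest, void **srcs)
{
        unsigned char *d = dest;
        unsigned int i, j;

        for (i = 0; i < count; i++) {
                unsigned char *s = srcs[i];

                for (j = 0; j < bytes; j++)
                        d[j] ^= s[j];
        }
}

/* Feed an arbitrary number of sources MAX_XOR_BLOCKS at a time, the way a
 * caller of the batched API has to. */
static void xor_many(unsigned int src_cnt, unsigned int bytes,
                     void *dest, void **srcs)
{
        unsigned int off = 0;

        while (src_cnt) {
                unsigned int n = src_cnt > MAX_XOR_BLOCKS ?
                                 MAX_XOR_BLOCKS : src_cnt;

                do_xor_blocks(n, bytes, dest, &srcs[off]);
                off += n;
                src_cnt -= n;
        }
}

int main(void)
{
        unsigned char dest[8] = { 0 }, a[8], b[8], c[8], d[8], e[8];
        void *srcs[] = { a, b, c, d, e };

        memset(a, 1, 8); memset(b, 2, 8); memset(c, 4, 8);
        memset(d, 8, 8); memset(e, 16, 8);
        xor_many(5, 8, dest, srcs);     /* five sources -> batches of 4 + 1 */
        return dest[0] == (1 ^ 2 ^ 4 ^ 8 ^ 16) ? 0 : 1;
}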
+77 -33
net/core/dev.c
··· 151 151 static struct list_head ptype_all __read_mostly; /* Taps */ 152 152 153 153 #ifdef CONFIG_NET_DMA 154 - static struct dma_client *net_dma_client; 155 - static unsigned int net_dma_count; 156 - static spinlock_t net_dma_event_lock; 154 + struct net_dma { 155 + struct dma_client client; 156 + spinlock_t lock; 157 + cpumask_t channel_mask; 158 + struct dma_chan *channels[NR_CPUS]; 159 + }; 160 + 161 + static enum dma_state_client 162 + netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 163 + enum dma_state state); 164 + 165 + static struct net_dma net_dma = { 166 + .client = { 167 + .event_callback = netdev_dma_event, 168 + }, 169 + }; 157 170 #endif 158 171 159 172 /* ··· 2035 2022 * There may not be any more sk_buffs coming right now, so push 2036 2023 * any pending DMA copies to hardware 2037 2024 */ 2038 - if (net_dma_client) { 2039 - struct dma_chan *chan; 2040 - rcu_read_lock(); 2041 - list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) 2042 - dma_async_memcpy_issue_pending(chan); 2043 - rcu_read_unlock(); 2025 + if (!cpus_empty(net_dma.channel_mask)) { 2026 + int chan_idx; 2027 + for_each_cpu_mask(chan_idx, net_dma.channel_mask) { 2028 + struct dma_chan *chan = net_dma.channels[chan_idx]; 2029 + if (chan) 2030 + dma_async_memcpy_issue_pending(chan); 2031 + } 2044 2032 } 2045 2033 #endif 2046 2034 return; ··· 3789 3775 * This is called when the number of channels allocated to the net_dma_client 3790 3776 * changes. The net_dma_client tries to have one DMA channel per CPU. 3791 3777 */ 3792 - static void net_dma_rebalance(void) 3778 + 3779 + static void net_dma_rebalance(struct net_dma *net_dma) 3793 3780 { 3794 - unsigned int cpu, i, n; 3781 + unsigned int cpu, i, n, chan_idx; 3795 3782 struct dma_chan *chan; 3796 3783 3797 - if (net_dma_count == 0) { 3784 + if (cpus_empty(net_dma->channel_mask)) { 3798 3785 for_each_online_cpu(cpu) 3799 3786 rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL); 3800 3787 return; ··· 3804 3789 i = 0; 3805 3790 cpu = first_cpu(cpu_online_map); 3806 3791 3807 - rcu_read_lock(); 3808 - list_for_each_entry(chan, &net_dma_client->channels, client_node) { 3809 - n = ((num_online_cpus() / net_dma_count) 3810 - + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); 3792 + for_each_cpu_mask(chan_idx, net_dma->channel_mask) { 3793 + chan = net_dma->channels[chan_idx]; 3794 + 3795 + n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask)) 3796 + + (i < (num_online_cpus() % 3797 + cpus_weight(net_dma->channel_mask)) ? 
1 : 0)); 3811 3798 3812 3799 while(n) { 3813 3800 per_cpu(softnet_data, cpu).net_dma = chan; ··· 3818 3801 } 3819 3802 i++; 3820 3803 } 3821 - rcu_read_unlock(); 3822 3804 } 3823 3805 3824 3806 /** ··· 3826 3810 * @chan: DMA channel for the event 3827 3811 * @event: event type 3828 3812 */ 3829 - static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 3830 - enum dma_event event) 3813 + static enum dma_state_client 3814 + netdev_dma_event(struct dma_client *client, struct dma_chan *chan, 3815 + enum dma_state state) 3831 3816 { 3832 - spin_lock(&net_dma_event_lock); 3833 - switch (event) { 3834 - case DMA_RESOURCE_ADDED: 3835 - net_dma_count++; 3836 - net_dma_rebalance(); 3817 + int i, found = 0, pos = -1; 3818 + struct net_dma *net_dma = 3819 + container_of(client, struct net_dma, client); 3820 + enum dma_state_client ack = DMA_DUP; /* default: take no action */ 3821 + 3822 + spin_lock(&net_dma->lock); 3823 + switch (state) { 3824 + case DMA_RESOURCE_AVAILABLE: 3825 + for (i = 0; i < NR_CPUS; i++) 3826 + if (net_dma->channels[i] == chan) { 3827 + found = 1; 3828 + break; 3829 + } else if (net_dma->channels[i] == NULL && pos < 0) 3830 + pos = i; 3831 + 3832 + if (!found && pos >= 0) { 3833 + ack = DMA_ACK; 3834 + net_dma->channels[pos] = chan; 3835 + cpu_set(pos, net_dma->channel_mask); 3836 + net_dma_rebalance(net_dma); 3837 + } 3837 3838 break; 3838 3839 case DMA_RESOURCE_REMOVED: 3839 - net_dma_count--; 3840 - net_dma_rebalance(); 3840 + for (i = 0; i < NR_CPUS; i++) 3841 + if (net_dma->channels[i] == chan) { 3842 + found = 1; 3843 + pos = i; 3844 + break; 3845 + } 3846 + 3847 + if (found) { 3848 + ack = DMA_ACK; 3849 + cpu_clear(pos, net_dma->channel_mask); 3850 + net_dma->channels[i] = NULL; 3851 + net_dma_rebalance(net_dma); 3852 + } 3841 3853 break; 3842 3854 default: 3843 3855 break; 3844 3856 } 3845 - spin_unlock(&net_dma_event_lock); 3857 + spin_unlock(&net_dma->lock); 3858 + 3859 + return ack; 3846 3860 } 3847 3861 3848 3862 /** ··· 3880 3834 */ 3881 3835 static int __init netdev_dma_register(void) 3882 3836 { 3883 - spin_lock_init(&net_dma_event_lock); 3884 - net_dma_client = dma_async_client_register(netdev_dma_event); 3885 - if (net_dma_client == NULL) 3886 - return -ENOMEM; 3887 - 3888 - dma_async_client_chan_request(net_dma_client, num_online_cpus()); 3837 + spin_lock_init(&net_dma.lock); 3838 + dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask); 3839 + dma_async_client_register(&net_dma.client); 3840 + dma_async_client_chan_request(&net_dma.client); 3889 3841 return 0; 3890 3842 } 3891 3843
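net_dma_rebalance above spreads the online CPUs over the channels in net_dma.channel_mask: each channel covers cpus / channels consecutive CPUs, and the first cpus % channels channels take one extra. Below is a small user-space model of that arithmetic only, assuming a flat cpu-to-channel map instead of per-cpu softnet_data; rebalance here is an invented helper, not the kernel function.

#include <stdio.h>

/* Model of the CPU-to-channel spread in net_dma_rebalance(): channel i serves
 * cpus / chans CPUs, plus one extra for the first cpus % chans channels. */
static void rebalance(int online_cpus, int channels, int *cpu_to_chan)
{
        int cpu = 0, i;

        for (i = 0; i < channels; i++) {
                int n = online_cpus / channels +
                        (i < online_cpus % channels ? 1 : 0);

                while (n--)
                        cpu_to_chan[cpu++] = i;
        }
}

int main(void)
{
        int map[8], cpu;

        rebalance(8, 3, map);   /* 8 CPUs over 3 channels -> 3, 3, 2 */
        for (cpu = 0; cpu < 8; cpu++)
                printf("cpu%d -> chan%d\n", cpu, map[cpu]);
        return 0;
}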
+18 -8
net/ipv4/tcp.c
··· 1116 1116 long timeo; 1117 1117 struct task_struct *user_recv = NULL; 1118 1118 int copied_early = 0; 1119 + struct sk_buff *skb; 1119 1120 1120 1121 lock_sock(sk); 1121 1122 ··· 1143 1142 #ifdef CONFIG_NET_DMA 1144 1143 tp->ucopy.dma_chan = NULL; 1145 1144 preempt_disable(); 1146 - if ((len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && 1147 - !sysctl_tcp_low_latency && __get_cpu_var(softnet_data).net_dma) { 1148 - preempt_enable_no_resched(); 1149 - tp->ucopy.pinned_list = dma_pin_iovec_pages(msg->msg_iov, len); 1150 - } else 1151 - preempt_enable_no_resched(); 1145 + skb = skb_peek_tail(&sk->sk_receive_queue); 1146 + { 1147 + int available = 0; 1148 + 1149 + if (skb) 1150 + available = TCP_SKB_CB(skb)->seq + skb->len - (*seq); 1151 + if ((available < target) && 1152 + (len > sysctl_tcp_dma_copybreak) && !(flags & MSG_PEEK) && 1153 + !sysctl_tcp_low_latency && 1154 + __get_cpu_var(softnet_data).net_dma) { 1155 + preempt_enable_no_resched(); 1156 + tp->ucopy.pinned_list = 1157 + dma_pin_iovec_pages(msg->msg_iov, len); 1158 + } else { 1159 + preempt_enable_no_resched(); 1160 + } 1161 + } 1152 1162 #endif 1153 1163 1154 1164 do { 1155 - struct sk_buff *skb; 1156 1165 u32 offset; 1157 1166 1158 1167 /* Are we at urgent data? Stop if we have read anything or have SIGURG pending. */ ··· 1450 1439 1451 1440 #ifdef CONFIG_NET_DMA 1452 1441 if (tp->ucopy.dma_chan) { 1453 - struct sk_buff *skb; 1454 1442 dma_cookie_t done, used; 1455 1443 1456 1444 dma_async_memcpy_issue_pending(tp->ucopy.dma_chan);
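The tcp_recvmsg change above only pins the user iovec for DMA offload when the bytes already queued cannot satisfy the read target and the read itself exceeds tcp_dma_copybreak; otherwise a synchronous copy is cheaper than the offload setup. A hedged restatement of that predicate with the socket state reduced to plain integers (should_offload is an invented name, not a kernel function):

#include <stdbool.h>
#include <stddef.h>

/* Mirrors the condition added above: offload a read to the DMA engine only
 * when the data already queued cannot satisfy it and the read is large enough
 * to amortize the cost of pinning the user pages. */
bool should_offload(unsigned int available, unsigned int target,
                    size_t len, unsigned int dma_copybreak,
                    bool low_latency, bool peeking, bool have_chan)
{
        if (available >= target)        /* enough already queued: plain copy */
                return false;
        if (len <= dma_copybreak)       /* small read: DMA not worth it */
                return false;
        if (low_latency || peeking || !have_chan)
                return false;
        return true;
}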