Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block

Pull block driver updates from Jens Axboe:
"On top of the core changes, here are the block driver changes for this
merge window:

- NVMe changes:
- NVMe over Fibre Channel protocol updates, which also reach
over to drivers/scsi/lpfc (James Smart)
- namespace revalidation support on the target (Anthony
Iliopoulos)
- gcc zero length array fix (Arnd Bergmann)
- nvmet cleanups (Chaitanya Kulkarni)
- misc cleanups and fixes (me, Keith Busch, Sagi Grimberg)
- use a SRQ per completion vector (Max Gurtovoy)
- fix handling of runtime changes to the queue count (Weiping
Zhang)
- t10 protection information support for nvme-rdma and
nvmet-rdma (Israel Rukshin and Max Gurtovoy)
- target side AEN improvements (Chaitanya Kulkarni)
- various fixes and minor improvements all over, including the
nvme part of the lpfc driver"

- Floppy code cleanup series (Willy, Denis)

- Floppy contention fix (Jiri)

- Loop CONFIGURE support (Martijn)

- bcache fixes/improvements (Coly, Joe, Colin)

- q->queuedata cleanups (Christoph)

- Get rid of ioctl_by_bdev (Christoph, Stefan)

- md/raid5 allocation fixes (Coly)

- zero length array fixes (Gustavo)

- swim3 task state fix (Xu)"

* tag 'for-5.8/drivers-2020-06-01' of git://git.kernel.dk/linux-block: (166 commits)
bcache: configure the asynchronous registertion to be experimental
bcache: asynchronous devices registration
bcache: fix refcount underflow in bcache_device_free()
bcache: Convert pr_<level> uses to a more typical style
bcache: remove redundant variables i and n
lpfc: Fix return value in __lpfc_nvme_ls_abort
lpfc: fix axchg pointer reference after free and double frees
lpfc: Fix pointer checks and comments in LS receive refactoring
nvme: set dma alignment to qword
nvmet: cleanups the loop in nvmet_async_events_process
nvmet: fix memory leak when removing namespaces and controllers concurrently
nvmet-rdma: add metadata/T10-PI support
nvmet: add metadata support for block devices
nvmet: add metadata/T10-PI support
nvme: add Metadata Capabilities enumerations
nvmet: rename nvmet_check_data_len to nvmet_check_transfer_len
nvmet: rename nvmet_rw_len to nvmet_rw_data_len
nvmet: add metadata characteristics for a namespace
nvme-rdma: add metadata/T10-PI support
nvme-rdma: introduce nvme_rdma_sgl structure
...

+5520 -2444
+1
MAINTAINERS
··· 14711 14711 W: http://www.ibm.com/developerworks/linux/linux390/ 14712 14712 F: block/partitions/ibm.c 14713 14713 F: drivers/s390/block/dasd* 14714 + F: include/linux/dasd_mod.h 14714 14715 14715 14716 S390 IOMMU (PCI) 14716 14717 M: Gerald Schaefer <gerald.schaefer@de.ibm.com>
+2 -2
arch/alpha/include/asm/floppy.h
··· 11 11 #define __ASM_ALPHA_FLOPPY_H 12 12 13 13 14 - #define fd_inb(port) inb_p(port) 15 - #define fd_outb(value,port) outb_p(value,port) 14 + #define fd_inb(base, reg) inb_p((base) + (reg)) 15 + #define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) 16 16 17 17 #define fd_enable_dma() enable_dma(FLOPPY_DMA) 18 18 #define fd_disable_dma() disable_dma(FLOPPY_DMA)
+4 -4
arch/arm/include/asm/floppy.h
··· 9 9 #ifndef __ASM_ARM_FLOPPY_H 10 10 #define __ASM_ARM_FLOPPY_H 11 11 12 - #define fd_outb(val,port) \ 12 + #define fd_outb(val, base, reg) \ 13 13 do { \ 14 14 int new_val = (val); \ 15 - if (((port) & 7) == FD_DOR) { \ 15 + if ((reg) == FD_DOR) { \ 16 16 if (new_val & 0xf0) \ 17 17 new_val = (new_val & 0x0c) | \ 18 18 floppy_selects[new_val & 3]; \ 19 19 else \ 20 20 new_val &= 0x0c; \ 21 21 } \ 22 - outb(new_val, (port)); \ 22 + outb(new_val, (base) + (reg)); \ 23 23 } while(0) 24 24 25 - #define fd_inb(port) inb((port)) 25 + #define fd_inb(base, reg) inb((base) + (reg)) 26 26 #define fd_request_irq() request_irq(IRQ_FLOPPYDISK,floppy_interrupt,\ 27 27 0,"floppy",NULL) 28 28 #define fd_free_irq() free_irq(IRQ_FLOPPYDISK,NULL)
+14 -13
arch/m68k/include/asm/floppy.h
··· 63 63 } 64 64 65 65 66 - static __inline__ unsigned char fd_inb(int port) 66 + static __inline__ unsigned char fd_inb(int base, int reg) 67 67 { 68 68 if(MACH_IS_Q40) 69 - return inb_p(port); 69 + return inb_p(base + reg); 70 70 else if(MACH_IS_SUN3X) 71 - return sun3x_82072_fd_inb(port); 71 + return sun3x_82072_fd_inb(base + reg); 72 72 return 0; 73 73 } 74 74 75 - static __inline__ void fd_outb(unsigned char value, int port) 75 + static __inline__ void fd_outb(unsigned char value, int base, int reg) 76 76 { 77 77 if(MACH_IS_Q40) 78 - outb_p(value, port); 78 + outb_p(value, base + reg); 79 79 else if(MACH_IS_SUN3X) 80 - sun3x_82072_fd_outb(value, port); 80 + sun3x_82072_fd_outb(value, base + reg); 81 81 } 82 82 83 83 ··· 211 211 st=1; 212 212 for(lcount=virtual_dma_count, lptr=virtual_dma_addr; 213 213 lcount; lcount--, lptr++) { 214 - st=inb(virtual_dma_port+4) & 0xa0 ; 215 - if(st != 0xa0) 214 + st = inb(virtual_dma_port + FD_STATUS); 215 + st &= STATUS_DMA | STATUS_READY; 216 + if (st != (STATUS_DMA | STATUS_READY)) 216 217 break; 217 218 if(virtual_dma_mode) 218 - outb_p(*lptr, virtual_dma_port+5); 219 + outb_p(*lptr, virtual_dma_port + FD_DATA); 219 220 else 220 - *lptr = inb_p(virtual_dma_port+5); 221 + *lptr = inb_p(virtual_dma_port + FD_DATA); 221 222 } 222 223 223 224 virtual_dma_count = lcount; 224 225 virtual_dma_addr = lptr; 225 - st = inb(virtual_dma_port+4); 226 + st = inb(virtual_dma_port + FD_STATUS); 226 227 } 227 228 228 229 #ifdef TRACE_FLPY_INT 229 230 calls++; 230 231 #endif 231 - if(st == 0x20) 232 + if (st == STATUS_DMA) 232 233 return IRQ_HANDLED; 233 - if(!(st & 0x20)) { 234 + if (!(st & STATUS_DMA)) { 234 235 virtual_dma_residue += virtual_dma_count; 235 236 virtual_dma_count=0; 236 237 #ifdef TRACE_FLPY_INT
+4 -4
arch/mips/include/asm/mach-generic/floppy.h
··· 26 26 /* 27 27 * How to access the FDC's registers. 28 28 */ 29 - static inline unsigned char fd_inb(unsigned int port) 29 + static inline unsigned char fd_inb(unsigned int base, unsigned int reg) 30 30 { 31 - return inb_p(port); 31 + return inb_p(base + reg); 32 32 } 33 33 34 - static inline void fd_outb(unsigned char value, unsigned int port) 34 + static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg) 35 35 { 36 - outb_p(value, port); 36 + outb_p(value, base + reg); 37 37 } 38 38 39 39 /*
+4 -4
arch/mips/include/asm/mach-jazz/floppy.h
··· 17 17 #include <asm/jazzdma.h> 18 18 #include <asm/pgtable.h> 19 19 20 - static inline unsigned char fd_inb(unsigned int port) 20 + static inline unsigned char fd_inb(unsigned int base, unsigned int reg) 21 21 { 22 22 unsigned char c; 23 23 24 - c = *(volatile unsigned char *) port; 24 + c = *(volatile unsigned char *) (base + reg); 25 25 udelay(1); 26 26 27 27 return c; 28 28 } 29 29 30 - static inline void fd_outb(unsigned char value, unsigned int port) 30 + static inline void fd_outb(unsigned char value, unsigned int base, unsigned int reg) 31 31 { 32 - *(volatile unsigned char *) port = value; 32 + *(volatile unsigned char *) (base + reg) = value; 33 33 } 34 34 35 35 /*
+10 -9
arch/parisc/include/asm/floppy.h
··· 29 29 #define CSW fd_routine[can_use_virtual_dma & 1] 30 30 31 31 32 - #define fd_inb(port) readb(port) 33 - #define fd_outb(value, port) writeb(value, port) 32 + #define fd_inb(base, reg) readb((base) + (reg)) 33 + #define fd_outb(value, base, reg) writeb(value, (base) + (reg)) 34 34 35 35 #define fd_request_dma() CSW._request_dma(FLOPPY_DMA,"floppy") 36 36 #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) ··· 75 75 register char *lptr = virtual_dma_addr; 76 76 77 77 for (lcount = virtual_dma_count; lcount; lcount--) { 78 - st = fd_inb(virtual_dma_port+4) & 0xa0 ; 79 - if (st != 0xa0) 78 + st = fd_inb(virtual_dma_port, FD_STATUS); 79 + st &= STATUS_DMA | STATUS_READY; 80 + if (st != (STATUS_DMA | STATUS_READY)) 80 81 break; 81 82 if (virtual_dma_mode) { 82 - fd_outb(*lptr, virtual_dma_port+5); 83 + fd_outb(*lptr, virtual_dma_port, FD_DATA); 83 84 } else { 84 - *lptr = fd_inb(virtual_dma_port+5); 85 + *lptr = fd_inb(virtual_dma_port, FD_DATA); 85 86 } 86 87 lptr++; 87 88 } 88 89 virtual_dma_count = lcount; 89 90 virtual_dma_addr = lptr; 90 - st = fd_inb(virtual_dma_port+4); 91 + st = fd_inb(virtual_dma_port, FD_STATUS); 91 92 } 92 93 93 94 #ifdef TRACE_FLPY_INT 94 95 calls++; 95 96 #endif 96 - if (st == 0x20) 97 + if (st == STATUS_DMA) 97 98 return; 98 - if (!(st & 0x20)) { 99 + if (!(st & STATUS_DMA)) { 99 100 virtual_dma_residue += virtual_dma_count; 100 101 virtual_dma_count = 0; 101 102 #ifdef TRACE_FLPY_INT
+10 -9
arch/powerpc/include/asm/floppy.h
··· 13 13 14 14 #include <asm/machdep.h> 15 15 16 - #define fd_inb(port) inb_p(port) 17 - #define fd_outb(value,port) outb_p(value,port) 16 + #define fd_inb(base, reg) inb_p((base) + (reg)) 17 + #define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) 18 18 19 19 #define fd_enable_dma() enable_dma(FLOPPY_DMA) 20 20 #define fd_disable_dma() fd_ops->_disable_dma(FLOPPY_DMA) ··· 61 61 st = 1; 62 62 for (lcount=virtual_dma_count, lptr=virtual_dma_addr; 63 63 lcount; lcount--, lptr++) { 64 - st=inb(virtual_dma_port+4) & 0xa0 ; 65 - if (st != 0xa0) 64 + st = inb(virtual_dma_port + FD_STATUS); 65 + st &= STATUS_DMA | STATUS_READY; 66 + if (st != (STATUS_DMA | STATUS_READY)) 66 67 break; 67 68 if (virtual_dma_mode) 68 - outb_p(*lptr, virtual_dma_port+5); 69 + outb_p(*lptr, virtual_dma_port + FD_DATA); 69 70 else 70 - *lptr = inb_p(virtual_dma_port+5); 71 + *lptr = inb_p(virtual_dma_port + FD_DATA); 71 72 } 72 73 virtual_dma_count = lcount; 73 74 virtual_dma_addr = lptr; 74 - st = inb(virtual_dma_port+4); 75 + st = inb(virtual_dma_port + FD_STATUS); 75 76 76 - if (st == 0x20) 77 + if (st == STATUS_DMA) 77 78 return IRQ_HANDLED; 78 - if (!(st & 0x20)) { 79 + if (!(st & STATUS_DMA)) { 79 80 virtual_dma_residue += virtual_dma_count; 80 81 virtual_dma_count=0; 81 82 doing_vdma = 0;
+25 -25
arch/sparc/include/asm/floppy_32.h
··· 59 59 60 60 static struct sun_floppy_ops sun_fdops; 61 61 62 - #define fd_inb(port) sun_fdops.fd_inb(port) 63 - #define fd_outb(value,port) sun_fdops.fd_outb(value,port) 62 + #define fd_inb(base, reg) sun_fdops.fd_inb(reg) 63 + #define fd_outb(value, base, reg) sun_fdops.fd_outb(value, reg) 64 64 #define fd_enable_dma() sun_fd_enable_dma() 65 65 #define fd_disable_dma() sun_fd_disable_dma() 66 66 #define fd_request_dma() (0) /* nothing... */ ··· 114 114 static unsigned char sun_82072_fd_inb(int port) 115 115 { 116 116 udelay(5); 117 - switch(port & 7) { 117 + switch (port) { 118 118 default: 119 119 printk("floppy: Asked to read unknown port %d\n", port); 120 120 panic("floppy: Port bolixed."); 121 - case 4: /* FD_STATUS */ 121 + case FD_STATUS: 122 122 return sun_fdc->status_82072 & ~STATUS_DMA; 123 - case 5: /* FD_DATA */ 123 + case FD_DATA: 124 124 return sun_fdc->data_82072; 125 - case 7: /* FD_DIR */ 125 + case FD_DIR: 126 126 return sun_read_dir(); 127 127 } 128 128 panic("sun_82072_fd_inb: How did I get here?"); ··· 131 131 static void sun_82072_fd_outb(unsigned char value, int port) 132 132 { 133 133 udelay(5); 134 - switch(port & 7) { 134 + switch (port) { 135 135 default: 136 136 printk("floppy: Asked to write to unknown port %d\n", port); 137 137 panic("floppy: Port bolixed."); 138 - case 2: /* FD_DOR */ 138 + case FD_DOR: 139 139 sun_set_dor(value, 0); 140 140 break; 141 - case 5: /* FD_DATA */ 141 + case FD_DATA: 142 142 sun_fdc->data_82072 = value; 143 143 break; 144 - case 7: /* FD_DCR */ 144 + case FD_DCR: 145 145 sun_fdc->dcr_82072 = value; 146 146 break; 147 - case 4: /* FD_STATUS */ 147 + case FD_DSR: 148 148 sun_fdc->status_82072 = value; 149 149 break; 150 150 } ··· 154 154 static unsigned char sun_82077_fd_inb(int port) 155 155 { 156 156 udelay(5); 157 - switch(port & 7) { 157 + switch (port) { 158 158 default: 159 159 printk("floppy: Asked to read unknown port %d\n", port); 160 160 panic("floppy: Port bolixed."); 161 - case 0: /* 
FD_STATUS_0 */ 161 + case FD_SRA: 162 162 return sun_fdc->status1_82077; 163 - case 1: /* FD_STATUS_1 */ 163 + case FD_SRB: 164 164 return sun_fdc->status2_82077; 165 - case 2: /* FD_DOR */ 165 + case FD_DOR: 166 166 return sun_fdc->dor_82077; 167 - case 3: /* FD_TDR */ 167 + case FD_TDR: 168 168 return sun_fdc->tapectl_82077; 169 - case 4: /* FD_STATUS */ 169 + case FD_STATUS: 170 170 return sun_fdc->status_82077 & ~STATUS_DMA; 171 - case 5: /* FD_DATA */ 171 + case FD_DATA: 172 172 return sun_fdc->data_82077; 173 - case 7: /* FD_DIR */ 173 + case FD_DIR: 174 174 return sun_read_dir(); 175 175 } 176 176 panic("sun_82077_fd_inb: How did I get here?"); ··· 179 179 static void sun_82077_fd_outb(unsigned char value, int port) 180 180 { 181 181 udelay(5); 182 - switch(port & 7) { 182 + switch (port) { 183 183 default: 184 184 printk("floppy: Asked to write to unknown port %d\n", port); 185 185 panic("floppy: Port bolixed."); 186 - case 2: /* FD_DOR */ 186 + case FD_DOR: 187 187 sun_set_dor(value, 1); 188 188 break; 189 - case 5: /* FD_DATA */ 189 + case FD_DATA: 190 190 sun_fdc->data_82077 = value; 191 191 break; 192 - case 7: /* FD_DCR */ 192 + case FD_DCR: 193 193 sun_fdc->dcr_82077 = value; 194 194 break; 195 - case 4: /* FD_STATUS */ 195 + case FD_DSR: 196 196 sun_fdc->status_82077 = value; 197 197 break; 198 - case 3: /* FD_TDR */ 198 + case FD_TDR: 199 199 sun_fdc->tapectl_82077 = value; 200 200 break; 201 201 }
+32 -27
arch/sparc/include/asm/floppy_64.h
··· 47 47 static struct platform_device *floppy_op = NULL; 48 48 49 49 struct sun_floppy_ops { 50 - unsigned char (*fd_inb) (unsigned long port); 51 - void (*fd_outb) (unsigned char value, unsigned long port); 50 + unsigned char (*fd_inb) (unsigned long port, unsigned int reg); 51 + void (*fd_outb) (unsigned char value, unsigned long base, 52 + unsigned int reg); 52 53 void (*fd_enable_dma) (void); 53 54 void (*fd_disable_dma) (void); 54 55 void (*fd_set_dma_mode) (int); ··· 63 62 64 63 static struct sun_floppy_ops sun_fdops; 65 64 66 - #define fd_inb(port) sun_fdops.fd_inb(port) 67 - #define fd_outb(value,port) sun_fdops.fd_outb(value,port) 65 + #define fd_inb(base, reg) sun_fdops.fd_inb(base, reg) 66 + #define fd_outb(value, base, reg) sun_fdops.fd_outb(value, base, reg) 68 67 #define fd_enable_dma() sun_fdops.fd_enable_dma() 69 68 #define fd_disable_dma() sun_fdops.fd_disable_dma() 70 69 #define fd_request_dma() (0) /* nothing... */ ··· 98 97 /* No 64k boundary crossing problems on the Sparc. */ 99 98 #define CROSS_64KB(a,s) (0) 100 99 101 - static unsigned char sun_82077_fd_inb(unsigned long port) 100 + static unsigned char sun_82077_fd_inb(unsigned long base, unsigned int reg) 102 101 { 103 102 udelay(5); 104 - switch(port & 7) { 103 + switch (reg) { 105 104 default: 106 - printk("floppy: Asked to read unknown port %lx\n", port); 105 + printk("floppy: Asked to read unknown port %x\n", reg); 107 106 panic("floppy: Port bolixed."); 108 - case 4: /* FD_STATUS */ 107 + case FD_STATUS: 109 108 return sbus_readb(&sun_fdc->status_82077) & ~STATUS_DMA; 110 - case 5: /* FD_DATA */ 109 + case FD_DATA: 111 110 return sbus_readb(&sun_fdc->data_82077); 112 - case 7: /* FD_DIR */ 111 + case FD_DIR: 113 112 /* XXX: Is DCL on 0x80 in sun4m? 
*/ 114 113 return sbus_readb(&sun_fdc->dir_82077); 115 114 } 116 115 panic("sun_82072_fd_inb: How did I get here?"); 117 116 } 118 117 119 - static void sun_82077_fd_outb(unsigned char value, unsigned long port) 118 + static void sun_82077_fd_outb(unsigned char value, unsigned long base, 119 + unsigned int reg) 120 120 { 121 121 udelay(5); 122 - switch(port & 7) { 122 + switch (reg) { 123 123 default: 124 - printk("floppy: Asked to write to unknown port %lx\n", port); 124 + printk("floppy: Asked to write to unknown port %x\n", reg); 125 125 panic("floppy: Port bolixed."); 126 - case 2: /* FD_DOR */ 126 + case FD_DOR: 127 127 /* Happily, the 82077 has a real DOR register. */ 128 128 sbus_writeb(value, &sun_fdc->dor_82077); 129 129 break; 130 - case 5: /* FD_DATA */ 130 + case FD_DATA: 131 131 sbus_writeb(value, &sun_fdc->data_82077); 132 132 break; 133 - case 7: /* FD_DCR */ 133 + case FD_DCR: 134 134 sbus_writeb(value, &sun_fdc->dcr_82077); 135 135 break; 136 - case 4: /* FD_STATUS */ 136 + case FD_DSR: 137 137 sbus_writeb(value, &sun_fdc->status_82077); 138 138 break; 139 139 } ··· 300 298 301 299 irqreturn_t floppy_interrupt(int irq, void *dev_id); 302 300 303 - static unsigned char sun_pci_fd_inb(unsigned long port) 301 + static unsigned char sun_pci_fd_inb(unsigned long base, unsigned int reg) 304 302 { 305 303 udelay(5); 306 - return inb(port); 304 + return inb(base + reg); 307 305 } 308 306 309 - static void sun_pci_fd_outb(unsigned char val, unsigned long port) 307 + static void sun_pci_fd_outb(unsigned char val, unsigned long base, 308 + unsigned int reg) 310 309 { 311 310 udelay(5); 312 - outb(val, port); 311 + outb(val, base + reg); 313 312 } 314 313 315 - static void sun_pci_fd_broken_outb(unsigned char val, unsigned long port) 314 + static void sun_pci_fd_broken_outb(unsigned char val, unsigned long base, 315 + unsigned int reg) 316 316 { 317 317 udelay(5); 318 318 /* ··· 324 320 * this does not hurt correct hardware like the AXmp. 
325 321 * (Eddie, Sep 12 1998). 326 322 */ 327 - if (port == ((unsigned long)sun_fdc) + 2) { 323 + if (reg == FD_DOR) { 328 324 if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x20)) { 329 325 val |= 0x10; 330 326 } 331 327 } 332 - outb(val, port); 328 + outb(val, base + reg); 333 329 } 334 330 335 331 #ifdef PCI_FDC_SWAP_DRIVES 336 - static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long port) 332 + static void sun_pci_fd_lde_broken_outb(unsigned char val, unsigned long base, 333 + unsigned int reg) 337 334 { 338 335 udelay(5); 339 336 /* ··· 344 339 * this does not hurt correct hardware like the AXmp. 345 340 * (Eddie, Sep 12 1998). 346 341 */ 347 - if (port == ((unsigned long)sun_fdc) + 2) { 342 + if (reg == FD_DOR) { 348 343 if (((val & 0x03) == sun_pci_broken_drive) && (val & 0x10)) { 349 344 val &= ~(0x03); 350 345 val |= 0x21; 351 346 } 352 347 } 353 - outb(val, port); 348 + outb(val, base + reg); 354 349 } 355 350 #endif /* PCI_FDC_SWAP_DRIVES */ 356 351
+10 -9
arch/x86/include/asm/floppy.h
··· 31 31 #define CSW fd_routine[can_use_virtual_dma & 1] 32 32 33 33 34 - #define fd_inb(port) inb_p(port) 35 - #define fd_outb(value, port) outb_p(value, port) 34 + #define fd_inb(base, reg) inb_p((base) + (reg)) 35 + #define fd_outb(value, base, reg) outb_p(value, (base) + (reg)) 36 36 37 37 #define fd_request_dma() CSW._request_dma(FLOPPY_DMA, "floppy") 38 38 #define fd_free_dma() CSW._free_dma(FLOPPY_DMA) ··· 77 77 st = 1; 78 78 for (lcount = virtual_dma_count, lptr = virtual_dma_addr; 79 79 lcount; lcount--, lptr++) { 80 - st = inb(virtual_dma_port + 4) & 0xa0; 81 - if (st != 0xa0) 80 + st = inb(virtual_dma_port + FD_STATUS); 81 + st &= STATUS_DMA | STATUS_READY; 82 + if (st != (STATUS_DMA | STATUS_READY)) 82 83 break; 83 84 if (virtual_dma_mode) 84 - outb_p(*lptr, virtual_dma_port + 5); 85 + outb_p(*lptr, virtual_dma_port + FD_DATA); 85 86 else 86 - *lptr = inb_p(virtual_dma_port + 5); 87 + *lptr = inb_p(virtual_dma_port + FD_DATA); 87 88 } 88 89 virtual_dma_count = lcount; 89 90 virtual_dma_addr = lptr; 90 - st = inb(virtual_dma_port + 4); 91 + st = inb(virtual_dma_port + FD_STATUS); 91 92 } 92 93 93 94 #ifdef TRACE_FLPY_INT 94 95 calls++; 95 96 #endif 96 - if (st == 0x20) 97 + if (st == STATUS_DMA) 97 98 return IRQ_HANDLED; 98 - if (!(st & 0x20)) { 99 + if (!(st & STATUS_DMA)) { 99 100 virtual_dma_residue += virtual_dma_count; 100 101 virtual_dma_count = 0; 101 102 #ifdef TRACE_FLPY_INT
+18 -6
block/partitions/ibm.c
··· 13 13 #include <asm/ebcdic.h> 14 14 #include <linux/uaccess.h> 15 15 #include <asm/vtoc.h> 16 + #include <linux/module.h> 17 + #include <linux/dasd_mod.h> 16 18 17 19 #include "check.h" 18 - 19 20 20 21 union label_t { 21 22 struct vtoc_volume_label_cdl vol; ··· 289 288 */ 290 289 int ibm_partition(struct parsed_partitions *state) 291 290 { 291 + int (*fn)(struct gendisk *disk, dasd_information2_t *info); 292 292 struct block_device *bdev = state->bdev; 293 + struct gendisk *disk = bdev->bd_disk; 293 294 int blocksize, res; 294 295 loff_t i_size, offset, size; 295 296 dasd_information2_t *info; ··· 302 299 union label_t *label; 303 300 304 301 res = 0; 302 + if (!disk->fops->getgeo) 303 + goto out_exit; 304 + fn = symbol_get(dasd_biodasdinfo); 305 + if (!fn) 306 + goto out_exit; 305 307 blocksize = bdev_logical_block_size(bdev); 306 308 if (blocksize <= 0) 307 - goto out_exit; 309 + goto out_symbol; 308 310 i_size = i_size_read(bdev->bd_inode); 309 311 if (i_size == 0) 310 - goto out_exit; 312 + goto out_symbol; 311 313 info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); 312 314 if (info == NULL) 313 - goto out_exit; 315 + goto out_symbol; 314 316 geo = kmalloc(sizeof(struct hd_geometry), GFP_KERNEL); 315 317 if (geo == NULL) 316 318 goto out_nogeo; 317 319 label = kmalloc(sizeof(union label_t), GFP_KERNEL); 318 320 if (label == NULL) 319 321 goto out_nolab; 320 - if (ioctl_by_bdev(bdev, HDIO_GETGEO, (unsigned long)geo) != 0) 322 + /* set start if not filled by getgeo function e.g. virtblk */ 323 + geo->start = get_start_sect(bdev); 324 + if (disk->fops->getgeo(bdev, geo)) 321 325 goto out_freeall; 322 - if (ioctl_by_bdev(bdev, BIODASDINFO2, (unsigned long)info) != 0) { 326 + if (fn(disk, info)) { 323 327 kfree(info); 324 328 info = NULL; 325 329 } ··· 369 359 kfree(geo); 370 360 out_nogeo: 371 361 kfree(info); 362 + out_symbol: 363 + symbol_put(dasd_biodasdinfo); 372 364 out_exit: 373 365 return res; 374 366 }
+239 -227
drivers/block/floppy.c
··· 337 337 /* 338 338 * globals used by 'result()' 339 339 */ 340 - #define MAX_REPLIES 16 341 - static unsigned char reply_buffer[MAX_REPLIES]; 340 + static unsigned char reply_buffer[FD_RAW_REPLY_SIZE]; 342 341 static int inr; /* size of reply buffer, when called from interrupt */ 343 342 #define ST0 0 344 343 #define ST1 1 ··· 594 595 595 596 static inline unsigned char fdc_inb(int fdc, int reg) 596 597 { 597 - return fd_inb(fdc_state[fdc].address + reg); 598 + return fd_inb(fdc_state[fdc].address, reg); 598 599 } 599 600 600 601 static inline void fdc_outb(unsigned char value, int fdc, int reg) 601 602 { 602 - fd_outb(value, fdc_state[fdc].address + reg); 603 + fd_outb(value, fdc_state[fdc].address, reg); 603 604 } 604 605 605 606 static inline bool drive_no_geom(int drive) ··· 667 668 668 669 static int output_log_pos; 669 670 670 - #define current_reqD -1 671 671 #define MAXTIMEOUT -2 672 672 673 673 static void __reschedule_timeout(int drive, const char *message) 674 674 { 675 675 unsigned long delay; 676 - 677 - if (drive == current_reqD) 678 - drive = current_drive; 679 676 680 677 if (drive < 0 || drive >= N_DRIVE) { 681 678 delay = 20UL * HZ; ··· 822 827 return olddor; 823 828 } 824 829 825 - static void twaddle(void) 830 + static void twaddle(int fdc, int drive) 826 831 { 827 - if (drive_params[current_drive].select_delay) 832 + if (drive_params[drive].select_delay) 828 833 return; 829 - fdc_outb(fdc_state[current_fdc].dor & ~(0x10 << UNIT(current_drive)), 830 - current_fdc, FD_DOR); 831 - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); 832 - drive_state[current_drive].select_date = jiffies; 834 + fdc_outb(fdc_state[fdc].dor & ~(0x10 << UNIT(drive)), 835 + fdc, FD_DOR); 836 + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); 837 + drive_state[drive].select_date = jiffies; 833 838 } 834 839 835 840 /* 836 - * Reset all driver information about the current fdc. 841 + * Reset all driver information about the specified fdc. 
837 842 * This is needed after a reset, and after a raw command. 838 843 */ 839 - static void reset_fdc_info(int mode) 844 + static void reset_fdc_info(int fdc, int mode) 840 845 { 841 846 int drive; 842 847 843 - fdc_state[current_fdc].spec1 = fdc_state[current_fdc].spec2 = -1; 844 - fdc_state[current_fdc].need_configure = 1; 845 - fdc_state[current_fdc].perp_mode = 1; 846 - fdc_state[current_fdc].rawcmd = 0; 848 + fdc_state[fdc].spec1 = fdc_state[fdc].spec2 = -1; 849 + fdc_state[fdc].need_configure = 1; 850 + fdc_state[fdc].perp_mode = 1; 851 + fdc_state[fdc].rawcmd = 0; 847 852 for (drive = 0; drive < N_DRIVE; drive++) 848 - if (FDC(drive) == current_fdc && 853 + if (FDC(drive) == fdc && 849 854 (mode || drive_state[drive].track != NEED_1_RECAL)) 850 855 drive_state[drive].track = NEED_2_RECAL; 851 856 } 852 857 853 - /* selects the fdc and drive, and enables the fdc's input/dma. */ 858 + /* 859 + * selects the fdc and drive, and enables the fdc's input/dma. 860 + * Both current_drive and current_fdc are changed to match the new drive. 
861 + */ 854 862 static void set_fdc(int drive) 855 863 { 856 - unsigned int new_fdc = current_fdc; 864 + unsigned int fdc; 857 865 858 - if (drive >= 0 && drive < N_DRIVE) { 859 - new_fdc = FDC(drive); 860 - current_drive = drive; 866 + if (drive < 0 || drive >= N_DRIVE) { 867 + pr_info("bad drive value %d\n", drive); 868 + return; 861 869 } 862 - if (new_fdc >= N_FDC) { 870 + 871 + fdc = FDC(drive); 872 + if (fdc >= N_FDC) { 863 873 pr_info("bad fdc value\n"); 864 874 return; 865 875 } 866 - current_fdc = new_fdc; 867 - set_dor(current_fdc, ~0, 8); 876 + 877 + set_dor(fdc, ~0, 8); 868 878 #if N_FDC > 1 869 - set_dor(1 - current_fdc, ~8, 0); 879 + set_dor(1 - fdc, ~8, 0); 870 880 #endif 871 - if (fdc_state[current_fdc].rawcmd == 2) 872 - reset_fdc_info(1); 873 - if (fdc_inb(current_fdc, FD_STATUS) != STATUS_READY) 874 - fdc_state[current_fdc].reset = 1; 881 + if (fdc_state[fdc].rawcmd == 2) 882 + reset_fdc_info(fdc, 1); 883 + if (fdc_inb(fdc, FD_STATUS) != STATUS_READY) 884 + fdc_state[fdc].reset = 1; 885 + 886 + current_drive = drive; 887 + current_fdc = fdc; 875 888 } 876 889 877 - /* locks the driver */ 890 + /* 891 + * locks the driver. 892 + * Both current_drive and current_fdc are changed to match the new drive. 
893 + */ 878 894 static int lock_fdc(int drive) 879 895 { 880 896 if (WARN(atomic_read(&usage_count) == 0, ··· 1068 1062 unsigned long f; 1069 1063 1070 1064 if (raw_cmd->length == 0) { 1071 - int i; 1072 - 1073 - pr_info("zero dma transfer size:"); 1074 - for (i = 0; i < raw_cmd->cmd_count; i++) 1075 - pr_cont("%x,", raw_cmd->cmd[i]); 1076 - pr_cont("\n"); 1065 + print_hex_dump(KERN_INFO, "zero dma transfer size: ", 1066 + DUMP_PREFIX_NONE, 16, 1, 1067 + raw_cmd->fullcmd, raw_cmd->cmd_count, false); 1077 1068 cont->done(0); 1078 1069 fdc_state[current_fdc].reset = 1; 1079 1070 return; ··· 1107 1104 #endif 1108 1105 } 1109 1106 1110 - static void show_floppy(void); 1107 + static void show_floppy(int fdc); 1111 1108 1112 1109 /* waits until the fdc becomes ready */ 1113 - static int wait_til_ready(void) 1110 + static int wait_til_ready(int fdc) 1114 1111 { 1115 1112 int status; 1116 1113 int counter; 1117 1114 1118 - if (fdc_state[current_fdc].reset) 1115 + if (fdc_state[fdc].reset) 1119 1116 return -1; 1120 1117 for (counter = 0; counter < 10000; counter++) { 1121 - status = fdc_inb(current_fdc, FD_STATUS); 1118 + status = fdc_inb(fdc, FD_STATUS); 1122 1119 if (status & STATUS_READY) 1123 1120 return status; 1124 1121 } 1125 1122 if (initialized) { 1126 - DPRINT("Getstatus times out (%x) on fdc %d\n", status, current_fdc); 1127 - show_floppy(); 1123 + DPRINT("Getstatus times out (%x) on fdc %d\n", status, fdc); 1124 + show_floppy(fdc); 1128 1125 } 1129 - fdc_state[current_fdc].reset = 1; 1126 + fdc_state[fdc].reset = 1; 1130 1127 return -1; 1131 1128 } 1132 1129 1133 1130 /* sends a command byte to the fdc */ 1134 - static int output_byte(char byte) 1131 + static int output_byte(int fdc, char byte) 1135 1132 { 1136 - int status = wait_til_ready(); 1133 + int status = wait_til_ready(fdc); 1137 1134 1138 1135 if (status < 0) 1139 1136 return -1; 1140 1137 1141 1138 if (is_ready_state(status)) { 1142 - fdc_outb(byte, current_fdc, FD_DATA); 1139 + fdc_outb(byte, fdc, 
FD_DATA); 1143 1140 output_log[output_log_pos].data = byte; 1144 1141 output_log[output_log_pos].status = status; 1145 1142 output_log[output_log_pos].jiffies = jiffies; 1146 1143 output_log_pos = (output_log_pos + 1) % OLOGSIZE; 1147 1144 return 0; 1148 1145 } 1149 - fdc_state[current_fdc].reset = 1; 1146 + fdc_state[fdc].reset = 1; 1150 1147 if (initialized) { 1151 1148 DPRINT("Unable to send byte %x to FDC. Fdc=%x Status=%x\n", 1152 - byte, current_fdc, status); 1153 - show_floppy(); 1149 + byte, fdc, status); 1150 + show_floppy(fdc); 1154 1151 } 1155 1152 return -1; 1156 1153 } 1157 1154 1158 1155 /* gets the response from the fdc */ 1159 - static int result(void) 1156 + static int result(int fdc) 1160 1157 { 1161 1158 int i; 1162 1159 int status = 0; 1163 1160 1164 - for (i = 0; i < MAX_REPLIES; i++) { 1165 - status = wait_til_ready(); 1161 + for (i = 0; i < FD_RAW_REPLY_SIZE; i++) { 1162 + status = wait_til_ready(fdc); 1166 1163 if (status < 0) 1167 1164 break; 1168 1165 status &= STATUS_DIR | STATUS_READY | STATUS_BUSY | STATUS_DMA; ··· 1172 1169 return i; 1173 1170 } 1174 1171 if (status == (STATUS_DIR | STATUS_READY | STATUS_BUSY)) 1175 - reply_buffer[i] = fdc_inb(current_fdc, FD_DATA); 1172 + reply_buffer[i] = fdc_inb(fdc, FD_DATA); 1176 1173 else 1177 1174 break; 1178 1175 } 1179 1176 if (initialized) { 1180 1177 DPRINT("get result error. Fdc=%d Last status=%x Read bytes=%d\n", 1181 - current_fdc, status, i); 1182 - show_floppy(); 1178 + fdc, status, i); 1179 + show_floppy(fdc); 1183 1180 } 1184 - fdc_state[current_fdc].reset = 1; 1181 + fdc_state[fdc].reset = 1; 1185 1182 return -1; 1186 1183 } 1187 1184 1188 1185 #define MORE_OUTPUT -2 1189 1186 /* does the fdc need more output? 
*/ 1190 - static int need_more_output(void) 1187 + static int need_more_output(int fdc) 1191 1188 { 1192 - int status = wait_til_ready(); 1189 + int status = wait_til_ready(fdc); 1193 1190 1194 1191 if (status < 0) 1195 1192 return -1; ··· 1197 1194 if (is_ready_state(status)) 1198 1195 return MORE_OUTPUT; 1199 1196 1200 - return result(); 1197 + return result(fdc); 1201 1198 } 1202 1199 1203 1200 /* Set perpendicular mode as required, based on data rate, if supported. 1204 1201 * 82077 Now tested. 1Mbps data rate only possible with 82077-1. 1205 1202 */ 1206 - static void perpendicular_mode(void) 1203 + static void perpendicular_mode(int fdc) 1207 1204 { 1208 1205 unsigned char perp_mode; 1209 1206 ··· 1218 1215 default: 1219 1216 DPRINT("Invalid data rate for perpendicular mode!\n"); 1220 1217 cont->done(0); 1221 - fdc_state[current_fdc].reset = 1; 1218 + fdc_state[fdc].reset = 1; 1222 1219 /* 1223 1220 * convenient way to return to 1224 1221 * redo without too much hassle ··· 1229 1226 } else 1230 1227 perp_mode = 0; 1231 1228 1232 - if (fdc_state[current_fdc].perp_mode == perp_mode) 1229 + if (fdc_state[fdc].perp_mode == perp_mode) 1233 1230 return; 1234 - if (fdc_state[current_fdc].version >= FDC_82077_ORIG) { 1235 - output_byte(FD_PERPENDICULAR); 1236 - output_byte(perp_mode); 1237 - fdc_state[current_fdc].perp_mode = perp_mode; 1231 + if (fdc_state[fdc].version >= FDC_82077_ORIG) { 1232 + output_byte(fdc, FD_PERPENDICULAR); 1233 + output_byte(fdc, perp_mode); 1234 + fdc_state[fdc].perp_mode = perp_mode; 1238 1235 } else if (perp_mode) { 1239 1236 DPRINT("perpendicular mode not supported by this FDC.\n"); 1240 1237 } ··· 1243 1240 static int fifo_depth = 0xa; 1244 1241 static int no_fifo; 1245 1242 1246 - static int fdc_configure(void) 1243 + static int fdc_configure(int fdc) 1247 1244 { 1248 1245 /* Turn on FIFO */ 1249 - output_byte(FD_CONFIGURE); 1250 - if (need_more_output() != MORE_OUTPUT) 1246 + output_byte(fdc, FD_CONFIGURE); 1247 + if 
(need_more_output(fdc) != MORE_OUTPUT) 1251 1248 return 0; 1252 - output_byte(0); 1253 - output_byte(0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); 1254 - output_byte(0); /* pre-compensation from track 1255 - 0 upwards */ 1249 + output_byte(fdc, 0); 1250 + output_byte(fdc, 0x10 | (no_fifo & 0x20) | (fifo_depth & 0xf)); 1251 + output_byte(fdc, 0); /* pre-compensation from track 0 upwards */ 1256 1252 return 1; 1257 1253 } 1258 1254 ··· 1276 1274 * 1277 1275 * These values are rounded up to the next highest available delay time. 1278 1276 */ 1279 - static void fdc_specify(void) 1277 + static void fdc_specify(int fdc, int drive) 1280 1278 { 1281 1279 unsigned char spec1; 1282 1280 unsigned char spec2; ··· 1288 1286 int hlt_max_code = 0x7f; 1289 1287 int hut_max_code = 0xf; 1290 1288 1291 - if (fdc_state[current_fdc].need_configure && 1292 - fdc_state[current_fdc].version >= FDC_82072A) { 1293 - fdc_configure(); 1294 - fdc_state[current_fdc].need_configure = 0; 1289 + if (fdc_state[fdc].need_configure && 1290 + fdc_state[fdc].version >= FDC_82072A) { 1291 + fdc_configure(fdc); 1292 + fdc_state[fdc].need_configure = 0; 1295 1293 } 1296 1294 1297 1295 switch (raw_cmd->rate & 0x03) { ··· 1300 1298 break; 1301 1299 case 1: 1302 1300 dtr = 300; 1303 - if (fdc_state[current_fdc].version >= FDC_82078) { 1301 + if (fdc_state[fdc].version >= FDC_82078) { 1304 1302 /* chose the default rate table, not the one 1305 1303 * where 1 = 2 Mbps */ 1306 - output_byte(FD_DRIVESPEC); 1307 - if (need_more_output() == MORE_OUTPUT) { 1308 - output_byte(UNIT(current_drive)); 1309 - output_byte(0xc0); 1304 + output_byte(fdc, FD_DRIVESPEC); 1305 + if (need_more_output(fdc) == MORE_OUTPUT) { 1306 + output_byte(fdc, UNIT(drive)); 1307 + output_byte(fdc, 0xc0); 1310 1308 } 1311 1309 } 1312 1310 break; ··· 1315 1313 break; 1316 1314 } 1317 1315 1318 - if (fdc_state[current_fdc].version >= FDC_82072) { 1316 + if (fdc_state[fdc].version >= FDC_82072) { 1319 1317 scale_dtr = dtr; 1320 1318 
hlt_max_code = 0x00; /* 0==256msec*dtr0/dtr (not linear!) */ 1321 1319 hut_max_code = 0x0; /* 0==256msec*dtr0/dtr (not linear!) */ 1322 1320 } 1323 1321 1324 1322 /* Convert step rate from microseconds to milliseconds and 4 bits */ 1325 - srt = 16 - DIV_ROUND_UP(drive_params[current_drive].srt * scale_dtr / 1000, 1323 + srt = 16 - DIV_ROUND_UP(drive_params[drive].srt * scale_dtr / 1000, 1326 1324 NOMINAL_DTR); 1327 1325 if (slow_floppy) 1328 1326 srt = srt / 4; ··· 1330 1328 SUPBOUND(srt, 0xf); 1331 1329 INFBOUND(srt, 0); 1332 1330 1333 - hlt = DIV_ROUND_UP(drive_params[current_drive].hlt * scale_dtr / 2, 1331 + hlt = DIV_ROUND_UP(drive_params[drive].hlt * scale_dtr / 2, 1334 1332 NOMINAL_DTR); 1335 1333 if (hlt < 0x01) 1336 1334 hlt = 0x01; 1337 1335 else if (hlt > 0x7f) 1338 1336 hlt = hlt_max_code; 1339 1337 1340 - hut = DIV_ROUND_UP(drive_params[current_drive].hut * scale_dtr / 16, 1338 + hut = DIV_ROUND_UP(drive_params[drive].hut * scale_dtr / 16, 1341 1339 NOMINAL_DTR); 1342 1340 if (hut < 0x1) 1343 1341 hut = 0x1; ··· 1348 1346 spec2 = (hlt << 1) | (use_virtual_dma & 1); 1349 1347 1350 1348 /* If these parameters did not change, just return with success */ 1351 - if (fdc_state[current_fdc].spec1 != spec1 || 1352 - fdc_state[current_fdc].spec2 != spec2) { 1349 + if (fdc_state[fdc].spec1 != spec1 || 1350 + fdc_state[fdc].spec2 != spec2) { 1353 1351 /* Go ahead and set spec1 and spec2 */ 1354 - output_byte(FD_SPECIFY); 1355 - output_byte(fdc_state[current_fdc].spec1 = spec1); 1356 - output_byte(fdc_state[current_fdc].spec2 = spec2); 1352 + output_byte(fdc, FD_SPECIFY); 1353 + output_byte(fdc, fdc_state[fdc].spec1 = spec1); 1354 + output_byte(fdc, fdc_state[fdc].spec2 = spec2); 1357 1355 } 1358 1356 } /* fdc_specify */ 1359 1357 ··· 1515 1513 1516 1514 r = 0; 1517 1515 for (i = 0; i < raw_cmd->cmd_count; i++) 1518 - r |= output_byte(raw_cmd->cmd[i]); 1516 + r |= output_byte(current_fdc, raw_cmd->fullcmd[i]); 1519 1517 1520 1518 debugt(__func__, "rw_command"); 
1521 1519 ··· 1526 1524 } 1527 1525 1528 1526 if (!(flags & FD_RAW_INTR)) { 1529 - inr = result(); 1527 + inr = result(current_fdc); 1530 1528 cont->interrupt(); 1531 1529 } else if (flags & FD_RAW_NEED_DISK) 1532 1530 fd_watchdog(); ··· 1564 1562 floppy_ready(); 1565 1563 } 1566 1564 1567 - static void check_wp(void) 1565 + static void check_wp(int fdc, int drive) 1568 1566 { 1569 - if (test_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags)) { 1567 + if (test_bit(FD_VERIFY_BIT, &drive_state[drive].flags)) { 1570 1568 /* check write protection */ 1571 - output_byte(FD_GETSTATUS); 1572 - output_byte(UNIT(current_drive)); 1573 - if (result() != 1) { 1574 - fdc_state[current_fdc].reset = 1; 1569 + output_byte(fdc, FD_GETSTATUS); 1570 + output_byte(fdc, UNIT(drive)); 1571 + if (result(fdc) != 1) { 1572 + fdc_state[fdc].reset = 1; 1575 1573 return; 1576 1574 } 1577 - clear_bit(FD_VERIFY_BIT, &drive_state[current_drive].flags); 1575 + clear_bit(FD_VERIFY_BIT, &drive_state[drive].flags); 1578 1576 clear_bit(FD_NEED_TWADDLE_BIT, 1579 - &drive_state[current_drive].flags); 1580 - debug_dcl(drive_params[current_drive].flags, 1577 + &drive_state[drive].flags); 1578 + debug_dcl(drive_params[drive].flags, 1581 1579 "checking whether disk is write protected\n"); 1582 - debug_dcl(drive_params[current_drive].flags, "wp=%x\n", 1580 + debug_dcl(drive_params[drive].flags, "wp=%x\n", 1583 1581 reply_buffer[ST3] & 0x40); 1584 1582 if (!(reply_buffer[ST3] & 0x40)) 1585 1583 set_bit(FD_DISK_WRITABLE_BIT, 1586 - &drive_state[current_drive].flags); 1584 + &drive_state[drive].flags); 1587 1585 else 1588 1586 clear_bit(FD_DISK_WRITABLE_BIT, 1589 - &drive_state[current_drive].flags); 1587 + &drive_state[drive].flags); 1590 1588 } 1591 1589 } 1592 1590 ··· 1630 1628 track = 1; 1631 1629 } 1632 1630 } else { 1633 - check_wp(); 1631 + check_wp(current_fdc, current_drive); 1634 1632 if (raw_cmd->track != drive_state[current_drive].track && 1635 1633 (raw_cmd->flags & FD_RAW_NEED_SEEK)) 1636 
1634 track = raw_cmd->track; ··· 1641 1639 } 1642 1640 1643 1641 do_floppy = seek_interrupt; 1644 - output_byte(FD_SEEK); 1645 - output_byte(UNIT(current_drive)); 1646 - if (output_byte(track) < 0) { 1642 + output_byte(current_fdc, FD_SEEK); 1643 + output_byte(current_fdc, UNIT(current_drive)); 1644 + if (output_byte(current_fdc, track) < 0) { 1647 1645 reset_fdc(); 1648 1646 return; 1649 1647 } ··· 1744 1742 1745 1743 do_print = !handler && print_unex && initialized; 1746 1744 1747 - inr = result(); 1745 + inr = result(current_fdc); 1748 1746 if (do_print) 1749 1747 print_result("unexpected interrupt", inr); 1750 1748 if (inr == 0) { 1751 1749 int max_sensei = 4; 1752 1750 do { 1753 - output_byte(FD_SENSEI); 1754 - inr = result(); 1751 + output_byte(current_fdc, FD_SENSEI); 1752 + inr = result(current_fdc); 1755 1753 if (do_print) 1756 1754 print_result("sensei", inr); 1757 1755 max_sensei--; ··· 1773 1771 { 1774 1772 debugt(__func__, ""); 1775 1773 do_floppy = recal_interrupt; 1776 - output_byte(FD_RECALIBRATE); 1777 - if (output_byte(UNIT(current_drive)) < 0) 1774 + output_byte(current_fdc, FD_RECALIBRATE); 1775 + if (output_byte(current_fdc, UNIT(current_drive)) < 0) 1778 1776 reset_fdc(); 1779 1777 } 1780 1778 ··· 1784 1782 static void reset_interrupt(void) 1785 1783 { 1786 1784 debugt(__func__, ""); 1787 - result(); /* get the status ready for set_fdc */ 1785 + result(current_fdc); /* get the status ready for set_fdc */ 1788 1786 if (fdc_state[current_fdc].reset) { 1789 1787 pr_info("reset set in interrupt, calling %ps\n", cont->error); 1790 1788 cont->error(); /* a reset just after a reset. BAD! */ ··· 1794 1792 1795 1793 /* 1796 1794 * reset is done by pulling bit 2 of DOR low for a while (old FDCs), 1797 - * or by setting the self clearing bit 7 of STATUS (newer FDCs) 1795 + * or by setting the self clearing bit 7 of STATUS (newer FDCs). 
1796 + * This WILL trigger an interrupt, causing the handlers in the current 1797 + * cont's ->redo() to be called via reset_interrupt(). 1798 1798 */ 1799 1799 static void reset_fdc(void) 1800 1800 { ··· 1804 1800 1805 1801 do_floppy = reset_interrupt; 1806 1802 fdc_state[current_fdc].reset = 0; 1807 - reset_fdc_info(0); 1803 + reset_fdc_info(current_fdc, 0); 1808 1804 1809 1805 /* Pseudo-DMA may intercept 'reset finished' interrupt. */ 1810 1806 /* Irrelevant for systems with true DMA (i386). */ ··· 1823 1819 } 1824 1820 } 1825 1821 1826 - static void show_floppy(void) 1822 + static void show_floppy(int fdc) 1827 1823 { 1828 1824 int i; 1829 1825 ··· 1846 1842 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, 1847 1843 reply_buffer, resultsize, true); 1848 1844 1849 - pr_info("status=%x\n", fdc_inb(current_fdc, FD_STATUS)); 1845 + pr_info("status=%x\n", fdc_inb(fdc, FD_STATUS)); 1850 1846 pr_info("fdc_busy=%lu\n", fdc_busy); 1851 1847 if (do_floppy) 1852 1848 pr_info("do_floppy=%ps\n", do_floppy); ··· 1872 1868 unsigned long flags; 1873 1869 1874 1870 if (initialized) 1875 - show_floppy(); 1871 + show_floppy(current_fdc); 1876 1872 cancel_activity(); 1877 1873 1878 1874 flags = claim_dma_lock(); ··· 1938 1934 "calling disk change from floppy_ready\n"); 1939 1935 if (!(raw_cmd->flags & FD_RAW_NO_MOTOR) && 1940 1936 disk_change(current_drive) && !drive_params[current_drive].select_delay) 1941 - twaddle(); /* this clears the dcl on certain 1937 + twaddle(current_fdc, current_drive); /* this clears the dcl on certain 1942 1938 * drive/controller combinations */ 1943 1939 1944 1940 #ifdef fd_chose_dma_mode ··· 1950 1946 #endif 1951 1947 1952 1948 if (raw_cmd->flags & (FD_RAW_NEED_SEEK | FD_RAW_NEED_DISK)) { 1953 - perpendicular_mode(); 1954 - fdc_specify(); /* must be done here because of hut, hlt ... */ 1949 + perpendicular_mode(current_fdc); 1950 + fdc_specify(current_fdc, current_drive); /* must be done here because of hut, hlt ... 
*/ 1955 1951 seek_floppy(); 1956 1952 } else { 1957 1953 if ((raw_cmd->flags & FD_RAW_READ) || 1958 1954 (raw_cmd->flags & FD_RAW_WRITE)) 1959 - fdc_specify(); 1955 + fdc_specify(current_fdc, current_drive); 1960 1956 setup_rw_floppy(); 1961 1957 } 1962 1958 } 1963 1959 1964 1960 static void floppy_start(void) 1965 1961 { 1966 - reschedule_timeout(current_reqD, "floppy start"); 1962 + reschedule_timeout(current_drive, "floppy start"); 1967 1963 1968 1964 scandrives(); 1969 1965 debug_dcl(drive_params[current_drive].flags, ··· 2008 2004 .done = (done_f)empty 2009 2005 }; 2010 2006 2007 + /* schedules handler, waiting for completion. May be interrupted, will then 2008 + * return -EINTR, in which case the driver will automatically be unlocked. 2009 + */ 2011 2010 static int wait_til_done(void (*handler)(void), bool interruptible) 2012 2011 { 2013 2012 int ret; ··· 2066 2059 * ========================== 2067 2060 */ 2068 2061 2069 - static int next_valid_format(void) 2062 + static int next_valid_format(int drive) 2070 2063 { 2071 2064 int probed_format; 2072 2065 2073 - probed_format = drive_state[current_drive].probed_format; 2066 + probed_format = drive_state[drive].probed_format; 2074 2067 while (1) { 2075 - if (probed_format >= 8 || !drive_params[current_drive].autodetect[probed_format]) { 2076 - drive_state[current_drive].probed_format = 0; 2068 + if (probed_format >= FD_AUTODETECT_SIZE || 2069 + !drive_params[drive].autodetect[probed_format]) { 2070 + drive_state[drive].probed_format = 0; 2077 2071 return 1; 2078 2072 } 2079 - if (floppy_type[drive_params[current_drive].autodetect[probed_format]].sect) { 2080 - drive_state[current_drive].probed_format = probed_format; 2073 + if (floppy_type[drive_params[drive].autodetect[probed_format]].sect) { 2074 + drive_state[drive].probed_format = probed_format; 2081 2075 return 0; 2082 2076 } 2083 2077 probed_format++; ··· 2091 2083 2092 2084 if (probing) { 2093 2085 drive_state[current_drive].probed_format++; 2094 - if 
(!next_valid_format()) 2086 + if (!next_valid_format(current_drive)) 2095 2087 return; 2096 2088 } 2097 2089 err_count = ++(*errors); ··· 2851 2843 return current_req != NULL; 2852 2844 } 2853 2845 2846 + /* Starts or continues processing request. Will automatically unlock the 2847 + * driver at end of request. 2848 + */ 2854 2849 static void redo_fd_request(void) 2855 2850 { 2856 2851 int drive; ··· 2878 2867 } 2879 2868 drive = (long)current_req->rq_disk->private_data; 2880 2869 set_fdc(drive); 2881 - reschedule_timeout(current_reqD, "redo fd request"); 2870 + reschedule_timeout(current_drive, "redo fd request"); 2882 2871 2883 2872 set_floppy(drive); 2884 2873 raw_cmd = &default_raw_cmd; ··· 2896 2885 if (!_floppy) { /* Autodetection */ 2897 2886 if (!probing) { 2898 2887 drive_state[current_drive].probed_format = 0; 2899 - if (next_valid_format()) { 2888 + if (next_valid_format(current_drive)) { 2900 2889 DPRINT("no autodetectable formats\n"); 2901 2890 _floppy = NULL; 2902 2891 request_done(0); ··· 2915 2904 } 2916 2905 2917 2906 if (test_bit(FD_NEED_TWADDLE_BIT, &drive_state[current_drive].flags)) 2918 - twaddle(); 2907 + twaddle(current_fdc, current_drive); 2919 2908 schedule_bh(floppy_start); 2920 2909 debugt(__func__, "queue fd request"); 2921 2910 return; ··· 2928 2917 .done = request_done 2929 2918 }; 2930 2919 2920 + /* schedule the request and automatically unlock the driver on completion */ 2931 2921 static void process_fd_request(void) 2932 2922 { 2933 2923 cont = &rw_cont; ··· 2950 2938 (unsigned long long) current_req->cmd_flags)) 2951 2939 return BLK_STS_IOERR; 2952 2940 2953 - spin_lock_irq(&floppy_lock); 2954 - list_add_tail(&bd->rq->queuelist, &floppy_reqs); 2955 - spin_unlock_irq(&floppy_lock); 2956 - 2957 2941 if (test_and_set_bit(0, &fdc_busy)) { 2958 2942 /* fdc busy, this new request will be treated when the 2959 2943 current one is done */ 2960 2944 is_alive(__func__, "old request running"); 2961 - return BLK_STS_OK; 2945 + return 
BLK_STS_RESOURCE; 2962 2946 } 2947 + 2948 + spin_lock_irq(&floppy_lock); 2949 + list_add_tail(&bd->rq->queuelist, &floppy_reqs); 2950 + spin_unlock_irq(&floppy_lock); 2963 2951 2964 2952 command_status = FD_COMMAND_NONE; 2965 2953 __reschedule_timeout(MAXTIMEOUT, "fd_request"); ··· 3008 2996 .done = generic_done 3009 2997 }; 3010 2998 2999 + /* 3000 + * Resets the FDC connected to drive <drive>. 3001 + * Both current_drive and current_fdc are changed to match the new drive. 3002 + */ 3011 3003 static int user_reset_fdc(int drive, int arg, bool interruptible) 3012 3004 { 3013 3005 int ret; ··· 3022 3006 if (arg == FD_RESET_ALWAYS) 3023 3007 fdc_state[current_fdc].reset = 1; 3024 3008 if (fdc_state[current_fdc].reset) { 3009 + /* note: reset_fdc will take care of unlocking the driver 3010 + * on completion. 3011 + */ 3025 3012 cont = &reset_cont; 3026 3013 ret = wait_til_done(reset_fdc, interruptible); 3027 3014 if (ret == -EINTR) ··· 3078 3059 raw_cmd->flags |= FD_RAW_HARDFAILURE; 3079 3060 } else { 3080 3061 raw_cmd->reply_count = inr; 3081 - if (raw_cmd->reply_count > MAX_REPLIES) 3062 + if (raw_cmd->reply_count > FD_RAW_REPLY_SIZE) 3082 3063 raw_cmd->reply_count = 0; 3083 3064 for (i = 0; i < raw_cmd->reply_count; i++) 3084 3065 raw_cmd->reply[i] = reply_buffer[i]; ··· 3189 3170 if (ret) 3190 3171 return -EFAULT; 3191 3172 param += sizeof(struct floppy_raw_cmd); 3192 - if (ptr->cmd_count > 33) 3193 - /* the command may now also take up the space 3194 - * initially intended for the reply & the 3195 - * reply count. Needed for long 82078 commands 3196 - * such as RESTORE, which takes ... 17 command 3197 - * bytes. Murphy's law #137: When you reserve 3198 - * 16 bytes for a structure, you'll one day 3199 - * discover that you really need 17... 
3200 - */ 3173 + if (ptr->cmd_count > FD_RAW_CMD_FULLSIZE) 3201 3174 return -EINVAL; 3202 3175 3203 - for (i = 0; i < 16; i++) 3176 + for (i = 0; i < FD_RAW_REPLY_SIZE; i++) 3204 3177 ptr->reply[i] = 0; 3205 3178 ptr->resultcode = 0; 3206 3179 ··· 3434 3423 return 0; 3435 3424 } 3436 3425 3437 - static bool valid_floppy_drive_params(const short autodetect[8], 3426 + static bool valid_floppy_drive_params(const short autodetect[FD_AUTODETECT_SIZE], 3438 3427 int native_format) 3439 3428 { 3440 3429 size_t floppy_type_size = ARRAY_SIZE(floppy_type); 3441 3430 size_t i = 0; 3442 3431 3443 - for (i = 0; i < 8; ++i) { 3432 + for (i = 0; i < FD_AUTODETECT_SIZE; ++i) { 3444 3433 if (autodetect[i] < 0 || 3445 3434 autodetect[i] >= floppy_type_size) 3446 3435 return false; ··· 3621 3610 case FDTWADDLE: 3622 3611 if (lock_fdc(drive)) 3623 3612 return -EINTR; 3624 - twaddle(); 3613 + twaddle(current_fdc, current_drive); 3625 3614 process_fd_request(); 3626 3615 return 0; 3627 3616 default: ··· 3665 3654 struct floppy_max_errors max_errors; 3666 3655 char flags; 3667 3656 char read_track; 3668 - short autodetect[8]; 3657 + short autodetect[FD_AUTODETECT_SIZE]; 3669 3658 compat_int_t checkfreq; 3670 3659 compat_int_t native_format; 3671 3660 }; ··· 4309 4298 4310 4299 /* Determine the floppy disk controller type */ 4311 4300 /* This routine was written by David C. Niemi */ 4312 - static char __init get_fdc_version(void) 4301 + static char __init get_fdc_version(int fdc) 4313 4302 { 4314 4303 int r; 4315 4304 4316 - output_byte(FD_DUMPREGS); /* 82072 and better know DUMPREGS */ 4317 - if (fdc_state[current_fdc].reset) 4305 + output_byte(fdc, FD_DUMPREGS); /* 82072 and better know DUMPREGS */ 4306 + if (fdc_state[fdc].reset) 4318 4307 return FDC_NONE; 4319 - r = result(); 4308 + r = result(fdc); 4320 4309 if (r <= 0x00) 4321 4310 return FDC_NONE; /* No FDC present ??? 
*/ 4322 4311 if ((r == 1) && (reply_buffer[0] == 0x80)) { 4323 - pr_info("FDC %d is an 8272A\n", current_fdc); 4312 + pr_info("FDC %d is an 8272A\n", fdc); 4324 4313 return FDC_8272A; /* 8272a/765 don't know DUMPREGS */ 4325 4314 } 4326 4315 if (r != 10) { 4327 4316 pr_info("FDC %d init: DUMPREGS: unexpected return of %d bytes.\n", 4328 - current_fdc, r); 4317 + fdc, r); 4329 4318 return FDC_UNKNOWN; 4330 4319 } 4331 4320 4332 - if (!fdc_configure()) { 4333 - pr_info("FDC %d is an 82072\n", current_fdc); 4321 + if (!fdc_configure(fdc)) { 4322 + pr_info("FDC %d is an 82072\n", fdc); 4334 4323 return FDC_82072; /* 82072 doesn't know CONFIGURE */ 4335 4324 } 4336 4325 4337 - output_byte(FD_PERPENDICULAR); 4338 - if (need_more_output() == MORE_OUTPUT) { 4339 - output_byte(0); 4326 + output_byte(fdc, FD_PERPENDICULAR); 4327 + if (need_more_output(fdc) == MORE_OUTPUT) { 4328 + output_byte(fdc, 0); 4340 4329 } else { 4341 - pr_info("FDC %d is an 82072A\n", current_fdc); 4330 + pr_info("FDC %d is an 82072A\n", fdc); 4342 4331 return FDC_82072A; /* 82072A as found on Sparcs. 
*/ 4343 4332 } 4344 4333 4345 - output_byte(FD_UNLOCK); 4346 - r = result(); 4334 + output_byte(fdc, FD_UNLOCK); 4335 + r = result(fdc); 4347 4336 if ((r == 1) && (reply_buffer[0] == 0x80)) { 4348 - pr_info("FDC %d is a pre-1991 82077\n", current_fdc); 4337 + pr_info("FDC %d is a pre-1991 82077\n", fdc); 4349 4338 return FDC_82077_ORIG; /* Pre-1991 82077, doesn't know 4350 4339 * LOCK/UNLOCK */ 4351 4340 } 4352 4341 if ((r != 1) || (reply_buffer[0] != 0x00)) { 4353 4342 pr_info("FDC %d init: UNLOCK: unexpected return of %d bytes.\n", 4354 - current_fdc, r); 4343 + fdc, r); 4355 4344 return FDC_UNKNOWN; 4356 4345 } 4357 - output_byte(FD_PARTID); 4358 - r = result(); 4346 + output_byte(fdc, FD_PARTID); 4347 + r = result(fdc); 4359 4348 if (r != 1) { 4360 4349 pr_info("FDC %d init: PARTID: unexpected return of %d bytes.\n", 4361 - current_fdc, r); 4350 + fdc, r); 4362 4351 return FDC_UNKNOWN; 4363 4352 } 4364 4353 if (reply_buffer[0] == 0x80) { 4365 - pr_info("FDC %d is a post-1991 82077\n", current_fdc); 4354 + pr_info("FDC %d is a post-1991 82077\n", fdc); 4366 4355 return FDC_82077; /* Revised 82077AA passes all the tests */ 4367 4356 } 4368 4357 switch (reply_buffer[0] >> 5) { 4369 4358 case 0x0: 4370 4359 /* Either a 82078-1 or a 82078SL running at 5Volt */ 4371 - pr_info("FDC %d is an 82078.\n", current_fdc); 4360 + pr_info("FDC %d is an 82078.\n", fdc); 4372 4361 return FDC_82078; 4373 4362 case 0x1: 4374 - pr_info("FDC %d is a 44pin 82078\n", current_fdc); 4363 + pr_info("FDC %d is a 44pin 82078\n", fdc); 4375 4364 return FDC_82078; 4376 4365 case 0x2: 4377 - pr_info("FDC %d is a S82078B\n", current_fdc); 4366 + pr_info("FDC %d is a S82078B\n", fdc); 4378 4367 return FDC_S82078B; 4379 4368 case 0x3: 4380 - pr_info("FDC %d is a National Semiconductor PC87306\n", current_fdc); 4369 + pr_info("FDC %d is a National Semiconductor PC87306\n", fdc); 4381 4370 return FDC_87306; 4382 4371 default: 4383 4372 pr_info("FDC %d init: 82078 variant with unknown 
PARTID=%d.\n", 4384 - current_fdc, reply_buffer[0] >> 5); 4373 + fdc, reply_buffer[0] >> 5); 4385 4374 return FDC_82078_UNKN; 4386 4375 } 4387 4376 } /* get_fdc_version */ ··· 4545 4534 static int floppy_resume(struct device *dev) 4546 4535 { 4547 4536 int fdc; 4537 + int saved_drive; 4548 4538 4539 + saved_drive = current_drive; 4549 4540 for (fdc = 0; fdc < N_FDC; fdc++) 4550 4541 if (fdc_state[fdc].address != -1) 4551 - user_reset_fdc(-1, FD_RESET_ALWAYS, false); 4552 - 4542 + user_reset_fdc(REVDRIVE(fdc, 0), FD_RESET_ALWAYS, false); 4543 + set_fdc(saved_drive); 4553 4544 return 0; 4554 4545 } 4555 4546 ··· 4659 4646 config_types(); 4660 4647 4661 4648 for (i = 0; i < N_FDC; i++) { 4662 - current_fdc = i; 4663 - memset(&fdc_state[current_fdc], 0, sizeof(*fdc_state)); 4664 - fdc_state[current_fdc].dtr = -1; 4665 - fdc_state[current_fdc].dor = 0x4; 4649 + memset(&fdc_state[i], 0, sizeof(*fdc_state)); 4650 + fdc_state[i].dtr = -1; 4651 + fdc_state[i].dor = 0x4; 4666 4652 #if defined(__sparc__) || defined(__mc68000__) 4667 4653 /*sparcs/sun3x don't have a DOR reset which we can fall back on to */ 4668 4654 #ifdef __mc68000__ 4669 4655 if (MACH_IS_SUN3X) 4670 4656 #endif 4671 - fdc_state[current_fdc].version = FDC_82072A; 4657 + fdc_state[i].version = FDC_82072A; 4672 4658 #endif 4673 4659 } 4674 4660 ··· 4709 4697 msleep(10); 4710 4698 4711 4699 for (i = 0; i < N_FDC; i++) { 4712 - current_fdc = i; 4713 - fdc_state[current_fdc].driver_version = FD_DRIVER_VERSION; 4700 + fdc_state[i].driver_version = FD_DRIVER_VERSION; 4714 4701 for (unit = 0; unit < 4; unit++) 4715 - fdc_state[current_fdc].track[unit] = 0; 4716 - if (fdc_state[current_fdc].address == -1) 4702 + fdc_state[i].track[unit] = 0; 4703 + if (fdc_state[i].address == -1) 4717 4704 continue; 4718 - fdc_state[current_fdc].rawcmd = 2; 4719 - if (user_reset_fdc(-1, FD_RESET_ALWAYS, false)) { 4705 + fdc_state[i].rawcmd = 2; 4706 + if (user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false)) { 4720 4707 /* free 
ioports reserved by floppy_grab_irq_and_dma() */ 4721 - floppy_release_regions(current_fdc); 4722 - fdc_state[current_fdc].address = -1; 4723 - fdc_state[current_fdc].version = FDC_NONE; 4708 + floppy_release_regions(i); 4709 + fdc_state[i].address = -1; 4710 + fdc_state[i].version = FDC_NONE; 4724 4711 continue; 4725 4712 } 4726 4713 /* Try to determine the floppy controller type */ 4727 - fdc_state[current_fdc].version = get_fdc_version(); 4728 - if (fdc_state[current_fdc].version == FDC_NONE) { 4714 + fdc_state[i].version = get_fdc_version(i); 4715 + if (fdc_state[i].version == FDC_NONE) { 4729 4716 /* free ioports reserved by floppy_grab_irq_and_dma() */ 4730 - floppy_release_regions(current_fdc); 4731 - fdc_state[current_fdc].address = -1; 4717 + floppy_release_regions(i); 4718 + fdc_state[i].address = -1; 4732 4719 continue; 4733 4720 } 4734 4721 if (can_use_virtual_dma == 2 && 4735 - fdc_state[current_fdc].version < FDC_82072A) 4722 + fdc_state[i].version < FDC_82072A) 4736 4723 can_use_virtual_dma = 0; 4737 4724 4738 4725 have_no_fdc = 0; ··· 4739 4728 * properly, so force a reset for the standard FDC clones, 4740 4729 * to avoid interrupt garbage. 
4741 4730 */ 4742 - user_reset_fdc(-1, FD_RESET_ALWAYS, false); 4731 + user_reset_fdc(REVDRIVE(i, 0), FD_RESET_ALWAYS, false); 4743 4732 } 4744 4733 current_fdc = 0; 4745 4734 cancel_delayed_work(&fd_timeout); ··· 4866 4855 4867 4856 static int floppy_grab_irq_and_dma(void) 4868 4857 { 4858 + int fdc; 4859 + 4869 4860 if (atomic_inc_return(&usage_count) > 1) 4870 4861 return 0; 4871 4862 ··· 4895 4882 } 4896 4883 } 4897 4884 4898 - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { 4899 - if (fdc_state[current_fdc].address != -1) { 4900 - if (floppy_request_regions(current_fdc)) 4885 + for (fdc = 0; fdc < N_FDC; fdc++) { 4886 + if (fdc_state[fdc].address != -1) { 4887 + if (floppy_request_regions(fdc)) 4901 4888 goto cleanup; 4902 4889 } 4903 4890 } 4904 - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) { 4905 - if (fdc_state[current_fdc].address != -1) { 4906 - reset_fdc_info(1); 4907 - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); 4891 + for (fdc = 0; fdc < N_FDC; fdc++) { 4892 + if (fdc_state[fdc].address != -1) { 4893 + reset_fdc_info(fdc, 1); 4894 + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); 4908 4895 } 4909 4896 } 4910 - current_fdc = 0; 4897 + 4911 4898 set_dor(0, ~0, 8); /* avoid immediate interrupt */ 4912 4899 4913 - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) 4914 - if (fdc_state[current_fdc].address != -1) 4915 - fdc_outb(fdc_state[current_fdc].dor, current_fdc, FD_DOR); 4900 + for (fdc = 0; fdc < N_FDC; fdc++) 4901 + if (fdc_state[fdc].address != -1) 4902 + fdc_outb(fdc_state[fdc].dor, fdc, FD_DOR); 4916 4903 /* 4917 4904 * The driver will try and free resources and relies on us 4918 4905 * to know if they were allocated or not. 
··· 4923 4910 cleanup: 4924 4911 fd_free_irq(); 4925 4912 fd_free_dma(); 4926 - while (--current_fdc >= 0) 4927 - floppy_release_regions(current_fdc); 4913 + while (--fdc >= 0) 4914 + floppy_release_regions(fdc); 4915 + current_fdc = 0; 4928 4916 atomic_dec(&usage_count); 4929 4917 return -1; 4930 4918 } 4931 4919 4932 4920 static void floppy_release_irq_and_dma(void) 4933 4921 { 4934 - int old_fdc; 4922 + int fdc; 4935 4923 #ifndef __sparc__ 4936 4924 int drive; 4937 4925 #endif ··· 4973 4959 pr_info("auxiliary floppy timer still active\n"); 4974 4960 if (work_pending(&floppy_work)) 4975 4961 pr_info("work still pending\n"); 4976 - old_fdc = current_fdc; 4977 - for (current_fdc = 0; current_fdc < N_FDC; current_fdc++) 4978 - if (fdc_state[current_fdc].address != -1) 4979 - floppy_release_regions(current_fdc); 4980 - current_fdc = old_fdc; 4962 + for (fdc = 0; fdc < N_FDC; fdc++) 4963 + if (fdc_state[fdc].address != -1) 4964 + floppy_release_regions(fdc); 4981 4965 } 4982 4966 4983 4967 #ifdef MODULE
+285 -216
drivers/block/loop.c
··· 228 228 blk_mq_unfreeze_queue(lo->lo_queue); 229 229 } 230 230 231 + /** 232 + * loop_validate_block_size() - validates the passed in block size 233 + * @bsize: size to validate 234 + */ 231 235 static int 232 - figure_loop_size(struct loop_device *lo, loff_t offset, loff_t sizelimit) 236 + loop_validate_block_size(unsigned short bsize) 233 237 { 234 - loff_t size = get_size(offset, sizelimit, lo->lo_backing_file); 235 - sector_t x = (sector_t)size; 238 + if (bsize < 512 || bsize > PAGE_SIZE || !is_power_of_2(bsize)) 239 + return -EINVAL; 240 + 241 + return 0; 242 + } 243 + 244 + /** 245 + * loop_set_size() - sets device size and notifies userspace 246 + * @lo: struct loop_device to set the size for 247 + * @size: new size of the loop device 248 + * 249 + * Callers must validate that the size passed into this function fits into 250 + * a sector_t, eg using loop_validate_size() 251 + */ 252 + static void loop_set_size(struct loop_device *lo, loff_t size) 253 + { 236 254 struct block_device *bdev = lo->lo_device; 237 255 238 - if (unlikely((loff_t)x != size)) 239 - return -EFBIG; 240 - if (lo->lo_offset != offset) 241 - lo->lo_offset = offset; 242 - if (lo->lo_sizelimit != sizelimit) 243 - lo->lo_sizelimit = sizelimit; 244 - set_capacity(lo->lo_disk, x); 245 - bd_set_size(bdev, (loff_t)get_capacity(bdev->bd_disk) << 9); 246 - /* let user-space know about the new size */ 247 - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 248 - return 0; 256 + bd_set_size(bdev, size << SECTOR_SHIFT); 257 + 258 + set_capacity_revalidate_and_notify(lo->lo_disk, size, false); 249 259 } 250 260 251 261 static inline int ··· 962 952 blk_queue_flag_clear(QUEUE_FLAG_NONROT, q); 963 953 } 964 954 965 - static int loop_set_fd(struct loop_device *lo, fmode_t mode, 966 - struct block_device *bdev, unsigned int arg) 967 - { 968 - struct file *file; 969 - struct inode *inode; 970 - struct address_space *mapping; 971 - struct block_device *claimed_bdev = NULL; 972 - int 
lo_flags = 0; 973 - int error; 974 - loff_t size; 975 - bool partscan; 976 - 977 - /* This is safe, since we have a reference from open(). */ 978 - __module_get(THIS_MODULE); 979 - 980 - error = -EBADF; 981 - file = fget(arg); 982 - if (!file) 983 - goto out; 984 - 985 - /* 986 - * If we don't hold exclusive handle for the device, upgrade to it 987 - * here to avoid changing device under exclusive owner. 988 - */ 989 - if (!(mode & FMODE_EXCL)) { 990 - claimed_bdev = bd_start_claiming(bdev, loop_set_fd); 991 - if (IS_ERR(claimed_bdev)) { 992 - error = PTR_ERR(claimed_bdev); 993 - goto out_putf; 994 - } 995 - } 996 - 997 - error = mutex_lock_killable(&loop_ctl_mutex); 998 - if (error) 999 - goto out_bdev; 1000 - 1001 - error = -EBUSY; 1002 - if (lo->lo_state != Lo_unbound) 1003 - goto out_unlock; 1004 - 1005 - error = loop_validate_file(file, bdev); 1006 - if (error) 1007 - goto out_unlock; 1008 - 1009 - mapping = file->f_mapping; 1010 - inode = mapping->host; 1011 - 1012 - if (!(file->f_mode & FMODE_WRITE) || !(mode & FMODE_WRITE) || 1013 - !file->f_op->write_iter) 1014 - lo_flags |= LO_FLAGS_READ_ONLY; 1015 - 1016 - error = -EFBIG; 1017 - size = get_loop_size(lo, file); 1018 - if ((loff_t)(sector_t)size != size) 1019 - goto out_unlock; 1020 - error = loop_prepare_queue(lo); 1021 - if (error) 1022 - goto out_unlock; 1023 - 1024 - error = 0; 1025 - 1026 - set_device_ro(bdev, (lo_flags & LO_FLAGS_READ_ONLY) != 0); 1027 - 1028 - lo->use_dio = false; 1029 - lo->lo_device = bdev; 1030 - lo->lo_flags = lo_flags; 1031 - lo->lo_backing_file = file; 1032 - lo->transfer = NULL; 1033 - lo->ioctl = NULL; 1034 - lo->lo_sizelimit = 0; 1035 - lo->old_gfp_mask = mapping_gfp_mask(mapping); 1036 - mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 1037 - 1038 - if (!(lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 1039 - blk_queue_write_cache(lo->lo_queue, true, false); 1040 - 1041 - if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) { 1042 - /* 
In case of direct I/O, match underlying block size */ 1043 - unsigned short bsize = bdev_logical_block_size( 1044 - inode->i_sb->s_bdev); 1045 - 1046 - blk_queue_logical_block_size(lo->lo_queue, bsize); 1047 - blk_queue_physical_block_size(lo->lo_queue, bsize); 1048 - blk_queue_io_min(lo->lo_queue, bsize); 1049 - } 1050 - 1051 - loop_update_rotational(lo); 1052 - loop_update_dio(lo); 1053 - set_capacity(lo->lo_disk, size); 1054 - bd_set_size(bdev, size << 9); 1055 - loop_sysfs_init(lo); 1056 - /* let user-space know about the new size */ 1057 - kobject_uevent(&disk_to_dev(bdev->bd_disk)->kobj, KOBJ_CHANGE); 1058 - 1059 - set_blocksize(bdev, S_ISBLK(inode->i_mode) ? 1060 - block_size(inode->i_bdev) : PAGE_SIZE); 1061 - 1062 - lo->lo_state = Lo_bound; 1063 - if (part_shift) 1064 - lo->lo_flags |= LO_FLAGS_PARTSCAN; 1065 - partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; 1066 - 1067 - /* Grab the block_device to prevent its destruction after we 1068 - * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). 1069 - */ 1070 - bdgrab(bdev); 1071 - mutex_unlock(&loop_ctl_mutex); 1072 - if (partscan) 1073 - loop_reread_partitions(lo, bdev); 1074 - if (claimed_bdev) 1075 - bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); 1076 - return 0; 1077 - 1078 - out_unlock: 1079 - mutex_unlock(&loop_ctl_mutex); 1080 - out_bdev: 1081 - if (claimed_bdev) 1082 - bd_abort_claiming(bdev, claimed_bdev, loop_set_fd); 1083 - out_putf: 1084 - fput(file); 1085 - out: 1086 - /* This is safe: open() is still holding a reference. 
*/ 1087 - module_put(THIS_MODULE); 1088 - return error; 1089 - } 1090 - 1091 955 static int 1092 956 loop_release_xfer(struct loop_device *lo) 1093 957 { ··· 997 1113 lo->lo_encryption = xfer; 998 1114 } 999 1115 return err; 1116 + } 1117 + 1118 + /** 1119 + * loop_set_status_from_info - configure device from loop_info 1120 + * @lo: struct loop_device to configure 1121 + * @info: struct loop_info64 to configure the device with 1122 + * 1123 + * Configures the loop device parameters according to the passed 1124 + * in loop_info64 configuration. 1125 + */ 1126 + static int 1127 + loop_set_status_from_info(struct loop_device *lo, 1128 + const struct loop_info64 *info) 1129 + { 1130 + int err; 1131 + struct loop_func_table *xfer; 1132 + kuid_t uid = current_uid(); 1133 + 1134 + if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) 1135 + return -EINVAL; 1136 + 1137 + err = loop_release_xfer(lo); 1138 + if (err) 1139 + return err; 1140 + 1141 + if (info->lo_encrypt_type) { 1142 + unsigned int type = info->lo_encrypt_type; 1143 + 1144 + if (type >= MAX_LO_CRYPT) 1145 + return -EINVAL; 1146 + xfer = xfer_funcs[type]; 1147 + if (xfer == NULL) 1148 + return -EINVAL; 1149 + } else 1150 + xfer = NULL; 1151 + 1152 + err = loop_init_xfer(lo, xfer, info); 1153 + if (err) 1154 + return err; 1155 + 1156 + lo->lo_offset = info->lo_offset; 1157 + lo->lo_sizelimit = info->lo_sizelimit; 1158 + memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1159 + memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); 1160 + lo->lo_file_name[LO_NAME_SIZE-1] = 0; 1161 + lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; 1162 + 1163 + if (!xfer) 1164 + xfer = &none_funcs; 1165 + lo->transfer = xfer->transfer; 1166 + lo->ioctl = xfer->ioctl; 1167 + 1168 + lo->lo_flags = info->lo_flags; 1169 + 1170 + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1171 + lo->lo_init[0] = info->lo_init[0]; 1172 + lo->lo_init[1] = info->lo_init[1]; 1173 + if (info->lo_encrypt_key_size) { 1174 + 
memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, 1175 + info->lo_encrypt_key_size); 1176 + lo->lo_key_owner = uid; 1177 + } 1178 + 1179 + return 0; 1180 + } 1181 + 1182 + static int loop_configure(struct loop_device *lo, fmode_t mode, 1183 + struct block_device *bdev, 1184 + const struct loop_config *config) 1185 + { 1186 + struct file *file; 1187 + struct inode *inode; 1188 + struct address_space *mapping; 1189 + struct block_device *claimed_bdev = NULL; 1190 + int error; 1191 + loff_t size; 1192 + bool partscan; 1193 + unsigned short bsize; 1194 + 1195 + /* This is safe, since we have a reference from open(). */ 1196 + __module_get(THIS_MODULE); 1197 + 1198 + error = -EBADF; 1199 + file = fget(config->fd); 1200 + if (!file) 1201 + goto out; 1202 + 1203 + /* 1204 + * If we don't hold exclusive handle for the device, upgrade to it 1205 + * here to avoid changing device under exclusive owner. 1206 + */ 1207 + if (!(mode & FMODE_EXCL)) { 1208 + claimed_bdev = bd_start_claiming(bdev, loop_configure); 1209 + if (IS_ERR(claimed_bdev)) { 1210 + error = PTR_ERR(claimed_bdev); 1211 + goto out_putf; 1212 + } 1213 + } 1214 + 1215 + error = mutex_lock_killable(&loop_ctl_mutex); 1216 + if (error) 1217 + goto out_bdev; 1218 + 1219 + error = -EBUSY; 1220 + if (lo->lo_state != Lo_unbound) 1221 + goto out_unlock; 1222 + 1223 + error = loop_validate_file(file, bdev); 1224 + if (error) 1225 + goto out_unlock; 1226 + 1227 + mapping = file->f_mapping; 1228 + inode = mapping->host; 1229 + 1230 + size = get_loop_size(lo, file); 1231 + 1232 + if ((config->info.lo_flags & ~LOOP_CONFIGURE_SETTABLE_FLAGS) != 0) { 1233 + error = -EINVAL; 1234 + goto out_unlock; 1235 + } 1236 + 1237 + if (config->block_size) { 1238 + error = loop_validate_block_size(config->block_size); 1239 + if (error) 1240 + goto out_unlock; 1241 + } 1242 + 1243 + error = loop_set_status_from_info(lo, &config->info); 1244 + if (error) 1245 + goto out_unlock; 1246 + 1247 + if (!(file->f_mode & FMODE_WRITE) || !(mode & 
FMODE_WRITE) || 1248 + !file->f_op->write_iter) 1249 + lo->lo_flags |= LO_FLAGS_READ_ONLY; 1250 + 1251 + error = loop_prepare_queue(lo); 1252 + if (error) 1253 + goto out_unlock; 1254 + 1255 + set_device_ro(bdev, (lo->lo_flags & LO_FLAGS_READ_ONLY) != 0); 1256 + 1257 + lo->use_dio = lo->lo_flags & LO_FLAGS_DIRECT_IO; 1258 + lo->lo_device = bdev; 1259 + lo->lo_backing_file = file; 1260 + lo->old_gfp_mask = mapping_gfp_mask(mapping); 1261 + mapping_set_gfp_mask(mapping, lo->old_gfp_mask & ~(__GFP_IO|__GFP_FS)); 1262 + 1263 + if (!(lo->lo_flags & LO_FLAGS_READ_ONLY) && file->f_op->fsync) 1264 + blk_queue_write_cache(lo->lo_queue, true, false); 1265 + 1266 + if (config->block_size) 1267 + bsize = config->block_size; 1268 + else if (io_is_direct(lo->lo_backing_file) && inode->i_sb->s_bdev) 1269 + /* In case of direct I/O, match underlying block size */ 1270 + bsize = bdev_logical_block_size(inode->i_sb->s_bdev); 1271 + else 1272 + bsize = 512; 1273 + 1274 + blk_queue_logical_block_size(lo->lo_queue, bsize); 1275 + blk_queue_physical_block_size(lo->lo_queue, bsize); 1276 + blk_queue_io_min(lo->lo_queue, bsize); 1277 + 1278 + loop_update_rotational(lo); 1279 + loop_update_dio(lo); 1280 + loop_sysfs_init(lo); 1281 + loop_set_size(lo, size); 1282 + 1283 + set_blocksize(bdev, S_ISBLK(inode->i_mode) ? 1284 + block_size(inode->i_bdev) : PAGE_SIZE); 1285 + 1286 + lo->lo_state = Lo_bound; 1287 + if (part_shift) 1288 + lo->lo_flags |= LO_FLAGS_PARTSCAN; 1289 + partscan = lo->lo_flags & LO_FLAGS_PARTSCAN; 1290 + 1291 + /* Grab the block_device to prevent its destruction after we 1292 + * put /dev/loopXX inode. Later in __loop_clr_fd() we bdput(bdev). 
1293 + */ 1294 + bdgrab(bdev); 1295 + mutex_unlock(&loop_ctl_mutex); 1296 + if (partscan) 1297 + loop_reread_partitions(lo, bdev); 1298 + if (claimed_bdev) 1299 + bd_abort_claiming(bdev, claimed_bdev, loop_configure); 1300 + return 0; 1301 + 1302 + out_unlock: 1303 + mutex_unlock(&loop_ctl_mutex); 1304 + out_bdev: 1305 + if (claimed_bdev) 1306 + bd_abort_claiming(bdev, claimed_bdev, loop_configure); 1307 + out_putf: 1308 + fput(file); 1309 + out: 1310 + /* This is safe: open() is still holding a reference. */ 1311 + module_put(THIS_MODULE); 1312 + return error; 1000 1313 } 1001 1314 1002 1315 static int __loop_clr_fd(struct loop_device *lo, bool release) ··· 1344 1263 loop_set_status(struct loop_device *lo, const struct loop_info64 *info) 1345 1264 { 1346 1265 int err; 1347 - struct loop_func_table *xfer; 1348 - kuid_t uid = current_uid(); 1349 1266 struct block_device *bdev; 1267 + kuid_t uid = current_uid(); 1268 + int prev_lo_flags; 1350 1269 bool partscan = false; 1270 + bool size_changed = false; 1351 1271 1352 1272 err = mutex_lock_killable(&loop_ctl_mutex); 1353 1273 if (err) ··· 1363 1281 err = -ENXIO; 1364 1282 goto out_unlock; 1365 1283 } 1366 - if ((unsigned int) info->lo_encrypt_key_size > LO_KEY_SIZE) { 1367 - err = -EINVAL; 1368 - goto out_unlock; 1369 - } 1370 1284 1371 1285 if (lo->lo_offset != info->lo_offset || 1372 1286 lo->lo_sizelimit != info->lo_sizelimit) { 1287 + size_changed = true; 1373 1288 sync_blockdev(lo->lo_device); 1374 1289 kill_bdev(lo->lo_device); 1375 1290 } ··· 1374 1295 /* I/O need to be drained during transfer transition */ 1375 1296 blk_mq_freeze_queue(lo->lo_queue); 1376 1297 1377 - err = loop_release_xfer(lo); 1298 + if (size_changed && lo->lo_device->bd_inode->i_mapping->nrpages) { 1299 + /* If any pages were dirtied after kill_bdev(), try again */ 1300 + err = -EAGAIN; 1301 + pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", 1302 + __func__, lo->lo_number, lo->lo_file_name, 1303 + 
lo->lo_device->bd_inode->i_mapping->nrpages); 1304 + goto out_unfreeze; 1305 + } 1306 + 1307 + prev_lo_flags = lo->lo_flags; 1308 + 1309 + err = loop_set_status_from_info(lo, info); 1378 1310 if (err) 1379 1311 goto out_unfreeze; 1380 1312 1381 - if (info->lo_encrypt_type) { 1382 - unsigned int type = info->lo_encrypt_type; 1313 + /* Mask out flags that can't be set using LOOP_SET_STATUS. */ 1314 + lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS; 1315 + /* For those flags, use the previous values instead */ 1316 + lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS; 1317 + /* For flags that can't be cleared, use previous values too */ 1318 + lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_CLEARABLE_FLAGS; 1383 1319 1384 - if (type >= MAX_LO_CRYPT) { 1385 - err = -EINVAL; 1386 - goto out_unfreeze; 1387 - } 1388 - xfer = xfer_funcs[type]; 1389 - if (xfer == NULL) { 1390 - err = -EINVAL; 1391 - goto out_unfreeze; 1392 - } 1393 - } else 1394 - xfer = NULL; 1395 - 1396 - err = loop_init_xfer(lo, xfer, info); 1397 - if (err) 1398 - goto out_unfreeze; 1399 - 1400 - if (lo->lo_offset != info->lo_offset || 1401 - lo->lo_sizelimit != info->lo_sizelimit) { 1402 - /* kill_bdev should have truncated all the pages */ 1403 - if (lo->lo_device->bd_inode->i_mapping->nrpages) { 1404 - err = -EAGAIN; 1405 - pr_warn("%s: loop%d (%s) has still dirty pages (nrpages=%lu)\n", 1406 - __func__, lo->lo_number, lo->lo_file_name, 1407 - lo->lo_device->bd_inode->i_mapping->nrpages); 1408 - goto out_unfreeze; 1409 - } 1410 - if (figure_loop_size(lo, info->lo_offset, info->lo_sizelimit)) { 1411 - err = -EFBIG; 1412 - goto out_unfreeze; 1413 - } 1320 + if (size_changed) { 1321 + loff_t new_size = get_size(lo->lo_offset, lo->lo_sizelimit, 1322 + lo->lo_backing_file); 1323 + loop_set_size(lo, new_size); 1414 1324 } 1415 1325 1416 1326 loop_config_discard(lo); 1417 - 1418 - memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); 1419 - memcpy(lo->lo_crypt_name, info->lo_crypt_name, 
LO_NAME_SIZE); 1420 - lo->lo_file_name[LO_NAME_SIZE-1] = 0; 1421 - lo->lo_crypt_name[LO_NAME_SIZE-1] = 0; 1422 - 1423 - if (!xfer) 1424 - xfer = &none_funcs; 1425 - lo->transfer = xfer->transfer; 1426 - lo->ioctl = xfer->ioctl; 1427 - 1428 - if ((lo->lo_flags & LO_FLAGS_AUTOCLEAR) != 1429 - (info->lo_flags & LO_FLAGS_AUTOCLEAR)) 1430 - lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; 1431 - 1432 - lo->lo_encrypt_key_size = info->lo_encrypt_key_size; 1433 - lo->lo_init[0] = info->lo_init[0]; 1434 - lo->lo_init[1] = info->lo_init[1]; 1435 - if (info->lo_encrypt_key_size) { 1436 - memcpy(lo->lo_encrypt_key, info->lo_encrypt_key, 1437 - info->lo_encrypt_key_size); 1438 - lo->lo_key_owner = uid; 1439 - } 1440 1327 1441 1328 /* update dio if lo_offset or transfer is changed */ 1442 1329 __loop_update_dio(lo, lo->use_dio); ··· 1410 1365 out_unfreeze: 1411 1366 blk_mq_unfreeze_queue(lo->lo_queue); 1412 1367 1413 - if (!err && (info->lo_flags & LO_FLAGS_PARTSCAN) && 1414 - !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { 1415 - lo->lo_flags |= LO_FLAGS_PARTSCAN; 1368 + if (!err && (lo->lo_flags & LO_FLAGS_PARTSCAN) && 1369 + !(prev_lo_flags & LO_FLAGS_PARTSCAN)) { 1416 1370 lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; 1417 1371 bdev = lo->lo_device; 1418 1372 partscan = true; ··· 1575 1531 1576 1532 static int loop_set_capacity(struct loop_device *lo) 1577 1533 { 1534 + loff_t size; 1535 + 1578 1536 if (unlikely(lo->lo_state != Lo_bound)) 1579 1537 return -ENXIO; 1580 1538 1581 - return figure_loop_size(lo, lo->lo_offset, lo->lo_sizelimit); 1539 + size = get_loop_size(lo, lo->lo_backing_file); 1540 + loop_set_size(lo, size); 1541 + 1542 + return 0; 1582 1543 } 1583 1544 1584 1545 static int loop_set_dio(struct loop_device *lo, unsigned long arg) ··· 1607 1558 if (lo->lo_state != Lo_bound) 1608 1559 return -ENXIO; 1609 1560 1610 - if (arg < 512 || arg > PAGE_SIZE || !is_power_of_2(arg)) 1611 - return -EINVAL; 1561 + err = loop_validate_block_size(arg); 1562 + if (err) 1563 + return err; 1612 1564 
1613 1565 if (lo->lo_queue->limits.logical_block_size == arg) 1614 1566 return 0; ··· 1667 1617 unsigned int cmd, unsigned long arg) 1668 1618 { 1669 1619 struct loop_device *lo = bdev->bd_disk->private_data; 1620 + void __user *argp = (void __user *) arg; 1670 1621 int err; 1671 1622 1672 1623 switch (cmd) { 1673 - case LOOP_SET_FD: 1674 - return loop_set_fd(lo, mode, bdev, arg); 1624 + case LOOP_SET_FD: { 1625 + /* 1626 + * Legacy case - pass in a zeroed out struct loop_config with 1627 + * only the file descriptor set , which corresponds with the 1628 + * default parameters we'd have used otherwise. 1629 + */ 1630 + struct loop_config config; 1631 + 1632 + memset(&config, 0, sizeof(config)); 1633 + config.fd = arg; 1634 + 1635 + return loop_configure(lo, mode, bdev, &config); 1636 + } 1637 + case LOOP_CONFIGURE: { 1638 + struct loop_config config; 1639 + 1640 + if (copy_from_user(&config, argp, sizeof(config))) 1641 + return -EFAULT; 1642 + 1643 + return loop_configure(lo, mode, bdev, &config); 1644 + } 1675 1645 case LOOP_CHANGE_FD: 1676 1646 return loop_change_fd(lo, bdev, arg); 1677 1647 case LOOP_CLR_FD: ··· 1699 1629 case LOOP_SET_STATUS: 1700 1630 err = -EPERM; 1701 1631 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { 1702 - err = loop_set_status_old(lo, 1703 - (struct loop_info __user *)arg); 1632 + err = loop_set_status_old(lo, argp); 1704 1633 } 1705 1634 break; 1706 1635 case LOOP_GET_STATUS: 1707 - return loop_get_status_old(lo, (struct loop_info __user *) arg); 1636 + return loop_get_status_old(lo, argp); 1708 1637 case LOOP_SET_STATUS64: 1709 1638 err = -EPERM; 1710 1639 if ((mode & FMODE_WRITE) || capable(CAP_SYS_ADMIN)) { 1711 - err = loop_set_status64(lo, 1712 - (struct loop_info64 __user *) arg); 1640 + err = loop_set_status64(lo, argp); 1713 1641 } 1714 1642 break; 1715 1643 case LOOP_GET_STATUS64: 1716 - return loop_get_status64(lo, (struct loop_info64 __user *) arg); 1644 + return loop_get_status64(lo, argp); 1717 1645 case 
LOOP_SET_CAPACITY: 1718 1646 case LOOP_SET_DIRECT_IO: 1719 1647 case LOOP_SET_BLOCK_SIZE: ··· 1863 1795 case LOOP_CLR_FD: 1864 1796 case LOOP_GET_STATUS64: 1865 1797 case LOOP_SET_STATUS64: 1798 + case LOOP_CONFIGURE: 1866 1799 arg = (unsigned long) compat_ptr(arg); 1867 1800 /* fall through */ 1868 1801 case LOOP_SET_FD:
+3 -3
drivers/block/swim.c
··· 327 327 swim_select(base, RELAX); 328 328 if (swim_readbit(base, MOTOR_ON)) 329 329 break; 330 - current->state = TASK_INTERRUPTIBLE; 330 + set_current_state(TASK_INTERRUPTIBLE); 331 331 schedule_timeout(1); 332 332 } 333 333 } else if (action == OFF) { ··· 346 346 swim_select(base, RELAX); 347 347 if (!swim_readbit(base, DISK_IN)) 348 348 break; 349 - current->state = TASK_INTERRUPTIBLE; 349 + set_current_state(TASK_INTERRUPTIBLE); 350 350 schedule_timeout(1); 351 351 } 352 352 swim_select(base, RELAX); ··· 370 370 371 371 for (wait = 0; wait < HZ; wait++) { 372 372 373 - current->state = TASK_INTERRUPTIBLE; 373 + set_current_state(TASK_INTERRUPTIBLE); 374 374 schedule_timeout(1); 375 375 376 376 swim_select(base, RELAX);
+9
drivers/md/bcache/Kconfig
··· 26 26 Keeps all active closures in a linked list and provides a debugfs 27 27 interface to list them, which makes it possible to see asynchronous 28 28 operations that get stuck. 29 + 30 + config BCACHE_ASYNC_REGISTRAION 31 + bool "Asynchronous device registration (EXPERIMENTAL)" 32 + depends on BCACHE 33 + help 34 + Add a sysfs file /sys/fs/bcache/register_async. Writing registering 35 + device path into this file will returns immediately and the real 36 + registration work is handled in kernel work queue in asynchronous 37 + way.
+1 -1
drivers/md/bcache/bcache.h
··· 176 176 * - updates to non leaf nodes just happen synchronously (see btree_split()). 177 177 */ 178 178 179 - #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ 179 + #define pr_fmt(fmt) "bcache: %s() " fmt, __func__ 180 180 181 181 #include <linux/bcache.h> 182 182 #include <linux/bio.h>
+3 -3
drivers/md/bcache/bset.c
··· 6 6 * Copyright 2012 Google, Inc. 7 7 */ 8 8 9 - #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ 9 + #define pr_fmt(fmt) "bcache: %s() " fmt, __func__ 10 10 11 11 #include "util.h" 12 12 #include "bset.h" ··· 31 31 if (b->ops->key_dump) 32 32 b->ops->key_dump(b, k); 33 33 else 34 - pr_err("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); 34 + pr_cont("%llu:%llu\n", KEY_INODE(k), KEY_OFFSET(k)); 35 35 36 36 if (next < bset_bkey_last(i) && 37 37 bkey_cmp(k, b->ops->is_extents ? ··· 1225 1225 1226 1226 out->keys = last ? (uint64_t *) bkey_next(last) - out->d : 0; 1227 1227 1228 - pr_debug("sorted %i keys", out->keys); 1228 + pr_debug("sorted %i keys\n", out->keys); 1229 1229 } 1230 1230 1231 1231 static void __btree_sort(struct btree_keys *b, struct btree_iter *iter,
+7 -9
drivers/md/bcache/btree.c
··· 619 619 * and BTREE_NODE_journal_flush bit cleared by btree_flush_write(). 620 620 */ 621 621 if (btree_node_journal_flush(b)) { 622 - pr_debug("bnode %p is flushing by journal, retry", b); 622 + pr_debug("bnode %p is flushing by journal, retry\n", b); 623 623 mutex_unlock(&b->write_lock); 624 624 udelay(1); 625 625 goto retry; ··· 802 802 c->shrink.batch = c->btree_pages * 2; 803 803 804 804 if (register_shrinker(&c->shrink)) 805 - pr_warn("bcache: %s: could not register shrinker", 805 + pr_warn("bcache: %s: could not register shrinker\n", 806 806 __func__); 807 807 808 808 return 0; ··· 1054 1054 */ 1055 1055 if (btree_node_journal_flush(b)) { 1056 1056 mutex_unlock(&b->write_lock); 1057 - pr_debug("bnode %p journal_flush set, retry", b); 1057 + pr_debug("bnode %p journal_flush set, retry\n", b); 1058 1058 udelay(1); 1059 1059 goto retry; 1060 1060 } ··· 1798 1798 schedule_timeout_interruptible(msecs_to_jiffies 1799 1799 (GC_SLEEP_MS)); 1800 1800 else if (ret) 1801 - pr_warn("gc failed!"); 1801 + pr_warn("gc failed!\n"); 1802 1802 } while (ret && !test_bit(CACHE_SET_IO_DISABLE, &c->flags)); 1803 1803 1804 1804 bch_btree_gc_finish(c); ··· 1907 1907 struct btree_iter iter; 1908 1908 struct bkey *k, *p; 1909 1909 int cur_idx, prev_idx, skip_nr; 1910 - int i, n; 1911 1910 1912 1911 k = p = NULL; 1913 - i = n = 0; 1914 1912 cur_idx = prev_idx = 0; 1915 1913 ret = 0; 1916 1914 ··· 2043 2045 &check_state->infos[i], 2044 2046 name); 2045 2047 if (IS_ERR(check_state->infos[i].thread)) { 2046 - pr_err("fails to run thread bch_btrchk[%d]", i); 2048 + pr_err("fails to run thread bch_btrchk[%d]\n", i); 2047 2049 for (--i; i >= 0; i--) 2048 2050 kthread_stop(check_state->infos[i].thread); 2049 2051 ret = -ENOMEM; ··· 2454 2456 if (ret) { 2455 2457 struct bkey *k; 2456 2458 2457 - pr_err("error %i", ret); 2459 + pr_err("error %i\n", ret); 2458 2460 2459 2461 while ((k = bch_keylist_pop(keys))) 2460 2462 bkey_put(c, k); ··· 2742 2744 break; 2743 2745 2744 2746 if 
(bkey_cmp(&buf->last_scanned, end) >= 0) { 2745 - pr_debug("scan finished"); 2747 + pr_debug("scan finished\n"); 2746 2748 break; 2747 2749 } 2748 2750
+6 -6
drivers/md/bcache/extents.c
··· 130 130 char buf[80]; 131 131 132 132 bch_extent_to_text(buf, sizeof(buf), k); 133 - pr_err(" %s", buf); 133 + pr_cont(" %s", buf); 134 134 135 135 for (j = 0; j < KEY_PTRS(k); j++) { 136 136 size_t n = PTR_BUCKET_NR(b->c, k, j); 137 137 138 - pr_err(" bucket %zu", n); 138 + pr_cont(" bucket %zu", n); 139 139 if (n >= b->c->sb.first_bucket && n < b->c->sb.nbuckets) 140 - pr_err(" prio %i", 141 - PTR_BUCKET(b->c, k, j)->prio); 140 + pr_cont(" prio %i", 141 + PTR_BUCKET(b->c, k, j)->prio); 142 142 } 143 143 144 - pr_err(" %s\n", bch_ptr_status(b->c, k)); 144 + pr_cont(" %s\n", bch_ptr_status(b->c, k)); 145 145 } 146 146 147 147 /* Btree ptrs */ ··· 553 553 554 554 if (stale && KEY_DIRTY(k)) { 555 555 bch_extent_to_text(buf, sizeof(buf), k); 556 - pr_info("stale dirty pointer, stale %u, key: %s", 556 + pr_info("stale dirty pointer, stale %u, key: %s\n", 557 557 stale, buf); 558 558 } 559 559
+4 -4
drivers/md/bcache/io.c
··· 65 65 * we shouldn't count failed REQ_RAHEAD bio to dc->io_errors. 66 66 */ 67 67 if (bio->bi_opf & REQ_RAHEAD) { 68 - pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore", 68 + pr_warn_ratelimited("%s: Read-ahead I/O failed on backing device, ignore\n", 69 69 dc->backing_dev_name); 70 70 return; 71 71 } 72 72 73 73 errors = atomic_add_return(1, &dc->io_errors); 74 74 if (errors < dc->error_limit) 75 - pr_err("%s: IO error on backing device, unrecoverable", 75 + pr_err("%s: IO error on backing device, unrecoverable\n", 76 76 dc->backing_dev_name); 77 77 else 78 78 bch_cached_dev_error(dc); ··· 123 123 errors >>= IO_ERROR_SHIFT; 124 124 125 125 if (errors < ca->set->error_limit) 126 - pr_err("%s: IO error on %s%s", 126 + pr_err("%s: IO error on %s%s\n", 127 127 ca->cache_dev_name, m, 128 128 is_read ? ", recovering." : "."); 129 129 else 130 130 bch_cache_set_error(ca->set, 131 - "%s: too many IO errors %s", 131 + "%s: too many IO errors %s\n", 132 132 ca->cache_dev_name, m); 133 133 } 134 134 }
+17 -17
drivers/md/bcache/journal.c
··· 47 47 48 48 closure_init_stack(&cl); 49 49 50 - pr_debug("reading %u", bucket_index); 50 + pr_debug("reading %u\n", bucket_index); 51 51 52 52 while (offset < ca->sb.bucket_size) { 53 53 reread: left = ca->sb.bucket_size - offset; ··· 78 78 size_t blocks, bytes = set_bytes(j); 79 79 80 80 if (j->magic != jset_magic(&ca->sb)) { 81 - pr_debug("%u: bad magic", bucket_index); 81 + pr_debug("%u: bad magic\n", bucket_index); 82 82 return ret; 83 83 } 84 84 85 85 if (bytes > left << 9 || 86 86 bytes > PAGE_SIZE << JSET_BITS) { 87 - pr_info("%u: too big, %zu bytes, offset %u", 87 + pr_info("%u: too big, %zu bytes, offset %u\n", 88 88 bucket_index, bytes, offset); 89 89 return ret; 90 90 } ··· 93 93 goto reread; 94 94 95 95 if (j->csum != csum_set(j)) { 96 - pr_info("%u: bad csum, %zu bytes, offset %u", 96 + pr_info("%u: bad csum, %zu bytes, offset %u\n", 97 97 bucket_index, bytes, offset); 98 98 return ret; 99 99 } ··· 190 190 uint64_t seq; 191 191 192 192 bitmap_zero(bitmap, SB_JOURNAL_BUCKETS); 193 - pr_debug("%u journal buckets", ca->sb.njournal_buckets); 193 + pr_debug("%u journal buckets\n", ca->sb.njournal_buckets); 194 194 195 195 /* 196 196 * Read journal buckets ordered by golden ratio hash to quickly ··· 215 215 * If that fails, check all the buckets we haven't checked 216 216 * already 217 217 */ 218 - pr_debug("falling back to linear search"); 218 + pr_debug("falling back to linear search\n"); 219 219 220 220 for (l = find_first_zero_bit(bitmap, ca->sb.njournal_buckets); 221 221 l < ca->sb.njournal_buckets; ··· 233 233 /* Binary search */ 234 234 m = l; 235 235 r = find_next_bit(bitmap, ca->sb.njournal_buckets, l + 1); 236 - pr_debug("starting binary search, l %u r %u", l, r); 236 + pr_debug("starting binary search, l %u r %u\n", l, r); 237 237 238 238 while (l + 1 < r) { 239 239 seq = list_entry(list->prev, struct journal_replay, ··· 253 253 * Read buckets in reverse order until we stop finding more 254 254 * journal entries 255 255 */ 256 - 
pr_debug("finishing up: m %u njournal_buckets %u", 256 + pr_debug("finishing up: m %u njournal_buckets %u\n", 257 257 m, ca->sb.njournal_buckets); 258 258 l = m; 259 259 ··· 370 370 371 371 if (n != i->j.seq) { 372 372 if (n == start && is_discard_enabled(s)) 373 - pr_info("bcache: journal entries %llu-%llu may be discarded! (replaying %llu-%llu)", 373 + pr_info("journal entries %llu-%llu may be discarded! (replaying %llu-%llu)\n", 374 374 n, i->j.seq - 1, start, end); 375 375 else { 376 - pr_err("bcache: journal entries %llu-%llu missing! (replaying %llu-%llu)", 376 + pr_err("journal entries %llu-%llu missing! (replaying %llu-%llu)\n", 377 377 n, i->j.seq - 1, start, end); 378 378 ret = -EIO; 379 379 goto err; ··· 403 403 entries++; 404 404 } 405 405 406 - pr_info("journal replay done, %i keys in %i entries, seq %llu", 406 + pr_info("journal replay done, %i keys in %i entries, seq %llu\n", 407 407 keys, entries, end); 408 408 err: 409 409 while (!list_empty(list)) { ··· 481 481 break; 482 482 483 483 if (btree_node_journal_flush(b)) 484 - pr_err("BUG: flush_write bit should not be set here!"); 484 + pr_err("BUG: flush_write bit should not be set here!\n"); 485 485 486 486 mutex_lock(&b->write_lock); 487 487 ··· 534 534 for (i = 0; i < nr; i++) { 535 535 b = btree_nodes[i]; 536 536 if (!b) { 537 - pr_err("BUG: btree_nodes[%d] is NULL", i); 537 + pr_err("BUG: btree_nodes[%d] is NULL\n", i); 538 538 continue; 539 539 } 540 540 541 541 /* safe to check without holding b->write_lock */ 542 542 if (!btree_node_journal_flush(b)) { 543 - pr_err("BUG: bnode %p: journal_flush bit cleaned", b); 543 + pr_err("BUG: bnode %p: journal_flush bit cleaned\n", b); 544 544 continue; 545 545 } 546 546 ··· 548 548 if (!btree_current_write(b)->journal) { 549 549 clear_bit(BTREE_NODE_journal_flush, &b->flags); 550 550 mutex_unlock(&b->write_lock); 551 - pr_debug("bnode %p: written by others", b); 551 + pr_debug("bnode %p: written by others\n", b); 552 552 continue; 553 553 } 554 554 555 
555 if (!btree_node_dirty(b)) { 556 556 clear_bit(BTREE_NODE_journal_flush, &b->flags); 557 557 mutex_unlock(&b->write_lock); 558 - pr_debug("bnode %p: dirty bit cleaned by others", b); 558 + pr_debug("bnode %p: dirty bit cleaned by others\n", b); 559 559 continue; 560 560 } 561 561 ··· 716 716 j->cur->data->keys = 0; 717 717 718 718 if (fifo_full(&j->pin)) 719 - pr_debug("journal_pin full (%zu)", fifo_used(&j->pin)); 719 + pr_debug("journal_pin full (%zu)\n", fifo_used(&j->pin)); 720 720 } 721 721 722 722 static void journal_write_endio(struct bio *bio)
+3 -3
drivers/md/bcache/request.c
··· 110 110 struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); 111 111 struct bio *bio = op->bio; 112 112 113 - pr_debug("invalidating %i sectors from %llu", 113 + pr_debug("invalidating %i sectors from %llu\n", 114 114 bio_sectors(bio), (uint64_t) bio->bi_iter.bi_sector); 115 115 116 116 while (bio_sectors(bio)) { ··· 396 396 397 397 if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || 398 398 bio_sectors(bio) & (c->sb.block_size - 1)) { 399 - pr_debug("skipping unaligned io"); 399 + pr_debug("skipping unaligned io\n"); 400 400 goto skip; 401 401 } 402 402 ··· 650 650 */ 651 651 if (unlikely(s->iop.writeback && 652 652 bio->bi_opf & REQ_PREFLUSH)) { 653 - pr_err("Can't flush %s: returned bi_status %i", 653 + pr_err("Can't flush %s: returned bi_status %i\n", 654 654 dc->backing_dev_name, bio->bi_status); 655 655 } else { 656 656 /* set to orig_bio->bi_status in bio_complete() */
+169 -63
drivers/md/bcache/super.c
··· 89 89 for (i = 0; i < SB_JOURNAL_BUCKETS; i++) 90 90 sb->d[i] = le64_to_cpu(s->d[i]); 91 91 92 - pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u", 92 + pr_debug("read sb version %llu, flags %llu, seq %llu, journal size %u\n", 93 93 sb->version, sb->flags, sb->seq, sb->keys); 94 94 95 95 err = "Not a bcache superblock (bad offset)"; ··· 234 234 235 235 out->csum = csum_set(out); 236 236 237 - pr_debug("ver %llu, flags %llu, seq %llu", 237 + pr_debug("ver %llu, flags %llu, seq %llu\n", 238 238 sb->version, sb->flags, sb->seq); 239 239 240 240 submit_bio(bio); ··· 365 365 } 366 366 367 367 bch_extent_to_text(buf, sizeof(buf), k); 368 - pr_debug("%s UUIDs at %s", op == REQ_OP_WRITE ? "wrote" : "read", buf); 368 + pr_debug("%s UUIDs at %s\n", op == REQ_OP_WRITE ? "wrote" : "read", buf); 369 369 370 370 for (u = c->uuids; u < c->uuids + c->nr_uuids; u++) 371 371 if (!bch_is_zero(u->uuid, 16)) 372 - pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u", 372 + pr_debug("Slot %zi: %pU: %s: 1st: %u last: %u inv: %u\n", 373 373 u - c->uuids, u->uuid, u->label, 374 374 u->first_reg, u->last_reg, u->invalidated); 375 375 ··· 534 534 struct bucket *b; 535 535 struct closure cl; 536 536 537 - pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu", 537 + pr_debug("free_prio=%zu, free_none=%zu, free_inc=%zu\n", 538 538 fifo_used(&ca->free[RESERVE_PRIO]), 539 539 fifo_used(&ca->free[RESERVE_NONE]), 540 540 fifo_used(&ca->free_inc)); ··· 629 629 630 630 if (p->csum != 631 631 bch_crc64(&p->magic, bucket_bytes(ca) - 8)) { 632 - pr_warn("bad csum reading priorities"); 632 + pr_warn("bad csum reading priorities\n"); 633 633 goto out; 634 634 } 635 635 636 636 if (p->magic != pset_magic(&ca->sb)) { 637 - pr_warn("bad magic reading priorities"); 637 + pr_warn("bad magic reading priorities\n"); 638 638 goto out; 639 639 } 640 640 ··· 728 728 729 729 ret = sysfs_create_link(&d->kobj, &c->kobj, "cache"); 730 730 if (ret < 0) 731 - pr_err("Couldn't create device -> 
cache set symlink"); 731 + pr_err("Couldn't create device -> cache set symlink\n"); 732 732 733 733 ret = sysfs_create_link(&c->kobj, &d->kobj, d->name); 734 734 if (ret < 0) 735 - pr_err("Couldn't create cache set -> device symlink"); 735 + pr_err("Couldn't create cache set -> device symlink\n"); 736 736 737 737 clear_bit(BCACHE_DEV_UNLINK_DONE, &d->flags); 738 738 } ··· 789 789 lockdep_assert_held(&bch_register_lock); 790 790 791 791 if (disk) 792 - pr_info("%s stopped", disk->disk_name); 792 + pr_info("%s stopped\n", disk->disk_name); 793 793 else 794 - pr_err("bcache device (NULL gendisk) stopped"); 794 + pr_err("bcache device (NULL gendisk) stopped\n"); 795 795 796 796 if (d->c) 797 797 bcache_device_detach(d); 798 798 799 799 if (disk) { 800 - if (disk->flags & GENHD_FL_UP) 800 + bool disk_added = (disk->flags & GENHD_FL_UP) != 0; 801 + 802 + if (disk_added) 801 803 del_gendisk(disk); 802 804 803 805 if (disk->queue) ··· 807 805 808 806 ida_simple_remove(&bcache_device_idx, 809 807 first_minor_to_idx(disk->first_minor)); 810 - put_disk(disk); 808 + if (disk_added) 809 + put_disk(disk); 811 810 } 812 811 813 812 bioset_exit(&d->bio_split); ··· 833 830 d->nr_stripes = DIV_ROUND_UP_ULL(sectors, d->stripe_size); 834 831 835 832 if (!d->nr_stripes || d->nr_stripes > max_stripes) { 836 - pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)", 833 + pr_err("nr_stripes too large or invalid: %u (start sector beyond end of disk?)\n", 837 834 (unsigned int)d->nr_stripes); 838 835 return -ENOMEM; 839 836 } ··· 931 928 dc->offline_seconds = 0; 932 929 933 930 if (dc->offline_seconds >= BACKING_DEV_OFFLINE_TIMEOUT) { 934 - pr_err("%s: device offline for %d seconds", 931 + pr_err("%s: device offline for %d seconds\n", 935 932 dc->backing_dev_name, 936 933 BACKING_DEV_OFFLINE_TIMEOUT); 937 - pr_err("%s: disable I/O request due to backing " 938 - "device offline", dc->disk.name); 934 + pr_err("%s: disable I/O request due to backing device offline\n", 
935 + dc->disk.name); 939 936 dc->io_disable = true; 940 937 /* let others know earlier that io_disable is true */ 941 938 smp_mb(); ··· 962 959 }; 963 960 964 961 if (dc->io_disable) { 965 - pr_err("I/O disabled on cached dev %s", 962 + pr_err("I/O disabled on cached dev %s\n", 966 963 dc->backing_dev_name); 967 964 kfree(env[1]); 968 965 kfree(env[2]); ··· 974 971 kfree(env[1]); 975 972 kfree(env[2]); 976 973 kfree(buf); 977 - pr_info("cached dev %s is running already", 974 + pr_info("cached dev %s is running already\n", 978 975 dc->backing_dev_name); 979 976 return -EBUSY; 980 977 } ··· 1004 1001 if (sysfs_create_link(&d->kobj, &disk_to_dev(d->disk)->kobj, "dev") || 1005 1002 sysfs_create_link(&disk_to_dev(d->disk)->kobj, 1006 1003 &d->kobj, "bcache")) { 1007 - pr_err("Couldn't create bcache dev <-> disk sysfs symlinks"); 1004 + pr_err("Couldn't create bcache dev <-> disk sysfs symlinks\n"); 1008 1005 return -ENOMEM; 1009 1006 } 1010 1007 1011 1008 dc->status_update_thread = kthread_run(cached_dev_status_update, 1012 1009 dc, "bcache_status_update"); 1013 1010 if (IS_ERR(dc->status_update_thread)) { 1014 - pr_warn("failed to create bcache_status_update kthread, " 1015 - "continue to run without monitoring backing " 1016 - "device status"); 1011 + pr_warn("failed to create bcache_status_update kthread, continue to run without monitoring backing device status\n"); 1017 1012 } 1018 1013 1019 1014 return 0; ··· 1037 1036 } while (time_out > 0); 1038 1037 1039 1038 if (time_out == 0) 1040 - pr_warn("give up waiting for dc->writeback_write_update to quit"); 1039 + pr_warn("give up waiting for dc->writeback_write_update to quit\n"); 1041 1040 1042 1041 cancel_delayed_work_sync(&dc->writeback_rate_update); 1043 1042 } ··· 1078 1077 1079 1078 mutex_unlock(&bch_register_lock); 1080 1079 1081 - pr_info("Caching disabled for %s", dc->backing_dev_name); 1080 + pr_info("Caching disabled for %s\n", dc->backing_dev_name); 1082 1081 1083 1082 /* Drop ref we took in 
cached_dev_detach() */ 1084 1083 closure_put(&dc->disk.cl); ··· 1118 1117 return -ENOENT; 1119 1118 1120 1119 if (dc->disk.c) { 1121 - pr_err("Can't attach %s: already attached", 1120 + pr_err("Can't attach %s: already attached\n", 1122 1121 dc->backing_dev_name); 1123 1122 return -EINVAL; 1124 1123 } 1125 1124 1126 1125 if (test_bit(CACHE_SET_STOPPING, &c->flags)) { 1127 - pr_err("Can't attach %s: shutting down", 1126 + pr_err("Can't attach %s: shutting down\n", 1128 1127 dc->backing_dev_name); 1129 1128 return -EINVAL; 1130 1129 } 1131 1130 1132 1131 if (dc->sb.block_size < c->sb.block_size) { 1133 1132 /* Will die */ 1134 - pr_err("Couldn't attach %s: block size less than set's block size", 1133 + pr_err("Couldn't attach %s: block size less than set's block size\n", 1135 1134 dc->backing_dev_name); 1136 1135 return -EINVAL; 1137 1136 } ··· 1139 1138 /* Check whether already attached */ 1140 1139 list_for_each_entry_safe(exist_dc, t, &c->cached_devs, list) { 1141 1140 if (!memcmp(dc->sb.uuid, exist_dc->sb.uuid, 16)) { 1142 - pr_err("Tried to attach %s but duplicate UUID already attached", 1141 + pr_err("Tried to attach %s but duplicate UUID already attached\n", 1143 1142 dc->backing_dev_name); 1144 1143 1145 1144 return -EINVAL; ··· 1158 1157 1159 1158 if (!u) { 1160 1159 if (BDEV_STATE(&dc->sb) == BDEV_STATE_DIRTY) { 1161 - pr_err("Couldn't find uuid for %s in set", 1160 + pr_err("Couldn't find uuid for %s in set\n", 1162 1161 dc->backing_dev_name); 1163 1162 return -ENOENT; 1164 1163 } 1165 1164 1166 1165 u = uuid_find_empty(c); 1167 1166 if (!u) { 1168 - pr_err("Not caching %s, no room for UUID", 1167 + pr_err("Not caching %s, no room for UUID\n", 1169 1168 dc->backing_dev_name); 1170 1169 return -EINVAL; 1171 1170 } ··· 1211 1210 down_write(&dc->writeback_lock); 1212 1211 if (bch_cached_dev_writeback_start(dc)) { 1213 1212 up_write(&dc->writeback_lock); 1214 - pr_err("Couldn't start writeback facilities for %s", 1213 + pr_err("Couldn't start writeback 
facilities for %s\n", 1215 1214 dc->disk.disk->disk_name); 1216 1215 return -ENOMEM; 1217 1216 } ··· 1234 1233 */ 1235 1234 kthread_stop(dc->writeback_thread); 1236 1235 cancel_writeback_rate_update_dwork(dc); 1237 - pr_err("Couldn't run cached device %s", 1236 + pr_err("Couldn't run cached device %s\n", 1238 1237 dc->backing_dev_name); 1239 1238 return ret; 1240 1239 } ··· 1245 1244 /* Allow the writeback thread to proceed */ 1246 1245 up_write(&dc->writeback_lock); 1247 1246 1248 - pr_info("Caching %s as %s on set %pU", 1247 + pr_info("Caching %s as %s on set %pU\n", 1249 1248 dc->backing_dev_name, 1250 1249 dc->disk.disk->disk_name, 1251 1250 dc->disk.c->sb.set_uuid); ··· 1385 1384 if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj)) 1386 1385 goto err; 1387 1386 1388 - pr_info("registered backing device %s", dc->backing_dev_name); 1387 + pr_info("registered backing device %s\n", dc->backing_dev_name); 1389 1388 1390 1389 list_add(&dc->list, &uncached_devices); 1391 1390 /* attach to a matched cache set if it exists */ ··· 1402 1401 1403 1402 return 0; 1404 1403 err: 1405 - pr_notice("error %s: %s", dc->backing_dev_name, err); 1404 + pr_notice("error %s: %s\n", dc->backing_dev_name, err); 1406 1405 bcache_device_stop(&dc->disk); 1407 1406 return ret; 1408 1407 } ··· 1498 1497 1499 1498 u = uuid_find_empty(c); 1500 1499 if (!u) { 1501 - pr_err("Can't create volume, no room for UUID"); 1500 + pr_err("Can't create volume, no room for UUID\n"); 1502 1501 return -EINVAL; 1503 1502 } 1504 1503 ··· 1524 1523 smp_mb(); 1525 1524 1526 1525 pr_err("stop %s: too many IO errors on backing device %s\n", 1527 - dc->disk.disk->disk_name, dc->backing_dev_name); 1526 + dc->disk.disk->disk_name, dc->backing_dev_name); 1528 1527 1529 1528 bcache_device_stop(&dc->disk); 1530 1529 return true; ··· 1535 1534 __printf(2, 3) 1536 1535 bool bch_cache_set_error(struct cache_set *c, const char *fmt, ...) 
1537 1536 { 1537 + struct va_format vaf; 1538 1538 va_list args; 1539 1539 1540 1540 if (c->on_error != ON_ERROR_PANIC && ··· 1543 1541 return false; 1544 1542 1545 1543 if (test_and_set_bit(CACHE_SET_IO_DISABLE, &c->flags)) 1546 - pr_info("CACHE_SET_IO_DISABLE already set"); 1544 + pr_info("CACHE_SET_IO_DISABLE already set\n"); 1547 1545 1548 1546 /* 1549 1547 * XXX: we can be called from atomic context 1550 1548 * acquire_console_sem(); 1551 1549 */ 1552 1550 1553 - pr_err("bcache: error on %pU: ", c->sb.set_uuid); 1554 - 1555 1551 va_start(args, fmt); 1556 - vprintk(fmt, args); 1557 - va_end(args); 1558 1552 1559 - pr_err(", disabling caching\n"); 1553 + vaf.fmt = fmt; 1554 + vaf.va = &args; 1555 + 1556 + pr_err("error on %pU: %pV, disabling caching\n", 1557 + c->sb.set_uuid, &vaf); 1558 + 1559 + va_end(args); 1560 1560 1561 1561 if (c->on_error == ON_ERROR_PANIC) 1562 1562 panic("panic forced after error\n"); ··· 1610 1606 list_del(&c->list); 1611 1607 mutex_unlock(&bch_register_lock); 1612 1608 1613 - pr_info("Cache set %pU unregistered", c->sb.set_uuid); 1609 + pr_info("Cache set %pU unregistered\n", c->sb.set_uuid); 1614 1610 wake_up(&unregister_wait); 1615 1611 1616 1612 closure_debug_destroy(&c->cl); ··· 1681 1677 struct cached_dev *dc) 1682 1678 { 1683 1679 if (dc->stop_when_cache_set_failed == BCH_CACHED_DEV_STOP_ALWAYS) { 1684 - pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.", 1680 + pr_warn("stop_when_cache_set_failed of %s is \"always\", stop it for failed cache set %pU.\n", 1685 1681 d->disk->disk_name, c->sb.set_uuid); 1686 1682 bcache_device_stop(d); 1687 1683 } else if (atomic_read(&dc->has_dirty)) { ··· 1689 1685 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO 1690 1686 * and dc->has_dirty == 1 1691 1687 */ 1692 - pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is dirty, stop it to avoid potential data corruption.", 1688 + pr_warn("stop_when_cache_set_failed of %s is \"auto\" 
and cache is dirty, stop it to avoid potential data corruption.\n", 1693 1689 d->disk->disk_name); 1694 1690 /* 1695 1691 * There might be a small time gap that cache set is ··· 1711 1707 * dc->stop_when_cache_set_failed == BCH_CACHED_STOP_AUTO 1712 1708 * and dc->has_dirty == 0 1713 1709 */ 1714 - pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.", 1710 + pr_warn("stop_when_cache_set_failed of %s is \"auto\" and cache is clean, keep it alive.\n", 1715 1711 d->disk->disk_name); 1716 1712 } 1717 1713 } ··· 1878 1874 if (bch_journal_read(c, &journal)) 1879 1875 goto err; 1880 1876 1881 - pr_debug("btree_journal_read() done"); 1877 + pr_debug("btree_journal_read() done\n"); 1882 1878 1883 1879 err = "no journal entries found"; 1884 1880 if (list_empty(&journal)) ··· 1924 1920 1925 1921 bch_journal_mark(c, &journal); 1926 1922 bch_initial_gc_finish(c); 1927 - pr_debug("btree_check() done"); 1923 + pr_debug("btree_check() done\n"); 1928 1924 1929 1925 /* 1930 1926 * bcache_journal_next() can't happen sooner, or ··· 1955 1951 if (bch_journal_replay(c, &journal)) 1956 1952 goto err; 1957 1953 } else { 1958 - pr_notice("invalidating existing data"); 1954 + pr_notice("invalidating existing data\n"); 1959 1955 1960 1956 for_each_cache(ca, c, i) { 1961 1957 unsigned int j; ··· 2089 2085 memcpy(c->sb.set_uuid, ca->sb.set_uuid, 16); 2090 2086 c->sb.flags = ca->sb.flags; 2091 2087 c->sb.seq = ca->sb.seq; 2092 - pr_debug("set version = %llu", c->sb.version); 2088 + pr_debug("set version = %llu\n", c->sb.version); 2093 2089 } 2094 2090 2095 2091 kobject_get(&ca->kobj); ··· 2251 2247 err_free: 2252 2248 module_put(THIS_MODULE); 2253 2249 if (err) 2254 - pr_notice("error %s: %s", ca->cache_dev_name, err); 2250 + pr_notice("error %s: %s\n", ca->cache_dev_name, err); 2255 2251 return ret; 2256 2252 } 2257 2253 ··· 2305 2301 goto out; 2306 2302 } 2307 2303 2308 - pr_info("registered cache device %s", ca->cache_dev_name); 2304 + pr_info("registered 
cache device %s\n", ca->cache_dev_name); 2309 2305 2310 2306 out: 2311 2307 kobject_put(&ca->kobj); 2312 2308 2313 2309 err: 2314 2310 if (err) 2315 - pr_notice("error %s: %s", ca->cache_dev_name, err); 2311 + pr_notice("error %s: %s\n", ca->cache_dev_name, err); 2316 2312 2317 2313 return ret; 2318 2314 } ··· 2327 2323 2328 2324 kobj_attribute_write(register, register_bcache); 2329 2325 kobj_attribute_write(register_quiet, register_bcache); 2326 + kobj_attribute_write(register_async, register_bcache); 2330 2327 kobj_attribute_write(pendings_cleanup, bch_pending_bdevs_cleanup); 2331 2328 2332 2329 static bool bch_is_open_backing(struct block_device *bdev) ··· 2361 2356 static bool bch_is_open(struct block_device *bdev) 2362 2357 { 2363 2358 return bch_is_open_cache(bdev) || bch_is_open_backing(bdev); 2359 + } 2360 + 2361 + struct async_reg_args { 2362 + struct work_struct reg_work; 2363 + char *path; 2364 + struct cache_sb *sb; 2365 + struct cache_sb_disk *sb_disk; 2366 + struct block_device *bdev; 2367 + }; 2368 + 2369 + static void register_bdev_worker(struct work_struct *work) 2370 + { 2371 + int fail = false; 2372 + struct async_reg_args *args = 2373 + container_of(work, struct async_reg_args, reg_work); 2374 + struct cached_dev *dc; 2375 + 2376 + dc = kzalloc(sizeof(*dc), GFP_KERNEL); 2377 + if (!dc) { 2378 + fail = true; 2379 + put_page(virt_to_page(args->sb_disk)); 2380 + blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 2381 + goto out; 2382 + } 2383 + 2384 + mutex_lock(&bch_register_lock); 2385 + if (register_bdev(args->sb, args->sb_disk, args->bdev, dc) < 0) 2386 + fail = true; 2387 + mutex_unlock(&bch_register_lock); 2388 + 2389 + out: 2390 + if (fail) 2391 + pr_info("error %s: fail to register backing device\n", 2392 + args->path); 2393 + kfree(args->sb); 2394 + kfree(args->path); 2395 + kfree(args); 2396 + module_put(THIS_MODULE); 2397 + } 2398 + 2399 + static void register_cache_worker(struct work_struct *work) 2400 + { 2401 + int fail = 
false; 2402 + struct async_reg_args *args = 2403 + container_of(work, struct async_reg_args, reg_work); 2404 + struct cache *ca; 2405 + 2406 + ca = kzalloc(sizeof(*ca), GFP_KERNEL); 2407 + if (!ca) { 2408 + fail = true; 2409 + put_page(virt_to_page(args->sb_disk)); 2410 + blkdev_put(args->bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); 2411 + goto out; 2412 + } 2413 + 2414 + /* blkdev_put() will be called in bch_cache_release() */ 2415 + if (register_cache(args->sb, args->sb_disk, args->bdev, ca) != 0) 2416 + fail = true; 2417 + 2418 + out: 2419 + if (fail) 2420 + pr_info("error %s: fail to register cache device\n", 2421 + args->path); 2422 + kfree(args->sb); 2423 + kfree(args->path); 2424 + kfree(args); 2425 + module_put(THIS_MODULE); 2426 + } 2427 + 2428 + static void register_device_aync(struct async_reg_args *args) 2429 + { 2430 + if (SB_IS_BDEV(args->sb)) 2431 + INIT_WORK(&args->reg_work, register_bdev_worker); 2432 + else 2433 + INIT_WORK(&args->reg_work, register_cache_worker); 2434 + 2435 + queue_work(system_wq, &args->reg_work); 2364 2436 } 2365 2437 2366 2438 static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr, ··· 2502 2420 goto out_blkdev_put; 2503 2421 2504 2422 err = "failed to register device"; 2423 + if (attr == &ksysfs_register_async) { 2424 + /* register in asynchronous way */ 2425 + struct async_reg_args *args = 2426 + kzalloc(sizeof(struct async_reg_args), GFP_KERNEL); 2427 + 2428 + if (!args) { 2429 + ret = -ENOMEM; 2430 + err = "cannot allocate memory"; 2431 + goto out_put_sb_page; 2432 + } 2433 + 2434 + args->path = path; 2435 + args->sb = sb; 2436 + args->sb_disk = sb_disk; 2437 + args->bdev = bdev; 2438 + register_device_aync(args); 2439 + /* No wait and returns to user space */ 2440 + goto async_done; 2441 + } 2442 + 2505 2443 if (SB_IS_BDEV(sb)) { 2506 2444 struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL); 2507 2445 ··· 2549 2447 kfree(sb); 2550 2448 kfree(path); 2551 2449 module_put(THIS_MODULE); 2450 + 
async_done: 2552 2451 return size; 2553 2452 2554 2453 out_put_sb_page: ··· 2564 2461 out_module_put: 2565 2462 module_put(THIS_MODULE); 2566 2463 out: 2567 - pr_info("error %s: %s", path?path:"", err); 2464 + pr_info("error %s: %s\n", path?path:"", err); 2568 2465 return ret; 2569 2466 } 2570 2467 ··· 2609 2506 mutex_unlock(&bch_register_lock); 2610 2507 2611 2508 list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) { 2612 - pr_info("delete pdev %p", pdev); 2509 + pr_info("delete pdev %p\n", pdev); 2613 2510 list_del(&pdev->list); 2614 2511 bcache_device_stop(&pdev->dc->disk); 2615 2512 kfree(pdev); ··· 2652 2549 2653 2550 mutex_unlock(&bch_register_lock); 2654 2551 2655 - pr_info("Stopping all devices:"); 2552 + pr_info("Stopping all devices:\n"); 2656 2553 2657 2554 /* 2658 2555 * The reason bch_register_lock is not held to call ··· 2702 2599 finish_wait(&unregister_wait, &wait); 2703 2600 2704 2601 if (stopped) 2705 - pr_info("All devices stopped"); 2602 + pr_info("All devices stopped\n"); 2706 2603 else 2707 - pr_notice("Timeout waiting for devices to be closed"); 2604 + pr_notice("Timeout waiting for devices to be closed\n"); 2708 2605 out: 2709 2606 mutex_unlock(&bch_register_lock); 2710 2607 } ··· 2740 2637 if (bch_cutoff_writeback_sync == 0) 2741 2638 bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC; 2742 2639 else if (bch_cutoff_writeback_sync > CUTOFF_WRITEBACK_SYNC_MAX) { 2743 - pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u", 2640 + pr_warn("set bch_cutoff_writeback_sync (%u) to max value %u\n", 2744 2641 bch_cutoff_writeback_sync, CUTOFF_WRITEBACK_SYNC_MAX); 2745 2642 bch_cutoff_writeback_sync = CUTOFF_WRITEBACK_SYNC_MAX; 2746 2643 } ··· 2748 2645 if (bch_cutoff_writeback == 0) 2749 2646 bch_cutoff_writeback = CUTOFF_WRITEBACK; 2750 2647 else if (bch_cutoff_writeback > CUTOFF_WRITEBACK_MAX) { 2751 - pr_warn("set bch_cutoff_writeback (%u) to max value %u", 2648 + pr_warn("set bch_cutoff_writeback (%u) to max value %u\n", 2752 2649 
bch_cutoff_writeback, CUTOFF_WRITEBACK_MAX); 2753 2650 bch_cutoff_writeback = CUTOFF_WRITEBACK_MAX; 2754 2651 } 2755 2652 2756 2653 if (bch_cutoff_writeback > bch_cutoff_writeback_sync) { 2757 - pr_warn("set bch_cutoff_writeback (%u) to %u", 2654 + pr_warn("set bch_cutoff_writeback (%u) to %u\n", 2758 2655 bch_cutoff_writeback, bch_cutoff_writeback_sync); 2759 2656 bch_cutoff_writeback = bch_cutoff_writeback_sync; 2760 2657 } ··· 2765 2662 static const struct attribute *files[] = { 2766 2663 &ksysfs_register.attr, 2767 2664 &ksysfs_register_quiet.attr, 2665 + #ifdef CONFIG_BCACHE_ASYNC_REGISTRAION 2666 + &ksysfs_register_async.attr, 2667 + #endif 2768 2668 &ksysfs_pendings_cleanup.attr, 2769 2669 NULL 2770 2670 };
+4 -4
drivers/md/bcache/sysfs.c
··· 421 421 return size; 422 422 } 423 423 if (v == -ENOENT) 424 - pr_err("Can't attach %s: cache set not found", buf); 424 + pr_err("Can't attach %s: cache set not found\n", buf); 425 425 return v; 426 426 } 427 427 ··· 455 455 */ 456 456 if (dc->writeback_running) { 457 457 dc->writeback_running = false; 458 - pr_err("%s: failed to run non-existent writeback thread", 458 + pr_err("%s: failed to run non-existent writeback thread\n", 459 459 dc->disk.disk->disk_name); 460 460 } 461 461 } else ··· 872 872 if (v) { 873 873 if (test_and_set_bit(CACHE_SET_IO_DISABLE, 874 874 &c->flags)) 875 - pr_warn("CACHE_SET_IO_DISABLE already set"); 875 + pr_warn("CACHE_SET_IO_DISABLE already set\n"); 876 876 } else { 877 877 if (!test_and_clear_bit(CACHE_SET_IO_DISABLE, 878 878 &c->flags)) 879 - pr_warn("CACHE_SET_IO_DISABLE already cleared"); 879 + pr_warn("CACHE_SET_IO_DISABLE already cleared\n"); 880 880 } 881 881 } 882 882
+3 -3
drivers/md/bcache/writeback.c
··· 809 809 schedule_timeout_interruptible( 810 810 msecs_to_jiffies(INIT_KEYS_SLEEP_MS)); 811 811 else if (ret < 0) { 812 - pr_warn("sectors dirty init failed, ret=%d!", ret); 812 + pr_warn("sectors dirty init failed, ret=%d!\n", ret); 813 813 break; 814 814 } 815 815 } while (ret == -EAGAIN); ··· 917 917 918 918 state = kzalloc(sizeof(struct bch_dirty_init_state), GFP_KERNEL); 919 919 if (!state) { 920 - pr_warn("sectors dirty init failed: cannot allocate memory"); 920 + pr_warn("sectors dirty init failed: cannot allocate memory\n"); 921 921 return; 922 922 } 923 923 ··· 945 945 &state->infos[i], 946 946 name); 947 947 if (IS_ERR(state->infos[i].thread)) { 948 - pr_err("fails to run thread bch_dirty_init[%d]", i); 948 + pr_err("fails to run thread bch_dirty_init[%d]\n", i); 949 949 for (--i; i >= 0; i--) 950 950 kthread_stop(state->infos[i].thread); 951 951 goto out;
+1 -1
drivers/md/md-linear.h
··· 12 12 struct rcu_head rcu; 13 13 sector_t array_sectors; 14 14 int raid_disks; /* a copy of mddev->raid_disks */ 15 - struct dev_info disks[0]; 15 + struct dev_info disks[]; 16 16 }; 17 17 #endif
+48 -23
drivers/md/md.c
··· 89 89 static DECLARE_WAIT_QUEUE_HEAD(resync_wait); 90 90 static struct workqueue_struct *md_wq; 91 91 static struct workqueue_struct *md_misc_wq; 92 + static struct workqueue_struct *md_rdev_misc_wq; 92 93 93 94 static int remove_and_add_spares(struct mddev *mddev, 94 95 struct md_rdev *this); ··· 228 227 goto abort; 229 228 230 229 if (mddev->serial_info_pool == NULL) { 231 - unsigned int noio_flag; 232 - 233 - noio_flag = memalloc_noio_save(); 230 + /* 231 + * already in memalloc noio context by 232 + * mddev_suspend() 233 + */ 234 234 mddev->serial_info_pool = 235 235 mempool_create_kmalloc_pool(NR_SERIAL_INFOS, 236 236 sizeof(struct serial_info)); 237 - memalloc_noio_restore(noio_flag); 238 237 if (!mddev->serial_info_pool) { 239 238 rdevs_uninit_serial(mddev); 240 239 pr_err("can't alloc memory pool for serialization\n"); ··· 467 466 { 468 467 const int rw = bio_data_dir(bio); 469 468 const int sgrp = op_stat_group(bio_op(bio)); 470 - struct mddev *mddev = q->queuedata; 469 + struct mddev *mddev = bio->bi_disk->private_data; 471 470 unsigned int sectors; 472 471 473 472 if (unlikely(test_bit(MD_BROKEN, &mddev->flags)) && (rw == WRITE)) { ··· 528 527 wait_event(mddev->sb_wait, !test_bit(MD_UPDATING_SB, &mddev->flags)); 529 528 530 529 del_timer_sync(&mddev->safemode_timer); 530 + /* restrict memory reclaim I/O during raid array is suspend */ 531 + mddev->noio_flag = memalloc_noio_save(); 531 532 } 532 533 EXPORT_SYMBOL_GPL(mddev_suspend); 533 534 534 535 void mddev_resume(struct mddev *mddev) 535 536 { 537 + /* entred the memalloc scope from mddev_suspend() */ 538 + memalloc_noio_restore(mddev->noio_flag); 536 539 lockdep_assert_held(&mddev->reconfig_mutex); 537 540 if (--mddev->suspended) 538 541 return; ··· 2459 2454 return err; 2460 2455 } 2461 2456 2462 - static void md_delayed_delete(struct work_struct *ws) 2457 + static void rdev_delayed_delete(struct work_struct *ws) 2463 2458 { 2464 2459 struct md_rdev *rdev = container_of(ws, struct md_rdev, 
del_work); 2465 2460 kobject_del(&rdev->kobj); ··· 2484 2479 * to delay it due to rcu usage. 2485 2480 */ 2486 2481 synchronize_rcu(); 2487 - INIT_WORK(&rdev->del_work, md_delayed_delete); 2482 + INIT_WORK(&rdev->del_work, rdev_delayed_delete); 2488 2483 kobject_get(&rdev->kobj); 2489 - queue_work(md_misc_wq, &rdev->del_work); 2484 + queue_work(md_rdev_misc_wq, &rdev->del_work); 2490 2485 } 2491 2486 2492 2487 /* ··· 3196 3191 rdev->saved_raid_disk = -1; 3197 3192 clear_bit(In_sync, &rdev->flags); 3198 3193 clear_bit(Bitmap_sync, &rdev->flags); 3199 - err = rdev->mddev->pers-> 3200 - hot_add_disk(rdev->mddev, rdev); 3194 + err = rdev->mddev->pers->hot_add_disk(rdev->mddev, rdev); 3201 3195 if (err) { 3202 3196 rdev->raid_disk = -1; 3203 3197 return err; ··· 4518 4514 return -EINVAL; 4519 4515 } 4520 4516 4517 + /* need to ensure rdev_delayed_delete() has completed */ 4518 + static void flush_rdev_wq(struct mddev *mddev) 4519 + { 4520 + struct md_rdev *rdev; 4521 + 4522 + rcu_read_lock(); 4523 + rdev_for_each_rcu(rdev, mddev) 4524 + if (work_pending(&rdev->del_work)) { 4525 + flush_workqueue(md_rdev_misc_wq); 4526 + break; 4527 + } 4528 + rcu_read_unlock(); 4529 + } 4530 + 4521 4531 static ssize_t 4522 4532 new_dev_store(struct mddev *mddev, const char *buf, size_t len) 4523 4533 { ··· 4559 4541 minor != MINOR(dev)) 4560 4542 return -EOVERFLOW; 4561 4543 4562 - flush_workqueue(md_misc_wq); 4563 - 4544 + flush_rdev_wq(mddev); 4564 4545 err = mddev_lock(mddev); 4565 4546 if (err) 4566 4547 return err; ··· 4797 4780 clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 4798 4781 if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && 4799 4782 mddev_lock(mddev) == 0) { 4800 - flush_workqueue(md_misc_wq); 4783 + if (work_pending(&mddev->del_work)) 4784 + flush_workqueue(md_misc_wq); 4801 4785 if (mddev->sync_thread) { 4802 4786 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 4803 4787 md_reap_sync_thread(mddev); ··· 5644 5626 mddev->queue = blk_alloc_queue(md_make_request, 
NUMA_NO_NODE); 5645 5627 if (!mddev->queue) 5646 5628 goto abort; 5647 - mddev->queue->queuedata = mddev; 5648 5629 5649 5630 blk_set_stacking_limits(&mddev->queue->limits); 5650 5631 ··· 6164 6147 static void __md_stop_writes(struct mddev *mddev) 6165 6148 { 6166 6149 set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); 6167 - flush_workqueue(md_misc_wq); 6150 + if (work_pending(&mddev->del_work)) 6151 + flush_workqueue(md_misc_wq); 6168 6152 if (mddev->sync_thread) { 6169 6153 set_bit(MD_RECOVERY_INTR, &mddev->recovery); 6170 6154 md_reap_sync_thread(mddev); ··· 6218 6200 md_bitmap_destroy(mddev); 6219 6201 mddev_detach(mddev); 6220 6202 /* Ensure ->event_work is done */ 6221 - flush_workqueue(md_misc_wq); 6203 + if (mddev->event_work.func) 6204 + flush_workqueue(md_misc_wq); 6222 6205 spin_lock(&mddev->lock); 6223 6206 mddev->pers = NULL; 6224 6207 spin_unlock(&mddev->lock); ··· 7514 7495 7515 7496 } 7516 7497 7517 - if (cmd == ADD_NEW_DISK) 7518 - /* need to ensure md_delayed_delete() has completed */ 7519 - flush_workqueue(md_misc_wq); 7498 + if (cmd == ADD_NEW_DISK || cmd == HOT_ADD_DISK) 7499 + flush_rdev_wq(mddev); 7520 7500 7521 7501 if (cmd == HOT_REMOVE_DISK) 7522 7502 /* need to ensure recovery thread has run */ ··· 7770 7752 */ 7771 7753 mddev_put(mddev); 7772 7754 /* Wait until bdev->bd_disk is definitely gone */ 7773 - flush_workqueue(md_misc_wq); 7755 + if (work_pending(&mddev->del_work)) 7756 + flush_workqueue(md_misc_wq); 7774 7757 /* Then retry the open from the top */ 7775 7758 return -ERESTARTSYS; 7776 7759 } ··· 9059 9040 9060 9041 rdev->recovery_offset = 0; 9061 9042 } 9062 - if (mddev->pers-> 9063 - hot_add_disk(mddev, rdev) == 0) { 9043 + if (mddev->pers->hot_add_disk(mddev, rdev) == 0) { 9064 9044 if (sysfs_link_rdev(mddev, rdev)) 9065 9045 /* failure here is OK */; 9066 9046 if (!test_bit(Journal, &rdev->flags)) ··· 9487 9469 if (!md_misc_wq) 9488 9470 goto err_misc_wq; 9489 9471 9472 + md_rdev_misc_wq = alloc_workqueue("md_rdev_misc", 0, 0); 
9473 + if (!md_misc_wq) 9474 + goto err_rdev_misc_wq; 9475 + 9490 9476 if ((ret = register_blkdev(MD_MAJOR, "md")) < 0) 9491 9477 goto err_md; 9492 9478 ··· 9512 9490 err_mdp: 9513 9491 unregister_blkdev(MD_MAJOR, "md"); 9514 9492 err_md: 9493 + destroy_workqueue(md_rdev_misc_wq); 9494 + err_rdev_misc_wq: 9515 9495 destroy_workqueue(md_misc_wq); 9516 9496 err_misc_wq: 9517 9497 destroy_workqueue(md_wq); ··· 9800 9776 * destroy_workqueue() below will wait for that to complete. 9801 9777 */ 9802 9778 } 9779 + destroy_workqueue(md_rdev_misc_wq); 9803 9780 destroy_workqueue(md_misc_wq); 9804 9781 destroy_workqueue(md_wq); 9805 9782 } ··· 9810 9785 9811 9786 static int get_ro(char *buffer, const struct kernel_param *kp) 9812 9787 { 9813 - return sprintf(buffer, "%d", start_readonly); 9788 + return sprintf(buffer, "%d\n", start_readonly); 9814 9789 } 9815 9790 static int set_ro(const char *val, const struct kernel_param *kp) 9816 9791 {
+1
drivers/md/md.h
··· 497 497 void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); 498 498 struct md_cluster_info *cluster_info; 499 499 unsigned int good_device_nr; /* good device num within cluster raid */ 500 + unsigned int noio_flag; /* for memalloc scope API */ 500 501 501 502 bool has_superblocks:1; 502 503 bool fail_last_dev:1;
+7 -6
drivers/md/raid1.c
··· 296 296 static void call_bio_endio(struct r1bio *r1_bio) 297 297 { 298 298 struct bio *bio = r1_bio->master_bio; 299 - struct r1conf *conf = r1_bio->mddev->private; 300 299 301 300 if (!test_bit(R1BIO_Uptodate, &r1_bio->state)) 302 301 bio->bi_status = BLK_STS_IOERR; 303 302 304 303 bio_endio(bio); 305 - /* 306 - * Wake up any possible resync thread that waits for the device 307 - * to go idle. 308 - */ 309 - allow_barrier(conf, r1_bio->sector); 310 304 } 311 305 312 306 static void raid_end_bio_io(struct r1bio *r1_bio) 313 307 { 314 308 struct bio *bio = r1_bio->master_bio; 309 + struct r1conf *conf = r1_bio->mddev->private; 315 310 316 311 /* if nobody has done the final endio yet, do it now */ 317 312 if (!test_and_set_bit(R1BIO_Returned, &r1_bio->state)) { ··· 317 322 318 323 call_bio_endio(r1_bio); 319 324 } 325 + /* 326 + * Wake up any possible resync thread that waits for the device 327 + * to go idle. All I/Os, even write-behind writes, are done. 328 + */ 329 + allow_barrier(conf, r1_bio->sector); 330 + 320 331 free_r1bio(r1_bio); 321 332 } 322 333
+1 -1
drivers/md/raid1.h
··· 180 180 * if the IO is in WRITE direction, then multiple bios are used. 181 181 * We choose the number when they are allocated. 182 182 */ 183 - struct bio *bios[0]; 183 + struct bio *bios[]; 184 184 /* DO NOT PUT ANY NEW FIELDS HERE - bios array is contiguously alloced*/ 185 185 }; 186 186
+1 -1
drivers/md/raid10.h
··· 153 153 }; 154 154 sector_t addr; 155 155 int devnum; 156 - } devs[0]; 156 + } devs[]; 157 157 }; 158 158 159 159 /* bits for r10bio.state */
+14 -8
drivers/md/raid5.c
··· 2215 2215 } 2216 2216 2217 2217 /** 2218 - * scribble_len - return the required size of the scribble region 2218 + * scribble_alloc - allocate percpu scribble buffer for required size 2219 + * of the scribble region 2220 + * @percpu - from for_each_present_cpu() of the caller 2219 2221 * @num - total number of disks in the array 2222 + * @cnt - scribble objs count for required size of the scribble region 2220 2223 * 2221 - * The size must be enough to contain: 2224 + * The scribble buffer size must be enough to contain: 2222 2225 * 1/ a struct page pointer for each device in the array +2 2223 2226 * 2/ room to convert each entry in (1) to its corresponding dma 2224 2227 * (dma_map_page()) or page (page_address()) address. ··· 2231 2228 * of the P and Q blocks. 2232 2229 */ 2233 2230 static int scribble_alloc(struct raid5_percpu *percpu, 2234 - int num, int cnt, gfp_t flags) 2231 + int num, int cnt) 2235 2232 { 2236 2233 size_t obj_size = 2237 2234 sizeof(struct page *) * (num+2) + 2238 2235 sizeof(addr_conv_t) * (num+2); 2239 2236 void *scribble; 2240 2237 2241 - scribble = kvmalloc_array(cnt, obj_size, flags); 2238 + /* 2239 + * If here is in raid array suspend context, it is in memalloc noio 2240 + * context as well, there is no potential recursive memory reclaim 2241 + * I/Os with the GFP_KERNEL flag. 2242 + */ 2243 + scribble = kvmalloc_array(cnt, obj_size, GFP_KERNEL); 2242 2244 if (!scribble) 2243 2245 return -ENOMEM; 2244 2246 ··· 2275 2267 2276 2268 percpu = per_cpu_ptr(conf->percpu, cpu); 2277 2269 err = scribble_alloc(percpu, new_disks, 2278 - new_sectors / STRIPE_SECTORS, 2279 - GFP_NOIO); 2270 + new_sectors / STRIPE_SECTORS); 2280 2271 if (err) 2281 2272 break; 2282 2273 } ··· 6766 6759 conf->previous_raid_disks), 6767 6760 max(conf->chunk_sectors, 6768 6761 conf->prev_chunk_sectors) 6769 - / STRIPE_SECTORS, 6770 - GFP_KERNEL)) { 6762 + / STRIPE_SECTORS)) { 6771 6763 free_scratch_buffer(conf, percpu); 6772 6764 return -ENOMEM; 6773 6765 }
+190 -132
drivers/nvme/host/core.c
··· 19 19 #include <linux/pr.h> 20 20 #include <linux/ptrace.h> 21 21 #include <linux/nvme_ioctl.h> 22 - #include <linux/t10-pi.h> 23 22 #include <linux/pm_qos.h> 24 23 #include <asm/unaligned.h> 25 24 ··· 201 202 if (nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING)) 202 203 nvme_do_delete_ctrl(ctrl); 203 204 nvme_put_ctrl(ctrl); 204 - } 205 - 206 - static inline bool nvme_ns_has_pi(struct nvme_ns *ns) 207 - { 208 - return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); 209 205 } 210 206 211 207 static blk_status_t nvme_error_status(u16 status) ··· 427 433 428 434 nvme_mpath_remove_disk(head); 429 435 ida_simple_remove(&head->subsys->ns_ida, head->instance); 430 - list_del_init(&head->entry); 431 436 cleanup_srcu_struct(&head->srcu); 432 437 nvme_put_subsystem(head->subsys); 433 438 kfree(head); ··· 523 530 524 531 c.directive.opcode = nvme_admin_directive_recv; 525 532 c.directive.nsid = cpu_to_le32(nsid); 526 - c.directive.numd = cpu_to_le32((sizeof(*s) >> 2) - 1); 533 + c.directive.numd = cpu_to_le32(nvme_bytes_to_numd(sizeof(*s))); 527 534 c.directive.doper = NVME_DIR_RCV_ST_OP_PARAM; 528 535 c.directive.dtype = NVME_DIR_STREAMS; 529 536 ··· 546 553 547 554 ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); 548 555 if (ret) 549 - return ret; 556 + goto out_disable_stream; 550 557 551 558 ctrl->nssa = le16_to_cpu(s.nssa); 552 559 if (ctrl->nssa < BLK_MAX_WRITE_HINTS - 1) { 553 560 dev_info(ctrl->device, "too few streams (%u) available\n", 554 561 ctrl->nssa); 555 - nvme_disable_streams(ctrl); 556 - return 0; 562 + goto out_disable_stream; 557 563 } 558 564 559 565 ctrl->nr_streams = min_t(unsigned, ctrl->nssa, BLK_MAX_WRITE_HINTS - 1); 560 566 dev_info(ctrl->device, "Using %u streams\n", ctrl->nr_streams); 561 567 return 0; 568 + 569 + out_disable_stream: 570 + nvme_disable_streams(ctrl); 571 + return ret; 562 572 } 563 573 564 574 /* ··· 1023 1027 } 1024 1028 EXPORT_SYMBOL_GPL(nvme_stop_keep_alive); 1025 1029 1030 + /* 1031 + * In NVMe 1.0 the CNS 
field was just a binary controller or namespace 1032 + * flag, thus sending any new CNS opcodes has a big chance of not working. 1033 + * Qemu unfortunately had that bug after reporting a 1.1 version compliance 1034 + * (but not for any later version). 1035 + */ 1036 + static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) 1037 + { 1038 + if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) 1039 + return ctrl->vs < NVME_VS(1, 2, 0); 1040 + return ctrl->vs < NVME_VS(1, 1, 0); 1041 + } 1042 + 1026 1043 static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) 1027 1044 { 1028 1045 struct nvme_command c = { }; ··· 1299 1290 meta_len = (io.nblocks + 1) * ns->ms; 1300 1291 metadata = nvme_to_user_ptr(io.metadata); 1301 1292 1302 - if (ns->ext) { 1293 + if (ns->features & NVME_NS_EXT_LBAS) { 1303 1294 length += meta_len; 1304 1295 meta_len = 0; 1305 1296 } else if (meta_len) { ··· 1401 1392 } 1402 1393 if (effects & NVME_CMD_EFFECTS_CCC) 1403 1394 nvme_init_identify(ctrl); 1404 - if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) 1395 + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { 1405 1396 nvme_queue_scan(ctrl); 1397 + flush_work(&ctrl->scan_work); 1398 + } 1406 1399 } 1407 1400 1408 1401 static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, ··· 1693 1682 } 1694 1683 1695 1684 #ifdef CONFIG_BLK_DEV_INTEGRITY 1696 - static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) 1685 + static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, 1686 + u32 max_integrity_segments) 1697 1687 { 1698 1688 struct blk_integrity integrity; 1699 1689 ··· 1717 1705 } 1718 1706 integrity.tuple_size = ms; 1719 1707 blk_integrity_register(disk, &integrity); 1720 - blk_queue_max_integrity_segments(disk->queue, 1); 1708 + blk_queue_max_integrity_segments(disk->queue, max_integrity_segments); 1721 1709 } 1722 1710 #else 1723 - static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type) 1711 + 
static void nvme_init_integrity(struct gendisk *disk, u16 ms, u8 pi_type, 1712 + u32 max_integrity_segments) 1724 1713 { 1725 1714 } 1726 1715 #endif /* CONFIG_BLK_DEV_INTEGRITY */ 1727 - 1728 - static void nvme_set_chunk_size(struct nvme_ns *ns) 1729 - { 1730 - u32 chunk_size = nvme_lba_to_sect(ns, ns->noiob); 1731 - blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(chunk_size)); 1732 - } 1733 1716 1734 1717 static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns) 1735 1718 { ··· 1811 1804 memcmp(&a->eui64, &b->eui64, sizeof(a->eui64)) == 0; 1812 1805 } 1813 1806 1807 + static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns, 1808 + u32 *phys_bs, u32 *io_opt) 1809 + { 1810 + struct streams_directive_params s; 1811 + int ret; 1812 + 1813 + if (!ctrl->nr_streams) 1814 + return 0; 1815 + 1816 + ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); 1817 + if (ret) 1818 + return ret; 1819 + 1820 + ns->sws = le32_to_cpu(s.sws); 1821 + ns->sgs = le16_to_cpu(s.sgs); 1822 + 1823 + if (ns->sws) { 1824 + *phys_bs = ns->sws * (1 << ns->lba_shift); 1825 + if (ns->sgs) 1826 + *io_opt = *phys_bs * ns->sgs; 1827 + } 1828 + 1829 + return 0; 1830 + } 1831 + 1814 1832 static void nvme_update_disk_info(struct gendisk *disk, 1815 1833 struct nvme_ns *ns, struct nvme_id_ns *id) 1816 1834 { 1817 1835 sector_t capacity = nvme_lba_to_sect(ns, le64_to_cpu(id->nsze)); 1818 1836 unsigned short bs = 1 << ns->lba_shift; 1819 - u32 atomic_bs, phys_bs, io_opt; 1837 + u32 atomic_bs, phys_bs, io_opt = 0; 1820 1838 1821 1839 if (ns->lba_shift > PAGE_SHIFT) { 1822 1840 /* unsupported block size, set capacity to 0 later */ ··· 1850 1818 blk_mq_freeze_queue(disk->queue); 1851 1819 blk_integrity_unregister(disk); 1852 1820 1821 + atomic_bs = phys_bs = bs; 1822 + nvme_setup_streams_ns(ns->ctrl, ns, &phys_bs, &io_opt); 1853 1823 if (id->nabo == 0) { 1854 1824 /* 1855 1825 * Bit 1 indicates whether NAWUPF is defined for this namespace 1856 1826 * and whether 
it should be used instead of AWUPF. If NAWUPF == 1857 1827 * 0 then AWUPF must be used instead. 1858 1828 */ 1859 - if (id->nsfeat & (1 << 1) && id->nawupf) 1829 + if (id->nsfeat & NVME_NS_FEAT_ATOMICS && id->nawupf) 1860 1830 atomic_bs = (1 + le16_to_cpu(id->nawupf)) * bs; 1861 1831 else 1862 1832 atomic_bs = (1 + ns->ctrl->subsys->awupf) * bs; 1863 - } else { 1864 - atomic_bs = bs; 1865 1833 } 1866 - phys_bs = bs; 1867 - io_opt = bs; 1868 - if (id->nsfeat & (1 << 4)) { 1834 + 1835 + if (id->nsfeat & NVME_NS_FEAT_IO_OPT) { 1869 1836 /* NPWG = Namespace Preferred Write Granularity */ 1870 - phys_bs *= 1 + le16_to_cpu(id->npwg); 1837 + phys_bs = bs * (1 + le16_to_cpu(id->npwg)); 1871 1838 /* NOWS = Namespace Optimal Write Size */ 1872 - io_opt *= 1 + le16_to_cpu(id->nows); 1839 + io_opt = bs * (1 + le16_to_cpu(id->nows)); 1873 1840 } 1874 1841 1875 1842 blk_queue_logical_block_size(disk->queue, bs); ··· 1881 1850 blk_queue_io_min(disk->queue, phys_bs); 1882 1851 blk_queue_io_opt(disk->queue, io_opt); 1883 1852 1884 - if (ns->ms && !ns->ext && 1885 - (ns->ctrl->ops->flags & NVME_F_METADATA_SUPPORTED)) 1886 - nvme_init_integrity(disk, ns->ms, ns->pi_type); 1887 - if ((ns->ms && !nvme_ns_has_pi(ns) && !blk_get_integrity(disk)) || 1888 - ns->lba_shift > PAGE_SHIFT) 1853 + /* 1854 + * The block layer can't support LBA sizes larger than the page size 1855 + * yet, so catch this early and don't allow block I/O. 1856 + */ 1857 + if (ns->lba_shift > PAGE_SHIFT) 1889 1858 capacity = 0; 1859 + 1860 + /* 1861 + * Register a metadata profile for PI, or the plain non-integrity NVMe 1862 + * metadata masquerading as Type 0 if supported, otherwise reject block 1863 + * I/O to namespaces with metadata except when the namespace supports 1864 + * PI, as it can strip/insert in that case. 
1865 + */ 1866 + if (ns->ms) { 1867 + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && 1868 + (ns->features & NVME_NS_METADATA_SUPPORTED)) 1869 + nvme_init_integrity(disk, ns->ms, ns->pi_type, 1870 + ns->ctrl->max_integrity_segments); 1871 + else if (!nvme_ns_has_pi(ns)) 1872 + capacity = 0; 1873 + } 1890 1874 1891 1875 set_capacity_revalidate_and_notify(disk, capacity, false); 1892 1876 1893 1877 nvme_config_discard(disk, ns); 1894 1878 nvme_config_write_zeroes(disk, ns); 1895 1879 1896 - if (id->nsattr & (1 << 0)) 1880 + if (id->nsattr & NVME_NS_ATTR_RO) 1897 1881 set_disk_ro(disk, true); 1898 1882 else 1899 1883 set_disk_ro(disk, false); ··· 1916 1870 blk_mq_unfreeze_queue(disk->queue); 1917 1871 } 1918 1872 1919 - static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) 1873 + static int __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) 1920 1874 { 1921 1875 struct nvme_ns *ns = disk->private_data; 1876 + struct nvme_ctrl *ctrl = ns->ctrl; 1877 + u32 iob; 1922 1878 1923 1879 /* 1924 1880 * If identify namespace failed, use default 512 byte block size so ··· 1929 1881 ns->lba_shift = id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ds; 1930 1882 if (ns->lba_shift == 0) 1931 1883 ns->lba_shift = 9; 1932 - ns->noiob = le16_to_cpu(id->noiob); 1884 + 1885 + if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && 1886 + is_power_of_2(ctrl->max_hw_sectors)) 1887 + iob = ctrl->max_hw_sectors; 1888 + else 1889 + iob = nvme_lba_to_sect(ns, le16_to_cpu(id->noiob)); 1890 + 1891 + ns->features = 0; 1933 1892 ns->ms = le16_to_cpu(id->lbaf[id->flbas & NVME_NS_FLBAS_LBA_MASK].ms); 1934 - ns->ext = ns->ms && (id->flbas & NVME_NS_FLBAS_META_EXT); 1935 1893 /* the PI implementation requires metadata equal t10 pi tuple size */ 1936 1894 if (ns->ms == sizeof(struct t10_pi_tuple)) 1937 1895 ns->pi_type = id->dps & NVME_NS_DPS_PI_MASK; 1938 1896 else 1939 1897 ns->pi_type = 0; 1940 1898 1941 - if (ns->noiob) 1942 - nvme_set_chunk_size(ns); 1899 + if (ns->ms) { 
1900 + /* 1901 + * For PCIe only the separate metadata pointer is supported, 1902 + * as the block layer supplies metadata in a separate bio_vec 1903 + * chain. For Fabrics, only metadata as part of extended data 1904 + * LBA is supported on the wire per the Fabrics specification, 1905 + * but the HBA/HCA will do the remapping from the separate 1906 + * metadata buffers for us. 1907 + */ 1908 + if (id->flbas & NVME_NS_FLBAS_META_EXT) { 1909 + ns->features |= NVME_NS_EXT_LBAS; 1910 + if ((ctrl->ops->flags & NVME_F_FABRICS) && 1911 + (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) && 1912 + ctrl->max_integrity_segments) 1913 + ns->features |= NVME_NS_METADATA_SUPPORTED; 1914 + } else { 1915 + if (WARN_ON_ONCE(ctrl->ops->flags & NVME_F_FABRICS)) 1916 + return -EINVAL; 1917 + if (ctrl->ops->flags & NVME_F_METADATA_SUPPORTED) 1918 + ns->features |= NVME_NS_METADATA_SUPPORTED; 1919 + } 1920 + } 1921 + 1922 + if (iob) 1923 + blk_queue_chunk_sectors(ns->queue, rounddown_pow_of_two(iob)); 1943 1924 nvme_update_disk_info(disk, ns, id); 1944 1925 #ifdef CONFIG_NVME_MULTIPATH 1945 1926 if (ns->head->disk) { 1946 1927 nvme_update_disk_info(ns->head->disk, ns, id); 1947 1928 blk_queue_stack_limits(ns->head->disk->queue, ns->queue); 1948 - if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { 1949 - struct backing_dev_info *info = 1950 - ns->head->disk->queue->backing_dev_info; 1951 - 1952 - info->capabilities |= BDI_CAP_STABLE_WRITES; 1953 - } 1954 - 1955 1929 revalidate_disk(ns->head->disk); 1956 1930 } 1957 1931 #endif 1932 + return 0; 1958 1933 } 1959 1934 1960 1935 static int nvme_revalidate_disk(struct gendisk *disk) ··· 2002 1931 goto free_id; 2003 1932 } 2004 1933 2005 - __nvme_revalidate_disk(disk, id); 2006 1934 ret = nvme_report_ns_ids(ctrl, ns->head->ns_id, id, &ids); 2007 1935 if (ret) 2008 1936 goto free_id; ··· 2010 1940 dev_err(ctrl->device, 2011 1941 "identifiers changed for nsid %d\n", ns->head->ns_id); 2012 1942 ret = -ENODEV; 1943 + goto free_id; 
2013 1944 } 2014 1945 1946 + ret = __nvme_revalidate_disk(disk, id); 2015 1947 free_id: 2016 1948 kfree(id); 2017 1949 out: ··· 2321 2249 blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); 2322 2250 blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); 2323 2251 } 2324 - if ((ctrl->quirks & NVME_QUIRK_STRIPE_SIZE) && 2325 - is_power_of_2(ctrl->max_hw_sectors)) 2326 - blk_queue_chunk_sectors(q, ctrl->max_hw_sectors); 2327 2252 blk_queue_virt_boundary(q, ctrl->page_size - 1); 2253 + blk_queue_dma_alignment(q, 7); 2328 2254 if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) 2329 2255 vwc = true; 2330 2256 blk_queue_write_cache(q, vwc, vwc); ··· 2725 2655 return false; 2726 2656 } 2727 2657 2728 - if ((id->cmic & (1 << 1)) || 2658 + if ((id->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || 2729 2659 (ctrl->opts && ctrl->opts->discovery_nqn)) 2730 2660 continue; 2731 2661 ··· 2816 2746 void *log, size_t size, u64 offset) 2817 2747 { 2818 2748 struct nvme_command c = { }; 2819 - unsigned long dwlen = size / 4 - 1; 2749 + u32 dwlen = nvme_bytes_to_numd(size); 2820 2750 2821 2751 c.get_log_page.opcode = nvme_admin_get_log_page; 2822 2752 c.get_log_page.nsid = cpu_to_le32(nsid); ··· 3471 3401 3472 3402 list_for_each_entry(h, &subsys->nsheads, entry) { 3473 3403 if (nvme_ns_ids_valid(&new->ids) && 3474 - !list_empty(&h->list) && 3475 3404 nvme_ns_ids_equal(&new->ids, &h->ids)) 3476 3405 return -EINVAL; 3477 3406 } ··· 3479 3410 } 3480 3411 3481 3412 static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, 3482 - unsigned nsid, struct nvme_id_ns *id, 3483 - struct nvme_ns_ids *ids) 3413 + unsigned nsid, struct nvme_ns_ids *ids) 3484 3414 { 3485 3415 struct nvme_ns_head *head; 3486 3416 size_t size = sizeof(*head); ··· 3537 3469 struct nvme_id_ns *id) 3538 3470 { 3539 3471 struct nvme_ctrl *ctrl = ns->ctrl; 3540 - bool is_shared = id->nmic & (1 << 0); 3472 + bool is_shared = id->nmic & NVME_NS_NMIC_SHARED; 3541 3473 struct nvme_ns_head *head = NULL; 3542 3474 struct 
nvme_ns_ids ids; 3543 3475 int ret = 0; 3544 3476 3545 3477 ret = nvme_report_ns_ids(ctrl, nsid, id, &ids); 3546 - if (ret) 3547 - goto out; 3478 + if (ret) { 3479 + if (ret < 0) 3480 + return ret; 3481 + return blk_status_to_errno(nvme_error_status(ret)); 3482 + } 3548 3483 3549 3484 mutex_lock(&ctrl->subsys->lock); 3550 - if (is_shared) 3551 - head = nvme_find_ns_head(ctrl->subsys, nsid); 3485 + head = nvme_find_ns_head(ctrl->subsys, nsid); 3552 3486 if (!head) { 3553 - head = nvme_alloc_ns_head(ctrl, nsid, id, &ids); 3487 + head = nvme_alloc_ns_head(ctrl, nsid, &ids); 3554 3488 if (IS_ERR(head)) { 3555 3489 ret = PTR_ERR(head); 3556 3490 goto out_unlock; 3557 3491 } 3492 + head->shared = is_shared; 3558 3493 } else { 3494 + ret = -EINVAL; 3495 + if (!is_shared || !head->shared) { 3496 + dev_err(ctrl->device, 3497 + "Duplicate unshared namespace %d\n", nsid); 3498 + goto out_put_ns_head; 3499 + } 3559 3500 if (!nvme_ns_ids_equal(&head->ids, &ids)) { 3560 3501 dev_err(ctrl->device, 3561 3502 "IDs don't match for shared namespace %d\n", 3562 3503 nsid); 3563 - ret = -EINVAL; 3564 - goto out_unlock; 3504 + goto out_put_ns_head; 3565 3505 } 3566 3506 } 3567 3507 3568 3508 list_add_tail(&ns->siblings, &head->list); 3569 3509 ns->head = head; 3510 + mutex_unlock(&ctrl->subsys->lock); 3511 + return 0; 3570 3512 3513 + out_put_ns_head: 3514 + nvme_put_ns_head(head); 3571 3515 out_unlock: 3572 3516 mutex_unlock(&ctrl->subsys->lock); 3573 - out: 3574 - if (ret > 0) 3575 - ret = blk_status_to_errno(nvme_error_status(ret)); 3576 3517 return ret; 3577 3518 } 3578 3519 ··· 3610 3533 } 3611 3534 up_read(&ctrl->namespaces_rwsem); 3612 3535 return ret; 3613 - } 3614 - 3615 - static int nvme_setup_streams_ns(struct nvme_ctrl *ctrl, struct nvme_ns *ns) 3616 - { 3617 - struct streams_directive_params s; 3618 - int ret; 3619 - 3620 - if (!ctrl->nr_streams) 3621 - return 0; 3622 - 3623 - ret = nvme_get_stream_params(ctrl, &s, ns->head->ns_id); 3624 - if (ret) 3625 - return ret; 3626 - 
3627 - ns->sws = le32_to_cpu(s.sws); 3628 - ns->sgs = le16_to_cpu(s.sgs); 3629 - 3630 - if (ns->sws) { 3631 - unsigned int bs = 1 << ns->lba_shift; 3632 - 3633 - blk_queue_io_min(ns->queue, bs * ns->sws); 3634 - if (ns->sgs) 3635 - blk_queue_io_opt(ns->queue, bs * ns->sws * ns->sgs); 3636 - } 3637 - 3638 - return 0; 3639 3536 } 3640 3537 3641 3538 static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) ··· 3655 3604 ret = nvme_init_ns_head(ns, nsid, id); 3656 3605 if (ret) 3657 3606 goto out_free_id; 3658 - nvme_setup_streams_ns(ctrl, ns); 3659 3607 nvme_set_disk_name(disk_name, ns, ctrl, &flags); 3660 3608 3661 3609 disk = alloc_disk_node(0, node); ··· 3668 3618 memcpy(disk->disk_name, disk_name, DISK_NAME_LEN); 3669 3619 ns->disk = disk; 3670 3620 3671 - __nvme_revalidate_disk(disk, id); 3621 + if (__nvme_revalidate_disk(disk, id)) 3622 + goto out_free_disk; 3672 3623 3673 3624 if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) { 3674 3625 ret = nvme_nvm_register(ns, disk_name, node); ··· 3696 3645 /* prevent double queue cleanup */ 3697 3646 ns->disk->queue = NULL; 3698 3647 put_disk(ns->disk); 3648 + out_free_disk: 3649 + del_gendisk(ns->disk); 3699 3650 out_unlink_ns: 3700 3651 mutex_lock(&ctrl->subsys->lock); 3701 3652 list_del_rcu(&ns->siblings); 3653 + if (list_empty(&ns->head->list)) 3654 + list_del_init(&ns->head->entry); 3702 3655 mutex_unlock(&ctrl->subsys->lock); 3703 3656 nvme_put_ns_head(ns->head); 3704 3657 out_free_id: ··· 3722 3667 3723 3668 mutex_lock(&ns->ctrl->subsys->lock); 3724 3669 list_del_rcu(&ns->siblings); 3670 + if (list_empty(&ns->head->list)) 3671 + list_del_init(&ns->head->entry); 3725 3672 mutex_unlock(&ns->ctrl->subsys->lock); 3673 + 3726 3674 synchronize_rcu(); /* guarantee not available in head->list */ 3727 3675 nvme_mpath_clear_current_path(ns); 3728 3676 synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */ ··· 3743 3685 3744 3686 nvme_mpath_check_last_path(ns); 3745 3687 nvme_put_ns(ns); 
3688 + } 3689 + 3690 + static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid) 3691 + { 3692 + struct nvme_ns *ns = nvme_find_get_ns(ctrl, nsid); 3693 + 3694 + if (ns) { 3695 + nvme_ns_remove(ns); 3696 + nvme_put_ns(ns); 3697 + } 3746 3698 } 3747 3699 3748 3700 static void nvme_validate_ns(struct nvme_ctrl *ctrl, unsigned nsid) ··· 3786 3718 3787 3719 } 3788 3720 3789 - static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn) 3721 + static int nvme_scan_ns_list(struct nvme_ctrl *ctrl) 3790 3722 { 3791 - struct nvme_ns *ns; 3723 + const int nr_entries = NVME_IDENTIFY_DATA_SIZE / sizeof(__le32); 3792 3724 __le32 *ns_list; 3793 - unsigned i, j, nsid, prev = 0; 3794 - unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024); 3795 - int ret = 0; 3725 + u32 prev = 0; 3726 + int ret = 0, i; 3727 + 3728 + if (nvme_ctrl_limited_cns(ctrl)) 3729 + return -EOPNOTSUPP; 3796 3730 3797 3731 ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL); 3798 3732 if (!ns_list) 3799 3733 return -ENOMEM; 3800 3734 3801 - for (i = 0; i < num_lists; i++) { 3735 + for (;;) { 3802 3736 ret = nvme_identify_ns_list(ctrl, prev, ns_list); 3803 3737 if (ret) 3804 3738 goto free; 3805 3739 3806 - for (j = 0; j < min(nn, 1024U); j++) { 3807 - nsid = le32_to_cpu(ns_list[j]); 3808 - if (!nsid) 3740 + for (i = 0; i < nr_entries; i++) { 3741 + u32 nsid = le32_to_cpu(ns_list[i]); 3742 + 3743 + if (!nsid) /* end of the list? 
*/ 3809 3744 goto out; 3810 - 3811 3745 nvme_validate_ns(ctrl, nsid); 3812 - 3813 - while (++prev < nsid) { 3814 - ns = nvme_find_get_ns(ctrl, prev); 3815 - if (ns) { 3816 - nvme_ns_remove(ns); 3817 - nvme_put_ns(ns); 3818 - } 3819 - } 3746 + while (++prev < nsid) 3747 + nvme_ns_remove_by_nsid(ctrl, prev); 3820 3748 } 3821 - nn -= j; 3822 3749 } 3823 3750 out: 3824 3751 nvme_remove_invalid_namespaces(ctrl, prev); ··· 3822 3759 return ret; 3823 3760 } 3824 3761 3825 - static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) 3762 + static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl) 3826 3763 { 3827 - unsigned i; 3764 + struct nvme_id_ctrl *id; 3765 + u32 nn, i; 3766 + 3767 + if (nvme_identify_ctrl(ctrl, &id)) 3768 + return; 3769 + nn = le32_to_cpu(id->nn); 3770 + kfree(id); 3828 3771 3829 3772 for (i = 1; i <= nn; i++) 3830 3773 nvme_validate_ns(ctrl, i); ··· 3867 3798 { 3868 3799 struct nvme_ctrl *ctrl = 3869 3800 container_of(work, struct nvme_ctrl, scan_work); 3870 - struct nvme_id_ctrl *id; 3871 - unsigned nn; 3872 3801 3873 3802 /* No tagset on a live ctrl means IO queues could not created */ 3874 3803 if (ctrl->state != NVME_CTRL_LIVE || !ctrl->tagset) ··· 3877 3810 nvme_clear_changed_ns_log(ctrl); 3878 3811 } 3879 3812 3880 - if (nvme_identify_ctrl(ctrl, &id)) 3881 - return; 3882 - 3883 3813 mutex_lock(&ctrl->scan_lock); 3884 - nn = le32_to_cpu(id->nn); 3885 - if (ctrl->vs >= NVME_VS(1, 1, 0) && 3886 - !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) { 3887 - if (!nvme_scan_ns_list(ctrl, nn)) 3888 - goto out_free_id; 3889 - } 3890 - nvme_scan_ns_sequential(ctrl, nn); 3891 - out_free_id: 3814 + if (nvme_scan_ns_list(ctrl) != 0) 3815 + nvme_scan_ns_sequential(ctrl); 3892 3816 mutex_unlock(&ctrl->scan_lock); 3893 - kfree(id); 3817 + 3894 3818 down_write(&ctrl->namespaces_rwsem); 3895 3819 list_sort(NULL, &ctrl->namespaces, ns_cmp); 3896 3820 up_write(&ctrl->namespaces_rwsem);
+463 -118
drivers/nvme/host/fc.c
··· 14 14 #include "fabrics.h" 15 15 #include <linux/nvme-fc-driver.h> 16 16 #include <linux/nvme-fc.h> 17 + #include "fc.h" 17 18 #include <scsi/scsi_transport_fc.h> 18 19 19 20 /* *************************** Data Structures/Defines ****************** */ ··· 62 61 bool req_queued; 63 62 }; 64 63 64 + struct nvmefc_ls_rcv_op { 65 + struct nvme_fc_rport *rport; 66 + struct nvmefc_ls_rsp *lsrsp; 67 + union nvmefc_ls_requests *rqstbuf; 68 + union nvmefc_ls_responses *rspbuf; 69 + u16 rqstdatalen; 70 + bool handled; 71 + dma_addr_t rspdma; 72 + struct list_head lsrcv_list; /* rport->ls_rcv_list */ 73 + } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 74 + 65 75 enum nvme_fcpop_state { 66 76 FCPOP_STATE_UNINIT = 0, 67 77 FCPOP_STATE_IDLE = 1, ··· 108 96 struct nvme_fcp_op_w_sgl { 109 97 struct nvme_fc_fcp_op op; 110 98 struct scatterlist sgl[NVME_INLINE_SG_CNT]; 111 - uint8_t priv[0]; 99 + uint8_t priv[]; 112 100 }; 113 101 114 102 struct nvme_fc_lport { ··· 129 117 struct list_head endp_list; /* for lport->endp_list */ 130 118 struct list_head ctrl_list; 131 119 struct list_head ls_req_list; 120 + struct list_head ls_rcv_list; 132 121 struct list_head disc_list; 133 122 struct device *dev; /* physical device for dma */ 134 123 struct nvme_fc_lport *lport; ··· 137 124 struct kref ref; 138 125 atomic_t act_ctrl_cnt; 139 126 unsigned long dev_loss_end; 127 + struct work_struct lsrcv_work; 140 128 } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 141 129 142 - enum nvme_fcctrl_flags { 143 - FCCTRL_TERMIO = (1 << 0), 144 - }; 130 + /* fc_ctrl flags values - specified as bit positions */ 131 + #define ASSOC_ACTIVE 0 132 + #define FCCTRL_TERMIO 1 145 133 146 134 struct nvme_fc_ctrl { 147 135 spinlock_t lock; ··· 153 139 u32 cnum; 154 140 155 141 bool ioq_live; 156 - bool assoc_active; 157 142 atomic_t err_work_active; 158 143 u64 association_id; 144 + struct nvmefc_ls_rcv_op *rcv_disconn; 159 145 160 146 struct list_head ctrl_list; 
/* rport->ctrl_list */ 161 147 ··· 166 152 struct work_struct err_work; 167 153 168 154 struct kref ref; 169 - u32 flags; 155 + unsigned long flags; 170 156 u32 iocnt; 171 157 wait_queue_head_t ioabort_wait; 172 158 ··· 232 218 233 219 static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, 234 220 struct nvme_fc_queue *, unsigned int); 221 + 222 + static void nvme_fc_handle_ls_rqst_work(struct work_struct *work); 223 + 235 224 236 225 static void 237 226 nvme_fc_free_lport(struct kref *ref) ··· 411 394 newrec->ops = template; 412 395 newrec->dev = dev; 413 396 ida_init(&newrec->endp_cnt); 414 - newrec->localport.private = &newrec[1]; 397 + if (template->local_priv_sz) 398 + newrec->localport.private = &newrec[1]; 399 + else 400 + newrec->localport.private = NULL; 415 401 newrec->localport.node_name = pinfo->node_name; 416 402 newrec->localport.port_name = pinfo->port_name; 417 403 newrec->localport.port_role = pinfo->port_role; ··· 721 701 atomic_set(&newrec->act_ctrl_cnt, 0); 722 702 spin_lock_init(&newrec->lock); 723 703 newrec->remoteport.localport = &lport->localport; 704 + INIT_LIST_HEAD(&newrec->ls_rcv_list); 724 705 newrec->dev = lport->dev; 725 706 newrec->lport = lport; 726 - newrec->remoteport.private = &newrec[1]; 707 + if (lport->ops->remote_priv_sz) 708 + newrec->remoteport.private = &newrec[1]; 709 + else 710 + newrec->remoteport.private = NULL; 727 711 newrec->remoteport.port_role = pinfo->port_role; 728 712 newrec->remoteport.node_name = pinfo->node_name; 729 713 newrec->remoteport.port_name = pinfo->port_name; ··· 735 711 newrec->remoteport.port_state = FC_OBJSTATE_ONLINE; 736 712 newrec->remoteport.port_num = idx; 737 713 __nvme_fc_set_dev_loss_tmo(newrec, pinfo); 714 + INIT_WORK(&newrec->lsrcv_work, nvme_fc_handle_ls_rqst_work); 738 715 739 716 spin_lock_irqsave(&nvme_fc_lock, flags); 740 717 list_add_tail(&newrec->endp_list, &lport->endp_list); ··· 1025 1000 static void nvme_fc_ctrl_put(struct nvme_fc_ctrl *); 1026 1001 static int 
nvme_fc_ctrl_get(struct nvme_fc_ctrl *); 1027 1002 1003 + static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1028 1004 1029 1005 static void 1030 1006 __nvme_fc_finish_ls_req(struct nvmefc_ls_req_op *lsop) ··· 1166 1140 return __nvme_fc_send_ls_req(rport, lsop, done); 1167 1141 } 1168 1142 1169 - /* Validation Error indexes into the string table below */ 1170 - enum { 1171 - VERR_NO_ERROR = 0, 1172 - VERR_LSACC = 1, 1173 - VERR_LSDESC_RQST = 2, 1174 - VERR_LSDESC_RQST_LEN = 3, 1175 - VERR_ASSOC_ID = 4, 1176 - VERR_ASSOC_ID_LEN = 5, 1177 - VERR_CONN_ID = 6, 1178 - VERR_CONN_ID_LEN = 7, 1179 - VERR_CR_ASSOC = 8, 1180 - VERR_CR_ASSOC_ACC_LEN = 9, 1181 - VERR_CR_CONN = 10, 1182 - VERR_CR_CONN_ACC_LEN = 11, 1183 - VERR_DISCONN = 12, 1184 - VERR_DISCONN_ACC_LEN = 13, 1185 - }; 1186 - 1187 - static char *validation_errors[] = { 1188 - "OK", 1189 - "Not LS_ACC", 1190 - "Not LSDESC_RQST", 1191 - "Bad LSDESC_RQST Length", 1192 - "Not Association ID", 1193 - "Bad Association ID Length", 1194 - "Not Connection ID", 1195 - "Bad Connection ID Length", 1196 - "Not CR_ASSOC Rqst", 1197 - "Bad CR_ASSOC ACC Length", 1198 - "Not CR_CONN Rqst", 1199 - "Bad CR_CONN ACC Length", 1200 - "Not Disconnect Rqst", 1201 - "Bad Disconnect ACC Length", 1202 - }; 1203 - 1204 1143 static int 1205 1144 nvme_fc_connect_admin_queue(struct nvme_fc_ctrl *ctrl, 1206 1145 struct nvme_fc_queue *queue, u16 qsize, u16 ersp_ratio) ··· 1174 1183 struct nvmefc_ls_req *lsreq; 1175 1184 struct fcnvme_ls_cr_assoc_rqst *assoc_rqst; 1176 1185 struct fcnvme_ls_cr_assoc_acc *assoc_acc; 1186 + unsigned long flags; 1177 1187 int ret, fcret = 0; 1178 1188 1179 1189 lsop = kzalloc((sizeof(*lsop) + 1180 - ctrl->lport->ops->lsrqst_priv_sz + 1181 - sizeof(*assoc_rqst) + sizeof(*assoc_acc)), GFP_KERNEL); 1190 + sizeof(*assoc_rqst) + sizeof(*assoc_acc) + 1191 + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); 1182 1192 if (!lsop) { 1193 + dev_info(ctrl->ctrl.device, 1194 + "NVME-FC{%d}: send Create 
Association failed: ENOMEM\n", 1195 + ctrl->cnum); 1183 1196 ret = -ENOMEM; 1184 1197 goto out_no_memory; 1185 1198 } 1186 - lsreq = &lsop->ls_req; 1187 1199 1188 - lsreq->private = (void *)&lsop[1]; 1189 - assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *) 1190 - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1200 + assoc_rqst = (struct fcnvme_ls_cr_assoc_rqst *)&lsop[1]; 1191 1201 assoc_acc = (struct fcnvme_ls_cr_assoc_acc *)&assoc_rqst[1]; 1202 + lsreq = &lsop->ls_req; 1203 + if (ctrl->lport->ops->lsrqst_priv_sz) 1204 + lsreq->private = &assoc_acc[1]; 1205 + else 1206 + lsreq->private = NULL; 1192 1207 1193 1208 assoc_rqst->w0.ls_cmd = FCNVME_LS_CREATE_ASSOCIATION; 1194 1209 assoc_rqst->desc_list_len = ··· 1264 1267 "q %d Create Association LS failed: %s\n", 1265 1268 queue->qnum, validation_errors[fcret]); 1266 1269 } else { 1270 + spin_lock_irqsave(&ctrl->lock, flags); 1267 1271 ctrl->association_id = 1268 1272 be64_to_cpu(assoc_acc->associd.association_id); 1269 1273 queue->connection_id = 1270 1274 be64_to_cpu(assoc_acc->connectid.connection_id); 1271 1275 set_bit(NVME_FC_Q_CONNECTED, &queue->flags); 1276 + spin_unlock_irqrestore(&ctrl->lock, flags); 1272 1277 } 1273 1278 1274 1279 out_free_buffer: ··· 1294 1295 int ret, fcret = 0; 1295 1296 1296 1297 lsop = kzalloc((sizeof(*lsop) + 1297 - ctrl->lport->ops->lsrqst_priv_sz + 1298 - sizeof(*conn_rqst) + sizeof(*conn_acc)), GFP_KERNEL); 1298 + sizeof(*conn_rqst) + sizeof(*conn_acc) + 1299 + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); 1299 1300 if (!lsop) { 1301 + dev_info(ctrl->ctrl.device, 1302 + "NVME-FC{%d}: send Create Connection failed: ENOMEM\n", 1303 + ctrl->cnum); 1300 1304 ret = -ENOMEM; 1301 1305 goto out_no_memory; 1302 1306 } 1303 - lsreq = &lsop->ls_req; 1304 1307 1305 - lsreq->private = (void *)&lsop[1]; 1306 - conn_rqst = (struct fcnvme_ls_cr_conn_rqst *) 1307 - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1308 + conn_rqst = (struct fcnvme_ls_cr_conn_rqst *)&lsop[1]; 1308 1309 
conn_acc = (struct fcnvme_ls_cr_conn_acc *)&conn_rqst[1]; 1310 + lsreq = &lsop->ls_req; 1311 + if (ctrl->lport->ops->lsrqst_priv_sz) 1312 + lsreq->private = (void *)&conn_acc[1]; 1313 + else 1314 + lsreq->private = NULL; 1309 1315 1310 1316 conn_rqst->w0.ls_cmd = FCNVME_LS_CREATE_CONNECTION; 1311 1317 conn_rqst->desc_list_len = cpu_to_be32( ··· 1424 1420 int ret; 1425 1421 1426 1422 lsop = kzalloc((sizeof(*lsop) + 1427 - ctrl->lport->ops->lsrqst_priv_sz + 1428 - sizeof(*discon_rqst) + sizeof(*discon_acc)), 1429 - GFP_KERNEL); 1430 - if (!lsop) 1431 - /* couldn't sent it... too bad */ 1423 + sizeof(*discon_rqst) + sizeof(*discon_acc) + 1424 + ctrl->lport->ops->lsrqst_priv_sz), GFP_KERNEL); 1425 + if (!lsop) { 1426 + dev_info(ctrl->ctrl.device, 1427 + "NVME-FC{%d}: send Disconnect Association " 1428 + "failed: ENOMEM\n", 1429 + ctrl->cnum); 1432 1430 return; 1431 + } 1433 1432 1434 - lsreq = &lsop->ls_req; 1435 - 1436 - lsreq->private = (void *)&lsop[1]; 1437 - discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *) 1438 - (lsreq->private + ctrl->lport->ops->lsrqst_priv_sz); 1433 + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; 1439 1434 discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; 1435 + lsreq = &lsop->ls_req; 1436 + if (ctrl->lport->ops->lsrqst_priv_sz) 1437 + lsreq->private = (void *)&discon_acc[1]; 1438 + else 1439 + lsreq->private = NULL; 1440 1440 1441 - discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; 1442 - discon_rqst->desc_list_len = cpu_to_be32( 1443 - sizeof(struct fcnvme_lsdesc_assoc_id) + 1444 - sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1445 - 1446 - discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 1447 - discon_rqst->associd.desc_len = 1448 - fcnvme_lsdesc_len( 1449 - sizeof(struct fcnvme_lsdesc_assoc_id)); 1450 - 1451 - discon_rqst->associd.association_id = cpu_to_be64(ctrl->association_id); 1452 - 1453 - discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 1454 - 
FCNVME_LSDESC_DISCONN_CMD); 1455 - discon_rqst->discon_cmd.desc_len = 1456 - fcnvme_lsdesc_len( 1457 - sizeof(struct fcnvme_lsdesc_disconn_cmd)); 1458 - 1459 - lsreq->rqstaddr = discon_rqst; 1460 - lsreq->rqstlen = sizeof(*discon_rqst); 1461 - lsreq->rspaddr = discon_acc; 1462 - lsreq->rsplen = sizeof(*discon_acc); 1463 - lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; 1441 + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, 1442 + ctrl->association_id); 1464 1443 1465 1444 ret = nvme_fc_send_ls_req_async(ctrl->rport, lsop, 1466 1445 nvme_fc_disconnect_assoc_done); ··· 1451 1464 kfree(lsop); 1452 1465 } 1453 1466 1467 + static void 1468 + nvme_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) 1469 + { 1470 + struct nvmefc_ls_rcv_op *lsop = lsrsp->nvme_fc_private; 1471 + struct nvme_fc_rport *rport = lsop->rport; 1472 + struct nvme_fc_lport *lport = rport->lport; 1473 + unsigned long flags; 1474 + 1475 + spin_lock_irqsave(&rport->lock, flags); 1476 + list_del(&lsop->lsrcv_list); 1477 + spin_unlock_irqrestore(&rport->lock, flags); 1478 + 1479 + fc_dma_sync_single_for_cpu(lport->dev, lsop->rspdma, 1480 + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); 1481 + fc_dma_unmap_single(lport->dev, lsop->rspdma, 1482 + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); 1483 + 1484 + kfree(lsop); 1485 + 1486 + nvme_fc_rport_put(rport); 1487 + } 1488 + 1489 + static void 1490 + nvme_fc_xmt_ls_rsp(struct nvmefc_ls_rcv_op *lsop) 1491 + { 1492 + struct nvme_fc_rport *rport = lsop->rport; 1493 + struct nvme_fc_lport *lport = rport->lport; 1494 + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; 1495 + int ret; 1496 + 1497 + fc_dma_sync_single_for_device(lport->dev, lsop->rspdma, 1498 + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); 1499 + 1500 + ret = lport->ops->xmt_ls_rsp(&lport->localport, &rport->remoteport, 1501 + lsop->lsrsp); 1502 + if (ret) { 1503 + dev_warn(lport->dev, 1504 + "LLDD rejected LS RSP xmt: LS %d status %d\n", 1505 + w0->ls_cmd, ret); 1506 + nvme_fc_xmt_ls_rsp_done(lsop->lsrsp); 1507 + 
return; 1508 + } 1509 + } 1510 + 1511 + static struct nvme_fc_ctrl * 1512 + nvme_fc_match_disconn_ls(struct nvme_fc_rport *rport, 1513 + struct nvmefc_ls_rcv_op *lsop) 1514 + { 1515 + struct fcnvme_ls_disconnect_assoc_rqst *rqst = 1516 + &lsop->rqstbuf->rq_dis_assoc; 1517 + struct nvme_fc_ctrl *ctrl, *ret = NULL; 1518 + struct nvmefc_ls_rcv_op *oldls = NULL; 1519 + u64 association_id = be64_to_cpu(rqst->associd.association_id); 1520 + unsigned long flags; 1521 + 1522 + spin_lock_irqsave(&rport->lock, flags); 1523 + 1524 + list_for_each_entry(ctrl, &rport->ctrl_list, ctrl_list) { 1525 + if (!nvme_fc_ctrl_get(ctrl)) 1526 + continue; 1527 + spin_lock(&ctrl->lock); 1528 + if (association_id == ctrl->association_id) { 1529 + oldls = ctrl->rcv_disconn; 1530 + ctrl->rcv_disconn = lsop; 1531 + ret = ctrl; 1532 + } 1533 + spin_unlock(&ctrl->lock); 1534 + if (ret) 1535 + /* leave the ctrl get reference */ 1536 + break; 1537 + nvme_fc_ctrl_put(ctrl); 1538 + } 1539 + 1540 + spin_unlock_irqrestore(&rport->lock, flags); 1541 + 1542 + /* transmit a response for anything that was pending */ 1543 + if (oldls) { 1544 + dev_info(rport->lport->dev, 1545 + "NVME-FC{%d}: Multiple Disconnect Association " 1546 + "LS's received\n", ctrl->cnum); 1547 + /* overwrite good response with bogus failure */ 1548 + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, 1549 + sizeof(*oldls->rspbuf), 1550 + rqst->w0.ls_cmd, 1551 + FCNVME_RJT_RC_UNAB, 1552 + FCNVME_RJT_EXP_NONE, 0); 1553 + nvme_fc_xmt_ls_rsp(oldls); 1554 + } 1555 + 1556 + return ret; 1557 + } 1558 + 1559 + /* 1560 + * returns true to mean LS handled and ls_rsp can be sent 1561 + * returns false to defer ls_rsp xmt (will be done as part of 1562 + * association termination) 1563 + */ 1564 + static bool 1565 + nvme_fc_ls_disconnect_assoc(struct nvmefc_ls_rcv_op *lsop) 1566 + { 1567 + struct nvme_fc_rport *rport = lsop->rport; 1568 + struct fcnvme_ls_disconnect_assoc_rqst *rqst = 1569 + &lsop->rqstbuf->rq_dis_assoc; 1570 + struct 
fcnvme_ls_disconnect_assoc_acc *acc = 1571 + &lsop->rspbuf->rsp_dis_assoc; 1572 + struct nvme_fc_ctrl *ctrl = NULL; 1573 + int ret = 0; 1574 + 1575 + memset(acc, 0, sizeof(*acc)); 1576 + 1577 + ret = nvmefc_vldt_lsreq_discon_assoc(lsop->rqstdatalen, rqst); 1578 + if (!ret) { 1579 + /* match an active association */ 1580 + ctrl = nvme_fc_match_disconn_ls(rport, lsop); 1581 + if (!ctrl) 1582 + ret = VERR_NO_ASSOC; 1583 + } 1584 + 1585 + if (ret) { 1586 + dev_info(rport->lport->dev, 1587 + "Disconnect LS failed: %s\n", 1588 + validation_errors[ret]); 1589 + lsop->lsrsp->rsplen = nvme_fc_format_rjt(acc, 1590 + sizeof(*acc), rqst->w0.ls_cmd, 1591 + (ret == VERR_NO_ASSOC) ? 1592 + FCNVME_RJT_RC_INV_ASSOC : 1593 + FCNVME_RJT_RC_LOGIC, 1594 + FCNVME_RJT_EXP_NONE, 0); 1595 + return true; 1596 + } 1597 + 1598 + /* format an ACCept response */ 1599 + 1600 + lsop->lsrsp->rsplen = sizeof(*acc); 1601 + 1602 + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1603 + fcnvme_lsdesc_len( 1604 + sizeof(struct fcnvme_ls_disconnect_assoc_acc)), 1605 + FCNVME_LS_DISCONNECT_ASSOC); 1606 + 1607 + /* 1608 + * the transmit of the response will occur after the exchanges 1609 + * for the association have been ABTS'd by 1610 + * nvme_fc_delete_association(). 1611 + */ 1612 + 1613 + /* fail the association */ 1614 + nvme_fc_error_recovery(ctrl, "Disconnect Association LS received"); 1615 + 1616 + /* release the reference taken by nvme_fc_match_disconn_ls() */ 1617 + nvme_fc_ctrl_put(ctrl); 1618 + 1619 + return false; 1620 + } 1621 + 1622 + /* 1623 + * Actual Processing routine for received FC-NVME LS Requests from the LLD 1624 + * returns true if a response should be sent afterward, false if rsp will 1625 + * be sent asynchronously. 
1626 + */ 1627 + static bool 1628 + nvme_fc_handle_ls_rqst(struct nvmefc_ls_rcv_op *lsop) 1629 + { 1630 + struct fcnvme_ls_rqst_w0 *w0 = &lsop->rqstbuf->w0; 1631 + bool ret = true; 1632 + 1633 + lsop->lsrsp->nvme_fc_private = lsop; 1634 + lsop->lsrsp->rspbuf = lsop->rspbuf; 1635 + lsop->lsrsp->rspdma = lsop->rspdma; 1636 + lsop->lsrsp->done = nvme_fc_xmt_ls_rsp_done; 1637 + /* Be preventative. handlers will later set to valid length */ 1638 + lsop->lsrsp->rsplen = 0; 1639 + 1640 + /* 1641 + * handlers: 1642 + * parse request input, execute the request, and format the 1643 + * LS response 1644 + */ 1645 + switch (w0->ls_cmd) { 1646 + case FCNVME_LS_DISCONNECT_ASSOC: 1647 + ret = nvme_fc_ls_disconnect_assoc(lsop); 1648 + break; 1649 + case FCNVME_LS_DISCONNECT_CONN: 1650 + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, 1651 + sizeof(*lsop->rspbuf), w0->ls_cmd, 1652 + FCNVME_RJT_RC_UNSUP, FCNVME_RJT_EXP_NONE, 0); 1653 + break; 1654 + case FCNVME_LS_CREATE_ASSOCIATION: 1655 + case FCNVME_LS_CREATE_CONNECTION: 1656 + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, 1657 + sizeof(*lsop->rspbuf), w0->ls_cmd, 1658 + FCNVME_RJT_RC_LOGIC, FCNVME_RJT_EXP_NONE, 0); 1659 + break; 1660 + default: 1661 + lsop->lsrsp->rsplen = nvme_fc_format_rjt(lsop->rspbuf, 1662 + sizeof(*lsop->rspbuf), w0->ls_cmd, 1663 + FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); 1664 + break; 1665 + } 1666 + 1667 + return(ret); 1668 + } 1669 + 1670 + static void 1671 + nvme_fc_handle_ls_rqst_work(struct work_struct *work) 1672 + { 1673 + struct nvme_fc_rport *rport = 1674 + container_of(work, struct nvme_fc_rport, lsrcv_work); 1675 + struct fcnvme_ls_rqst_w0 *w0; 1676 + struct nvmefc_ls_rcv_op *lsop; 1677 + unsigned long flags; 1678 + bool sendrsp; 1679 + 1680 + restart: 1681 + sendrsp = true; 1682 + spin_lock_irqsave(&rport->lock, flags); 1683 + list_for_each_entry(lsop, &rport->ls_rcv_list, lsrcv_list) { 1684 + if (lsop->handled) 1685 + continue; 1686 + 1687 + lsop->handled = true; 1688 + if 
(rport->remoteport.port_state == FC_OBJSTATE_ONLINE) { 1689 + spin_unlock_irqrestore(&rport->lock, flags); 1690 + sendrsp = nvme_fc_handle_ls_rqst(lsop); 1691 + } else { 1692 + spin_unlock_irqrestore(&rport->lock, flags); 1693 + w0 = &lsop->rqstbuf->w0; 1694 + lsop->lsrsp->rsplen = nvme_fc_format_rjt( 1695 + lsop->rspbuf, 1696 + sizeof(*lsop->rspbuf), 1697 + w0->ls_cmd, 1698 + FCNVME_RJT_RC_UNAB, 1699 + FCNVME_RJT_EXP_NONE, 0); 1700 + } 1701 + if (sendrsp) 1702 + nvme_fc_xmt_ls_rsp(lsop); 1703 + goto restart; 1704 + } 1705 + spin_unlock_irqrestore(&rport->lock, flags); 1706 + } 1707 + 1708 + /** 1709 + * nvme_fc_rcv_ls_req - transport entry point called by an LLDD 1710 + * upon the reception of a NVME LS request. 1711 + * 1712 + * The nvme-fc layer will copy payload to an internal structure for 1713 + * processing. As such, upon completion of the routine, the LLDD may 1714 + * immediately free/reuse the LS request buffer passed in the call. 1715 + * 1716 + * If this routine returns error, the LLDD should abort the exchange. 1717 + * 1718 + * @remoteport: pointer to the (registered) remote port that the LS 1719 + * was received from. The remoteport is associated with 1720 + * a specific localport. 1721 + * @lsrsp: pointer to a nvmefc_ls_rsp response structure to be 1722 + * used to reference the exchange corresponding to the LS 1723 + * when issuing an ls response. 
1724 + * @lsreqbuf: pointer to the buffer containing the LS Request 1725 + * @lsreqbuf_len: length, in bytes, of the received LS request 1726 + */ 1727 + int 1728 + nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *portptr, 1729 + struct nvmefc_ls_rsp *lsrsp, 1730 + void *lsreqbuf, u32 lsreqbuf_len) 1731 + { 1732 + struct nvme_fc_rport *rport = remoteport_to_rport(portptr); 1733 + struct nvme_fc_lport *lport = rport->lport; 1734 + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; 1735 + struct nvmefc_ls_rcv_op *lsop; 1736 + unsigned long flags; 1737 + int ret; 1738 + 1739 + nvme_fc_rport_get(rport); 1740 + 1741 + /* validate there's a routine to transmit a response */ 1742 + if (!lport->ops->xmt_ls_rsp) { 1743 + dev_info(lport->dev, 1744 + "RCV %s LS failed: no LLDD xmt_ls_rsp\n", 1745 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 1746 + nvmefc_ls_names[w0->ls_cmd] : ""); 1747 + ret = -EINVAL; 1748 + goto out_put; 1749 + } 1750 + 1751 + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { 1752 + dev_info(lport->dev, 1753 + "RCV %s LS failed: payload too large\n", 1754 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 1755 + nvmefc_ls_names[w0->ls_cmd] : ""); 1756 + ret = -E2BIG; 1757 + goto out_put; 1758 + } 1759 + 1760 + lsop = kzalloc(sizeof(*lsop) + 1761 + sizeof(union nvmefc_ls_requests) + 1762 + sizeof(union nvmefc_ls_responses), 1763 + GFP_KERNEL); 1764 + if (!lsop) { 1765 + dev_info(lport->dev, 1766 + "RCV %s LS failed: No memory\n", 1767 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 
1768 + nvmefc_ls_names[w0->ls_cmd] : ""); 1769 + ret = -ENOMEM; 1770 + goto out_put; 1771 + } 1772 + lsop->rqstbuf = (union nvmefc_ls_requests *)&lsop[1]; 1773 + lsop->rspbuf = (union nvmefc_ls_responses *)&lsop->rqstbuf[1]; 1774 + 1775 + lsop->rspdma = fc_dma_map_single(lport->dev, lsop->rspbuf, 1776 + sizeof(*lsop->rspbuf), 1777 + DMA_TO_DEVICE); 1778 + if (fc_dma_mapping_error(lport->dev, lsop->rspdma)) { 1779 + dev_info(lport->dev, 1780 + "RCV %s LS failed: DMA mapping failure\n", 1781 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 1782 + nvmefc_ls_names[w0->ls_cmd] : ""); 1783 + ret = -EFAULT; 1784 + goto out_free; 1785 + } 1786 + 1787 + lsop->rport = rport; 1788 + lsop->lsrsp = lsrsp; 1789 + 1790 + memcpy(lsop->rqstbuf, lsreqbuf, lsreqbuf_len); 1791 + lsop->rqstdatalen = lsreqbuf_len; 1792 + 1793 + spin_lock_irqsave(&rport->lock, flags); 1794 + if (rport->remoteport.port_state != FC_OBJSTATE_ONLINE) { 1795 + spin_unlock_irqrestore(&rport->lock, flags); 1796 + ret = -ENOTCONN; 1797 + goto out_unmap; 1798 + } 1799 + list_add_tail(&lsop->lsrcv_list, &rport->ls_rcv_list); 1800 + spin_unlock_irqrestore(&rport->lock, flags); 1801 + 1802 + schedule_work(&rport->lsrcv_work); 1803 + 1804 + return 0; 1805 + 1806 + out_unmap: 1807 + fc_dma_unmap_single(lport->dev, lsop->rspdma, 1808 + sizeof(*lsop->rspbuf), DMA_TO_DEVICE); 1809 + out_free: 1810 + kfree(lsop); 1811 + out_put: 1812 + nvme_fc_rport_put(rport); 1813 + return ret; 1814 + } 1815 + EXPORT_SYMBOL_GPL(nvme_fc_rcv_ls_req); 1816 + 1454 1817 1455 1818 /* *********************** NVME Ctrl Routines **************************** */ 1456 - 1457 - static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); 1458 1819 1459 1820 static void 1460 1821 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, ··· 1835 1500 opstate = atomic_xchg(&op->state, FCPOP_STATE_ABORTED); 1836 1501 if (opstate != FCPOP_STATE_ACTIVE) 1837 1502 atomic_set(&op->state, opstate); 1838 - else if (ctrl->flags & FCCTRL_TERMIO) 1503 + 
else if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) 1839 1504 ctrl->iocnt++; 1840 1505 spin_unlock_irqrestore(&ctrl->lock, flags); 1841 1506 ··· 1872 1537 1873 1538 if (opstate == FCPOP_STATE_ABORTED) { 1874 1539 spin_lock_irqsave(&ctrl->lock, flags); 1875 - if (ctrl->flags & FCCTRL_TERMIO) { 1540 + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) { 1876 1541 if (!--ctrl->iocnt) 1877 1542 wake_up(&ctrl->ioabort_wait); 1878 1543 } ··· 2106 1771 res = __nvme_fc_init_request(ctrl, queue, &op->op, rq, queue->rqcnt++); 2107 1772 if (res) 2108 1773 return res; 2109 - op->op.fcp_req.first_sgl = &op->sgl[0]; 1774 + op->op.fcp_req.first_sgl = op->sgl; 2110 1775 op->op.fcp_req.private = &op->priv[0]; 2111 1776 nvme_req(rq)->ctrl = &ctrl->ctrl; 2112 1777 return res; ··· 2118 1783 struct nvme_fc_fcp_op *aen_op; 2119 1784 struct nvme_fc_cmd_iu *cmdiu; 2120 1785 struct nvme_command *sqe; 2121 - void *private; 1786 + void *private = NULL; 2122 1787 int i, ret; 2123 1788 2124 1789 aen_op = ctrl->aen_ops; 2125 1790 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { 2126 - private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 1791 + if (ctrl->lport->ops->fcprqst_priv_sz) { 1792 + private = kzalloc(ctrl->lport->ops->fcprqst_priv_sz, 2127 1793 GFP_KERNEL); 2128 - if (!private) 2129 - return -ENOMEM; 1794 + if (!private) 1795 + return -ENOMEM; 1796 + } 2130 1797 2131 1798 cmdiu = &aen_op->cmd_iu; 2132 1799 sqe = &cmdiu->sqe; ··· 2159 1822 2160 1823 aen_op = ctrl->aen_ops; 2161 1824 for (i = 0; i < NVME_NR_AEN_COMMANDS; i++, aen_op++) { 2162 - if (!aen_op->fcp_req.private) 2163 - continue; 2164 - 2165 1825 __nvme_fc_exit_request(ctrl, aen_op); 2166 1826 2167 1827 kfree(aen_op->fcp_req.private); ··· 2700 2366 { 2701 2367 struct nvme_fc_ctrl *ctrl = to_fc_ctrl(arg); 2702 2368 struct nvme_fc_fcp_op *aen_op; 2703 - unsigned long flags; 2704 - bool terminating = false; 2705 2369 blk_status_t ret; 2706 2370 2707 - spin_lock_irqsave(&ctrl->lock, flags); 2708 - if (ctrl->flags & FCCTRL_TERMIO) 2709 - 
terminating = true; 2710 - spin_unlock_irqrestore(&ctrl->lock, flags); 2711 - 2712 - if (terminating) 2371 + if (test_bit(FCCTRL_TERMIO, &ctrl->flags)) 2713 2372 return; 2714 2373 2715 2374 aen_op = &ctrl->aen_ops[0]; ··· 2911 2584 struct nvme_fc_rport *rport = ctrl->rport; 2912 2585 u32 cnt; 2913 2586 2914 - if (ctrl->assoc_active) 2587 + if (test_and_set_bit(ASSOC_ACTIVE, &ctrl->flags)) 2915 2588 return 1; 2916 2589 2917 - ctrl->assoc_active = true; 2918 2590 cnt = atomic_inc_return(&rport->act_ctrl_cnt); 2919 2591 if (cnt == 1) 2920 2592 nvme_fc_rport_active_on_lport(rport); ··· 2928 2602 struct nvme_fc_lport *lport = rport->lport; 2929 2603 u32 cnt; 2930 2604 2931 - /* ctrl->assoc_active=false will be set independently */ 2605 + /* clearing of ctrl->flags ASSOC_ACTIVE bit is in association delete */ 2932 2606 2933 2607 cnt = atomic_dec_return(&rport->act_ctrl_cnt); 2934 2608 if (cnt == 0) { ··· 2948 2622 nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) 2949 2623 { 2950 2624 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; 2625 + struct nvmefc_ls_rcv_op *disls = NULL; 2626 + unsigned long flags; 2951 2627 int ret; 2952 2628 bool changed; 2953 2629 ··· 3067 2739 out_disconnect_admin_queue: 3068 2740 /* send a Disconnect(association) LS to fc-nvme target */ 3069 2741 nvme_fc_xmt_disconnect_assoc(ctrl); 2742 + spin_lock_irqsave(&ctrl->lock, flags); 3070 2743 ctrl->association_id = 0; 2744 + disls = ctrl->rcv_disconn; 2745 + ctrl->rcv_disconn = NULL; 2746 + spin_unlock_irqrestore(&ctrl->lock, flags); 2747 + if (disls) 2748 + nvme_fc_xmt_ls_rsp(disls); 3071 2749 out_delete_hw_queue: 3072 2750 __nvme_fc_delete_hw_queue(ctrl, &ctrl->queues[0], 0); 3073 2751 out_free_queue: 3074 2752 nvme_fc_free_queue(&ctrl->queues[0]); 3075 - ctrl->assoc_active = false; 2753 + clear_bit(ASSOC_ACTIVE, &ctrl->flags); 3076 2754 nvme_fc_ctlr_inactive_on_rport(ctrl); 3077 2755 3078 2756 return ret; ··· 3093 2759 static void 3094 2760 nvme_fc_delete_association(struct nvme_fc_ctrl 
*ctrl) 3095 2761 { 2762 + struct nvmefc_ls_rcv_op *disls = NULL; 3096 2763 unsigned long flags; 3097 2764 3098 - if (!ctrl->assoc_active) 2765 + if (!test_and_clear_bit(ASSOC_ACTIVE, &ctrl->flags)) 3099 2766 return; 3100 - ctrl->assoc_active = false; 3101 2767 3102 2768 spin_lock_irqsave(&ctrl->lock, flags); 3103 - ctrl->flags |= FCCTRL_TERMIO; 2769 + set_bit(FCCTRL_TERMIO, &ctrl->flags); 3104 2770 ctrl->iocnt = 0; 3105 2771 spin_unlock_irqrestore(&ctrl->lock, flags); 3106 2772 ··· 3151 2817 /* wait for all io that had to be aborted */ 3152 2818 spin_lock_irq(&ctrl->lock); 3153 2819 wait_event_lock_irq(ctrl->ioabort_wait, ctrl->iocnt == 0, ctrl->lock); 3154 - ctrl->flags &= ~FCCTRL_TERMIO; 2820 + clear_bit(FCCTRL_TERMIO, &ctrl->flags); 3155 2821 spin_unlock_irq(&ctrl->lock); 3156 2822 3157 2823 nvme_fc_term_aen_ops(ctrl); ··· 3165 2831 if (ctrl->association_id) 3166 2832 nvme_fc_xmt_disconnect_assoc(ctrl); 3167 2833 2834 + spin_lock_irqsave(&ctrl->lock, flags); 3168 2835 ctrl->association_id = 0; 2836 + disls = ctrl->rcv_disconn; 2837 + ctrl->rcv_disconn = NULL; 2838 + spin_unlock_irqrestore(&ctrl->lock, flags); 2839 + if (disls) 2840 + /* 2841 + * if a Disconnect Request was waiting for a response, send 2842 + * now that all ABTS's have been issued (and are complete). 
2843 + */ 2844 + nvme_fc_xmt_ls_rsp(disls); 3169 2845 3170 2846 if (ctrl->ctrl.tagset) { 3171 2847 nvme_fc_delete_hw_io_queues(ctrl); ··· 3246 2902 dev_warn(ctrl->ctrl.device, 3247 2903 "NVME-FC{%d}: dev_loss_tmo (%d) expired " 3248 2904 "while waiting for remoteport connectivity.\n", 3249 - ctrl->cnum, portptr->dev_loss_tmo); 2905 + ctrl->cnum, min_t(int, portptr->dev_loss_tmo, 2906 + (ctrl->ctrl.opts->max_reconnects * 2907 + ctrl->ctrl.opts->reconnect_delay))); 3250 2908 WARN_ON(nvme_delete_ctrl(&ctrl->ctrl)); 3251 2909 } 3252 2910 } ··· 3435 3089 ctrl->dev = lport->dev; 3436 3090 ctrl->cnum = idx; 3437 3091 ctrl->ioq_live = false; 3438 - ctrl->assoc_active = false; 3439 3092 atomic_set(&ctrl->err_work_active, 0); 3440 3093 init_waitqueue_head(&ctrl->ioabort_wait); 3441 3094
+227
drivers/nvme/host/fc.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + /* 3 + * Copyright (c) 2016, Avago Technologies 4 + */ 5 + 6 + #ifndef _NVME_FC_TRANSPORT_H 7 + #define _NVME_FC_TRANSPORT_H 1 8 + 9 + 10 + /* 11 + * Common definitions between the nvme_fc (host) transport and 12 + * nvmet_fc (target) transport implementation. 13 + */ 14 + 15 + /* 16 + * ****************** FC-NVME LS HANDLING ****************** 17 + */ 18 + 19 + union nvmefc_ls_requests { 20 + struct fcnvme_ls_rqst_w0 w0; 21 + struct fcnvme_ls_cr_assoc_rqst rq_cr_assoc; 22 + struct fcnvme_ls_cr_conn_rqst rq_cr_conn; 23 + struct fcnvme_ls_disconnect_assoc_rqst rq_dis_assoc; 24 + struct fcnvme_ls_disconnect_conn_rqst rq_dis_conn; 25 + } __aligned(128); /* alignment for other things alloc'd with */ 26 + 27 + union nvmefc_ls_responses { 28 + struct fcnvme_ls_rjt rsp_rjt; 29 + struct fcnvme_ls_cr_assoc_acc rsp_cr_assoc; 30 + struct fcnvme_ls_cr_conn_acc rsp_cr_conn; 31 + struct fcnvme_ls_disconnect_assoc_acc rsp_dis_assoc; 32 + struct fcnvme_ls_disconnect_conn_acc rsp_dis_conn; 33 + } __aligned(128); /* alignment for other things alloc'd with */ 34 + 35 + static inline void 36 + nvme_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) 37 + { 38 + struct fcnvme_ls_acc_hdr *acc = buf; 39 + 40 + acc->w0.ls_cmd = ls_cmd; 41 + acc->desc_list_len = desc_len; 42 + acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); 43 + acc->rqst.desc_len = 44 + fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); 45 + acc->rqst.w0.ls_cmd = rqst_ls_cmd; 46 + } 47 + 48 + static inline int 49 + nvme_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, 50 + u8 reason, u8 explanation, u8 vendor) 51 + { 52 + struct fcnvme_ls_rjt *rjt = buf; 53 + 54 + nvme_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, 55 + fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), 56 + ls_cmd); 57 + rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); 58 + rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); 59 + rjt->rjt.reason_code = 
reason; 60 + rjt->rjt.reason_explanation = explanation; 61 + rjt->rjt.vendor = vendor; 62 + 63 + return sizeof(struct fcnvme_ls_rjt); 64 + } 65 + 66 + /* Validation Error indexes into the string table below */ 67 + enum { 68 + VERR_NO_ERROR = 0, 69 + VERR_CR_ASSOC_LEN = 1, 70 + VERR_CR_ASSOC_RQST_LEN = 2, 71 + VERR_CR_ASSOC_CMD = 3, 72 + VERR_CR_ASSOC_CMD_LEN = 4, 73 + VERR_ERSP_RATIO = 5, 74 + VERR_ASSOC_ALLOC_FAIL = 6, 75 + VERR_QUEUE_ALLOC_FAIL = 7, 76 + VERR_CR_CONN_LEN = 8, 77 + VERR_CR_CONN_RQST_LEN = 9, 78 + VERR_ASSOC_ID = 10, 79 + VERR_ASSOC_ID_LEN = 11, 80 + VERR_NO_ASSOC = 12, 81 + VERR_CONN_ID = 13, 82 + VERR_CONN_ID_LEN = 14, 83 + VERR_INVAL_CONN = 15, 84 + VERR_CR_CONN_CMD = 16, 85 + VERR_CR_CONN_CMD_LEN = 17, 86 + VERR_DISCONN_LEN = 18, 87 + VERR_DISCONN_RQST_LEN = 19, 88 + VERR_DISCONN_CMD = 20, 89 + VERR_DISCONN_CMD_LEN = 21, 90 + VERR_DISCONN_SCOPE = 22, 91 + VERR_RS_LEN = 23, 92 + VERR_RS_RQST_LEN = 24, 93 + VERR_RS_CMD = 25, 94 + VERR_RS_CMD_LEN = 26, 95 + VERR_RS_RCTL = 27, 96 + VERR_RS_RO = 28, 97 + VERR_LSACC = 29, 98 + VERR_LSDESC_RQST = 30, 99 + VERR_LSDESC_RQST_LEN = 31, 100 + VERR_CR_ASSOC = 32, 101 + VERR_CR_ASSOC_ACC_LEN = 33, 102 + VERR_CR_CONN = 34, 103 + VERR_CR_CONN_ACC_LEN = 35, 104 + VERR_DISCONN = 36, 105 + VERR_DISCONN_ACC_LEN = 37, 106 + }; 107 + 108 + static char *validation_errors[] = { 109 + "OK", 110 + "Bad CR_ASSOC Length", 111 + "Bad CR_ASSOC Rqst Length", 112 + "Not CR_ASSOC Cmd", 113 + "Bad CR_ASSOC Cmd Length", 114 + "Bad Ersp Ratio", 115 + "Association Allocation Failed", 116 + "Queue Allocation Failed", 117 + "Bad CR_CONN Length", 118 + "Bad CR_CONN Rqst Length", 119 + "Not Association ID", 120 + "Bad Association ID Length", 121 + "No Association", 122 + "Not Connection ID", 123 + "Bad Connection ID Length", 124 + "Invalid Connection ID", 125 + "Not CR_CONN Cmd", 126 + "Bad CR_CONN Cmd Length", 127 + "Bad DISCONN Length", 128 + "Bad DISCONN Rqst Length", 129 + "Not DISCONN Cmd", 130 + "Bad DISCONN Cmd Length", 131 + 
"Bad Disconnect Scope", 132 + "Bad RS Length", 133 + "Bad RS Rqst Length", 134 + "Not RS Cmd", 135 + "Bad RS Cmd Length", 136 + "Bad RS R_CTL", 137 + "Bad RS Relative Offset", 138 + "Not LS_ACC", 139 + "Not LSDESC_RQST", 140 + "Bad LSDESC_RQST Length", 141 + "Not CR_ASSOC Rqst", 142 + "Bad CR_ASSOC ACC Length", 143 + "Not CR_CONN Rqst", 144 + "Bad CR_CONN ACC Length", 145 + "Not Disconnect Rqst", 146 + "Bad Disconnect ACC Length", 147 + }; 148 + 149 + #define NVME_FC_LAST_LS_CMD_VALUE FCNVME_LS_DISCONNECT_CONN 150 + 151 + static char *nvmefc_ls_names[] = { 152 + "Reserved (0)", 153 + "RJT (1)", 154 + "ACC (2)", 155 + "Create Association", 156 + "Create Connection", 157 + "Disconnect Association", 158 + "Disconnect Connection", 159 + }; 160 + 161 + static inline void 162 + nvmefc_fmt_lsreq_discon_assoc(struct nvmefc_ls_req *lsreq, 163 + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst, 164 + struct fcnvme_ls_disconnect_assoc_acc *discon_acc, 165 + u64 association_id) 166 + { 167 + lsreq->rqstaddr = discon_rqst; 168 + lsreq->rqstlen = sizeof(*discon_rqst); 169 + lsreq->rspaddr = discon_acc; 170 + lsreq->rsplen = sizeof(*discon_acc); 171 + lsreq->timeout = NVME_FC_LS_TIMEOUT_SEC; 172 + 173 + discon_rqst->w0.ls_cmd = FCNVME_LS_DISCONNECT_ASSOC; 174 + discon_rqst->desc_list_len = cpu_to_be32( 175 + sizeof(struct fcnvme_lsdesc_assoc_id) + 176 + sizeof(struct fcnvme_lsdesc_disconn_cmd)); 177 + 178 + discon_rqst->associd.desc_tag = cpu_to_be32(FCNVME_LSDESC_ASSOC_ID); 179 + discon_rqst->associd.desc_len = 180 + fcnvme_lsdesc_len( 181 + sizeof(struct fcnvme_lsdesc_assoc_id)); 182 + 183 + discon_rqst->associd.association_id = cpu_to_be64(association_id); 184 + 185 + discon_rqst->discon_cmd.desc_tag = cpu_to_be32( 186 + FCNVME_LSDESC_DISCONN_CMD); 187 + discon_rqst->discon_cmd.desc_len = 188 + fcnvme_lsdesc_len( 189 + sizeof(struct fcnvme_lsdesc_disconn_cmd)); 190 + } 191 + 192 + static inline int 193 + nvmefc_vldt_lsreq_discon_assoc(u32 rqstlen, 194 + struct 
fcnvme_ls_disconnect_assoc_rqst *rqst) 195 + { 196 + int ret = 0; 197 + 198 + if (rqstlen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) 199 + ret = VERR_DISCONN_LEN; 200 + else if (rqst->desc_list_len != 201 + fcnvme_lsdesc_len( 202 + sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) 203 + ret = VERR_DISCONN_RQST_LEN; 204 + else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 205 + ret = VERR_ASSOC_ID; 206 + else if (rqst->associd.desc_len != 207 + fcnvme_lsdesc_len( 208 + sizeof(struct fcnvme_lsdesc_assoc_id))) 209 + ret = VERR_ASSOC_ID_LEN; 210 + else if (rqst->discon_cmd.desc_tag != 211 + cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) 212 + ret = VERR_DISCONN_CMD; 213 + else if (rqst->discon_cmd.desc_len != 214 + fcnvme_lsdesc_len( 215 + sizeof(struct fcnvme_lsdesc_disconn_cmd))) 216 + ret = VERR_DISCONN_CMD_LEN; 217 + /* 218 + * As the standard changed on the LS, check if old format and scope 219 + * something other than Association (e.g. 0). 220 + */ 221 + else if (rqst->discon_cmd.rsvd8[0]) 222 + ret = VERR_DISCONN_SCOPE; 223 + 224 + return ret; 225 + } 226 + 227 + #endif /* _NVME_FC_TRANSPORT_H */
+5 -2
drivers/nvme/host/lightnvm.c
··· 171 171 __le32 tdresv; 172 172 __le32 thresv; 173 173 __le32 rsvd2[8]; 174 - __u8 blk[0]; 174 + __u8 blk[]; 175 175 }; 176 176 177 177 struct nvme_nvm_id20_addrf { ··· 961 961 geo = &dev->geo; 962 962 geo->csecs = 1 << ns->lba_shift; 963 963 geo->sos = ns->ms; 964 - geo->ext = ns->ext; 964 + if (ns->features & NVME_NS_EXT_LBAS) 965 + geo->ext = true; 966 + else 967 + geo->ext = false; 965 968 geo->mdts = ns->ctrl->max_hw_sectors; 966 969 967 970 dev->q = q;
+12 -4
drivers/nvme/host/multipath.c
··· 3 3 * Copyright (c) 2017-2018 Christoph Hellwig. 4 4 */ 5 5 6 + #include <linux/backing-dev.h> 6 7 #include <linux/moduleparam.h> 7 8 #include <trace/events/block.h> 8 9 #include "nvme.h" ··· 294 293 static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, 295 294 struct bio *bio) 296 295 { 297 - struct nvme_ns_head *head = q->queuedata; 296 + struct nvme_ns_head *head = bio->bi_disk->private_data; 298 297 struct device *dev = disk_to_dev(head->disk); 299 298 struct nvme_ns *ns; 300 299 blk_qc_t ret = BLK_QC_T_NONE; ··· 372 371 * We also do this for private namespaces as the namespace sharing data could 373 372 * change after a rescan. 374 373 */ 375 - if (!(ctrl->subsys->cmic & (1 << 1)) || !multipath) 374 + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) 376 375 return 0; 377 376 378 377 q = blk_alloc_queue(nvme_ns_head_make_request, ctrl->numa_node); 379 378 if (!q) 380 379 goto out; 381 - q->queuedata = head; 382 380 blk_queue_flag_set(QUEUE_FLAG_NONROT, q); 383 381 /* set to a default value for 512 until disk is validated */ 384 382 blk_queue_logical_block_size(q, 512); ··· 666 666 nvme_mpath_set_live(ns); 667 667 mutex_unlock(&ns->head->lock); 668 668 } 669 + 670 + if (bdi_cap_stable_pages_required(ns->queue->backing_dev_info)) { 671 + struct backing_dev_info *info = 672 + ns->head->disk->queue->backing_dev_info; 673 + 674 + info->capabilities |= BDI_CAP_STABLE_WRITES; 675 + } 669 676 } 670 677 671 678 void nvme_mpath_remove_disk(struct nvme_ns_head *head) ··· 694 687 int error; 695 688 696 689 /* check if multipath is enabled and we have the capability */ 697 - if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3))) 690 + if (!multipath || !ctrl->subsys || 691 + !(ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA)) 698 692 return 0; 699 693 700 694 ctrl->anacap = id->anacap;
+25 -3
drivers/nvme/host/nvme.h
··· 16 16 #include <linux/fault-inject.h> 17 17 #include <linux/rcupdate.h> 18 18 #include <linux/wait.h> 19 + #include <linux/t10-pi.h> 19 20 20 21 #include <trace/events/block.h> 21 22 ··· 31 30 32 31 #ifdef CONFIG_ARCH_NO_SG_CHAIN 33 32 #define NVME_INLINE_SG_CNT 0 33 + #define NVME_INLINE_METADATA_SG_CNT 0 34 34 #else 35 35 #define NVME_INLINE_SG_CNT 2 36 + #define NVME_INLINE_METADATA_SG_CNT 1 36 37 #endif 37 38 38 39 extern struct workqueue_struct *nvme_wq; ··· 231 228 u32 page_size; 232 229 u32 max_hw_sectors; 233 230 u32 max_segments; 231 + u32 max_integrity_segments; 234 232 u16 crdt[3]; 235 233 u16 oncs; 236 234 u16 oacs; ··· 356 352 struct nvme_ns_ids ids; 357 353 struct list_head entry; 358 354 struct kref ref; 355 + bool shared; 359 356 int instance; 360 357 #ifdef CONFIG_NVME_MULTIPATH 361 358 struct gendisk *disk; ··· 366 361 struct mutex lock; 367 362 struct nvme_ns __rcu *current_path[]; 368 363 #endif 364 + }; 365 + 366 + enum nvme_ns_features { 367 + NVME_NS_EXT_LBAS = 1 << 0, /* support extended LBA format */ 368 + NVME_NS_METADATA_SUPPORTED = 1 << 1, /* support getting generated md */ 369 369 }; 370 370 371 371 struct nvme_ns { ··· 392 382 u16 ms; 393 383 u16 sgs; 394 384 u32 sws; 395 - bool ext; 396 385 u8 pi_type; 386 + unsigned long features; 397 387 unsigned long flags; 398 388 #define NVME_NS_REMOVING 0 399 389 #define NVME_NS_DEAD 1 400 390 #define NVME_NS_ANA_PENDING 2 401 - u16 noiob; 402 391 403 392 struct nvme_fault_inject fault_inject; 404 393 405 394 }; 395 + 396 + /* NVMe ns supports metadata actions by the controller (generate/strip) */ 397 + static inline bool nvme_ns_has_pi(struct nvme_ns *ns) 398 + { 399 + return ns->pi_type && ns->ms == sizeof(struct t10_pi_tuple); 400 + } 406 401 407 402 struct nvme_ctrl_ops { 408 403 const char *name; ··· 464 449 return lba << (ns->lba_shift - SECTOR_SHIFT); 465 450 } 466 451 452 + /* 453 + * Convert byte length to nvme's 0-based num dwords 454 + */ 455 + static inline u32 
nvme_bytes_to_numd(size_t len) 456 + { 457 + return (len >> 2) - 1; 458 + } 459 + 467 460 static inline void nvme_end_request(struct request *req, __le16 status, 468 461 union nvme_result result) 469 462 { ··· 512 489 void nvme_uninit_ctrl(struct nvme_ctrl *ctrl); 513 490 void nvme_start_ctrl(struct nvme_ctrl *ctrl); 514 491 void nvme_stop_ctrl(struct nvme_ctrl *ctrl); 515 - void nvme_put_ctrl(struct nvme_ctrl *ctrl); 516 492 int nvme_init_identify(struct nvme_ctrl *ctrl); 517 493 518 494 void nvme_remove_namespaces(struct nvme_ctrl *ctrl);
+66 -51
drivers/nvme/host/pci.c
··· 68 68 module_param_cb(io_queue_depth, &io_queue_depth_ops, &io_queue_depth, 0644); 69 69 MODULE_PARM_DESC(io_queue_depth, "set io queue depth, should >= 2"); 70 70 71 + static int io_queue_count_set(const char *val, const struct kernel_param *kp) 72 + { 73 + unsigned int n; 74 + int ret; 75 + 76 + ret = kstrtouint(val, 10, &n); 77 + if (ret != 0 || n > num_possible_cpus()) 78 + return -EINVAL; 79 + return param_set_uint(val, kp); 80 + } 81 + 82 + static const struct kernel_param_ops io_queue_count_ops = { 83 + .set = io_queue_count_set, 84 + .get = param_get_uint, 85 + }; 86 + 71 87 static unsigned int write_queues; 72 - module_param(write_queues, uint, 0644); 88 + module_param_cb(write_queues, &io_queue_count_ops, &write_queues, 0644); 73 89 MODULE_PARM_DESC(write_queues, 74 90 "Number of queues to use for writes. If not set, reads and writes " 75 91 "will share a queue set."); 76 92 77 93 static unsigned int poll_queues; 78 - module_param(poll_queues, uint, 0644); 94 + module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644); 79 95 MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO."); 80 96 81 97 struct nvme_dev; ··· 144 128 dma_addr_t host_mem_descs_dma; 145 129 struct nvme_host_mem_buf_desc *host_mem_descs; 146 130 void **host_mem_desc_bufs; 131 + unsigned int nr_allocated_queues; 132 + unsigned int nr_write_queues; 133 + unsigned int nr_poll_queues; 147 134 }; 148 135 149 136 static int io_queue_depth_set(const char *val, const struct kernel_param *kp) ··· 185 166 void *sq_cmds; 186 167 /* only used for poll queues: */ 187 168 spinlock_t cq_poll_lock ____cacheline_aligned_in_smp; 188 - volatile struct nvme_completion *cqes; 169 + struct nvme_completion *cqes; 189 170 dma_addr_t sq_dma_addr; 190 171 dma_addr_t cq_dma_addr; 191 172 u32 __iomem *q_db; 192 173 u16 q_depth; 193 174 u16 cq_vector; 194 175 u16 sq_tail; 195 - u16 last_sq_tail; 196 176 u16 cq_head; 197 177 u16 qid; 198 178 u8 cq_phase; ··· 227 209 struct scatterlist 
*sg; 228 210 }; 229 211 230 - static unsigned int max_io_queues(void) 212 + static inline unsigned int nvme_dbbuf_size(struct nvme_dev *dev) 231 213 { 232 - return num_possible_cpus() + write_queues + poll_queues; 233 - } 234 - 235 - static unsigned int max_queue_count(void) 236 - { 237 - /* IO queues + admin queue */ 238 - return 1 + max_io_queues(); 239 - } 240 - 241 - static inline unsigned int nvme_dbbuf_size(u32 stride) 242 - { 243 - return (max_queue_count() * 8 * stride); 214 + return dev->nr_allocated_queues * 8 * dev->db_stride; 244 215 } 245 216 246 217 static int nvme_dbbuf_dma_alloc(struct nvme_dev *dev) 247 218 { 248 - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); 219 + unsigned int mem_size = nvme_dbbuf_size(dev); 249 220 250 221 if (dev->dbbuf_dbs) 251 222 return 0; ··· 259 252 260 253 static void nvme_dbbuf_dma_free(struct nvme_dev *dev) 261 254 { 262 - unsigned int mem_size = nvme_dbbuf_size(dev->db_stride); 255 + unsigned int mem_size = nvme_dbbuf_size(dev); 263 256 264 257 if (dev->dbbuf_dbs) { 265 258 dma_free_coherent(dev->dev, mem_size, ··· 453 446 return 0; 454 447 } 455 448 456 - /* 457 - * Write sq tail if we are asked to, or if the next command would wrap. 
458 - */ 459 - static inline void nvme_write_sq_db(struct nvme_queue *nvmeq, bool write_sq) 449 + static inline void nvme_write_sq_db(struct nvme_queue *nvmeq) 460 450 { 461 - if (!write_sq) { 462 - u16 next_tail = nvmeq->sq_tail + 1; 463 - 464 - if (next_tail == nvmeq->q_depth) 465 - next_tail = 0; 466 - if (next_tail != nvmeq->last_sq_tail) 467 - return; 468 - } 469 - 470 451 if (nvme_dbbuf_update_and_check_event(nvmeq->sq_tail, 471 452 nvmeq->dbbuf_sq_db, nvmeq->dbbuf_sq_ei)) 472 453 writel(nvmeq->sq_tail, nvmeq->q_db); 473 - nvmeq->last_sq_tail = nvmeq->sq_tail; 474 454 } 475 455 476 456 /** ··· 474 480 cmd, sizeof(*cmd)); 475 481 if (++nvmeq->sq_tail == nvmeq->q_depth) 476 482 nvmeq->sq_tail = 0; 477 - nvme_write_sq_db(nvmeq, write_sq); 483 + if (write_sq) 484 + nvme_write_sq_db(nvmeq); 478 485 spin_unlock(&nvmeq->sq_lock); 479 486 } 480 487 ··· 484 489 struct nvme_queue *nvmeq = hctx->driver_data; 485 490 486 491 spin_lock(&nvmeq->sq_lock); 487 - if (nvmeq->sq_tail != nvmeq->last_sq_tail) 488 - nvme_write_sq_db(nvmeq, true); 492 + nvme_write_sq_db(nvmeq); 489 493 spin_unlock(&nvmeq->sq_lock); 490 494 } 491 495 ··· 916 922 /* We read the CQE phase first to check if the rest of the entry is valid */ 917 923 static inline bool nvme_cqe_pending(struct nvme_queue *nvmeq) 918 924 { 919 - return (le16_to_cpu(nvmeq->cqes[nvmeq->cq_head].status) & 1) == 920 - nvmeq->cq_phase; 925 + struct nvme_completion *hcqe = &nvmeq->cqes[nvmeq->cq_head]; 926 + 927 + return (le16_to_cpu(READ_ONCE(hcqe->status)) & 1) == nvmeq->cq_phase; 921 928 } 922 929 923 930 static inline void nvme_ring_cq_doorbell(struct nvme_queue *nvmeq) ··· 939 944 940 945 static inline void nvme_handle_cqe(struct nvme_queue *nvmeq, u16 idx) 941 946 { 942 - volatile struct nvme_completion *cqe = &nvmeq->cqes[idx]; 947 + struct nvme_completion *cqe = &nvmeq->cqes[idx]; 943 948 struct request *req; 944 949 945 950 if (unlikely(cqe->command_id >= nvmeq->q_depth)) { ··· 1496 1501 struct nvme_dev *dev = 
nvmeq->dev; 1497 1502 1498 1503 nvmeq->sq_tail = 0; 1499 - nvmeq->last_sq_tail = 0; 1500 1504 nvmeq->cq_head = 0; 1501 1505 nvmeq->cq_phase = 1; 1502 1506 nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; ··· 1997 2003 static void nvme_calc_irq_sets(struct irq_affinity *affd, unsigned int nrirqs) 1998 2004 { 1999 2005 struct nvme_dev *dev = affd->priv; 2000 - unsigned int nr_read_queues; 2006 + unsigned int nr_read_queues, nr_write_queues = dev->nr_write_queues; 2001 2007 2002 2008 /* 2003 2009 * If there is no interupt available for queues, ensure that ··· 2013 2019 if (!nrirqs) { 2014 2020 nrirqs = 1; 2015 2021 nr_read_queues = 0; 2016 - } else if (nrirqs == 1 || !write_queues) { 2022 + } else if (nrirqs == 1 || !nr_write_queues) { 2017 2023 nr_read_queues = 0; 2018 - } else if (write_queues >= nrirqs) { 2024 + } else if (nr_write_queues >= nrirqs) { 2019 2025 nr_read_queues = 1; 2020 2026 } else { 2021 - nr_read_queues = nrirqs - write_queues; 2027 + nr_read_queues = nrirqs - nr_write_queues; 2022 2028 } 2023 2029 2024 2030 dev->io_queues[HCTX_TYPE_DEFAULT] = nrirqs - nr_read_queues; ··· 2042 2048 * Poll queues don't need interrupts, but we need at least one IO 2043 2049 * queue left over for non-polled IO. 
2044 2050 */ 2045 - this_p_queues = poll_queues; 2051 + this_p_queues = dev->nr_poll_queues; 2046 2052 if (this_p_queues >= nr_io_queues) { 2047 2053 this_p_queues = nr_io_queues - 1; 2048 2054 irq_queues = 1; ··· 2072 2078 __nvme_disable_io_queues(dev, nvme_admin_delete_cq); 2073 2079 } 2074 2080 2081 + static unsigned int nvme_max_io_queues(struct nvme_dev *dev) 2082 + { 2083 + return num_possible_cpus() + dev->nr_write_queues + dev->nr_poll_queues; 2084 + } 2085 + 2075 2086 static int nvme_setup_io_queues(struct nvme_dev *dev) 2076 2087 { 2077 2088 struct nvme_queue *adminq = &dev->queues[0]; 2078 2089 struct pci_dev *pdev = to_pci_dev(dev->dev); 2079 - int result, nr_io_queues; 2090 + unsigned int nr_io_queues; 2080 2091 unsigned long size; 2092 + int result; 2081 2093 2082 - nr_io_queues = max_io_queues(); 2094 + /* 2095 + * Sample the module parameters once at reset time so that we have 2096 + * stable values to work with. 2097 + */ 2098 + dev->nr_write_queues = write_queues; 2099 + dev->nr_poll_queues = poll_queues; 2083 2100 2084 2101 /* 2085 2102 * If tags are shared with admin queue (Apple bug), then ··· 2098 2093 */ 2099 2094 if (dev->ctrl.quirks & NVME_QUIRK_SHARED_TAGS) 2100 2095 nr_io_queues = 1; 2096 + else 2097 + nr_io_queues = min(nvme_max_io_queues(dev), 2098 + dev->nr_allocated_queues - 1); 2101 2099 2102 2100 result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); 2103 2101 if (result < 0) ··· 2573 2565 goto out; 2574 2566 } 2575 2567 2568 + /* 2569 + * We do not support an SGL for metadata (yet), so we are limited to a 2570 + * single integrity segment for the separate metadata pointer. 
2571 + */ 2572 + dev->ctrl.max_integrity_segments = 1; 2573 + 2576 2574 result = nvme_init_identify(&dev->ctrl); 2577 2575 if (result) 2578 2576 goto out; ··· 2781 2767 if (!dev) 2782 2768 return -ENOMEM; 2783 2769 2784 - dev->queues = kcalloc_node(max_queue_count(), sizeof(struct nvme_queue), 2785 - GFP_KERNEL, node); 2770 + dev->nr_write_queues = write_queues; 2771 + dev->nr_poll_queues = poll_queues; 2772 + dev->nr_allocated_queues = nvme_max_io_queues(dev) + 1; 2773 + dev->queues = kcalloc_node(dev->nr_allocated_queues, 2774 + sizeof(struct nvme_queue), GFP_KERNEL, node); 2786 2775 if (!dev->queues) 2787 2776 goto free; 2788 2777 ··· 3148 3131 BUILD_BUG_ON(sizeof(struct nvme_delete_queue) != 64); 3149 3132 BUILD_BUG_ON(IRQ_AFFINITY_MAX_SETS < 2); 3150 3133 3151 - write_queues = min(write_queues, num_possible_cpus()); 3152 - poll_queues = min(poll_queues, num_possible_cpus()); 3153 3134 return pci_register_driver(&nvme_driver); 3154 3135 } 3155 3136
+294 -27
drivers/nvme/host/rdma.c
··· 34 34 35 35 #define NVME_RDMA_MAX_INLINE_SEGMENTS 4 36 36 37 + #define NVME_RDMA_DATA_SGL_SIZE \ 38 + (sizeof(struct scatterlist) * NVME_INLINE_SG_CNT) 39 + #define NVME_RDMA_METADATA_SGL_SIZE \ 40 + (sizeof(struct scatterlist) * NVME_INLINE_METADATA_SG_CNT) 41 + 37 42 struct nvme_rdma_device { 38 43 struct ib_device *dev; 39 44 struct ib_pd *pd; ··· 53 48 u64 dma; 54 49 }; 55 50 51 + struct nvme_rdma_sgl { 52 + int nents; 53 + struct sg_table sg_table; 54 + }; 55 + 56 56 struct nvme_rdma_queue; 57 57 struct nvme_rdma_request { 58 58 struct nvme_request req; ··· 68 58 refcount_t ref; 69 59 struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; 70 60 u32 num_sge; 71 - int nents; 72 61 struct ib_reg_wr reg_wr; 73 62 struct ib_cqe reg_cqe; 74 63 struct nvme_rdma_queue *queue; 75 - struct sg_table sg_table; 76 - struct scatterlist first_sgl[]; 64 + struct nvme_rdma_sgl data_sgl; 65 + struct nvme_rdma_sgl *metadata_sgl; 66 + bool use_sig_mr; 77 67 }; 78 68 79 69 enum nvme_rdma_queue_flags { ··· 95 85 struct rdma_cm_id *cm_id; 96 86 int cm_error; 97 87 struct completion cm_done; 88 + bool pi_support; 98 89 }; 99 90 100 91 struct nvme_rdma_ctrl { ··· 272 261 init_attr.qp_type = IB_QPT_RC; 273 262 init_attr.send_cq = queue->ib_cq; 274 263 init_attr.recv_cq = queue->ib_cq; 264 + if (queue->pi_support) 265 + init_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; 275 266 276 267 ret = rdma_create_qp(queue->cm_id, dev->pd, &init_attr); 277 268 ··· 302 289 req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL); 303 290 if (!req->sqe.data) 304 291 return -ENOMEM; 292 + 293 + /* metadata nvme_rdma_sgl struct is located after command's data SGL */ 294 + if (queue->pi_support) 295 + req->metadata_sgl = (void *)nvme_req(rq) + 296 + sizeof(struct nvme_rdma_request) + 297 + NVME_RDMA_DATA_SGL_SIZE; 305 298 306 299 req->queue = queue; 307 300 ··· 419 400 dev = queue->device; 420 401 ibdev = dev->dev; 421 402 403 + if (queue->pi_support) 404 + ib_mr_pool_destroy(queue->qp, 
&queue->qp->sig_mrs); 422 405 ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); 423 406 424 407 /* ··· 437 416 nvme_rdma_dev_put(dev); 438 417 } 439 418 440 - static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) 419 + static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev, bool pi_support) 441 420 { 442 - return min_t(u32, NVME_RDMA_MAX_SEGMENTS, 443 - ibdev->attrs.max_fast_reg_page_list_len - 1); 421 + u32 max_page_list_len; 422 + 423 + if (pi_support) 424 + max_page_list_len = ibdev->attrs.max_pi_fast_reg_page_list_len; 425 + else 426 + max_page_list_len = ibdev->attrs.max_fast_reg_page_list_len; 427 + 428 + return min_t(u32, NVME_RDMA_MAX_SEGMENTS, max_page_list_len - 1); 444 429 } 445 430 446 431 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) ··· 503 476 * misaligned we'll end up using two entries for a single data page, 504 477 * so one additional entry is required. 505 478 */ 506 - pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev) + 1; 479 + pages_per_mr = nvme_rdma_get_max_fr_pages(ibdev, queue->pi_support) + 1; 507 480 ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, 508 481 queue->queue_size, 509 482 IB_MR_TYPE_MEM_REG, ··· 515 488 goto out_destroy_ring; 516 489 } 517 490 491 + if (queue->pi_support) { 492 + ret = ib_mr_pool_init(queue->qp, &queue->qp->sig_mrs, 493 + queue->queue_size, IB_MR_TYPE_INTEGRITY, 494 + pages_per_mr, pages_per_mr); 495 + if (ret) { 496 + dev_err(queue->ctrl->ctrl.device, 497 + "failed to initialize PI MR pool sized %d for QID %d\n", 498 + queue->queue_size, idx); 499 + goto out_destroy_mr_pool; 500 + } 501 + } 502 + 518 503 set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); 519 504 520 505 return 0; 521 506 507 + out_destroy_mr_pool: 508 + ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); 522 509 out_destroy_ring: 523 510 nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, 524 511 sizeof(struct nvme_completion), DMA_FROM_DEVICE); ··· 554 513 555 514 queue = 
&ctrl->queues[idx]; 556 515 queue->ctrl = ctrl; 516 + if (idx && ctrl->ctrl.max_integrity_segments) 517 + queue->pi_support = true; 518 + else 519 + queue->pi_support = false; 557 520 init_completion(&queue->cm_done); 558 521 559 522 if (idx > 0) ··· 768 723 set->reserved_tags = 2; /* connect + keep-alive */ 769 724 set->numa_node = nctrl->numa_node; 770 725 set->cmd_size = sizeof(struct nvme_rdma_request) + 771 - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); 726 + NVME_RDMA_DATA_SGL_SIZE; 772 727 set->driver_data = ctrl; 773 728 set->nr_hw_queues = 1; 774 729 set->timeout = ADMIN_TIMEOUT; ··· 782 737 set->numa_node = nctrl->numa_node; 783 738 set->flags = BLK_MQ_F_SHOULD_MERGE; 784 739 set->cmd_size = sizeof(struct nvme_rdma_request) + 785 - NVME_INLINE_SG_CNT * sizeof(struct scatterlist); 740 + NVME_RDMA_DATA_SGL_SIZE; 741 + if (nctrl->max_integrity_segments) 742 + set->cmd_size += sizeof(struct nvme_rdma_sgl) + 743 + NVME_RDMA_METADATA_SGL_SIZE; 786 744 set->driver_data = ctrl; 787 745 set->nr_hw_queues = nctrl->queue_count - 1; 788 746 set->timeout = NVME_IO_TIMEOUT; ··· 818 770 static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, 819 771 bool new) 820 772 { 773 + bool pi_capable = false; 821 774 int error; 822 775 823 776 error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); ··· 828 779 ctrl->device = ctrl->queues[0].device; 829 780 ctrl->ctrl.numa_node = dev_to_node(ctrl->device->dev->dma_device); 830 781 831 - ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); 782 + /* T10-PI support */ 783 + if (ctrl->device->dev->attrs.device_cap_flags & 784 + IB_DEVICE_INTEGRITY_HANDOVER) 785 + pi_capable = true; 786 + 787 + ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev, 788 + pi_capable); 832 789 833 790 /* 834 791 * Bind the async event SQE DMA mapping to the admin queue lifetime. 
··· 876 821 877 822 ctrl->ctrl.max_segments = ctrl->max_fr_pages; 878 823 ctrl->ctrl.max_hw_sectors = ctrl->max_fr_pages << (ilog2(SZ_4K) - 9); 824 + if (pi_capable) 825 + ctrl->ctrl.max_integrity_segments = ctrl->max_fr_pages; 826 + else 827 + ctrl->ctrl.max_integrity_segments = 0; 879 828 880 829 blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); 881 830 ··· 1208 1149 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 1209 1150 struct nvme_rdma_device *dev = queue->device; 1210 1151 struct ib_device *ibdev = dev->dev; 1152 + struct list_head *pool = &queue->qp->rdma_mrs; 1211 1153 1212 1154 if (!blk_rq_nr_phys_segments(rq)) 1213 1155 return; 1214 1156 1157 + if (blk_integrity_rq(rq)) { 1158 + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, 1159 + req->metadata_sgl->nents, rq_dma_dir(rq)); 1160 + sg_free_table_chained(&req->metadata_sgl->sg_table, 1161 + NVME_INLINE_METADATA_SG_CNT); 1162 + } 1163 + 1164 + if (req->use_sig_mr) 1165 + pool = &queue->qp->sig_mrs; 1166 + 1215 1167 if (req->mr) { 1216 - ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); 1168 + ib_mr_pool_put(queue->qp, pool, req->mr); 1217 1169 req->mr = NULL; 1218 1170 } 1219 1171 1220 - ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); 1221 - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); 1172 + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, 1173 + rq_dma_dir(rq)); 1174 + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); 1222 1175 } 1223 1176 1224 1177 static int nvme_rdma_set_sg_null(struct nvme_command *c) ··· 1249 1178 int count) 1250 1179 { 1251 1180 struct nvme_sgl_desc *sg = &c->common.dptr.sgl; 1252 - struct scatterlist *sgl = req->sg_table.sgl; 1181 + struct scatterlist *sgl = req->data_sgl.sg_table.sgl; 1253 1182 struct ib_sge *sge = &req->sge[1]; 1254 1183 u32 len = 0; 1255 1184 int i; ··· 1274 1203 { 1275 1204 struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; 1276 1205 1277 - sg->addr = 
cpu_to_le64(sg_dma_address(req->sg_table.sgl)); 1278 - put_unaligned_le24(sg_dma_len(req->sg_table.sgl), sg->length); 1206 + sg->addr = cpu_to_le64(sg_dma_address(req->data_sgl.sg_table.sgl)); 1207 + put_unaligned_le24(sg_dma_len(req->data_sgl.sg_table.sgl), sg->length); 1279 1208 put_unaligned_le32(queue->device->pd->unsafe_global_rkey, sg->key); 1280 1209 sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; 1281 1210 return 0; ··· 1296 1225 * Align the MR to a 4K page size to match the ctrl page size and 1297 1226 * the block virtual boundary. 1298 1227 */ 1299 - nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); 1228 + nr = ib_map_mr_sg(req->mr, req->data_sgl.sg_table.sgl, count, NULL, 1229 + SZ_4K); 1300 1230 if (unlikely(nr < count)) { 1301 1231 ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); 1302 1232 req->mr = NULL; ··· 1328 1256 return 0; 1329 1257 } 1330 1258 1259 + static void nvme_rdma_set_sig_domain(struct blk_integrity *bi, 1260 + struct nvme_command *cmd, struct ib_sig_domain *domain, 1261 + u16 control, u8 pi_type) 1262 + { 1263 + domain->sig_type = IB_SIG_TYPE_T10_DIF; 1264 + domain->sig.dif.bg_type = IB_T10DIF_CRC; 1265 + domain->sig.dif.pi_interval = 1 << bi->interval_exp; 1266 + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); 1267 + if (control & NVME_RW_PRINFO_PRCHK_REF) 1268 + domain->sig.dif.ref_remap = true; 1269 + 1270 + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); 1271 + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); 1272 + domain->sig.dif.app_escape = true; 1273 + if (pi_type == NVME_NS_DPS_PI_TYPE3) 1274 + domain->sig.dif.ref_escape = true; 1275 + } 1276 + 1277 + static void nvme_rdma_set_sig_attrs(struct blk_integrity *bi, 1278 + struct nvme_command *cmd, struct ib_sig_attrs *sig_attrs, 1279 + u8 pi_type) 1280 + { 1281 + u16 control = le16_to_cpu(cmd->rw.control); 1282 + 1283 + memset(sig_attrs, 0, sizeof(*sig_attrs)); 1284 + if (control & NVME_RW_PRINFO_PRACT) { 1285 + /* for 
WRITE_INSERT/READ_STRIP no memory domain */ 1286 + sig_attrs->mem.sig_type = IB_SIG_TYPE_NONE; 1287 + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, 1288 + pi_type); 1289 + /* Clear the PRACT bit since HCA will generate/verify the PI */ 1290 + control &= ~NVME_RW_PRINFO_PRACT; 1291 + cmd->rw.control = cpu_to_le16(control); 1292 + } else { 1293 + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ 1294 + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, 1295 + pi_type); 1296 + nvme_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, 1297 + pi_type); 1298 + } 1299 + } 1300 + 1301 + static void nvme_rdma_set_prot_checks(struct nvme_command *cmd, u8 *mask) 1302 + { 1303 + *mask = 0; 1304 + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_REF) 1305 + *mask |= IB_SIG_CHECK_REFTAG; 1306 + if (le16_to_cpu(cmd->rw.control) & NVME_RW_PRINFO_PRCHK_GUARD) 1307 + *mask |= IB_SIG_CHECK_GUARD; 1308 + } 1309 + 1310 + static void nvme_rdma_sig_done(struct ib_cq *cq, struct ib_wc *wc) 1311 + { 1312 + if (unlikely(wc->status != IB_WC_SUCCESS)) 1313 + nvme_rdma_wr_error(cq, wc, "SIG"); 1314 + } 1315 + 1316 + static int nvme_rdma_map_sg_pi(struct nvme_rdma_queue *queue, 1317 + struct nvme_rdma_request *req, struct nvme_command *c, 1318 + int count, int pi_count) 1319 + { 1320 + struct nvme_rdma_sgl *sgl = &req->data_sgl; 1321 + struct ib_reg_wr *wr = &req->reg_wr; 1322 + struct request *rq = blk_mq_rq_from_pdu(req); 1323 + struct nvme_ns *ns = rq->q->queuedata; 1324 + struct bio *bio = rq->bio; 1325 + struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; 1326 + int nr; 1327 + 1328 + req->mr = ib_mr_pool_get(queue->qp, &queue->qp->sig_mrs); 1329 + if (WARN_ON_ONCE(!req->mr)) 1330 + return -EAGAIN; 1331 + 1332 + nr = ib_map_mr_sg_pi(req->mr, sgl->sg_table.sgl, count, NULL, 1333 + req->metadata_sgl->sg_table.sgl, pi_count, NULL, 1334 + SZ_4K); 1335 + if (unlikely(nr)) 1336 + goto mr_put; 1337 + 1338 + 
nvme_rdma_set_sig_attrs(blk_get_integrity(bio->bi_disk), c, 1339 + req->mr->sig_attrs, ns->pi_type); 1340 + nvme_rdma_set_prot_checks(c, &req->mr->sig_attrs->check_mask); 1341 + 1342 + ib_update_fast_reg_key(req->mr, ib_inc_rkey(req->mr->rkey)); 1343 + 1344 + req->reg_cqe.done = nvme_rdma_sig_done; 1345 + memset(wr, 0, sizeof(*wr)); 1346 + wr->wr.opcode = IB_WR_REG_MR_INTEGRITY; 1347 + wr->wr.wr_cqe = &req->reg_cqe; 1348 + wr->wr.num_sge = 0; 1349 + wr->wr.send_flags = 0; 1350 + wr->mr = req->mr; 1351 + wr->key = req->mr->rkey; 1352 + wr->access = IB_ACCESS_LOCAL_WRITE | 1353 + IB_ACCESS_REMOTE_READ | 1354 + IB_ACCESS_REMOTE_WRITE; 1355 + 1356 + sg->addr = cpu_to_le64(req->mr->iova); 1357 + put_unaligned_le24(req->mr->length, sg->length); 1358 + put_unaligned_le32(req->mr->rkey, sg->key); 1359 + sg->type = NVME_KEY_SGL_FMT_DATA_DESC << 4; 1360 + 1361 + return 0; 1362 + 1363 + mr_put: 1364 + ib_mr_pool_put(queue->qp, &queue->qp->sig_mrs, req->mr); 1365 + req->mr = NULL; 1366 + if (nr < 0) 1367 + return nr; 1368 + return -EINVAL; 1369 + } 1370 + 1331 1371 static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, 1332 1372 struct request *rq, struct nvme_command *c) 1333 1373 { 1334 1374 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 1335 1375 struct nvme_rdma_device *dev = queue->device; 1336 1376 struct ib_device *ibdev = dev->dev; 1377 + int pi_count = 0; 1337 1378 int count, ret; 1338 1379 1339 1380 req->num_sge = 1; ··· 1457 1272 if (!blk_rq_nr_phys_segments(rq)) 1458 1273 return nvme_rdma_set_sg_null(c); 1459 1274 1460 - req->sg_table.sgl = req->first_sgl; 1461 - ret = sg_alloc_table_chained(&req->sg_table, 1462 - blk_rq_nr_phys_segments(rq), req->sg_table.sgl, 1275 + req->data_sgl.sg_table.sgl = (struct scatterlist *)(req + 1); 1276 + ret = sg_alloc_table_chained(&req->data_sgl.sg_table, 1277 + blk_rq_nr_phys_segments(rq), req->data_sgl.sg_table.sgl, 1463 1278 NVME_INLINE_SG_CNT); 1464 1279 if (ret) 1465 1280 return -ENOMEM; 1466 1281 1467 - 
req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); 1282 + req->data_sgl.nents = blk_rq_map_sg(rq->q, rq, 1283 + req->data_sgl.sg_table.sgl); 1468 1284 1469 - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, 1470 - rq_dma_dir(rq)); 1285 + count = ib_dma_map_sg(ibdev, req->data_sgl.sg_table.sgl, 1286 + req->data_sgl.nents, rq_dma_dir(rq)); 1471 1287 if (unlikely(count <= 0)) { 1472 1288 ret = -EIO; 1473 1289 goto out_free_table; 1290 + } 1291 + 1292 + if (blk_integrity_rq(rq)) { 1293 + req->metadata_sgl->sg_table.sgl = 1294 + (struct scatterlist *)(req->metadata_sgl + 1); 1295 + ret = sg_alloc_table_chained(&req->metadata_sgl->sg_table, 1296 + blk_rq_count_integrity_sg(rq->q, rq->bio), 1297 + req->metadata_sgl->sg_table.sgl, 1298 + NVME_INLINE_METADATA_SG_CNT); 1299 + if (unlikely(ret)) { 1300 + ret = -ENOMEM; 1301 + goto out_unmap_sg; 1302 + } 1303 + 1304 + req->metadata_sgl->nents = blk_rq_map_integrity_sg(rq->q, 1305 + rq->bio, req->metadata_sgl->sg_table.sgl); 1306 + pi_count = ib_dma_map_sg(ibdev, 1307 + req->metadata_sgl->sg_table.sgl, 1308 + req->metadata_sgl->nents, 1309 + rq_dma_dir(rq)); 1310 + if (unlikely(pi_count <= 0)) { 1311 + ret = -EIO; 1312 + goto out_free_pi_table; 1313 + } 1314 + } 1315 + 1316 + if (req->use_sig_mr) { 1317 + ret = nvme_rdma_map_sg_pi(queue, req, c, count, pi_count); 1318 + goto out; 1474 1319 } 1475 1320 1476 1321 if (count <= dev->num_inline_segments) { ··· 1521 1306 ret = nvme_rdma_map_sg_fr(queue, req, c, count); 1522 1307 out: 1523 1308 if (unlikely(ret)) 1524 - goto out_unmap_sg; 1309 + goto out_unmap_pi_sg; 1525 1310 1526 1311 return 0; 1527 1312 1313 + out_unmap_pi_sg: 1314 + if (blk_integrity_rq(rq)) 1315 + ib_dma_unmap_sg(ibdev, req->metadata_sgl->sg_table.sgl, 1316 + req->metadata_sgl->nents, rq_dma_dir(rq)); 1317 + out_free_pi_table: 1318 + if (blk_integrity_rq(rq)) 1319 + sg_free_table_chained(&req->metadata_sgl->sg_table, 1320 + NVME_INLINE_METADATA_SG_CNT); 1528 1321 out_unmap_sg: 1529 - 
ib_dma_unmap_sg(ibdev, req->sg_table.sgl, req->nents, rq_dma_dir(rq)); 1322 + ib_dma_unmap_sg(ibdev, req->data_sgl.sg_table.sgl, req->data_sgl.nents, 1323 + rq_dma_dir(rq)); 1530 1324 out_free_table: 1531 - sg_free_table_chained(&req->sg_table, NVME_INLINE_SG_CNT); 1325 + sg_free_table_chained(&req->data_sgl.sg_table, NVME_INLINE_SG_CNT); 1532 1326 return ret; 1533 1327 } 1534 1328 ··· 1985 1761 1986 1762 blk_mq_start_request(rq); 1987 1763 1764 + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY) && 1765 + queue->pi_support && 1766 + (c->common.opcode == nvme_cmd_write || 1767 + c->common.opcode == nvme_cmd_read) && 1768 + nvme_ns_has_pi(ns)) 1769 + req->use_sig_mr = true; 1770 + else 1771 + req->use_sig_mr = false; 1772 + 1988 1773 err = nvme_rdma_map_data(queue, rq, c); 1989 1774 if (unlikely(err < 0)) { 1990 1775 dev_err(queue->ctrl->ctrl.device, ··· 2034 1801 return ib_process_cq_direct(queue->ib_cq, -1); 2035 1802 } 2036 1803 1804 + static void nvme_rdma_check_pi_status(struct nvme_rdma_request *req) 1805 + { 1806 + struct request *rq = blk_mq_rq_from_pdu(req); 1807 + struct ib_mr_status mr_status; 1808 + int ret; 1809 + 1810 + ret = ib_check_mr_status(req->mr, IB_MR_CHECK_SIG_STATUS, &mr_status); 1811 + if (ret) { 1812 + pr_err("ib_check_mr_status failed, ret %d\n", ret); 1813 + nvme_req(rq)->status = NVME_SC_INVALID_PI; 1814 + return; 1815 + } 1816 + 1817 + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { 1818 + switch (mr_status.sig_err.err_type) { 1819 + case IB_SIG_BAD_GUARD: 1820 + nvme_req(rq)->status = NVME_SC_GUARD_CHECK; 1821 + break; 1822 + case IB_SIG_BAD_REFTAG: 1823 + nvme_req(rq)->status = NVME_SC_REFTAG_CHECK; 1824 + break; 1825 + case IB_SIG_BAD_APPTAG: 1826 + nvme_req(rq)->status = NVME_SC_APPTAG_CHECK; 1827 + break; 1828 + } 1829 + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", 1830 + mr_status.sig_err.err_type, mr_status.sig_err.expected, 1831 + mr_status.sig_err.actual); 1832 + } 1833 + } 1834 + 2037 1835 static void 
nvme_rdma_complete_rq(struct request *rq) 2038 1836 { 2039 1837 struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); 2040 1838 struct nvme_rdma_queue *queue = req->queue; 2041 1839 struct ib_device *ibdev = queue->device->dev; 1840 + 1841 + if (req->use_sig_mr) 1842 + nvme_rdma_check_pi_status(req); 2042 1843 2043 1844 nvme_rdma_unmap_data(queue, rq); 2044 1845 ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command), ··· 2193 1926 static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { 2194 1927 .name = "rdma", 2195 1928 .module = THIS_MODULE, 2196 - .flags = NVME_F_FABRICS, 1929 + .flags = NVME_F_FABRICS | NVME_F_METADATA_SUPPORTED, 2197 1930 .reg_read32 = nvmf_reg_read32, 2198 1931 .reg_read64 = nvmf_reg_read64, 2199 1932 .reg_write32 = nvmf_reg_write32,
+47 -17
drivers/nvme/host/tcp.c
··· 60 60 enum nvme_tcp_queue_flags { 61 61 NVME_TCP_Q_ALLOCATED = 0, 62 62 NVME_TCP_Q_LIVE = 1, 63 + NVME_TCP_Q_POLLING = 2, 63 64 }; 64 65 65 66 enum nvme_tcp_recv_state { ··· 76 75 int io_cpu; 77 76 78 77 spinlock_t lock; 78 + struct mutex send_mutex; 79 79 struct list_head send_list; 80 80 81 81 /* recv state */ ··· 133 131 static struct workqueue_struct *nvme_tcp_wq; 134 132 static struct blk_mq_ops nvme_tcp_mq_ops; 135 133 static struct blk_mq_ops nvme_tcp_admin_mq_ops; 134 + static int nvme_tcp_try_send(struct nvme_tcp_queue *queue); 136 135 137 136 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl) 138 137 { ··· 260 257 } 261 258 } 262 259 263 - static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req) 260 + static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req, 261 + bool sync) 264 262 { 265 263 struct nvme_tcp_queue *queue = req->queue; 264 + bool empty; 266 265 267 266 spin_lock(&queue->lock); 267 + empty = list_empty(&queue->send_list) && !queue->request; 268 268 list_add_tail(&req->entry, &queue->send_list); 269 269 spin_unlock(&queue->lock); 270 270 271 - queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 271 + /* 272 + * if we're the first on the send_list and we can try to send 273 + * directly, otherwise queue io_work. Also, only do that if we 274 + * are on the same cpu, so we don't introduce contention. 
275 + */ 276 + if (queue->io_cpu == smp_processor_id() && 277 + sync && empty && mutex_trylock(&queue->send_mutex)) { 278 + nvme_tcp_try_send(queue); 279 + mutex_unlock(&queue->send_mutex); 280 + } else { 281 + queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 282 + } 272 283 } 273 284 274 285 static inline struct nvme_tcp_request * ··· 595 578 req->state = NVME_TCP_SEND_H2C_PDU; 596 579 req->offset = 0; 597 580 598 - nvme_tcp_queue_request(req); 581 + nvme_tcp_queue_request(req, false); 599 582 600 583 return 0; 601 584 } ··· 811 794 { 812 795 struct nvme_tcp_queue *queue; 813 796 814 - read_lock(&sk->sk_callback_lock); 797 + read_lock_bh(&sk->sk_callback_lock); 815 798 queue = sk->sk_user_data; 816 - if (likely(queue && queue->rd_enabled)) 799 + if (likely(queue && queue->rd_enabled) && 800 + !test_bit(NVME_TCP_Q_POLLING, &queue->flags)) 817 801 queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work); 818 - read_unlock(&sk->sk_callback_lock); 802 + read_unlock_bh(&sk->sk_callback_lock); 819 803 } 820 804 821 805 static void nvme_tcp_write_space(struct sock *sk) ··· 885 867 if (last && !queue->data_digest) 886 868 flags |= MSG_EOR; 887 869 else 888 - flags |= MSG_MORE; 870 + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; 889 871 890 872 /* can't zcopy slab pages */ 891 873 if (unlikely(PageSlab(page))) { ··· 924 906 struct nvme_tcp_queue *queue = req->queue; 925 907 struct nvme_tcp_cmd_pdu *pdu = req->pdu; 926 908 bool inline_data = nvme_tcp_has_inline_data(req); 927 - int flags = MSG_DONTWAIT | (inline_data ? 
MSG_MORE : MSG_EOR); 928 909 u8 hdgst = nvme_tcp_hdgst_len(queue); 929 910 int len = sizeof(*pdu) + hdgst - req->offset; 911 + int flags = MSG_DONTWAIT; 930 912 int ret; 913 + 914 + if (inline_data) 915 + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; 916 + else 917 + flags |= MSG_EOR; 931 918 932 919 if (queue->hdr_digest && !req->offset) 933 920 nvme_tcp_hdgst(queue->snd_hash, pdu, sizeof(*pdu)); ··· 972 949 973 950 ret = kernel_sendpage(queue->sock, virt_to_page(pdu), 974 951 offset_in_page(pdu) + req->offset, len, 975 - MSG_DONTWAIT | MSG_MORE); 952 + MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); 976 953 if (unlikely(ret <= 0)) 977 954 return ret; 978 955 ··· 1086 1063 bool pending = false; 1087 1064 int result; 1088 1065 1089 - result = nvme_tcp_try_send(queue); 1090 - if (result > 0) 1091 - pending = true; 1092 - else if (unlikely(result < 0)) 1093 - break; 1066 + if (mutex_trylock(&queue->send_mutex)) { 1067 + result = nvme_tcp_try_send(queue); 1068 + mutex_unlock(&queue->send_mutex); 1069 + if (result > 0) 1070 + pending = true; 1071 + else if (unlikely(result < 0)) 1072 + break; 1073 + } 1094 1074 1095 1075 result = nvme_tcp_try_recv(queue); 1096 1076 if (result > 0) ··· 1345 1319 queue->ctrl = ctrl; 1346 1320 INIT_LIST_HEAD(&queue->send_list); 1347 1321 spin_lock_init(&queue->lock); 1322 + mutex_init(&queue->send_mutex); 1348 1323 INIT_WORK(&queue->io_work, nvme_tcp_io_work); 1349 1324 queue->queue_size = queue_size; 1350 1325 ··· 1570 1543 set->queue_depth = NVME_AQ_MQ_TAG_DEPTH; 1571 1544 set->reserved_tags = 2; /* connect + keep-alive */ 1572 1545 set->numa_node = NUMA_NO_NODE; 1546 + set->flags = BLK_MQ_F_BLOCKING; 1573 1547 set->cmd_size = sizeof(struct nvme_tcp_request); 1574 1548 set->driver_data = ctrl; 1575 1549 set->nr_hw_queues = 1; ··· 1582 1554 set->queue_depth = nctrl->sqsize + 1; 1583 1555 set->reserved_tags = 1; /* fabric connect */ 1584 1556 set->numa_node = NUMA_NO_NODE; 1585 - set->flags = BLK_MQ_F_SHOULD_MERGE; 1557 + set->flags = 
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING; 1586 1558 set->cmd_size = sizeof(struct nvme_tcp_request); 1587 1559 set->driver_data = ctrl; 1588 1560 set->nr_hw_queues = nctrl->queue_count - 1; ··· 2141 2113 ctrl->async_req.curr_bio = NULL; 2142 2114 ctrl->async_req.data_len = 0; 2143 2115 2144 - nvme_tcp_queue_request(&ctrl->async_req); 2116 + nvme_tcp_queue_request(&ctrl->async_req, true); 2145 2117 } 2146 2118 2147 2119 static enum blk_eh_timer_return ··· 2272 2244 2273 2245 blk_mq_start_request(rq); 2274 2246 2275 - nvme_tcp_queue_request(req); 2247 + nvme_tcp_queue_request(req, true); 2276 2248 2277 2249 return BLK_STS_OK; 2278 2250 } ··· 2330 2302 if (!test_bit(NVME_TCP_Q_LIVE, &queue->flags)) 2331 2303 return 0; 2332 2304 2305 + set_bit(NVME_TCP_Q_POLLING, &queue->flags); 2333 2306 if (sk_can_busy_loop(sk) && skb_queue_empty_lockless(&sk->sk_receive_queue)) 2334 2307 sk_busy_loop(sk, true); 2335 2308 nvme_tcp_try_recv(queue); 2309 + clear_bit(NVME_TCP_Q_POLLING, &queue->flags); 2336 2310 return queue->nr_cqe; 2337 2311 } 2338 2312
+1
drivers/nvme/target/Kconfig
··· 4 4 tristate "NVMe Target support" 5 5 depends on BLOCK 6 6 depends on CONFIGFS_FS 7 + select BLK_DEV_INTEGRITY_T10 if BLK_DEV_INTEGRITY 7 8 select SGL_ALLOC 8 9 help 9 10 This enabled target side support for the NVMe protocol, that is
+31 -11
drivers/nvme/target/admin-cmd.c
··· 295 295 296 296 static void nvmet_execute_get_log_page(struct nvmet_req *req) 297 297 { 298 - if (!nvmet_check_data_len(req, nvmet_get_log_page_len(req->cmd))) 298 + if (!nvmet_check_transfer_len(req, nvmet_get_log_page_len(req->cmd))) 299 299 return; 300 300 301 301 switch (req->cmd->get_log_page.lid) { ··· 341 341 { 342 342 struct nvmet_ctrl *ctrl = req->sq->ctrl; 343 343 struct nvme_id_ctrl *id; 344 + u32 cmd_capsule_size; 344 345 u16 status = 0; 345 346 346 347 id = kzalloc(sizeof(*id), GFP_KERNEL); ··· 434 433 435 434 strlcpy(id->subnqn, ctrl->subsys->subsysnqn, sizeof(id->subnqn)); 436 435 437 - /* Max command capsule size is sqe + single page of in-capsule data */ 438 - id->ioccsz = cpu_to_le32((sizeof(struct nvme_command) + 439 - req->port->inline_data_size) / 16); 436 + /* 437 + * Max command capsule size is sqe + in-capsule data size. 438 + * Disable in-capsule data for Metadata capable controllers. 439 + */ 440 + cmd_capsule_size = sizeof(struct nvme_command); 441 + if (!ctrl->pi_support) 442 + cmd_capsule_size += req->port->inline_data_size; 443 + id->ioccsz = cpu_to_le32(cmd_capsule_size / 16); 444 + 440 445 /* Max response capsule size is cqe */ 441 446 id->iorcsz = cpu_to_le32(sizeof(struct nvme_completion) / 16); 442 447 ··· 472 465 473 466 static void nvmet_execute_identify_ns(struct nvmet_req *req) 474 467 { 468 + struct nvmet_ctrl *ctrl = req->sq->ctrl; 475 469 struct nvmet_ns *ns; 476 470 struct nvme_id_ns *id; 477 471 u16 status = 0; ··· 490 482 } 491 483 492 484 /* return an all zeroed buffer if we can't find an active namespace */ 493 - ns = nvmet_find_namespace(req->sq->ctrl, req->cmd->identify.nsid); 485 + ns = nvmet_find_namespace(ctrl, req->cmd->identify.nsid); 494 486 if (!ns) 495 487 goto done; 488 + 489 + nvmet_ns_revalidate(ns); 496 490 497 491 /* 498 492 * nuse = ncap = nsze isn't always true, but we have no way to find ··· 530 520 memcpy(&id->nguid, &ns->nguid, sizeof(id->nguid)); 531 521 532 522 id->lbaf[0].ds = 
ns->blksize_shift; 523 + 524 + if (ctrl->pi_support && nvmet_ns_has_pi(ns)) { 525 + id->dpc = NVME_NS_DPC_PI_FIRST | NVME_NS_DPC_PI_LAST | 526 + NVME_NS_DPC_PI_TYPE1 | NVME_NS_DPC_PI_TYPE2 | 527 + NVME_NS_DPC_PI_TYPE3; 528 + id->mc = NVME_MC_EXTENDED_LBA; 529 + id->dps = ns->pi_type; 530 + id->flbas = NVME_NS_FLBAS_META_EXT; 531 + id->lbaf[0].ms = cpu_to_le16(ns->metadata_size); 532 + } 533 533 534 534 if (ns->readonly) 535 535 id->nsattr |= (1 << 0); ··· 645 625 646 626 static void nvmet_execute_identify(struct nvmet_req *req) 647 627 { 648 - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) 628 + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) 649 629 return; 650 630 651 631 switch (req->cmd->identify.cns) { ··· 674 654 */ 675 655 static void nvmet_execute_abort(struct nvmet_req *req) 676 656 { 677 - if (!nvmet_check_data_len(req, 0)) 657 + if (!nvmet_check_transfer_len(req, 0)) 678 658 return; 679 659 nvmet_set_result(req, 1); 680 660 nvmet_req_complete(req, 0); ··· 763 743 u16 nsqr; 764 744 u16 ncqr; 765 745 766 - if (!nvmet_check_data_len(req, 0)) 746 + if (!nvmet_check_transfer_len(req, 0)) 767 747 return; 768 748 769 749 switch (cdw10 & 0xff) { ··· 835 815 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 836 816 u16 status = 0; 837 817 838 - if (!nvmet_check_data_len(req, nvmet_feat_data_len(req, cdw10))) 818 + if (!nvmet_check_transfer_len(req, nvmet_feat_data_len(req, cdw10))) 839 819 return; 840 820 841 821 switch (cdw10 & 0xff) { ··· 902 882 { 903 883 struct nvmet_ctrl *ctrl = req->sq->ctrl; 904 884 905 - if (!nvmet_check_data_len(req, 0)) 885 + if (!nvmet_check_transfer_len(req, 0)) 906 886 return; 907 887 908 888 mutex_lock(&ctrl->lock); ··· 921 901 { 922 902 struct nvmet_ctrl *ctrl = req->sq->ctrl; 923 903 924 - if (!nvmet_check_data_len(req, 0)) 904 + if (!nvmet_check_transfer_len(req, 0)) 925 905 return; 926 906 927 907 pr_debug("ctrl %d update keep-alive timer for %d secs\n",
+175 -97
drivers/nvme/target/configfs.c
··· 20 20 static LIST_HEAD(nvmet_ports_list); 21 21 struct list_head *nvmet_ports = &nvmet_ports_list; 22 22 23 - static const struct nvmet_transport_name { 23 + struct nvmet_type_name_map { 24 24 u8 type; 25 25 const char *name; 26 - } nvmet_transport_names[] = { 26 + }; 27 + 28 + static struct nvmet_type_name_map nvmet_transport[] = { 27 29 { NVMF_TRTYPE_RDMA, "rdma" }, 28 30 { NVMF_TRTYPE_FC, "fc" }, 29 31 { NVMF_TRTYPE_TCP, "tcp" }, 30 32 { NVMF_TRTYPE_LOOP, "loop" }, 31 33 }; 32 34 35 + static const struct nvmet_type_name_map nvmet_addr_family[] = { 36 + { NVMF_ADDR_FAMILY_PCI, "pcie" }, 37 + { NVMF_ADDR_FAMILY_IP4, "ipv4" }, 38 + { NVMF_ADDR_FAMILY_IP6, "ipv6" }, 39 + { NVMF_ADDR_FAMILY_IB, "ib" }, 40 + { NVMF_ADDR_FAMILY_FC, "fc" }, 41 + { NVMF_ADDR_FAMILY_LOOP, "loop" }, 42 + }; 43 + 44 + static bool nvmet_is_port_enabled(struct nvmet_port *p, const char *caller) 45 + { 46 + if (p->enabled) 47 + pr_err("Disable port '%u' before changing attribute in %s\n", 48 + le16_to_cpu(p->disc_addr.portid), caller); 49 + return p->enabled; 50 + } 51 + 33 52 /* 34 53 * nvmet_port Generic ConfigFS definitions. 35 54 * Used in any place in the ConfigFS tree that refers to an address. 
36 55 */ 37 - static ssize_t nvmet_addr_adrfam_show(struct config_item *item, 38 - char *page) 56 + static ssize_t nvmet_addr_adrfam_show(struct config_item *item, char *page) 39 57 { 40 - switch (to_nvmet_port(item)->disc_addr.adrfam) { 41 - case NVMF_ADDR_FAMILY_IP4: 42 - return sprintf(page, "ipv4\n"); 43 - case NVMF_ADDR_FAMILY_IP6: 44 - return sprintf(page, "ipv6\n"); 45 - case NVMF_ADDR_FAMILY_IB: 46 - return sprintf(page, "ib\n"); 47 - case NVMF_ADDR_FAMILY_FC: 48 - return sprintf(page, "fc\n"); 49 - default: 50 - return sprintf(page, "\n"); 58 + u8 adrfam = to_nvmet_port(item)->disc_addr.adrfam; 59 + int i; 60 + 61 + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { 62 + if (nvmet_addr_family[i].type == adrfam) 63 + return sprintf(page, "%s\n", nvmet_addr_family[i].name); 51 64 } 65 + 66 + return sprintf(page, "\n"); 52 67 } 53 68 54 69 static ssize_t nvmet_addr_adrfam_store(struct config_item *item, 55 70 const char *page, size_t count) 56 71 { 57 72 struct nvmet_port *port = to_nvmet_port(item); 73 + int i; 58 74 59 - if (port->enabled) { 60 - pr_err("Cannot modify address while enabled\n"); 61 - pr_err("Disable the address before modifying\n"); 75 + if (nvmet_is_port_enabled(port, __func__)) 62 76 return -EACCES; 77 + 78 + for (i = 1; i < ARRAY_SIZE(nvmet_addr_family); i++) { 79 + if (sysfs_streq(page, nvmet_addr_family[i].name)) 80 + goto found; 63 81 } 64 82 65 - if (sysfs_streq(page, "ipv4")) { 66 - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP4; 67 - } else if (sysfs_streq(page, "ipv6")) { 68 - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IP6; 69 - } else if (sysfs_streq(page, "ib")) { 70 - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_IB; 71 - } else if (sysfs_streq(page, "fc")) { 72 - port->disc_addr.adrfam = NVMF_ADDR_FAMILY_FC; 73 - } else { 74 - pr_err("Invalid value '%s' for adrfam\n", page); 75 - return -EINVAL; 76 - } 83 + pr_err("Invalid value '%s' for adrfam\n", page); 84 + return -EINVAL; 77 85 86 + found: 87 + port->disc_addr.adrfam = 
nvmet_addr_family[i].type; 78 88 return count; 79 89 } 80 90 ··· 110 100 return -EINVAL; 111 101 } 112 102 113 - if (port->enabled) { 114 - pr_err("Cannot modify address while enabled\n"); 115 - pr_err("Disable the address before modifying\n"); 103 + if (nvmet_is_port_enabled(port, __func__)) 116 104 return -EACCES; 117 - } 105 + 118 106 port->disc_addr.portid = cpu_to_le16(portid); 119 107 return count; 120 108 } ··· 138 130 return -EINVAL; 139 131 } 140 132 141 - if (port->enabled) { 142 - pr_err("Cannot modify address while enabled\n"); 143 - pr_err("Disable the address before modifying\n"); 133 + if (nvmet_is_port_enabled(port, __func__)) 144 134 return -EACCES; 145 - } 146 135 147 136 if (sscanf(page, "%s\n", port->disc_addr.traddr) != 1) 148 137 return -EINVAL; ··· 148 143 149 144 CONFIGFS_ATTR(nvmet_, addr_traddr); 150 145 151 - static ssize_t nvmet_addr_treq_show(struct config_item *item, 152 - char *page) 146 + static const struct nvmet_type_name_map nvmet_addr_treq[] = { 147 + { NVMF_TREQ_NOT_SPECIFIED, "not specified" }, 148 + { NVMF_TREQ_REQUIRED, "required" }, 149 + { NVMF_TREQ_NOT_REQUIRED, "not required" }, 150 + }; 151 + 152 + static ssize_t nvmet_addr_treq_show(struct config_item *item, char *page) 153 153 { 154 - switch (to_nvmet_port(item)->disc_addr.treq & 155 - NVME_TREQ_SECURE_CHANNEL_MASK) { 156 - case NVMF_TREQ_NOT_SPECIFIED: 157 - return sprintf(page, "not specified\n"); 158 - case NVMF_TREQ_REQUIRED: 159 - return sprintf(page, "required\n"); 160 - case NVMF_TREQ_NOT_REQUIRED: 161 - return sprintf(page, "not required\n"); 162 - default: 163 - return sprintf(page, "\n"); 154 + u8 treq = to_nvmet_port(item)->disc_addr.treq & 155 + NVME_TREQ_SECURE_CHANNEL_MASK; 156 + int i; 157 + 158 + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { 159 + if (treq == nvmet_addr_treq[i].type) 160 + return sprintf(page, "%s\n", nvmet_addr_treq[i].name); 164 161 } 162 + 163 + return sprintf(page, "\n"); 165 164 } 166 165 167 166 static ssize_t 
nvmet_addr_treq_store(struct config_item *item, ··· 173 164 { 174 165 struct nvmet_port *port = to_nvmet_port(item); 175 166 u8 treq = port->disc_addr.treq & ~NVME_TREQ_SECURE_CHANNEL_MASK; 167 + int i; 176 168 177 - if (port->enabled) { 178 - pr_err("Cannot modify address while enabled\n"); 179 - pr_err("Disable the address before modifying\n"); 169 + if (nvmet_is_port_enabled(port, __func__)) 180 170 return -EACCES; 171 + 172 + for (i = 0; i < ARRAY_SIZE(nvmet_addr_treq); i++) { 173 + if (sysfs_streq(page, nvmet_addr_treq[i].name)) 174 + goto found; 181 175 } 182 176 183 - if (sysfs_streq(page, "not specified")) { 184 - treq |= NVMF_TREQ_NOT_SPECIFIED; 185 - } else if (sysfs_streq(page, "required")) { 186 - treq |= NVMF_TREQ_REQUIRED; 187 - } else if (sysfs_streq(page, "not required")) { 188 - treq |= NVMF_TREQ_NOT_REQUIRED; 189 - } else { 190 - pr_err("Invalid value '%s' for treq\n", page); 191 - return -EINVAL; 192 - } 177 + pr_err("Invalid value '%s' for treq\n", page); 178 + return -EINVAL; 179 + 180 + found: 181 + treq |= nvmet_addr_treq[i].type; 193 182 port->disc_addr.treq = treq; 194 - 195 183 return count; 196 184 } 197 185 ··· 212 206 pr_err("Invalid value '%s' for trsvcid\n", page); 213 207 return -EINVAL; 214 208 } 215 - if (port->enabled) { 216 - pr_err("Cannot modify address while enabled\n"); 217 - pr_err("Disable the address before modifying\n"); 209 + if (nvmet_is_port_enabled(port, __func__)) 218 210 return -EACCES; 219 - } 220 211 221 212 if (sscanf(page, "%s\n", port->disc_addr.trsvcid) != 1) 222 213 return -EINVAL; ··· 236 233 struct nvmet_port *port = to_nvmet_port(item); 237 234 int ret; 238 235 239 - if (port->enabled) { 240 - pr_err("Cannot modify inline_data_size while port enabled\n"); 241 - pr_err("Disable the port before modifying\n"); 236 + if (nvmet_is_port_enabled(port, __func__)) 242 237 return -EACCES; 243 - } 244 238 ret = kstrtoint(page, 0, &port->inline_data_size); 245 239 if (ret) { 246 240 pr_err("Invalid value '%s' for 
inline_data_size\n", page); ··· 248 248 249 249 CONFIGFS_ATTR(nvmet_, param_inline_data_size); 250 250 251 + #ifdef CONFIG_BLK_DEV_INTEGRITY 252 + static ssize_t nvmet_param_pi_enable_show(struct config_item *item, 253 + char *page) 254 + { 255 + struct nvmet_port *port = to_nvmet_port(item); 256 + 257 + return snprintf(page, PAGE_SIZE, "%d\n", port->pi_enable); 258 + } 259 + 260 + static ssize_t nvmet_param_pi_enable_store(struct config_item *item, 261 + const char *page, size_t count) 262 + { 263 + struct nvmet_port *port = to_nvmet_port(item); 264 + bool val; 265 + 266 + if (strtobool(page, &val)) 267 + return -EINVAL; 268 + 269 + if (port->enabled) { 270 + pr_err("Disable port before setting pi_enable value.\n"); 271 + return -EACCES; 272 + } 273 + 274 + port->pi_enable = val; 275 + return count; 276 + } 277 + 278 + CONFIGFS_ATTR(nvmet_, param_pi_enable); 279 + #endif 280 + 251 281 static ssize_t nvmet_addr_trtype_show(struct config_item *item, 252 282 char *page) 253 283 { 254 284 struct nvmet_port *port = to_nvmet_port(item); 255 285 int i; 256 286 257 - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { 258 - if (port->disc_addr.trtype != nvmet_transport_names[i].type) 259 - continue; 260 - return sprintf(page, "%s\n", nvmet_transport_names[i].name); 287 + for (i = 0; i < ARRAY_SIZE(nvmet_transport); i++) { 288 + if (port->disc_addr.trtype == nvmet_transport[i].type) 289 + return sprintf(page, "%s\n", nvmet_transport[i].name); 261 290 } 262 291 263 292 return sprintf(page, "\n"); ··· 305 276 struct nvmet_port *port = to_nvmet_port(item); 306 277 int i; 307 278 308 - if (port->enabled) { 309 - pr_err("Cannot modify address while enabled\n"); 310 - pr_err("Disable the address before modifying\n"); 279 + if (nvmet_is_port_enabled(port, __func__)) 311 280 return -EACCES; 312 - } 313 281 314 - for (i = 0; i < ARRAY_SIZE(nvmet_transport_names); i++) { 315 - if (sysfs_streq(page, nvmet_transport_names[i].name)) 282 + for (i = 0; i < 
ARRAY_SIZE(nvmet_transport); i++) { 283 + if (sysfs_streq(page, nvmet_transport[i].name)) 316 284 goto found; 317 285 } 318 286 319 287 pr_err("Invalid value '%s' for trtype\n", page); 320 288 return -EINVAL; 289 + 321 290 found: 322 291 memset(&port->disc_addr.tsas, 0, NVMF_TSAS_SIZE); 323 - port->disc_addr.trtype = nvmet_transport_names[i].type; 292 + port->disc_addr.trtype = nvmet_transport[i].type; 324 293 if (port->disc_addr.trtype == NVMF_TRTYPE_RDMA) 325 294 nvmet_port_init_tsas_rdma(port); 326 295 return count; ··· 354 327 355 328 kfree(ns->device_path); 356 329 ret = -ENOMEM; 357 - ns->device_path = kstrndup(page, len, GFP_KERNEL); 330 + ns->device_path = kmemdup_nul(page, len, GFP_KERNEL); 358 331 if (!ns->device_path) 359 332 goto out_unlock; 360 333 ··· 570 543 571 544 CONFIGFS_ATTR(nvmet_ns_, buffered_io); 572 545 546 + static ssize_t nvmet_ns_revalidate_size_store(struct config_item *item, 547 + const char *page, size_t count) 548 + { 549 + struct nvmet_ns *ns = to_nvmet_ns(item); 550 + bool val; 551 + 552 + if (strtobool(page, &val)) 553 + return -EINVAL; 554 + 555 + if (!val) 556 + return -EINVAL; 557 + 558 + mutex_lock(&ns->subsys->lock); 559 + if (!ns->enabled) { 560 + pr_err("enable ns before revalidate.\n"); 561 + mutex_unlock(&ns->subsys->lock); 562 + return -EINVAL; 563 + } 564 + nvmet_ns_revalidate(ns); 565 + mutex_unlock(&ns->subsys->lock); 566 + return count; 567 + } 568 + 569 + CONFIGFS_ATTR_WO(nvmet_ns_, revalidate_size); 570 + 573 571 static struct configfs_attribute *nvmet_ns_attrs[] = { 574 572 &nvmet_ns_attr_device_path, 575 573 &nvmet_ns_attr_device_nguid, ··· 602 550 &nvmet_ns_attr_ana_grpid, 603 551 &nvmet_ns_attr_enable, 604 552 &nvmet_ns_attr_buffered_io, 553 + &nvmet_ns_attr_revalidate_size, 605 554 #ifdef CONFIG_PCI_P2PDMA 606 555 &nvmet_ns_attr_p2pmem, 607 556 #endif ··· 1016 963 return -EINVAL; 1017 964 } 1018 965 1019 - new_model_number = kstrndup(page, len, GFP_KERNEL); 966 + new_model_number = kmemdup_nul(page, len, 
GFP_KERNEL); 1020 967 if (!new_model_number) 1021 968 return -ENOMEM; 1022 969 ··· 1040 987 } 1041 988 CONFIGFS_ATTR(nvmet_subsys_, attr_model); 1042 989 990 + #ifdef CONFIG_BLK_DEV_INTEGRITY 991 + static ssize_t nvmet_subsys_attr_pi_enable_show(struct config_item *item, 992 + char *page) 993 + { 994 + return snprintf(page, PAGE_SIZE, "%d\n", to_subsys(item)->pi_support); 995 + } 996 + 997 + static ssize_t nvmet_subsys_attr_pi_enable_store(struct config_item *item, 998 + const char *page, size_t count) 999 + { 1000 + struct nvmet_subsys *subsys = to_subsys(item); 1001 + bool pi_enable; 1002 + 1003 + if (strtobool(page, &pi_enable)) 1004 + return -EINVAL; 1005 + 1006 + subsys->pi_support = pi_enable; 1007 + return count; 1008 + } 1009 + CONFIGFS_ATTR(nvmet_subsys_, attr_pi_enable); 1010 + #endif 1011 + 1043 1012 static struct configfs_attribute *nvmet_subsys_attrs[] = { 1044 1013 &nvmet_subsys_attr_attr_allow_any_host, 1045 1014 &nvmet_subsys_attr_attr_version, ··· 1069 994 &nvmet_subsys_attr_attr_cntlid_min, 1070 995 &nvmet_subsys_attr_attr_cntlid_max, 1071 996 &nvmet_subsys_attr_attr_model, 997 + #ifdef CONFIG_BLK_DEV_INTEGRITY 998 + &nvmet_subsys_attr_attr_pi_enable, 999 + #endif 1072 1000 NULL, 1073 1001 }; 1074 1002 ··· 1227 1149 .ct_group_ops = &nvmet_referral_group_ops, 1228 1150 }; 1229 1151 1230 - static struct { 1231 - enum nvme_ana_state state; 1232 - const char *name; 1233 - } nvmet_ana_state_names[] = { 1152 + static struct nvmet_type_name_map nvmet_ana_state[] = { 1234 1153 { NVME_ANA_OPTIMIZED, "optimized" }, 1235 1154 { NVME_ANA_NONOPTIMIZED, "non-optimized" }, 1236 1155 { NVME_ANA_INACCESSIBLE, "inaccessible" }, ··· 1242 1167 enum nvme_ana_state state = grp->port->ana_state[grp->grpid]; 1243 1168 int i; 1244 1169 1245 - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { 1246 - if (state != nvmet_ana_state_names[i].state) 1247 - continue; 1248 - return sprintf(page, "%s\n", nvmet_ana_state_names[i].name); 1170 + for (i = 0; i < 
ARRAY_SIZE(nvmet_ana_state); i++) { 1171 + if (state == nvmet_ana_state[i].type) 1172 + return sprintf(page, "%s\n", nvmet_ana_state[i].name); 1249 1173 } 1250 1174 1251 1175 return sprintf(page, "\n"); ··· 1254 1180 const char *page, size_t count) 1255 1181 { 1256 1182 struct nvmet_ana_group *grp = to_ana_group(item); 1183 + enum nvme_ana_state *ana_state = grp->port->ana_state; 1257 1184 int i; 1258 1185 1259 - for (i = 0; i < ARRAY_SIZE(nvmet_ana_state_names); i++) { 1260 - if (sysfs_streq(page, nvmet_ana_state_names[i].name)) 1186 + for (i = 0; i < ARRAY_SIZE(nvmet_ana_state); i++) { 1187 + if (sysfs_streq(page, nvmet_ana_state[i].name)) 1261 1188 goto found; 1262 1189 } 1263 1190 ··· 1267 1192 1268 1193 found: 1269 1194 down_write(&nvmet_ana_sem); 1270 - grp->port->ana_state[grp->grpid] = nvmet_ana_state_names[i].state; 1195 + ana_state[grp->grpid] = (enum nvme_ana_state) nvmet_ana_state[i].type; 1271 1196 nvmet_ana_chgcnt++; 1272 1197 up_write(&nvmet_ana_sem); 1273 - 1274 1198 nvmet_port_send_ana_event(grp->port); 1275 1199 return count; 1276 1200 } ··· 1371 1297 &nvmet_attr_addr_trsvcid, 1372 1298 &nvmet_attr_addr_trtype, 1373 1299 &nvmet_attr_param_inline_data_size, 1300 + #ifdef CONFIG_BLK_DEV_INTEGRITY 1301 + &nvmet_attr_param_pi_enable, 1302 + #endif 1374 1303 NULL, 1375 1304 }; 1376 1305 ··· 1423 1346 port->inline_data_size = -1; /* < 0 == let the transport choose */ 1424 1347 1425 1348 port->disc_addr.portid = cpu_to_le16(portid); 1349 + port->disc_addr.adrfam = NVMF_ADDR_FAMILY_MAX; 1426 1350 port->disc_addr.treq = NVMF_TREQ_DISABLE_SQFLOW; 1427 1351 config_group_init_type_name(&port->group, name, &nvmet_port_type); 1428 1352
+115 -51
drivers/nvme/target/core.c
··· 134 134 struct nvmet_async_event *aen; 135 135 struct nvmet_req *req; 136 136 137 - while (1) { 138 - mutex_lock(&ctrl->lock); 139 - aen = list_first_entry_or_null(&ctrl->async_events, 140 - struct nvmet_async_event, entry); 141 - if (!aen || !ctrl->nr_async_event_cmds) { 142 - mutex_unlock(&ctrl->lock); 143 - break; 144 - } 145 - 137 + mutex_lock(&ctrl->lock); 138 + while (ctrl->nr_async_event_cmds && !list_empty(&ctrl->async_events)) { 139 + aen = list_first_entry(&ctrl->async_events, 140 + struct nvmet_async_event, entry); 146 141 req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 147 142 if (status == 0) 148 143 nvmet_set_result(req, nvmet_async_event_result(aen)); ··· 146 151 kfree(aen); 147 152 148 153 mutex_unlock(&ctrl->lock); 154 + trace_nvmet_async_event(ctrl, req->cqe->result.u32); 149 155 nvmet_req_complete(req, status); 156 + mutex_lock(&ctrl->lock); 150 157 } 158 + mutex_unlock(&ctrl->lock); 151 159 } 152 160 153 161 static void nvmet_async_events_free(struct nvmet_ctrl *ctrl) 154 162 { 155 - struct nvmet_req *req; 163 + struct nvmet_async_event *aen, *tmp; 156 164 157 165 mutex_lock(&ctrl->lock); 158 - while (ctrl->nr_async_event_cmds) { 159 - req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds]; 160 - mutex_unlock(&ctrl->lock); 161 - nvmet_req_complete(req, NVME_SC_INTERNAL | NVME_SC_DNR); 162 - mutex_lock(&ctrl->lock); 166 + list_for_each_entry_safe(aen, tmp, &ctrl->async_events, entry) { 167 + list_del(&aen->entry); 168 + kfree(aen); 163 169 } 164 170 mutex_unlock(&ctrl->lock); 165 171 } ··· 318 322 if (!try_module_get(ops->owner)) 319 323 return -EINVAL; 320 324 321 - ret = ops->add_port(port); 322 - if (ret) { 323 - module_put(ops->owner); 324 - return ret; 325 + /* 326 + * If the user requested PI support and the transport isn't pi capable, 327 + * don't enable the port. 
328 + */ 329 + if (port->pi_enable && !ops->metadata_support) { 330 + pr_err("T10-PI is not supported by transport type %d\n", 331 + port->disc_addr.trtype); 332 + ret = -EINVAL; 333 + goto out_put; 325 334 } 335 + 336 + ret = ops->add_port(port); 337 + if (ret) 338 + goto out_put; 326 339 327 340 /* If the transport didn't set inline_data_size, then disable it. */ 328 341 if (port->inline_data_size < 0) ··· 340 335 port->enabled = true; 341 336 port->tr_ops = ops; 342 337 return 0; 338 + 339 + out_put: 340 + module_put(ops->owner); 341 + return ret; 343 342 } 344 343 345 344 void nvmet_disable_port(struct nvmet_port *port) ··· 521 512 522 513 pr_info("using p2pmem on %s for nsid %d\n", pci_name(p2p_dev), 523 514 ns->nsid); 515 + } 516 + 517 + void nvmet_ns_revalidate(struct nvmet_ns *ns) 518 + { 519 + loff_t oldsize = ns->size; 520 + 521 + if (ns->bdev) 522 + nvmet_bdev_ns_revalidate(ns); 523 + else 524 + nvmet_file_ns_revalidate(ns); 525 + 526 + if (oldsize != ns->size) 527 + nvmet_ns_changed(ns->subsys, ns->nsid); 524 528 } 525 529 526 530 int nvmet_ns_enable(struct nvmet_ns *ns) ··· 786 764 * If this is the admin queue, complete all AERs so that our 787 765 * queue doesn't have outstanding requests on it. 
788 766 */ 789 - if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) { 767 + if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq) 790 768 nvmet_async_events_process(ctrl, status); 791 - nvmet_async_events_free(ctrl); 792 - } 793 769 percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq); 794 770 wait_for_completion(&sq->confirm_done); 795 771 wait_for_completion(&sq->free_done); ··· 893 873 req->sq = sq; 894 874 req->ops = ops; 895 875 req->sg = NULL; 876 + req->metadata_sg = NULL; 896 877 req->sg_cnt = 0; 878 + req->metadata_sg_cnt = 0; 897 879 req->transfer_len = 0; 880 + req->metadata_len = 0; 898 881 req->cqe->status = 0; 899 882 req->cqe->sq_head = 0; 900 883 req->ns = NULL; ··· 959 936 } 960 937 EXPORT_SYMBOL_GPL(nvmet_req_uninit); 961 938 962 - bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len) 939 + bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len) 963 940 { 964 - if (unlikely(data_len != req->transfer_len)) { 941 + if (unlikely(len != req->transfer_len)) { 965 942 req->error_loc = offsetof(struct nvme_common_command, dptr); 966 943 nvmet_req_complete(req, NVME_SC_SGL_INVALID_DATA | NVME_SC_DNR); 967 944 return false; ··· 969 946 970 947 return true; 971 948 } 972 - EXPORT_SYMBOL_GPL(nvmet_check_data_len); 949 + EXPORT_SYMBOL_GPL(nvmet_check_transfer_len); 973 950 974 951 bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len) 975 952 { ··· 982 959 return true; 983 960 } 984 961 985 - int nvmet_req_alloc_sgl(struct nvmet_req *req) 962 + static unsigned int nvmet_data_transfer_len(struct nvmet_req *req) 986 963 { 987 - struct pci_dev *p2p_dev = NULL; 964 + return req->transfer_len - req->metadata_len; 965 + } 988 966 989 - if (IS_ENABLED(CONFIG_PCI_P2PDMA)) { 990 - if (req->sq->ctrl && req->ns) 991 - p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, 992 - req->ns->nsid); 967 + static int nvmet_req_alloc_p2pmem_sgls(struct nvmet_req *req) 968 + { 969 + req->sg = pci_p2pmem_alloc_sgl(req->p2p_dev, &req->sg_cnt, 970 + 
nvmet_data_transfer_len(req)); 971 + if (!req->sg) 972 + goto out_err; 993 973 994 - req->p2p_dev = NULL; 995 - if (req->sq->qid && p2p_dev) { 996 - req->sg = pci_p2pmem_alloc_sgl(p2p_dev, &req->sg_cnt, 997 - req->transfer_len); 998 - if (req->sg) { 999 - req->p2p_dev = p2p_dev; 1000 - return 0; 1001 - } 1002 - } 974 + if (req->metadata_len) { 975 + req->metadata_sg = pci_p2pmem_alloc_sgl(req->p2p_dev, 976 + &req->metadata_sg_cnt, req->metadata_len); 977 + if (!req->metadata_sg) 978 + goto out_free_sg; 979 + } 980 + return 0; 981 + out_free_sg: 982 + pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 983 + out_err: 984 + return -ENOMEM; 985 + } 1003 986 1004 - /* 1005 - * If no P2P memory was available we fallback to using 1006 - * regular memory 1007 - */ 987 + static bool nvmet_req_find_p2p_dev(struct nvmet_req *req) 988 + { 989 + if (!IS_ENABLED(CONFIG_PCI_P2PDMA)) 990 + return false; 991 + 992 + if (req->sq->ctrl && req->sq->qid && req->ns) { 993 + req->p2p_dev = radix_tree_lookup(&req->sq->ctrl->p2p_ns_map, 994 + req->ns->nsid); 995 + if (req->p2p_dev) 996 + return true; 1008 997 } 1009 998 1010 - req->sg = sgl_alloc(req->transfer_len, GFP_KERNEL, &req->sg_cnt); 999 + req->p2p_dev = NULL; 1000 + return false; 1001 + } 1002 + 1003 + int nvmet_req_alloc_sgls(struct nvmet_req *req) 1004 + { 1005 + if (nvmet_req_find_p2p_dev(req) && !nvmet_req_alloc_p2pmem_sgls(req)) 1006 + return 0; 1007 + 1008 + req->sg = sgl_alloc(nvmet_data_transfer_len(req), GFP_KERNEL, 1009 + &req->sg_cnt); 1011 1010 if (unlikely(!req->sg)) 1012 - return -ENOMEM; 1011 + goto out; 1012 + 1013 + if (req->metadata_len) { 1014 + req->metadata_sg = sgl_alloc(req->metadata_len, GFP_KERNEL, 1015 + &req->metadata_sg_cnt); 1016 + if (unlikely(!req->metadata_sg)) 1017 + goto out_free; 1018 + } 1013 1019 1014 1020 return 0; 1021 + out_free: 1022 + sgl_free(req->sg); 1023 + out: 1024 + return -ENOMEM; 1015 1025 } 1016 - EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgl); 1026 + EXPORT_SYMBOL_GPL(nvmet_req_alloc_sgls); 
1017 1027 1018 - void nvmet_req_free_sgl(struct nvmet_req *req) 1028 + void nvmet_req_free_sgls(struct nvmet_req *req) 1019 1029 { 1020 - if (req->p2p_dev) 1030 + if (req->p2p_dev) { 1021 1031 pci_p2pmem_free_sgl(req->p2p_dev, req->sg); 1022 - else 1032 + if (req->metadata_sg) 1033 + pci_p2pmem_free_sgl(req->p2p_dev, req->metadata_sg); 1034 + } else { 1023 1035 sgl_free(req->sg); 1036 + if (req->metadata_sg) 1037 + sgl_free(req->metadata_sg); 1038 + } 1024 1039 1025 1040 req->sg = NULL; 1041 + req->metadata_sg = NULL; 1026 1042 req->sg_cnt = 0; 1043 + req->metadata_sg_cnt = 0; 1027 1044 } 1028 - EXPORT_SYMBOL_GPL(nvmet_req_free_sgl); 1045 + EXPORT_SYMBOL_GPL(nvmet_req_free_sgls); 1029 1046 1030 1047 static inline bool nvmet_cc_en(u32 cc) 1031 1048 { ··· 1420 1357 1421 1358 ida_simple_remove(&cntlid_ida, ctrl->cntlid); 1422 1359 1360 + nvmet_async_events_free(ctrl); 1423 1361 kfree(ctrl->sqs); 1424 1362 kfree(ctrl->cqs); 1425 1363 kfree(ctrl->changed_ns_list);
+4 -4
drivers/nvme/target/discovery.c
··· 171 171 u16 status = 0; 172 172 void *buffer; 173 173 174 - if (!nvmet_check_data_len(req, data_len)) 174 + if (!nvmet_check_transfer_len(req, data_len)) 175 175 return; 176 176 177 177 if (req->cmd->get_log_page.lid != NVME_LOG_DISC) { ··· 244 244 const char model[] = "Linux"; 245 245 u16 status = 0; 246 246 247 - if (!nvmet_check_data_len(req, NVME_IDENTIFY_DATA_SIZE)) 247 + if (!nvmet_check_transfer_len(req, NVME_IDENTIFY_DATA_SIZE)) 248 248 return; 249 249 250 250 if (req->cmd->identify.cns != NVME_ID_CNS_CTRL) { ··· 298 298 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 299 299 u16 stat; 300 300 301 - if (!nvmet_check_data_len(req, 0)) 301 + if (!nvmet_check_transfer_len(req, 0)) 302 302 return; 303 303 304 304 switch (cdw10 & 0xff) { ··· 324 324 u32 cdw10 = le32_to_cpu(req->cmd->common.cdw10); 325 325 u16 stat = 0; 326 326 327 - if (!nvmet_check_data_len(req, 0)) 327 + if (!nvmet_check_transfer_len(req, 0)) 328 328 return; 329 329 330 330 switch (cdw10 & 0xff) {
+9 -6
drivers/nvme/target/fabrics-cmd.c
··· 12 12 u64 val = le64_to_cpu(req->cmd->prop_set.value); 13 13 u16 status = 0; 14 14 15 - if (!nvmet_check_data_len(req, 0)) 15 + if (!nvmet_check_transfer_len(req, 0)) 16 16 return; 17 17 18 18 if (req->cmd->prop_set.attrib & 1) { ··· 41 41 u16 status = 0; 42 42 u64 val = 0; 43 43 44 - if (!nvmet_check_data_len(req, 0)) 44 + if (!nvmet_check_transfer_len(req, 0)) 45 45 return; 46 46 47 47 if (req->cmd->prop_get.attrib & 1) { ··· 156 156 struct nvmet_ctrl *ctrl = NULL; 157 157 u16 status = 0; 158 158 159 - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) 159 + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) 160 160 return; 161 161 162 162 d = kmalloc(sizeof(*d), GFP_KERNEL); ··· 197 197 goto out; 198 198 } 199 199 200 + ctrl->pi_support = ctrl->port->pi_enable && ctrl->subsys->pi_support; 201 + 200 202 uuid_copy(&ctrl->hostid, &d->hostid); 201 203 202 204 status = nvmet_install_queue(ctrl, req); ··· 207 205 goto out; 208 206 } 209 207 210 - pr_info("creating controller %d for subsystem %s for NQN %s.\n", 211 - ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn); 208 + pr_info("creating controller %d for subsystem %s for NQN %s%s.\n", 209 + ctrl->cntlid, ctrl->subsys->subsysnqn, ctrl->hostnqn, 210 + ctrl->pi_support ? " T10-PI is enabled" : ""); 212 211 req->cqe->result.u16 = cpu_to_le16(ctrl->cntlid); 213 212 214 213 out: ··· 226 223 u16 qid = le16_to_cpu(c->qid); 227 224 u16 status = 0; 228 225 229 - if (!nvmet_check_data_len(req, sizeof(struct nvmf_connect_data))) 226 + if (!nvmet_check_transfer_len(req, sizeof(struct nvmf_connect_data))) 230 227 return; 231 228 232 229 d = kmalloc(sizeof(*d), GFP_KERNEL);
+584 -219
drivers/nvme/target/fc.c
··· 14 14 #include "nvmet.h" 15 15 #include <linux/nvme-fc-driver.h> 16 16 #include <linux/nvme-fc.h> 17 + #include "../host/fc.h" 17 18 18 19 19 20 /* *************************** Data Structures/Defines ****************** */ ··· 22 21 23 22 #define NVMET_LS_CTX_COUNT 256 24 23 25 - /* for this implementation, assume small single frame rqst/rsp */ 26 - #define NVME_FC_MAX_LS_BUFFER_SIZE 2048 27 - 28 24 struct nvmet_fc_tgtport; 29 25 struct nvmet_fc_tgt_assoc; 30 26 31 - struct nvmet_fc_ls_iod { 32 - struct nvmefc_tgt_ls_req *lsreq; 27 + struct nvmet_fc_ls_iod { /* for an LS RQST RCV */ 28 + struct nvmefc_ls_rsp *lsrsp; 33 29 struct nvmefc_tgt_fcp_req *fcpreq; /* only if RS */ 34 30 35 - struct list_head ls_list; /* tgtport->ls_list */ 31 + struct list_head ls_rcv_list; /* tgtport->ls_rcv_list */ 36 32 37 33 struct nvmet_fc_tgtport *tgtport; 38 34 struct nvmet_fc_tgt_assoc *assoc; 35 + void *hosthandle; 39 36 40 - u8 *rqstbuf; 41 - u8 *rspbuf; 37 + union nvmefc_ls_requests *rqstbuf; 38 + union nvmefc_ls_responses *rspbuf; 42 39 u16 rqstdatalen; 43 40 dma_addr_t rspdma; 44 41 ··· 44 45 45 46 struct work_struct work; 46 47 } __aligned(sizeof(unsigned long long)); 48 + 49 + struct nvmet_fc_ls_req_op { /* for an LS RQST XMT */ 50 + struct nvmefc_ls_req ls_req; 51 + 52 + struct nvmet_fc_tgtport *tgtport; 53 + void *hosthandle; 54 + 55 + int ls_error; 56 + struct list_head lsreq_list; /* tgtport->ls_req_list */ 57 + bool req_queued; 58 + }; 59 + 47 60 48 61 /* desired maximum for a single sequence - if sg list allows it */ 49 62 #define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) ··· 94 83 }; 95 84 96 85 struct nvmet_fc_tgtport { 97 - 98 86 struct nvmet_fc_target_port fc_target_port; 99 87 100 88 struct list_head tgt_list; /* nvmet_fc_target_list */ ··· 102 92 103 93 struct nvmet_fc_ls_iod *iod; 104 94 spinlock_t lock; 105 - struct list_head ls_list; 95 + struct list_head ls_rcv_list; 96 + struct list_head ls_req_list; 106 97 struct list_head ls_busylist; 107 98 struct list_head 
assoc_list; 99 + struct list_head host_list; 108 100 struct ida assoc_cnt; 109 101 struct nvmet_fc_port_entry *pe; 110 102 struct kref ref; ··· 148 136 struct nvmet_fc_fcp_iod fod[]; /* array of fcp_iods */ 149 137 } __aligned(sizeof(unsigned long long)); 150 138 139 + struct nvmet_fc_hostport { 140 + struct nvmet_fc_tgtport *tgtport; 141 + void *hosthandle; 142 + struct list_head host_list; 143 + struct kref ref; 144 + u8 invalid; 145 + }; 146 + 151 147 struct nvmet_fc_tgt_assoc { 152 148 u64 association_id; 153 149 u32 a_id; 150 + atomic_t terminating; 154 151 struct nvmet_fc_tgtport *tgtport; 152 + struct nvmet_fc_hostport *hostport; 153 + struct nvmet_fc_ls_iod *rcv_disconn; 155 154 struct list_head a_list; 156 155 struct nvmet_fc_tgt_queue *queues[NVMET_NR_QUEUES + 1]; 157 156 struct kref ref; 158 157 struct work_struct del_work; 158 + atomic_t del_work_active; 159 159 }; 160 160 161 161 ··· 251 227 static void nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, 252 228 struct nvmet_fc_fcp_iod *fod); 253 229 static void nvmet_fc_delete_target_assoc(struct nvmet_fc_tgt_assoc *assoc); 230 + static void nvmet_fc_xmt_ls_rsp(struct nvmet_fc_tgtport *tgtport, 231 + struct nvmet_fc_ls_iod *iod); 254 232 255 233 256 234 /* *********************** FC-NVME DMA Handling **************************** */ ··· 344 318 } 345 319 346 320 321 + /* ********************** FC-NVME LS XMT Handling ************************* */ 322 + 323 + 324 + static void 325 + __nvmet_fc_finish_ls_req(struct nvmet_fc_ls_req_op *lsop) 326 + { 327 + struct nvmet_fc_tgtport *tgtport = lsop->tgtport; 328 + struct nvmefc_ls_req *lsreq = &lsop->ls_req; 329 + unsigned long flags; 330 + 331 + spin_lock_irqsave(&tgtport->lock, flags); 332 + 333 + if (!lsop->req_queued) { 334 + spin_unlock_irqrestore(&tgtport->lock, flags); 335 + return; 336 + } 337 + 338 + list_del(&lsop->lsreq_list); 339 + 340 + lsop->req_queued = false; 341 + 342 + spin_unlock_irqrestore(&tgtport->lock, flags); 343 + 344 + 
fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, 345 + (lsreq->rqstlen + lsreq->rsplen), 346 + DMA_BIDIRECTIONAL); 347 + 348 + nvmet_fc_tgtport_put(tgtport); 349 + } 350 + 351 + static int 352 + __nvmet_fc_send_ls_req(struct nvmet_fc_tgtport *tgtport, 353 + struct nvmet_fc_ls_req_op *lsop, 354 + void (*done)(struct nvmefc_ls_req *req, int status)) 355 + { 356 + struct nvmefc_ls_req *lsreq = &lsop->ls_req; 357 + unsigned long flags; 358 + int ret = 0; 359 + 360 + if (!tgtport->ops->ls_req) 361 + return -EOPNOTSUPP; 362 + 363 + if (!nvmet_fc_tgtport_get(tgtport)) 364 + return -ESHUTDOWN; 365 + 366 + lsreq->done = done; 367 + lsop->req_queued = false; 368 + INIT_LIST_HEAD(&lsop->lsreq_list); 369 + 370 + lsreq->rqstdma = fc_dma_map_single(tgtport->dev, lsreq->rqstaddr, 371 + lsreq->rqstlen + lsreq->rsplen, 372 + DMA_BIDIRECTIONAL); 373 + if (fc_dma_mapping_error(tgtport->dev, lsreq->rqstdma)) { 374 + ret = -EFAULT; 375 + goto out_puttgtport; 376 + } 377 + lsreq->rspdma = lsreq->rqstdma + lsreq->rqstlen; 378 + 379 + spin_lock_irqsave(&tgtport->lock, flags); 380 + 381 + list_add_tail(&lsop->lsreq_list, &tgtport->ls_req_list); 382 + 383 + lsop->req_queued = true; 384 + 385 + spin_unlock_irqrestore(&tgtport->lock, flags); 386 + 387 + ret = tgtport->ops->ls_req(&tgtport->fc_target_port, lsop->hosthandle, 388 + lsreq); 389 + if (ret) 390 + goto out_unlink; 391 + 392 + return 0; 393 + 394 + out_unlink: 395 + lsop->ls_error = ret; 396 + spin_lock_irqsave(&tgtport->lock, flags); 397 + lsop->req_queued = false; 398 + list_del(&lsop->lsreq_list); 399 + spin_unlock_irqrestore(&tgtport->lock, flags); 400 + fc_dma_unmap_single(tgtport->dev, lsreq->rqstdma, 401 + (lsreq->rqstlen + lsreq->rsplen), 402 + DMA_BIDIRECTIONAL); 403 + out_puttgtport: 404 + nvmet_fc_tgtport_put(tgtport); 405 + 406 + return ret; 407 + } 408 + 409 + static int 410 + nvmet_fc_send_ls_req_async(struct nvmet_fc_tgtport *tgtport, 411 + struct nvmet_fc_ls_req_op *lsop, 412 + void (*done)(struct nvmefc_ls_req *req, 
int status)) 413 + { 414 + /* don't wait for completion */ 415 + 416 + return __nvmet_fc_send_ls_req(tgtport, lsop, done); 417 + } 418 + 419 + static void 420 + nvmet_fc_disconnect_assoc_done(struct nvmefc_ls_req *lsreq, int status) 421 + { 422 + struct nvmet_fc_ls_req_op *lsop = 423 + container_of(lsreq, struct nvmet_fc_ls_req_op, ls_req); 424 + 425 + __nvmet_fc_finish_ls_req(lsop); 426 + 427 + /* fc-nvme target doesn't care about success or failure of cmd */ 428 + 429 + kfree(lsop); 430 + } 431 + 432 + /* 433 + * This routine sends a FC-NVME LS to disconnect (aka terminate) 434 + * the FC-NVME Association. Terminating the association also 435 + * terminates the FC-NVME connections (per queue, both admin and io 436 + * queues) that are part of the association. E.g. things are torn 437 + * down, and the related FC-NVME Association ID and Connection IDs 438 + * become invalid. 439 + * 440 + * The behavior of the fc-nvme target is such that it's 441 + * understanding of the association and connections will implicitly 442 + * be torn down. The action is implicit as it may be due to a loss of 443 + * connectivity with the fc-nvme host, so the target may never get a 444 + * response even if it tried. As such, the action of this routine 445 + * is to asynchronously send the LS, ignore any results of the LS, and 446 + * continue on with terminating the association. If the fc-nvme host 447 + * is present and receives the LS, it too can tear down. 448 + */ 449 + static void 450 + nvmet_fc_xmt_disconnect_assoc(struct nvmet_fc_tgt_assoc *assoc) 451 + { 452 + struct nvmet_fc_tgtport *tgtport = assoc->tgtport; 453 + struct fcnvme_ls_disconnect_assoc_rqst *discon_rqst; 454 + struct fcnvme_ls_disconnect_assoc_acc *discon_acc; 455 + struct nvmet_fc_ls_req_op *lsop; 456 + struct nvmefc_ls_req *lsreq; 457 + int ret; 458 + 459 + /* 460 + * If ls_req is NULL or no hosthandle, it's an older lldd and no 461 + * message is normal. 
Otherwise, send unless the hostport has 462 + * already been invalidated by the lldd. 463 + */ 464 + if (!tgtport->ops->ls_req || !assoc->hostport || 465 + assoc->hostport->invalid) 466 + return; 467 + 468 + lsop = kzalloc((sizeof(*lsop) + 469 + sizeof(*discon_rqst) + sizeof(*discon_acc) + 470 + tgtport->ops->lsrqst_priv_sz), GFP_KERNEL); 471 + if (!lsop) { 472 + dev_info(tgtport->dev, 473 + "{%d:%d} send Disconnect Association failed: ENOMEM\n", 474 + tgtport->fc_target_port.port_num, assoc->a_id); 475 + return; 476 + } 477 + 478 + discon_rqst = (struct fcnvme_ls_disconnect_assoc_rqst *)&lsop[1]; 479 + discon_acc = (struct fcnvme_ls_disconnect_assoc_acc *)&discon_rqst[1]; 480 + lsreq = &lsop->ls_req; 481 + if (tgtport->ops->lsrqst_priv_sz) 482 + lsreq->private = (void *)&discon_acc[1]; 483 + else 484 + lsreq->private = NULL; 485 + 486 + lsop->tgtport = tgtport; 487 + lsop->hosthandle = assoc->hostport->hosthandle; 488 + 489 + nvmefc_fmt_lsreq_discon_assoc(lsreq, discon_rqst, discon_acc, 490 + assoc->association_id); 491 + 492 + ret = nvmet_fc_send_ls_req_async(tgtport, lsop, 493 + nvmet_fc_disconnect_assoc_done); 494 + if (ret) { 495 + dev_info(tgtport->dev, 496 + "{%d:%d} XMT Disconnect Association failed: %d\n", 497 + tgtport->fc_target_port.port_num, assoc->a_id, ret); 498 + kfree(lsop); 499 + } 500 + } 501 + 502 + 347 503 /* *********************** FC-NVME Port Management ************************ */ 348 504 349 505 ··· 545 337 for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { 546 338 INIT_WORK(&iod->work, nvmet_fc_handle_ls_rqst_work); 547 339 iod->tgtport = tgtport; 548 - list_add_tail(&iod->ls_list, &tgtport->ls_list); 340 + list_add_tail(&iod->ls_rcv_list, &tgtport->ls_rcv_list); 549 341 550 - iod->rqstbuf = kcalloc(2, NVME_FC_MAX_LS_BUFFER_SIZE, 551 - GFP_KERNEL); 342 + iod->rqstbuf = kzalloc(sizeof(union nvmefc_ls_requests) + 343 + sizeof(union nvmefc_ls_responses), 344 + GFP_KERNEL); 552 345 if (!iod->rqstbuf) 553 346 goto out_fail; 554 347 555 - 
iod->rspbuf = iod->rqstbuf + NVME_FC_MAX_LS_BUFFER_SIZE; 348 + iod->rspbuf = (union nvmefc_ls_responses *)&iod->rqstbuf[1]; 556 349 557 350 iod->rspdma = fc_dma_map_single(tgtport->dev, iod->rspbuf, 558 - NVME_FC_MAX_LS_BUFFER_SIZE, 351 + sizeof(*iod->rspbuf), 559 352 DMA_TO_DEVICE); 560 353 if (fc_dma_mapping_error(tgtport->dev, iod->rspdma)) 561 354 goto out_fail; ··· 566 357 567 358 out_fail: 568 359 kfree(iod->rqstbuf); 569 - list_del(&iod->ls_list); 360 + list_del(&iod->ls_rcv_list); 570 361 for (iod--, i--; i >= 0; iod--, i--) { 571 362 fc_dma_unmap_single(tgtport->dev, iod->rspdma, 572 - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); 363 + sizeof(*iod->rspbuf), DMA_TO_DEVICE); 573 364 kfree(iod->rqstbuf); 574 - list_del(&iod->ls_list); 365 + list_del(&iod->ls_rcv_list); 575 366 } 576 367 577 368 kfree(iod); ··· 587 378 588 379 for (i = 0; i < NVMET_LS_CTX_COUNT; iod++, i++) { 589 380 fc_dma_unmap_single(tgtport->dev, 590 - iod->rspdma, NVME_FC_MAX_LS_BUFFER_SIZE, 381 + iod->rspdma, sizeof(*iod->rspbuf), 591 382 DMA_TO_DEVICE); 592 383 kfree(iod->rqstbuf); 593 - list_del(&iod->ls_list); 384 + list_del(&iod->ls_rcv_list); 594 385 } 595 386 kfree(tgtport->iod); 596 387 } ··· 602 393 unsigned long flags; 603 394 604 395 spin_lock_irqsave(&tgtport->lock, flags); 605 - iod = list_first_entry_or_null(&tgtport->ls_list, 606 - struct nvmet_fc_ls_iod, ls_list); 396 + iod = list_first_entry_or_null(&tgtport->ls_rcv_list, 397 + struct nvmet_fc_ls_iod, ls_rcv_list); 607 398 if (iod) 608 - list_move_tail(&iod->ls_list, &tgtport->ls_busylist); 399 + list_move_tail(&iod->ls_rcv_list, &tgtport->ls_busylist); 609 400 spin_unlock_irqrestore(&tgtport->lock, flags); 610 401 return iod; 611 402 } ··· 618 409 unsigned long flags; 619 410 620 411 spin_lock_irqsave(&tgtport->lock, flags); 621 - list_move(&iod->ls_list, &tgtport->ls_list); 412 + list_move(&iod->ls_rcv_list, &tgtport->ls_rcv_list); 622 413 spin_unlock_irqrestore(&tgtport->lock, flags); 623 414 } 624 415 ··· 887 678 
struct nvmet_fc_fcp_iod *fod = queue->fod; 888 679 struct nvmet_fc_defer_fcp_req *deferfcp, *tempptr; 889 680 unsigned long flags; 890 - int i, writedataactive; 681 + int i; 891 682 bool disconnect; 892 683 893 684 disconnect = atomic_xchg(&queue->connected, 0); 685 + 686 + /* if not connected, nothing to do */ 687 + if (!disconnect) 688 + return; 894 689 895 690 spin_lock_irqsave(&queue->qlock, flags); 896 691 /* abort outstanding io's */ ··· 902 689 if (fod->active) { 903 690 spin_lock(&fod->flock); 904 691 fod->abort = true; 905 - writedataactive = fod->writedataactive; 906 - spin_unlock(&fod->flock); 907 692 /* 908 693 * only call lldd abort routine if waiting for 909 694 * writedata. other outstanding ops should finish 910 695 * on their own. 911 696 */ 912 - if (writedataactive) { 913 - spin_lock(&fod->flock); 697 + if (fod->writedataactive) { 914 698 fod->aborted = true; 915 699 spin_unlock(&fod->flock); 916 700 tgtport->ops->fcp_abort( 917 701 &tgtport->fc_target_port, fod->fcpreq); 918 - } 702 + } else 703 + spin_unlock(&fod->flock); 919 704 } 920 705 } 921 706 ··· 953 742 954 743 flush_workqueue(queue->work_q); 955 744 956 - if (disconnect) 957 - nvmet_sq_destroy(&queue->nvme_sq); 745 + nvmet_sq_destroy(&queue->nvme_sq); 958 746 959 747 nvmet_fc_tgt_q_put(queue); 960 748 } ··· 988 778 } 989 779 990 780 static void 781 + nvmet_fc_hostport_free(struct kref *ref) 782 + { 783 + struct nvmet_fc_hostport *hostport = 784 + container_of(ref, struct nvmet_fc_hostport, ref); 785 + struct nvmet_fc_tgtport *tgtport = hostport->tgtport; 786 + unsigned long flags; 787 + 788 + spin_lock_irqsave(&tgtport->lock, flags); 789 + list_del(&hostport->host_list); 790 + spin_unlock_irqrestore(&tgtport->lock, flags); 791 + if (tgtport->ops->host_release && hostport->invalid) 792 + tgtport->ops->host_release(hostport->hosthandle); 793 + kfree(hostport); 794 + nvmet_fc_tgtport_put(tgtport); 795 + } 796 + 797 + static void 798 + nvmet_fc_hostport_put(struct nvmet_fc_hostport 
*hostport) 799 + { 800 + kref_put(&hostport->ref, nvmet_fc_hostport_free); 801 + } 802 + 803 + static int 804 + nvmet_fc_hostport_get(struct nvmet_fc_hostport *hostport) 805 + { 806 + return kref_get_unless_zero(&hostport->ref); 807 + } 808 + 809 + static void 810 + nvmet_fc_free_hostport(struct nvmet_fc_hostport *hostport) 811 + { 812 + /* if LLDD not implemented, leave as NULL */ 813 + if (!hostport->hosthandle) 814 + return; 815 + 816 + nvmet_fc_hostport_put(hostport); 817 + } 818 + 819 + static struct nvmet_fc_hostport * 820 + nvmet_fc_alloc_hostport(struct nvmet_fc_tgtport *tgtport, void *hosthandle) 821 + { 822 + struct nvmet_fc_hostport *newhost, *host, *match = NULL; 823 + unsigned long flags; 824 + 825 + /* if LLDD not implemented, leave as NULL */ 826 + if (!hosthandle) 827 + return NULL; 828 + 829 + /* take reference for what will be the newly allocated hostport */ 830 + if (!nvmet_fc_tgtport_get(tgtport)) 831 + return ERR_PTR(-EINVAL); 832 + 833 + newhost = kzalloc(sizeof(*newhost), GFP_KERNEL); 834 + if (!newhost) { 835 + spin_lock_irqsave(&tgtport->lock, flags); 836 + list_for_each_entry(host, &tgtport->host_list, host_list) { 837 + if (host->hosthandle == hosthandle && !host->invalid) { 838 + if (nvmet_fc_hostport_get(host)) { 839 + match = host; 840 + break; 841 + } 842 + } 843 + } 844 + spin_unlock_irqrestore(&tgtport->lock, flags); 845 + /* no allocation - release reference */ 846 + nvmet_fc_tgtport_put(tgtport); 847 + return (match) ? 
match : ERR_PTR(-ENOMEM); 848 + } 849 + 850 + newhost->tgtport = tgtport; 851 + newhost->hosthandle = hosthandle; 852 + INIT_LIST_HEAD(&newhost->host_list); 853 + kref_init(&newhost->ref); 854 + 855 + spin_lock_irqsave(&tgtport->lock, flags); 856 + list_for_each_entry(host, &tgtport->host_list, host_list) { 857 + if (host->hosthandle == hosthandle && !host->invalid) { 858 + if (nvmet_fc_hostport_get(host)) { 859 + match = host; 860 + break; 861 + } 862 + } 863 + } 864 + if (match) { 865 + kfree(newhost); 866 + newhost = NULL; 867 + /* releasing allocation - release reference */ 868 + nvmet_fc_tgtport_put(tgtport); 869 + } else 870 + list_add_tail(&newhost->host_list, &tgtport->host_list); 871 + spin_unlock_irqrestore(&tgtport->lock, flags); 872 + 873 + return (match) ? match : newhost; 874 + } 875 + 876 + static void 991 877 nvmet_fc_delete_assoc(struct work_struct *work) 992 878 { 993 879 struct nvmet_fc_tgt_assoc *assoc = 994 880 container_of(work, struct nvmet_fc_tgt_assoc, del_work); 995 881 996 882 nvmet_fc_delete_target_assoc(assoc); 883 + atomic_set(&assoc->del_work_active, 0); 997 884 nvmet_fc_tgt_a_put(assoc); 998 885 } 999 886 1000 887 static struct nvmet_fc_tgt_assoc * 1001 - nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport) 888 + nvmet_fc_alloc_target_assoc(struct nvmet_fc_tgtport *tgtport, void *hosthandle) 1002 889 { 1003 890 struct nvmet_fc_tgt_assoc *assoc, *tmpassoc; 1004 891 unsigned long flags; ··· 1112 805 goto out_free_assoc; 1113 806 1114 807 if (!nvmet_fc_tgtport_get(tgtport)) 1115 - goto out_ida_put; 808 + goto out_ida; 809 + 810 + assoc->hostport = nvmet_fc_alloc_hostport(tgtport, hosthandle); 811 + if (IS_ERR(assoc->hostport)) 812 + goto out_put; 1116 813 1117 814 assoc->tgtport = tgtport; 1118 815 assoc->a_id = idx; 1119 816 INIT_LIST_HEAD(&assoc->a_list); 1120 817 kref_init(&assoc->ref); 1121 818 INIT_WORK(&assoc->del_work, nvmet_fc_delete_assoc); 819 + atomic_set(&assoc->del_work_active, 0); 820 + 
atomic_set(&assoc->terminating, 0); 1122 821 1123 822 while (needrandom) { 1124 823 get_random_bytes(&ran, sizeof(ran) - BYTES_FOR_QID); ··· 1132 819 1133 820 spin_lock_irqsave(&tgtport->lock, flags); 1134 821 needrandom = false; 1135 - list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) 822 + list_for_each_entry(tmpassoc, &tgtport->assoc_list, a_list) { 1136 823 if (ran == tmpassoc->association_id) { 1137 824 needrandom = true; 1138 825 break; 1139 826 } 827 + } 1140 828 if (!needrandom) { 1141 829 assoc->association_id = ran; 1142 830 list_add_tail(&assoc->a_list, &tgtport->assoc_list); ··· 1147 833 1148 834 return assoc; 1149 835 1150 - out_ida_put: 836 + out_put: 837 + nvmet_fc_tgtport_put(tgtport); 838 + out_ida: 1151 839 ida_simple_remove(&tgtport->assoc_cnt, idx); 1152 840 out_free_assoc: 1153 841 kfree(assoc); ··· 1162 846 struct nvmet_fc_tgt_assoc *assoc = 1163 847 container_of(ref, struct nvmet_fc_tgt_assoc, ref); 1164 848 struct nvmet_fc_tgtport *tgtport = assoc->tgtport; 849 + struct nvmet_fc_ls_iod *oldls; 1165 850 unsigned long flags; 1166 851 852 + /* Send Disconnect now that all i/o has completed */ 853 + nvmet_fc_xmt_disconnect_assoc(assoc); 854 + 855 + nvmet_fc_free_hostport(assoc->hostport); 1167 856 spin_lock_irqsave(&tgtport->lock, flags); 1168 857 list_del(&assoc->a_list); 858 + oldls = assoc->rcv_disconn; 1169 859 spin_unlock_irqrestore(&tgtport->lock, flags); 860 + /* if pending Rcv Disconnect Association LS, send rsp now */ 861 + if (oldls) 862 + nvmet_fc_xmt_ls_rsp(tgtport, oldls); 1170 863 ida_simple_remove(&tgtport->assoc_cnt, assoc->a_id); 864 + dev_info(tgtport->dev, 865 + "{%d:%d} Association freed\n", 866 + tgtport->fc_target_port.port_num, assoc->a_id); 1171 867 kfree(assoc); 1172 868 nvmet_fc_tgtport_put(tgtport); 1173 869 } ··· 1202 874 struct nvmet_fc_tgtport *tgtport = assoc->tgtport; 1203 875 struct nvmet_fc_tgt_queue *queue; 1204 876 unsigned long flags; 1205 - int i; 877 + int i, terminating; 878 + 879 + terminating = 
atomic_xchg(&assoc->terminating, 1); 880 + 881 + /* if already terminating, do nothing */ 882 + if (terminating) 883 + return; 1206 884 1207 885 spin_lock_irqsave(&tgtport->lock, flags); 1208 886 for (i = NVMET_NR_QUEUES; i >= 0; i--) { ··· 1223 889 } 1224 890 } 1225 891 spin_unlock_irqrestore(&tgtport->lock, flags); 892 + 893 + dev_info(tgtport->dev, 894 + "{%d:%d} Association deleted\n", 895 + tgtport->fc_target_port.port_num, assoc->a_id); 1226 896 1227 897 nvmet_fc_tgt_a_put(assoc); 1228 898 } ··· 1386 1048 1387 1049 newrec->fc_target_port.node_name = pinfo->node_name; 1388 1050 newrec->fc_target_port.port_name = pinfo->port_name; 1389 - newrec->fc_target_port.private = &newrec[1]; 1051 + if (template->target_priv_sz) 1052 + newrec->fc_target_port.private = &newrec[1]; 1053 + else 1054 + newrec->fc_target_port.private = NULL; 1390 1055 newrec->fc_target_port.port_id = pinfo->port_id; 1391 1056 newrec->fc_target_port.port_num = idx; 1392 1057 INIT_LIST_HEAD(&newrec->tgt_list); 1393 1058 newrec->dev = dev; 1394 1059 newrec->ops = template; 1395 1060 spin_lock_init(&newrec->lock); 1396 - INIT_LIST_HEAD(&newrec->ls_list); 1061 + INIT_LIST_HEAD(&newrec->ls_rcv_list); 1062 + INIT_LIST_HEAD(&newrec->ls_req_list); 1397 1063 INIT_LIST_HEAD(&newrec->ls_busylist); 1398 1064 INIT_LIST_HEAD(&newrec->assoc_list); 1065 + INIT_LIST_HEAD(&newrec->host_list); 1399 1066 kref_init(&newrec->ref); 1400 1067 ida_init(&newrec->assoc_cnt); 1401 1068 newrec->max_sg_cnt = template->max_sgl_segments; ··· 1477 1134 { 1478 1135 struct nvmet_fc_tgt_assoc *assoc, *next; 1479 1136 unsigned long flags; 1137 + int ret; 1480 1138 1481 1139 spin_lock_irqsave(&tgtport->lock, flags); 1482 1140 list_for_each_entry_safe(assoc, next, 1483 1141 &tgtport->assoc_list, a_list) { 1484 1142 if (!nvmet_fc_tgt_a_get(assoc)) 1485 1143 continue; 1486 - if (!schedule_work(&assoc->del_work)) 1144 + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); 1145 + if (ret == 0) { 1146 + if 
(!schedule_work(&assoc->del_work)) 1147 + nvmet_fc_tgt_a_put(assoc); 1148 + } else { 1149 + /* already deleting - release local reference */ 1487 1150 nvmet_fc_tgt_a_put(assoc); 1151 + } 1488 1152 } 1489 1153 spin_unlock_irqrestore(&tgtport->lock, flags); 1490 1154 } 1155 + 1156 + /** 1157 + * nvmet_fc_invalidate_host - transport entry point called by an LLDD 1158 + * to remove references to a hosthandle for LS's. 1159 + * 1160 + * The nvmet-fc layer ensures that any references to the hosthandle 1161 + * on the targetport are forgotten (set to NULL). The LLDD will 1162 + * typically call this when a login with a remote host port has been 1163 + * lost, thus LS's for the remote host port are no longer possible. 1164 + * 1165 + * If an LS request is outstanding to the targetport/hosthandle (or 1166 + * issued concurrently with the call to invalidate the host), the 1167 + * LLDD is responsible for terminating/aborting the LS and completing 1168 + * the LS request. It is recommended that these terminations/aborts 1169 + * occur after calling to invalidate the host handle to avoid additional 1170 + * retries by the nvmet-fc transport. The nvmet-fc transport may 1171 + * continue to reference host handle while it cleans up outstanding 1172 + * NVME associations. The nvmet-fc transport will call the 1173 + * ops->host_release() callback to notify the LLDD that all references 1174 + * are complete and the related host handle can be recovered. 1175 + * Note: if there are no references, the callback may be called before 1176 + * the invalidate host call returns. 1177 + * 1178 + * @target_port: pointer to the (registered) target port that a prior 1179 + * LS was received on and which supplied the transport the 1180 + * hosthandle. 1181 + * @hosthandle: the handle (pointer) that represents the host port 1182 + * that no longer has connectivity and that LS's should 1183 + * no longer be directed to. 
1184 + */ 1185 + void 1186 + nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, 1187 + void *hosthandle) 1188 + { 1189 + struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); 1190 + struct nvmet_fc_tgt_assoc *assoc, *next; 1191 + unsigned long flags; 1192 + bool noassoc = true; 1193 + int ret; 1194 + 1195 + spin_lock_irqsave(&tgtport->lock, flags); 1196 + list_for_each_entry_safe(assoc, next, 1197 + &tgtport->assoc_list, a_list) { 1198 + if (!assoc->hostport || 1199 + assoc->hostport->hosthandle != hosthandle) 1200 + continue; 1201 + if (!nvmet_fc_tgt_a_get(assoc)) 1202 + continue; 1203 + assoc->hostport->invalid = 1; 1204 + noassoc = false; 1205 + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); 1206 + if (ret == 0) { 1207 + if (!schedule_work(&assoc->del_work)) 1208 + nvmet_fc_tgt_a_put(assoc); 1209 + } else { 1210 + /* already deleting - release local reference */ 1211 + nvmet_fc_tgt_a_put(assoc); 1212 + } 1213 + } 1214 + spin_unlock_irqrestore(&tgtport->lock, flags); 1215 + 1216 + /* if there's nothing to wait for - call the callback */ 1217 + if (noassoc && tgtport->ops->host_release) 1218 + tgtport->ops->host_release(hosthandle); 1219 + } 1220 + EXPORT_SYMBOL_GPL(nvmet_fc_invalidate_host); 1491 1221 1492 1222 /* 1493 1223 * nvmet layer has called to terminate an association ··· 1573 1157 struct nvmet_fc_tgt_queue *queue; 1574 1158 unsigned long flags; 1575 1159 bool found_ctrl = false; 1160 + int ret; 1576 1161 1577 1162 /* this is a bit ugly, but don't want to make locks layered */ 1578 1163 spin_lock_irqsave(&nvmet_fc_tgtlock, flags); ··· 1597 1180 nvmet_fc_tgtport_put(tgtport); 1598 1181 1599 1182 if (found_ctrl) { 1600 - if (!schedule_work(&assoc->del_work)) 1183 + ret = atomic_cmpxchg(&assoc->del_work_active, 0, 1); 1184 + if (ret == 0) { 1185 + if (!schedule_work(&assoc->del_work)) 1186 + nvmet_fc_tgt_a_put(assoc); 1187 + } else { 1188 + /* already deleting - release local reference */ 1601 1189 
nvmet_fc_tgt_a_put(assoc); 1190 + } 1602 1191 return; 1603 1192 } 1604 1193 ··· 1634 1211 /* terminate any outstanding associations */ 1635 1212 __nvmet_fc_free_assocs(tgtport); 1636 1213 1214 + /* 1215 + * should terminate LS's as well. However, LS's will be generated 1216 + * at the tail end of association termination, so they likely don't 1217 + * exist yet. And even if they did, it's worthwhile to just let 1218 + * them finish and targetport ref counting will clean things up. 1219 + */ 1220 + 1637 1221 nvmet_fc_tgtport_put(tgtport); 1638 1222 1639 1223 return 0; ··· 1648 1218 EXPORT_SYMBOL_GPL(nvmet_fc_unregister_targetport); 1649 1219 1650 1220 1651 - /* *********************** FC-NVME LS Handling **************************** */ 1221 + /* ********************** FC-NVME LS RCV Handling ************************* */ 1652 1222 1653 - 1654 - static void 1655 - nvmet_fc_format_rsp_hdr(void *buf, u8 ls_cmd, __be32 desc_len, u8 rqst_ls_cmd) 1656 - { 1657 - struct fcnvme_ls_acc_hdr *acc = buf; 1658 - 1659 - acc->w0.ls_cmd = ls_cmd; 1660 - acc->desc_list_len = desc_len; 1661 - acc->rqst.desc_tag = cpu_to_be32(FCNVME_LSDESC_RQST); 1662 - acc->rqst.desc_len = 1663 - fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rqst)); 1664 - acc->rqst.w0.ls_cmd = rqst_ls_cmd; 1665 - } 1666 - 1667 - static int 1668 - nvmet_fc_format_rjt(void *buf, u16 buflen, u8 ls_cmd, 1669 - u8 reason, u8 explanation, u8 vendor) 1670 - { 1671 - struct fcnvme_ls_rjt *rjt = buf; 1672 - 1673 - nvmet_fc_format_rsp_hdr(buf, FCNVME_LSDESC_RQST, 1674 - fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_rjt)), 1675 - ls_cmd); 1676 - rjt->rjt.desc_tag = cpu_to_be32(FCNVME_LSDESC_RJT); 1677 - rjt->rjt.desc_len = fcnvme_lsdesc_len(sizeof(struct fcnvme_lsdesc_rjt)); 1678 - rjt->rjt.reason_code = reason; 1679 - rjt->rjt.reason_explanation = explanation; 1680 - rjt->rjt.vendor = vendor; 1681 - 1682 - return sizeof(struct fcnvme_ls_rjt); 1683 - } 1684 - 1685 - /* Validation Error indexes into the string table below */ 1686 - 
enum { 1687 - VERR_NO_ERROR = 0, 1688 - VERR_CR_ASSOC_LEN = 1, 1689 - VERR_CR_ASSOC_RQST_LEN = 2, 1690 - VERR_CR_ASSOC_CMD = 3, 1691 - VERR_CR_ASSOC_CMD_LEN = 4, 1692 - VERR_ERSP_RATIO = 5, 1693 - VERR_ASSOC_ALLOC_FAIL = 6, 1694 - VERR_QUEUE_ALLOC_FAIL = 7, 1695 - VERR_CR_CONN_LEN = 8, 1696 - VERR_CR_CONN_RQST_LEN = 9, 1697 - VERR_ASSOC_ID = 10, 1698 - VERR_ASSOC_ID_LEN = 11, 1699 - VERR_NO_ASSOC = 12, 1700 - VERR_CONN_ID = 13, 1701 - VERR_CONN_ID_LEN = 14, 1702 - VERR_NO_CONN = 15, 1703 - VERR_CR_CONN_CMD = 16, 1704 - VERR_CR_CONN_CMD_LEN = 17, 1705 - VERR_DISCONN_LEN = 18, 1706 - VERR_DISCONN_RQST_LEN = 19, 1707 - VERR_DISCONN_CMD = 20, 1708 - VERR_DISCONN_CMD_LEN = 21, 1709 - VERR_DISCONN_SCOPE = 22, 1710 - VERR_RS_LEN = 23, 1711 - VERR_RS_RQST_LEN = 24, 1712 - VERR_RS_CMD = 25, 1713 - VERR_RS_CMD_LEN = 26, 1714 - VERR_RS_RCTL = 27, 1715 - VERR_RS_RO = 28, 1716 - }; 1717 - 1718 - static char *validation_errors[] = { 1719 - "OK", 1720 - "Bad CR_ASSOC Length", 1721 - "Bad CR_ASSOC Rqst Length", 1722 - "Not CR_ASSOC Cmd", 1723 - "Bad CR_ASSOC Cmd Length", 1724 - "Bad Ersp Ratio", 1725 - "Association Allocation Failed", 1726 - "Queue Allocation Failed", 1727 - "Bad CR_CONN Length", 1728 - "Bad CR_CONN Rqst Length", 1729 - "Not Association ID", 1730 - "Bad Association ID Length", 1731 - "No Association", 1732 - "Not Connection ID", 1733 - "Bad Connection ID Length", 1734 - "No Connection", 1735 - "Not CR_CONN Cmd", 1736 - "Bad CR_CONN Cmd Length", 1737 - "Bad DISCONN Length", 1738 - "Bad DISCONN Rqst Length", 1739 - "Not DISCONN Cmd", 1740 - "Bad DISCONN Cmd Length", 1741 - "Bad Disconnect Scope", 1742 - "Bad RS Length", 1743 - "Bad RS Rqst Length", 1744 - "Not RS Cmd", 1745 - "Bad RS Cmd Length", 1746 - "Bad RS R_CTL", 1747 - "Bad RS Relative Offset", 1748 - }; 1749 1223 1750 1224 static void 1751 1225 nvmet_fc_ls_create_association(struct nvmet_fc_tgtport *tgtport, 1752 1226 struct nvmet_fc_ls_iod *iod) 1753 1227 { 1754 - struct fcnvme_ls_cr_assoc_rqst *rqst = 1755 
- (struct fcnvme_ls_cr_assoc_rqst *)iod->rqstbuf; 1756 - struct fcnvme_ls_cr_assoc_acc *acc = 1757 - (struct fcnvme_ls_cr_assoc_acc *)iod->rspbuf; 1228 + struct fcnvme_ls_cr_assoc_rqst *rqst = &iod->rqstbuf->rq_cr_assoc; 1229 + struct fcnvme_ls_cr_assoc_acc *acc = &iod->rspbuf->rsp_cr_assoc; 1758 1230 struct nvmet_fc_tgt_queue *queue; 1759 1231 int ret = 0; 1760 1232 ··· 1688 1356 1689 1357 else { 1690 1358 /* new association w/ admin queue */ 1691 - iod->assoc = nvmet_fc_alloc_target_assoc(tgtport); 1359 + iod->assoc = nvmet_fc_alloc_target_assoc( 1360 + tgtport, iod->hosthandle); 1692 1361 if (!iod->assoc) 1693 1362 ret = VERR_ASSOC_ALLOC_FAIL; 1694 1363 else { ··· 1704 1371 dev_err(tgtport->dev, 1705 1372 "Create Association LS failed: %s\n", 1706 1373 validation_errors[ret]); 1707 - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, 1708 - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, 1374 + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, 1375 + sizeof(*acc), rqst->w0.ls_cmd, 1709 1376 FCNVME_RJT_RC_LOGIC, 1710 1377 FCNVME_RJT_EXP_NONE, 0); 1711 1378 return; ··· 1715 1382 atomic_set(&queue->connected, 1); 1716 1383 queue->sqhd = 0; /* best place to init value */ 1717 1384 1385 + dev_info(tgtport->dev, 1386 + "{%d:%d} Association created\n", 1387 + tgtport->fc_target_port.port_num, iod->assoc->a_id); 1388 + 1718 1389 /* format a response */ 1719 1390 1720 - iod->lsreq->rsplen = sizeof(*acc); 1391 + iod->lsrsp->rsplen = sizeof(*acc); 1721 1392 1722 - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1393 + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1723 1394 fcnvme_lsdesc_len( 1724 1395 sizeof(struct fcnvme_ls_cr_assoc_acc)), 1725 1396 FCNVME_LS_CREATE_ASSOCIATION); ··· 1744 1407 nvmet_fc_ls_create_connection(struct nvmet_fc_tgtport *tgtport, 1745 1408 struct nvmet_fc_ls_iod *iod) 1746 1409 { 1747 - struct fcnvme_ls_cr_conn_rqst *rqst = 1748 - (struct fcnvme_ls_cr_conn_rqst *)iod->rqstbuf; 1749 - struct fcnvme_ls_cr_conn_acc *acc = 1750 - (struct fcnvme_ls_cr_conn_acc 
*)iod->rspbuf; 1410 + struct fcnvme_ls_cr_conn_rqst *rqst = &iod->rqstbuf->rq_cr_conn; 1411 + struct fcnvme_ls_cr_conn_acc *acc = &iod->rspbuf->rsp_cr_conn; 1751 1412 struct nvmet_fc_tgt_queue *queue; 1752 1413 int ret = 0; 1753 1414 ··· 1797 1462 dev_err(tgtport->dev, 1798 1463 "Create Connection LS failed: %s\n", 1799 1464 validation_errors[ret]); 1800 - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, 1801 - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, 1465 + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, 1466 + sizeof(*acc), rqst->w0.ls_cmd, 1802 1467 (ret == VERR_NO_ASSOC) ? 1803 1468 FCNVME_RJT_RC_INV_ASSOC : 1804 1469 FCNVME_RJT_RC_LOGIC, ··· 1812 1477 1813 1478 /* format a response */ 1814 1479 1815 - iod->lsreq->rsplen = sizeof(*acc); 1480 + iod->lsrsp->rsplen = sizeof(*acc); 1816 1481 1817 - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1482 + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1818 1483 fcnvme_lsdesc_len(sizeof(struct fcnvme_ls_cr_conn_acc)), 1819 1484 FCNVME_LS_CREATE_CONNECTION); 1820 1485 acc->connectid.desc_tag = cpu_to_be32(FCNVME_LSDESC_CONN_ID); ··· 1826 1491 be16_to_cpu(rqst->connect_cmd.qid))); 1827 1492 } 1828 1493 1829 - static void 1494 + /* 1495 + * Returns true if the LS response is to be transmit 1496 + * Returns false if the LS response is to be delayed 1497 + */ 1498 + static int 1830 1499 nvmet_fc_ls_disconnect(struct nvmet_fc_tgtport *tgtport, 1831 1500 struct nvmet_fc_ls_iod *iod) 1832 1501 { 1833 1502 struct fcnvme_ls_disconnect_assoc_rqst *rqst = 1834 - (struct fcnvme_ls_disconnect_assoc_rqst *)iod->rqstbuf; 1503 + &iod->rqstbuf->rq_dis_assoc; 1835 1504 struct fcnvme_ls_disconnect_assoc_acc *acc = 1836 - (struct fcnvme_ls_disconnect_assoc_acc *)iod->rspbuf; 1837 - struct nvmet_fc_tgt_assoc *assoc; 1505 + &iod->rspbuf->rsp_dis_assoc; 1506 + struct nvmet_fc_tgt_assoc *assoc = NULL; 1507 + struct nvmet_fc_ls_iod *oldls = NULL; 1508 + unsigned long flags; 1838 1509 int ret = 0; 1839 1510 1840 1511 memset(acc, 0, sizeof(*acc)); 1841 
1512 1842 - if (iod->rqstdatalen < sizeof(struct fcnvme_ls_disconnect_assoc_rqst)) 1843 - ret = VERR_DISCONN_LEN; 1844 - else if (rqst->desc_list_len != 1845 - fcnvme_lsdesc_len( 1846 - sizeof(struct fcnvme_ls_disconnect_assoc_rqst))) 1847 - ret = VERR_DISCONN_RQST_LEN; 1848 - else if (rqst->associd.desc_tag != cpu_to_be32(FCNVME_LSDESC_ASSOC_ID)) 1849 - ret = VERR_ASSOC_ID; 1850 - else if (rqst->associd.desc_len != 1851 - fcnvme_lsdesc_len( 1852 - sizeof(struct fcnvme_lsdesc_assoc_id))) 1853 - ret = VERR_ASSOC_ID_LEN; 1854 - else if (rqst->discon_cmd.desc_tag != 1855 - cpu_to_be32(FCNVME_LSDESC_DISCONN_CMD)) 1856 - ret = VERR_DISCONN_CMD; 1857 - else if (rqst->discon_cmd.desc_len != 1858 - fcnvme_lsdesc_len( 1859 - sizeof(struct fcnvme_lsdesc_disconn_cmd))) 1860 - ret = VERR_DISCONN_CMD_LEN; 1861 - /* 1862 - * As the standard changed on the LS, check if old format and scope 1863 - * something other than Association (e.g. 0). 1864 - */ 1865 - else if (rqst->discon_cmd.rsvd8[0]) 1866 - ret = VERR_DISCONN_SCOPE; 1867 - else { 1868 - /* match an active association */ 1513 + ret = nvmefc_vldt_lsreq_discon_assoc(iod->rqstdatalen, rqst); 1514 + if (!ret) { 1515 + /* match an active association - takes an assoc ref if !NULL */ 1869 1516 assoc = nvmet_fc_find_target_assoc(tgtport, 1870 1517 be64_to_cpu(rqst->associd.association_id)); 1871 1518 iod->assoc = assoc; ··· 1855 1538 ret = VERR_NO_ASSOC; 1856 1539 } 1857 1540 1858 - if (ret) { 1541 + if (ret || !assoc) { 1859 1542 dev_err(tgtport->dev, 1860 1543 "Disconnect LS failed: %s\n", 1861 1544 validation_errors[ret]); 1862 - iod->lsreq->rsplen = nvmet_fc_format_rjt(acc, 1863 - NVME_FC_MAX_LS_BUFFER_SIZE, rqst->w0.ls_cmd, 1545 + iod->lsrsp->rsplen = nvme_fc_format_rjt(acc, 1546 + sizeof(*acc), rqst->w0.ls_cmd, 1864 1547 (ret == VERR_NO_ASSOC) ? 1865 1548 FCNVME_RJT_RC_INV_ASSOC : 1866 - (ret == VERR_NO_CONN) ? 
1867 - FCNVME_RJT_RC_INV_CONN : 1868 - FCNVME_RJT_RC_LOGIC, 1549 + FCNVME_RJT_RC_LOGIC, 1869 1550 FCNVME_RJT_EXP_NONE, 0); 1870 - return; 1551 + return true; 1871 1552 } 1872 1553 1873 1554 /* format a response */ 1874 1555 1875 - iod->lsreq->rsplen = sizeof(*acc); 1556 + iod->lsrsp->rsplen = sizeof(*acc); 1876 1557 1877 - nvmet_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1558 + nvme_fc_format_rsp_hdr(acc, FCNVME_LS_ACC, 1878 1559 fcnvme_lsdesc_len( 1879 1560 sizeof(struct fcnvme_ls_disconnect_assoc_acc)), 1880 1561 FCNVME_LS_DISCONNECT_ASSOC); 1881 1562 1882 1563 /* release get taken in nvmet_fc_find_target_assoc */ 1883 - nvmet_fc_tgt_a_put(iod->assoc); 1564 + nvmet_fc_tgt_a_put(assoc); 1884 1565 1885 - nvmet_fc_delete_target_assoc(iod->assoc); 1566 + /* 1567 + * The rules for LS response says the response cannot 1568 + * go back until ABTS's have been sent for all outstanding 1569 + * I/O and a Disconnect Association LS has been sent. 1570 + * So... save off the Disconnect LS to send the response 1571 + * later. If there was a prior LS already saved, replace 1572 + * it with the newer one and send a can't perform reject 1573 + * on the older one. 
1574 + */ 1575 + spin_lock_irqsave(&tgtport->lock, flags); 1576 + oldls = assoc->rcv_disconn; 1577 + assoc->rcv_disconn = iod; 1578 + spin_unlock_irqrestore(&tgtport->lock, flags); 1579 + 1580 + nvmet_fc_delete_target_assoc(assoc); 1581 + 1582 + if (oldls) { 1583 + dev_info(tgtport->dev, 1584 + "{%d:%d} Multiple Disconnect Association LS's " 1585 + "received\n", 1586 + tgtport->fc_target_port.port_num, assoc->a_id); 1587 + /* overwrite good response with bogus failure */ 1588 + oldls->lsrsp->rsplen = nvme_fc_format_rjt(oldls->rspbuf, 1589 + sizeof(*iod->rspbuf), 1590 + /* ok to use rqst, LS is same */ 1591 + rqst->w0.ls_cmd, 1592 + FCNVME_RJT_RC_UNAB, 1593 + FCNVME_RJT_EXP_NONE, 0); 1594 + nvmet_fc_xmt_ls_rsp(tgtport, oldls); 1595 + } 1596 + 1597 + return false; 1886 1598 } 1887 1599 1888 1600 ··· 1923 1577 static const struct nvmet_fabrics_ops nvmet_fc_tgt_fcp_ops; 1924 1578 1925 1579 static void 1926 - nvmet_fc_xmt_ls_rsp_done(struct nvmefc_tgt_ls_req *lsreq) 1580 + nvmet_fc_xmt_ls_rsp_done(struct nvmefc_ls_rsp *lsrsp) 1927 1581 { 1928 - struct nvmet_fc_ls_iod *iod = lsreq->nvmet_fc_private; 1582 + struct nvmet_fc_ls_iod *iod = lsrsp->nvme_fc_private; 1929 1583 struct nvmet_fc_tgtport *tgtport = iod->tgtport; 1930 1584 1931 1585 fc_dma_sync_single_for_cpu(tgtport->dev, iod->rspdma, 1932 - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); 1586 + sizeof(*iod->rspbuf), DMA_TO_DEVICE); 1933 1587 nvmet_fc_free_ls_iod(tgtport, iod); 1934 1588 nvmet_fc_tgtport_put(tgtport); 1935 1589 } ··· 1941 1595 int ret; 1942 1596 1943 1597 fc_dma_sync_single_for_device(tgtport->dev, iod->rspdma, 1944 - NVME_FC_MAX_LS_BUFFER_SIZE, DMA_TO_DEVICE); 1598 + sizeof(*iod->rspbuf), DMA_TO_DEVICE); 1945 1599 1946 - ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsreq); 1600 + ret = tgtport->ops->xmt_ls_rsp(&tgtport->fc_target_port, iod->lsrsp); 1947 1601 if (ret) 1948 - nvmet_fc_xmt_ls_rsp_done(iod->lsreq); 1602 + nvmet_fc_xmt_ls_rsp_done(iod->lsrsp); 1949 1603 } 1950 1604 1951 
1605 /* ··· 1955 1609 nvmet_fc_handle_ls_rqst(struct nvmet_fc_tgtport *tgtport, 1956 1610 struct nvmet_fc_ls_iod *iod) 1957 1611 { 1958 - struct fcnvme_ls_rqst_w0 *w0 = 1959 - (struct fcnvme_ls_rqst_w0 *)iod->rqstbuf; 1612 + struct fcnvme_ls_rqst_w0 *w0 = &iod->rqstbuf->rq_cr_assoc.w0; 1613 + bool sendrsp = true; 1960 1614 1961 - iod->lsreq->nvmet_fc_private = iod; 1962 - iod->lsreq->rspbuf = iod->rspbuf; 1963 - iod->lsreq->rspdma = iod->rspdma; 1964 - iod->lsreq->done = nvmet_fc_xmt_ls_rsp_done; 1615 + iod->lsrsp->nvme_fc_private = iod; 1616 + iod->lsrsp->rspbuf = iod->rspbuf; 1617 + iod->lsrsp->rspdma = iod->rspdma; 1618 + iod->lsrsp->done = nvmet_fc_xmt_ls_rsp_done; 1965 1619 /* Be preventative. handlers will later set to valid length */ 1966 - iod->lsreq->rsplen = 0; 1620 + iod->lsrsp->rsplen = 0; 1967 1621 1968 1622 iod->assoc = NULL; 1969 1623 ··· 1983 1637 break; 1984 1638 case FCNVME_LS_DISCONNECT_ASSOC: 1985 1639 /* Terminate a Queue/Connection or the Association */ 1986 - nvmet_fc_ls_disconnect(tgtport, iod); 1640 + sendrsp = nvmet_fc_ls_disconnect(tgtport, iod); 1987 1641 break; 1988 1642 default: 1989 - iod->lsreq->rsplen = nvmet_fc_format_rjt(iod->rspbuf, 1990 - NVME_FC_MAX_LS_BUFFER_SIZE, w0->ls_cmd, 1643 + iod->lsrsp->rsplen = nvme_fc_format_rjt(iod->rspbuf, 1644 + sizeof(*iod->rspbuf), w0->ls_cmd, 1991 1645 FCNVME_RJT_RC_INVAL, FCNVME_RJT_EXP_NONE, 0); 1992 1646 } 1993 1647 1994 - nvmet_fc_xmt_ls_rsp(tgtport, iod); 1648 + if (sendrsp) 1649 + nvmet_fc_xmt_ls_rsp(tgtport, iod); 1995 1650 } 1996 1651 1997 1652 /* ··· 2021 1674 * 2022 1675 * @target_port: pointer to the (registered) target port the LS was 2023 1676 * received on. 2024 - * @lsreq: pointer to a lsreq request structure to be used to reference 1677 + * @lsrsp: pointer to a lsrsp structure to be used to reference 2025 1678 * the exchange corresponding to the LS. 
2026 1679 * @lsreqbuf: pointer to the buffer containing the LS Request 2027 1680 * @lsreqbuf_len: length, in bytes, of the received LS request 2028 1681 */ 2029 1682 int 2030 1683 nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, 2031 - struct nvmefc_tgt_ls_req *lsreq, 1684 + void *hosthandle, 1685 + struct nvmefc_ls_rsp *lsrsp, 2032 1686 void *lsreqbuf, u32 lsreqbuf_len) 2033 1687 { 2034 1688 struct nvmet_fc_tgtport *tgtport = targetport_to_tgtport(target_port); 2035 1689 struct nvmet_fc_ls_iod *iod; 1690 + struct fcnvme_ls_rqst_w0 *w0 = (struct fcnvme_ls_rqst_w0 *)lsreqbuf; 2036 1691 2037 - if (lsreqbuf_len > NVME_FC_MAX_LS_BUFFER_SIZE) 1692 + if (lsreqbuf_len > sizeof(union nvmefc_ls_requests)) { 1693 + dev_info(tgtport->dev, 1694 + "RCV %s LS failed: payload too large (%d)\n", 1695 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 1696 + nvmefc_ls_names[w0->ls_cmd] : "", 1697 + lsreqbuf_len); 2038 1698 return -E2BIG; 1699 + } 2039 1700 2040 - if (!nvmet_fc_tgtport_get(tgtport)) 1701 + if (!nvmet_fc_tgtport_get(tgtport)) { 1702 + dev_info(tgtport->dev, 1703 + "RCV %s LS failed: target deleting\n", 1704 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 1705 + nvmefc_ls_names[w0->ls_cmd] : ""); 2041 1706 return -ESHUTDOWN; 1707 + } 2042 1708 2043 1709 iod = nvmet_fc_alloc_ls_iod(tgtport); 2044 1710 if (!iod) { 1711 + dev_info(tgtport->dev, 1712 + "RCV %s LS failed: context allocation failed\n", 1713 + (w0->ls_cmd <= NVME_FC_LAST_LS_CMD_VALUE) ? 1714 + nvmefc_ls_names[w0->ls_cmd] : ""); 2045 1715 nvmet_fc_tgtport_put(tgtport); 2046 1716 return -ENOENT; 2047 1717 } 2048 1718 2049 - iod->lsreq = lsreq; 1719 + iod->lsrsp = lsrsp; 2050 1720 iod->fcpreq = NULL; 2051 1721 memcpy(iod->rqstbuf, lsreqbuf, lsreqbuf_len); 2052 1722 iod->rqstdatalen = lsreqbuf_len; 1723 + iod->hosthandle = hosthandle; 2053 1724 2054 1725 schedule_work(&iod->work); 2055 1726
+134 -21
drivers/nvme/target/fcloop.c
··· 208 208 }; 209 209 210 210 struct fcloop_tport { 211 - struct nvmet_fc_target_port *targetport; 212 - struct nvme_fc_remote_port *remoteport; 213 - struct fcloop_nport *nport; 214 - struct fcloop_lport *lport; 211 + struct nvmet_fc_target_port *targetport; 212 + struct nvme_fc_remote_port *remoteport; 213 + struct fcloop_nport *nport; 214 + struct fcloop_lport *lport; 215 + spinlock_t lock; 216 + struct list_head ls_list; 217 + struct work_struct ls_work; 215 218 }; 216 219 217 220 struct fcloop_nport { ··· 231 228 232 229 struct fcloop_lsreq { 233 230 struct nvmefc_ls_req *lsreq; 234 - struct nvmefc_tgt_ls_req tgt_ls_req; 231 + struct nvmefc_ls_rsp ls_rsp; 232 + int lsdir; /* H2T or T2H */ 235 233 int status; 236 234 struct list_head ls_list; /* fcloop_rport->ls_list */ 237 235 }; ··· 271 267 }; 272 268 273 269 static inline struct fcloop_lsreq * 274 - tgt_ls_req_to_lsreq(struct nvmefc_tgt_ls_req *tgt_lsreq) 270 + ls_rsp_to_lsreq(struct nvmefc_ls_rsp *lsrsp) 275 271 { 276 - return container_of(tgt_lsreq, struct fcloop_lsreq, tgt_ls_req); 272 + return container_of(lsrsp, struct fcloop_lsreq, ls_rsp); 277 273 } 278 274 279 275 static inline struct fcloop_fcpreq * ··· 327 323 } 328 324 329 325 static int 330 - fcloop_ls_req(struct nvme_fc_local_port *localport, 326 + fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, 331 327 struct nvme_fc_remote_port *remoteport, 332 328 struct nvmefc_ls_req *lsreq) 333 329 { ··· 348 344 } 349 345 350 346 tls_req->status = 0; 351 - ret = nvmet_fc_rcv_ls_req(rport->targetport, &tls_req->tgt_ls_req, 352 - lsreq->rqstaddr, lsreq->rqstlen); 347 + ret = nvmet_fc_rcv_ls_req(rport->targetport, rport, 348 + &tls_req->ls_rsp, 349 + lsreq->rqstaddr, lsreq->rqstlen); 353 350 354 351 return ret; 355 352 } 356 353 357 354 static int 358 - fcloop_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, 359 - struct nvmefc_tgt_ls_req *tgt_lsreq) 355 + fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, 356 + struct nvmefc_ls_rsp 
*lsrsp) 360 357 { 361 - struct fcloop_lsreq *tls_req = tgt_ls_req_to_lsreq(tgt_lsreq); 358 + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); 362 359 struct nvmefc_ls_req *lsreq = tls_req->lsreq; 363 360 struct fcloop_tport *tport = targetport->private; 364 361 struct nvme_fc_remote_port *remoteport = tport->remoteport; 365 362 struct fcloop_rport *rport; 366 363 367 - memcpy(lsreq->rspaddr, tgt_lsreq->rspbuf, 368 - ((lsreq->rsplen < tgt_lsreq->rsplen) ? 369 - lsreq->rsplen : tgt_lsreq->rsplen)); 364 + memcpy(lsreq->rspaddr, lsrsp->rspbuf, 365 + ((lsreq->rsplen < lsrsp->rsplen) ? 366 + lsreq->rsplen : lsrsp->rsplen)); 370 367 371 - tgt_lsreq->done(tgt_lsreq); 368 + lsrsp->done(lsrsp); 372 369 373 370 if (remoteport) { 374 371 rport = remoteport->private; ··· 380 375 } 381 376 382 377 return 0; 378 + } 379 + 380 + static void 381 + fcloop_tport_lsrqst_work(struct work_struct *work) 382 + { 383 + struct fcloop_tport *tport = 384 + container_of(work, struct fcloop_tport, ls_work); 385 + struct fcloop_lsreq *tls_req; 386 + 387 + spin_lock(&tport->lock); 388 + for (;;) { 389 + tls_req = list_first_entry_or_null(&tport->ls_list, 390 + struct fcloop_lsreq, ls_list); 391 + if (!tls_req) 392 + break; 393 + 394 + list_del(&tls_req->ls_list); 395 + spin_unlock(&tport->lock); 396 + 397 + tls_req->lsreq->done(tls_req->lsreq, tls_req->status); 398 + /* 399 + * callee may free memory containing tls_req. 400 + * do not reference lsreq after this. 401 + */ 402 + 403 + spin_lock(&tport->lock); 404 + } 405 + spin_unlock(&tport->lock); 406 + } 407 + 408 + static int 409 + fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, 410 + struct nvmefc_ls_req *lsreq) 411 + { 412 + struct fcloop_lsreq *tls_req = lsreq->private; 413 + struct fcloop_tport *tport = targetport->private; 414 + int ret = 0; 415 + 416 + /* 417 + * hosthandle should be the dst.rport value. 
418 + * hosthandle ignored as fcloop currently is 419 + * 1:1 tgtport vs remoteport 420 + */ 421 + tls_req->lsreq = lsreq; 422 + INIT_LIST_HEAD(&tls_req->ls_list); 423 + 424 + if (!tport->remoteport) { 425 + tls_req->status = -ECONNREFUSED; 426 + spin_lock(&tport->lock); 427 + list_add_tail(&tport->ls_list, &tls_req->ls_list); 428 + spin_unlock(&tport->lock); 429 + schedule_work(&tport->ls_work); 430 + return ret; 431 + } 432 + 433 + tls_req->status = 0; 434 + ret = nvme_fc_rcv_ls_req(tport->remoteport, &tls_req->ls_rsp, 435 + lsreq->rqstaddr, lsreq->rqstlen); 436 + 437 + return ret; 438 + } 439 + 440 + static int 441 + fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, 442 + struct nvme_fc_remote_port *remoteport, 443 + struct nvmefc_ls_rsp *lsrsp) 444 + { 445 + struct fcloop_lsreq *tls_req = ls_rsp_to_lsreq(lsrsp); 446 + struct nvmefc_ls_req *lsreq = tls_req->lsreq; 447 + struct fcloop_rport *rport = remoteport->private; 448 + struct nvmet_fc_target_port *targetport = rport->targetport; 449 + struct fcloop_tport *tport; 450 + 451 + memcpy(lsreq->rspaddr, lsrsp->rspbuf, 452 + ((lsreq->rsplen < lsrsp->rsplen) ? 
453 + lsreq->rsplen : lsrsp->rsplen)); 454 + lsrsp->done(lsrsp); 455 + 456 + if (targetport) { 457 + tport = targetport->private; 458 + spin_lock(&tport->lock); 459 + list_add_tail(&tport->ls_list, &tls_req->ls_list); 460 + spin_unlock(&tport->lock); 461 + schedule_work(&tport->ls_work); 462 + } 463 + 464 + return 0; 465 + } 466 + 467 + static void 468 + fcloop_t2h_host_release(void *hosthandle) 469 + { 470 + /* host handle ignored for now */ 383 471 } 384 472 385 473 /* ··· 860 762 } 861 763 862 764 static void 863 - fcloop_ls_abort(struct nvme_fc_local_port *localport, 765 + fcloop_h2t_ls_abort(struct nvme_fc_local_port *localport, 864 766 struct nvme_fc_remote_port *remoteport, 865 767 struct nvmefc_ls_req *lsreq) 768 + { 769 + } 770 + 771 + static void 772 + fcloop_t2h_ls_abort(struct nvmet_fc_target_port *targetport, 773 + void *hosthandle, struct nvmefc_ls_req *lsreq) 866 774 { 867 775 } 868 776 ··· 971 867 { 972 868 struct fcloop_tport *tport = targetport->private; 973 869 870 + flush_work(&tport->ls_work); 974 871 fcloop_nport_put(tport->nport); 975 872 } 976 873 ··· 984 879 .remoteport_delete = fcloop_remoteport_delete, 985 880 .create_queue = fcloop_create_queue, 986 881 .delete_queue = fcloop_delete_queue, 987 - .ls_req = fcloop_ls_req, 882 + .ls_req = fcloop_h2t_ls_req, 988 883 .fcp_io = fcloop_fcp_req, 989 - .ls_abort = fcloop_ls_abort, 884 + .ls_abort = fcloop_h2t_ls_abort, 990 885 .fcp_abort = fcloop_fcp_abort, 886 + .xmt_ls_rsp = fcloop_t2h_xmt_ls_rsp, 991 887 .max_hw_queues = FCLOOP_HW_QUEUES, 992 888 .max_sgl_segments = FCLOOP_SGL_SEGS, 993 889 .max_dif_sgl_segments = FCLOOP_SGL_SEGS, ··· 1002 896 1003 897 static struct nvmet_fc_target_template tgttemplate = { 1004 898 .targetport_delete = fcloop_targetport_delete, 1005 - .xmt_ls_rsp = fcloop_xmt_ls_rsp, 899 + .xmt_ls_rsp = fcloop_h2t_xmt_ls_rsp, 1006 900 .fcp_op = fcloop_fcp_op, 1007 901 .fcp_abort = fcloop_tgt_fcp_abort, 1008 902 .fcp_req_release = fcloop_fcp_req_release, 1009 903 
.discovery_event = fcloop_tgt_discovery_evt, 904 + .ls_req = fcloop_t2h_ls_req, 905 + .ls_abort = fcloop_t2h_ls_abort, 906 + .host_release = fcloop_t2h_host_release, 1010 907 .max_hw_queues = FCLOOP_HW_QUEUES, 1011 908 .max_sgl_segments = FCLOOP_SGL_SEGS, 1012 909 .max_dif_sgl_segments = FCLOOP_SGL_SEGS, ··· 1018 909 .target_features = 0, 1019 910 /* sizes of additional private data for data structures */ 1020 911 .target_priv_sz = sizeof(struct fcloop_tport), 912 + .lsrqst_priv_sz = sizeof(struct fcloop_lsreq), 1021 913 }; 1022 914 1023 915 static ssize_t ··· 1368 1258 tport->nport = nport; 1369 1259 tport->lport = nport->lport; 1370 1260 nport->tport = tport; 1261 + spin_lock_init(&tport->lock); 1262 + INIT_WORK(&tport->ls_work, fcloop_tport_lsrqst_work); 1263 + INIT_LIST_HEAD(&tport->ls_list); 1371 1264 1372 1265 return count; 1373 1266 }
+114 -4
drivers/nvme/target/io-cmd-bdev.c
··· 47 47 id->nows = to0based(ql->io_opt / ql->logical_block_size); 48 48 } 49 49 50 + static void nvmet_bdev_ns_enable_integrity(struct nvmet_ns *ns) 51 + { 52 + struct blk_integrity *bi = bdev_get_integrity(ns->bdev); 53 + 54 + if (bi) { 55 + ns->metadata_size = bi->tuple_size; 56 + if (bi->profile == &t10_pi_type1_crc) 57 + ns->pi_type = NVME_NS_DPS_PI_TYPE1; 58 + else if (bi->profile == &t10_pi_type3_crc) 59 + ns->pi_type = NVME_NS_DPS_PI_TYPE3; 60 + else 61 + /* Unsupported metadata type */ 62 + ns->metadata_size = 0; 63 + } 64 + } 65 + 50 66 int nvmet_bdev_ns_enable(struct nvmet_ns *ns) 51 67 { 52 68 int ret; ··· 80 64 } 81 65 ns->size = i_size_read(ns->bdev->bd_inode); 82 66 ns->blksize_shift = blksize_bits(bdev_logical_block_size(ns->bdev)); 67 + 68 + ns->pi_type = 0; 69 + ns->metadata_size = 0; 70 + if (IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY_T10)) 71 + nvmet_bdev_ns_enable_integrity(ns); 72 + 83 73 return 0; 84 74 } 85 75 ··· 95 73 blkdev_put(ns->bdev, FMODE_WRITE | FMODE_READ); 96 74 ns->bdev = NULL; 97 75 } 76 + } 77 + 78 + void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns) 79 + { 80 + ns->size = i_size_read(ns->bdev->bd_inode); 98 81 } 99 82 100 83 static u16 blk_to_nvme_status(struct nvmet_req *req, blk_status_t blk_sts) ··· 169 142 bio_put(bio); 170 143 } 171 144 145 + #ifdef CONFIG_BLK_DEV_INTEGRITY 146 + static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, 147 + struct sg_mapping_iter *miter) 148 + { 149 + struct blk_integrity *bi; 150 + struct bio_integrity_payload *bip; 151 + struct block_device *bdev = req->ns->bdev; 152 + int rc; 153 + size_t resid, len; 154 + 155 + bi = bdev_get_integrity(bdev); 156 + if (unlikely(!bi)) { 157 + pr_err("Unable to locate bio_integrity\n"); 158 + return -ENODEV; 159 + } 160 + 161 + bip = bio_integrity_alloc(bio, GFP_NOIO, 162 + min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES)); 163 + if (IS_ERR(bip)) { 164 + pr_err("Unable to allocate bio_integrity_payload\n"); 165 + return PTR_ERR(bip); 
166 + } 167 + 168 + bip->bip_iter.bi_size = bio_integrity_bytes(bi, bio_sectors(bio)); 169 + /* virtual start sector must be in integrity interval units */ 170 + bip_set_seed(bip, bio->bi_iter.bi_sector >> 171 + (bi->interval_exp - SECTOR_SHIFT)); 172 + 173 + resid = bip->bip_iter.bi_size; 174 + while (resid > 0 && sg_miter_next(miter)) { 175 + len = min_t(size_t, miter->length, resid); 176 + rc = bio_integrity_add_page(bio, miter->page, len, 177 + offset_in_page(miter->addr)); 178 + if (unlikely(rc != len)) { 179 + pr_err("bio_integrity_add_page() failed; %d\n", rc); 180 + sg_miter_stop(miter); 181 + return -ENOMEM; 182 + } 183 + 184 + resid -= len; 185 + if (len < miter->length) 186 + miter->consumed -= miter->length - len; 187 + } 188 + sg_miter_stop(miter); 189 + 190 + return 0; 191 + } 192 + #else 193 + static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio, 194 + struct sg_mapping_iter *miter) 195 + { 196 + return -EINVAL; 197 + } 198 + #endif /* CONFIG_BLK_DEV_INTEGRITY */ 199 + 172 200 static void nvmet_bdev_execute_rw(struct nvmet_req *req) 173 201 { 174 202 int sg_cnt = req->sg_cnt; ··· 231 149 struct scatterlist *sg; 232 150 struct blk_plug plug; 233 151 sector_t sector; 234 - int op, i; 152 + int op, i, rc; 153 + struct sg_mapping_iter prot_miter; 154 + unsigned int iter_flags; 155 + unsigned int total_len = nvmet_rw_data_len(req) + req->metadata_len; 235 156 236 - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) 157 + if (!nvmet_check_transfer_len(req, total_len)) 237 158 return; 238 159 239 160 if (!req->sg_cnt) { ··· 248 163 op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE; 249 164 if (req->cmd->rw.control & cpu_to_le16(NVME_RW_FUA)) 250 165 op |= REQ_FUA; 166 + iter_flags = SG_MITER_TO_SG; 251 167 } else { 252 168 op = REQ_OP_READ; 169 + iter_flags = SG_MITER_FROM_SG; 253 170 } 254 171 255 172 if (is_pci_p2pdma_page(sg_page(req->sg))) ··· 273 186 bio->bi_opf = op; 274 187 275 188 blk_start_plug(&plug); 189 + if (req->metadata_len) 190 + 
sg_miter_start(&prot_miter, req->metadata_sg, 191 + req->metadata_sg_cnt, iter_flags); 192 + 276 193 for_each_sg(req->sg, sg, req->sg_cnt, i) { 277 194 while (bio_add_page(bio, sg_page(sg), sg->length, sg->offset) 278 195 != sg->length) { 279 196 struct bio *prev = bio; 197 + 198 + if (req->metadata_len) { 199 + rc = nvmet_bdev_alloc_bip(req, bio, 200 + &prot_miter); 201 + if (unlikely(rc)) { 202 + bio_io_error(bio); 203 + return; 204 + } 205 + } 280 206 281 207 bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES)); 282 208 bio_set_dev(bio, req->ns->bdev); ··· 304 204 sg_cnt--; 305 205 } 306 206 207 + if (req->metadata_len) { 208 + rc = nvmet_bdev_alloc_bip(req, bio, &prot_miter); 209 + if (unlikely(rc)) { 210 + bio_io_error(bio); 211 + return; 212 + } 213 + } 214 + 307 215 submit_bio(bio); 308 216 blk_finish_plug(&plug); 309 217 } ··· 320 212 { 321 213 struct bio *bio = &req->b.inline_bio; 322 214 323 - if (!nvmet_check_data_len(req, 0)) 215 + if (!nvmet_check_transfer_len(req, 0)) 324 216 return; 325 217 326 218 bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec)); ··· 412 304 sector_t nr_sector; 413 305 int ret; 414 306 415 - if (!nvmet_check_data_len(req, 0)) 307 + if (!nvmet_check_transfer_len(req, 0)) 416 308 return; 417 309 418 310 sector = le64_to_cpu(write_zeroes->slba) << ··· 439 331 case nvme_cmd_read: 440 332 case nvme_cmd_write: 441 333 req->execute = nvmet_bdev_execute_rw; 334 + if (req->sq->ctrl->pi_support && nvmet_ns_has_pi(req->ns)) 335 + req->metadata_len = nvmet_rw_metadata_len(req); 442 336 return 0; 443 337 case nvme_cmd_flush: 444 338 req->execute = nvmet_bdev_execute_flush;
+16 -7
drivers/nvme/target/io-cmd-file.c
··· 13 13 #define NVMET_MAX_MPOOL_BVEC 16 14 14 #define NVMET_MIN_MPOOL_OBJ 16 15 15 16 + int nvmet_file_ns_revalidate(struct nvmet_ns *ns) 17 + { 18 + struct kstat stat; 19 + int ret; 20 + 21 + ret = vfs_getattr(&ns->file->f_path, &stat, STATX_SIZE, 22 + AT_STATX_FORCE_SYNC); 23 + if (!ret) 24 + ns->size = stat.size; 25 + return ret; 26 + } 27 + 16 28 void nvmet_file_ns_disable(struct nvmet_ns *ns) 17 29 { 18 30 if (ns->file) { ··· 42 30 int nvmet_file_ns_enable(struct nvmet_ns *ns) 43 31 { 44 32 int flags = O_RDWR | O_LARGEFILE; 45 - struct kstat stat; 46 33 int ret; 47 34 48 35 if (!ns->buffered_io) ··· 54 43 return PTR_ERR(ns->file); 55 44 } 56 45 57 - ret = vfs_getattr(&ns->file->f_path, 58 - &stat, STATX_SIZE, AT_STATX_FORCE_SYNC); 46 + ret = nvmet_file_ns_revalidate(ns); 59 47 if (ret) 60 48 goto err; 61 49 62 - ns->size = stat.size; 63 50 /* 64 51 * i_blkbits can be greater than the universally accepted upper bound, 65 52 * so make sure we export a sane namespace lba_shift. ··· 241 232 { 242 233 ssize_t nr_bvec = req->sg_cnt; 243 234 244 - if (!nvmet_check_data_len(req, nvmet_rw_len(req))) 235 + if (!nvmet_check_transfer_len(req, nvmet_rw_data_len(req))) 245 236 return; 246 237 247 238 if (!req->sg_cnt || !nr_bvec) { ··· 285 276 286 277 static void nvmet_file_execute_flush(struct nvmet_req *req) 287 278 { 288 - if (!nvmet_check_data_len(req, 0)) 279 + if (!nvmet_check_transfer_len(req, 0)) 289 280 return; 290 281 INIT_WORK(&req->f.work, nvmet_file_flush_work); 291 282 schedule_work(&req->f.work); ··· 375 366 376 367 static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) 377 368 { 378 - if (!nvmet_check_data_len(req, 0)) 369 + if (!nvmet_check_transfer_len(req, 0)) 379 370 return; 380 371 INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); 381 372 schedule_work(&req->f.work);
+32 -4
drivers/nvme/target/nvmet.h
··· 19 19 #include <linux/rcupdate.h> 20 20 #include <linux/blkdev.h> 21 21 #include <linux/radix-tree.h> 22 + #include <linux/t10-pi.h> 22 23 23 24 #define NVMET_ASYNC_EVENTS 4 24 25 #define NVMET_ERROR_LOG_SLOTS 128 ··· 78 77 79 78 int use_p2pmem; 80 79 struct pci_dev *p2p_dev; 80 + int pi_type; 81 + int metadata_size; 81 82 }; 82 83 83 84 static inline struct nvmet_ns *to_nvmet_ns(struct config_item *item) ··· 145 142 bool enabled; 146 143 int inline_data_size; 147 144 const struct nvmet_fabrics_ops *tr_ops; 145 + bool pi_enable; 148 146 }; 149 147 150 148 static inline struct nvmet_port *to_nvmet_port(struct config_item *item) ··· 205 201 spinlock_t error_lock; 206 202 u64 err_counter; 207 203 struct nvme_error_slot slots[NVMET_ERROR_LOG_SLOTS]; 204 + bool pi_support; 208 205 }; 209 206 210 207 struct nvmet_subsys_model { ··· 235 230 u64 ver; 236 231 u64 serial; 237 232 char *subsysnqn; 233 + bool pi_support; 238 234 239 235 struct config_group group; 240 236 ··· 287 281 unsigned int type; 288 282 unsigned int msdbd; 289 283 bool has_keyed_sgls : 1; 284 + bool metadata_support : 1; 290 285 void (*queue_response)(struct nvmet_req *req); 291 286 int (*add_port)(struct nvmet_port *port); 292 287 void (*remove_port)(struct nvmet_port *port); ··· 309 302 struct nvmet_cq *cq; 310 303 struct nvmet_ns *ns; 311 304 struct scatterlist *sg; 305 + struct scatterlist *metadata_sg; 312 306 struct bio_vec inline_bvec[NVMET_MAX_INLINE_BIOVEC]; 313 307 union { 314 308 struct { ··· 323 315 } f; 324 316 }; 325 317 int sg_cnt; 318 + int metadata_sg_cnt; 326 319 /* data length as parsed from the SGL descriptor: */ 327 320 size_t transfer_len; 321 + size_t metadata_len; 328 322 329 323 struct nvmet_port *port; 330 324 ··· 394 384 bool nvmet_req_init(struct nvmet_req *req, struct nvmet_cq *cq, 395 385 struct nvmet_sq *sq, const struct nvmet_fabrics_ops *ops); 396 386 void nvmet_req_uninit(struct nvmet_req *req); 397 - bool nvmet_check_data_len(struct nvmet_req *req, size_t data_len); 
387 + bool nvmet_check_transfer_len(struct nvmet_req *req, size_t len); 398 388 bool nvmet_check_data_len_lte(struct nvmet_req *req, size_t data_len); 399 389 void nvmet_req_complete(struct nvmet_req *req, u16 status); 400 - int nvmet_req_alloc_sgl(struct nvmet_req *req); 401 - void nvmet_req_free_sgl(struct nvmet_req *req); 390 + int nvmet_req_alloc_sgls(struct nvmet_req *req); 391 + void nvmet_req_free_sgls(struct nvmet_req *req); 402 392 403 393 void nvmet_execute_keep_alive(struct nvmet_req *req); 404 394 ··· 508 498 u16 nvmet_bdev_flush(struct nvmet_req *req); 509 499 u16 nvmet_file_flush(struct nvmet_req *req); 510 500 void nvmet_ns_changed(struct nvmet_subsys *subsys, u32 nsid); 501 + void nvmet_bdev_ns_revalidate(struct nvmet_ns *ns); 502 + int nvmet_file_ns_revalidate(struct nvmet_ns *ns); 503 + void nvmet_ns_revalidate(struct nvmet_ns *ns); 511 504 512 - static inline u32 nvmet_rw_len(struct nvmet_req *req) 505 + static inline u32 nvmet_rw_data_len(struct nvmet_req *req) 513 506 { 514 507 return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) << 515 508 req->ns->blksize_shift; 509 + } 510 + 511 + static inline u32 nvmet_rw_metadata_len(struct nvmet_req *req) 512 + { 513 + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) 514 + return 0; 515 + return ((u32)le16_to_cpu(req->cmd->rw.length) + 1) * 516 + req->ns->metadata_size; 516 517 } 517 518 518 519 static inline u32 nvmet_dsm_len(struct nvmet_req *req) ··· 538 517 static inline __le16 to0based(u32 a) 539 518 { 540 519 return cpu_to_le16(max(1U, min(1U << 16, a)) - 1); 520 + } 521 + 522 + static inline bool nvmet_ns_has_pi(struct nvmet_ns *ns) 523 + { 524 + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) 525 + return false; 526 + return ns->pi_type && ns->metadata_size == sizeof(struct t10_pi_tuple); 541 527 } 542 528 543 529 #endif /* _NVMET_H */
+355 -65
drivers/nvme/target/rdma.c
··· 33 33 34 34 /* Assume mpsmin == device_page_size == 4KB */ 35 35 #define NVMET_RDMA_MAX_MDTS 8 36 + #define NVMET_RDMA_MAX_METADATA_MDTS 5 37 + 38 + struct nvmet_rdma_srq; 36 39 37 40 struct nvmet_rdma_cmd { 38 41 struct ib_sge sge[NVMET_RDMA_MAX_INLINE_SGE + 1]; ··· 44 41 struct scatterlist inline_sg[NVMET_RDMA_MAX_INLINE_SGE]; 45 42 struct nvme_command *nvme_cmd; 46 43 struct nvmet_rdma_queue *queue; 44 + struct nvmet_rdma_srq *nsrq; 47 45 }; 48 46 49 47 enum { ··· 61 57 struct nvmet_rdma_queue *queue; 62 58 63 59 struct ib_cqe read_cqe; 60 + struct ib_cqe write_cqe; 64 61 struct rdma_rw_ctx rw; 65 62 66 63 struct nvmet_req req; ··· 88 83 struct ib_cq *cq; 89 84 atomic_t sq_wr_avail; 90 85 struct nvmet_rdma_device *dev; 86 + struct nvmet_rdma_srq *nsrq; 91 87 spinlock_t state_lock; 92 88 enum nvmet_rdma_queue_state state; 93 89 struct nvmet_cq nvme_cq; ··· 106 100 107 101 int idx; 108 102 int host_qid; 103 + int comp_vector; 109 104 int recv_queue_size; 110 105 int send_queue_size; 111 106 ··· 120 113 struct delayed_work repair_work; 121 114 }; 122 115 116 + struct nvmet_rdma_srq { 117 + struct ib_srq *srq; 118 + struct nvmet_rdma_cmd *cmds; 119 + struct nvmet_rdma_device *ndev; 120 + }; 121 + 123 122 struct nvmet_rdma_device { 124 123 struct ib_device *device; 125 124 struct ib_pd *pd; 126 - struct ib_srq *srq; 127 - struct nvmet_rdma_cmd *srq_cmds; 125 + struct nvmet_rdma_srq **srqs; 126 + int srq_count; 128 127 size_t srq_size; 129 128 struct kref ref; 130 129 struct list_head entry; ··· 141 128 static bool nvmet_rdma_use_srq; 142 129 module_param_named(use_srq, nvmet_rdma_use_srq, bool, 0444); 143 130 MODULE_PARM_DESC(use_srq, "Use shared receive queue."); 131 + 132 + static int srq_size_set(const char *val, const struct kernel_param *kp); 133 + static const struct kernel_param_ops srq_size_ops = { 134 + .set = srq_size_set, 135 + .get = param_get_int, 136 + }; 137 + 138 + static int nvmet_rdma_srq_size = 1024; 139 + module_param_cb(srq_size, 
&srq_size_ops, &nvmet_rdma_srq_size, 0644); 140 + MODULE_PARM_DESC(srq_size, "set Shared Receive Queue (SRQ) size, should >= 256 (default: 1024)"); 144 141 145 142 static DEFINE_IDA(nvmet_rdma_queue_ida); 146 143 static LIST_HEAD(nvmet_rdma_queue_list); ··· 163 140 static void nvmet_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc); 164 141 static void nvmet_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); 165 142 static void nvmet_rdma_read_data_done(struct ib_cq *cq, struct ib_wc *wc); 143 + static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc); 166 144 static void nvmet_rdma_qp_event(struct ib_event *event, void *priv); 167 145 static void nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue); 168 146 static void nvmet_rdma_free_rsp(struct nvmet_rdma_device *ndev, ··· 172 148 struct nvmet_rdma_rsp *r); 173 149 174 150 static const struct nvmet_fabrics_ops nvmet_rdma_ops; 151 + 152 + static int srq_size_set(const char *val, const struct kernel_param *kp) 153 + { 154 + int n = 0, ret; 155 + 156 + ret = kstrtoint(val, 10, &n); 157 + if (ret != 0 || n < 256) 158 + return -EINVAL; 159 + 160 + return param_set_int(val, kp); 161 + } 175 162 176 163 static int num_pages(int len) 177 164 { ··· 426 391 427 392 /* Data In / RDMA READ */ 428 393 r->read_cqe.done = nvmet_rdma_read_data_done; 394 + /* Data Out / RDMA WRITE */ 395 + r->write_cqe.done = nvmet_rdma_write_data_done; 396 + 429 397 return 0; 430 398 431 399 out_free_rsp: ··· 504 466 cmd->sge[0].addr, cmd->sge[0].length, 505 467 DMA_FROM_DEVICE); 506 468 507 - if (ndev->srq) 508 - ret = ib_post_srq_recv(ndev->srq, &cmd->wr, NULL); 469 + if (cmd->nsrq) 470 + ret = ib_post_srq_recv(cmd->nsrq->srq, &cmd->wr, NULL); 509 471 else 510 472 ret = ib_post_recv(cmd->queue->qp, &cmd->wr, NULL); 511 473 ··· 538 500 spin_unlock(&queue->rsp_wr_wait_lock); 539 501 } 540 502 503 + static u16 nvmet_rdma_check_pi_status(struct ib_mr *sig_mr) 504 + { 505 + struct ib_mr_status mr_status; 506 + int ret; 
507 + u16 status = 0; 508 + 509 + ret = ib_check_mr_status(sig_mr, IB_MR_CHECK_SIG_STATUS, &mr_status); 510 + if (ret) { 511 + pr_err("ib_check_mr_status failed, ret %d\n", ret); 512 + return NVME_SC_INVALID_PI; 513 + } 514 + 515 + if (mr_status.fail_status & IB_MR_CHECK_SIG_STATUS) { 516 + switch (mr_status.sig_err.err_type) { 517 + case IB_SIG_BAD_GUARD: 518 + status = NVME_SC_GUARD_CHECK; 519 + break; 520 + case IB_SIG_BAD_REFTAG: 521 + status = NVME_SC_REFTAG_CHECK; 522 + break; 523 + case IB_SIG_BAD_APPTAG: 524 + status = NVME_SC_APPTAG_CHECK; 525 + break; 526 + } 527 + pr_err("PI error found type %d expected 0x%x vs actual 0x%x\n", 528 + mr_status.sig_err.err_type, 529 + mr_status.sig_err.expected, 530 + mr_status.sig_err.actual); 531 + } 532 + 533 + return status; 534 + } 535 + 536 + static void nvmet_rdma_set_sig_domain(struct blk_integrity *bi, 537 + struct nvme_command *cmd, struct ib_sig_domain *domain, 538 + u16 control, u8 pi_type) 539 + { 540 + domain->sig_type = IB_SIG_TYPE_T10_DIF; 541 + domain->sig.dif.bg_type = IB_T10DIF_CRC; 542 + domain->sig.dif.pi_interval = 1 << bi->interval_exp; 543 + domain->sig.dif.ref_tag = le32_to_cpu(cmd->rw.reftag); 544 + if (control & NVME_RW_PRINFO_PRCHK_REF) 545 + domain->sig.dif.ref_remap = true; 546 + 547 + domain->sig.dif.app_tag = le16_to_cpu(cmd->rw.apptag); 548 + domain->sig.dif.apptag_check_mask = le16_to_cpu(cmd->rw.appmask); 549 + domain->sig.dif.app_escape = true; 550 + if (pi_type == NVME_NS_DPS_PI_TYPE3) 551 + domain->sig.dif.ref_escape = true; 552 + } 553 + 554 + static void nvmet_rdma_set_sig_attrs(struct nvmet_req *req, 555 + struct ib_sig_attrs *sig_attrs) 556 + { 557 + struct nvme_command *cmd = req->cmd; 558 + u16 control = le16_to_cpu(cmd->rw.control); 559 + u8 pi_type = req->ns->pi_type; 560 + struct blk_integrity *bi; 561 + 562 + bi = bdev_get_integrity(req->ns->bdev); 563 + 564 + memset(sig_attrs, 0, sizeof(*sig_attrs)); 565 + 566 + if (control & NVME_RW_PRINFO_PRACT) { 567 + /* for 
WRITE_INSERT/READ_STRIP no wire domain */ 568 + sig_attrs->wire.sig_type = IB_SIG_TYPE_NONE; 569 + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, 570 + pi_type); 571 + /* Clear the PRACT bit since HCA will generate/verify the PI */ 572 + control &= ~NVME_RW_PRINFO_PRACT; 573 + cmd->rw.control = cpu_to_le16(control); 574 + /* PI is added by the HW */ 575 + req->transfer_len += req->metadata_len; 576 + } else { 577 + /* for WRITE_PASS/READ_PASS both wire/memory domains exist */ 578 + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->wire, control, 579 + pi_type); 580 + nvmet_rdma_set_sig_domain(bi, cmd, &sig_attrs->mem, control, 581 + pi_type); 582 + } 583 + 584 + if (control & NVME_RW_PRINFO_PRCHK_REF) 585 + sig_attrs->check_mask |= IB_SIG_CHECK_REFTAG; 586 + if (control & NVME_RW_PRINFO_PRCHK_GUARD) 587 + sig_attrs->check_mask |= IB_SIG_CHECK_GUARD; 588 + if (control & NVME_RW_PRINFO_PRCHK_APP) 589 + sig_attrs->check_mask |= IB_SIG_CHECK_APPTAG; 590 + } 591 + 592 + static int nvmet_rdma_rw_ctx_init(struct nvmet_rdma_rsp *rsp, u64 addr, u32 key, 593 + struct ib_sig_attrs *sig_attrs) 594 + { 595 + struct rdma_cm_id *cm_id = rsp->queue->cm_id; 596 + struct nvmet_req *req = &rsp->req; 597 + int ret; 598 + 599 + if (req->metadata_len) 600 + ret = rdma_rw_ctx_signature_init(&rsp->rw, cm_id->qp, 601 + cm_id->port_num, req->sg, req->sg_cnt, 602 + req->metadata_sg, req->metadata_sg_cnt, sig_attrs, 603 + addr, key, nvmet_data_dir(req)); 604 + else 605 + ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, 606 + req->sg, req->sg_cnt, 0, addr, key, 607 + nvmet_data_dir(req)); 608 + 609 + return ret; 610 + } 611 + 612 + static void nvmet_rdma_rw_ctx_destroy(struct nvmet_rdma_rsp *rsp) 613 + { 614 + struct rdma_cm_id *cm_id = rsp->queue->cm_id; 615 + struct nvmet_req *req = &rsp->req; 616 + 617 + if (req->metadata_len) 618 + rdma_rw_ctx_destroy_signature(&rsp->rw, cm_id->qp, 619 + cm_id->port_num, req->sg, req->sg_cnt, 620 + req->metadata_sg, 
req->metadata_sg_cnt, 621 + nvmet_data_dir(req)); 622 + else 623 + rdma_rw_ctx_destroy(&rsp->rw, cm_id->qp, cm_id->port_num, 624 + req->sg, req->sg_cnt, nvmet_data_dir(req)); 625 + } 541 626 542 627 static void nvmet_rdma_release_rsp(struct nvmet_rdma_rsp *rsp) 543 628 { ··· 668 507 669 508 atomic_add(1 + rsp->n_rdma, &queue->sq_wr_avail); 670 509 671 - if (rsp->n_rdma) { 672 - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, 673 - queue->cm_id->port_num, rsp->req.sg, 674 - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); 675 - } 510 + if (rsp->n_rdma) 511 + nvmet_rdma_rw_ctx_destroy(rsp); 676 512 677 513 if (rsp->req.sg != rsp->cmd->inline_sg) 678 - nvmet_req_free_sgl(&rsp->req); 514 + nvmet_req_free_sgls(&rsp->req); 679 515 680 516 if (unlikely(!list_empty_careful(&queue->rsp_wr_wait_list))) 681 517 nvmet_rdma_process_wr_wait_list(queue); ··· 724 566 rsp->send_wr.opcode = IB_WR_SEND; 725 567 } 726 568 727 - if (nvmet_rdma_need_data_out(rsp)) 728 - first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, 729 - cm_id->port_num, NULL, &rsp->send_wr); 730 - else 569 + if (nvmet_rdma_need_data_out(rsp)) { 570 + if (rsp->req.metadata_len) 571 + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, 572 + cm_id->port_num, &rsp->write_cqe, NULL); 573 + else 574 + first_wr = rdma_rw_ctx_wrs(&rsp->rw, cm_id->qp, 575 + cm_id->port_num, NULL, &rsp->send_wr); 576 + } else { 731 577 first_wr = &rsp->send_wr; 578 + } 732 579 733 580 nvmet_rdma_post_recv(rsp->queue->dev, rsp->cmd); 734 581 ··· 752 589 struct nvmet_rdma_rsp *rsp = 753 590 container_of(wc->wr_cqe, struct nvmet_rdma_rsp, read_cqe); 754 591 struct nvmet_rdma_queue *queue = cq->cq_context; 592 + u16 status = 0; 755 593 756 594 WARN_ON(rsp->n_rdma <= 0); 757 595 atomic_add(rsp->n_rdma, &queue->sq_wr_avail); 758 - rdma_rw_ctx_destroy(&rsp->rw, queue->qp, 759 - queue->cm_id->port_num, rsp->req.sg, 760 - rsp->req.sg_cnt, nvmet_data_dir(&rsp->req)); 761 596 rsp->n_rdma = 0; 762 597 763 598 if (unlikely(wc->status != IB_WC_SUCCESS)) { 599 + 
nvmet_rdma_rw_ctx_destroy(rsp); 764 600 nvmet_req_uninit(&rsp->req); 765 601 nvmet_rdma_release_rsp(rsp); 766 602 if (wc->status != IB_WC_WR_FLUSH_ERR) { ··· 770 608 return; 771 609 } 772 610 773 - rsp->req.execute(&rsp->req); 611 + if (rsp->req.metadata_len) 612 + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); 613 + nvmet_rdma_rw_ctx_destroy(rsp); 614 + 615 + if (unlikely(status)) 616 + nvmet_req_complete(&rsp->req, status); 617 + else 618 + rsp->req.execute(&rsp->req); 619 + } 620 + 621 + static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc) 622 + { 623 + struct nvmet_rdma_rsp *rsp = 624 + container_of(wc->wr_cqe, struct nvmet_rdma_rsp, write_cqe); 625 + struct nvmet_rdma_queue *queue = cq->cq_context; 626 + struct rdma_cm_id *cm_id = rsp->queue->cm_id; 627 + u16 status; 628 + 629 + if (!IS_ENABLED(CONFIG_BLK_DEV_INTEGRITY)) 630 + return; 631 + 632 + WARN_ON(rsp->n_rdma <= 0); 633 + atomic_add(rsp->n_rdma, &queue->sq_wr_avail); 634 + rsp->n_rdma = 0; 635 + 636 + if (unlikely(wc->status != IB_WC_SUCCESS)) { 637 + nvmet_rdma_rw_ctx_destroy(rsp); 638 + nvmet_req_uninit(&rsp->req); 639 + nvmet_rdma_release_rsp(rsp); 640 + if (wc->status != IB_WC_WR_FLUSH_ERR) { 641 + pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n", 642 + wc->wr_cqe, ib_wc_status_msg(wc->status), 643 + wc->status); 644 + nvmet_rdma_error_comp(queue); 645 + } 646 + return; 647 + } 648 + 649 + /* 650 + * Upon RDMA completion check the signature status 651 + * - if succeeded send good NVMe response 652 + * - if failed send bad NVMe response with appropriate error 653 + */ 654 + status = nvmet_rdma_check_pi_status(rsp->rw.reg->mr); 655 + if (unlikely(status)) 656 + rsp->req.cqe->status = cpu_to_le16(status << 1); 657 + nvmet_rdma_rw_ctx_destroy(rsp); 658 + 659 + if (unlikely(ib_post_send(cm_id->qp, &rsp->send_wr, NULL))) { 660 + pr_err("sending cmd response failed\n"); 661 + nvmet_rdma_release_rsp(rsp); 662 + } 774 663 } 775 664 776 665 static void 
nvmet_rdma_use_inline_sg(struct nvmet_rdma_rsp *rsp, u32 len, ··· 878 665 static u16 nvmet_rdma_map_sgl_keyed(struct nvmet_rdma_rsp *rsp, 879 666 struct nvme_keyed_sgl_desc *sgl, bool invalidate) 880 667 { 881 - struct rdma_cm_id *cm_id = rsp->queue->cm_id; 882 668 u64 addr = le64_to_cpu(sgl->addr); 883 669 u32 key = get_unaligned_le32(sgl->key); 670 + struct ib_sig_attrs sig_attrs; 884 671 int ret; 885 672 886 673 rsp->req.transfer_len = get_unaligned_le24(sgl->length); ··· 889 676 if (!rsp->req.transfer_len) 890 677 return 0; 891 678 892 - ret = nvmet_req_alloc_sgl(&rsp->req); 679 + if (rsp->req.metadata_len) 680 + nvmet_rdma_set_sig_attrs(&rsp->req, &sig_attrs); 681 + 682 + ret = nvmet_req_alloc_sgls(&rsp->req); 893 683 if (unlikely(ret < 0)) 894 684 goto error_out; 895 685 896 - ret = rdma_rw_ctx_init(&rsp->rw, cm_id->qp, cm_id->port_num, 897 - rsp->req.sg, rsp->req.sg_cnt, 0, addr, key, 898 - nvmet_data_dir(&rsp->req)); 686 + ret = nvmet_rdma_rw_ctx_init(rsp, addr, key, &sig_attrs); 899 687 if (unlikely(ret < 0)) 900 688 goto error_out; 901 689 rsp->n_rdma += ret; ··· 1059 845 nvmet_rdma_handle_command(queue, rsp); 1060 846 } 1061 847 1062 - static void nvmet_rdma_destroy_srq(struct nvmet_rdma_device *ndev) 848 + static void nvmet_rdma_destroy_srq(struct nvmet_rdma_srq *nsrq) 1063 849 { 1064 - if (!ndev->srq) 1065 - return; 850 + nvmet_rdma_free_cmds(nsrq->ndev, nsrq->cmds, nsrq->ndev->srq_size, 851 + false); 852 + ib_destroy_srq(nsrq->srq); 1066 853 1067 - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); 1068 - ib_destroy_srq(ndev->srq); 854 + kfree(nsrq); 1069 855 } 1070 856 1071 - static int nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) 857 + static void nvmet_rdma_destroy_srqs(struct nvmet_rdma_device *ndev) 858 + { 859 + int i; 860 + 861 + if (!ndev->srqs) 862 + return; 863 + 864 + for (i = 0; i < ndev->srq_count; i++) 865 + nvmet_rdma_destroy_srq(ndev->srqs[i]); 866 + 867 + kfree(ndev->srqs); 868 + } 869 + 870 + static struct 
nvmet_rdma_srq * 871 + nvmet_rdma_init_srq(struct nvmet_rdma_device *ndev) 1072 872 { 1073 873 struct ib_srq_init_attr srq_attr = { NULL, }; 874 + size_t srq_size = ndev->srq_size; 875 + struct nvmet_rdma_srq *nsrq; 1074 876 struct ib_srq *srq; 1075 - size_t srq_size; 1076 877 int ret, i; 1077 878 1078 - srq_size = 4095; /* XXX: tune */ 879 + nsrq = kzalloc(sizeof(*nsrq), GFP_KERNEL); 880 + if (!nsrq) 881 + return ERR_PTR(-ENOMEM); 1079 882 1080 883 srq_attr.attr.max_wr = srq_size; 1081 884 srq_attr.attr.max_sge = 1 + ndev->inline_page_count; ··· 1100 869 srq_attr.srq_type = IB_SRQT_BASIC; 1101 870 srq = ib_create_srq(ndev->pd, &srq_attr); 1102 871 if (IS_ERR(srq)) { 872 + ret = PTR_ERR(srq); 873 + goto out_free; 874 + } 875 + 876 + nsrq->cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); 877 + if (IS_ERR(nsrq->cmds)) { 878 + ret = PTR_ERR(nsrq->cmds); 879 + goto out_destroy_srq; 880 + } 881 + 882 + nsrq->srq = srq; 883 + nsrq->ndev = ndev; 884 + 885 + for (i = 0; i < srq_size; i++) { 886 + nsrq->cmds[i].nsrq = nsrq; 887 + ret = nvmet_rdma_post_recv(ndev, &nsrq->cmds[i]); 888 + if (ret) 889 + goto out_free_cmds; 890 + } 891 + 892 + return nsrq; 893 + 894 + out_free_cmds: 895 + nvmet_rdma_free_cmds(ndev, nsrq->cmds, srq_size, false); 896 + out_destroy_srq: 897 + ib_destroy_srq(srq); 898 + out_free: 899 + kfree(nsrq); 900 + return ERR_PTR(ret); 901 + } 902 + 903 + static int nvmet_rdma_init_srqs(struct nvmet_rdma_device *ndev) 904 + { 905 + int i, ret; 906 + 907 + if (!ndev->device->attrs.max_srq_wr || !ndev->device->attrs.max_srq) { 1103 908 /* 1104 909 * If SRQs aren't supported we just go ahead and use normal 1105 910 * non-shared receive queues. 
··· 1144 877 return 0; 1145 878 } 1146 879 1147 - ndev->srq_cmds = nvmet_rdma_alloc_cmds(ndev, srq_size, false); 1148 - if (IS_ERR(ndev->srq_cmds)) { 1149 - ret = PTR_ERR(ndev->srq_cmds); 1150 - goto out_destroy_srq; 1151 - } 880 + ndev->srq_size = min(ndev->device->attrs.max_srq_wr, 881 + nvmet_rdma_srq_size); 882 + ndev->srq_count = min(ndev->device->num_comp_vectors, 883 + ndev->device->attrs.max_srq); 1152 884 1153 - ndev->srq = srq; 1154 - ndev->srq_size = srq_size; 885 + ndev->srqs = kcalloc(ndev->srq_count, sizeof(*ndev->srqs), GFP_KERNEL); 886 + if (!ndev->srqs) 887 + return -ENOMEM; 1155 888 1156 - for (i = 0; i < srq_size; i++) { 1157 - ret = nvmet_rdma_post_recv(ndev, &ndev->srq_cmds[i]); 1158 - if (ret) 1159 - goto out_free_cmds; 889 + for (i = 0; i < ndev->srq_count; i++) { 890 + ndev->srqs[i] = nvmet_rdma_init_srq(ndev); 891 + if (IS_ERR(ndev->srqs[i])) { 892 + ret = PTR_ERR(ndev->srqs[i]); 893 + goto err_srq; 894 + } 1160 895 } 1161 896 1162 897 return 0; 1163 898 1164 - out_free_cmds: 1165 - nvmet_rdma_free_cmds(ndev, ndev->srq_cmds, ndev->srq_size, false); 1166 - out_destroy_srq: 1167 - ib_destroy_srq(srq); 899 + err_srq: 900 + while (--i >= 0) 901 + nvmet_rdma_destroy_srq(ndev->srqs[i]); 902 + kfree(ndev->srqs); 1168 903 return ret; 1169 904 } 1170 905 ··· 1179 910 list_del(&ndev->entry); 1180 911 mutex_unlock(&device_list_mutex); 1181 912 1182 - nvmet_rdma_destroy_srq(ndev); 913 + nvmet_rdma_destroy_srqs(ndev); 1183 914 ib_dealloc_pd(ndev->pd); 1184 915 1185 916 kfree(ndev); ··· 1226 957 goto out_free_dev; 1227 958 1228 959 if (nvmet_rdma_use_srq) { 1229 - ret = nvmet_rdma_init_srq(ndev); 960 + ret = nvmet_rdma_init_srqs(ndev); 1230 961 if (ret) 1231 962 goto out_free_pd; 1232 963 } ··· 1250 981 { 1251 982 struct ib_qp_init_attr qp_attr; 1252 983 struct nvmet_rdma_device *ndev = queue->dev; 1253 - int comp_vector, nr_cqe, ret, i, factor; 1254 - 1255 - /* 1256 - * Spread the io queues across completion vectors, 1257 - * but still keep all admin 
queues on vector 0. 1258 - */ 1259 - comp_vector = !queue->host_qid ? 0 : 1260 - queue->idx % ndev->device->num_comp_vectors; 984 + int nr_cqe, ret, i, factor; 1261 985 1262 986 /* 1263 987 * Reserve CQ slots for RECV + RDMA_READ/RDMA_WRITE + RDMA_SEND. ··· 1258 996 nr_cqe = queue->recv_queue_size + 2 * queue->send_queue_size; 1259 997 1260 998 queue->cq = ib_alloc_cq(ndev->device, queue, 1261 - nr_cqe + 1, comp_vector, 999 + nr_cqe + 1, queue->comp_vector, 1262 1000 IB_POLL_WORKQUEUE); 1263 1001 if (IS_ERR(queue->cq)) { 1264 1002 ret = PTR_ERR(queue->cq); ··· 1282 1020 qp_attr.cap.max_send_sge = max(ndev->device->attrs.max_sge_rd, 1283 1021 ndev->device->attrs.max_send_sge); 1284 1022 1285 - if (ndev->srq) { 1286 - qp_attr.srq = ndev->srq; 1023 + if (queue->nsrq) { 1024 + qp_attr.srq = queue->nsrq->srq; 1287 1025 } else { 1288 1026 /* +1 for drain */ 1289 1027 qp_attr.cap.max_recv_wr = 1 + queue->recv_queue_size; 1290 1028 qp_attr.cap.max_recv_sge = 1 + ndev->inline_page_count; 1291 1029 } 1030 + 1031 + if (queue->port->pi_enable && queue->host_qid) 1032 + qp_attr.create_flags |= IB_QP_CREATE_INTEGRITY_EN; 1292 1033 1293 1034 ret = rdma_create_qp(queue->cm_id, ndev->pd, &qp_attr); 1294 1035 if (ret) { ··· 1306 1041 __func__, queue->cq->cqe, qp_attr.cap.max_send_sge, 1307 1042 qp_attr.cap.max_send_wr, queue->cm_id); 1308 1043 1309 - if (!ndev->srq) { 1044 + if (!queue->nsrq) { 1310 1045 for (i = 0; i < queue->recv_queue_size; i++) { 1311 1046 queue->cmds[i].queue = queue; 1312 1047 ret = nvmet_rdma_post_recv(ndev, &queue->cmds[i]); ··· 1341 1076 nvmet_sq_destroy(&queue->nvme_sq); 1342 1077 1343 1078 nvmet_rdma_destroy_queue_ib(queue); 1344 - if (!queue->dev->srq) { 1079 + if (!queue->nsrq) { 1345 1080 nvmet_rdma_free_cmds(queue->dev, queue->cmds, 1346 1081 queue->recv_queue_size, 1347 1082 !queue->host_qid); ··· 1411 1146 struct rdma_cm_id *cm_id, 1412 1147 struct rdma_cm_event *event) 1413 1148 { 1149 + struct nvmet_rdma_port *port = cm_id->context; 1414 1150 
struct nvmet_rdma_queue *queue; 1415 1151 int ret; 1416 1152 ··· 1438 1172 INIT_WORK(&queue->release_work, nvmet_rdma_release_queue_work); 1439 1173 queue->dev = ndev; 1440 1174 queue->cm_id = cm_id; 1175 + queue->port = port->nport; 1441 1176 1442 1177 spin_lock_init(&queue->state_lock); 1443 1178 queue->state = NVMET_RDMA_Q_CONNECTING; ··· 1455 1188 goto out_destroy_sq; 1456 1189 } 1457 1190 1191 + /* 1192 + * Spread the io queues across completion vectors, 1193 + * but still keep all admin queues on vector 0. 1194 + */ 1195 + queue->comp_vector = !queue->host_qid ? 0 : 1196 + queue->idx % ndev->device->num_comp_vectors; 1197 + 1198 + 1458 1199 ret = nvmet_rdma_alloc_rsps(queue); 1459 1200 if (ret) { 1460 1201 ret = NVME_RDMA_CM_NO_RSC; 1461 1202 goto out_ida_remove; 1462 1203 } 1463 1204 1464 - if (!ndev->srq) { 1205 + if (ndev->srqs) { 1206 + queue->nsrq = ndev->srqs[queue->comp_vector % ndev->srq_count]; 1207 + } else { 1465 1208 queue->cmds = nvmet_rdma_alloc_cmds(ndev, 1466 1209 queue->recv_queue_size, 1467 1210 !queue->host_qid); ··· 1492 1215 return queue; 1493 1216 1494 1217 out_free_cmds: 1495 - if (!ndev->srq) { 1218 + if (!queue->nsrq) { 1496 1219 nvmet_rdma_free_cmds(queue->dev, queue->cmds, 1497 1220 queue->recv_queue_size, 1498 1221 !queue->host_qid); ··· 1517 1240 switch (event->event) { 1518 1241 case IB_EVENT_COMM_EST: 1519 1242 rdma_notify(queue->cm_id, event->event); 1243 + break; 1244 + case IB_EVENT_QP_LAST_WQE_REACHED: 1245 + pr_debug("received last WQE reached event for queue=0x%p\n", 1246 + queue); 1520 1247 break; 1521 1248 default: 1522 1249 pr_err("received IB QP event: %s (%d)\n", ··· 1556 1275 static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, 1557 1276 struct rdma_cm_event *event) 1558 1277 { 1559 - struct nvmet_rdma_port *port = cm_id->context; 1560 1278 struct nvmet_rdma_device *ndev; 1561 1279 struct nvmet_rdma_queue *queue; 1562 1280 int ret = -EINVAL; ··· 1571 1291 ret = -ENOMEM; 1572 1292 goto put_device; 1573 1293 } 
1574 - queue->port = port->nport; 1575 1294 1576 1295 if (queue->host_qid == 0) { 1577 1296 /* Let inflight controller teardown complete */ ··· 1842 1563 goto out_destroy_id; 1843 1564 } 1844 1565 1566 + if (port->nport->pi_enable && 1567 + !(cm_id->device->attrs.device_cap_flags & 1568 + IB_DEVICE_INTEGRITY_HANDOVER)) { 1569 + pr_err("T10-PI is not supported for %pISpcs\n", addr); 1570 + ret = -EINVAL; 1571 + goto out_destroy_id; 1572 + } 1573 + 1845 1574 port->cm_id = cm_id; 1846 1575 return 0; 1847 1576 ··· 1959 1672 1960 1673 static u8 nvmet_rdma_get_mdts(const struct nvmet_ctrl *ctrl) 1961 1674 { 1675 + if (ctrl->pi_support) 1676 + return NVMET_RDMA_MAX_METADATA_MDTS; 1962 1677 return NVMET_RDMA_MAX_MDTS; 1963 1678 } 1964 1679 ··· 1969 1680 .type = NVMF_TRTYPE_RDMA, 1970 1681 .msdbd = 1, 1971 1682 .has_keyed_sgls = 1, 1683 + .metadata_support = 1, 1972 1684 .add_port = nvmet_rdma_add_port, 1973 1685 .remove_port = nvmet_rdma_remove_port, 1974 1686 .queue_response = nvmet_rdma_queue_response,
+30 -23
drivers/nvme/target/tcp.c
··· 325 325 kernel_sock_shutdown(queue->sock, SHUT_RDWR); 326 326 } 327 327 328 + static void nvmet_tcp_socket_error(struct nvmet_tcp_queue *queue, int status) 329 + { 330 + if (status == -EPIPE || status == -ECONNRESET) 331 + kernel_sock_shutdown(queue->sock, SHUT_RDWR); 332 + else 333 + nvmet_tcp_fatal_error(queue); 334 + } 335 + 328 336 static int nvmet_tcp_map_data(struct nvmet_tcp_cmd *cmd) 329 337 { 330 338 struct nvme_sgl_desc *sgl = &cmd->req.cmd->common.dptr.sgl; ··· 518 510 519 511 ret = kernel_sendpage(cmd->queue->sock, virt_to_page(cmd->data_pdu), 520 512 offset_in_page(cmd->data_pdu) + cmd->offset, 521 - left, MSG_DONTWAIT | MSG_MORE); 513 + left, MSG_DONTWAIT | MSG_MORE | MSG_SENDPAGE_NOTLAST); 522 514 if (ret <= 0) 523 515 return ret; 524 516 ··· 546 538 if ((!last_in_batch && cmd->queue->send_list_len) || 547 539 cmd->wbytes_done + left < cmd->req.transfer_len || 548 540 queue->data_digest || !queue->nvme_sq.sqhd_disabled) 549 - flags |= MSG_MORE; 541 + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; 550 542 551 543 ret = kernel_sendpage(cmd->queue->sock, page, cmd->offset, 552 544 left, flags); ··· 593 585 int ret; 594 586 595 587 if (!last_in_batch && cmd->queue->send_list_len) 596 - flags |= MSG_MORE; 588 + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; 597 589 else 598 590 flags |= MSG_EOR; 599 591 ··· 622 614 int ret; 623 615 624 616 if (!last_in_batch && cmd->queue->send_list_len) 625 - flags |= MSG_MORE; 617 + flags |= MSG_MORE | MSG_SENDPAGE_NOTLAST; 626 618 else 627 619 flags |= MSG_EOR; 628 620 ··· 652 644 653 645 if (!last_in_batch && cmd->queue->send_list_len) 654 646 msg.msg_flags |= MSG_MORE; 647 + else 648 + msg.msg_flags |= MSG_EOR; 655 649 656 650 ret = kernel_sendmsg(queue->sock, &msg, &iov, 1, iov.iov_len); 657 651 if (unlikely(ret <= 0)) ··· 726 716 727 717 for (i = 0; i < budget; i++) { 728 718 ret = nvmet_tcp_try_send_one(queue, i == budget - 1); 729 - if (ret <= 0) 719 + if (unlikely(ret < 0)) { 720 + nvmet_tcp_socket_error(queue, ret); 
721 + goto done; 722 + } else if (ret == 0) { 730 723 break; 724 + } 731 725 (*sends)++; 732 726 } 733 - 727 + done: 734 728 return ret; 735 729 } 736 730 ··· 1171 1157 1172 1158 for (i = 0; i < budget; i++) { 1173 1159 ret = nvmet_tcp_try_recv_one(queue); 1174 - if (ret <= 0) 1160 + if (unlikely(ret < 0)) { 1161 + nvmet_tcp_socket_error(queue, ret); 1162 + goto done; 1163 + } else if (ret == 0) { 1175 1164 break; 1165 + } 1176 1166 (*recvs)++; 1177 1167 } 1178 - 1168 + done: 1179 1169 return ret; 1180 1170 } 1181 1171 ··· 1204 1186 pending = false; 1205 1187 1206 1188 ret = nvmet_tcp_try_recv(queue, NVMET_TCP_RECV_BUDGET, &ops); 1207 - if (ret > 0) { 1189 + if (ret > 0) 1208 1190 pending = true; 1209 - } else if (ret < 0) { 1210 - if (ret == -EPIPE || ret == -ECONNRESET) 1211 - kernel_sock_shutdown(queue->sock, SHUT_RDWR); 1212 - else 1213 - nvmet_tcp_fatal_error(queue); 1191 + else if (ret < 0) 1214 1192 return; 1215 - } 1216 1193 1217 1194 ret = nvmet_tcp_try_send(queue, NVMET_TCP_SEND_BUDGET, &ops); 1218 - if (ret > 0) { 1219 - /* transmitted message/data */ 1195 + if (ret > 0) 1220 1196 pending = true; 1221 - } else if (ret < 0) { 1222 - if (ret == -EPIPE || ret == -ECONNRESET) 1223 - kernel_sock_shutdown(queue->sock, SHUT_RDWR); 1224 - else 1225 - nvmet_tcp_fatal_error(queue); 1197 + else if (ret < 0) 1226 1198 return; 1227 - } 1228 1199 1229 1200 } while (pending && ops < NVMET_TCP_IO_WORK_BUDGET); 1230 1201
+28
drivers/nvme/target/trace.h
··· 130 130 131 131 ); 132 132 133 + #define aer_name(aer) { aer, #aer } 134 + 135 + TRACE_EVENT(nvmet_async_event, 136 + TP_PROTO(struct nvmet_ctrl *ctrl, __le32 result), 137 + TP_ARGS(ctrl, result), 138 + TP_STRUCT__entry( 139 + __field(int, ctrl_id) 140 + __field(u32, result) 141 + ), 142 + TP_fast_assign( 143 + __entry->ctrl_id = ctrl->cntlid; 144 + __entry->result = (le32_to_cpu(result) & 0xff00) >> 8; 145 + ), 146 + TP_printk("nvmet%d: NVME_AEN=%#08x [%s]", 147 + __entry->ctrl_id, __entry->result, 148 + __print_symbolic(__entry->result, 149 + aer_name(NVME_AER_NOTICE_NS_CHANGED), 150 + aer_name(NVME_AER_NOTICE_ANA), 151 + aer_name(NVME_AER_NOTICE_FW_ACT_STARTING), 152 + aer_name(NVME_AER_NOTICE_DISC_CHANGED), 153 + aer_name(NVME_AER_ERROR), 154 + aer_name(NVME_AER_SMART), 155 + aer_name(NVME_AER_CSS), 156 + aer_name(NVME_AER_VS)) 157 + ) 158 + ); 159 + #undef aer_name 160 + 133 161 #endif /* _TRACE_NVMET_H */ 134 162 135 163 #undef TRACE_INCLUDE_PATH
+57 -19
drivers/s390/block/dasd_ioctl.c
··· 22 22 #include <asm/schid.h> 23 23 #include <asm/cmb.h> 24 24 #include <linux/uaccess.h> 25 + #include <linux/dasd_mod.h> 25 26 26 27 /* This is ugly... */ 27 28 #define PRINTK_HEADER "dasd_ioctl:" ··· 458 457 /* 459 458 * Return dasd information. Used for BIODASDINFO and BIODASDINFO2. 460 459 */ 461 - static int dasd_ioctl_information(struct dasd_block *block, 462 - unsigned int cmd, void __user *argp) 460 + static int __dasd_ioctl_information(struct dasd_block *block, 461 + struct dasd_information2_t *dasd_info) 463 462 { 464 - struct dasd_information2_t *dasd_info; 465 463 struct subchannel_id sch_id; 466 464 struct ccw_dev_id dev_id; 467 465 struct dasd_device *base; ··· 473 473 if (!base->discipline || !base->discipline->fill_info) 474 474 return -EINVAL; 475 475 476 - dasd_info = kzalloc(sizeof(struct dasd_information2_t), GFP_KERNEL); 477 - if (dasd_info == NULL) 478 - return -ENOMEM; 479 - 480 476 rc = base->discipline->fill_info(base, dasd_info); 481 - if (rc) { 482 - kfree(dasd_info); 477 + if (rc) 483 478 return rc; 484 - } 485 479 486 480 cdev = base->cdev; 487 481 ccw_device_get_id(cdev, &dev_id); ··· 514 520 list_for_each(l, &base->ccw_queue) 515 521 dasd_info->chanq_len++; 516 522 spin_unlock_irqrestore(&block->queue_lock, flags); 523 + return 0; 524 + } 517 525 518 - rc = 0; 519 - if (copy_to_user(argp, dasd_info, 520 - ((cmd == (unsigned int) BIODASDINFO2) ? 
521 - sizeof(struct dasd_information2_t) : 522 - sizeof(struct dasd_information_t)))) 523 - rc = -EFAULT; 526 + static int dasd_ioctl_information(struct dasd_block *block, void __user *argp, 527 + size_t copy_size) 528 + { 529 + struct dasd_information2_t *dasd_info; 530 + int error; 531 + 532 + dasd_info = kzalloc(sizeof(*dasd_info), GFP_KERNEL); 533 + if (!dasd_info) 534 + return -ENOMEM; 535 + 536 + error = __dasd_ioctl_information(block, dasd_info); 537 + if (!error && copy_to_user(argp, dasd_info, copy_size)) 538 + error = -EFAULT; 524 539 kfree(dasd_info); 525 - return rc; 540 + return error; 526 541 } 527 542 528 543 /* ··· 625 622 rc = dasd_ioctl_check_format(bdev, argp); 626 623 break; 627 624 case BIODASDINFO: 628 - rc = dasd_ioctl_information(block, cmd, argp); 625 + rc = dasd_ioctl_information(block, argp, 626 + sizeof(struct dasd_information_t)); 629 627 break; 630 628 case BIODASDINFO2: 631 - rc = dasd_ioctl_information(block, cmd, argp); 629 + rc = dasd_ioctl_information(block, argp, 630 + sizeof(struct dasd_information2_t)); 632 631 break; 633 632 case BIODASDPRRD: 634 633 rc = dasd_ioctl_read_profile(block, argp); ··· 665 660 dasd_put_device(base); 666 661 return rc; 667 662 } 663 + 664 + 665 + /** 666 + * dasd_biodasdinfo() - fill out the dasd information structure 667 + * @disk [in]: pointer to gendisk structure that references a DASD 668 + * @info [out]: pointer to the dasd_information2_t structure 669 + * 670 + * Provide access to DASD specific information. 671 + * The gendisk structure is checked if it belongs to the DASD driver by 672 + * comparing the gendisk->fops pointer. 673 + * If it does not belong to the DASD driver -EINVAL is returned. 674 + * Otherwise the provided dasd_information2_t structure is filled out. 675 + * 676 + * Returns: 677 + * %0 on success and a negative error value on failure. 
678 + */ 679 + int dasd_biodasdinfo(struct gendisk *disk, struct dasd_information2_t *info) 680 + { 681 + struct dasd_device *base; 682 + int error; 683 + 684 + if (disk->fops != &dasd_device_operations) 685 + return -EINVAL; 686 + 687 + base = dasd_device_from_gendisk(disk); 688 + if (!base) 689 + return -ENODEV; 690 + error = __dasd_ioctl_information(base->block, info); 691 + dasd_put_device(base); 692 + return error; 693 + } 694 + /* export that symbol_get in partition detection is possible */ 695 + EXPORT_SYMBOL_GPL(dasd_biodasdinfo);
+1 -1
drivers/scsi/lpfc/lpfc.h
··· 143 143 144 144 struct lpfc_nvmet_ctxbuf { 145 145 struct list_head list; 146 - struct lpfc_nvmet_rcv_ctx *context; 146 + struct lpfc_async_xchg_ctx *context; 147 147 struct lpfc_iocbq *iocbq; 148 148 struct lpfc_sglq *sglq; 149 149 struct work_struct defer_work;
-3
drivers/scsi/lpfc/lpfc_attr.c
··· 37 37 #include <scsi/scsi_transport_fc.h> 38 38 #include <scsi/fc/fc_fs.h> 39 39 40 - #include <linux/nvme-fc-driver.h> 41 - 42 40 #include "lpfc_hw4.h" 43 41 #include "lpfc_hw.h" 44 42 #include "lpfc_sli.h" ··· 46 48 #include "lpfc.h" 47 49 #include "lpfc_scsi.h" 48 50 #include "lpfc_nvme.h" 49 - #include "lpfc_nvmet.h" 50 51 #include "lpfc_logmsg.h" 51 52 #include "lpfc_version.h" 52 53 #include "lpfc_compat.h"
+6 -3
drivers/scsi/lpfc/lpfc_crtn.h
··· 24 24 25 25 struct fc_rport; 26 26 struct fc_frame_header; 27 - struct lpfc_nvmet_rcv_ctx; 28 27 void lpfc_down_link(struct lpfc_hba *, LPFC_MBOXQ_t *); 29 28 void lpfc_sli_read_link_ste(struct lpfc_hba *); 30 29 void lpfc_dump_mem(struct lpfc_hba *, LPFC_MBOXQ_t *, uint16_t, uint16_t); ··· 563 564 int lpfc_nvmet_create_targetport(struct lpfc_hba *phba); 564 565 int lpfc_nvmet_update_targetport(struct lpfc_hba *phba); 565 566 void lpfc_nvmet_destroy_targetport(struct lpfc_hba *phba); 566 - void lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, 567 - struct lpfc_sli_ring *pring, struct lpfc_iocbq *piocb); 567 + int lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, 568 + struct lpfc_async_xchg_ctx *axchg); 569 + int lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, 570 + struct lpfc_async_xchg_ctx *axchg); 568 571 void lpfc_nvmet_unsol_fcp_event(struct lpfc_hba *phba, uint32_t idx, 569 572 struct rqb_dmabuf *nvmebuf, uint64_t isr_ts, 570 573 uint8_t cqflag); 571 574 void lpfc_nvme_mod_param_dep(struct lpfc_hba *phba); 575 + void lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, 576 + struct lpfc_nodelist *ndlp); 572 577 void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, 573 578 struct lpfc_iocbq *cmdiocb, 574 579 struct lpfc_wcqe_complete *abts_cmpl);
-1
drivers/scsi/lpfc/lpfc_ct.c
··· 44 44 #include "lpfc_disc.h" 45 45 #include "lpfc.h" 46 46 #include "lpfc_scsi.h" 47 - #include "lpfc_nvme.h" 48 47 #include "lpfc_logmsg.h" 49 48 #include "lpfc_crtn.h" 50 49 #include "lpfc_version.h"
+1 -4
drivers/scsi/lpfc/lpfc_debugfs.c
··· 39 39 #include <scsi/scsi_transport_fc.h> 40 40 #include <scsi/fc/fc_fs.h> 41 41 42 - #include <linux/nvme-fc-driver.h> 43 - 44 42 #include "lpfc_hw4.h" 45 43 #include "lpfc_hw.h" 46 44 #include "lpfc_sli.h" ··· 48 50 #include "lpfc.h" 49 51 #include "lpfc_scsi.h" 50 52 #include "lpfc_nvme.h" 51 - #include "lpfc_nvmet.h" 52 53 #include "lpfc_logmsg.h" 53 54 #include "lpfc_crtn.h" 54 55 #include "lpfc_vport.h" ··· 1032 1035 { 1033 1036 struct lpfc_hba *phba = vport->phba; 1034 1037 struct lpfc_nvmet_tgtport *tgtp; 1035 - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; 1038 + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; 1036 1039 struct nvme_fc_local_port *localport; 1037 1040 struct lpfc_fc4_ctrl_stat *cstat; 1038 1041 struct lpfc_nvme_lport *lport;
+6 -2
drivers/scsi/lpfc/lpfc_hbadisc.c
··· 36 36 #include <scsi/scsi_transport_fc.h> 37 37 #include <scsi/fc/fc_fs.h> 38 38 39 - #include <linux/nvme-fc-driver.h> 40 - 41 39 #include "lpfc_hw4.h" 42 40 #include "lpfc_hw.h" 43 41 #include "lpfc_nl.h" ··· 823 825 if ((phba->sli_rev < LPFC_SLI_REV4) && 824 826 (!remove && ndlp->nlp_type & NLP_FABRIC)) 825 827 continue; 828 + 829 + /* Notify transport of connectivity loss to trigger cleanup. */ 830 + if (phba->nvmet_support && 831 + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) 832 + lpfc_nvmet_invalidate_host(phba, ndlp); 833 + 826 834 lpfc_disc_state_machine(vport, ndlp, NULL, 827 835 remove 828 836 ? NLP_EVT_DEVICE_RM
+2 -5
drivers/scsi/lpfc/lpfc_init.c
··· 50 50 #include <scsi/scsi_tcq.h> 51 51 #include <scsi/fc/fc_fs.h> 52 52 53 - #include <linux/nvme-fc-driver.h> 54 - 55 53 #include "lpfc_hw4.h" 56 54 #include "lpfc_hw.h" 57 55 #include "lpfc_sli.h" ··· 59 61 #include "lpfc.h" 60 62 #include "lpfc_scsi.h" 61 63 #include "lpfc_nvme.h" 62 - #include "lpfc_nvmet.h" 63 64 #include "lpfc_logmsg.h" 64 65 #include "lpfc_crtn.h" 65 66 #include "lpfc_vport.h" ··· 1029 1032 lpfc_hba_down_post_s4(struct lpfc_hba *phba) 1030 1033 { 1031 1034 struct lpfc_io_buf *psb, *psb_next; 1032 - struct lpfc_nvmet_rcv_ctx *ctxp, *ctxp_next; 1035 + struct lpfc_async_xchg_ctx *ctxp, *ctxp_next; 1033 1036 struct lpfc_sli4_hdw_queue *qp; 1034 1037 LIST_HEAD(aborts); 1035 1038 LIST_HEAD(nvme_aborts); ··· 1096 1099 &nvmet_aborts); 1097 1100 spin_unlock_irq(&phba->sli4_hba.abts_nvmet_buf_list_lock); 1098 1101 list_for_each_entry_safe(ctxp, ctxp_next, &nvmet_aborts, list) { 1099 - ctxp->flag &= ~(LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP); 1102 + ctxp->flag &= ~(LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP); 1100 1103 lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); 1101 1104 } 1102 1105 }
-4
drivers/scsi/lpfc/lpfc_mem.c
··· 31 31 #include <scsi/scsi_transport_fc.h> 32 32 #include <scsi/fc/fc_fs.h> 33 33 34 - #include <linux/nvme-fc-driver.h> 35 - 36 34 #include "lpfc_hw4.h" 37 35 #include "lpfc_hw.h" 38 36 #include "lpfc_sli.h" ··· 39 41 #include "lpfc_disc.h" 40 42 #include "lpfc.h" 41 43 #include "lpfc_scsi.h" 42 - #include "lpfc_nvme.h" 43 - #include "lpfc_nvmet.h" 44 44 #include "lpfc_crtn.h" 45 45 #include "lpfc_logmsg.h" 46 46
+11 -2
drivers/scsi/lpfc/lpfc_nportdisc.c
··· 32 32 #include <scsi/scsi_transport_fc.h> 33 33 #include <scsi/fc/fc_fs.h> 34 34 35 - #include <linux/nvme-fc-driver.h> 36 - 37 35 #include "lpfc_hw4.h" 38 36 #include "lpfc_hw.h" 39 37 #include "lpfc_sli.h" ··· 489 491 (unsigned long long) 490 492 wwn_to_u64(sp->portName.u.wwn)); 491 493 494 + /* Notify transport of connectivity loss to trigger cleanup. */ 495 + if (phba->nvmet_support && 496 + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) 497 + lpfc_nvmet_invalidate_host(phba, ndlp); 498 + 492 499 ndlp->nlp_prev_state = ndlp->nlp_state; 493 500 /* rport needs to be unregistered first */ 494 501 lpfc_nlp_set_state(vport, ndlp, NLP_STE_NPR_NODE); ··· 844 841 lpfc_els_rsp_acc(vport, ELS_CMD_PRLO, cmdiocb, ndlp, NULL); 845 842 else 846 843 lpfc_els_rsp_acc(vport, ELS_CMD_ACC, cmdiocb, ndlp, NULL); 844 + 845 + /* Notify transport of connectivity loss to trigger cleanup. */ 846 + if (phba->nvmet_support && 847 + ndlp->nlp_state == NLP_STE_UNMAPPED_NODE) 848 + lpfc_nvmet_invalidate_host(phba, ndlp); 849 + 847 850 if (ndlp->nlp_DID == Fabric_DID) { 848 851 if (vport->port_state <= LPFC_FDISC) 849 852 goto out;
+348 -183
drivers/scsi/lpfc/lpfc_nvme.c
··· 36 36 #include <scsi/scsi_transport_fc.h> 37 37 #include <scsi/fc/fc_fs.h> 38 38 39 - #include <linux/nvme.h> 40 - #include <linux/nvme-fc-driver.h> 41 - #include <linux/nvme-fc.h> 42 39 #include "lpfc_version.h" 43 40 #include "lpfc_hw4.h" 44 41 #include "lpfc_hw.h" ··· 393 396 return; 394 397 } 395 398 399 + /** 400 + * lpfc_nvme_handle_lsreq - Process an unsolicited NVME LS request 401 + * @phba: pointer to lpfc hba data structure. 402 + * @axchg: pointer to exchange context for the NVME LS request 403 + * 404 + * This routine is used for processing an asychronously received NVME LS 405 + * request. Any remaining validation is done and the LS is then forwarded 406 + * to the nvme-fc transport via nvme_fc_rcv_ls_req(). 407 + * 408 + * The calling sequence should be: nvme_fc_rcv_ls_req() -> (processing) 409 + * -> lpfc_nvme_xmt_ls_rsp/cmp -> req->done. 410 + * __lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg. 411 + * 412 + * Returns 0 if LS was handled and delivered to the transport 413 + * Returns 1 if LS failed to be handled and should be dropped 414 + */ 415 + int 416 + lpfc_nvme_handle_lsreq(struct lpfc_hba *phba, 417 + struct lpfc_async_xchg_ctx *axchg) 418 + { 419 + #if (IS_ENABLED(CONFIG_NVME_FC)) 420 + struct lpfc_vport *vport; 421 + struct lpfc_nvme_rport *lpfc_rport; 422 + struct nvme_fc_remote_port *remoteport; 423 + struct lpfc_nvme_lport *lport; 424 + uint32_t *payload = axchg->payload; 425 + int rc; 426 + 427 + vport = axchg->ndlp->vport; 428 + lpfc_rport = axchg->ndlp->nrport; 429 + if (!lpfc_rport) 430 + return -EINVAL; 431 + 432 + remoteport = lpfc_rport->remoteport; 433 + if (!vport->localport) 434 + return -EINVAL; 435 + 436 + lport = vport->localport->private; 437 + if (!lport) 438 + return -EINVAL; 439 + 440 + rc = nvme_fc_rcv_ls_req(remoteport, &axchg->ls_rsp, axchg->payload, 441 + axchg->size); 442 + 443 + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, 444 + "6205 NVME Unsol rcv: sz %d rc %d: %08x %08x %08x " 445 + "%08x %08x 
%08x\n", 446 + axchg->size, rc, 447 + *payload, *(payload+1), *(payload+2), 448 + *(payload+3), *(payload+4), *(payload+5)); 449 + 450 + if (!rc) 451 + return 0; 452 + #endif 453 + return 1; 454 + } 455 + 456 + /** 457 + * __lpfc_nvme_ls_req_cmp - Generic completion handler for a NVME 458 + * LS request. 459 + * @phba: Pointer to HBA context object 460 + * @vport: The local port that issued the LS 461 + * @cmdwqe: Pointer to driver command WQE object. 462 + * @wcqe: Pointer to driver response CQE object. 463 + * 464 + * This function is the generic completion handler for NVME LS requests. 465 + * The function updates any states and statistics, calls the transport 466 + * ls_req done() routine, then tears down the command and buffers used 467 + * for the LS request. 468 + **/ 469 + void 470 + __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, 471 + struct lpfc_iocbq *cmdwqe, 472 + struct lpfc_wcqe_complete *wcqe) 473 + { 474 + struct nvmefc_ls_req *pnvme_lsreq; 475 + struct lpfc_dmabuf *buf_ptr; 476 + struct lpfc_nodelist *ndlp; 477 + uint32_t status; 478 + 479 + pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2; 480 + ndlp = (struct lpfc_nodelist *)cmdwqe->context1; 481 + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; 482 + 483 + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, 484 + "6047 NVMEx LS REQ %px cmpl DID %x Xri: %x " 485 + "status %x reason x%x cmd:x%px lsreg:x%px bmp:x%px " 486 + "ndlp:x%px\n", 487 + pnvme_lsreq, ndlp ? 
ndlp->nlp_DID : 0, 488 + cmdwqe->sli4_xritag, status, 489 + (wcqe->parameter & 0xffff), 490 + cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); 491 + 492 + lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x parm x%x\n", 493 + cmdwqe->sli4_xritag, status, wcqe->parameter); 494 + 495 + if (cmdwqe->context3) { 496 + buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3; 497 + lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); 498 + kfree(buf_ptr); 499 + cmdwqe->context3 = NULL; 500 + } 501 + if (pnvme_lsreq->done) 502 + pnvme_lsreq->done(pnvme_lsreq, status); 503 + else 504 + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, 505 + "6046 NVMEx cmpl without done call back? " 506 + "Data %px DID %x Xri: %x status %x\n", 507 + pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, 508 + cmdwqe->sli4_xritag, status); 509 + if (ndlp) { 510 + lpfc_nlp_put(ndlp); 511 + cmdwqe->context1 = NULL; 512 + } 513 + lpfc_sli_release_iocbq(phba, cmdwqe); 514 + } 515 + 396 516 static void 397 - lpfc_nvme_cmpl_gen_req(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 517 + lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 398 518 struct lpfc_wcqe_complete *wcqe) 399 519 { 400 520 struct lpfc_vport *vport = cmdwqe->vport; 401 521 struct lpfc_nvme_lport *lport; 402 522 uint32_t status; 403 - struct nvmefc_ls_req *pnvme_lsreq; 404 - struct lpfc_dmabuf *buf_ptr; 405 - struct lpfc_nodelist *ndlp; 406 523 407 - pnvme_lsreq = (struct nvmefc_ls_req *)cmdwqe->context2; 408 524 status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; 409 525 410 526 if (vport->localport) { ··· 532 422 } 533 423 } 534 424 535 - ndlp = (struct lpfc_nodelist *)cmdwqe->context1; 536 - lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, 537 - "6047 nvme cmpl Enter " 538 - "Data %px DID %x Xri: %x status %x reason x%x " 539 - "cmd:x%px lsreg:x%px bmp:x%px ndlp:x%px\n", 540 - pnvme_lsreq, ndlp ? 
ndlp->nlp_DID : 0, 541 - cmdwqe->sli4_xritag, status, 542 - (wcqe->parameter & 0xffff), 543 - cmdwqe, pnvme_lsreq, cmdwqe->context3, ndlp); 544 - 545 - lpfc_nvmeio_data(phba, "NVME LS CMPL: xri x%x stat x%x parm x%x\n", 546 - cmdwqe->sli4_xritag, status, wcqe->parameter); 547 - 548 - if (cmdwqe->context3) { 549 - buf_ptr = (struct lpfc_dmabuf *)cmdwqe->context3; 550 - lpfc_mbuf_free(phba, buf_ptr->virt, buf_ptr->phys); 551 - kfree(buf_ptr); 552 - cmdwqe->context3 = NULL; 553 - } 554 - if (pnvme_lsreq->done) 555 - pnvme_lsreq->done(pnvme_lsreq, status); 556 - else 557 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, 558 - "6046 nvme cmpl without done call back? " 559 - "Data %px DID %x Xri: %x status %x\n", 560 - pnvme_lsreq, ndlp ? ndlp->nlp_DID : 0, 561 - cmdwqe->sli4_xritag, status); 562 - if (ndlp) { 563 - lpfc_nlp_put(ndlp); 564 - cmdwqe->context1 = NULL; 565 - } 566 - lpfc_sli_release_iocbq(phba, cmdwqe); 425 + __lpfc_nvme_ls_req_cmp(phba, vport, cmdwqe, wcqe); 567 426 } 568 427 569 428 static int ··· 636 557 637 558 638 559 /* Issue GEN REQ WQE for NPORT <did> */ 639 - lpfc_printf_vlog(vport, KERN_INFO, LOG_ELS, 640 - "6050 Issue GEN REQ WQE to NPORT x%x " 641 - "Data: x%x x%x wq:x%px lsreq:x%px bmp:x%px " 642 - "xmit:%d 1st:%d\n", 643 - ndlp->nlp_DID, genwqe->iotag, 644 - vport->port_state, 645 - genwqe, pnvme_lsreq, bmp, xmit_len, first_len); 646 560 genwqe->wqe_cmpl = cmpl; 647 561 genwqe->iocb_cmpl = NULL; 648 562 genwqe->drvrTimeout = tmo + LPFC_DRVR_TIMEOUT; ··· 647 575 648 576 rc = lpfc_sli4_issue_wqe(phba, &phba->sli4_hba.hdwq[0], genwqe); 649 577 if (rc) { 650 - lpfc_printf_vlog(vport, KERN_ERR, LOG_ELS, 578 + lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC | LOG_ELS, 651 579 "6045 Issue GEN REQ WQE to NPORT x%x " 652 - "Data: x%x x%x\n", 580 + "Data: x%x x%x rc x%x\n", 653 581 ndlp->nlp_DID, genwqe->iotag, 654 - vport->port_state); 582 + vport->port_state, rc); 655 583 lpfc_sli_release_iocbq(phba, genwqe); 656 584 return 1; 657 585 } 586 + 587 + 
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_ELS, 588 + "6050 Issue GEN REQ WQE to NPORT x%x " 589 + "Data: oxid: x%x state: x%x wq:x%px lsreq:x%px " 590 + "bmp:x%px xmit:%d 1st:%d\n", 591 + ndlp->nlp_DID, genwqe->sli4_xritag, 592 + vport->port_state, 593 + genwqe, pnvme_lsreq, bmp, xmit_len, first_len); 658 594 return 0; 659 595 } 660 596 597 + 661 598 /** 662 - * lpfc_nvme_ls_req - Issue an Link Service request 663 - * @lpfc_pnvme: Pointer to the driver's nvme instance data 664 - * @lpfc_nvme_lport: Pointer to the driver's local port data 665 - * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq 599 + * __lpfc_nvme_ls_req - Generic service routine to issue an NVME LS request 600 + * @vport: The local port issuing the LS 601 + * @ndlp: The remote port to send the LS to 602 + * @pnvme_lsreq: Pointer to LS request structure from the transport 666 603 * 667 - * Driver registers this routine to handle any link service request 668 - * from the nvme_fc transport to a remote nvme-aware port. 604 + * Routine validates the ndlp, builds buffers and sends a GEN_REQUEST 605 + * WQE to perform the LS operation. 669 606 * 670 607 * Return value : 671 608 * 0 - Success 672 - * TODO: What are the failure codes. 
609 + * non-zero: various error codes, in form of -Exxx 673 610 **/ 674 - static int 675 - lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, 676 - struct nvme_fc_remote_port *pnvme_rport, 677 - struct nvmefc_ls_req *pnvme_lsreq) 611 + int 612 + __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, 613 + struct nvmefc_ls_req *pnvme_lsreq, 614 + void (*gen_req_cmp)(struct lpfc_hba *phba, 615 + struct lpfc_iocbq *cmdwqe, 616 + struct lpfc_wcqe_complete *wcqe)) 678 617 { 679 - int ret = 0; 680 - struct lpfc_nvme_lport *lport; 681 - struct lpfc_nvme_rport *rport; 682 - struct lpfc_vport *vport; 683 - struct lpfc_nodelist *ndlp; 684 - struct ulp_bde64 *bpl; 685 618 struct lpfc_dmabuf *bmp; 619 + struct ulp_bde64 *bpl; 620 + int ret; 686 621 uint16_t ntype, nstate; 687 622 688 - /* there are two dma buf in the request, actually there is one and 689 - * the second one is just the start address + cmd size. 690 - * Before calling lpfc_nvme_gen_req these buffers need to be wrapped 691 - * in a lpfc_dmabuf struct. When freeing we just free the wrapper 692 - * because the nvem layer owns the data bufs. 693 - * We do not have to break these packets open, we don't care what is in 694 - * them. And we do not have to look at the resonse data, we only care 695 - * that we got a response. All of the caring is going to happen in the 696 - * nvme-fc layer. 697 - */ 698 - 699 - lport = (struct lpfc_nvme_lport *)pnvme_lport->private; 700 - rport = (struct lpfc_nvme_rport *)pnvme_rport->private; 701 - if (unlikely(!lport) || unlikely(!rport)) 702 - return -EINVAL; 703 - 704 - vport = lport->vport; 705 - 706 - if (vport->load_flag & FC_UNLOADING) 707 - return -ENODEV; 708 - 709 - /* Need the ndlp. It is stored in the driver's rport. */ 710 - ndlp = rport->ndlp; 711 623 if (!ndlp || !NLP_CHK_NODE_ACT(ndlp)) { 712 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, 713 - "6051 Remoteport x%px, rport has invalid ndlp. 
" 714 - "Failing LS Req\n", pnvme_rport); 624 + lpfc_printf_vlog(vport, KERN_ERR, 625 + LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR, 626 + "6051 NVMEx LS REQ: Bad NDLP x%px, Failing " 627 + "LS Req\n", 628 + ndlp); 715 629 return -ENODEV; 716 630 } 717 631 718 - /* The remote node has to be a mapped nvme target or an 719 - * unmapped nvme initiator or it's an error. 720 - */ 721 632 ntype = ndlp->nlp_type; 722 633 nstate = ndlp->nlp_state; 723 634 if ((ntype & NLP_NVME_TARGET && nstate != NLP_STE_MAPPED_NODE) || 724 635 (ntype & NLP_NVME_INITIATOR && nstate != NLP_STE_UNMAPPED_NODE)) { 725 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NODE | LOG_NVME_IOERR, 726 - "6088 DID x%06x not ready for " 727 - "IO. State x%x, Type x%x\n", 728 - pnvme_rport->port_id, 729 - ndlp->nlp_state, ndlp->nlp_type); 636 + lpfc_printf_vlog(vport, KERN_ERR, 637 + LOG_NVME_DISC | LOG_NODE | LOG_NVME_IOERR, 638 + "6088 NVMEx LS REQ: Fail DID x%06x not " 639 + "ready for IO. Type x%x, State x%x\n", 640 + ndlp->nlp_DID, ntype, nstate); 730 641 return -ENODEV; 731 642 } 732 - bmp = kmalloc(sizeof(struct lpfc_dmabuf), GFP_KERNEL); 643 + 644 + /* 645 + * there are two dma buf in the request, actually there is one and 646 + * the second one is just the start address + cmd size. 647 + * Before calling lpfc_nvme_gen_req these buffers need to be wrapped 648 + * in a lpfc_dmabuf struct. When freeing we just free the wrapper 649 + * because the nvem layer owns the data bufs. 650 + * We do not have to break these packets open, we don't care what is 651 + * in them. And we do not have to look at the resonse data, we only 652 + * care that we got a response. All of the caring is going to happen 653 + * in the nvme-fc layer. 
654 + */ 655 + 656 + bmp = kmalloc(sizeof(*bmp), GFP_KERNEL); 733 657 if (!bmp) { 734 658 735 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, 736 - "6044 Could not find node for DID %x\n", 737 - pnvme_rport->port_id); 738 - return 2; 659 + lpfc_printf_vlog(vport, KERN_ERR, 660 + LOG_NVME_DISC | LOG_NVME_IOERR, 661 + "6044 NVMEx LS REQ: Could not alloc LS buf " 662 + "for DID %x\n", 663 + ndlp->nlp_DID); 664 + return -ENOMEM; 739 665 } 740 - INIT_LIST_HEAD(&bmp->list); 666 + 741 667 bmp->virt = lpfc_mbuf_alloc(vport->phba, MEM_PRI, &(bmp->phys)); 742 668 if (!bmp->virt) { 743 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, 744 - "6042 Could not find node for DID %x\n", 745 - pnvme_rport->port_id); 669 + lpfc_printf_vlog(vport, KERN_ERR, 670 + LOG_NVME_DISC | LOG_NVME_IOERR, 671 + "6042 NVMEx LS REQ: Could not alloc mbuf " 672 + "for DID %x\n", 673 + ndlp->nlp_DID); 746 674 kfree(bmp); 747 - return 3; 675 + return -ENOMEM; 748 676 } 677 + 678 + INIT_LIST_HEAD(&bmp->list); 679 + 749 680 bpl = (struct ulp_bde64 *)bmp->virt; 750 681 bpl->addrHigh = le32_to_cpu(putPaddrHigh(pnvme_lsreq->rqstdma)); 751 682 bpl->addrLow = le32_to_cpu(putPaddrLow(pnvme_lsreq->rqstdma)); ··· 763 688 bpl->tus.f.bdeSize = pnvme_lsreq->rsplen; 764 689 bpl->tus.w = le32_to_cpu(bpl->tus.w); 765 690 766 - /* Expand print to include key fields. 
*/ 767 691 lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC, 768 - "6149 Issue LS Req to DID 0x%06x lport x%px, " 769 - "rport x%px lsreq x%px rqstlen:%d rsplen:%d " 770 - "%pad %pad\n", 771 - ndlp->nlp_DID, pnvme_lport, pnvme_rport, 772 - pnvme_lsreq, pnvme_lsreq->rqstlen, 773 - pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, 774 - &pnvme_lsreq->rspdma); 692 + "6149 NVMEx LS REQ: Issue to DID 0x%06x lsreq x%px, " 693 + "rqstlen:%d rsplen:%d %pad %pad\n", 694 + ndlp->nlp_DID, pnvme_lsreq, pnvme_lsreq->rqstlen, 695 + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, 696 + &pnvme_lsreq->rspdma); 775 697 776 - atomic_inc(&lport->fc4NvmeLsRequests); 777 - 778 - /* Hardcode the wait to 30 seconds. Connections are failing otherwise. 779 - * This code allows it all to work. 780 - */ 781 698 ret = lpfc_nvme_gen_req(vport, bmp, pnvme_lsreq->rqstaddr, 782 - pnvme_lsreq, lpfc_nvme_cmpl_gen_req, 783 - ndlp, 2, 30, 0); 699 + pnvme_lsreq, gen_req_cmp, ndlp, 2, 700 + LPFC_NVME_LS_TIMEOUT, 0); 784 701 if (ret != WQE_SUCCESS) { 785 - atomic_inc(&lport->xmt_ls_err); 786 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_DISC, 787 - "6052 EXIT. issue ls wqe failed lport x%px, " 788 - "rport x%px lsreq x%px Status %x DID %x\n", 789 - pnvme_lport, pnvme_rport, pnvme_lsreq, 790 - ret, ndlp->nlp_DID); 702 + lpfc_printf_vlog(vport, KERN_ERR, 703 + LOG_NVME_DISC | LOG_NVME_IOERR, 704 + "6052 NVMEx REQ: EXIT. issue ls wqe failed " 705 + "lsreq x%px Status %x DID %x\n", 706 + pnvme_lsreq, ret, ndlp->nlp_DID); 791 707 lpfc_mbuf_free(vport->phba, bmp->virt, bmp->phys); 792 708 kfree(bmp); 793 - return ret; 709 + return -EIO; 794 710 } 795 711 796 - /* Stub in routine and return 0 for now. 
*/ 797 - return ret; 712 + return 0; 798 713 } 799 714 800 715 /** 801 - * lpfc_nvme_ls_abort - Issue an Link Service request 802 - * @lpfc_pnvme: Pointer to the driver's nvme instance data 803 - * @lpfc_nvme_lport: Pointer to the driver's local port data 804 - * @lpfc_nvme_rport: Pointer to the rport getting the @lpfc_nvme_ereq 716 + * lpfc_nvme_ls_req - Issue an NVME Link Service request 717 + * @lpfc_nvme_lport: Transport localport that LS is to be issued from. 718 + * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to. 719 + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS 805 720 * 806 721 * Driver registers this routine to handle any link service request 807 722 * from the nvme_fc transport to a remote nvme-aware port. 808 723 * 809 724 * Return value : 810 725 * 0 - Success 811 - * TODO: What are the failure codes. 726 + * non-zero: various error codes, in form of -Exxx 727 + **/ 728 + static int 729 + lpfc_nvme_ls_req(struct nvme_fc_local_port *pnvme_lport, 730 + struct nvme_fc_remote_port *pnvme_rport, 731 + struct nvmefc_ls_req *pnvme_lsreq) 732 + { 733 + struct lpfc_nvme_lport *lport; 734 + struct lpfc_nvme_rport *rport; 735 + struct lpfc_vport *vport; 736 + int ret; 737 + 738 + lport = (struct lpfc_nvme_lport *)pnvme_lport->private; 739 + rport = (struct lpfc_nvme_rport *)pnvme_rport->private; 740 + if (unlikely(!lport) || unlikely(!rport)) 741 + return -EINVAL; 742 + 743 + vport = lport->vport; 744 + if (vport->load_flag & FC_UNLOADING) 745 + return -ENODEV; 746 + 747 + atomic_inc(&lport->fc4NvmeLsRequests); 748 + 749 + ret = __lpfc_nvme_ls_req(vport, rport->ndlp, pnvme_lsreq, 750 + lpfc_nvme_ls_req_cmp); 751 + if (ret) 752 + atomic_inc(&lport->xmt_ls_err); 753 + 754 + return ret; 755 + } 756 + 757 + /** 758 + * __lpfc_nvme_ls_abort - Generic service routine to abort a prior 759 + * NVME LS request 760 + * @vport: The local port that issued the LS 761 + * @ndlp: The remote port the LS was sent to 762 + * @pnvme_lsreq: 
Pointer to LS request structure from the transport 763 + * 764 + * The driver validates the ndlp, looks for the LS, and aborts the 765 + * LS if found. 766 + * 767 + * Returns: 768 + * 0 : if LS found and aborted 769 + * non-zero: various error conditions in form -Exxx 770 + **/ 771 + int 772 + __lpfc_nvme_ls_abort(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, 773 + struct nvmefc_ls_req *pnvme_lsreq) 774 + { 775 + struct lpfc_hba *phba = vport->phba; 776 + struct lpfc_sli_ring *pring; 777 + struct lpfc_iocbq *wqe, *next_wqe; 778 + bool foundit = false; 779 + 780 + if (!ndlp) { 781 + lpfc_printf_log(phba, KERN_ERR, 782 + LOG_NVME_DISC | LOG_NODE | 783 + LOG_NVME_IOERR | LOG_NVME_ABTS, 784 + "6049 NVMEx LS REQ Abort: Bad NDLP x%px DID " 785 + "x%06x, Failing LS Req\n", 786 + ndlp, ndlp ? ndlp->nlp_DID : 0); 787 + return -EINVAL; 788 + } 789 + 790 + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, 791 + "6040 NVMEx LS REQ Abort: Issue LS_ABORT for lsreq " 792 + "x%p rqstlen:%d rsplen:%d %pad %pad\n", 793 + pnvme_lsreq, pnvme_lsreq->rqstlen, 794 + pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, 795 + &pnvme_lsreq->rspdma); 796 + 797 + /* 798 + * Lock the ELS ring txcmplq and look for the wqe that matches 799 + * this ELS. If found, issue an abort on the wqe. 
800 + */ 801 + pring = phba->sli4_hba.nvmels_wq->pring; 802 + spin_lock_irq(&phba->hbalock); 803 + spin_lock(&pring->ring_lock); 804 + list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { 805 + if (wqe->context2 == pnvme_lsreq) { 806 + wqe->iocb_flag |= LPFC_DRIVER_ABORTED; 807 + foundit = true; 808 + break; 809 + } 810 + } 811 + spin_unlock(&pring->ring_lock); 812 + 813 + if (foundit) 814 + lpfc_sli_issue_abort_iotag(phba, pring, wqe); 815 + spin_unlock_irq(&phba->hbalock); 816 + 817 + if (foundit) 818 + return 0; 819 + 820 + lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_DISC | LOG_NVME_ABTS, 821 + "6213 NVMEx LS REQ Abort: Unable to locate req x%p\n", 822 + pnvme_lsreq); 823 + return -EINVAL; 824 + } 825 + 826 + static int 827 + lpfc_nvme_xmt_ls_rsp(struct nvme_fc_local_port *localport, 828 + struct nvme_fc_remote_port *remoteport, 829 + struct nvmefc_ls_rsp *ls_rsp) 830 + { 831 + struct lpfc_async_xchg_ctx *axchg = 832 + container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp); 833 + struct lpfc_nvme_lport *lport; 834 + int rc; 835 + 836 + if (axchg->phba->pport->load_flag & FC_UNLOADING) 837 + return -ENODEV; 838 + 839 + lport = (struct lpfc_nvme_lport *)localport->private; 840 + 841 + rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, __lpfc_nvme_xmt_ls_rsp_cmp); 842 + 843 + if (rc) { 844 + /* 845 + * unless the failure is due to having already sent 846 + * the response, an abort will be generated for the 847 + * exchange if the rsp can't be sent. 848 + */ 849 + if (rc != -EALREADY) 850 + atomic_inc(&lport->xmt_ls_abort); 851 + return rc; 852 + } 853 + 854 + return 0; 855 + } 856 + 857 + /** 858 + * lpfc_nvme_ls_abort - Abort a prior NVME LS request 859 + * @lpfc_nvme_lport: Transport localport that LS is to be issued from. 860 + * @lpfc_nvme_rport: Transport remoteport that LS is to be sent to. 
861 + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS 862 + * 863 + * Driver registers this routine to abort a NVME LS request that is 864 + * in progress (from the transports perspective). 812 865 **/ 813 866 static void 814 867 lpfc_nvme_ls_abort(struct nvme_fc_local_port *pnvme_lport, ··· 947 744 struct lpfc_vport *vport; 948 745 struct lpfc_hba *phba; 949 746 struct lpfc_nodelist *ndlp; 950 - LIST_HEAD(abort_list); 951 - struct lpfc_sli_ring *pring; 952 - struct lpfc_iocbq *wqe, *next_wqe; 747 + int ret; 953 748 954 749 lport = (struct lpfc_nvme_lport *)pnvme_lport->private; 955 750 if (unlikely(!lport)) ··· 959 758 return; 960 759 961 760 ndlp = lpfc_findnode_did(vport, pnvme_rport->port_id); 962 - if (!ndlp) { 963 - lpfc_printf_vlog(vport, KERN_ERR, LOG_NVME_ABTS, 964 - "6049 Could not find node for DID %x\n", 965 - pnvme_rport->port_id); 966 - return; 967 - } 968 761 969 - /* Expand print to include key fields. */ 970 - lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS, 971 - "6040 ENTER. lport x%px, rport x%px lsreq x%px rqstlen:%d " 972 - "rsplen:%d %pad %pad\n", 973 - pnvme_lport, pnvme_rport, 974 - pnvme_lsreq, pnvme_lsreq->rqstlen, 975 - pnvme_lsreq->rsplen, &pnvme_lsreq->rqstdma, 976 - &pnvme_lsreq->rspdma); 977 - 978 - /* 979 - * Lock the ELS ring txcmplq and build a local list of all ELS IOs 980 - * that need an ABTS. The IOs need to stay on the txcmplq so that 981 - * the abort operation completes them successfully. 982 - */ 983 - pring = phba->sli4_hba.nvmels_wq->pring; 984 - spin_lock_irq(&phba->hbalock); 985 - spin_lock(&pring->ring_lock); 986 - list_for_each_entry_safe(wqe, next_wqe, &pring->txcmplq, list) { 987 - /* Add to abort_list on on NDLP match. 
*/ 988 - if (lpfc_check_sli_ndlp(phba, pring, wqe, ndlp)) { 989 - wqe->iocb_flag |= LPFC_DRIVER_ABORTED; 990 - list_add_tail(&wqe->dlist, &abort_list); 991 - } 992 - } 993 - spin_unlock(&pring->ring_lock); 994 - spin_unlock_irq(&phba->hbalock); 995 - 996 - /* Abort the targeted IOs and remove them from the abort list. */ 997 - list_for_each_entry_safe(wqe, next_wqe, &abort_list, dlist) { 762 + ret = __lpfc_nvme_ls_abort(vport, ndlp, pnvme_lsreq); 763 + if (!ret) 998 764 atomic_inc(&lport->xmt_ls_abort); 999 - spin_lock_irq(&phba->hbalock); 1000 - list_del_init(&wqe->dlist); 1001 - lpfc_sli_issue_abort_iotag(phba, pring, wqe); 1002 - spin_unlock_irq(&phba->hbalock); 1003 - } 1004 765 } 1005 766 1006 767 /* Fix up the existing sgls for NVME IO. */ ··· 2074 1911 .fcp_io = lpfc_nvme_fcp_io_submit, 2075 1912 .ls_abort = lpfc_nvme_ls_abort, 2076 1913 .fcp_abort = lpfc_nvme_fcp_abort, 1914 + .xmt_ls_rsp = lpfc_nvme_xmt_ls_rsp, 2077 1915 2078 1916 .max_hw_queues = 1, 2079 1917 .max_sgl_segments = LPFC_NVME_DEFAULT_SEGS, ··· 2270 2106 atomic_set(&lport->cmpl_fcp_err, 0); 2271 2107 atomic_set(&lport->cmpl_ls_xb, 0); 2272 2108 atomic_set(&lport->cmpl_ls_err, 0); 2109 + 2273 2110 atomic_set(&lport->fc4NvmeLsRequests, 0); 2274 2111 atomic_set(&lport->fc4NvmeLsCmpls, 0); 2275 2112 }
+180
drivers/scsi/lpfc/lpfc_nvme.h
··· 21 21 * included with this package. * 22 22 ********************************************************************/ 23 23 24 + #include <linux/nvme.h> 25 + #include <linux/nvme-fc-driver.h> 26 + #include <linux/nvme-fc.h> 27 + 24 28 #define LPFC_NVME_DEFAULT_SEGS (64 + 1) /* 256K IOs */ 25 29 26 30 #define LPFC_NVME_ERSP_LEN 0x20 ··· 78 74 struct lpfc_nvme_fcpreq_priv { 79 75 struct lpfc_io_buf *nvme_buf; 80 76 }; 77 + 78 + /* 79 + * set NVME LS request timeouts to 30s. It is larger than the 2*R_A_TOV 80 + * set by the spec, which appears to have issues with some devices. 81 + */ 82 + #define LPFC_NVME_LS_TIMEOUT 30 83 + 84 + 85 + #define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ 86 + #define LPFC_NVMET_RQE_MIN_POST 128 87 + #define LPFC_NVMET_RQE_DEF_POST 512 88 + #define LPFC_NVMET_RQE_DEF_COUNT 2048 89 + #define LPFC_NVMET_SUCCESS_LEN 12 90 + 91 + #define LPFC_NVMET_MRQ_AUTO 0 92 + #define LPFC_NVMET_MRQ_MAX 16 93 + 94 + #define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) 95 + 96 + /* Used for NVME Target */ 97 + #define LPFC_NVMET_INV_HOST_ACTIVE 1 98 + 99 + struct lpfc_nvmet_tgtport { 100 + struct lpfc_hba *phba; 101 + struct completion *tport_unreg_cmp; 102 + atomic_t state; /* tracks nvmet hosthandle invalidation */ 103 + 104 + /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ 105 + atomic_t rcv_ls_req_in; 106 + atomic_t rcv_ls_req_out; 107 + atomic_t rcv_ls_req_drop; 108 + atomic_t xmt_ls_abort; 109 + atomic_t xmt_ls_abort_cmpl; 110 + 111 + /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ 112 + atomic_t xmt_ls_rsp; 113 + atomic_t xmt_ls_drop; 114 + 115 + /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ 116 + atomic_t xmt_ls_rsp_error; 117 + atomic_t xmt_ls_rsp_aborted; 118 + atomic_t xmt_ls_rsp_xb_set; 119 + atomic_t xmt_ls_rsp_cmpl; 120 + 121 + /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ 122 + atomic_t rcv_fcp_cmd_in; 123 + atomic_t rcv_fcp_cmd_out; 124 + atomic_t rcv_fcp_cmd_drop; 125 + atomic_t rcv_fcp_cmd_defer; 126 + atomic_t xmt_fcp_release; 127 
+ 128 + /* Stats counters - lpfc_nvmet_xmt_fcp_op */ 129 + atomic_t xmt_fcp_drop; 130 + atomic_t xmt_fcp_read_rsp; 131 + atomic_t xmt_fcp_read; 132 + atomic_t xmt_fcp_write; 133 + atomic_t xmt_fcp_rsp; 134 + 135 + /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ 136 + atomic_t xmt_fcp_rsp_xb_set; 137 + atomic_t xmt_fcp_rsp_cmpl; 138 + atomic_t xmt_fcp_rsp_error; 139 + atomic_t xmt_fcp_rsp_aborted; 140 + atomic_t xmt_fcp_rsp_drop; 141 + 142 + /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ 143 + atomic_t xmt_fcp_xri_abort_cqe; 144 + atomic_t xmt_fcp_abort; 145 + atomic_t xmt_fcp_abort_cmpl; 146 + atomic_t xmt_abort_sol; 147 + atomic_t xmt_abort_unsol; 148 + atomic_t xmt_abort_rsp; 149 + atomic_t xmt_abort_rsp_error; 150 + 151 + /* Stats counters - defer IO */ 152 + atomic_t defer_ctx; 153 + atomic_t defer_fod; 154 + atomic_t defer_wqfull; 155 + }; 156 + 157 + struct lpfc_nvmet_ctx_info { 158 + struct list_head nvmet_ctx_list; 159 + spinlock_t nvmet_ctx_list_lock; /* lock per CPU */ 160 + struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu; 161 + struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu; 162 + uint16_t nvmet_ctx_list_cnt; 163 + char pad[16]; /* pad to a cache-line */ 164 + }; 165 + 166 + /* This retrieves the context info associated with the specified cpu / mrq */ 167 + #define lpfc_get_ctx_list(phba, cpu, mrq) \ 168 + (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) 169 + 170 + /* Values for state field of struct lpfc_async_xchg_ctx */ 171 + #define LPFC_NVME_STE_LS_RCV 1 172 + #define LPFC_NVME_STE_LS_ABORT 2 173 + #define LPFC_NVME_STE_LS_RSP 3 174 + #define LPFC_NVME_STE_RCV 4 175 + #define LPFC_NVME_STE_DATA 5 176 + #define LPFC_NVME_STE_ABORT 6 177 + #define LPFC_NVME_STE_DONE 7 178 + #define LPFC_NVME_STE_FREE 0xff 179 + 180 + /* Values for flag field of struct lpfc_async_xchg_ctx */ 181 + #define LPFC_NVME_IO_INP 0x1 /* IO is in progress on exchange */ 182 + #define LPFC_NVME_ABORT_OP 0x2 /* Abort WQE issued on exchange */ 183 + #define 
LPFC_NVME_XBUSY 0x4 /* XB bit set on IO cmpl */ 184 + #define LPFC_NVME_CTX_RLS 0x8 /* ctx free requested */ 185 + #define LPFC_NVME_ABTS_RCV 0x10 /* ABTS received on exchange */ 186 + #define LPFC_NVME_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ 187 + #define LPFC_NVME_DEFER_WQFULL 0x40 /* Waiting on a free WQE */ 188 + #define LPFC_NVME_TNOTIFY 0x80 /* notify transport of abts */ 189 + 190 + struct lpfc_async_xchg_ctx { 191 + union { 192 + struct nvmefc_tgt_fcp_req fcp_req; 193 + } hdlrctx; 194 + struct list_head list; 195 + struct lpfc_hba *phba; 196 + struct lpfc_nodelist *ndlp; 197 + struct nvmefc_ls_req *ls_req; 198 + struct nvmefc_ls_rsp ls_rsp; 199 + struct lpfc_iocbq *wqeq; 200 + struct lpfc_iocbq *abort_wqeq; 201 + spinlock_t ctxlock; /* protect flag access */ 202 + uint32_t sid; 203 + uint32_t offset; 204 + uint16_t oxid; 205 + uint16_t size; 206 + uint16_t entry_cnt; 207 + uint16_t cpu; 208 + uint16_t idx; 209 + uint16_t state; 210 + uint16_t flag; 211 + void *payload; 212 + struct rqb_dmabuf *rqb_buffer; 213 + struct lpfc_nvmet_ctxbuf *ctxbuf; 214 + struct lpfc_sli4_hdw_queue *hdwq; 215 + 216 + #ifdef CONFIG_SCSI_LPFC_DEBUG_FS 217 + uint64_t ts_isr_cmd; 218 + uint64_t ts_cmd_nvme; 219 + uint64_t ts_nvme_data; 220 + uint64_t ts_data_wqput; 221 + uint64_t ts_isr_data; 222 + uint64_t ts_data_nvme; 223 + uint64_t ts_nvme_status; 224 + uint64_t ts_status_wqput; 225 + uint64_t ts_isr_status; 226 + uint64_t ts_status_nvme; 227 + #endif 228 + }; 229 + 230 + 231 + /* routines found in lpfc_nvme.c */ 232 + int __lpfc_nvme_ls_req(struct lpfc_vport *vport, struct lpfc_nodelist *ndlp, 233 + struct nvmefc_ls_req *pnvme_lsreq, 234 + void (*gen_req_cmp)(struct lpfc_hba *phba, 235 + struct lpfc_iocbq *cmdwqe, 236 + struct lpfc_wcqe_complete *wcqe)); 237 + void __lpfc_nvme_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_vport *vport, 238 + struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe); 239 + int __lpfc_nvme_ls_abort(struct lpfc_vport *vport, 240 + struct 
lpfc_nodelist *ndlp, struct nvmefc_ls_req *pnvme_lsreq); 241 + 242 + /* routines found in lpfc_nvmet.c */ 243 + int lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, 244 + struct lpfc_async_xchg_ctx *ctxp, uint32_t sid, 245 + uint16_t xri); 246 + int __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, 247 + struct nvmefc_ls_rsp *ls_rsp, 248 + void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, 249 + struct lpfc_iocbq *cmdwqe, 250 + struct lpfc_wcqe_complete *wcqe)); 251 + void __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, 252 + struct lpfc_iocbq *cmdwqe, struct lpfc_wcqe_complete *wcqe);
+499 -338
drivers/scsi/lpfc/lpfc_nvmet.c
··· 36 36 #include <scsi/scsi_transport_fc.h> 37 37 #include <scsi/fc/fc_fs.h> 38 38 39 - #include <linux/nvme.h> 40 - #include <linux/nvme-fc-driver.h> 41 - #include <linux/nvme-fc.h> 42 - 43 39 #include "lpfc_version.h" 44 40 #include "lpfc_hw4.h" 45 41 #include "lpfc_hw.h" ··· 46 50 #include "lpfc.h" 47 51 #include "lpfc_scsi.h" 48 52 #include "lpfc_nvme.h" 49 - #include "lpfc_nvmet.h" 50 53 #include "lpfc_logmsg.h" 51 54 #include "lpfc_crtn.h" 52 55 #include "lpfc_vport.h" 53 56 #include "lpfc_debugfs.h" 54 57 55 58 static struct lpfc_iocbq *lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *, 56 - struct lpfc_nvmet_rcv_ctx *, 59 + struct lpfc_async_xchg_ctx *, 57 60 dma_addr_t rspbuf, 58 61 uint16_t rspsize); 59 62 static struct lpfc_iocbq *lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *, 60 - struct lpfc_nvmet_rcv_ctx *); 63 + struct lpfc_async_xchg_ctx *); 61 64 static int lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *, 62 - struct lpfc_nvmet_rcv_ctx *, 65 + struct lpfc_async_xchg_ctx *, 63 66 uint32_t, uint16_t); 64 67 static int lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *, 65 - struct lpfc_nvmet_rcv_ctx *, 68 + struct lpfc_async_xchg_ctx *, 66 69 uint32_t, uint16_t); 67 - static int lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *, 68 - struct lpfc_nvmet_rcv_ctx *, 69 - uint32_t, uint16_t); 70 70 static void lpfc_nvmet_wqfull_flush(struct lpfc_hba *, struct lpfc_queue *, 71 - struct lpfc_nvmet_rcv_ctx *); 71 + struct lpfc_async_xchg_ctx *); 72 72 static void lpfc_nvmet_fcp_rqst_defer_work(struct work_struct *); 73 73 74 74 static void lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf); ··· 213 221 } 214 222 215 223 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 216 - static struct lpfc_nvmet_rcv_ctx * 224 + static struct lpfc_async_xchg_ctx * 217 225 lpfc_nvmet_get_ctx_for_xri(struct lpfc_hba *phba, u16 xri) 218 226 { 219 - struct lpfc_nvmet_rcv_ctx *ctxp; 227 + struct lpfc_async_xchg_ctx *ctxp; 220 228 unsigned long iflag; 221 229 bool found = false; 222 
230 ··· 235 243 return NULL; 236 244 } 237 245 238 - static struct lpfc_nvmet_rcv_ctx * 246 + static struct lpfc_async_xchg_ctx * 239 247 lpfc_nvmet_get_ctx_for_oxid(struct lpfc_hba *phba, u16 oxid, u32 sid) 240 248 { 241 - struct lpfc_nvmet_rcv_ctx *ctxp; 249 + struct lpfc_async_xchg_ctx *ctxp; 242 250 unsigned long iflag; 243 251 bool found = false; 244 252 ··· 259 267 #endif 260 268 261 269 static void 262 - lpfc_nvmet_defer_release(struct lpfc_hba *phba, struct lpfc_nvmet_rcv_ctx *ctxp) 270 + lpfc_nvmet_defer_release(struct lpfc_hba *phba, 271 + struct lpfc_async_xchg_ctx *ctxp) 263 272 { 264 273 lockdep_assert_held(&ctxp->ctxlock); 265 274 ··· 268 275 "6313 NVMET Defer ctx release oxid x%x flg x%x\n", 269 276 ctxp->oxid, ctxp->flag); 270 277 271 - if (ctxp->flag & LPFC_NVMET_CTX_RLS) 278 + if (ctxp->flag & LPFC_NVME_CTX_RLS) 272 279 return; 273 280 274 - ctxp->flag |= LPFC_NVMET_CTX_RLS; 281 + ctxp->flag |= LPFC_NVME_CTX_RLS; 275 282 spin_lock(&phba->sli4_hba.t_active_list_lock); 276 283 list_del(&ctxp->list); 277 284 spin_unlock(&phba->sli4_hba.t_active_list_lock); 278 285 spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 279 286 list_add_tail(&ctxp->list, &phba->sli4_hba.lpfc_abts_nvmet_ctx_list); 280 287 spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 288 + } 289 + 290 + /** 291 + * __lpfc_nvme_xmt_ls_rsp_cmp - Generic completion handler for the 292 + * transmission of an NVME LS response. 293 + * @phba: Pointer to HBA context object. 294 + * @cmdwqe: Pointer to driver command WQE object. 295 + * @wcqe: Pointer to driver response CQE object. 296 + * 297 + * The function is called from SLI ring event handler with no 298 + * lock held. The function frees memory resources used for the command 299 + * used to send the NVME LS RSP. 
300 + **/ 301 + void 302 + __lpfc_nvme_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 303 + struct lpfc_wcqe_complete *wcqe) 304 + { 305 + struct lpfc_async_xchg_ctx *axchg = cmdwqe->context2; 306 + struct nvmefc_ls_rsp *ls_rsp = &axchg->ls_rsp; 307 + uint32_t status, result; 308 + 309 + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; 310 + result = wcqe->parameter; 311 + 312 + if (axchg->state != LPFC_NVME_STE_LS_RSP || axchg->entry_cnt != 2) { 313 + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, 314 + "6410 NVMEx LS cmpl state mismatch IO x%x: " 315 + "%d %d\n", 316 + axchg->oxid, axchg->state, axchg->entry_cnt); 317 + } 318 + 319 + lpfc_nvmeio_data(phba, "NVMEx LS CMPL: xri x%x stat x%x result x%x\n", 320 + axchg->oxid, status, result); 321 + 322 + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, 323 + "6038 NVMEx LS rsp cmpl: %d %d oxid x%x\n", 324 + status, result, axchg->oxid); 325 + 326 + lpfc_nlp_put(cmdwqe->context1); 327 + cmdwqe->context2 = NULL; 328 + cmdwqe->context3 = NULL; 329 + lpfc_sli_release_iocbq(phba, cmdwqe); 330 + ls_rsp->done(ls_rsp); 331 + lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, 332 + "6200 NVMEx LS rsp cmpl done status %d oxid x%x\n", 333 + status, axchg->oxid); 334 + kfree(axchg); 281 335 } 282 336 283 337 /** ··· 335 295 * 336 296 * The function is called from SLI ring event handler with no 337 297 * lock held. This function is the completion handler for NVME LS commands 338 - * The function frees memory resources used for the NVME commands. 298 + * The function updates any states and statistics, then calls the 299 + * generic completion handler to free resources. 
339 300 **/ 340 301 static void 341 302 lpfc_nvmet_xmt_ls_rsp_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 342 303 struct lpfc_wcqe_complete *wcqe) 343 304 { 344 305 struct lpfc_nvmet_tgtport *tgtp; 345 - struct nvmefc_tgt_ls_req *rsp; 346 - struct lpfc_nvmet_rcv_ctx *ctxp; 347 306 uint32_t status, result; 348 307 349 - status = bf_get(lpfc_wcqe_c_status, wcqe); 350 - result = wcqe->parameter; 351 - ctxp = cmdwqe->context2; 352 - 353 - if (ctxp->state != LPFC_NVMET_STE_LS_RSP || ctxp->entry_cnt != 2) { 354 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 355 - "6410 NVMET LS cmpl state mismatch IO x%x: " 356 - "%d %d\n", 357 - ctxp->oxid, ctxp->state, ctxp->entry_cnt); 358 - } 359 - 360 308 if (!phba->targetport) 361 - goto out; 309 + goto finish; 310 + 311 + status = bf_get(lpfc_wcqe_c_status, wcqe) & LPFC_IOCB_STATUS_MASK; 312 + result = wcqe->parameter; 362 313 363 314 tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 364 - 365 315 if (tgtp) { 366 316 if (status) { 367 317 atomic_inc(&tgtp->xmt_ls_rsp_error); ··· 364 334 } 365 335 } 366 336 367 - out: 368 - rsp = &ctxp->ctx.ls_req; 369 - 370 - lpfc_nvmeio_data(phba, "NVMET LS CMPL: xri x%x stat x%x result x%x\n", 371 - ctxp->oxid, status, result); 372 - 373 - lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, 374 - "6038 NVMET LS rsp cmpl: %d %d oxid x%x\n", 375 - status, result, ctxp->oxid); 376 - 377 - lpfc_nlp_put(cmdwqe->context1); 378 - cmdwqe->context2 = NULL; 379 - cmdwqe->context3 = NULL; 380 - lpfc_sli_release_iocbq(phba, cmdwqe); 381 - rsp->done(rsp); 382 - kfree(ctxp); 337 + finish: 338 + __lpfc_nvme_xmt_ls_rsp_cmp(phba, cmdwqe, wcqe); 383 339 } 384 340 385 341 /** ··· 385 369 lpfc_nvmet_ctxbuf_post(struct lpfc_hba *phba, struct lpfc_nvmet_ctxbuf *ctx_buf) 386 370 { 387 371 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 388 - struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; 372 + struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context; 389 373 struct lpfc_nvmet_tgtport *tgtp; 390 374 
struct fc_frame_header *fc_hdr; 391 375 struct rqb_dmabuf *nvmebuf; ··· 394 378 int cpu; 395 379 unsigned long iflag; 396 380 397 - if (ctxp->state == LPFC_NVMET_STE_FREE) { 381 + if (ctxp->state == LPFC_NVME_STE_FREE) { 398 382 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 399 383 "6411 NVMET free, already free IO x%x: %d %d\n", 400 384 ctxp->oxid, ctxp->state, ctxp->entry_cnt); ··· 406 390 /* check if freed in another path whilst acquiring lock */ 407 391 if (nvmebuf) { 408 392 ctxp->rqb_buffer = NULL; 409 - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { 410 - ctxp->flag &= ~LPFC_NVMET_CTX_REUSE_WQ; 393 + if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) { 394 + ctxp->flag &= ~LPFC_NVME_CTX_REUSE_WQ; 411 395 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 412 396 nvmebuf->hrq->rqbp->rqb_free_buffer(phba, 413 397 nvmebuf); ··· 420 404 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 421 405 } 422 406 } 423 - ctxp->state = LPFC_NVMET_STE_FREE; 407 + ctxp->state = LPFC_NVME_STE_FREE; 424 408 425 409 spin_lock_irqsave(&phba->sli4_hba.nvmet_io_wait_lock, iflag); 426 410 if (phba->sli4_hba.nvmet_io_wait_cnt) { ··· 437 421 size = nvmebuf->bytes_recv; 438 422 sid = sli4_sid_from_fc_hdr(fc_hdr); 439 423 440 - ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; 424 + ctxp = (struct lpfc_async_xchg_ctx *)ctx_buf->context; 441 425 ctxp->wqeq = NULL; 442 426 ctxp->offset = 0; 443 427 ctxp->phba = phba; 444 428 ctxp->size = size; 445 429 ctxp->oxid = oxid; 446 430 ctxp->sid = sid; 447 - ctxp->state = LPFC_NVMET_STE_RCV; 431 + ctxp->state = LPFC_NVME_STE_RCV; 448 432 ctxp->entry_cnt = 1; 449 433 ctxp->flag = 0; 450 434 ctxp->ctxbuf = ctx_buf; ··· 469 453 470 454 /* Indicate that a replacement buffer has been posted */ 471 455 spin_lock_irqsave(&ctxp->ctxlock, iflag); 472 - ctxp->flag |= LPFC_NVMET_CTX_REUSE_WQ; 456 + ctxp->flag |= LPFC_NVME_CTX_REUSE_WQ; 473 457 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 474 458 475 459 if (!queue_work(phba->wq, &ctx_buf->defer_work)) { ··· 511 495 
#ifdef CONFIG_SCSI_LPFC_DEBUG_FS 512 496 static void 513 497 lpfc_nvmet_ktime(struct lpfc_hba *phba, 514 - struct lpfc_nvmet_rcv_ctx *ctxp) 498 + struct lpfc_async_xchg_ctx *ctxp) 515 499 { 516 500 uint64_t seg1, seg2, seg3, seg4, seg5; 517 501 uint64_t seg6, seg7, seg8, seg9, seg10; ··· 720 704 { 721 705 struct lpfc_nvmet_tgtport *tgtp; 722 706 struct nvmefc_tgt_fcp_req *rsp; 723 - struct lpfc_nvmet_rcv_ctx *ctxp; 707 + struct lpfc_async_xchg_ctx *ctxp; 724 708 uint32_t status, result, op, start_clean, logerr; 725 709 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS 726 710 int id; 727 711 #endif 728 712 729 713 ctxp = cmdwqe->context2; 730 - ctxp->flag &= ~LPFC_NVMET_IO_INP; 714 + ctxp->flag &= ~LPFC_NVME_IO_INP; 731 715 732 - rsp = &ctxp->ctx.fcp_req; 716 + rsp = &ctxp->hdlrctx.fcp_req; 733 717 op = rsp->op; 734 718 735 719 status = bf_get(lpfc_wcqe_c_status, wcqe); ··· 756 740 757 741 /* pick up SLI4 exhange busy condition */ 758 742 if (bf_get(lpfc_wcqe_c_xb, wcqe)) { 759 - ctxp->flag |= LPFC_NVMET_XBUSY; 743 + ctxp->flag |= LPFC_NVME_XBUSY; 760 744 logerr |= LOG_NVME_ABTS; 761 745 if (tgtp) 762 746 atomic_inc(&tgtp->xmt_fcp_rsp_xb_set); 763 747 764 748 } else { 765 - ctxp->flag &= ~LPFC_NVMET_XBUSY; 749 + ctxp->flag &= ~LPFC_NVME_XBUSY; 766 750 } 767 751 768 752 lpfc_printf_log(phba, KERN_INFO, logerr, ··· 784 768 if ((op == NVMET_FCOP_READDATA_RSP) || 785 769 (op == NVMET_FCOP_RSP)) { 786 770 /* Sanity check */ 787 - ctxp->state = LPFC_NVMET_STE_DONE; 771 + ctxp->state = LPFC_NVME_STE_DONE; 788 772 ctxp->entry_cnt++; 789 773 790 774 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS ··· 842 826 #endif 843 827 } 844 828 845 - static int 846 - lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, 847 - struct nvmefc_tgt_ls_req *rsp) 829 + /** 830 + * __lpfc_nvme_xmt_ls_rsp - Generic service routine to issue transmit 831 + * an NVME LS rsp for a prior NVME LS request that was received. 832 + * @axchg: pointer to exchange context for the NVME LS request the response 833 + * is for. 
834 + * @ls_rsp: pointer to the transport LS RSP that is to be sent 835 + * @xmt_ls_rsp_cmp: completion routine to call upon RSP transmit done 836 + * 837 + * This routine is used to format and send a WQE to transmit a NVME LS 838 + * Response. The response is for a prior NVME LS request that was 839 + * received and posted to the transport. 840 + * 841 + * Returns: 842 + * 0 : if response successfully transmit 843 + * non-zero : if response failed to transmit, of the form -Exxx. 844 + **/ 845 + int 846 + __lpfc_nvme_xmt_ls_rsp(struct lpfc_async_xchg_ctx *axchg, 847 + struct nvmefc_ls_rsp *ls_rsp, 848 + void (*xmt_ls_rsp_cmp)(struct lpfc_hba *phba, 849 + struct lpfc_iocbq *cmdwqe, 850 + struct lpfc_wcqe_complete *wcqe)) 848 851 { 849 - struct lpfc_nvmet_rcv_ctx *ctxp = 850 - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.ls_req); 851 - struct lpfc_hba *phba = ctxp->phba; 852 - struct hbq_dmabuf *nvmebuf = 853 - (struct hbq_dmabuf *)ctxp->rqb_buffer; 852 + struct lpfc_hba *phba = axchg->phba; 853 + struct hbq_dmabuf *nvmebuf = (struct hbq_dmabuf *)axchg->rqb_buffer; 854 854 struct lpfc_iocbq *nvmewqeq; 855 - struct lpfc_nvmet_tgtport *nvmep = tgtport->private; 856 855 struct lpfc_dmabuf dmabuf; 857 856 struct ulp_bde64 bpl; 858 857 int rc; ··· 875 844 if (phba->pport->load_flag & FC_UNLOADING) 876 845 return -ENODEV; 877 846 878 - if (phba->pport->load_flag & FC_UNLOADING) 879 - return -ENODEV; 880 - 881 847 lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, 882 - "6023 NVMET LS rsp oxid x%x\n", ctxp->oxid); 848 + "6023 NVMEx LS rsp oxid x%x\n", axchg->oxid); 883 849 884 - if ((ctxp->state != LPFC_NVMET_STE_LS_RCV) || 885 - (ctxp->entry_cnt != 1)) { 886 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 887 - "6412 NVMET LS rsp state mismatch " 850 + if (axchg->state != LPFC_NVME_STE_LS_RCV || axchg->entry_cnt != 1) { 851 + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, 852 + "6412 NVMEx LS rsp state mismatch " 888 853 "oxid x%x: %d %d\n", 889 - 
ctxp->oxid, ctxp->state, ctxp->entry_cnt); 854 + axchg->oxid, axchg->state, axchg->entry_cnt); 855 + return -EALREADY; 890 856 } 891 - ctxp->state = LPFC_NVMET_STE_LS_RSP; 892 - ctxp->entry_cnt++; 857 + axchg->state = LPFC_NVME_STE_LS_RSP; 858 + axchg->entry_cnt++; 893 859 894 - nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, ctxp, rsp->rspdma, 895 - rsp->rsplen); 860 + nvmewqeq = lpfc_nvmet_prep_ls_wqe(phba, axchg, ls_rsp->rspdma, 861 + ls_rsp->rsplen); 896 862 if (nvmewqeq == NULL) { 897 - atomic_inc(&nvmep->xmt_ls_drop); 898 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 899 - "6150 LS Drop IO x%x: Prep\n", 900 - ctxp->oxid); 901 - lpfc_in_buf_free(phba, &nvmebuf->dbuf); 902 - atomic_inc(&nvmep->xmt_ls_abort); 903 - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, 904 - ctxp->sid, ctxp->oxid); 905 - return -ENOMEM; 863 + lpfc_printf_log(phba, KERN_ERR, 864 + LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS, 865 + "6150 NVMEx LS Drop Rsp x%x: Prep\n", 866 + axchg->oxid); 867 + rc = -ENOMEM; 868 + goto out_free_buf; 906 869 } 907 870 908 871 /* Save numBdes for bpl2sgl */ ··· 906 881 dmabuf.virt = &bpl; 907 882 bpl.addrLow = nvmewqeq->wqe.xmit_sequence.bde.addrLow; 908 883 bpl.addrHigh = nvmewqeq->wqe.xmit_sequence.bde.addrHigh; 909 - bpl.tus.f.bdeSize = rsp->rsplen; 884 + bpl.tus.f.bdeSize = ls_rsp->rsplen; 910 885 bpl.tus.f.bdeFlags = 0; 911 886 bpl.tus.w = le32_to_cpu(bpl.tus.w); 887 + /* 888 + * Note: although we're using stack space for the dmabuf, the 889 + * call to lpfc_sli4_issue_wqe is synchronous, so it will not 890 + * be referenced after it returns back to this routine. 
891 + */ 912 892 913 - nvmewqeq->wqe_cmpl = lpfc_nvmet_xmt_ls_rsp_cmp; 893 + nvmewqeq->wqe_cmpl = xmt_ls_rsp_cmp; 914 894 nvmewqeq->iocb_cmpl = NULL; 915 - nvmewqeq->context2 = ctxp; 895 + nvmewqeq->context2 = axchg; 916 896 917 - lpfc_nvmeio_data(phba, "NVMET LS RESP: xri x%x wqidx x%x len x%x\n", 918 - ctxp->oxid, nvmewqeq->hba_wqidx, rsp->rsplen); 897 + lpfc_nvmeio_data(phba, "NVMEx LS RSP: xri x%x wqidx x%x len x%x\n", 898 + axchg->oxid, nvmewqeq->hba_wqidx, ls_rsp->rsplen); 919 899 920 - rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); 900 + rc = lpfc_sli4_issue_wqe(phba, axchg->hdwq, nvmewqeq); 901 + 902 + /* clear to be sure there's no reference */ 903 + nvmewqeq->context3 = NULL; 904 + 921 905 if (rc == WQE_SUCCESS) { 922 906 /* 923 907 * Okay to repost buffer here, but wait till cmpl 924 908 * before freeing ctxp and iocbq. 925 909 */ 926 910 lpfc_in_buf_free(phba, &nvmebuf->dbuf); 927 - atomic_inc(&nvmep->xmt_ls_rsp); 928 911 return 0; 929 912 } 930 - /* Give back resources */ 931 - atomic_inc(&nvmep->xmt_ls_drop); 932 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 933 - "6151 LS Drop IO x%x: Issue %d\n", 934 - ctxp->oxid, rc); 913 + 914 + lpfc_printf_log(phba, KERN_ERR, 915 + LOG_NVME_DISC | LOG_NVME_IOERR | LOG_NVME_ABTS, 916 + "6151 NVMEx LS RSP x%x: failed to transmit %d\n", 917 + axchg->oxid, rc); 918 + 919 + rc = -ENXIO; 935 920 936 921 lpfc_nlp_put(nvmewqeq->context1); 937 922 923 + out_free_buf: 924 + /* Give back resources */ 938 925 lpfc_in_buf_free(phba, &nvmebuf->dbuf); 939 - atomic_inc(&nvmep->xmt_ls_abort); 940 - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, ctxp->sid, ctxp->oxid); 941 - return -ENXIO; 926 + 927 + /* 928 + * As transport doesn't track completions of responses, if the rsp 929 + * fails to send, the transport will effectively ignore the rsp 930 + * and consider the LS done. However, the driver has an active 931 + * exchange open for the LS - so be sure to abort the exchange 932 + * if the response isn't sent. 
933 + */ 934 + lpfc_nvme_unsol_ls_issue_abort(phba, axchg, axchg->sid, axchg->oxid); 935 + return rc; 936 + } 937 + 938 + /** 939 + * lpfc_nvmet_xmt_ls_rsp - Transmit NVME LS response 940 + * @tgtport: pointer to target port that NVME LS is to be transmit from. 941 + * @ls_rsp: pointer to the transport LS RSP that is to be sent 942 + * 943 + * Driver registers this routine to transmit responses for received NVME 944 + * LS requests. 945 + * 946 + * This routine is used to format and send a WQE to transmit a NVME LS 947 + * Response. The ls_rsp is used to reverse-map the LS to the original 948 + * NVME LS request sequence, which provides addressing information for 949 + * the remote port the LS to be sent to, as well as the exchange id 950 + * that is the LS is bound to. 951 + * 952 + * Returns: 953 + * 0 : if response successfully transmit 954 + * non-zero : if response failed to transmit, of the form -Exxx. 955 + **/ 956 + static int 957 + lpfc_nvmet_xmt_ls_rsp(struct nvmet_fc_target_port *tgtport, 958 + struct nvmefc_ls_rsp *ls_rsp) 959 + { 960 + struct lpfc_async_xchg_ctx *axchg = 961 + container_of(ls_rsp, struct lpfc_async_xchg_ctx, ls_rsp); 962 + struct lpfc_nvmet_tgtport *nvmep = tgtport->private; 963 + int rc; 964 + 965 + if (axchg->phba->pport->load_flag & FC_UNLOADING) 966 + return -ENODEV; 967 + 968 + rc = __lpfc_nvme_xmt_ls_rsp(axchg, ls_rsp, lpfc_nvmet_xmt_ls_rsp_cmp); 969 + 970 + if (rc) { 971 + atomic_inc(&nvmep->xmt_ls_drop); 972 + /* 973 + * unless the failure is due to having already sent 974 + * the response, an abort will be generated for the 975 + * exchange if the rsp can't be sent. 
976 + */ 977 + if (rc != -EALREADY) 978 + atomic_inc(&nvmep->xmt_ls_abort); 979 + return rc; 980 + } 981 + 982 + atomic_inc(&nvmep->xmt_ls_rsp); 983 + return 0; 942 984 } 943 985 944 986 static int ··· 1013 921 struct nvmefc_tgt_fcp_req *rsp) 1014 922 { 1015 923 struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; 1016 - struct lpfc_nvmet_rcv_ctx *ctxp = 1017 - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); 924 + struct lpfc_async_xchg_ctx *ctxp = 925 + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); 1018 926 struct lpfc_hba *phba = ctxp->phba; 1019 927 struct lpfc_queue *wq; 1020 928 struct lpfc_iocbq *nvmewqeq; ··· 1060 968 #endif 1061 969 1062 970 /* Sanity check */ 1063 - if ((ctxp->flag & LPFC_NVMET_ABTS_RCV) || 1064 - (ctxp->state == LPFC_NVMET_STE_ABORT)) { 971 + if ((ctxp->flag & LPFC_NVME_ABTS_RCV) || 972 + (ctxp->state == LPFC_NVME_STE_ABORT)) { 1065 973 atomic_inc(&lpfc_nvmep->xmt_fcp_drop); 1066 974 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 1067 975 "6102 IO oxid x%x aborted\n", ··· 1089 997 lpfc_nvmeio_data(phba, "NVMET FCP CMND: xri x%x op x%x len x%x\n", 1090 998 ctxp->oxid, rsp->op, rsp->rsplen); 1091 999 1092 - ctxp->flag |= LPFC_NVMET_IO_INP; 1000 + ctxp->flag |= LPFC_NVME_IO_INP; 1093 1001 rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); 1094 1002 if (rc == WQE_SUCCESS) { 1095 1003 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS ··· 1108 1016 * WQ was full, so queue nvmewqeq to be sent after 1109 1017 * WQE release CQE 1110 1018 */ 1111 - ctxp->flag |= LPFC_NVMET_DEFER_WQFULL; 1019 + ctxp->flag |= LPFC_NVME_DEFER_WQFULL; 1112 1020 wq = ctxp->hdwq->io_wq; 1113 1021 pring = wq->pring; 1114 1022 spin_lock_irqsave(&pring->ring_lock, iflags); ··· 1148 1056 struct nvmefc_tgt_fcp_req *req) 1149 1057 { 1150 1058 struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; 1151 - struct lpfc_nvmet_rcv_ctx *ctxp = 1152 - container_of(req, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); 1059 + struct lpfc_async_xchg_ctx *ctxp = 1060 + 
container_of(req, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); 1153 1061 struct lpfc_hba *phba = ctxp->phba; 1154 1062 struct lpfc_queue *wq; 1155 1063 unsigned long flags; ··· 1177 1085 /* Since iaab/iaar are NOT set, we need to check 1178 1086 * if the firmware is in process of aborting IO 1179 1087 */ 1180 - if (ctxp->flag & (LPFC_NVMET_XBUSY | LPFC_NVMET_ABORT_OP)) { 1088 + if (ctxp->flag & (LPFC_NVME_XBUSY | LPFC_NVME_ABORT_OP)) { 1181 1089 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 1182 1090 return; 1183 1091 } 1184 - ctxp->flag |= LPFC_NVMET_ABORT_OP; 1092 + ctxp->flag |= LPFC_NVME_ABORT_OP; 1185 1093 1186 - if (ctxp->flag & LPFC_NVMET_DEFER_WQFULL) { 1094 + if (ctxp->flag & LPFC_NVME_DEFER_WQFULL) { 1187 1095 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 1188 1096 lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, 1189 1097 ctxp->oxid); ··· 1193 1101 } 1194 1102 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 1195 1103 1196 - /* An state of LPFC_NVMET_STE_RCV means we have just received 1104 + /* A state of LPFC_NVME_STE_RCV means we have just received 1197 1105 * the NVME command and have not started processing it. 
1198 1106 * (by issuing any IO WQEs on this exchange yet) 1199 1107 */ 1200 - if (ctxp->state == LPFC_NVMET_STE_RCV) 1108 + if (ctxp->state == LPFC_NVME_STE_RCV) 1201 1109 lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, ctxp->sid, 1202 1110 ctxp->oxid); 1203 1111 else ··· 1210 1118 struct nvmefc_tgt_fcp_req *rsp) 1211 1119 { 1212 1120 struct lpfc_nvmet_tgtport *lpfc_nvmep = tgtport->private; 1213 - struct lpfc_nvmet_rcv_ctx *ctxp = 1214 - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); 1121 + struct lpfc_async_xchg_ctx *ctxp = 1122 + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); 1215 1123 struct lpfc_hba *phba = ctxp->phba; 1216 1124 unsigned long flags; 1217 1125 bool aborting = false; 1218 1126 1219 1127 spin_lock_irqsave(&ctxp->ctxlock, flags); 1220 - if (ctxp->flag & LPFC_NVMET_XBUSY) 1128 + if (ctxp->flag & LPFC_NVME_XBUSY) 1221 1129 lpfc_printf_log(phba, KERN_INFO, LOG_NVME_IOERR, 1222 1130 "6027 NVMET release with XBUSY flag x%x" 1223 1131 " oxid x%x\n", 1224 1132 ctxp->flag, ctxp->oxid); 1225 - else if (ctxp->state != LPFC_NVMET_STE_DONE && 1226 - ctxp->state != LPFC_NVMET_STE_ABORT) 1133 + else if (ctxp->state != LPFC_NVME_STE_DONE && 1134 + ctxp->state != LPFC_NVME_STE_ABORT) 1227 1135 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 1228 1136 "6413 NVMET release bad state %d %d oxid x%x\n", 1229 1137 ctxp->state, ctxp->entry_cnt, ctxp->oxid); 1230 1138 1231 - if ((ctxp->flag & LPFC_NVMET_ABORT_OP) || 1232 - (ctxp->flag & LPFC_NVMET_XBUSY)) { 1139 + if ((ctxp->flag & LPFC_NVME_ABORT_OP) || 1140 + (ctxp->flag & LPFC_NVME_XBUSY)) { 1233 1141 aborting = true; 1234 1142 /* let the abort path do the real release */ 1235 1143 lpfc_nvmet_defer_release(phba, ctxp); ··· 1240 1148 ctxp->state, aborting); 1241 1149 1242 1150 atomic_inc(&lpfc_nvmep->xmt_fcp_release); 1243 - ctxp->flag &= ~LPFC_NVMET_TNOTIFY; 1151 + ctxp->flag &= ~LPFC_NVME_TNOTIFY; 1244 1152 1245 1153 if (aborting) 1246 1154 return; ··· 1253 1161 struct nvmefc_tgt_fcp_req 
*rsp) 1254 1162 { 1255 1163 struct lpfc_nvmet_tgtport *tgtp; 1256 - struct lpfc_nvmet_rcv_ctx *ctxp = 1257 - container_of(rsp, struct lpfc_nvmet_rcv_ctx, ctx.fcp_req); 1164 + struct lpfc_async_xchg_ctx *ctxp = 1165 + container_of(rsp, struct lpfc_async_xchg_ctx, hdlrctx.fcp_req); 1258 1166 struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; 1259 1167 struct lpfc_hba *phba = ctxp->phba; 1260 1168 unsigned long iflag; ··· 1282 1190 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 1283 1191 } 1284 1192 1193 + /** 1194 + * lpfc_nvmet_ls_req_cmp - completion handler for a nvme ls request 1195 + * @phba: Pointer to HBA context object 1196 + * @cmdwqe: Pointer to driver command WQE object. 1197 + * @wcqe: Pointer to driver response CQE object. 1198 + * 1199 + * This function is the completion handler for NVME LS requests. 1200 + * The function updates any states and statistics, then calls the 1201 + * generic completion handler to finish completion of the request. 1202 + **/ 1203 + static void 1204 + lpfc_nvmet_ls_req_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 1205 + struct lpfc_wcqe_complete *wcqe) 1206 + { 1207 + __lpfc_nvme_ls_req_cmp(phba, cmdwqe->vport, cmdwqe, wcqe); 1208 + } 1209 + 1210 + /** 1211 + * lpfc_nvmet_ls_req - Issue an Link Service request 1212 + * @targetport - pointer to target instance registered with nvmet transport. 1213 + * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv. 1214 + * Driver sets this value to the ndlp pointer. 1215 + * @pnvme_lsreq - the transport nvme_ls_req structure for the LS 1216 + * 1217 + * Driver registers this routine to handle any link service request 1218 + * from the nvme_fc transport to a remote nvme-aware port. 
1219 + * 1220 + * Return value : 1221 + * 0 - Success 1222 + * non-zero: various error codes, in form of -Exxx 1223 + **/ 1224 + static int 1225 + lpfc_nvmet_ls_req(struct nvmet_fc_target_port *targetport, 1226 + void *hosthandle, 1227 + struct nvmefc_ls_req *pnvme_lsreq) 1228 + { 1229 + struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private; 1230 + struct lpfc_hba *phba; 1231 + struct lpfc_nodelist *ndlp; 1232 + int ret; 1233 + u32 hstate; 1234 + 1235 + if (!lpfc_nvmet) 1236 + return -EINVAL; 1237 + 1238 + phba = lpfc_nvmet->phba; 1239 + if (phba->pport->load_flag & FC_UNLOADING) 1240 + return -EINVAL; 1241 + 1242 + hstate = atomic_read(&lpfc_nvmet->state); 1243 + if (hstate == LPFC_NVMET_INV_HOST_ACTIVE) 1244 + return -EACCES; 1245 + 1246 + ndlp = (struct lpfc_nodelist *)hosthandle; 1247 + 1248 + ret = __lpfc_nvme_ls_req(phba->pport, ndlp, pnvme_lsreq, 1249 + lpfc_nvmet_ls_req_cmp); 1250 + 1251 + return ret; 1252 + } 1253 + 1254 + /** 1255 + * lpfc_nvmet_ls_abort - Abort a prior NVME LS request 1256 + * @targetport: Transport targetport, that LS was issued from. 1257 + * @hosthandle - hosthandle set by the driver in a prior ls_rqst_rcv. 1258 + * Driver sets this value to the ndlp pointer. 1259 + * @pnvme_lsreq - the transport nvme_ls_req structure for LS to be aborted 1260 + * 1261 + * Driver registers this routine to abort an NVME LS request that is 1262 + * in progress (from the transports perspective). 
1263 + **/ 1264 + static void 1265 + lpfc_nvmet_ls_abort(struct nvmet_fc_target_port *targetport, 1266 + void *hosthandle, 1267 + struct nvmefc_ls_req *pnvme_lsreq) 1268 + { 1269 + struct lpfc_nvmet_tgtport *lpfc_nvmet = targetport->private; 1270 + struct lpfc_hba *phba; 1271 + struct lpfc_nodelist *ndlp; 1272 + int ret; 1273 + 1274 + phba = lpfc_nvmet->phba; 1275 + if (phba->pport->load_flag & FC_UNLOADING) 1276 + return; 1277 + 1278 + ndlp = (struct lpfc_nodelist *)hosthandle; 1279 + 1280 + ret = __lpfc_nvme_ls_abort(phba->pport, ndlp, pnvme_lsreq); 1281 + if (!ret) 1282 + atomic_inc(&lpfc_nvmet->xmt_ls_abort); 1283 + } 1284 + 1285 + static void 1286 + lpfc_nvmet_host_release(void *hosthandle) 1287 + { 1288 + struct lpfc_nodelist *ndlp = hosthandle; 1289 + struct lpfc_hba *phba = NULL; 1290 + struct lpfc_nvmet_tgtport *tgtp; 1291 + 1292 + phba = ndlp->phba; 1293 + if (!phba->targetport || !phba->targetport->private) 1294 + return; 1295 + 1296 + lpfc_printf_log(phba, KERN_ERR, LOG_NVME, 1297 + "6202 NVMET XPT releasing hosthandle x%px\n", 1298 + hosthandle); 1299 + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 1300 + atomic_set(&tgtp->state, 0); 1301 + } 1302 + 1285 1303 static void 1286 1304 lpfc_nvmet_discovery_event(struct nvmet_fc_target_port *tgtport) 1287 1305 { ··· 1416 1214 .fcp_req_release = lpfc_nvmet_xmt_fcp_release, 1417 1215 .defer_rcv = lpfc_nvmet_defer_rcv, 1418 1216 .discovery_event = lpfc_nvmet_discovery_event, 1217 + .ls_req = lpfc_nvmet_ls_req, 1218 + .ls_abort = lpfc_nvmet_ls_abort, 1219 + .host_release = lpfc_nvmet_host_release, 1419 1220 1420 1221 .max_hw_queues = 1, 1421 1222 .max_sgl_segments = LPFC_NVMET_DEFAULT_SEGS, ··· 1429 1224 .target_features = 0, 1430 1225 /* sizes of additional private data for data structures */ 1431 1226 .target_priv_sz = sizeof(struct lpfc_nvmet_tgtport), 1227 + .lsrqst_priv_sz = 0, 1432 1228 }; 1433 1229 1434 1230 static void ··· 1574 1368 return -ENOMEM; 1575 1369 } 1576 1370 
ctx_buf->context->ctxbuf = ctx_buf; 1577 - ctx_buf->context->state = LPFC_NVMET_STE_FREE; 1371 + ctx_buf->context->state = LPFC_NVME_STE_FREE; 1578 1372 1579 1373 ctx_buf->iocbq = lpfc_sli_get_iocbq(phba); 1580 1374 if (!ctx_buf->iocbq) { ··· 1774 1568 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 1775 1569 uint16_t xri = bf_get(lpfc_wcqe_xa_xri, axri); 1776 1570 uint16_t rxid = bf_get(lpfc_wcqe_xa_remote_xid, axri); 1777 - struct lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; 1571 + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; 1778 1572 struct lpfc_nvmet_tgtport *tgtp; 1779 1573 struct nvmefc_tgt_fcp_req *req = NULL; 1780 1574 struct lpfc_nodelist *ndlp; ··· 1805 1599 /* Check if we already received a free context call 1806 1600 * and we have completed processing an abort situation. 1807 1601 */ 1808 - if (ctxp->flag & LPFC_NVMET_CTX_RLS && 1809 - !(ctxp->flag & LPFC_NVMET_ABORT_OP)) { 1602 + if (ctxp->flag & LPFC_NVME_CTX_RLS && 1603 + !(ctxp->flag & LPFC_NVME_ABORT_OP)) { 1810 1604 list_del_init(&ctxp->list); 1811 1605 released = true; 1812 1606 } 1813 - ctxp->flag &= ~LPFC_NVMET_XBUSY; 1607 + ctxp->flag &= ~LPFC_NVME_XBUSY; 1814 1608 spin_unlock(&ctxp->ctxlock); 1815 1609 spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 1816 1610 ··· 1852 1646 rxid); 1853 1647 1854 1648 spin_lock_irqsave(&ctxp->ctxlock, iflag); 1855 - ctxp->flag |= LPFC_NVMET_ABTS_RCV; 1856 - ctxp->state = LPFC_NVMET_STE_ABORT; 1649 + ctxp->flag |= LPFC_NVME_ABTS_RCV; 1650 + ctxp->state = LPFC_NVME_STE_ABORT; 1857 1651 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 1858 1652 1859 1653 lpfc_nvmeio_data(phba, 1860 1654 "NVMET ABTS RCV: xri x%x CPU %02x rjt %d\n", 1861 1655 xri, raw_smp_processor_id(), 0); 1862 1656 1863 - req = &ctxp->ctx.fcp_req; 1657 + req = &ctxp->hdlrctx.fcp_req; 1864 1658 if (req) 1865 1659 nvmet_fc_rcv_fcp_abort(phba->targetport, req); 1866 1660 } ··· 1873 1667 { 1874 1668 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 1875 1669 struct lpfc_hba *phba = vport->phba; 1876 - struct 
lpfc_nvmet_rcv_ctx *ctxp, *next_ctxp; 1670 + struct lpfc_async_xchg_ctx *ctxp, *next_ctxp; 1877 1671 struct nvmefc_tgt_fcp_req *rsp; 1878 1672 uint32_t sid; 1879 1673 uint16_t oxid, xri; ··· 1896 1690 spin_unlock_irqrestore(&phba->hbalock, iflag); 1897 1691 1898 1692 spin_lock_irqsave(&ctxp->ctxlock, iflag); 1899 - ctxp->flag |= LPFC_NVMET_ABTS_RCV; 1693 + ctxp->flag |= LPFC_NVME_ABTS_RCV; 1900 1694 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 1901 1695 1902 1696 lpfc_nvmeio_data(phba, ··· 1906 1700 lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, 1907 1701 "6319 NVMET Rcv ABTS:acc xri x%x\n", xri); 1908 1702 1909 - rsp = &ctxp->ctx.fcp_req; 1703 + rsp = &ctxp->hdlrctx.fcp_req; 1910 1704 nvmet_fc_rcv_fcp_abort(phba->targetport, rsp); 1911 1705 1912 1706 /* Respond with BA_ACC accordingly */ ··· 1965 1759 xri = ctxp->ctxbuf->sglq->sli4_xritag; 1966 1760 1967 1761 spin_lock_irqsave(&ctxp->ctxlock, iflag); 1968 - ctxp->flag |= (LPFC_NVMET_ABTS_RCV | LPFC_NVMET_ABORT_OP); 1762 + ctxp->flag |= (LPFC_NVME_ABTS_RCV | LPFC_NVME_ABORT_OP); 1969 1763 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 1970 1764 1971 1765 lpfc_nvmeio_data(phba, ··· 1977 1771 "flag x%x state x%x\n", 1978 1772 ctxp->oxid, xri, ctxp->flag, ctxp->state); 1979 1773 1980 - if (ctxp->flag & LPFC_NVMET_TNOTIFY) { 1774 + if (ctxp->flag & LPFC_NVME_TNOTIFY) { 1981 1775 /* Notify the transport */ 1982 1776 nvmet_fc_rcv_fcp_abort(phba->targetport, 1983 - &ctxp->ctx.fcp_req); 1777 + &ctxp->hdlrctx.fcp_req); 1984 1778 } else { 1985 1779 cancel_work_sync(&ctxp->ctxbuf->defer_work); 1986 1780 spin_lock_irqsave(&ctxp->ctxlock, iflag); ··· 2008 1802 2009 1803 static void 2010 1804 lpfc_nvmet_wqfull_flush(struct lpfc_hba *phba, struct lpfc_queue *wq, 2011 - struct lpfc_nvmet_rcv_ctx *ctxp) 1805 + struct lpfc_async_xchg_ctx *ctxp) 2012 1806 { 2013 1807 struct lpfc_sli_ring *pring; 2014 1808 struct lpfc_iocbq *nvmewqeq; ··· 2059 1853 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 2060 1854 struct lpfc_sli_ring *pring; 2061 
1855 struct lpfc_iocbq *nvmewqeq; 2062 - struct lpfc_nvmet_rcv_ctx *ctxp; 1856 + struct lpfc_async_xchg_ctx *ctxp; 2063 1857 unsigned long iflags; 2064 1858 int rc; 2065 1859 ··· 2073 1867 list_remove_head(&wq->wqfull_list, nvmewqeq, struct lpfc_iocbq, 2074 1868 list); 2075 1869 spin_unlock_irqrestore(&pring->ring_lock, iflags); 2076 - ctxp = (struct lpfc_nvmet_rcv_ctx *)nvmewqeq->context2; 1870 + ctxp = (struct lpfc_async_xchg_ctx *)nvmewqeq->context2; 2077 1871 rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, nvmewqeq); 2078 1872 spin_lock_irqsave(&pring->ring_lock, iflags); 2079 1873 if (rc == -EBUSY) { ··· 2085 1879 if (rc == WQE_SUCCESS) { 2086 1880 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS 2087 1881 if (ctxp->ts_cmd_nvme) { 2088 - if (ctxp->ctx.fcp_req.op == NVMET_FCOP_RSP) 1882 + if (ctxp->hdlrctx.fcp_req.op == NVMET_FCOP_RSP) 2089 1883 ctxp->ts_status_wqput = ktime_get_ns(); 2090 1884 else 2091 1885 ctxp->ts_data_wqput = ktime_get_ns(); ··· 2132 1926 } 2133 1927 2134 1928 /** 2135 - * lpfc_nvmet_unsol_ls_buffer - Process an unsolicited event data buffer 1929 + * lpfc_nvmet_handle_lsreq - Process an NVME LS request 2136 1930 * @phba: pointer to lpfc hba data structure. 2137 - * @pring: pointer to a SLI ring. 2138 - * @nvmebuf: pointer to lpfc nvme command HBQ data structure. 1931 + * @axchg: pointer to exchange context for the NVME LS request 2139 1932 * 2140 - * This routine is used for processing the WQE associated with a unsolicited 2141 - * event. It first determines whether there is an existing ndlp that matches 2142 - * the DID from the unsolicited WQE. If not, it will create a new one with 2143 - * the DID from the unsolicited WQE. The ELS command from the unsolicited 2144 - * WQE is then used to invoke the proper routine and to set up proper state 2145 - * of the discovery state machine. 
2146 - **/ 2147 - static void 2148 - lpfc_nvmet_unsol_ls_buffer(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, 2149 - struct hbq_dmabuf *nvmebuf) 1933 + * This routine is used for processing an asychronously received NVME LS 1934 + * request. Any remaining validation is done and the LS is then forwarded 1935 + * to the nvmet-fc transport via nvmet_fc_rcv_ls_req(). 1936 + * 1937 + * The calling sequence should be: nvmet_fc_rcv_ls_req() -> (processing) 1938 + * -> lpfc_nvmet_xmt_ls_rsp/cmp -> req->done. 1939 + * lpfc_nvme_xmt_ls_rsp_cmp should free the allocated axchg. 1940 + * 1941 + * Returns 0 if LS was handled and delivered to the transport 1942 + * Returns 1 if LS failed to be handled and should be dropped 1943 + */ 1944 + int 1945 + lpfc_nvmet_handle_lsreq(struct lpfc_hba *phba, 1946 + struct lpfc_async_xchg_ctx *axchg) 2150 1947 { 2151 1948 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 2152 - struct lpfc_nvmet_tgtport *tgtp; 2153 - struct fc_frame_header *fc_hdr; 2154 - struct lpfc_nvmet_rcv_ctx *ctxp; 2155 - uint32_t *payload; 2156 - uint32_t size, oxid, sid, rc; 1949 + struct lpfc_nvmet_tgtport *tgtp = phba->targetport->private; 1950 + uint32_t *payload = axchg->payload; 1951 + int rc; 2157 1952 2158 - 2159 - if (!nvmebuf || !phba->targetport) { 2160 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 2161 - "6154 LS Drop IO\n"); 2162 - oxid = 0; 2163 - size = 0; 2164 - sid = 0; 2165 - ctxp = NULL; 2166 - goto dropit; 2167 - } 2168 - 2169 - fc_hdr = (struct fc_frame_header *)(nvmebuf->hbuf.virt); 2170 - oxid = be16_to_cpu(fc_hdr->fh_ox_id); 2171 - 2172 - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 2173 - payload = (uint32_t *)(nvmebuf->dbuf.virt); 2174 - size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); 2175 - sid = sli4_sid_from_fc_hdr(fc_hdr); 2176 - 2177 - ctxp = kzalloc(sizeof(struct lpfc_nvmet_rcv_ctx), GFP_ATOMIC); 2178 - if (ctxp == NULL) { 2179 - atomic_inc(&tgtp->rcv_ls_req_drop); 2180 - lpfc_printf_log(phba, 
KERN_ERR, LOG_NVME_IOERR, 2181 - "6155 LS Drop IO x%x: Alloc\n", 2182 - oxid); 2183 - dropit: 2184 - lpfc_nvmeio_data(phba, "NVMET LS DROP: " 2185 - "xri x%x sz %d from %06x\n", 2186 - oxid, size, sid); 2187 - lpfc_in_buf_free(phba, &nvmebuf->dbuf); 2188 - return; 2189 - } 2190 - ctxp->phba = phba; 2191 - ctxp->size = size; 2192 - ctxp->oxid = oxid; 2193 - ctxp->sid = sid; 2194 - ctxp->wqeq = NULL; 2195 - ctxp->state = LPFC_NVMET_STE_LS_RCV; 2196 - ctxp->entry_cnt = 1; 2197 - ctxp->rqb_buffer = (void *)nvmebuf; 2198 - ctxp->hdwq = &phba->sli4_hba.hdwq[0]; 2199 - 2200 - lpfc_nvmeio_data(phba, "NVMET LS RCV: xri x%x sz %d from %06x\n", 2201 - oxid, size, sid); 2202 - /* 2203 - * The calling sequence should be: 2204 - * nvmet_fc_rcv_ls_req -> lpfc_nvmet_xmt_ls_rsp/cmp ->_req->done 2205 - * lpfc_nvmet_xmt_ls_rsp_cmp should free the allocated ctxp. 2206 - */ 2207 1953 atomic_inc(&tgtp->rcv_ls_req_in); 2208 - rc = nvmet_fc_rcv_ls_req(phba->targetport, &ctxp->ctx.ls_req, 2209 - payload, size); 1954 + 1955 + /* 1956 + * Driver passes the ndlp as the hosthandle argument allowing 1957 + * the transport to generate LS requests for any associateions 1958 + * that are created. 
1959 + */ 1960 + rc = nvmet_fc_rcv_ls_req(phba->targetport, axchg->ndlp, &axchg->ls_rsp, 1961 + axchg->payload, axchg->size); 2210 1962 2211 1963 lpfc_printf_log(phba, KERN_INFO, LOG_NVME_DISC, 2212 1964 "6037 NVMET Unsol rcv: sz %d rc %d: %08x %08x %08x " 2213 - "%08x %08x %08x\n", size, rc, 1965 + "%08x %08x %08x\n", axchg->size, rc, 2214 1966 *payload, *(payload+1), *(payload+2), 2215 1967 *(payload+3), *(payload+4), *(payload+5)); 2216 1968 2217 - if (rc == 0) { 1969 + if (!rc) { 2218 1970 atomic_inc(&tgtp->rcv_ls_req_out); 2219 - return; 1971 + return 0; 2220 1972 } 2221 1973 2222 - lpfc_nvmeio_data(phba, "NVMET LS DROP: xri x%x sz %d from %06x\n", 2223 - oxid, size, sid); 2224 - 2225 1974 atomic_inc(&tgtp->rcv_ls_req_drop); 2226 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 2227 - "6156 LS Drop IO x%x: nvmet_fc_rcv_ls_req %d\n", 2228 - ctxp->oxid, rc); 2229 - 2230 - /* We assume a rcv'ed cmd ALWAYs fits into 1 buffer */ 2231 - lpfc_in_buf_free(phba, &nvmebuf->dbuf); 2232 - 2233 - atomic_inc(&tgtp->xmt_ls_abort); 2234 - lpfc_nvmet_unsol_ls_issue_abort(phba, ctxp, sid, oxid); 2235 1975 #endif 1976 + return 1; 2236 1977 } 2237 1978 2238 1979 static void 2239 1980 lpfc_nvmet_process_rcv_fcp_req(struct lpfc_nvmet_ctxbuf *ctx_buf) 2240 1981 { 2241 1982 #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 2242 - struct lpfc_nvmet_rcv_ctx *ctxp = ctx_buf->context; 1983 + struct lpfc_async_xchg_ctx *ctxp = ctx_buf->context; 2243 1984 struct lpfc_hba *phba = ctxp->phba; 2244 1985 struct rqb_dmabuf *nvmebuf = ctxp->rqb_buffer; 2245 1986 struct lpfc_nvmet_tgtport *tgtp; ··· 2207 2054 return; 2208 2055 } 2209 2056 2210 - if (ctxp->flag & LPFC_NVMET_ABTS_RCV) { 2057 + if (ctxp->flag & LPFC_NVME_ABTS_RCV) { 2211 2058 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 2212 2059 "6324 IO oxid x%x aborted\n", 2213 2060 ctxp->oxid); ··· 2216 2063 2217 2064 payload = (uint32_t *)(nvmebuf->dbuf.virt); 2218 2065 tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 2219 - ctxp->flag 
|= LPFC_NVMET_TNOTIFY; 2066 + ctxp->flag |= LPFC_NVME_TNOTIFY; 2220 2067 #ifdef CONFIG_SCSI_LPFC_DEBUG_FS 2221 2068 if (ctxp->ts_isr_cmd) 2222 2069 ctxp->ts_cmd_nvme = ktime_get_ns(); ··· 2230 2077 * A buffer has already been reposted for this IO, so just free 2231 2078 * the nvmebuf. 2232 2079 */ 2233 - rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->ctx.fcp_req, 2080 + rc = nvmet_fc_rcv_fcp_req(phba->targetport, &ctxp->hdlrctx.fcp_req, 2234 2081 payload, ctxp->size); 2235 2082 /* Process FCP command */ 2236 2083 if (rc == 0) { 2237 2084 atomic_inc(&tgtp->rcv_fcp_cmd_out); 2238 2085 spin_lock_irqsave(&ctxp->ctxlock, iflags); 2239 - if ((ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) || 2086 + if ((ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) || 2240 2087 (nvmebuf != ctxp->rqb_buffer)) { 2241 2088 spin_unlock_irqrestore(&ctxp->ctxlock, iflags); 2242 2089 return; ··· 2255 2102 atomic_inc(&tgtp->rcv_fcp_cmd_out); 2256 2103 atomic_inc(&tgtp->defer_fod); 2257 2104 spin_lock_irqsave(&ctxp->ctxlock, iflags); 2258 - if (ctxp->flag & LPFC_NVMET_CTX_REUSE_WQ) { 2105 + if (ctxp->flag & LPFC_NVME_CTX_REUSE_WQ) { 2259 2106 spin_unlock_irqrestore(&ctxp->ctxlock, iflags); 2260 2107 return; 2261 2108 } ··· 2270 2117 phba->sli4_hba.nvmet_mrq_data[qno], 1, qno); 2271 2118 return; 2272 2119 } 2273 - ctxp->flag &= ~LPFC_NVMET_TNOTIFY; 2120 + ctxp->flag &= ~LPFC_NVME_TNOTIFY; 2274 2121 atomic_inc(&tgtp->rcv_fcp_cmd_drop); 2275 2122 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 2276 2123 "2582 FCP Drop IO x%x: err x%x: x%x x%x x%x\n", ··· 2377 2224 uint64_t isr_timestamp, 2378 2225 uint8_t cqflag) 2379 2226 { 2380 - struct lpfc_nvmet_rcv_ctx *ctxp; 2227 + struct lpfc_async_xchg_ctx *ctxp; 2381 2228 struct lpfc_nvmet_tgtport *tgtp; 2382 2229 struct fc_frame_header *fc_hdr; 2383 2230 struct lpfc_nvmet_ctxbuf *ctx_buf; ··· 2459 2306 2460 2307 sid = sli4_sid_from_fc_hdr(fc_hdr); 2461 2308 2462 - ctxp = (struct lpfc_nvmet_rcv_ctx *)ctx_buf->context; 2309 + ctxp = (struct lpfc_async_xchg_ctx 
*)ctx_buf->context; 2463 2310 spin_lock_irqsave(&phba->sli4_hba.t_active_list_lock, iflag); 2464 2311 list_add_tail(&ctxp->list, &phba->sli4_hba.t_active_ctx_list); 2465 2312 spin_unlock_irqrestore(&phba->sli4_hba.t_active_list_lock, iflag); 2466 - if (ctxp->state != LPFC_NVMET_STE_FREE) { 2313 + if (ctxp->state != LPFC_NVME_STE_FREE) { 2467 2314 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 2468 2315 "6414 NVMET Context corrupt %d %d oxid x%x\n", 2469 2316 ctxp->state, ctxp->entry_cnt, ctxp->oxid); ··· 2475 2322 ctxp->oxid = oxid; 2476 2323 ctxp->sid = sid; 2477 2324 ctxp->idx = idx; 2478 - ctxp->state = LPFC_NVMET_STE_RCV; 2325 + ctxp->state = LPFC_NVME_STE_RCV; 2479 2326 ctxp->entry_cnt = 1; 2480 2327 ctxp->flag = 0; 2481 2328 ctxp->ctxbuf = ctx_buf; ··· 2519 2366 spin_unlock_irqrestore(&ctxp->ctxlock, iflag); 2520 2367 lpfc_nvmet_unsol_fcp_issue_abort(phba, ctxp, sid, oxid); 2521 2368 } 2522 - } 2523 - 2524 - /** 2525 - * lpfc_nvmet_unsol_ls_event - Process an unsolicited event from an nvme nport 2526 - * @phba: pointer to lpfc hba data structure. 2527 - * @pring: pointer to a SLI ring. 2528 - * @nvmebuf: pointer to received nvme data structure. 2529 - * 2530 - * This routine is used to process an unsolicited event received from a SLI 2531 - * (Service Level Interface) ring. The actual processing of the data buffer 2532 - * associated with the unsolicited event is done by invoking the routine 2533 - * lpfc_nvmet_unsol_ls_buffer() after properly set up the buffer from the 2534 - * SLI RQ on which the unsolicited event was received. 
2535 - **/ 2536 - void 2537 - lpfc_nvmet_unsol_ls_event(struct lpfc_hba *phba, struct lpfc_sli_ring *pring, 2538 - struct lpfc_iocbq *piocb) 2539 - { 2540 - struct lpfc_dmabuf *d_buf; 2541 - struct hbq_dmabuf *nvmebuf; 2542 - 2543 - d_buf = piocb->context2; 2544 - nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); 2545 - 2546 - if (!nvmebuf) { 2547 - lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 2548 - "3015 LS Drop IO\n"); 2549 - return; 2550 - } 2551 - if (phba->nvmet_support == 0) { 2552 - lpfc_in_buf_free(phba, &nvmebuf->dbuf); 2553 - return; 2554 - } 2555 - lpfc_nvmet_unsol_ls_buffer(phba, pring, nvmebuf); 2556 2369 } 2557 2370 2558 2371 /** ··· 2581 2462 **/ 2582 2463 static struct lpfc_iocbq * 2583 2464 lpfc_nvmet_prep_ls_wqe(struct lpfc_hba *phba, 2584 - struct lpfc_nvmet_rcv_ctx *ctxp, 2465 + struct lpfc_async_xchg_ctx *ctxp, 2585 2466 dma_addr_t rspbuf, uint16_t rspsize) 2586 2467 { 2587 2468 struct lpfc_nodelist *ndlp; ··· 2703 2584 2704 2585 static struct lpfc_iocbq * 2705 2586 lpfc_nvmet_prep_fcp_wqe(struct lpfc_hba *phba, 2706 - struct lpfc_nvmet_rcv_ctx *ctxp) 2587 + struct lpfc_async_xchg_ctx *ctxp) 2707 2588 { 2708 - struct nvmefc_tgt_fcp_req *rsp = &ctxp->ctx.fcp_req; 2589 + struct nvmefc_tgt_fcp_req *rsp = &ctxp->hdlrctx.fcp_req; 2709 2590 struct lpfc_nvmet_tgtport *tgtp; 2710 2591 struct sli4_sge *sgl; 2711 2592 struct lpfc_nodelist *ndlp; ··· 2766 2647 } 2767 2648 2768 2649 /* Sanity check */ 2769 - if (((ctxp->state == LPFC_NVMET_STE_RCV) && 2650 + if (((ctxp->state == LPFC_NVME_STE_RCV) && 2770 2651 (ctxp->entry_cnt == 1)) || 2771 - (ctxp->state == LPFC_NVMET_STE_DATA)) { 2652 + (ctxp->state == LPFC_NVME_STE_DATA)) { 2772 2653 wqe = &nvmewqe->wqe; 2773 2654 } else { 2774 2655 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, ··· 3031 2912 sgl++; 3032 2913 ctxp->offset += cnt; 3033 2914 } 3034 - ctxp->state = LPFC_NVMET_STE_DATA; 2915 + ctxp->state = LPFC_NVME_STE_DATA; 3035 2916 ctxp->entry_cnt++; 3036 2917 return nvmewqe; 3037 2918 } 
··· 3050 2931 lpfc_nvmet_sol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 3051 2932 struct lpfc_wcqe_complete *wcqe) 3052 2933 { 3053 - struct lpfc_nvmet_rcv_ctx *ctxp; 2934 + struct lpfc_async_xchg_ctx *ctxp; 3054 2935 struct lpfc_nvmet_tgtport *tgtp; 3055 2936 uint32_t result; 3056 2937 unsigned long flags; ··· 3060 2941 result = wcqe->parameter; 3061 2942 3062 2943 tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3063 - if (ctxp->flag & LPFC_NVMET_ABORT_OP) 2944 + if (ctxp->flag & LPFC_NVME_ABORT_OP) 3064 2945 atomic_inc(&tgtp->xmt_fcp_abort_cmpl); 3065 2946 3066 2947 spin_lock_irqsave(&ctxp->ctxlock, flags); 3067 - ctxp->state = LPFC_NVMET_STE_DONE; 2948 + ctxp->state = LPFC_NVME_STE_DONE; 3068 2949 3069 2950 /* Check if we already received a free context call 3070 2951 * and we have completed processing an abort situation. 3071 2952 */ 3072 - if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && 3073 - !(ctxp->flag & LPFC_NVMET_XBUSY)) { 2953 + if ((ctxp->flag & LPFC_NVME_CTX_RLS) && 2954 + !(ctxp->flag & LPFC_NVME_XBUSY)) { 3074 2955 spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 3075 2956 list_del_init(&ctxp->list); 3076 2957 spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 3077 2958 released = true; 3078 2959 } 3079 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 2960 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3080 2961 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3081 2962 atomic_inc(&tgtp->xmt_abort_rsp); 3082 2963 ··· 3100 2981 lpfc_sli_release_iocbq(phba, cmdwqe); 3101 2982 3102 2983 /* Since iaab/iaar are NOT set, there is no work left. 3103 - * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted 2984 + * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted 3104 2985 * should have been called already. 
3105 2986 */ 3106 2987 } ··· 3119 3000 lpfc_nvmet_unsol_fcp_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 3120 3001 struct lpfc_wcqe_complete *wcqe) 3121 3002 { 3122 - struct lpfc_nvmet_rcv_ctx *ctxp; 3003 + struct lpfc_async_xchg_ctx *ctxp; 3123 3004 struct lpfc_nvmet_tgtport *tgtp; 3124 3005 unsigned long flags; 3125 3006 uint32_t result; ··· 3139 3020 3140 3021 tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3141 3022 spin_lock_irqsave(&ctxp->ctxlock, flags); 3142 - if (ctxp->flag & LPFC_NVMET_ABORT_OP) 3023 + if (ctxp->flag & LPFC_NVME_ABORT_OP) 3143 3024 atomic_inc(&tgtp->xmt_fcp_abort_cmpl); 3144 3025 3145 3026 /* Sanity check */ 3146 - if (ctxp->state != LPFC_NVMET_STE_ABORT) { 3027 + if (ctxp->state != LPFC_NVME_STE_ABORT) { 3147 3028 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, 3148 3029 "6112 ABTS Wrong state:%d oxid x%x\n", 3149 3030 ctxp->state, ctxp->oxid); ··· 3152 3033 /* Check if we already received a free context call 3153 3034 * and we have completed processing an abort situation. 3154 3035 */ 3155 - ctxp->state = LPFC_NVMET_STE_DONE; 3156 - if ((ctxp->flag & LPFC_NVMET_CTX_RLS) && 3157 - !(ctxp->flag & LPFC_NVMET_XBUSY)) { 3036 + ctxp->state = LPFC_NVME_STE_DONE; 3037 + if ((ctxp->flag & LPFC_NVME_CTX_RLS) && 3038 + !(ctxp->flag & LPFC_NVME_XBUSY)) { 3158 3039 spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 3159 3040 list_del_init(&ctxp->list); 3160 3041 spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 3161 3042 released = true; 3162 3043 } 3163 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 3044 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3164 3045 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3165 3046 atomic_inc(&tgtp->xmt_abort_rsp); 3166 3047 ··· 3181 3062 lpfc_nvmet_ctxbuf_post(phba, ctxp->ctxbuf); 3182 3063 3183 3064 /* Since iaab/iaar are NOT set, there is no work left. 
3184 - * For LPFC_NVMET_XBUSY, lpfc_sli4_nvmet_xri_aborted 3065 + * For LPFC_NVME_XBUSY, lpfc_sli4_nvmet_xri_aborted 3185 3066 * should have been called already. 3186 3067 */ 3187 3068 } ··· 3200 3081 lpfc_nvmet_xmt_ls_abort_cmp(struct lpfc_hba *phba, struct lpfc_iocbq *cmdwqe, 3201 3082 struct lpfc_wcqe_complete *wcqe) 3202 3083 { 3203 - struct lpfc_nvmet_rcv_ctx *ctxp; 3084 + struct lpfc_async_xchg_ctx *ctxp; 3204 3085 struct lpfc_nvmet_tgtport *tgtp; 3205 3086 uint32_t result; 3206 3087 3207 3088 ctxp = cmdwqe->context2; 3208 3089 result = wcqe->parameter; 3209 3090 3210 - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3211 - atomic_inc(&tgtp->xmt_ls_abort_cmpl); 3091 + if (phba->nvmet_support) { 3092 + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3093 + atomic_inc(&tgtp->xmt_ls_abort_cmpl); 3094 + } 3212 3095 3213 3096 lpfc_printf_log(phba, KERN_INFO, LOG_NVME_ABTS, 3214 3097 "6083 Abort cmpl: ctx x%px WCQE:%08x %08x %08x %08x\n", ··· 3228 3107 return; 3229 3108 } 3230 3109 3231 - if (ctxp->state != LPFC_NVMET_STE_LS_ABORT) { 3110 + if (ctxp->state != LPFC_NVME_STE_LS_ABORT) { 3232 3111 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 3233 3112 "6416 NVMET LS abort cmpl state mismatch: " 3234 3113 "oxid x%x: %d %d\n", ··· 3243 3122 3244 3123 static int 3245 3124 lpfc_nvmet_unsol_issue_abort(struct lpfc_hba *phba, 3246 - struct lpfc_nvmet_rcv_ctx *ctxp, 3125 + struct lpfc_async_xchg_ctx *ctxp, 3247 3126 uint32_t sid, uint16_t xri) 3248 3127 { 3249 - struct lpfc_nvmet_tgtport *tgtp; 3128 + struct lpfc_nvmet_tgtport *tgtp = NULL; 3250 3129 struct lpfc_iocbq *abts_wqeq; 3251 3130 union lpfc_wqe128 *wqe_abts; 3252 3131 struct lpfc_nodelist *ndlp; ··· 3255 3134 "6067 ABTS: sid %x xri x%x/x%x\n", 3256 3135 sid, xri, ctxp->wqeq->sli4_xritag); 3257 3136 3258 - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3137 + if (phba->nvmet_support && phba->targetport) 3138 + tgtp = (struct lpfc_nvmet_tgtport 
*)phba->targetport->private; 3259 3139 3260 3140 ndlp = lpfc_findnode_did(phba->pport, sid); 3261 3141 if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || 3262 3142 ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && 3263 3143 (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { 3264 - atomic_inc(&tgtp->xmt_abort_rsp_error); 3144 + if (tgtp) 3145 + atomic_inc(&tgtp->xmt_abort_rsp_error); 3265 3146 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, 3266 3147 "6134 Drop ABTS - wrong NDLP state x%x.\n", 3267 3148 (ndlp) ? ndlp->nlp_state : NLP_STE_MAX_STATE); ··· 3340 3217 3341 3218 static int 3342 3219 lpfc_nvmet_sol_fcp_issue_abort(struct lpfc_hba *phba, 3343 - struct lpfc_nvmet_rcv_ctx *ctxp, 3220 + struct lpfc_async_xchg_ctx *ctxp, 3344 3221 uint32_t sid, uint16_t xri) 3345 3222 { 3346 3223 struct lpfc_nvmet_tgtport *tgtp; ··· 3367 3244 3368 3245 /* No failure to an ABTS request. */ 3369 3246 spin_lock_irqsave(&ctxp->ctxlock, flags); 3370 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 3247 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3371 3248 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3372 3249 return 0; 3373 3250 } ··· 3381 3258 "6161 ABORT failed: No wqeqs: " 3382 3259 "xri: x%x\n", ctxp->oxid); 3383 3260 /* No failure to an ABTS request. */ 3384 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 3261 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3385 3262 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3386 3263 return 0; 3387 3264 } 3388 3265 abts_wqeq = ctxp->abort_wqeq; 3389 - ctxp->state = LPFC_NVMET_STE_ABORT; 3390 - opt = (ctxp->flag & LPFC_NVMET_ABTS_RCV) ? INHIBIT_ABORT : 0; 3266 + ctxp->state = LPFC_NVME_STE_ABORT; 3267 + opt = (ctxp->flag & LPFC_NVME_ABTS_RCV) ? INHIBIT_ABORT : 0; 3391 3268 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3392 3269 3393 3270 /* Announce entry to new IO submit field. 
*/ ··· 3410 3287 phba->hba_flag, ctxp->oxid); 3411 3288 lpfc_sli_release_iocbq(phba, abts_wqeq); 3412 3289 spin_lock_irqsave(&ctxp->ctxlock, flags); 3413 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 3290 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3414 3291 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3415 3292 return 0; 3416 3293 } ··· 3425 3302 ctxp->oxid); 3426 3303 lpfc_sli_release_iocbq(phba, abts_wqeq); 3427 3304 spin_lock_irqsave(&ctxp->ctxlock, flags); 3428 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 3305 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3429 3306 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3430 3307 return 0; 3431 3308 } ··· 3454 3331 3455 3332 atomic_inc(&tgtp->xmt_abort_rsp_error); 3456 3333 spin_lock_irqsave(&ctxp->ctxlock, flags); 3457 - ctxp->flag &= ~LPFC_NVMET_ABORT_OP; 3334 + ctxp->flag &= ~LPFC_NVME_ABORT_OP; 3458 3335 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3459 3336 lpfc_sli_release_iocbq(phba, abts_wqeq); 3460 3337 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, ··· 3466 3343 3467 3344 static int 3468 3345 lpfc_nvmet_unsol_fcp_issue_abort(struct lpfc_hba *phba, 3469 - struct lpfc_nvmet_rcv_ctx *ctxp, 3346 + struct lpfc_async_xchg_ctx *ctxp, 3470 3347 uint32_t sid, uint16_t xri) 3471 3348 { 3472 3349 struct lpfc_nvmet_tgtport *tgtp; ··· 3481 3358 ctxp->wqeq->hba_wqidx = 0; 3482 3359 } 3483 3360 3484 - if (ctxp->state == LPFC_NVMET_STE_FREE) { 3361 + if (ctxp->state == LPFC_NVME_STE_FREE) { 3485 3362 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 3486 3363 "6417 NVMET ABORT ctx freed %d %d oxid x%x\n", 3487 3364 ctxp->state, ctxp->entry_cnt, ctxp->oxid); 3488 3365 rc = WQE_BUSY; 3489 3366 goto aerr; 3490 3367 } 3491 - ctxp->state = LPFC_NVMET_STE_ABORT; 3368 + ctxp->state = LPFC_NVME_STE_ABORT; 3492 3369 ctxp->entry_cnt++; 3493 3370 rc = lpfc_nvmet_unsol_issue_abort(phba, ctxp, sid, xri); 3494 3371 if (rc == 0) ··· 3510 3387 3511 3388 aerr: 3512 3389 spin_lock_irqsave(&ctxp->ctxlock, flags); 3513 - if (ctxp->flag & LPFC_NVMET_CTX_RLS) { 3390 + if 
(ctxp->flag & LPFC_NVME_CTX_RLS) { 3514 3391 spin_lock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 3515 3392 list_del_init(&ctxp->list); 3516 3393 spin_unlock(&phba->sli4_hba.abts_nvmet_buf_list_lock); 3517 3394 released = true; 3518 3395 } 3519 - ctxp->flag &= ~(LPFC_NVMET_ABORT_OP | LPFC_NVMET_CTX_RLS); 3396 + ctxp->flag &= ~(LPFC_NVME_ABORT_OP | LPFC_NVME_CTX_RLS); 3520 3397 spin_unlock_irqrestore(&ctxp->ctxlock, flags); 3521 3398 3522 3399 atomic_inc(&tgtp->xmt_abort_rsp_error); ··· 3529 3406 return 1; 3530 3407 } 3531 3408 3532 - static int 3533 - lpfc_nvmet_unsol_ls_issue_abort(struct lpfc_hba *phba, 3534 - struct lpfc_nvmet_rcv_ctx *ctxp, 3409 + /** 3410 + * lpfc_nvme_unsol_ls_issue_abort - issue ABTS on an exchange received 3411 + * via async frame receive where the frame is not handled. 3412 + * @phba: pointer to adapter structure 3413 + * @ctxp: pointer to the asynchronously received received sequence 3414 + * @sid: address of the remote port to send the ABTS to 3415 + * @xri: oxid value to for the ABTS (other side's exchange id). 
3416 + **/ 3417 + int 3418 + lpfc_nvme_unsol_ls_issue_abort(struct lpfc_hba *phba, 3419 + struct lpfc_async_xchg_ctx *ctxp, 3535 3420 uint32_t sid, uint16_t xri) 3536 3421 { 3537 - struct lpfc_nvmet_tgtport *tgtp; 3422 + struct lpfc_nvmet_tgtport *tgtp = NULL; 3538 3423 struct lpfc_iocbq *abts_wqeq; 3539 3424 unsigned long flags; 3540 3425 int rc; 3541 3426 3542 - if ((ctxp->state == LPFC_NVMET_STE_LS_RCV && ctxp->entry_cnt == 1) || 3543 - (ctxp->state == LPFC_NVMET_STE_LS_RSP && ctxp->entry_cnt == 2)) { 3544 - ctxp->state = LPFC_NVMET_STE_LS_ABORT; 3427 + if ((ctxp->state == LPFC_NVME_STE_LS_RCV && ctxp->entry_cnt == 1) || 3428 + (ctxp->state == LPFC_NVME_STE_LS_RSP && ctxp->entry_cnt == 2)) { 3429 + ctxp->state = LPFC_NVME_STE_LS_ABORT; 3545 3430 ctxp->entry_cnt++; 3546 3431 } else { 3547 3432 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_IOERR, 3548 3433 "6418 NVMET LS abort state mismatch " 3549 3434 "IO x%x: %d %d\n", 3550 3435 ctxp->oxid, ctxp->state, ctxp->entry_cnt); 3551 - ctxp->state = LPFC_NVMET_STE_LS_ABORT; 3436 + ctxp->state = LPFC_NVME_STE_LS_ABORT; 3552 3437 } 3553 3438 3554 - tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3439 + if (phba->nvmet_support && phba->targetport) 3440 + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3441 + 3555 3442 if (!ctxp->wqeq) { 3556 3443 /* Issue ABTS for this WQE based on iotag */ 3557 3444 ctxp->wqeq = lpfc_sli_get_iocbq(phba); ··· 3588 3455 rc = lpfc_sli4_issue_wqe(phba, ctxp->hdwq, abts_wqeq); 3589 3456 spin_unlock_irqrestore(&phba->hbalock, flags); 3590 3457 if (rc == WQE_SUCCESS) { 3591 - atomic_inc(&tgtp->xmt_abort_unsol); 3458 + if (tgtp) 3459 + atomic_inc(&tgtp->xmt_abort_unsol); 3592 3460 return 0; 3593 3461 } 3594 3462 out: 3595 - atomic_inc(&tgtp->xmt_abort_rsp_error); 3463 + if (tgtp) 3464 + atomic_inc(&tgtp->xmt_abort_rsp_error); 3596 3465 abts_wqeq->context2 = NULL; 3597 3466 abts_wqeq->context3 = NULL; 3598 3467 lpfc_sli_release_iocbq(phba, abts_wqeq); 3599 - kfree(ctxp); 
3600 3468 lpfc_printf_log(phba, KERN_ERR, LOG_NVME_ABTS, 3601 3469 "6056 Failed to Issue ABTS. Status x%x\n", rc); 3602 - return 0; 3470 + return 1; 3471 + } 3472 + 3473 + /** 3474 + * lpfc_nvmet_invalidate_host 3475 + * 3476 + * @phba - pointer to the driver instance bound to an adapter port. 3477 + * @ndlp - pointer to an lpfc_nodelist type 3478 + * 3479 + * This routine upcalls the nvmet transport to invalidate an NVME 3480 + * host to which this target instance had active connections. 3481 + */ 3482 + void 3483 + lpfc_nvmet_invalidate_host(struct lpfc_hba *phba, struct lpfc_nodelist *ndlp) 3484 + { 3485 + struct lpfc_nvmet_tgtport *tgtp; 3486 + 3487 + lpfc_printf_log(phba, KERN_INFO, LOG_NVME | LOG_NVME_ABTS, 3488 + "6203 Invalidating hosthandle x%px\n", 3489 + ndlp); 3490 + 3491 + tgtp = (struct lpfc_nvmet_tgtport *)phba->targetport->private; 3492 + atomic_set(&tgtp->state, LPFC_NVMET_INV_HOST_ACTIVE); 3493 + 3494 + #if (IS_ENABLED(CONFIG_NVME_TARGET_FC)) 3495 + /* Need to get the nvmet_fc_target_port pointer here.*/ 3496 + nvmet_fc_invalidate_host(phba->targetport, ndlp); 3497 + #endif 3603 3498 }
-158
drivers/scsi/lpfc/lpfc_nvmet.h
··· 1 - /******************************************************************* 2 - * This file is part of the Emulex Linux Device Driver for * 3 - * Fibre Channel Host Bus Adapters. * 4 - * Copyright (C) 2017-2019 Broadcom. All Rights Reserved. The term * 5 - * “Broadcom” refers to Broadcom Inc. and/or its subsidiaries. * 6 - * Copyright (C) 2004-2016 Emulex. All rights reserved. * 7 - * EMULEX and SLI are trademarks of Emulex. * 8 - * www.broadcom.com * 9 - * Portions Copyright (C) 2004-2005 Christoph Hellwig * 10 - * * 11 - * This program is free software; you can redistribute it and/or * 12 - * modify it under the terms of version 2 of the GNU General * 13 - * Public License as published by the Free Software Foundation. * 14 - * This program is distributed in the hope that it will be useful. * 15 - * ALL EXPRESS OR IMPLIED CONDITIONS, REPRESENTATIONS AND * 16 - * WARRANTIES, INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, * 17 - * FITNESS FOR A PARTICULAR PURPOSE, OR NON-INFRINGEMENT, ARE * 18 - * DISCLAIMED, EXCEPT TO THE EXTENT THAT SUCH DISCLAIMERS ARE HELD * 19 - * TO BE LEGALLY INVALID. See the GNU General Public License for * 20 - * more details, a copy of which can be found in the file COPYING * 21 - * included with this package. 
* 22 - ********************************************************************/ 23 - 24 - #define LPFC_NVMET_DEFAULT_SEGS (64 + 1) /* 256K IOs */ 25 - #define LPFC_NVMET_RQE_MIN_POST 128 26 - #define LPFC_NVMET_RQE_DEF_POST 512 27 - #define LPFC_NVMET_RQE_DEF_COUNT 2048 28 - #define LPFC_NVMET_SUCCESS_LEN 12 29 - 30 - #define LPFC_NVMET_MRQ_AUTO 0 31 - #define LPFC_NVMET_MRQ_MAX 16 32 - 33 - #define LPFC_NVMET_WAIT_TMO (5 * MSEC_PER_SEC) 34 - 35 - /* Used for NVME Target */ 36 - struct lpfc_nvmet_tgtport { 37 - struct lpfc_hba *phba; 38 - struct completion *tport_unreg_cmp; 39 - 40 - /* Stats counters - lpfc_nvmet_unsol_ls_buffer */ 41 - atomic_t rcv_ls_req_in; 42 - atomic_t rcv_ls_req_out; 43 - atomic_t rcv_ls_req_drop; 44 - atomic_t xmt_ls_abort; 45 - atomic_t xmt_ls_abort_cmpl; 46 - 47 - /* Stats counters - lpfc_nvmet_xmt_ls_rsp */ 48 - atomic_t xmt_ls_rsp; 49 - atomic_t xmt_ls_drop; 50 - 51 - /* Stats counters - lpfc_nvmet_xmt_ls_rsp_cmp */ 52 - atomic_t xmt_ls_rsp_error; 53 - atomic_t xmt_ls_rsp_aborted; 54 - atomic_t xmt_ls_rsp_xb_set; 55 - atomic_t xmt_ls_rsp_cmpl; 56 - 57 - /* Stats counters - lpfc_nvmet_unsol_fcp_buffer */ 58 - atomic_t rcv_fcp_cmd_in; 59 - atomic_t rcv_fcp_cmd_out; 60 - atomic_t rcv_fcp_cmd_drop; 61 - atomic_t rcv_fcp_cmd_defer; 62 - atomic_t xmt_fcp_release; 63 - 64 - /* Stats counters - lpfc_nvmet_xmt_fcp_op */ 65 - atomic_t xmt_fcp_drop; 66 - atomic_t xmt_fcp_read_rsp; 67 - atomic_t xmt_fcp_read; 68 - atomic_t xmt_fcp_write; 69 - atomic_t xmt_fcp_rsp; 70 - 71 - /* Stats counters - lpfc_nvmet_xmt_fcp_op_cmp */ 72 - atomic_t xmt_fcp_rsp_xb_set; 73 - atomic_t xmt_fcp_rsp_cmpl; 74 - atomic_t xmt_fcp_rsp_error; 75 - atomic_t xmt_fcp_rsp_aborted; 76 - atomic_t xmt_fcp_rsp_drop; 77 - 78 - /* Stats counters - lpfc_nvmet_xmt_fcp_abort */ 79 - atomic_t xmt_fcp_xri_abort_cqe; 80 - atomic_t xmt_fcp_abort; 81 - atomic_t xmt_fcp_abort_cmpl; 82 - atomic_t xmt_abort_sol; 83 - atomic_t xmt_abort_unsol; 84 - atomic_t xmt_abort_rsp; 85 - atomic_t 
xmt_abort_rsp_error; 86 - 87 - /* Stats counters - defer IO */ 88 - atomic_t defer_ctx; 89 - atomic_t defer_fod; 90 - atomic_t defer_wqfull; 91 - }; 92 - 93 - struct lpfc_nvmet_ctx_info { 94 - struct list_head nvmet_ctx_list; 95 - spinlock_t nvmet_ctx_list_lock; /* lock per CPU */ 96 - struct lpfc_nvmet_ctx_info *nvmet_ctx_next_cpu; 97 - struct lpfc_nvmet_ctx_info *nvmet_ctx_start_cpu; 98 - uint16_t nvmet_ctx_list_cnt; 99 - char pad[16]; /* pad to a cache-line */ 100 - }; 101 - 102 - /* This retrieves the context info associated with the specified cpu / mrq */ 103 - #define lpfc_get_ctx_list(phba, cpu, mrq) \ 104 - (phba->sli4_hba.nvmet_ctx_info + ((cpu * phba->cfg_nvmet_mrq) + mrq)) 105 - 106 - struct lpfc_nvmet_rcv_ctx { 107 - union { 108 - struct nvmefc_tgt_ls_req ls_req; 109 - struct nvmefc_tgt_fcp_req fcp_req; 110 - } ctx; 111 - struct list_head list; 112 - struct lpfc_hba *phba; 113 - struct lpfc_iocbq *wqeq; 114 - struct lpfc_iocbq *abort_wqeq; 115 - spinlock_t ctxlock; /* protect flag access */ 116 - uint32_t sid; 117 - uint32_t offset; 118 - uint16_t oxid; 119 - uint16_t size; 120 - uint16_t entry_cnt; 121 - uint16_t cpu; 122 - uint16_t idx; 123 - uint16_t state; 124 - /* States */ 125 - #define LPFC_NVMET_STE_LS_RCV 1 126 - #define LPFC_NVMET_STE_LS_ABORT 2 127 - #define LPFC_NVMET_STE_LS_RSP 3 128 - #define LPFC_NVMET_STE_RCV 4 129 - #define LPFC_NVMET_STE_DATA 5 130 - #define LPFC_NVMET_STE_ABORT 6 131 - #define LPFC_NVMET_STE_DONE 7 132 - #define LPFC_NVMET_STE_FREE 0xff 133 - uint16_t flag; 134 - #define LPFC_NVMET_IO_INP 0x1 /* IO is in progress on exchange */ 135 - #define LPFC_NVMET_ABORT_OP 0x2 /* Abort WQE issued on exchange */ 136 - #define LPFC_NVMET_XBUSY 0x4 /* XB bit set on IO cmpl */ 137 - #define LPFC_NVMET_CTX_RLS 0x8 /* ctx free requested */ 138 - #define LPFC_NVMET_ABTS_RCV 0x10 /* ABTS received on exchange */ 139 - #define LPFC_NVMET_CTX_REUSE_WQ 0x20 /* ctx reused via WQ */ 140 - #define LPFC_NVMET_DEFER_WQFULL 0x40 /* Waiting on a 
free WQE */ 141 - #define LPFC_NVMET_TNOTIFY 0x80 /* notify transport of abts */ 142 - struct rqb_dmabuf *rqb_buffer; 143 - struct lpfc_nvmet_ctxbuf *ctxbuf; 144 - struct lpfc_sli4_hdw_queue *hdwq; 145 - 146 - #ifdef CONFIG_SCSI_LPFC_DEBUG_FS 147 - uint64_t ts_isr_cmd; 148 - uint64_t ts_cmd_nvme; 149 - uint64_t ts_nvme_data; 150 - uint64_t ts_data_wqput; 151 - uint64_t ts_isr_data; 152 - uint64_t ts_data_nvme; 153 - uint64_t ts_nvme_status; 154 - uint64_t ts_status_wqput; 155 - uint64_t ts_isr_status; 156 - uint64_t ts_status_nvme; 157 - #endif 158 - };
+121 -7
drivers/scsi/lpfc/lpfc_sli.c
··· 39 39 #include <asm/set_memory.h> 40 40 #endif 41 41 42 - #include <linux/nvme-fc-driver.h> 43 - 44 42 #include "lpfc_hw4.h" 45 43 #include "lpfc_hw.h" 46 44 #include "lpfc_sli.h" ··· 48 50 #include "lpfc.h" 49 51 #include "lpfc_scsi.h" 50 52 #include "lpfc_nvme.h" 51 - #include "lpfc_nvmet.h" 52 53 #include "lpfc_crtn.h" 53 54 #include "lpfc_logmsg.h" 54 55 #include "lpfc_compat.h" ··· 2793 2796 } 2794 2797 2795 2798 /** 2799 + * lpfc_nvme_unsol_ls_handler - Process an unsolicited event data buffer 2800 + * containing a NVME LS request. 2801 + * @phba: pointer to lpfc hba data structure. 2802 + * @piocb: pointer to the iocbq struct representing the sequence starting 2803 + * frame. 2804 + * 2805 + * This routine initially validates the NVME LS, validates there is a login 2806 + * with the port that sent the LS, and then calls the appropriate nvme host 2807 + * or target LS request handler. 2808 + **/ 2809 + static void 2810 + lpfc_nvme_unsol_ls_handler(struct lpfc_hba *phba, struct lpfc_iocbq *piocb) 2811 + { 2812 + struct lpfc_nodelist *ndlp; 2813 + struct lpfc_dmabuf *d_buf; 2814 + struct hbq_dmabuf *nvmebuf; 2815 + struct fc_frame_header *fc_hdr; 2816 + struct lpfc_async_xchg_ctx *axchg = NULL; 2817 + char *failwhy = NULL; 2818 + uint32_t oxid, sid, did, fctl, size; 2819 + int ret = 1; 2820 + 2821 + d_buf = piocb->context2; 2822 + 2823 + nvmebuf = container_of(d_buf, struct hbq_dmabuf, dbuf); 2824 + fc_hdr = nvmebuf->hbuf.virt; 2825 + oxid = be16_to_cpu(fc_hdr->fh_ox_id); 2826 + sid = sli4_sid_from_fc_hdr(fc_hdr); 2827 + did = sli4_did_from_fc_hdr(fc_hdr); 2828 + fctl = (fc_hdr->fh_f_ctl[0] << 16 | 2829 + fc_hdr->fh_f_ctl[1] << 8 | 2830 + fc_hdr->fh_f_ctl[2]); 2831 + size = bf_get(lpfc_rcqe_length, &nvmebuf->cq_event.cqe.rcqe_cmpl); 2832 + 2833 + lpfc_nvmeio_data(phba, "NVME LS RCV: xri x%x sz %d from %06x\n", 2834 + oxid, size, sid); 2835 + 2836 + if (phba->pport->load_flag & FC_UNLOADING) { 2837 + failwhy = "Driver Unloading"; 2838 + } else if 
(!(phba->cfg_enable_fc4_type & LPFC_ENABLE_NVME)) { 2839 + failwhy = "NVME FC4 Disabled"; 2840 + } else if (!phba->nvmet_support && !phba->pport->localport) { 2841 + failwhy = "No Localport"; 2842 + } else if (phba->nvmet_support && !phba->targetport) { 2843 + failwhy = "No Targetport"; 2844 + } else if (unlikely(fc_hdr->fh_r_ctl != FC_RCTL_ELS4_REQ)) { 2845 + failwhy = "Bad NVME LS R_CTL"; 2846 + } else if (unlikely((fctl & 0x00FF0000) != 2847 + (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT))) { 2848 + failwhy = "Bad NVME LS F_CTL"; 2849 + } else { 2850 + axchg = kzalloc(sizeof(*axchg), GFP_ATOMIC); 2851 + if (!axchg) 2852 + failwhy = "No CTX memory"; 2853 + } 2854 + 2855 + if (unlikely(failwhy)) { 2856 + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, 2857 + "6154 Drop NVME LS: SID %06X OXID x%X: %s\n", 2858 + sid, oxid, failwhy); 2859 + goto out_fail; 2860 + } 2861 + 2862 + /* validate the source of the LS is logged in */ 2863 + ndlp = lpfc_findnode_did(phba->pport, sid); 2864 + if (!ndlp || !NLP_CHK_NODE_ACT(ndlp) || 2865 + ((ndlp->nlp_state != NLP_STE_UNMAPPED_NODE) && 2866 + (ndlp->nlp_state != NLP_STE_MAPPED_NODE))) { 2867 + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC, 2868 + "6216 NVME Unsol rcv: No ndlp: " 2869 + "NPort_ID x%x oxid x%x\n", 2870 + sid, oxid); 2871 + goto out_fail; 2872 + } 2873 + 2874 + axchg->phba = phba; 2875 + axchg->ndlp = ndlp; 2876 + axchg->size = size; 2877 + axchg->oxid = oxid; 2878 + axchg->sid = sid; 2879 + axchg->wqeq = NULL; 2880 + axchg->state = LPFC_NVME_STE_LS_RCV; 2881 + axchg->entry_cnt = 1; 2882 + axchg->rqb_buffer = (void *)nvmebuf; 2883 + axchg->hdwq = &phba->sli4_hba.hdwq[0]; 2884 + axchg->payload = nvmebuf->dbuf.virt; 2885 + INIT_LIST_HEAD(&axchg->list); 2886 + 2887 + if (phba->nvmet_support) 2888 + ret = lpfc_nvmet_handle_lsreq(phba, axchg); 2889 + else 2890 + ret = lpfc_nvme_handle_lsreq(phba, axchg); 2891 + 2892 + /* if zero, LS was successfully handled. 
If non-zero, LS not handled */ 2893 + if (!ret) 2894 + return; 2895 + 2896 + lpfc_printf_log(phba, KERN_ERR, LOG_NVME_DISC | LOG_NVME_IOERR, 2897 + "6155 Drop NVME LS from DID %06X: SID %06X OXID x%X " 2898 + "NVMe%s handler failed %d\n", 2899 + did, sid, oxid, 2900 + (phba->nvmet_support) ? "T" : "I", ret); 2901 + 2902 + out_fail: 2903 + 2904 + /* recycle receive buffer */ 2905 + lpfc_in_buf_free(phba, &nvmebuf->dbuf); 2906 + 2907 + /* If start of new exchange, abort it */ 2908 + if (axchg && (fctl & FC_FC_FIRST_SEQ && !(fctl & FC_FC_EX_CTX))) 2909 + ret = lpfc_nvme_unsol_ls_issue_abort(phba, axchg, sid, oxid); 2910 + 2911 + if (ret) 2912 + kfree(axchg); 2913 + } 2914 + 2915 + /** 2796 2916 * lpfc_complete_unsol_iocb - Complete an unsolicited sequence 2797 2917 * @phba: Pointer to HBA context object. 2798 2918 * @pring: Pointer to driver SLI ring object. ··· 2930 2816 2931 2817 switch (fch_type) { 2932 2818 case FC_TYPE_NVME: 2933 - lpfc_nvmet_unsol_ls_event(phba, pring, saveq); 2819 + lpfc_nvme_unsol_ls_handler(phba, saveq); 2934 2820 return 1; 2935 2821 default: 2936 2822 break; ··· 14095 13981 14096 13982 /* Just some basic sanity checks on FCP Command frame */ 14097 13983 fctl = (fc_hdr->fh_f_ctl[0] << 16 | 14098 - fc_hdr->fh_f_ctl[1] << 8 | 14099 - fc_hdr->fh_f_ctl[2]); 13984 + fc_hdr->fh_f_ctl[1] << 8 | 13985 + fc_hdr->fh_f_ctl[2]); 14100 13986 if (((fctl & 14101 13987 (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) != 14102 13988 (FC_FC_FIRST_SEQ | FC_FC_END_SEQ | FC_FC_SEQ_INIT)) || ··· 20005 19891 struct lpfc_iocbq *pwqe) 20006 19892 { 20007 19893 union lpfc_wqe128 *wqe = &pwqe->wqe; 20008 - struct lpfc_nvmet_rcv_ctx *ctxp; 19894 + struct lpfc_async_xchg_ctx *ctxp; 20009 19895 struct lpfc_queue *wq; 20010 19896 struct lpfc_sglq *sglq; 20011 19897 struct lpfc_sli_ring *pring;
-12
fs/block_dev.c
··· 2164 2164 .fallocate = blkdev_fallocate, 2165 2165 }; 2166 2166 2167 - int ioctl_by_bdev(struct block_device *bdev, unsigned cmd, unsigned long arg) 2168 - { 2169 - int res; 2170 - mm_segment_t old_fs = get_fs(); 2171 - set_fs(KERNEL_DS); 2172 - res = blkdev_ioctl(bdev, 0, cmd, arg); 2173 - set_fs(old_fs); 2174 - return res; 2175 - } 2176 - 2177 - EXPORT_SYMBOL(ioctl_by_bdev); 2178 - 2179 2167 /** 2180 2168 * lookup_bdev - lookup a struct block_device by name 2181 2169 * @pathname: special file representing the block device
+9
include/linux/dasd_mod.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #ifndef DASD_MOD_H 3 + #define DASD_MOD_H 4 + 5 + #include <asm/dasd.h> 6 + 7 + extern int dasd_biodasdinfo(struct gendisk *disk, dasd_information2_t *info); 8 + 9 + #endif
-1
include/linux/fs.h
··· 2646 2646 extern const struct file_operations def_blk_fops; 2647 2647 extern const struct file_operations def_chr_fops; 2648 2648 #ifdef CONFIG_BLOCK 2649 - extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); 2650 2649 extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); 2651 2650 extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); 2652 2651 extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder);
-4
include/linux/genhd.h
··· 159 159 struct disk_events; 160 160 struct badblocks; 161 161 162 - #if defined(CONFIG_BLK_DEV_INTEGRITY) 163 - 164 162 struct blk_integrity { 165 163 const struct blk_integrity_profile *profile; 166 164 unsigned char flags; ··· 166 168 unsigned char interval_exp; 167 169 unsigned char tag_size; 168 170 }; 169 - 170 - #endif /* CONFIG_BLK_DEV_INTEGRITY */ 171 171 172 172 struct gendisk { 173 173 /* major, first_minor and minors are input parameters only,
+277 -105
include/linux/nvme-fc-driver.h
··· 10 10 11 11 12 12 /* 13 + * ********************** FC-NVME LS API ******************** 14 + * 15 + * Data structures used by both FC-NVME hosts and FC-NVME 16 + * targets to perform FC-NVME LS requests or transmit 17 + * responses. 18 + * 19 + * *********************************************************** 20 + */ 21 + 22 + /** 23 + * struct nvmefc_ls_req - Request structure passed from the transport 24 + * to the LLDD to perform a NVME-FC LS request and obtain 25 + * a response. 26 + * Used by nvme-fc transport (host) to send LS's such as 27 + * Create Association, Create Connection and Disconnect 28 + * Association. 29 + * Used by the nvmet-fc transport (controller) to send 30 + * LS's such as Disconnect Association. 31 + * 32 + * Values set by the requestor prior to calling the LLDD ls_req entrypoint: 33 + * @rqstaddr: pointer to request buffer 34 + * @rqstdma: PCI DMA address of request buffer 35 + * @rqstlen: Length, in bytes, of request buffer 36 + * @rspaddr: pointer to response buffer 37 + * @rspdma: PCI DMA address of response buffer 38 + * @rsplen: Length, in bytes, of response buffer 39 + * @timeout: Maximum amount of time, in seconds, to wait for the LS response. 40 + * If timeout exceeded, LLDD to abort LS exchange and complete 41 + * LS request with error status. 42 + * @private: pointer to memory allocated alongside the ls request structure 43 + * that is specifically for the LLDD to use while processing the 44 + * request. The length of the buffer corresponds to the 45 + * lsrqst_priv_sz value specified in the xxx_template supplied 46 + * by the LLDD. 47 + * @done: The callback routine the LLDD is to invoke upon completion of 48 + * the LS request. req argument is the pointer to the original LS 49 + * request structure. Status argument must be 0 upon success, a 50 + * negative errno on failure (example: -ENXIO). 
51 + */ 52 + struct nvmefc_ls_req { 53 + void *rqstaddr; 54 + dma_addr_t rqstdma; 55 + u32 rqstlen; 56 + void *rspaddr; 57 + dma_addr_t rspdma; 58 + u32 rsplen; 59 + u32 timeout; 60 + 61 + void *private; 62 + 63 + void (*done)(struct nvmefc_ls_req *req, int status); 64 + 65 + } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 66 + 67 + 68 + /** 69 + * struct nvmefc_ls_rsp - Structure passed from the transport to the LLDD 70 + * to request the transmit the NVME-FC LS response to a 71 + * NVME-FC LS request. The structure originates in the LLDD 72 + * and is given to the transport via the xxx_rcv_ls_req() 73 + * transport routine. As such, the structure represents the 74 + * FC exchange context for the NVME-FC LS request that was 75 + * received and which the response is to be sent for. 76 + * Used by the LLDD to pass the nvmet-fc transport (controller) 77 + * received LS's such as Create Association, Create Connection 78 + * and Disconnect Association. 79 + * Used by the LLDD to pass the nvme-fc transport (host) 80 + * received LS's such as Disconnect Association or Disconnect 81 + * Connection. 82 + * 83 + * The structure is allocated by the LLDD whenever a LS Request is received 84 + * from the FC link. The address of the structure is passed to the nvmet-fc 85 + * or nvme-fc layer via the xxx_rcv_ls_req() transport routines. 86 + * 87 + * The address of the structure is to be passed back to the LLDD 88 + * when the response is to be transmit. The LLDD will use the address to 89 + * map back to the LLDD exchange structure which maintains information such 90 + * the remote N_Port that sent the LS as well as any FC exchange context. 91 + * Upon completion of the LS response transmit, the LLDD will pass the 92 + * address of the structure back to the transport LS rsp done() routine, 93 + * allowing the transport release dma resources. 
Upon completion of 94 + * the done() routine, no further access to the structure will be made by 95 + * the transport and the LLDD can de-allocate the structure. 96 + * 97 + * Field initialization: 98 + * At the time of the xxx_rcv_ls_req() call, there is no content that 99 + * is valid in the structure. 100 + * 101 + * When the structure is used for the LLDD->xmt_ls_rsp() call, the 102 + * transport layer will fully set the fields in order to specify the 103 + * response payload buffer and its length as well as the done routine 104 + * to be called upon completion of the transmit. The transport layer 105 + * will also set a private pointer for its own use in the done routine. 106 + * 107 + * Values set by the transport layer prior to calling the LLDD xmt_ls_rsp 108 + * entrypoint: 109 + * @rspbuf: pointer to the LS response buffer 110 + * @rspdma: PCI DMA address of the LS response buffer 111 + * @rsplen: Length, in bytes, of the LS response buffer 112 + * @done: The callback routine the LLDD is to invoke upon completion of 113 + * transmitting the LS response. req argument is the pointer to 114 + * the original ls request. 115 + * @nvme_fc_private: pointer to an internal transport-specific structure 116 + * used as part of the transport done() processing. The LLDD is 117 + * not to access this pointer. 118 + */ 119 + struct nvmefc_ls_rsp { 120 + void *rspbuf; 121 + dma_addr_t rspdma; 122 + u16 rsplen; 123 + 124 + void (*done)(struct nvmefc_ls_rsp *rsp); 125 + void *nvme_fc_private; /* LLDD is not to access !! */ 126 + }; 127 + 128 + 129 + 130 + /* 13 131 * ********************** LLDD FC-NVME Host API ******************** 14 132 * 15 133 * For FC LLDD's that are the NVME Host role. 
16 134 * 17 135 * ****************************************************************** 18 136 */ 19 - 20 137 21 138 22 139 /** ··· 159 42 u32 port_id; 160 43 u32 dev_loss_tmo; 161 44 }; 162 - 163 - 164 - /** 165 - * struct nvmefc_ls_req - Request structure passed from NVME-FC transport 166 - * to LLDD in order to perform a NVME FC-4 LS 167 - * request and obtain a response. 168 - * 169 - * Values set by the NVME-FC layer prior to calling the LLDD ls_req 170 - * entrypoint. 171 - * @rqstaddr: pointer to request buffer 172 - * @rqstdma: PCI DMA address of request buffer 173 - * @rqstlen: Length, in bytes, of request buffer 174 - * @rspaddr: pointer to response buffer 175 - * @rspdma: PCI DMA address of response buffer 176 - * @rsplen: Length, in bytes, of response buffer 177 - * @timeout: Maximum amount of time, in seconds, to wait for the LS response. 178 - * If timeout exceeded, LLDD to abort LS exchange and complete 179 - * LS request with error status. 180 - * @private: pointer to memory allocated alongside the ls request structure 181 - * that is specifically for the LLDD to use while processing the 182 - * request. The length of the buffer corresponds to the 183 - * lsrqst_priv_sz value specified in the nvme_fc_port_template 184 - * supplied by the LLDD. 185 - * @done: The callback routine the LLDD is to invoke upon completion of 186 - * the LS request. req argument is the pointer to the original LS 187 - * request structure. Status argument must be 0 upon success, a 188 - * negative errno on failure (example: -ENXIO). 
189 - */ 190 - struct nvmefc_ls_req { 191 - void *rqstaddr; 192 - dma_addr_t rqstdma; 193 - u32 rqstlen; 194 - void *rspaddr; 195 - dma_addr_t rspdma; 196 - u32 rsplen; 197 - u32 timeout; 198 - 199 - void *private; 200 - 201 - void (*done)(struct nvmefc_ls_req *req, int status); 202 - 203 - } __aligned(sizeof(u64)); /* alignment for other things alloc'd with */ 204 - 205 45 206 46 enum nvmefc_fcp_datadir { 207 47 NVMEFC_FCP_NODATA, /* payload_length and sg_cnt will be zero */ ··· 411 337 * indicating an FC transport Aborted status. 412 338 * Entrypoint is Mandatory. 413 339 * 340 + * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service. 341 + * The nvmefc_ls_rsp structure is the same LLDD-supplied exchange 342 + * structure specified in the nvme_fc_rcv_ls_req() call made when 343 + * the LS request was received. The structure will fully describe 344 + * the buffers for the response payload and the dma address of the 345 + * payload. The LLDD is to transmit the response (or return a 346 + * non-zero errno status), and upon completion of the transmit, call 347 + * the "done" routine specified in the nvmefc_ls_rsp structure 348 + * (argument to done is the address of the nvmefc_ls_rsp structure 349 + * itself). Upon the completion of the done routine, the LLDD shall 350 + * consider the LS handling complete and the nvmefc_ls_rsp structure 351 + * may be freed/released. 352 + * Entrypoint is mandatory if the LLDD calls the nvme_fc_rcv_ls_req() 353 + * entrypoint. 354 + * 414 355 * @max_hw_queues: indicates the maximum number of hw queues the LLDD 415 356 * supports for cpu affinitization. 416 357 * Value is Mandatory. Must be at least 1. ··· 460 371 * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional 461 372 * memory that it would like fc nvme layer to allocate on the LLDD's 462 373 * behalf whenever a ls request structure is allocated. 
The additional 463 - * memory area solely for the of the LLDD and its location is 374 + * memory area is solely for use by the LLDD and its location is 464 375 * specified by the ls_request->private pointer. 465 376 * Value is Mandatory. Allowed to be zero. 466 377 * ··· 494 405 struct nvme_fc_remote_port *, 495 406 void *hw_queue_handle, 496 407 struct nvmefc_fcp_req *); 408 + int (*xmt_ls_rsp)(struct nvme_fc_local_port *localport, 409 + struct nvme_fc_remote_port *rport, 410 + struct nvmefc_ls_rsp *ls_rsp); 497 411 498 412 u32 max_hw_queues; 499 413 u16 max_sgl_segments; ··· 533 441 int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport, 534 442 u32 dev_loss_tmo); 535 443 444 + /* 445 + * Routine called to pass a NVME-FC LS request, received by the lldd, 446 + * to the nvme-fc transport. 447 + * 448 + * If the return value is zero: the LS was successfully accepted by the 449 + * transport. 450 + * If the return value is non-zero: the transport has not accepted the 451 + * LS. The lldd should ABTS-LS the LS. 452 + * 453 + * Note: if the LLDD receives and ABTS for the LS prior to the transport 454 + * calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD 455 + * shall mark the LS as aborted, and when the xmt_ls_rsp() is called: the 456 + * response shall not be transmit and the struct nvmefc_ls_rsp() done 457 + * routine shall be called. The LLDD may transmit the ABTS response as 458 + * soon as the LS was marked or can delay until the xmt_ls_rsp() call is 459 + * made. 460 + * Note: if an RCV LS was successfully posted to the transport and the 461 + * remoteport is then unregistered before xmt_ls_rsp() was called for 462 + * the lsrsp structure, the transport will still call xmt_ls_rsp() 463 + * afterward to cleanup the outstanding lsrsp structure. The LLDD should 464 + * noop the transmission of the rsp and call the lsrsp->done() routine 465 + * to allow the lsrsp structure to be released. 
466 + */ 467 + int nvme_fc_rcv_ls_req(struct nvme_fc_remote_port *remoteport, 468 + struct nvmefc_ls_rsp *lsrsp, 469 + void *lsreqbuf, u32 lsreqbuf_len); 470 + 471 + 536 472 537 473 /* 538 474 * *************** LLDD FC-NVME Target/Subsystem API *************** ··· 589 469 u32 port_id; 590 470 }; 591 471 592 - 593 - /** 594 - * struct nvmefc_tgt_ls_req - Structure used between LLDD and NVMET-FC 595 - * layer to represent the exchange context for 596 - * a FC-NVME Link Service (LS). 597 - * 598 - * The structure is allocated by the LLDD whenever a LS Request is received 599 - * from the FC link. The address of the structure is passed to the nvmet-fc 600 - * layer via the nvmet_fc_rcv_ls_req() call. The address of the structure 601 - * will be passed back to the LLDD when the response is to be transmit. 602 - * The LLDD is to use the address to map back to the LLDD exchange structure 603 - * which maintains information such as the targetport the LS was received 604 - * on, the remote FC NVME initiator that sent the LS, and any FC exchange 605 - * context. Upon completion of the LS response transmit, the address of the 606 - * structure will be passed back to the LS rsp done() routine, allowing the 607 - * nvmet-fc layer to release dma resources. Upon completion of the done() 608 - * routine, no further access will be made by the nvmet-fc layer and the 609 - * LLDD can de-allocate the structure. 610 - * 611 - * Field initialization: 612 - * At the time of the nvmet_fc_rcv_ls_req() call, there is no content that 613 - * is valid in the structure. 614 - * 615 - * When the structure is used for the LLDD->xmt_ls_rsp() call, the nvmet-fc 616 - * layer will fully set the fields in order to specify the response 617 - * payload buffer and its length as well as the done routine to be called 618 - * upon compeletion of the transmit. The nvmet-fc layer will also set a 619 - * private pointer for its own use in the done routine. 
620 - * 621 - * Values set by the NVMET-FC layer prior to calling the LLDD xmt_ls_rsp 622 - * entrypoint. 623 - * @rspbuf: pointer to the LS response buffer 624 - * @rspdma: PCI DMA address of the LS response buffer 625 - * @rsplen: Length, in bytes, of the LS response buffer 626 - * @done: The callback routine the LLDD is to invoke upon completion of 627 - * transmitting the LS response. req argument is the pointer to 628 - * the original ls request. 629 - * @nvmet_fc_private: pointer to an internal NVMET-FC layer structure used 630 - * as part of the NVMET-FC processing. The LLDD is not to access 631 - * this pointer. 632 - */ 633 - struct nvmefc_tgt_ls_req { 634 - void *rspbuf; 635 - dma_addr_t rspdma; 636 - u16 rsplen; 637 - 638 - void (*done)(struct nvmefc_tgt_ls_req *req); 639 - void *nvmet_fc_private; /* LLDD is not to access !! */ 640 - }; 641 472 642 473 /* Operations that NVME-FC layer may request the LLDD to perform for FCP */ 643 474 enum { ··· 764 693 * Entrypoint is Mandatory. 765 694 * 766 695 * @xmt_ls_rsp: Called to transmit the response to a FC-NVME FC-4 LS service. 767 - * The nvmefc_tgt_ls_req structure is the same LLDD-supplied exchange 696 + * The nvmefc_ls_rsp structure is the same LLDD-supplied exchange 768 697 * structure specified in the nvmet_fc_rcv_ls_req() call made when 769 - * the LS request was received. The structure will fully describe 698 + * the LS request was received. The structure will fully describe 770 699 * the buffers for the response payload and the dma address of the 771 - * payload. The LLDD is to transmit the response (or return a non-zero 772 - * errno status), and upon completion of the transmit, call the 773 - * "done" routine specified in the nvmefc_tgt_ls_req structure 774 - * (argument to done is the ls reqwuest structure itself). 775 - * After calling the done routine, the LLDD shall consider the 776 - * LS handling complete and the nvmefc_tgt_ls_req structure may 777 - * be freed/released. 700 + * payload. 
The LLDD is to transmit the response (or return a 701 + * non-zero errno status), and upon completion of the transmit, call 702 + * the "done" routine specified in the nvmefc_ls_rsp structure 703 + * (argument to done is the address of the nvmefc_ls_rsp structure 704 + * itself). Upon the completion of the done() routine, the LLDD shall 705 + * consider the LS handling complete and the nvmefc_ls_rsp structure 706 + * may be freed/released. 707 + * The transport will always call the xmt_ls_rsp() routine for any 708 + * LS received. 778 709 * Entrypoint is Mandatory. 779 710 * 780 711 * @fcp_op: Called to perform a data transfer or transmit a response. ··· 871 798 * should cause the initiator to rescan the discovery controller 872 799 * on the targetport. 873 800 * 801 + * @ls_req: Called to issue a FC-NVME FC-4 LS service request. 802 + * The nvme_fc_ls_req structure will fully describe the buffers for 803 + * the request payload and where to place the response payload. 804 + * The targetport that is to issue the LS request is identified by 805 + * the targetport argument. The remote port that is to receive the 806 + * LS request is identified by the hosthandle argument. The nvmet-fc 807 + * transport is only allowed to issue FC-NVME LS's on behalf of an 808 + * association that was created prior by a Create Association LS. 809 + * The hosthandle will originate from the LLDD in the struct 810 + * nvmefc_ls_rsp structure for the Create Association LS that 811 + * was delivered to the transport. The transport will save the 812 + * hosthandle as an attribute of the association. If the LLDD 813 + * loses connectivity with the remote port, it must call the 814 + * nvmet_fc_invalidate_host() routine to remove any references to 815 + * the remote port in the transport. 
816 + * The LLDD is to allocate an exchange, issue the LS request, obtain 817 + * the LS response, and call the "done" routine specified in the 818 + * request structure (argument to done is the ls request structure 819 + * itself). 820 + * Entrypoint is Optional - but highly recommended. 821 + * 822 + * @ls_abort: called to request the LLDD to abort the indicated ls request. 823 + * The call may return before the abort has completed. After aborting 824 + * the request, the LLDD must still call the ls request done routine 825 + * indicating an FC transport Aborted status. 826 + * Entrypoint is Mandatory if the ls_req entry point is specified. 827 + * 828 + * @host_release: called to inform the LLDD that the request to invalidate 829 + * the host port indicated by the hosthandle has been fully completed. 830 + * No associations exist with the host port and there will be no 831 + * further references to hosthandle. 832 + * Entrypoint is Mandatory if the lldd calls nvmet_fc_invalidate_host(). 833 + * 874 834 * @max_hw_queues: indicates the maximum number of hw queues the LLDD 875 835 * supports for cpu affinitization. 876 836 * Value is Mandatory. Must be at least 1. ··· 932 826 * area solely for the of the LLDD and its location is specified by 933 827 * the targetport->private pointer. 934 828 * Value is Mandatory. Allowed to be zero. 829 + * 830 + * @lsrqst_priv_sz: The LLDD sets this field to the amount of additional 831 + * memory that it would like nvmet-fc layer to allocate on the LLDD's 832 + * behalf whenever a ls request structure is allocated. The additional 833 + * memory area is solely for use by the LLDD and its location is 834 + * specified by the ls_request->private pointer. 835 + * Value is Mandatory. Allowed to be zero. 
836 + * 935 837 */ 936 838 struct nvmet_fc_target_template { 937 839 void (*targetport_delete)(struct nvmet_fc_target_port *tgtport); 938 840 int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport, 939 - struct nvmefc_tgt_ls_req *tls_req); 841 + struct nvmefc_ls_rsp *ls_rsp); 940 842 int (*fcp_op)(struct nvmet_fc_target_port *tgtport, 941 843 struct nvmefc_tgt_fcp_req *fcpreq); 942 844 void (*fcp_abort)(struct nvmet_fc_target_port *tgtport, ··· 954 840 void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, 955 841 struct nvmefc_tgt_fcp_req *fcpreq); 956 842 void (*discovery_event)(struct nvmet_fc_target_port *tgtport); 843 + int (*ls_req)(struct nvmet_fc_target_port *targetport, 844 + void *hosthandle, struct nvmefc_ls_req *lsreq); 845 + void (*ls_abort)(struct nvmet_fc_target_port *targetport, 846 + void *hosthandle, struct nvmefc_ls_req *lsreq); 847 + void (*host_release)(void *hosthandle); 957 848 958 849 u32 max_hw_queues; 959 850 u16 max_sgl_segments; ··· 967 848 968 849 u32 target_features; 969 850 851 + /* sizes of additional private data for data structures */ 970 852 u32 target_priv_sz; 853 + u32 lsrqst_priv_sz; 971 854 }; 972 855 973 856 ··· 980 859 981 860 int nvmet_fc_unregister_targetport(struct nvmet_fc_target_port *tgtport); 982 861 862 + /* 863 + * Routine called to pass a NVME-FC LS request, received by the lldd, 864 + * to the nvmet-fc transport. 865 + * 866 + * If the return value is zero: the LS was successfully accepted by the 867 + * transport. 868 + * If the return value is non-zero: the transport has not accepted the 869 + * LS. The lldd should ABTS-LS the LS. 870 + * 871 + * Note: if the LLDD receives and ABTS for the LS prior to the transport 872 + * calling the ops->xmt_ls_rsp() routine to transmit a response, the LLDD 873 + * shall mark the LS as aborted, and when the xmt_ls_rsp() is called: the 874 + * response shall not be transmit and the struct nvmefc_ls_rsp() done 875 + * routine shall be called. 
The LLDD may transmit the ABTS response as 876 + * soon as the LS was marked or can delay until the xmt_ls_rsp() call is 877 + * made. 878 + * Note: if an RCV LS was successfully posted to the transport and the 879 + * targetport is then unregistered before xmt_ls_rsp() was called for 880 + * the lsrsp structure, the transport will still call xmt_ls_rsp() 881 + * afterward to cleanup the outstanding lsrsp structure. The LLDD should 882 + * noop the transmission of the rsp and call the lsrsp->done() routine 883 + * to allow the lsrsp structure to be released. 884 + */ 983 885 int nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *tgtport, 984 - struct nvmefc_tgt_ls_req *lsreq, 886 + void *hosthandle, 887 + struct nvmefc_ls_rsp *rsp, 985 888 void *lsreqbuf, u32 lsreqbuf_len); 986 889 890 + /* 891 + * Routine called by the LLDD whenever it has a logout or loss of 892 + * connectivity to a NVME-FC host port which there had been active 893 + * NVMe controllers for. The host port is indicated by the 894 + * hosthandle. The hosthandle is given to the nvmet-fc transport 895 + * when a NVME LS was received, typically to create a new association. 896 + * The nvmet-fc transport will cache the hostport value with the 897 + * association for use in LS requests for the association. 898 + * When the LLDD calls this routine, the nvmet-fc transport will 899 + * immediately terminate all associations that were created with 900 + * the hosthandle host port. 901 + * The LLDD, after calling this routine and having control returned, 902 + * must assume the transport may subsequently utilize hosthandle as 903 + * part of sending LS's to terminate the association. The LLDD 904 + * should reject the LS's if they are attempted. 905 + * Once the last association has terminated for the hosthandle host 906 + * port, the nvmet-fc transport will call the ops->host_release() 907 + * callback. As of the callback, the nvmet-fc transport will no 908 + * longer reference hosthandle. 
909 + */ 910 + void nvmet_fc_invalidate_host(struct nvmet_fc_target_port *tgtport, 911 + void *hosthandle); 912 + 913 + /* 914 + * If nvmet_fc_rcv_fcp_req returns non-zero, the transport has not accepted 915 + * the FCP cmd. The lldd should ABTS-LS the cmd. 916 + */ 987 917 int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport, 988 918 struct nvmefc_tgt_fcp_req *fcpreq, 989 919 void *cmdiubuf, u32 cmdiubuf_len);
+6 -5
include/linux/nvme-fc.h
··· 4 4 */ 5 5 6 6 /* 7 - * This file contains definitions relative to FC-NVME-2 r1.06 8 - * (T11-2019-00210-v001). 7 + * This file contains definitions relative to FC-NVME-2 r1.08 8 + * (T11-2019-00210-v004). 9 9 */ 10 10 11 11 #ifndef _NVME_FC_H ··· 81 81 }; 82 82 83 83 84 - #define FCNVME_NVME_SR_OPCODE 0x01 84 + #define FCNVME_NVME_SR_OPCODE 0x01 85 + #define FCNVME_NVME_SR_RSP_OPCODE 0x02 85 86 86 87 struct nvme_fc_nvme_sr_iu { 87 88 __u8 fc_id; ··· 95 94 96 95 enum { 97 96 FCNVME_SRSTAT_ACC = 0x0, 98 - FCNVME_SRSTAT_INV_FCID = 0x1, 97 + /* reserved 0x1 */ 99 98 /* reserved 0x2 */ 100 99 FCNVME_SRSTAT_LOGICAL_ERR = 0x3, 101 100 FCNVME_SRSTAT_INV_QUALIF = 0x4, ··· 398 397 struct fcnvme_ls_rqst_w0 w0; 399 398 __be32 desc_list_len; 400 399 struct fcnvme_lsdesc_assoc_id associd; 401 - struct fcnvme_lsdesc_disconn_cmd connectid; 400 + struct fcnvme_lsdesc_conn_id connectid; 402 401 }; 403 402 404 403 struct fcnvme_ls_disconnect_conn_acc {
+15 -1
include/linux/nvme.h
··· 38 38 NVMF_ADDR_FAMILY_IP6 = 2, /* IP6 */ 39 39 NVMF_ADDR_FAMILY_IB = 3, /* InfiniBand */ 40 40 NVMF_ADDR_FAMILY_FC = 4, /* Fibre Channel */ 41 + NVMF_ADDR_FAMILY_LOOP = 254, /* Reserved for host usage */ 42 + NVMF_ADDR_FAMILY_MAX, 41 43 }; 42 44 43 45 /* Transport Type codes for Discovery Log Page entry TRTYPE field */ ··· 301 299 }; 302 300 303 301 enum { 302 + NVME_CTRL_CMIC_MULTI_CTRL = 1 << 1, 303 + NVME_CTRL_CMIC_ANA = 1 << 3, 304 304 NVME_CTRL_ONCS_COMPARE = 1 << 0, 305 305 NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, 306 306 NVME_CTRL_ONCS_DSM = 1 << 2, ··· 398 394 399 395 enum { 400 396 NVME_NS_FEAT_THIN = 1 << 0, 397 + NVME_NS_FEAT_ATOMICS = 1 << 1, 398 + NVME_NS_FEAT_IO_OPT = 1 << 4, 399 + NVME_NS_ATTR_RO = 1 << 0, 401 400 NVME_NS_FLBAS_LBA_MASK = 0xf, 402 401 NVME_NS_FLBAS_META_EXT = 0x10, 402 + NVME_NS_NMIC_SHARED = 1 << 0, 403 403 NVME_LBAF_RP_BEST = 0, 404 404 NVME_LBAF_RP_BETTER = 1, 405 405 NVME_LBAF_RP_GOOD = 2, ··· 418 410 NVME_NS_DPS_PI_TYPE1 = 1, 419 411 NVME_NS_DPS_PI_TYPE2 = 2, 420 412 NVME_NS_DPS_PI_TYPE3 = 3, 413 + }; 414 + 415 + /* Identify Namespace Metadata Capabilities (MC): */ 416 + enum { 417 + NVME_MC_EXTENDED_LBA = (1 << 0), 418 + NVME_MC_METADATA_PTR = (1 << 1), 421 419 }; 422 420 423 421 struct nvme_ns_id_desc { ··· 1191 1177 __le64 numrec; 1192 1178 __le16 recfmt; 1193 1179 __u8 resv14[1006]; 1194 - struct nvmf_disc_rsp_page_entry entries[0]; 1180 + struct nvmf_disc_rsp_page_entry entries[]; 1195 1181 }; 1196 1182 1197 1183 enum {
+22 -4
include/uapi/linux/fd.h
··· 172 172 * used in succession to try to read the disk. If the FDC cannot lock onto 173 173 * the disk, the next format is tried. This uses the variable 'probing'. 174 174 */ 175 - short autodetect[8]; /* autodetected formats */ 175 + 176 + #define FD_AUTODETECT_SIZE 8 177 + 178 + short autodetect[FD_AUTODETECT_SIZE]; /* autodetected formats */ 176 179 177 180 int checkfreq; /* how often should the drive be checked for disk 178 181 * changes */ ··· 360 357 int buffer_length; /* length of allocated buffer */ 361 358 362 359 unsigned char rate; 360 + 361 + #define FD_RAW_CMD_SIZE 16 362 + #define FD_RAW_REPLY_SIZE 16 363 + #define FD_RAW_CMD_FULLSIZE (FD_RAW_CMD_SIZE + 1 + FD_RAW_REPLY_SIZE) 364 + 365 + /* The command may take up the space initially intended for the reply 366 + * and the reply count. Needed for long 82078 commands such as RESTORE, 367 + * which takes 17 command bytes. 368 + */ 369 + 363 370 unsigned char cmd_count; 364 - unsigned char cmd[16]; 365 - unsigned char reply_count; 366 - unsigned char reply[16]; 371 + union { 372 + struct { 373 + unsigned char cmd[FD_RAW_CMD_SIZE]; 374 + unsigned char reply_count; 375 + unsigned char reply[FD_RAW_REPLY_SIZE]; 376 + }; 377 + unsigned char fullcmd[FD_RAW_CMD_FULLSIZE]; 378 + }; 367 379 int track; 368 380 int resultcode; 369 381
+13 -3
include/uapi/linux/fdreg.h
··· 7 7 * Handbook", Sanches and Canton. 8 8 */ 9 9 10 - /* Fd controller regs. S&C, about page 340 */ 11 - #define FD_STATUS 4 12 - #define FD_DATA 5 10 + /* 82077's auxiliary status registers A & B (R) */ 11 + #define FD_SRA 0 12 + #define FD_SRB 1 13 13 14 14 /* Digital Output Register */ 15 15 #define FD_DOR 2 16 + 17 + /* 82077's tape drive register (R/W) */ 18 + #define FD_TDR 3 19 + 20 + /* 82077's data rate select register (W) */ 21 + #define FD_DSR 4 22 + 23 + /* Fd controller regs. S&C, about page 340 */ 24 + #define FD_STATUS 4 25 + #define FD_DATA 5 16 26 17 27 /* Digital Input Register (read) */ 18 28 #define FD_DIR 7
+29 -2
include/uapi/linux/loop.h
··· 25 25 LO_FLAGS_DIRECT_IO = 16, 26 26 }; 27 27 28 + /* LO_FLAGS that can be set using LOOP_SET_STATUS(64) */ 29 + #define LOOP_SET_STATUS_SETTABLE_FLAGS (LO_FLAGS_AUTOCLEAR | LO_FLAGS_PARTSCAN) 30 + 31 + /* LO_FLAGS that can be cleared using LOOP_SET_STATUS(64) */ 32 + #define LOOP_SET_STATUS_CLEARABLE_FLAGS (LO_FLAGS_AUTOCLEAR) 33 + 34 + /* LO_FLAGS that can be set using LOOP_CONFIGURE */ 35 + #define LOOP_CONFIGURE_SETTABLE_FLAGS (LO_FLAGS_READ_ONLY | LO_FLAGS_AUTOCLEAR \ 36 + | LO_FLAGS_PARTSCAN | LO_FLAGS_DIRECT_IO) 37 + 28 38 #include <asm/posix_types.h> /* for __kernel_old_dev_t */ 29 39 #include <linux/types.h> /* for __u64 */ 30 40 ··· 47 37 int lo_offset; 48 38 int lo_encrypt_type; 49 39 int lo_encrypt_key_size; /* ioctl w/o */ 50 - int lo_flags; /* ioctl r/o */ 40 + int lo_flags; 51 41 char lo_name[LO_NAME_SIZE]; 52 42 unsigned char lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ 53 43 unsigned long lo_init[2]; ··· 63 53 __u32 lo_number; /* ioctl r/o */ 64 54 __u32 lo_encrypt_type; 65 55 __u32 lo_encrypt_key_size; /* ioctl w/o */ 66 - __u32 lo_flags; /* ioctl r/o */ 56 + __u32 lo_flags; 67 57 __u8 lo_file_name[LO_NAME_SIZE]; 68 58 __u8 lo_crypt_name[LO_NAME_SIZE]; 69 59 __u8 lo_encrypt_key[LO_KEY_SIZE]; /* ioctl w/o */ 70 60 __u64 lo_init[2]; 61 + }; 62 + 63 + /** 64 + * struct loop_config - Complete configuration for a loop device. 65 + * @fd: fd of the file to be used as a backing file for the loop device. 66 + * @block_size: block size to use; ignored if 0. 67 + * @info: struct loop_info64 to configure the loop device with. 68 + * 69 + * This structure is used with the LOOP_CONFIGURE ioctl, and can be used to 70 + * atomically setup and configure all loop device parameters at once. 
71 + */ 72 + struct loop_config { 73 + __u32 fd; 74 + __u32 block_size; 75 + struct loop_info64 info; 76 + __u64 __reserved[8]; 71 77 }; 72 78 73 79 /* ··· 116 90 #define LOOP_SET_CAPACITY 0x4C07 117 91 #define LOOP_SET_DIRECT_IO 0x4C08 118 92 #define LOOP_SET_BLOCK_SIZE 0x4C09 93 + #define LOOP_CONFIGURE 0x4C0A 119 94 120 95 /* /dev/loop-control interface */ 121 96 #define LOOP_CTL_ADD 0x4C80