Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net

+18

MAINTAINERS

··· 16689 16689 S: Maintained 16690 16690 F: drivers/media/tuners/tuner-xc2028.* 16691 16691 16692 + XDP (eXpress Data Path) 16693 + M: Alexei Starovoitov <ast@kernel.org> 16694 + M: Daniel Borkmann <daniel@iogearbox.net> 16695 + M: David S. Miller <davem@davemloft.net> 16696 + M: Jakub Kicinski <jakub.kicinski@netronome.com> 16697 + M: Jesper Dangaard Brouer <hawk@kernel.org> 16698 + M: John Fastabend <john.fastabend@gmail.com> 16699 + L: netdev@vger.kernel.org 16700 + L: xdp-newbies@vger.kernel.org 16701 + S: Supported 16702 + F: net/core/xdp.c 16703 + F: include/net/xdp.h 16704 + F: kernel/bpf/devmap.c 16705 + F: kernel/bpf/cpumap.c 16706 + F: include/trace/events/xdp.h 16707 + K: xdp 16708 + N: xdp 16709 + 16692 16710 XDP SOCKETS (AF_XDP) 16693 16711 M: Björn Töpel <bjorn.topel@intel.com> 16694 16712 M: Magnus Karlsson <magnus.karlsson@intel.com>

+1 -1

Makefile

··· 2 2 VERSION = 5 3 3 PATCHLEVEL = 0 4 4 SUBLEVEL = 0 5 - EXTRAVERSION = -rc3 5 + EXTRAVERSION = -rc4 6 6 NAME = Shy Crocodile 7 7 8 8 # *DOCUMENTATION*

+1 -1

arch/x86/Kconfig

··· 198 198 select IRQ_FORCED_THREADING 199 199 select NEED_SG_DMA_LENGTH 200 200 select PCI_DOMAINS if PCI 201 - select PCI_LOCKLESS_CONFIG 201 + select PCI_LOCKLESS_CONFIG if PCI 202 202 select PERF_EVENTS 203 203 select RTC_LIB 204 204 select RTC_MC146818_LIB

+4 -2

arch/x86/entry/entry_64_compat.S

··· 361 361 362 362 /* Need to switch before accessing the thread stack. */ 363 363 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi 364 - movq %rsp, %rdi 364 + /* In the Xen PV case we already run on the thread stack. */ 365 + ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV 365 366 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp 366 367 367 368 pushq 6*8(%rdi) /* regs->ss */ ··· 371 370 pushq 3*8(%rdi) /* regs->cs */ 372 371 pushq 2*8(%rdi) /* regs->ip */ 373 372 pushq 1*8(%rdi) /* regs->orig_ax */ 374 - 375 373 pushq (%rdi) /* pt_regs->di */ 374 + .Lint80_keep_stack: 375 + 376 376 pushq %rsi /* pt_regs->si */ 377 377 xorl %esi, %esi /* nospec si */ 378 378 pushq %rdx /* pt_regs->dx */

+18

arch/x86/include/asm/mmu_context.h

··· 178 178 179 179 void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk); 180 180 181 + /* 182 + * Init a new mm. Used on mm copies, like at fork() 183 + * and on mm's that are brand-new, like at execve(). 184 + */ 181 185 static inline int init_new_context(struct task_struct *tsk, 182 186 struct mm_struct *mm) 183 187 { ··· 232 228 } while (0) 233 229 #endif 234 230 231 + static inline void arch_dup_pkeys(struct mm_struct *oldmm, 232 + struct mm_struct *mm) 233 + { 234 + #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS 235 + if (!cpu_feature_enabled(X86_FEATURE_OSPKE)) 236 + return; 237 + 238 + /* Duplicate the oldmm pkey state in mm: */ 239 + mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map; 240 + mm->context.execute_only_pkey = oldmm->context.execute_only_pkey; 241 + #endif 242 + } 243 + 235 244 static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm) 236 245 { 246 + arch_dup_pkeys(oldmm, mm); 237 247 paravirt_arch_dup_mmap(oldmm, mm); 238 248 return ldt_dup_context(oldmm, mm); 239 249 }

+1

arch/x86/kernel/crash.c

··· 470 470 471 471 kbuf.memsz = kbuf.bufsz; 472 472 kbuf.buf_align = ELF_CORE_HEADER_ALIGN; 473 + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 473 474 ret = kexec_add_buffer(&kbuf); 474 475 if (ret) { 475 476 vfree((void *)image->arch.elf_headers);

-4

arch/x86/kernel/hpet.c

··· 21 21 22 22 #define HPET_MASK CLOCKSOURCE_MASK(32) 23 23 24 - /* FSEC = 10^-15 25 - NSEC = 10^-9 */ 26 - #define FSEC_PER_NSEC 1000000L 27 - 28 24 #define HPET_DEV_USED_BIT 2 29 25 #define HPET_DEV_USED (1 << HPET_DEV_USED_BIT) 30 26 #define HPET_DEV_VALID 0x8

+2

arch/x86/kernel/kexec-bzimage64.c

··· 434 434 kbuf.memsz = PAGE_ALIGN(header->init_size); 435 435 kbuf.buf_align = header->kernel_alignment; 436 436 kbuf.buf_min = MIN_KERNEL_LOAD_ADDR; 437 + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 437 438 ret = kexec_add_buffer(&kbuf); 438 439 if (ret) 439 440 goto out_free_params; ··· 449 448 kbuf.bufsz = kbuf.memsz = initrd_len; 450 449 kbuf.buf_align = PAGE_SIZE; 451 450 kbuf.buf_min = MIN_INITRD_LOAD_ADDR; 451 + kbuf.mem = KEXEC_BUF_MEM_UNKNOWN; 452 452 ret = kexec_add_buffer(&kbuf); 453 453 if (ret) 454 454 goto out_free_params;

+16 -14

arch/x86/kernel/tsc.c

··· 297 297 298 298 __setup("tsc=", tsc_setup); 299 299 300 - #define MAX_RETRIES 5 301 - #define SMI_TRESHOLD 50000 300 + #define MAX_RETRIES 5 301 + #define TSC_DEFAULT_THRESHOLD 0x20000 302 302 303 303 /* 304 - * Read TSC and the reference counters. Take care of SMI disturbance 304 + * Read TSC and the reference counters. Take care of any disturbances 305 305 */ 306 306 static u64 tsc_read_refs(u64 *p, int hpet) 307 307 { 308 308 u64 t1, t2; 309 + u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD; 309 310 int i; 310 311 311 312 for (i = 0; i < MAX_RETRIES; i++) { ··· 316 315 else 317 316 *p = acpi_pm_read_early(); 318 317 t2 = get_cycles(); 319 - if ((t2 - t1) < SMI_TRESHOLD) 318 + if ((t2 - t1) < thresh) 320 319 return t2; 321 320 } 322 321 return ULLONG_MAX; ··· 704 703 * zero. In each wait loop iteration we read the TSC and check 705 704 * the delta to the previous read. We keep track of the min 706 705 * and max values of that delta. The delta is mostly defined 707 - * by the IO time of the PIT access, so we can detect when a 708 - * SMI/SMM disturbance happened between the two reads. If the 706 + * by the IO time of the PIT access, so we can detect when 707 + * any disturbance happened between the two reads. If the 709 708 * maximum time is significantly larger than the minimum time, 710 709 * then we discard the result and have another try. 711 710 * 712 711 * 2) Reference counter. If available we use the HPET or the 713 712 * PMTIMER as a reference to check the sanity of that value. 714 713 * We use separate TSC readouts and check inside of the 715 - * reference read for a SMI/SMM disturbance. We dicard 714 + * reference read for any possible disturbance. We dicard 716 715 * disturbed values here as well. We do that around the PIT 717 716 * calibration delay loop as we have to wait for a certain 718 717 * amount of time anyway. ··· 745 744 if (ref1 == ref2) 746 745 continue; 747 746 748 - /* Check, whether the sampling was disturbed by an SMI */ 747 + /* Check, whether the sampling was disturbed */ 749 748 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) 750 749 continue; 751 750 ··· 1269 1268 */ 1270 1269 static void tsc_refine_calibration_work(struct work_struct *work) 1271 1270 { 1272 - static u64 tsc_start = -1, ref_start; 1271 + static u64 tsc_start = ULLONG_MAX, ref_start; 1273 1272 static int hpet; 1274 1273 u64 tsc_stop, ref_stop, delta; 1275 1274 unsigned long freq; ··· 1284 1283 * delayed the first time we expire. So set the workqueue 1285 1284 * again once we know timers are working. 1286 1285 */ 1287 - if (tsc_start == -1) { 1286 + if (tsc_start == ULLONG_MAX) { 1287 + restart: 1288 1288 /* 1289 1289 * Only set hpet once, to avoid mixing hardware 1290 1290 * if the hpet becomes enabled later. 1291 1291 */ 1292 1292 hpet = is_hpet_enabled(); 1293 - schedule_delayed_work(&tsc_irqwork, HZ); 1294 1293 tsc_start = tsc_read_refs(&ref_start, hpet); 1294 + schedule_delayed_work(&tsc_irqwork, HZ); 1295 1295 return; 1296 1296 } 1297 1297 ··· 1302 1300 if (ref_start == ref_stop) 1303 1301 goto out; 1304 1302 1305 - /* Check, whether the sampling was disturbed by an SMI */ 1306 - if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX) 1307 - goto out; 1303 + /* Check, whether the sampling was disturbed */ 1304 + if (tsc_stop == ULLONG_MAX) 1305 + goto restart; 1308 1306 1309 1307 delta = tsc_stop - tsc_start; 1310 1308 delta *= 1000000LL;

+2 -2

arch/x86/lib/kaslr.c

··· 36 36 u16 status, timer; 37 37 38 38 do { 39 - outb(I8254_PORT_CONTROL, 40 - I8254_CMD_READBACK | I8254_SELECT_COUNTER0); 39 + outb(I8254_CMD_READBACK | I8254_SELECT_COUNTER0, 40 + I8254_PORT_CONTROL); 41 41 status = inb(I8254_PORT_COUNTER0); 42 42 timer = inb(I8254_PORT_COUNTER0); 43 43 timer |= inb(I8254_PORT_COUNTER0) << 8;

+2 -2

arch/x86/mm/mem_encrypt_identity.c

··· 158 158 pmd = pmd_offset(pud, ppd->vaddr); 159 159 if (pmd_none(*pmd)) { 160 160 pte = ppd->pgtable_area; 161 - memset(pte, 0, sizeof(pte) * PTRS_PER_PTE); 162 - ppd->pgtable_area += sizeof(pte) * PTRS_PER_PTE; 161 + memset(pte, 0, sizeof(*pte) * PTRS_PER_PTE); 162 + ppd->pgtable_area += sizeof(*pte) * PTRS_PER_PTE; 163 163 set_pmd(pmd, __pmd(PMD_FLAGS | __pa(pte))); 164 164 } 165 165

-11

block/blk-core.c

··· 1083 1083 /* Create a fresh bio_list for all subordinate requests */ 1084 1084 bio_list_on_stack[1] = bio_list_on_stack[0]; 1085 1085 bio_list_init(&bio_list_on_stack[0]); 1086 - 1087 - /* 1088 - * Since we're recursing into make_request here, ensure 1089 - * that we mark this bio as already having entered the queue. 1090 - * If not, and the queue is going away, we can get stuck 1091 - * forever on waiting for the queue reference to drop. But 1092 - * that will never happen, as we're already holding a 1093 - * reference to it. 1094 - */ 1095 - bio_set_flag(bio, BIO_QUEUE_ENTERED); 1096 1086 ret = q->make_request_fn(q, bio); 1097 - bio_clear_flag(bio, BIO_QUEUE_ENTERED); 1098 1087 1099 1088 /* sort new bios into those for a lower level 1100 1089 * and those for the same level

+10

block/blk-merge.c

··· 272 272 /* there isn't chance to merge the splitted bio */ 273 273 split->bi_opf |= REQ_NOMERGE; 274 274 275 + /* 276 + * Since we're recursing into make_request here, ensure 277 + * that we mark this bio as already having entered the queue. 278 + * If not, and the queue is going away, we can get stuck 279 + * forever on waiting for the queue reference to drop. But 280 + * that will never happen, as we're already holding a 281 + * reference to it. 282 + */ 283 + bio_set_flag(*bio, BIO_QUEUE_ENTERED); 284 + 275 285 bio_chain(split, *bio); 276 286 trace_block_split(q, split, (*bio)->bi_iter.bi_sector); 277 287 generic_make_request(*bio);

+2 -2

drivers/edac/altera_edac.h

··· 295 295 #define S10_SYSMGR_ECC_INTSTAT_DERR_OFST 0xA0 296 296 297 297 /* Sticky registers for Uncorrected Errors */ 298 - #define S10_SYSMGR_UE_VAL_OFST 0x120 299 - #define S10_SYSMGR_UE_ADDR_OFST 0x124 298 + #define S10_SYSMGR_UE_VAL_OFST 0x220 299 + #define S10_SYSMGR_UE_ADDR_OFST 0x224 300 300 301 301 #define S10_DDR0_IRQ_MASK BIT(16) 302 302

+13 -12

drivers/irqchip/irq-gic-v3-its.c

··· 2399 2399 kfree(its_dev); 2400 2400 } 2401 2401 2402 - static int its_alloc_device_irq(struct its_device *dev, irq_hw_number_t *hwirq) 2402 + static int its_alloc_device_irq(struct its_device *dev, int nvecs, irq_hw_number_t *hwirq) 2403 2403 { 2404 2404 int idx; 2405 2405 2406 - idx = find_first_zero_bit(dev->event_map.lpi_map, 2407 - dev->event_map.nr_lpis); 2408 - if (idx == dev->event_map.nr_lpis) 2406 + idx = bitmap_find_free_region(dev->event_map.lpi_map, 2407 + dev->event_map.nr_lpis, 2408 + get_count_order(nvecs)); 2409 + if (idx < 0) 2409 2410 return -ENOSPC; 2410 2411 2411 2412 *hwirq = dev->event_map.lpi_base + idx; ··· 2502 2501 int err; 2503 2502 int i; 2504 2503 2505 - for (i = 0; i < nr_irqs; i++) { 2506 - err = its_alloc_device_irq(its_dev, &hwirq); 2507 - if (err) 2508 - return err; 2504 + err = its_alloc_device_irq(its_dev, nr_irqs, &hwirq); 2505 + if (err) 2506 + return err; 2509 2507 2510 - err = its_irq_gic_domain_alloc(domain, virq + i, hwirq); 2508 + for (i = 0; i < nr_irqs; i++) { 2509 + err = its_irq_gic_domain_alloc(domain, virq + i, hwirq + i); 2511 2510 if (err) 2512 2511 return err; 2513 2512 2514 2513 irq_domain_set_hwirq_and_chip(domain, virq + i, 2515 - hwirq, &its_irq_chip, its_dev); 2514 + hwirq + i, &its_irq_chip, its_dev); 2516 2515 irqd_set_single_target(irq_desc_get_irq_data(irq_to_desc(virq + i))); 2517 2516 pr_debug("ID:%d pID:%d vID:%d\n", 2518 - (int)(hwirq - its_dev->event_map.lpi_base), 2519 - (int) hwirq, virq + i); 2517 + (int)(hwirq + i - its_dev->event_map.lpi_base), 2518 + (int)(hwirq + i), virq + i); 2520 2519 } 2521 2520 2522 2521 return 0;

+1 -1

drivers/irqchip/irq-gic-v3-mbi.c

··· 24 24 unsigned long *bm; 25 25 }; 26 26 27 - static struct mutex mbi_lock; 27 + static DEFINE_MUTEX(mbi_lock); 28 28 static phys_addr_t mbi_phys_base; 29 29 static struct mbi_range *mbi_ranges; 30 30 static unsigned int mbi_range_nr;

-2

drivers/irqchip/irq-madera.c

··· 7 7 */ 8 8 9 9 #include <linux/module.h> 10 - #include <linux/gpio.h> 11 10 #include <linux/interrupt.h> 12 11 #include <linux/irq.h> 13 12 #include <linux/irqdomain.h> ··· 15 16 #include <linux/slab.h> 16 17 #include <linux/of.h> 17 18 #include <linux/of_device.h> 18 - #include <linux/of_gpio.h> 19 19 #include <linux/of_irq.h> 20 20 #include <linux/irqchip/irq-madera.h> 21 21 #include <linux/mfd/madera/core.h>

+1

drivers/irqchip/irq-stm32-exti.c

··· 822 822 static const struct irq_domain_ops stm32_exti_h_domain_ops = { 823 823 .alloc = stm32_exti_h_domain_alloc, 824 824 .free = irq_domain_free_irqs_common, 825 + .xlate = irq_domain_xlate_twocell, 825 826 }; 826 827 827 828 static int

+1 -4

drivers/net/caif/caif_serial.c

··· 257 257 if (skb->len == 0) { 258 258 struct sk_buff *tmp = skb_dequeue(&ser->head); 259 259 WARN_ON(tmp != skb); 260 - if (in_interrupt()) 261 - dev_kfree_skb_irq(skb); 262 - else 263 - kfree_skb(skb); 260 + dev_consume_skb_any(skb); 264 261 } 265 262 } 266 263 /* Send flow off if queue is empty */

+1 -1

drivers/net/dsa/mv88e6xxx/serdes.c

··· 664 664 if (port < 9) 665 665 return 0; 666 666 667 - return mv88e6390_serdes_irq_setup(chip, port); 667 + return mv88e6390x_serdes_irq_setup(chip, port); 668 668 } 669 669 670 670 void mv88e6390x_serdes_irq_free(struct mv88e6xxx_chip *chip, int port)

+1 -1

drivers/net/ethernet/alteon/acenic.c

··· 2059 2059 if (skb) { 2060 2060 dev->stats.tx_packets++; 2061 2061 dev->stats.tx_bytes += skb->len; 2062 - dev_kfree_skb_irq(skb); 2062 + dev_consume_skb_irq(skb); 2063 2063 info->skb = NULL; 2064 2064 } 2065 2065

+2 -1

drivers/net/ethernet/altera/altera_msgdma.c

··· 145 145 & 0xffff; 146 146 147 147 if (inuse) { /* Tx FIFO is not empty */ 148 - ready = priv->tx_prod - priv->tx_cons - inuse - 1; 148 + ready = max_t(int, 149 + priv->tx_prod - priv->tx_cons - inuse - 1, 0); 149 150 } else { 150 151 /* Check for buffered last packet */ 151 152 status = csrrd32(priv->tx_dma_csr, msgdma_csroffs(status));

+1 -1

drivers/net/ethernet/amd/amd8111e.c

··· 666 666 pci_unmap_single(lp->pci_dev, lp->tx_dma_addr[tx_index], 667 667 lp->tx_skbuff[tx_index]->len, 668 668 PCI_DMA_TODEVICE); 669 - dev_kfree_skb_irq (lp->tx_skbuff[tx_index]); 669 + dev_consume_skb_irq(lp->tx_skbuff[tx_index]); 670 670 lp->tx_skbuff[tx_index] = NULL; 671 671 lp->tx_dma_addr[tx_index] = 0; 672 672 }

+1 -1

drivers/net/ethernet/apple/bmac.c

··· 777 777 778 778 if (bp->tx_bufs[bp->tx_empty]) { 779 779 ++dev->stats.tx_packets; 780 - dev_kfree_skb_irq(bp->tx_bufs[bp->tx_empty]); 780 + dev_consume_skb_irq(bp->tx_bufs[bp->tx_empty]); 781 781 } 782 782 bp->tx_bufs[bp->tx_empty] = NULL; 783 783 bp->tx_fullup = 0;

+2 -2

drivers/net/ethernet/broadcom/b44.c

··· 638 638 bytes_compl += skb->len; 639 639 pkts_compl++; 640 640 641 - dev_kfree_skb_irq(skb); 641 + dev_consume_skb_irq(skb); 642 642 } 643 643 644 644 netdev_completed_queue(bp->dev, pkts_compl, bytes_compl); ··· 1012 1012 } 1013 1013 1014 1014 skb_copy_from_linear_data(skb, skb_put(bounce_skb, len), len); 1015 - dev_kfree_skb_any(skb); 1015 + dev_consume_skb_any(skb); 1016 1016 skb = bounce_skb; 1017 1017 } 1018 1018

+3

drivers/net/ethernet/cadence/macb.h

··· 643 643 #define MACB_CAPS_JUMBO 0x00000020 644 644 #define MACB_CAPS_GEM_HAS_PTP 0x00000040 645 645 #define MACB_CAPS_BD_RD_PREFETCH 0x00000080 646 + #define MACB_CAPS_NEEDS_RSTONUBR 0x00000100 646 647 #define MACB_CAPS_FIFO_MODE 0x10000000 647 648 #define MACB_CAPS_GIGABIT_MODE_AVAILABLE 0x20000000 648 649 #define MACB_CAPS_SG_DISABLED 0x40000000 ··· 1215 1214 1216 1215 int rx_bd_rd_prefetch; 1217 1216 int tx_bd_rd_prefetch; 1217 + 1218 + u32 rx_intr_mask; 1218 1219 }; 1219 1220 1220 1221 #ifdef CONFIG_MACB_USE_HWSTAMP

+17 -11

drivers/net/ethernet/cadence/macb_main.c

··· 56 56 /* level of occupied TX descriptors under which we wake up TX process */ 57 57 #define MACB_TX_WAKEUP_THRESH(bp) (3 * (bp)->tx_ring_size / 4) 58 58 59 - #define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(RXUBR) \ 60 - | MACB_BIT(ISR_ROVR)) 59 + #define MACB_RX_INT_FLAGS (MACB_BIT(RCOMP) | MACB_BIT(ISR_ROVR)) 61 60 #define MACB_TX_ERR_FLAGS (MACB_BIT(ISR_TUND) \ 62 61 | MACB_BIT(ISR_RLE) \ 63 62 | MACB_BIT(TXERR)) ··· 1269 1270 queue_writel(queue, ISR, MACB_BIT(RCOMP)); 1270 1271 napi_reschedule(napi); 1271 1272 } else { 1272 - queue_writel(queue, IER, MACB_RX_INT_FLAGS); 1273 + queue_writel(queue, IER, bp->rx_intr_mask); 1273 1274 } 1274 1275 } 1275 1276 ··· 1287 1288 u32 ctrl; 1288 1289 1289 1290 for (q = 0, queue = bp->queues; q < bp->num_queues; ++q, ++queue) { 1290 - queue_writel(queue, IDR, MACB_RX_INT_FLAGS | 1291 + queue_writel(queue, IDR, bp->rx_intr_mask | 1291 1292 MACB_TX_INT_FLAGS | 1292 1293 MACB_BIT(HRESP)); 1293 1294 } ··· 1317 1318 1318 1319 /* Enable interrupts */ 1319 1320 queue_writel(queue, IER, 1320 - MACB_RX_INT_FLAGS | 1321 + bp->rx_intr_mask | 1321 1322 MACB_TX_INT_FLAGS | 1322 1323 MACB_BIT(HRESP)); 1323 1324 } ··· 1371 1372 (unsigned int)(queue - bp->queues), 1372 1373 (unsigned long)status); 1373 1374 1374 - if (status & MACB_RX_INT_FLAGS) { 1375 + if (status & bp->rx_intr_mask) { 1375 1376 /* There's no point taking any more interrupts 1376 1377 * until we have processed the buffers. The 1377 1378 * scheduling call may fail if the poll routine 1378 1379 * is already scheduled, so disable interrupts 1379 1380 * now. 1380 1381 */ 1381 - queue_writel(queue, IDR, MACB_RX_INT_FLAGS); 1382 + queue_writel(queue, IDR, bp->rx_intr_mask); 1382 1383 if (bp->caps & MACB_CAPS_ISR_CLEAR_ON_WRITE) 1383 1384 queue_writel(queue, ISR, MACB_BIT(RCOMP)); 1384 1385 ··· 1411 1412 /* There is a hardware issue under heavy load where DMA can 1412 1413 * stop, this causes endless "used buffer descriptor read" 1413 1414 * interrupts but it can be cleared by re-enabling RX. See 1414 - * the at91 manual, section 41.3.1 or the Zynq manual 1415 - * section 16.7.4 for details. 1415 + * the at91rm9200 manual, section 41.3.1 or the Zynq manual 1416 + * section 16.7.4 for details. RXUBR is only enabled for 1417 + * these two versions. 1416 1418 */ 1417 1419 if (status & MACB_BIT(RXUBR)) { 1418 1420 ctrl = macb_readl(bp, NCR); ··· 2259 2259 2260 2260 /* Enable interrupts */ 2261 2261 queue_writel(queue, IER, 2262 - MACB_RX_INT_FLAGS | 2262 + bp->rx_intr_mask | 2263 2263 MACB_TX_INT_FLAGS | 2264 2264 MACB_BIT(HRESP)); 2265 2265 } ··· 3907 3907 }; 3908 3908 3909 3909 static const struct macb_config emac_config = { 3910 + .caps = MACB_CAPS_NEEDS_RSTONUBR, 3910 3911 .clk_init = at91ether_clk_init, 3911 3912 .init = at91ether_init, 3912 3913 }; ··· 3929 3928 }; 3930 3929 3931 3930 static const struct macb_config zynq_config = { 3932 - .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF, 3931 + .caps = MACB_CAPS_GIGABIT_MODE_AVAILABLE | MACB_CAPS_NO_GIGABIT_HALF | 3932 + MACB_CAPS_NEEDS_RSTONUBR, 3933 3933 .dma_burst_length = 16, 3934 3934 .clk_init = macb_clk_init, 3935 3935 .init = macb_init, ··· 4084 4082 bp->tx_bd_rd_prefetch = (2 << (val - 1)) * 4085 4083 macb_dma_desc_get_size(bp); 4086 4084 } 4085 + 4086 + bp->rx_intr_mask = MACB_RX_INT_FLAGS; 4087 + if (bp->caps & MACB_CAPS_NEEDS_RSTONUBR) 4088 + bp->rx_intr_mask |= MACB_BIT(RXUBR); 4087 4089 4088 4090 mac = of_get_mac_address(np); 4089 4091 if (mac) {

+5

drivers/net/ethernet/hisilicon/hns/hns_enet.c

··· 2418 2418 out_notify_fail: 2419 2419 (void)cancel_work_sync(&priv->service_task); 2420 2420 out_read_prop_fail: 2421 + /* safe for ACPI FW */ 2422 + of_node_put(to_of_node(priv->fwnode)); 2421 2423 free_netdev(ndev); 2422 2424 return ret; 2423 2425 } ··· 2448 2446 2449 2447 set_bit(NIC_STATE_REMOVING, &priv->state); 2450 2448 (void)cancel_work_sync(&priv->service_task); 2449 + 2450 + /* safe for ACPI FW */ 2451 + of_node_put(to_of_node(priv->fwnode)); 2451 2452 2452 2453 free_netdev(ndev); 2453 2454 return 0;

+9 -7

drivers/net/ethernet/hisilicon/hns/hns_ethtool.c

··· 1157 1157 */ 1158 1158 static int hns_nic_nway_reset(struct net_device *netdev) 1159 1159 { 1160 - int ret = 0; 1161 1160 struct phy_device *phy = netdev->phydev; 1162 1161 1163 - if (netif_running(netdev)) { 1164 - /* if autoneg is disabled, don't restart auto-negotiation */ 1165 - if (phy && phy->autoneg == AUTONEG_ENABLE) 1166 - ret = genphy_restart_aneg(phy); 1167 - } 1162 + if (!netif_running(netdev)) 1163 + return 0; 1168 1164 1169 - return ret; 1165 + if (!phy) 1166 + return -EOPNOTSUPP; 1167 + 1168 + if (phy->autoneg != AUTONEG_ENABLE) 1169 + return -EINVAL; 1170 + 1171 + return genphy_restart_aneg(phy); 1170 1172 } 1171 1173 1172 1174 static u32

+1 -1

drivers/net/ethernet/hisilicon/hns_mdio.c

··· 321 321 } 322 322 323 323 hns_mdio_cmd_write(mdio_dev, is_c45, 324 - MDIO_C45_WRITE_ADDR, phy_id, devad); 324 + MDIO_C45_READ, phy_id, devad); 325 325 } 326 326 327 327 /* Step 5: waitting for MDIO_COMMAND_REG 's mdio_start==0,*/

+1 -1

drivers/net/ethernet/i825xx/82596.c

··· 1310 1310 dev->stats.tx_aborted_errors++; 1311 1311 } 1312 1312 1313 - dev_kfree_skb_irq(skb); 1313 + dev_consume_skb_irq(skb); 1314 1314 1315 1315 tx_cmd->cmd.command = 0; /* Mark free */ 1316 1316 break;

+1 -1

drivers/net/ethernet/mellanox/mlx5/core/en_main.c

··· 949 949 if (params->rx_dim_enabled) 950 950 __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state); 951 951 952 - if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE) 952 + if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)) 953 953 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state); 954 954 955 955 return 0;

+23 -2

drivers/net/ethernet/mellanox/mlx5/core/en_rep.c

··· 1114 1114 struct mlx5e_priv *priv = netdev_priv(dev); 1115 1115 struct mlx5e_rep_priv *rpriv = priv->ppriv; 1116 1116 struct mlx5_eswitch_rep *rep = rpriv->rep; 1117 - int ret; 1117 + int ret, pf_num; 1118 1118 1119 - ret = snprintf(buf, len, "%d", rep->vport - 1); 1119 + ret = mlx5_lag_get_pf_num(priv->mdev, &pf_num); 1120 + if (ret) 1121 + return ret; 1122 + 1123 + if (rep->vport == FDB_UPLINK_VPORT) 1124 + ret = snprintf(buf, len, "p%d", pf_num); 1125 + else 1126 + ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1); 1127 + 1120 1128 if (ret >= len) 1121 1129 return -EOPNOTSUPP; 1122 1130 ··· 1272 1264 return 0; 1273 1265 } 1274 1266 1267 + static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos, 1268 + __be16 vlan_proto) 1269 + { 1270 + netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n"); 1271 + 1272 + if (vlan != 0) 1273 + return -EOPNOTSUPP; 1274 + 1275 + /* allow setting 0-vid for compatibility with libvirt */ 1276 + return 0; 1277 + } 1278 + 1275 1279 static const struct switchdev_ops mlx5e_rep_switchdev_ops = { 1276 1280 .switchdev_port_attr_get = mlx5e_attr_get, 1277 1281 }; ··· 1318 1298 .ndo_set_vf_rate = mlx5e_set_vf_rate, 1319 1299 .ndo_get_vf_config = mlx5e_get_vf_config, 1320 1300 .ndo_get_vf_stats = mlx5e_get_vf_stats, 1301 + .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan, 1321 1302 }; 1322 1303 1323 1304 bool mlx5e_eswitch_rep(struct net_device *netdev)

+8 -14

drivers/net/ethernet/mellanox/mlx5/core/eswitch.c

··· 1134 1134 int err = 0; 1135 1135 u8 *smac_v; 1136 1136 1137 - if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) { 1138 - mlx5_core_warn(esw->dev, 1139 - "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n", 1140 - vport->vport); 1141 - return -EPERM; 1142 - } 1143 - 1144 1137 esw_vport_cleanup_ingress_rules(esw, vport); 1145 1138 1146 1139 if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) { ··· 1721 1728 int vport_num; 1722 1729 int err; 1723 1730 1724 - if (!MLX5_ESWITCH_MANAGER(dev)) 1731 + if (!MLX5_VPORT_MANAGER(dev)) 1725 1732 return 0; 1726 1733 1727 1734 esw_info(dev, ··· 1790 1797 1791 1798 void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) 1792 1799 { 1793 - if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev)) 1800 + if (!esw || !MLX5_VPORT_MANAGER(esw->dev)) 1794 1801 return; 1795 1802 1796 1803 esw_info(esw->dev, "cleanup\n"); ··· 1820 1827 mutex_lock(&esw->state_lock); 1821 1828 evport = &esw->vports[vport]; 1822 1829 1823 - if (evport->info.spoofchk && !is_valid_ether_addr(mac)) { 1830 + if (evport->info.spoofchk && !is_valid_ether_addr(mac)) 1824 1831 mlx5_core_warn(esw->dev, 1825 - "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n", 1832 + "Set invalid MAC while spoofchk is on, vport(%d)\n", 1826 1833 vport); 1827 - err = -EPERM; 1828 - goto unlock; 1829 - } 1830 1834 1831 1835 err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac); 1832 1836 if (err) { ··· 1969 1979 evport = &esw->vports[vport]; 1970 1980 pschk = evport->info.spoofchk; 1971 1981 evport->info.spoofchk = spoofchk; 1982 + if (pschk && !is_valid_ether_addr(evport->info.mac)) 1983 + mlx5_core_warn(esw->dev, 1984 + "Spoofchk in set while MAC is invalid, vport(%d)\n", 1985 + evport->vport); 1972 1986 if (evport->enabled && esw->mode == SRIOV_LEGACY) 1973 1987 err = esw_vport_ingress_config(esw, evport); 1974 1988 if (err)

+21

drivers/net/ethernet/mellanox/mlx5/core/lag.c

··· 616 616 } 617 617 } 618 618 619 + int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num) 620 + { 621 + struct mlx5_lag *ldev; 622 + int n; 623 + 624 + ldev = mlx5_lag_dev_get(dev); 625 + if (!ldev) { 626 + mlx5_core_warn(dev, "no lag device, can't get pf num\n"); 627 + return -EINVAL; 628 + } 629 + 630 + for (n = 0; n < MLX5_MAX_PORTS; n++) 631 + if (ldev->pf[n].dev == dev) { 632 + *pf_num = n; 633 + return 0; 634 + } 635 + 636 + mlx5_core_warn(dev, "wasn't able to locate pf in the lag device\n"); 637 + return -EINVAL; 638 + } 639 + 619 640 /* Must be called with intf_mutex held */ 620 641 void mlx5_lag_remove(struct mlx5_core_dev *dev) 621 642 {

+2

drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h

··· 187 187 MLX5_CAP_GEN(dev, lag_master); 188 188 } 189 189 190 + int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num); 191 + 190 192 void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); 191 193 void mlx5_lag_update(struct mlx5_core_dev *dev); 192 194

+3 -2

drivers/net/ethernet/mellanox/mlx5/core/qp.c

··· 44 44 mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn) 45 45 { 46 46 struct mlx5_core_rsc_common *common; 47 + unsigned long flags; 47 48 48 - spin_lock(&table->lock); 49 + spin_lock_irqsave(&table->lock, flags); 49 50 50 51 common = radix_tree_lookup(&table->tree, rsn); 51 52 if (common) 52 53 atomic_inc(&common->refcount); 53 54 54 - spin_unlock(&table->lock); 55 + spin_unlock_irqrestore(&table->lock, flags); 55 56 56 57 return common; 57 58 }

+4 -4

drivers/net/ethernet/qlogic/qed/qed_dev.c

··· 795 795 796 796 /* get pq index according to PQ_FLAGS */ 797 797 static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn, 798 - u32 pq_flags) 798 + unsigned long pq_flags) 799 799 { 800 800 struct qed_qm_info *qm_info = &p_hwfn->qm_info; 801 801 802 802 /* Can't have multiple flags set here */ 803 - if (bitmap_weight((unsigned long *)&pq_flags, 803 + if (bitmap_weight(&pq_flags, 804 804 sizeof(pq_flags) * BITS_PER_BYTE) > 1) { 805 - DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags); 805 + DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags); 806 806 goto err; 807 807 } 808 808 809 809 if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) { 810 - DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags); 810 + DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags); 811 811 goto err; 812 812 } 813 813

+11 -1

drivers/net/ethernet/qlogic/qed/qed_l2.c

··· 609 609 (!!(accept_filter & QED_ACCEPT_MCAST_MATCHED) && 610 610 !!(accept_filter & QED_ACCEPT_MCAST_UNMATCHED))); 611 611 612 + SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, 613 + (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && 614 + !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); 615 + 612 616 SET_FIELD(state, ETH_VPORT_TX_MODE_BCAST_ACCEPT_ALL, 613 617 !!(accept_filter & QED_ACCEPT_BCAST)); 614 618 ··· 746 742 if (rc) { 747 743 qed_sp_destroy_request(p_hwfn, p_ent); 748 744 return rc; 745 + } 746 + 747 + if (p_params->update_ctl_frame_check) { 748 + p_cmn->ctl_frame_mac_check_en = p_params->mac_chk_en; 749 + p_cmn->ctl_frame_ethtype_check_en = p_params->ethtype_chk_en; 749 750 } 750 751 751 752 /* Update mcast bins for VFs, PF doesn't use this functionality */ ··· 2697 2688 if (type == QED_FILTER_RX_MODE_TYPE_PROMISC) { 2698 2689 accept_flags.rx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | 2699 2690 QED_ACCEPT_MCAST_UNMATCHED; 2700 - accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; 2691 + accept_flags.tx_accept_filter |= QED_ACCEPT_UCAST_UNMATCHED | 2692 + QED_ACCEPT_MCAST_UNMATCHED; 2701 2693 } else if (type == QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC) { 2702 2694 accept_flags.rx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED; 2703 2695 accept_flags.tx_accept_filter |= QED_ACCEPT_MCAST_UNMATCHED;

+3

drivers/net/ethernet/qlogic/qed/qed_l2.h

··· 219 219 struct qed_rss_params *rss_params; 220 220 struct qed_filter_accept_flags accept_flags; 221 221 struct qed_sge_tpa_params *sge_tpa_params; 222 + u8 update_ctl_frame_check; 223 + u8 mac_chk_en; 224 + u8 ethtype_chk_en; 222 225 }; 223 226 224 227 int qed_sp_vport_update(struct qed_hwfn *p_hwfn,

+15 -5

drivers/net/ethernet/qlogic/qed/qed_ll2.c

··· 2451 2451 { 2452 2452 struct qed_ll2_tx_pkt_info pkt; 2453 2453 const skb_frag_t *frag; 2454 + u8 flags = 0, nr_frags; 2454 2455 int rc = -EINVAL, i; 2455 2456 dma_addr_t mapping; 2456 2457 u16 vlan = 0; 2457 - u8 flags = 0; 2458 2458 2459 2459 if (unlikely(skb->ip_summed != CHECKSUM_NONE)) { 2460 2460 DP_INFO(cdev, "Cannot transmit a checksummed packet\n"); 2461 2461 return -EINVAL; 2462 2462 } 2463 2463 2464 - if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { 2464 + /* Cache number of fragments from SKB since SKB may be freed by 2465 + * the completion routine after calling qed_ll2_prepare_tx_packet() 2466 + */ 2467 + nr_frags = skb_shinfo(skb)->nr_frags; 2468 + 2469 + if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) { 2465 2470 DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n", 2466 - 1 + skb_shinfo(skb)->nr_frags); 2471 + 1 + nr_frags); 2467 2472 return -EINVAL; 2468 2473 } 2469 2474 ··· 2490 2485 } 2491 2486 2492 2487 memset(&pkt, 0, sizeof(pkt)); 2493 - pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags; 2488 + pkt.num_of_bds = 1 + nr_frags; 2494 2489 pkt.vlan = vlan; 2495 2490 pkt.bd_flags = flags; 2496 2491 pkt.tx_dest = QED_LL2_TX_DEST_NW; ··· 2501 2496 test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags)) 2502 2497 pkt.remove_stag = true; 2503 2498 2499 + /* qed_ll2_prepare_tx_packet() may actually send the packet if 2500 + * there are no fragments in the skb and subsequently the completion 2501 + * routine may run and free the SKB, so no dereferencing the SKB 2502 + * beyond this point unless skb has any fragments. 2503 + */ 2504 2504 rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle, 2505 2505 &pkt, 1); 2506 2506 if (rc) 2507 2507 goto err; 2508 2508 2509 - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { 2509 + for (i = 0; i < nr_frags; i++) { 2510 2510 frag = &skb_shinfo(skb)->frags[i]; 2511 2511 2512 2512 mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0,

+8 -2

drivers/net/ethernet/qlogic/qed/qed_sriov.c

··· 1969 1969 params.vport_id = vf->vport_id; 1970 1970 params.max_buffers_per_cqe = start->max_buffers_per_cqe; 1971 1971 params.mtu = vf->mtu; 1972 - params.check_mac = true; 1972 + 1973 + /* Non trusted VFs should enable control frame filtering */ 1974 + params.check_mac = !vf->p_vf_info.is_trusted_configured; 1973 1975 1974 1976 rc = qed_sp_eth_vport_start(p_hwfn, &params); 1975 1977 if (rc) { ··· 5139 5137 params.opaque_fid = vf->opaque_fid; 5140 5138 params.vport_id = vf->vport_id; 5141 5139 5140 + params.update_ctl_frame_check = 1; 5141 + params.mac_chk_en = !vf_info->is_trusted_configured; 5142 + 5142 5143 if (vf_info->rx_accept_mode & mask) { 5143 5144 flags->update_rx_mode_config = 1; 5144 5145 flags->rx_accept_filter = vf_info->rx_accept_mode; ··· 5159 5154 } 5160 5155 5161 5156 if (flags->update_rx_mode_config || 5162 - flags->update_tx_mode_config) 5157 + flags->update_tx_mode_config || 5158 + params.update_ctl_frame_check) 5163 5159 qed_sp_vport_update(hwfn, &params, 5164 5160 QED_SPQ_MODE_EBLOCK, NULL); 5165 5161 }

+10

drivers/net/ethernet/qlogic/qed/qed_vf.c

··· 261 261 struct pfvf_acquire_resp_tlv *resp = &p_iov->pf2vf_reply->acquire_resp; 262 262 struct pf_vf_pfdev_info *pfdev_info = &resp->pfdev_info; 263 263 struct vf_pf_resc_request *p_resc; 264 + u8 retry_cnt = VF_ACQUIRE_THRESH; 264 265 bool resources_acquired = false; 265 266 struct vfpf_acquire_tlv *req; 266 267 int rc = 0, attempts = 0; ··· 315 314 316 315 /* send acquire request */ 317 316 rc = qed_send_msg2pf(p_hwfn, &resp->hdr.status, sizeof(*resp)); 317 + 318 + /* Re-try acquire in case of vf-pf hw channel timeout */ 319 + if (retry_cnt && rc == -EBUSY) { 320 + DP_VERBOSE(p_hwfn, QED_MSG_IOV, 321 + "VF retrying to acquire due to VPC timeout\n"); 322 + retry_cnt--; 323 + continue; 324 + } 325 + 318 326 if (rc) 319 327 goto exit; 320 328

+1 -1

drivers/net/ethernet/realtek/8139cp.c

··· 691 691 } 692 692 bytes_compl += skb->len; 693 693 pkts_compl++; 694 - dev_kfree_skb_irq(skb); 694 + dev_consume_skb_irq(skb); 695 695 } 696 696 697 697 cp->tx_skb[tx_tail] = NULL;

+3 -1

drivers/net/ethernet/stmicro/stmmac/dwmac-rk.c

··· 1342 1342 } 1343 1343 1344 1344 ret = phy_power_on(bsp_priv, true); 1345 - if (ret) 1345 + if (ret) { 1346 + gmac_clk_enable(bsp_priv, false); 1346 1347 return ret; 1348 + } 1347 1349 1348 1350 pm_runtime_enable(dev); 1349 1351 pm_runtime_get_sync(dev);

+1 -1

drivers/net/ethernet/ti/cpmac.c

··· 608 608 netdev_dbg(dev, "sent 0x%p, len=%d\n", 609 609 desc->skb, desc->skb->len); 610 610 611 - dev_kfree_skb_irq(desc->skb); 611 + dev_consume_skb_irq(desc->skb); 612 612 desc->skb = NULL; 613 613 if (__netif_subqueue_stopped(dev, queue)) 614 614 netif_wake_subqueue(dev, queue);

+2 -1

drivers/vhost/net.c

··· 1337 1337 n->vqs[i].rx_ring = NULL; 1338 1338 vhost_net_buf_init(&n->vqs[i].rxq); 1339 1339 } 1340 - vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); 1340 + vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX, 1341 + UIO_MAXIOV + VHOST_NET_BATCH); 1341 1342 1342 1343 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, EPOLLOUT, dev); 1343 1344 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, EPOLLIN, dev);

+1 -1

drivers/vhost/scsi.c

··· 1627 1627 vqs[i] = &vs->vqs[i].vq; 1628 1628 vs->vqs[i].vq.handle_kick = vhost_scsi_handle_kick; 1629 1629 } 1630 - vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ); 1630 + vhost_dev_init(&vs->dev, vqs, VHOST_SCSI_MAX_VQ, UIO_MAXIOV); 1631 1631 1632 1632 vhost_scsi_init_inflight(vs, NULL); 1633 1633

+4 -3

drivers/vhost/vhost.c

··· 390 390 vq->indirect = kmalloc_array(UIO_MAXIOV, 391 391 sizeof(*vq->indirect), 392 392 GFP_KERNEL); 393 - vq->log = kmalloc_array(UIO_MAXIOV, sizeof(*vq->log), 393 + vq->log = kmalloc_array(dev->iov_limit, sizeof(*vq->log), 394 394 GFP_KERNEL); 395 - vq->heads = kmalloc_array(UIO_MAXIOV, sizeof(*vq->heads), 395 + vq->heads = kmalloc_array(dev->iov_limit, sizeof(*vq->heads), 396 396 GFP_KERNEL); 397 397 if (!vq->indirect || !vq->log || !vq->heads) 398 398 goto err_nomem; ··· 414 414 } 415 415 416 416 void vhost_dev_init(struct vhost_dev *dev, 417 - struct vhost_virtqueue **vqs, int nvqs) 417 + struct vhost_virtqueue **vqs, int nvqs, int iov_limit) 418 418 { 419 419 struct vhost_virtqueue *vq; 420 420 int i; ··· 427 427 dev->iotlb = NULL; 428 428 dev->mm = NULL; 429 429 dev->worker = NULL; 430 + dev->iov_limit = iov_limit; 430 431 init_llist_head(&dev->work_list); 431 432 init_waitqueue_head(&dev->wait); 432 433 INIT_LIST_HEAD(&dev->read_list);

+3 -1

drivers/vhost/vhost.h

··· 170 170 struct list_head read_list; 171 171 struct list_head pending_list; 172 172 wait_queue_head_t wait; 173 + int iov_limit; 173 174 }; 174 175 175 - void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs); 176 + void vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, 177 + int nvqs, int iov_limit); 176 178 long vhost_dev_set_owner(struct vhost_dev *dev); 177 179 bool vhost_dev_has_owner(struct vhost_dev *dev); 178 180 long vhost_dev_check_owner(struct vhost_dev *);

+1 -1

drivers/vhost/vsock.c

··· 531 531 vsock->vqs[VSOCK_VQ_TX].handle_kick = vhost_vsock_handle_tx_kick; 532 532 vsock->vqs[VSOCK_VQ_RX].handle_kick = vhost_vsock_handle_rx_kick; 533 533 534 - vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs)); 534 + vhost_dev_init(&vsock->dev, vqs, ARRAY_SIZE(vsock->vqs), UIO_MAXIOV); 535 535 536 536 file->private_data = vsock; 537 537 spin_lock_init(&vsock->send_pkt_list_lock);

+1

include/linux/interrupt.h

··· 260 260 /** 261 261 * struct irq_affinity_desc - Interrupt affinity descriptor 262 262 * @mask: cpumask to hold the affinity assignment 263 + * @is_managed: 1 if the interrupt is managed internally 263 264 */ 264 265 struct irq_affinity_desc { 265 266 struct cpumask mask;

+5 -1

include/linux/sched/wake_q.h

··· 24 24 * called near the end of a function. Otherwise, the list can be 25 25 * re-initialized for later re-use by wake_q_init(). 26 26 * 27 - * Note that this can cause spurious wakeups. schedule() callers 27 + * NOTE that this can cause spurious wakeups. schedule() callers 28 28 * must ensure the call is done inside a loop, confirming that the 29 29 * wakeup condition has in fact occurred. 30 + * 31 + * NOTE that there is no guarantee the wakeup will happen any later than the 32 + * wake_q_add() location. Therefore task must be ready to be woken at the 33 + * location of the wake_q_add(). 30 34 */ 31 35 32 36 #include <linux/sched.h>

+2

include/net/tls.h

··· 120 120 struct scatterlist sg_aead_out[2]; 121 121 122 122 char aad_space[TLS_AAD_SPACE_SIZE]; 123 + u8 iv_data[TLS_CIPHER_AES_GCM_128_IV_SIZE + 124 + TLS_CIPHER_AES_GCM_128_SALT_SIZE]; 123 125 struct aead_request aead_req; 124 126 u8 aead_req_ctx[]; 125 127 };

+1 -1

kernel/exit.c

··· 307 307 * MB (A) MB (B) 308 308 * [L] cond [L] tsk 309 309 */ 310 - smp_rmb(); /* (B) */ 310 + smp_mb(); /* (B) */ 311 311 312 312 /* 313 313 * Avoid using task_rcu_dereference() magic as long as we are careful,

+8 -5

kernel/futex.c

··· 1452 1452 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n")) 1453 1453 return; 1454 1454 1455 - /* 1456 - * Queue the task for later wakeup for after we've released 1457 - * the hb->lock. wake_q_add() grabs reference to p. 1458 - */ 1459 - wake_q_add(wake_q, p); 1455 + get_task_struct(p); 1460 1456 __unqueue_futex(q); 1461 1457 /* 1462 1458 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL ··· 1462 1466 * plist_del in __unqueue_futex(). 1463 1467 */ 1464 1468 smp_store_release(&q->lock_ptr, NULL); 1469 + 1470 + /* 1471 + * Queue the task for later wakeup for after we've released 1472 + * the hb->lock. wake_q_add() grabs reference to p. 1473 + */ 1474 + wake_q_add(wake_q, p); 1475 + put_task_struct(p); 1465 1476 } 1466 1477 1467 1478 /*

+1 -1

kernel/irq/irqdesc.c

··· 457 457 458 458 /* Validate affinity mask(s) */ 459 459 if (affinity) { 460 - for (i = 0; i < cnt; i++, i++) { 460 + for (i = 0; i < cnt; i++) { 461 461 if (cpumask_empty(&affinity[i].mask)) 462 462 return -EINVAL; 463 463 }

+3

kernel/irq/manage.c

··· 393 393 } 394 394 395 395 cpumask_and(&mask, cpu_online_mask, set); 396 + if (cpumask_empty(&mask)) 397 + cpumask_copy(&mask, cpu_online_mask); 398 + 396 399 if (node != NUMA_NO_NODE) { 397 400 const struct cpumask *nodemask = cpumask_of_node(node); 398 401

+9 -2

kernel/locking/rwsem-xadd.c

··· 198 198 woken++; 199 199 tsk = waiter->task; 200 200 201 - wake_q_add(wake_q, tsk); 201 + get_task_struct(tsk); 202 202 list_del(&waiter->list); 203 203 /* 204 - * Ensure that the last operation is setting the reader 204 + * Ensure calling get_task_struct() before setting the reader 205 205 * waiter to nil such that rwsem_down_read_failed() cannot 206 206 * race with do_exit() by always holding a reference count 207 207 * to the task to wakeup. 208 208 */ 209 209 smp_store_release(&waiter->task, NULL); 210 + /* 211 + * Ensure issuing the wakeup (either by us or someone else) 212 + * after setting the reader waiter to nil. 213 + */ 214 + wake_q_add(wake_q, tsk); 215 + /* wake_q_add() already take the task ref */ 216 + put_task_struct(tsk); 210 217 } 211 218 212 219 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;

+16 -3

kernel/sched/core.c

··· 396 396 #endif 397 397 #endif 398 398 399 + /** 400 + * wake_q_add() - queue a wakeup for 'later' waking. 401 + * @head: the wake_q_head to add @task to 402 + * @task: the task to queue for 'later' wakeup 403 + * 404 + * Queue a task for later wakeup, most likely by the wake_up_q() call in the 405 + * same context, _HOWEVER_ this is not guaranteed, the wakeup can come 406 + * instantly. 407 + * 408 + * This function must be used as-if it were wake_up_process(); IOW the task 409 + * must be ready to be woken at this location. 410 + */ 399 411 void wake_q_add(struct wake_q_head *head, struct task_struct *task) 400 412 { 401 413 struct wake_q_node *node = &task->wake_q; ··· 417 405 * its already queued (either by us or someone else) and will get the 418 406 * wakeup due to that. 419 407 * 420 - * This cmpxchg() executes a full barrier, which pairs with the full 421 - * barrier executed by the wakeup in wake_up_q(). 408 + * In order to ensure that a pending wakeup will observe our pending 409 + * state, even in the failed case, an explicit smp_mb() must be used. 422 410 */ 423 - if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL)) 411 + smp_mb__before_atomic(); 412 + if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL)) 424 413 return; 425 414 426 415 get_task_struct(task);

+1

kernel/time/posix-cpu-timers.c

··· 685 685 * set up the signal and overrun bookkeeping. 686 686 */ 687 687 timer->it.cpu.incr = timespec64_to_ns(&new->it_interval); 688 + timer->it_interval = ns_to_ktime(timer->it.cpu.incr); 688 689 689 690 /* 690 691 * This acts as a modification timestamp for the timer,

-12

mm/page_alloc.c

··· 5701 5701 cond_resched(); 5702 5702 } 5703 5703 } 5704 - #ifdef CONFIG_SPARSEMEM 5705 - /* 5706 - * If the zone does not span the rest of the section then 5707 - * we should at least initialize those pages. Otherwise we 5708 - * could blow up on a poisoned page in some paths which depend 5709 - * on full sections being initialized (e.g. memory hotplug). 5710 - */ 5711 - while (end_pfn % PAGES_PER_SECTION) { 5712 - __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid); 5713 - end_pfn++; 5714 - } 5715 - #endif 5716 5704 } 5717 5705 5718 5706 #ifdef CONFIG_ZONE_DEVICE

+6 -3

net/bridge/netfilter/ebtables.c

··· 2293 2293 2294 2294 xt_compat_lock(NFPROTO_BRIDGE); 2295 2295 2296 - ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); 2297 - if (ret < 0) 2298 - goto out_unlock; 2296 + if (tmp.nentries) { 2297 + ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries); 2298 + if (ret < 0) 2299 + goto out_unlock; 2300 + } 2301 + 2299 2302 ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state); 2300 2303 if (ret < 0) 2301 2304 goto out_unlock;

+3

net/core/dev.c

··· 8712 8712 set_bit(__LINK_STATE_PRESENT, &dev->state); 8713 8713 set_bit(__LINK_STATE_START, &dev->state); 8714 8714 8715 + /* napi_busy_loop stats accounting wants this */ 8716 + dev_net_set(dev, &init_net); 8717 + 8715 8718 /* Note : We dont allocate pcpu_refcnt for dummy devices, 8716 8719 * because users of this 'device' dont need to change 8717 8720 * its refcount.

+1 -1

net/decnet/dn_dev.c

··· 56 56 #include <net/dn_neigh.h> 57 57 #include <net/dn_fib.h> 58 58 59 - #define DN_IFREQ_SIZE (sizeof(struct ifreq) - sizeof(struct sockaddr) + sizeof(struct sockaddr_dn)) 59 + #define DN_IFREQ_SIZE (offsetof(struct ifreq, ifr_ifru) + sizeof(struct sockaddr_dn)) 60 60 61 61 static char dn_rt_all_end_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x04,0x00,0x00}; 62 62 static char dn_rt_all_rt_mcast[ETH_ALEN] = {0xAB,0x00,0x00,0x03,0x00,0x00};

+50

net/ipv4/ip_vti.c

··· 74 74 return 0; 75 75 } 76 76 77 + static int vti_input_ipip(struct sk_buff *skb, int nexthdr, __be32 spi, 78 + int encap_type) 79 + { 80 + struct ip_tunnel *tunnel; 81 + const struct iphdr *iph = ip_hdr(skb); 82 + struct net *net = dev_net(skb->dev); 83 + struct ip_tunnel_net *itn = net_generic(net, vti_net_id); 84 + 85 + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, 86 + iph->saddr, iph->daddr, 0); 87 + if (tunnel) { 88 + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) 89 + goto drop; 90 + 91 + XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = tunnel; 92 + 93 + skb->dev = tunnel->dev; 94 + 95 + return xfrm_input(skb, nexthdr, spi, encap_type); 96 + } 97 + 98 + return -EINVAL; 99 + drop: 100 + kfree_skb(skb); 101 + return 0; 102 + } 103 + 77 104 static int vti_rcv(struct sk_buff *skb) 78 105 { 79 106 XFRM_SPI_SKB_CB(skb)->family = AF_INET; 80 107 XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); 81 108 82 109 return vti_input(skb, ip_hdr(skb)->protocol, 0, 0); 110 + } 111 + 112 + static int vti_rcv_ipip(struct sk_buff *skb) 113 + { 114 + XFRM_SPI_SKB_CB(skb)->family = AF_INET; 115 + XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr); 116 + 117 + return vti_input_ipip(skb, ip_hdr(skb)->protocol, ip_hdr(skb)->saddr, 0); 83 118 } 84 119 85 120 static int vti_rcv_cb(struct sk_buff *skb, int err) ··· 470 435 .priority = 100, 471 436 }; 472 437 438 + static struct xfrm_tunnel ipip_handler __read_mostly = { 439 + .handler = vti_rcv_ipip, 440 + .err_handler = vti4_err, 441 + .priority = 0, 442 + }; 443 + 473 444 static int __net_init vti_init_net(struct net *net) 474 445 { 475 446 int err; ··· 644 603 if (err < 0) 645 604 goto xfrm_proto_comp_failed; 646 605 606 + msg = "ipip tunnel"; 607 + err = xfrm4_tunnel_register(&ipip_handler, AF_INET); 608 + if (err < 0) { 609 + pr_info("%s: cant't register tunnel\n",__func__); 610 + goto xfrm_tunnel_failed; 611 + } 612 + 647 613 msg = "netlink interface"; 648 614 err = rtnl_link_register(&vti_link_ops); 649 615 if (err < 0) ··· 660 612 661 613 rtnl_link_failed: 662 614 xfrm4_protocol_deregister(&vti_ipcomp4_protocol, IPPROTO_COMP); 615 + xfrm_tunnel_failed: 616 + xfrm4_tunnel_deregister(&ipip_handler, AF_INET); 663 617 xfrm_proto_comp_failed: 664 618 xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); 665 619 xfrm_proto_ah_failed:

+1 -1

net/ipv4/netfilter/ipt_CLUSTERIP.c

··· 846 846 847 847 static void clusterip_net_exit(struct net *net) 848 848 { 849 + #ifdef CONFIG_PROC_FS 849 850 struct clusterip_net *cn = clusterip_pernet(net); 850 851 851 - #ifdef CONFIG_PROC_FS 852 852 mutex_lock(&cn->mutex); 853 853 proc_remove(cn->procdir); 854 854 cn->procdir = NULL;

+3 -4

net/ipv6/ip6mr.c

··· 1516 1516 continue; 1517 1517 rhltable_remove(&mrt->mfc_hash, &c->mnode, ip6mr_rht_params); 1518 1518 list_del_rcu(&c->list); 1519 + call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1520 + FIB_EVENT_ENTRY_DEL, 1521 + (struct mfc6_cache *)c, mrt->id); 1519 1522 mr6_netlink_event(mrt, (struct mfc6_cache *)c, RTM_DELROUTE); 1520 1523 mr_cache_put(c); 1521 1524 } ··· 1527 1524 spin_lock_bh(&mfc_unres_lock); 1528 1525 list_for_each_entry_safe(c, tmp, &mrt->mfc_unres_queue, list) { 1529 1526 list_del(&c->list); 1530 - call_ip6mr_mfc_entry_notifiers(read_pnet(&mrt->net), 1531 - FIB_EVENT_ENTRY_DEL, 1532 - (struct mfc6_cache *)c, 1533 - mrt->id); 1534 1527 mr6_netlink_event(mrt, (struct mfc6_cache *)c, 1535 1528 RTM_DELROUTE); 1536 1529 ip6mr_destroy_unres(mrt, (struct mfc6_cache *)c);

+12

net/netfilter/ipvs/ip_vs_ctl.c

··· 2221 2221 u->udp_timeout); 2222 2222 2223 2223 #ifdef CONFIG_IP_VS_PROTO_TCP 2224 + if (u->tcp_timeout < 0 || u->tcp_timeout > (INT_MAX / HZ) || 2225 + u->tcp_fin_timeout < 0 || u->tcp_fin_timeout > (INT_MAX / HZ)) { 2226 + return -EINVAL; 2227 + } 2228 + #endif 2229 + 2230 + #ifdef CONFIG_IP_VS_PROTO_UDP 2231 + if (u->udp_timeout < 0 || u->udp_timeout > (INT_MAX / HZ)) 2232 + return -EINVAL; 2233 + #endif 2234 + 2235 + #ifdef CONFIG_IP_VS_PROTO_TCP 2224 2236 if (u->tcp_timeout) { 2225 2237 pd = ip_vs_proto_data_get(ipvs, IPPROTO_TCP); 2226 2238 pd->timeout_table[IP_VS_TCP_S_ESTABLISHED]

+4

net/netfilter/nfnetlink_osf.c

··· 66 66 int ttl_check, 67 67 struct nf_osf_hdr_ctx *ctx) 68 68 { 69 + const __u8 *optpinit = ctx->optp; 69 70 unsigned int check_WSS = 0; 70 71 int fmatch = FMATCH_WRONG; 71 72 int foptsize, optnum; ··· 155 154 break; 156 155 } 157 156 } 157 + 158 + if (fmatch != FMATCH_OK) 159 + ctx->optp = optpinit; 158 160 159 161 return fmatch == FMATCH_OK; 160 162 }

+142 -47

net/netfilter/nft_compat.c

··· 22 22 #include <linux/netfilter_bridge/ebtables.h> 23 23 #include <linux/netfilter_arp/arp_tables.h> 24 24 #include <net/netfilter/nf_tables.h> 25 + #include <net/netns/generic.h> 25 26 26 27 struct nft_xt { 27 28 struct list_head head; 28 29 struct nft_expr_ops ops; 29 - unsigned int refcnt; 30 + refcount_t refcnt; 31 + 32 + /* used only when transaction mutex is locked */ 33 + unsigned int listcnt; 30 34 31 35 /* Unlike other expressions, ops doesn't have static storage duration. 32 36 * nft core assumes they do. We use kfree_rcu so that nft core can ··· 47 43 void *info; 48 44 }; 49 45 46 + struct nft_compat_net { 47 + struct list_head nft_target_list; 48 + struct list_head nft_match_list; 49 + }; 50 + 51 + static unsigned int nft_compat_net_id __read_mostly; 52 + static struct nft_expr_type nft_match_type; 53 + static struct nft_expr_type nft_target_type; 54 + 55 + static struct nft_compat_net *nft_compat_pernet(struct net *net) 56 + { 57 + return net_generic(net, nft_compat_net_id); 58 + } 59 + 50 60 static bool nft_xt_put(struct nft_xt *xt) 51 61 { 52 - if (--xt->refcnt == 0) { 53 - list_del(&xt->head); 62 + if (refcount_dec_and_test(&xt->refcnt)) { 63 + WARN_ON_ONCE(!list_empty(&xt->head)); 54 64 kfree_rcu(xt, rcu_head); 55 65 return true; 56 66 } ··· 291 273 return -EINVAL; 292 274 293 275 nft_xt = container_of(expr->ops, struct nft_xt, ops); 294 - nft_xt->refcnt++; 276 + refcount_inc(&nft_xt->refcnt); 295 277 return 0; 296 278 } 297 279 ··· 504 486 return ret; 505 487 506 488 nft_xt = container_of(expr->ops, struct nft_xt, ops); 507 - nft_xt->refcnt++; 489 + refcount_inc(&nft_xt->refcnt); 508 490 return 0; 509 491 } 510 492 ··· 556 538 nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) 557 539 { 558 540 __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); 541 + } 542 + 543 + static void nft_compat_activate(const struct nft_ctx *ctx, 544 + const struct nft_expr *expr, 545 + struct list_head *h) 546 + { 547 + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); 548 + 549 + if (xt->listcnt == 0) 550 + list_add(&xt->head, h); 551 + 552 + xt->listcnt++; 553 + } 554 + 555 + static void nft_compat_activate_mt(const struct nft_ctx *ctx, 556 + const struct nft_expr *expr) 557 + { 558 + struct nft_compat_net *cn = nft_compat_pernet(ctx->net); 559 + 560 + nft_compat_activate(ctx, expr, &cn->nft_match_list); 561 + } 562 + 563 + static void nft_compat_activate_tg(const struct nft_ctx *ctx, 564 + const struct nft_expr *expr) 565 + { 566 + struct nft_compat_net *cn = nft_compat_pernet(ctx->net); 567 + 568 + nft_compat_activate(ctx, expr, &cn->nft_target_list); 569 + } 570 + 571 + static void nft_compat_deactivate(const struct nft_ctx *ctx, 572 + const struct nft_expr *expr) 573 + { 574 + struct nft_xt *xt = container_of(expr->ops, struct nft_xt, ops); 575 + 576 + if (--xt->listcnt == 0) 577 + list_del_init(&xt->head); 559 578 } 560 579 561 580 static void ··· 789 734 .cb = nfnl_nft_compat_cb, 790 735 }; 791 736 792 - static LIST_HEAD(nft_match_list); 793 - 794 - static struct nft_expr_type nft_match_type; 795 - 796 737 static bool nft_match_cmp(const struct xt_match *match, 797 738 const char *name, u32 rev, u32 family) 798 739 { ··· 800 749 nft_match_select_ops(const struct nft_ctx *ctx, 801 750 const struct nlattr * const tb[]) 802 751 { 752 + struct nft_compat_net *cn; 803 753 struct nft_xt *nft_match; 804 754 struct xt_match *match; 805 755 unsigned int matchsize; ··· 817 765 rev = ntohl(nla_get_be32(tb[NFTA_MATCH_REV])); 818 766 family = ctx->family; 819 767 768 + cn = nft_compat_pernet(ctx->net); 769 + 820 770 /* Re-use the existing match if it's already loaded. */ 821 - list_for_each_entry(nft_match, &nft_match_list, head) { 771 + list_for_each_entry(nft_match, &cn->nft_match_list, head) { 822 772 struct xt_match *match = nft_match->ops.data; 823 773 824 774 if (nft_match_cmp(match, mt_name, rev, family)) ··· 843 789 goto err; 844 790 } 845 791 846 - nft_match->refcnt = 0; 792 + refcount_set(&nft_match->refcnt, 0); 847 793 nft_match->ops.type = &nft_match_type; 848 794 nft_match->ops.eval = nft_match_eval; 849 795 nft_match->ops.init = nft_match_init; 850 796 nft_match->ops.destroy = nft_match_destroy; 797 + nft_match->ops.activate = nft_compat_activate_mt; 798 + nft_match->ops.deactivate = nft_compat_deactivate; 851 799 nft_match->ops.dump = nft_match_dump; 852 800 nft_match->ops.validate = nft_match_validate; 853 801 nft_match->ops.data = match; ··· 866 810 867 811 nft_match->ops.size = matchsize; 868 812 869 - list_add(&nft_match->head, &nft_match_list); 813 + nft_match->listcnt = 1; 814 + list_add(&nft_match->head, &cn->nft_match_list); 870 815 871 816 return &nft_match->ops; 872 817 err: ··· 883 826 .owner = THIS_MODULE, 884 827 }; 885 828 886 - static LIST_HEAD(nft_target_list); 887 - 888 - static struct nft_expr_type nft_target_type; 889 - 890 829 static bool nft_target_cmp(const struct xt_target *tg, 891 830 const char *name, u32 rev, u32 family) 892 831 { ··· 894 841 nft_target_select_ops(const struct nft_ctx *ctx, 895 842 const struct nlattr * const tb[]) 896 843 { 844 + struct nft_compat_net *cn; 897 845 struct nft_xt *nft_target; 898 846 struct xt_target *target; 899 847 char *tg_name; ··· 915 861 strcmp(tg_name, "standard") == 0) 916 862 return ERR_PTR(-EINVAL); 917 863 864 + cn = nft_compat_pernet(ctx->net); 918 865 /* Re-use the existing target if it's already loaded. */ 919 - list_for_each_entry(nft_target, &nft_target_list, head) { 866 + list_for_each_entry(nft_target, &cn->nft_target_list, head) { 920 867 struct xt_target *target = nft_target->ops.data; 921 868 922 869 if (!target->target) ··· 948 893 goto err; 949 894 } 950 895 951 - nft_target->refcnt = 0; 896 + refcount_set(&nft_target->refcnt, 0); 952 897 nft_target->ops.type = &nft_target_type; 953 898 nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); 954 899 nft_target->ops.init = nft_target_init; 955 900 nft_target->ops.destroy = nft_target_destroy; 901 + nft_target->ops.activate = nft_compat_activate_tg; 902 + nft_target->ops.deactivate = nft_compat_deactivate; 956 903 nft_target->ops.dump = nft_target_dump; 957 904 nft_target->ops.validate = nft_target_validate; 958 905 nft_target->ops.data = target; ··· 964 907 else 965 908 nft_target->ops.eval = nft_target_eval_xt; 966 909 967 - list_add(&nft_target->head, &nft_target_list); 910 + nft_target->listcnt = 1; 911 + list_add(&nft_target->head, &cn->nft_target_list); 968 912 969 913 return &nft_target->ops; 970 914 err: ··· 981 923 .owner = THIS_MODULE, 982 924 }; 983 925 926 + static int __net_init nft_compat_init_net(struct net *net) 927 + { 928 + struct nft_compat_net *cn = nft_compat_pernet(net); 929 + 930 + INIT_LIST_HEAD(&cn->nft_target_list); 931 + INIT_LIST_HEAD(&cn->nft_match_list); 932 + 933 + return 0; 934 + } 935 + 936 + static void __net_exit nft_compat_exit_net(struct net *net) 937 + { 938 + struct nft_compat_net *cn = nft_compat_pernet(net); 939 + struct nft_xt *xt, *next; 940 + 941 + if (list_empty(&cn->nft_match_list) && 942 + list_empty(&cn->nft_target_list)) 943 + return; 944 + 945 + /* If there was an error that caused nft_xt expr to not be initialized 946 + * fully and noone else requested the same expression later, the lists 947 + * contain 0-refcount entries that still hold module reference. 948 + * 949 + * Clean them here. 950 + */ 951 + mutex_lock(&net->nft.commit_mutex); 952 + list_for_each_entry_safe(xt, next, &cn->nft_target_list, head) { 953 + struct xt_target *target = xt->ops.data; 954 + 955 + list_del_init(&xt->head); 956 + 957 + if (refcount_read(&xt->refcnt)) 958 + continue; 959 + module_put(target->me); 960 + kfree(xt); 961 + } 962 + 963 + list_for_each_entry_safe(xt, next, &cn->nft_match_list, head) { 964 + struct xt_match *match = xt->ops.data; 965 + 966 + list_del_init(&xt->head); 967 + 968 + if (refcount_read(&xt->refcnt)) 969 + continue; 970 + module_put(match->me); 971 + kfree(xt); 972 + } 973 + mutex_unlock(&net->nft.commit_mutex); 974 + } 975 + 976 + static struct pernet_operations nft_compat_net_ops = { 977 + .init = nft_compat_init_net, 978 + .exit = nft_compat_exit_net, 979 + .id = &nft_compat_net_id, 980 + .size = sizeof(struct nft_compat_net), 981 + }; 982 + 984 983 static int __init nft_compat_module_init(void) 985 984 { 986 985 int ret; 987 986 987 + ret = register_pernet_subsys(&nft_compat_net_ops); 988 + if (ret < 0) 989 + goto err_target; 990 + 988 991 ret = nft_register_expr(&nft_match_type); 989 992 if (ret < 0) 990 - return ret; 993 + goto err_pernet; 991 994 992 995 ret = nft_register_expr(&nft_target_type); 993 996 if (ret < 0) ··· 1061 942 } 1062 943 1063 944 return ret; 1064 - 1065 945 err_target: 1066 946 nft_unregister_expr(&nft_target_type); 1067 947 err_match: 1068 948 nft_unregister_expr(&nft_match_type); 949 + err_pernet: 950 + unregister_pernet_subsys(&nft_compat_net_ops); 1069 951 return ret; 1070 952 } 1071 953 1072 954 static void __exit nft_compat_module_exit(void) 1073 955 { 1074 - struct nft_xt *xt, *next; 1075 - 1076 - /* list should be empty here, it can be non-empty only in case there 1077 - * was an error that caused nft_xt expr to not be initialized fully 1078 - * and noone else requested the same expression later. 1079 - * 1080 - * In this case, the lists contain 0-refcount entries that still 1081 - * hold module reference. 1082 - */ 1083 - list_for_each_entry_safe(xt, next, &nft_target_list, head) { 1084 - struct xt_target *target = xt->ops.data; 1085 - 1086 - if (WARN_ON_ONCE(xt->refcnt)) 1087 - continue; 1088 - module_put(target->me); 1089 - kfree(xt); 1090 - } 1091 - 1092 - list_for_each_entry_safe(xt, next, &nft_match_list, head) { 1093 - struct xt_match *match = xt->ops.data; 1094 - 1095 - if (WARN_ON_ONCE(xt->refcnt)) 1096 - continue; 1097 - module_put(match->me); 1098 - kfree(xt); 1099 - } 1100 956 nfnetlink_subsys_unregister(&nfnl_compat_subsys); 1101 957 nft_unregister_expr(&nft_target_type); 1102 958 nft_unregister_expr(&nft_match_type); 959 + unregister_pernet_subsys(&nft_compat_net_ops); 1103 960 } 1104 961 1105 962 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_NFT_COMPAT);

+10 -10

net/netrom/nr_timer.c

··· 52 52 { 53 53 struct nr_sock *nr = nr_sk(sk); 54 54 55 - mod_timer(&nr->t1timer, jiffies + nr->t1); 55 + sk_reset_timer(sk, &nr->t1timer, jiffies + nr->t1); 56 56 } 57 57 58 58 void nr_start_t2timer(struct sock *sk) 59 59 { 60 60 struct nr_sock *nr = nr_sk(sk); 61 61 62 - mod_timer(&nr->t2timer, jiffies + nr->t2); 62 + sk_reset_timer(sk, &nr->t2timer, jiffies + nr->t2); 63 63 } 64 64 65 65 void nr_start_t4timer(struct sock *sk) 66 66 { 67 67 struct nr_sock *nr = nr_sk(sk); 68 68 69 - mod_timer(&nr->t4timer, jiffies + nr->t4); 69 + sk_reset_timer(sk, &nr->t4timer, jiffies + nr->t4); 70 70 } 71 71 72 72 void nr_start_idletimer(struct sock *sk) ··· 74 74 struct nr_sock *nr = nr_sk(sk); 75 75 76 76 if (nr->idle > 0) 77 - mod_timer(&nr->idletimer, jiffies + nr->idle); 77 + sk_reset_timer(sk, &nr->idletimer, jiffies + nr->idle); 78 78 } 79 79 80 80 void nr_start_heartbeat(struct sock *sk) 81 81 { 82 - mod_timer(&sk->sk_timer, jiffies + 5 * HZ); 82 + sk_reset_timer(sk, &sk->sk_timer, jiffies + 5 * HZ); 83 83 } 84 84 85 85 void nr_stop_t1timer(struct sock *sk) 86 86 { 87 - del_timer(&nr_sk(sk)->t1timer); 87 + sk_stop_timer(sk, &nr_sk(sk)->t1timer); 88 88 } 89 89 90 90 void nr_stop_t2timer(struct sock *sk) 91 91 { 92 - del_timer(&nr_sk(sk)->t2timer); 92 + sk_stop_timer(sk, &nr_sk(sk)->t2timer); 93 93 } 94 94 95 95 void nr_stop_t4timer(struct sock *sk) 96 96 { 97 - del_timer(&nr_sk(sk)->t4timer); 97 + sk_stop_timer(sk, &nr_sk(sk)->t4timer); 98 98 } 99 99 100 100 void nr_stop_idletimer(struct sock *sk) 101 101 { 102 - del_timer(&nr_sk(sk)->idletimer); 102 + sk_stop_timer(sk, &nr_sk(sk)->idletimer); 103 103 } 104 104 105 105 void nr_stop_heartbeat(struct sock *sk) 106 106 { 107 - del_timer(&sk->sk_timer); 107 + sk_stop_timer(sk, &sk->sk_timer); 108 108 } 109 109 110 110 int nr_t1timer_running(struct sock *sk)

+5

net/rose/rose_route.c

··· 850 850 851 851 /* 852 852 * Route a frame to an appropriate AX.25 connection. 853 + * A NULL ax25_cb indicates an internally generated frame. 853 854 */ 854 855 int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25) 855 856 { ··· 868 867 869 868 if (skb->len < ROSE_MIN_LEN) 870 869 return res; 870 + 871 + if (!ax25) 872 + return rose_loopback_queue(skb, NULL); 873 + 871 874 frametype = skb->data[2]; 872 875 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF); 873 876 if (frametype == ROSE_CALL_REQUEST &&

+5 -1

net/tls/tls_sw.c

··· 447 447 struct scatterlist *sge = sk_msg_elem(msg_en, start); 448 448 int rc; 449 449 450 + memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data)); 451 + 450 452 sge->offset += tls_ctx->tx.prepend_size; 451 453 sge->length -= tls_ctx->tx.prepend_size; 452 454 ··· 458 456 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE); 459 457 aead_request_set_crypt(aead_req, rec->sg_aead_in, 460 458 rec->sg_aead_out, 461 - data_len, tls_ctx->tx.iv); 459 + data_len, rec->iv_data); 462 460 463 461 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG, 464 462 tls_encrypt_done, sk); ··· 1903 1901 if (atomic_read(&ctx->encrypt_pending)) 1904 1902 crypto_wait_req(-EINPROGRESS, &ctx->async_wait); 1905 1903 1904 + release_sock(sk); 1906 1905 cancel_delayed_work_sync(&ctx->tx_work.work); 1906 + lock_sock(sk); 1907 1907 1908 1908 /* Tx whatever records we can transmit and abandon the rest */ 1909 1909 tls_tx_records(sk, -1);

+33 -30

net/xfrm/xfrm_policy.c

··· 680 680 mutex_unlock(&hash_resize_mutex); 681 681 } 682 682 683 - static void xfrm_hash_reset_inexact_table(struct net *net) 684 - { 685 - struct xfrm_pol_inexact_bin *b; 686 - 687 - lockdep_assert_held(&net->xfrm.xfrm_policy_lock); 688 - 689 - list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins) 690 - INIT_HLIST_HEAD(&b->hhead); 691 - } 692 - 693 683 /* Make sure *pol can be inserted into fastbin. 694 684 * Useful to check that later insert requests will be sucessful 695 685 * (provided xfrm_policy_lock is held throughout). ··· 823 833 u16 family) 824 834 { 825 835 unsigned int matched_s, matched_d; 826 - struct hlist_node *newpos = NULL; 827 836 struct xfrm_policy *policy, *p; 828 837 829 838 matched_s = 0; 830 839 matched_d = 0; 831 840 832 841 list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { 842 + struct hlist_node *newpos = NULL; 833 843 bool matches_s, matches_d; 834 844 835 845 if (!policy->bydst_reinsert) ··· 839 849 840 850 policy->bydst_reinsert = false; 841 851 hlist_for_each_entry(p, &n->hhead, bydst) { 842 - if (policy->priority >= p->priority) 852 + if (policy->priority > p->priority) 853 + newpos = &p->bydst; 854 + else if (policy->priority == p->priority && 855 + policy->pos > p->pos) 843 856 newpos = &p->bydst; 844 857 else 845 858 break; 846 859 } 847 860 848 861 if (newpos) 849 - hlist_add_behind(&policy->bydst, newpos); 862 + hlist_add_behind_rcu(&policy->bydst, newpos); 850 863 else 851 - hlist_add_head(&policy->bydst, &n->hhead); 864 + hlist_add_head_rcu(&policy->bydst, &n->hhead); 852 865 853 866 /* paranoia checks follow. 854 867 * Check that the reinserted policy matches at least ··· 886 893 struct rb_root *new, 887 894 u16 family) 888 895 { 889 - struct rb_node **p, *parent = NULL; 890 896 struct xfrm_pol_inexact_node *node; 897 + struct rb_node **p, *parent; 891 898 892 899 /* we should not have another subtree here */ 893 900 WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root)); 894 - 901 + restart: 902 + parent = NULL; 895 903 p = &new->rb_node; 896 904 while (*p) { 897 905 u8 prefixlen; ··· 912 918 } else { 913 919 struct xfrm_policy *tmp; 914 920 915 - hlist_for_each_entry(tmp, &node->hhead, bydst) 921 + hlist_for_each_entry(tmp, &n->hhead, bydst) { 916 922 tmp->bydst_reinsert = true; 917 - hlist_for_each_entry(tmp, &n->hhead, bydst) 918 - tmp->bydst_reinsert = true; 923 + hlist_del_rcu(&tmp->bydst); 924 + } 919 925 920 - INIT_HLIST_HEAD(&node->hhead); 921 926 xfrm_policy_inexact_list_reinsert(net, node, family); 922 927 923 928 if (node->prefixlen == n->prefixlen) { ··· 928 935 kfree_rcu(n, rcu); 929 936 n = node; 930 937 n->prefixlen = prefixlen; 931 - *p = new->rb_node; 932 - parent = NULL; 938 + goto restart; 933 939 } 934 940 } 935 941 ··· 957 965 family); 958 966 } 959 967 960 - hlist_for_each_entry(tmp, &v->hhead, bydst) 968 + hlist_for_each_entry(tmp, &v->hhead, bydst) { 961 969 tmp->bydst_reinsert = true; 962 - hlist_for_each_entry(tmp, &n->hhead, bydst) 963 - tmp->bydst_reinsert = true; 970 + hlist_del_rcu(&tmp->bydst); 971 + } 964 972 965 - INIT_HLIST_HEAD(&n->hhead); 966 973 xfrm_policy_inexact_list_reinsert(net, n, family); 967 974 } 968 975 ··· 1226 1235 } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); 1227 1236 1228 1237 spin_lock_bh(&net->xfrm.xfrm_policy_lock); 1238 + write_seqcount_begin(&xfrm_policy_hash_generation); 1229 1239 1230 1240 /* make sure that we can insert the indirect policies again before 1231 1241 * we start with destructive action. ··· 1270 1278 } 1271 1279 1272 1280 /* reset the bydst and inexact table in all directions */ 1273 - xfrm_hash_reset_inexact_table(net); 1274 - 1275 1281 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { 1276 - INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); 1282 + struct hlist_node *n; 1283 + 1284 + hlist_for_each_entry_safe(policy, n, 1285 + &net->xfrm.policy_inexact[dir], 1286 + bydst_inexact_list) 1287 + hlist_del_init(&policy->bydst_inexact_list); 1288 + 1277 1289 hmask = net->xfrm.policy_bydst[dir].hmask; 1278 1290 odst = net->xfrm.policy_bydst[dir].table; 1279 1291 for (i = hmask; i >= 0; i--) ··· 1309 1313 newpos = NULL; 1310 1314 chain = policy_hash_bysel(net, &policy->selector, 1311 1315 policy->family, dir); 1316 + 1317 + hlist_del_rcu(&policy->bydst); 1318 + 1312 1319 if (!chain) { 1313 1320 void *p = xfrm_policy_inexact_insert(policy, dir, 0); 1314 1321 ··· 1333 1334 1334 1335 out_unlock: 1335 1336 __xfrm_policy_inexact_flush(net); 1337 + write_seqcount_end(&xfrm_policy_hash_generation); 1336 1338 spin_unlock_bh(&net->xfrm.xfrm_policy_lock); 1337 1339 1338 1340 mutex_unlock(&hash_resize_mutex); ··· 2600 2600 dst_copy_metrics(dst1, dst); 2601 2601 2602 2602 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { 2603 - __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); 2603 + __u32 mark = 0; 2604 + 2605 + if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m) 2606 + mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]); 2604 2607 2605 2608 family = xfrm[i]->props.family; 2606 2609 dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,

+9 -4

net/xfrm/xfrm_user.c

··· 1488 1488 if (!ut[i].family) 1489 1489 ut[i].family = family; 1490 1490 1491 - if ((ut[i].mode == XFRM_MODE_TRANSPORT) && 1492 - (ut[i].family != prev_family)) 1493 - return -EINVAL; 1494 - 1491 + switch (ut[i].mode) { 1492 + case XFRM_MODE_TUNNEL: 1493 + case XFRM_MODE_BEET: 1494 + break; 1495 + default: 1496 + if (ut[i].family != prev_family) 1497 + return -EINVAL; 1498 + break; 1499 + } 1495 1500 if (ut[i].mode >= XFRM_MODE_MAX) 1496 1501 return -EINVAL; 1497 1502

+131 -22

tools/testing/selftests/net/xfrm_policy.sh

··· 28 28 SPI1=0x1 29 29 SPI2=0x2 30 30 31 + do_esp_policy() { 32 + local ns=$1 33 + local me=$2 34 + local remote=$3 35 + local lnet=$4 36 + local rnet=$5 37 + 38 + # to encrypt packets as they go out (includes forwarded packets that need encapsulation) 39 + ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow 40 + # to fwd decrypted packets after esp processing: 41 + ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow 42 + } 43 + 31 44 do_esp() { 32 45 local ns=$1 33 46 local me=$2 ··· 53 40 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet 54 41 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet 55 42 56 - # to encrypt packets as they go out (includes forwarded packets that need encapsulation) 57 - ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow 58 - # to fwd decrypted packets after esp processing: 59 - ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow 43 + do_esp_policy $ns $me $remote $lnet $rnet 44 + } 45 + 46 + # add policies with different netmasks, to make sure kernel carries 47 + # the policies contained within new netmask over when search tree is 48 + # re-built. 49 + # peer netns that are supposed to be encapsulated via esp have addresses 50 + # in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively. 51 + # 52 + # Adding a policy for '10.0.1.0/23' will make it necessary to 53 + # alter the prefix of 10.0.1.0 subnet. 54 + # In case new prefix overlaps with existing node, the node and all 55 + # policies it carries need to be merged with the existing one(s). 56 + # 57 + # Do that here. 58 + do_overlap() 59 + { 60 + local ns=$1 61 + 62 + # adds new nodes to tree (neither network exists yet in policy database). 63 + ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block 64 + 65 + # adds a new node in the 10.0.0.0/24 tree (dst node exists). 66 + ip -net $ns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block 67 + 68 + # adds a 10.2.0.0/23 node, but for different dst. 69 + ip -net $ns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block 70 + 71 + # dst now overlaps with the 10.0.1.0/24 ESP policy in fwd. 72 + # kernel must 'promote' existing one (10.0.0.0/24) to 10.0.0.0/23. 73 + # But 10.0.0.0/23 also includes existing 10.0.1.0/24, so that node 74 + # also has to be merged too, including source-sorted subtrees. 75 + # old: 76 + # 10.0.0.0/24 (node 1 in dst tree of the bin) 77 + # 10.1.0.0/24 (node in src tree of dst node 1) 78 + # 10.2.0.0/24 (node in src tree of dst node 1) 79 + # 10.0.1.0/24 (node 2 in dst tree of the bin) 80 + # 10.0.2.0/24 (node in src tree of dst node 2) 81 + # 10.2.0.0/24 (node in src tree of dst node 2) 82 + # 83 + # The next 'policy add' adds dst '10.0.0.0/23', which means 84 + # that dst node 1 and dst node 2 have to be merged including 85 + # the sub-tree. As no duplicates are allowed, policies in 86 + # the two '10.0.2.0/24' are also merged. 87 + # 88 + # after the 'add', internal search tree should look like this: 89 + # 10.0.0.0/23 (node in dst tree of bin) 90 + # 10.0.2.0/24 (node in src tree of dst node) 91 + # 10.1.0.0/24 (node in src tree of dst node) 92 + # 10.2.0.0/24 (node in src tree of dst node) 93 + # 94 + # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23. 95 + ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block 60 96 } 61 97 62 98 do_esp_policy_get_check() { ··· 222 160 return $lret 223 161 } 224 162 163 + check_exceptions() 164 + { 165 + logpostfix="$1" 166 + local lret=0 167 + 168 + # ping to .254 should be excluded from the tunnel (exception is in place). 169 + check_xfrm 0 254 170 + if [ $? -ne 0 ]; then 171 + echo "FAIL: expected ping to .254 to fail ($logpostfix)" 172 + lret=1 173 + else 174 + echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)" 175 + fi 176 + 177 + # ping to .253 should use use ipsec due to direct policy exception. 178 + check_xfrm 1 253 179 + if [ $? -ne 0 ]; then 180 + echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)" 181 + lret=1 182 + else 183 + echo "PASS: direct policy matches ($logpostfix)" 184 + fi 185 + 186 + # ping to .2 should use ipsec. 187 + check_xfrm 1 2 188 + if [ $? -ne 0 ]; then 189 + echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)" 190 + lret=1 191 + else 192 + echo "PASS: policy matches ($logpostfix)" 193 + fi 194 + 195 + return $lret 196 + } 197 + 225 198 #check for needed privileges 226 199 if [ "$(id -u)" -ne 0 ];then 227 200 echo "SKIP: Need root privileges" ··· 367 270 do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96 368 271 do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96 369 272 370 - # ping to .254 should now be excluded from the tunnel 371 - check_xfrm 0 254 273 + check_exceptions "exceptions" 372 274 if [ $? -ne 0 ]; then 373 - echo "FAIL: expected ping to .254 to fail" 374 275 ret=1 375 - else 376 - echo "PASS: ping to .254 bypassed ipsec tunnel" 377 276 fi 378 277 379 - # ping to .253 should use use ipsec due to direct policy exception. 380 - check_xfrm 1 253 278 + # insert block policies with adjacent/overlapping netmasks 279 + do_overlap ns3 280 + 281 + check_exceptions "exceptions and block policies" 381 282 if [ $? -ne 0 ]; then 382 - echo "FAIL: expected ping to .253 to use ipsec tunnel" 383 283 ret=1 384 - else 385 - echo "PASS: direct policy matches" 386 284 fi 387 285 388 - # ping to .2 should use ipsec. 389 - check_xfrm 1 2 390 - if [ $? -ne 0 ]; then 391 - echo "FAIL: expected ping to .2 to use ipsec tunnel" 392 - ret=1 393 - else 394 - echo "PASS: policy matches" 395 - fi 286 + for n in ns3 ns4;do 287 + ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125 288 + sleep $((RANDOM%5)) 289 + done 290 + 291 + check_exceptions "exceptions and block policies after hresh changes" 292 + 293 + # full flush of policy db, check everything gets freed incl. internal meta data 294 + ip -net ns3 xfrm policy flush 295 + 296 + do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 297 + do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28 298 + 299 + # move inexact policies to hash table 300 + ip -net ns3 xfrm policy set hthresh4 16 16 301 + 302 + sleep $((RANDOM%5)) 303 + check_exceptions "exceptions and block policies after hthresh change in ns3" 304 + 305 + # restore original hthresh settings -- move policies back to tables 306 + for n in ns3 ns4;do 307 + ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128 308 + sleep $((RANDOM%5)) 309 + done 310 + check_exceptions "exceptions and block policies after hresh change to normal" 396 311 397 312 for i in 1 2 3 4;do ip netns del ns$i;done 398 313

+31 -10

tools/testing/selftests/x86/protection_keys.c

··· 1133 1133 pkey_assert(err); 1134 1134 } 1135 1135 1136 + void become_child(void) 1137 + { 1138 + pid_t forkret; 1139 + 1140 + forkret = fork(); 1141 + pkey_assert(forkret >= 0); 1142 + dprintf3("[%d] fork() ret: %d\n", getpid(), forkret); 1143 + 1144 + if (!forkret) { 1145 + /* in the child */ 1146 + return; 1147 + } 1148 + exit(0); 1149 + } 1150 + 1136 1151 /* Assumes that all pkeys other than 'pkey' are unallocated */ 1137 1152 void test_pkey_alloc_exhaust(int *ptr, u16 pkey) 1138 1153 { ··· 1156 1141 int nr_allocated_pkeys = 0; 1157 1142 int i; 1158 1143 1159 - for (i = 0; i < NR_PKEYS*2; i++) { 1144 + for (i = 0; i < NR_PKEYS*3; i++) { 1160 1145 int new_pkey; 1161 1146 dprintf1("%s() alloc loop: %d\n", __func__, i); 1162 1147 new_pkey = alloc_pkey(); ··· 1167 1152 if ((new_pkey == -1) && (errno == ENOSPC)) { 1168 1153 dprintf2("%s() failed to allocate pkey after %d tries\n", 1169 1154 __func__, nr_allocated_pkeys); 1170 - break; 1155 + } else { 1156 + /* 1157 + * Ensure the number of successes never 1158 + * exceeds the number of keys supported 1159 + * in the hardware. 1160 + */ 1161 + pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1162 + allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1171 1163 } 1172 - pkey_assert(nr_allocated_pkeys < NR_PKEYS); 1173 - allocated_pkeys[nr_allocated_pkeys++] = new_pkey; 1164 + 1165 + /* 1166 + * Make sure that allocation state is properly 1167 + * preserved across fork(). 1168 + */ 1169 + if (i == NR_PKEYS*2) 1170 + become_child(); 1174 1171 } 1175 1172 1176 1173 dprintf3("%s()::%d\n", __func__, __LINE__); 1177 - 1178 - /* 1179 - * ensure it did not reach the end of the loop without 1180 - * failure: 1181 - */ 1182 - pkey_assert(i < NR_PKEYS*2); 1183 1174 1184 1175 /* 1185 1176 * There are 16 pkeys supported in hardware. Three are