···1668916689S: Maintained1669016690F: drivers/media/tuners/tuner-xc2028.*16691166911669216692+XDP (eXpress Data Path)1669316693+M: Alexei Starovoitov <ast@kernel.org>1669416694+M: Daniel Borkmann <daniel@iogearbox.net>1669516695+M: David S. Miller <davem@davemloft.net>1669616696+M: Jakub Kicinski <jakub.kicinski@netronome.com>1669716697+M: Jesper Dangaard Brouer <hawk@kernel.org>1669816698+M: John Fastabend <john.fastabend@gmail.com>1669916699+L: netdev@vger.kernel.org1670016700+L: xdp-newbies@vger.kernel.org1670116701+S: Supported1670216702+F: net/core/xdp.c1670316703+F: include/net/xdp.h1670416704+F: kernel/bpf/devmap.c1670516705+F: kernel/bpf/cpumap.c1670616706+F: include/trace/events/xdp.h1670716707+K: xdp1670816708+N: xdp1670916709+1669216710XDP SOCKETS (AF_XDP)1669316711M: Björn Töpel <bjorn.topel@intel.com>1669416712M: Magnus Karlsson <magnus.karlsson@intel.com>
···361361362362 /* Need to switch before accessing the thread stack. */363363 SWITCH_TO_KERNEL_CR3 scratch_reg=%rdi364364- movq %rsp, %rdi364364+ /* In the Xen PV case we already run on the thread stack. */365365+ ALTERNATIVE "movq %rsp, %rdi", "jmp .Lint80_keep_stack", X86_FEATURE_XENPV365366 movq PER_CPU_VAR(cpu_current_top_of_stack), %rsp366367367368 pushq 6*8(%rdi) /* regs->ss */···371370 pushq 3*8(%rdi) /* regs->cs */372371 pushq 2*8(%rdi) /* regs->ip */373372 pushq 1*8(%rdi) /* regs->orig_ax */374374-375373 pushq (%rdi) /* pt_regs->di */374374+.Lint80_keep_stack:375375+376376 pushq %rsi /* pt_regs->si */377377 xorl %esi, %esi /* nospec si */378378 pushq %rdx /* pt_regs->dx */
+18
arch/x86/include/asm/mmu_context.h
···178178179179void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk);180180181181+/*182182+ * Init a new mm. Used on mm copies, like at fork()183183+ * and on mm's that are brand-new, like at execve().184184+ */181185static inline int init_new_context(struct task_struct *tsk,182186 struct mm_struct *mm)183187{···232228} while (0)233229#endif234230231231+static inline void arch_dup_pkeys(struct mm_struct *oldmm,232232+ struct mm_struct *mm)233233+{234234+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS235235+ if (!cpu_feature_enabled(X86_FEATURE_OSPKE))236236+ return;237237+238238+ /* Duplicate the oldmm pkey state in mm: */239239+ mm->context.pkey_allocation_map = oldmm->context.pkey_allocation_map;240240+ mm->context.execute_only_pkey = oldmm->context.execute_only_pkey;241241+#endif242242+}243243+235244static inline int arch_dup_mmap(struct mm_struct *oldmm, struct mm_struct *mm)236245{246246+ arch_dup_pkeys(oldmm, mm);237247 paravirt_arch_dup_mmap(oldmm, mm);238248 return ldt_dup_context(oldmm, mm);239249}
+1
arch/x86/kernel/crash.c
···470470471471 kbuf.memsz = kbuf.bufsz;472472 kbuf.buf_align = ELF_CORE_HEADER_ALIGN;473473+ kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;473474 ret = kexec_add_buffer(&kbuf);474475 if (ret) {475476 vfree((void *)image->arch.elf_headers);
···297297298298__setup("tsc=", tsc_setup);299299300300-#define MAX_RETRIES 5301301-#define SMI_TRESHOLD 50000300300+#define MAX_RETRIES 5301301+#define TSC_DEFAULT_THRESHOLD 0x20000302302303303/*304304- * Read TSC and the reference counters. Take care of SMI disturbance304304+ * Read TSC and the reference counters. Take care of any disturbances305305 */306306static u64 tsc_read_refs(u64 *p, int hpet)307307{308308 u64 t1, t2;309309+ u64 thresh = tsc_khz ? tsc_khz >> 5 : TSC_DEFAULT_THRESHOLD;309310 int i;310311311312 for (i = 0; i < MAX_RETRIES; i++) {···316315 else317316 *p = acpi_pm_read_early();318317 t2 = get_cycles();319319- if ((t2 - t1) < SMI_TRESHOLD)318318+ if ((t2 - t1) < thresh)320319 return t2;321320 }322321 return ULLONG_MAX;···704703 * zero. In each wait loop iteration we read the TSC and check705704 * the delta to the previous read. We keep track of the min706705 * and max values of that delta. The delta is mostly defined707707- * by the IO time of the PIT access, so we can detect when a708708- * SMI/SMM disturbance happened between the two reads. If the706706+ * by the IO time of the PIT access, so we can detect when707707+ * any disturbance happened between the two reads. If the709708 * maximum time is significantly larger than the minimum time,710709 * then we discard the result and have another try.711710 *712711 * 2) Reference counter. If available we use the HPET or the713712 * PMTIMER as a reference to check the sanity of that value.714713 * We use separate TSC readouts and check inside of the715715- * reference read for a SMI/SMM disturbance. We dicard714714+ * reference read for any possible disturbance. We dicard716715 * disturbed values here as well. We do that around the PIT717716 * calibration delay loop as we have to wait for a certain718717 * amount of time anyway.···745744 if (ref1 == ref2)746745 continue;747746748748- /* Check, whether the sampling was disturbed by an SMI */747747+ /* Check, whether the sampling was disturbed */749748 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX)750749 continue;751750···12691268 */12701269static void tsc_refine_calibration_work(struct work_struct *work)12711270{12721272- static u64 tsc_start = -1, ref_start;12711271+ static u64 tsc_start = ULLONG_MAX, ref_start;12731272 static int hpet;12741273 u64 tsc_stop, ref_stop, delta;12751274 unsigned long freq;···12841283 * delayed the first time we expire. So set the workqueue12851284 * again once we know timers are working.12861285 */12871287- if (tsc_start == -1) {12861286+ if (tsc_start == ULLONG_MAX) {12871287+restart:12881288 /*12891289 * Only set hpet once, to avoid mixing hardware12901290 * if the hpet becomes enabled later.12911291 */12921292 hpet = is_hpet_enabled();12931293- schedule_delayed_work(&tsc_irqwork, HZ);12941293 tsc_start = tsc_read_refs(&ref_start, hpet);12941294+ schedule_delayed_work(&tsc_irqwork, HZ);12951295 return;12961296 }12971297···13021300 if (ref_start == ref_stop)13031301 goto out;1304130213051305- /* Check, whether the sampling was disturbed by an SMI */13061306- if (tsc_start == ULLONG_MAX || tsc_stop == ULLONG_MAX)13071307- goto out;13031303+ /* Check, whether the sampling was disturbed */13041304+ if (tsc_stop == ULLONG_MAX)13051305+ goto restart;1308130613091307 delta = tsc_stop - tsc_start;13101308 delta *= 1000000LL;
···10831083 /* Create a fresh bio_list for all subordinate requests */10841084 bio_list_on_stack[1] = bio_list_on_stack[0];10851085 bio_list_init(&bio_list_on_stack[0]);10861086-10871087- /*10881088- * Since we're recursing into make_request here, ensure10891089- * that we mark this bio as already having entered the queue.10901090- * If not, and the queue is going away, we can get stuck10911091- * forever on waiting for the queue reference to drop. But10921092- * that will never happen, as we're already holding a10931093- * reference to it.10941094- */10951095- bio_set_flag(bio, BIO_QUEUE_ENTERED);10961086 ret = q->make_request_fn(q, bio);10971097- bio_clear_flag(bio, BIO_QUEUE_ENTERED);1098108710991088 /* sort new bios into those for a lower level11001089 * and those for the same level
+10
block/blk-merge.c
···272272 /* there isn't chance to merge the splitted bio */273273 split->bi_opf |= REQ_NOMERGE;274274275275+ /*276276+ * Since we're recursing into make_request here, ensure277277+ * that we mark this bio as already having entered the queue.278278+ * If not, and the queue is going away, we can get stuck279279+ * forever on waiting for the queue reference to drop. But280280+ * that will never happen, as we're already holding a281281+ * reference to it.282282+ */283283+ bio_set_flag(*bio, BIO_QUEUE_ENTERED);284284+275285 bio_chain(split, *bio);276286 trace_block_split(q, split, (*bio)->bi_iter.bi_sector);277287 generic_make_request(*bio);
···949949 if (params->rx_dim_enabled)950950 __set_bit(MLX5E_RQ_STATE_AM, &c->rq.state);951951952952- if (params->pflags & MLX5E_PFLAG_RX_NO_CSUM_COMPLETE)952952+ if (MLX5E_GET_PFLAG(params, MLX5E_PFLAG_RX_NO_CSUM_COMPLETE))953953 __set_bit(MLX5E_RQ_STATE_NO_CSUM_COMPLETE, &c->rq.state);954954955955 return 0;
+23-2
drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
···11141114 struct mlx5e_priv *priv = netdev_priv(dev);11151115 struct mlx5e_rep_priv *rpriv = priv->ppriv;11161116 struct mlx5_eswitch_rep *rep = rpriv->rep;11171117- int ret;11171117+ int ret, pf_num;1118111811191119- ret = snprintf(buf, len, "%d", rep->vport - 1);11191119+ ret = mlx5_lag_get_pf_num(priv->mdev, &pf_num);11201120+ if (ret)11211121+ return ret;11221122+11231123+ if (rep->vport == FDB_UPLINK_VPORT)11241124+ ret = snprintf(buf, len, "p%d", pf_num);11251125+ else11261126+ ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1);11271127+11201128 if (ret >= len)11211129 return -EOPNOTSUPP;11221130···12721264 return 0;12731265}1274126612671267+static int mlx5e_uplink_rep_set_vf_vlan(struct net_device *dev, int vf, u16 vlan, u8 qos,12681268+ __be16 vlan_proto)12691269+{12701270+ netdev_warn_once(dev, "legacy vf vlan setting isn't supported in switchdev mode\n");12711271+12721272+ if (vlan != 0)12731273+ return -EOPNOTSUPP;12741274+12751275+ /* allow setting 0-vid for compatibility with libvirt */12761276+ return 0;12771277+}12781278+12751279static const struct switchdev_ops mlx5e_rep_switchdev_ops = {12761280 .switchdev_port_attr_get = mlx5e_attr_get,12771281};···13181298 .ndo_set_vf_rate = mlx5e_set_vf_rate,13191299 .ndo_get_vf_config = mlx5e_get_vf_config,13201300 .ndo_get_vf_stats = mlx5e_get_vf_stats,13011301+ .ndo_set_vf_vlan = mlx5e_uplink_rep_set_vf_vlan,13211302};1322130313231304bool mlx5e_eswitch_rep(struct net_device *netdev)
+8-14
drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
···11341134 int err = 0;11351135 u8 *smac_v;1136113611371137- if (vport->info.spoofchk && !is_valid_ether_addr(vport->info.mac)) {11381138- mlx5_core_warn(esw->dev,11391139- "vport[%d] configure ingress rules failed, illegal mac with spoofchk\n",11401140- vport->vport);11411141- return -EPERM;11421142- }11431143-11441137 esw_vport_cleanup_ingress_rules(esw, vport);1145113811461139 if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {···17211728 int vport_num;17221729 int err;1723173017241724- if (!MLX5_ESWITCH_MANAGER(dev))17311731+ if (!MLX5_VPORT_MANAGER(dev))17251732 return 0;1726173317271734 esw_info(dev,···1790179717911798void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)17921799{17931793- if (!esw || !MLX5_ESWITCH_MANAGER(esw->dev))18001800+ if (!esw || !MLX5_VPORT_MANAGER(esw->dev))17941801 return;1795180217961803 esw_info(esw->dev, "cleanup\n");···18201827 mutex_lock(&esw->state_lock);18211828 evport = &esw->vports[vport];1822182918231823- if (evport->info.spoofchk && !is_valid_ether_addr(mac)) {18301830+ if (evport->info.spoofchk && !is_valid_ether_addr(mac))18241831 mlx5_core_warn(esw->dev,18251825- "MAC invalidation is not allowed when spoofchk is on, vport(%d)\n",18321832+ "Set invalid MAC while spoofchk is on, vport(%d)\n",18261833 vport);18271827- err = -EPERM;18281828- goto unlock;18291829- }1830183418311835 err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);18321836 if (err) {···19691979 evport = &esw->vports[vport];19701980 pschk = evport->info.spoofchk;19711981 evport->info.spoofchk = spoofchk;19821982+ if (pschk && !is_valid_ether_addr(evport->info.mac))19831983+ mlx5_core_warn(esw->dev,19841984+ "Spoofchk in set while MAC is invalid, vport(%d)\n",19851985+ evport->vport);19721986 if (evport->enabled && esw->mode == SRIOV_LEGACY)19731987 err = esw_vport_ingress_config(esw, evport);19741988 if (err)
+21
drivers/net/ethernet/mellanox/mlx5/core/lag.c
···616616 }617617}618618619619+int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num)620620+{621621+ struct mlx5_lag *ldev;622622+ int n;623623+624624+ ldev = mlx5_lag_dev_get(dev);625625+ if (!ldev) {626626+ mlx5_core_warn(dev, "no lag device, can't get pf num\n");627627+ return -EINVAL;628628+ }629629+630630+ for (n = 0; n < MLX5_MAX_PORTS; n++)631631+ if (ldev->pf[n].dev == dev) {632632+ *pf_num = n;633633+ return 0;634634+ }635635+636636+ mlx5_core_warn(dev, "wasn't able to locate pf in the lag device\n");637637+ return -EINVAL;638638+}639639+619640/* Must be called with intf_mutex held */620641void mlx5_lag_remove(struct mlx5_core_dev *dev)621642{
···187187 MLX5_CAP_GEN(dev, lag_master);188188}189189190190+int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num);191191+190192void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol);191193void mlx5_lag_update(struct mlx5_core_dev *dev);192194
+3-2
drivers/net/ethernet/mellanox/mlx5/core/qp.c
···4444mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)4545{4646 struct mlx5_core_rsc_common *common;4747+ unsigned long flags;47484848- spin_lock(&table->lock);4949+ spin_lock_irqsave(&table->lock, flags);49505051 common = radix_tree_lookup(&table->tree, rsn);5152 if (common)5253 atomic_inc(&common->refcount);53545454- spin_unlock(&table->lock);5555+ spin_unlock_irqrestore(&table->lock, flags);55565657 return common;5758}
+4-4
drivers/net/ethernet/qlogic/qed/qed_dev.c
···795795796796/* get pq index according to PQ_FLAGS */797797static u16 *qed_init_qm_get_idx_from_flags(struct qed_hwfn *p_hwfn,798798- u32 pq_flags)798798+ unsigned long pq_flags)799799{800800 struct qed_qm_info *qm_info = &p_hwfn->qm_info;801801802802 /* Can't have multiple flags set here */803803- if (bitmap_weight((unsigned long *)&pq_flags,803803+ if (bitmap_weight(&pq_flags,804804 sizeof(pq_flags) * BITS_PER_BYTE) > 1) {805805- DP_ERR(p_hwfn, "requested multiple pq flags 0x%x\n", pq_flags);805805+ DP_ERR(p_hwfn, "requested multiple pq flags 0x%lx\n", pq_flags);806806 goto err;807807 }808808809809 if (!(qed_get_pq_flags(p_hwfn) & pq_flags)) {810810- DP_ERR(p_hwfn, "pq flag 0x%x is not set\n", pq_flags);810810+ DP_ERR(p_hwfn, "pq flag 0x%lx is not set\n", pq_flags);811811 goto err;812812 }813813
···24512451{24522452 struct qed_ll2_tx_pkt_info pkt;24532453 const skb_frag_t *frag;24542454+ u8 flags = 0, nr_frags;24542455 int rc = -EINVAL, i;24552456 dma_addr_t mapping;24562457 u16 vlan = 0;24572457- u8 flags = 0;2458245824592459 if (unlikely(skb->ip_summed != CHECKSUM_NONE)) {24602460 DP_INFO(cdev, "Cannot transmit a checksummed packet\n");24612461 return -EINVAL;24622462 }2463246324642464- if (1 + skb_shinfo(skb)->nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) {24642464+ /* Cache number of fragments from SKB since SKB may be freed by24652465+ * the completion routine after calling qed_ll2_prepare_tx_packet()24662466+ */24672467+ nr_frags = skb_shinfo(skb)->nr_frags;24682468+24692469+ if (1 + nr_frags > CORE_LL2_TX_MAX_BDS_PER_PACKET) {24652470 DP_ERR(cdev, "Cannot transmit a packet with %d fragments\n",24662466- 1 + skb_shinfo(skb)->nr_frags);24712471+ 1 + nr_frags);24672472 return -EINVAL;24682473 }24692474···24902485 }2491248624922487 memset(&pkt, 0, sizeof(pkt));24932493- pkt.num_of_bds = 1 + skb_shinfo(skb)->nr_frags;24882488+ pkt.num_of_bds = 1 + nr_frags;24942489 pkt.vlan = vlan;24952490 pkt.bd_flags = flags;24962491 pkt.tx_dest = QED_LL2_TX_DEST_NW;···25012496 test_bit(QED_LL2_XMIT_FLAGS_FIP_DISCOVERY, &xmit_flags))25022497 pkt.remove_stag = true;2503249824992499+ /* qed_ll2_prepare_tx_packet() may actually send the packet if25002500+ * there are no fragments in the skb and subsequently the completion25012501+ * routine may run and free the SKB, so no dereferencing the SKB25022502+ * beyond this point unless skb has any fragments.25032503+ */25042504 rc = qed_ll2_prepare_tx_packet(&cdev->hwfns[0], cdev->ll2->handle,25052505 &pkt, 1);25062506 if (rc)25072507 goto err;2508250825092509- for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {25092509+ for (i = 0; i < nr_frags; i++) {25102510 frag = &skb_shinfo(skb)->frags[i];2511251125122512 mapping = skb_frag_dma_map(&cdev->pdev->dev, frag, 0,
···260260/**261261 * struct irq_affinity_desc - Interrupt affinity descriptor262262 * @mask: cpumask to hold the affinity assignment263263+ * @is_managed: 1 if the interrupt is managed internally263264 */264265struct irq_affinity_desc {265266 struct cpumask mask;
+5-1
include/linux/sched/wake_q.h
···2424 * called near the end of a function. Otherwise, the list can be2525 * re-initialized for later re-use by wake_q_init().2626 *2727- * Note that this can cause spurious wakeups. schedule() callers2727+ * NOTE that this can cause spurious wakeups. schedule() callers2828 * must ensure the call is done inside a loop, confirming that the2929 * wakeup condition has in fact occurred.3030+ *3131+ * NOTE that there is no guarantee the wakeup will happen any later than the3232+ * wake_q_add() location. Therefore task must be ready to be woken at the3333+ * location of the wake_q_add().3034 */31353236#include <linux/sched.h>
···307307 * MB (A) MB (B)308308 * [L] cond [L] tsk309309 */310310- smp_rmb(); /* (B) */310310+ smp_mb(); /* (B) */311311312312 /*313313 * Avoid using task_rcu_dereference() magic as long as we are careful,
+8-5
kernel/futex.c
···14521452 if (WARN(q->pi_state || q->rt_waiter, "refusing to wake PI futex\n"))14531453 return;1454145414551455- /*14561456- * Queue the task for later wakeup for after we've released14571457- * the hb->lock. wake_q_add() grabs reference to p.14581458- */14591459- wake_q_add(wake_q, p);14551455+ get_task_struct(p);14601456 __unqueue_futex(q);14611457 /*14621458 * The waiting task can free the futex_q as soon as q->lock_ptr = NULL···14621466 * plist_del in __unqueue_futex().14631467 */14641468 smp_store_release(&q->lock_ptr, NULL);14691469+14701470+ /*14711471+ * Queue the task for later wakeup for after we've released14721472+ * the hb->lock. wake_q_add() grabs reference to p.14731473+ */14741474+ wake_q_add(wake_q, p);14751475+ put_task_struct(p);14651476}1466147714671478/*
+1-1
kernel/irq/irqdesc.c
···457457458458 /* Validate affinity mask(s) */459459 if (affinity) {460460- for (i = 0; i < cnt; i++, i++) {460460+ for (i = 0; i < cnt; i++) {461461 if (cpumask_empty(&affinity[i].mask))462462 return -EINVAL;463463 }
+3
kernel/irq/manage.c
···393393 }394394395395 cpumask_and(&mask, cpu_online_mask, set);396396+ if (cpumask_empty(&mask))397397+ cpumask_copy(&mask, cpu_online_mask);398398+396399 if (node != NUMA_NO_NODE) {397400 const struct cpumask *nodemask = cpumask_of_node(node);398401
+9-2
kernel/locking/rwsem-xadd.c
···198198 woken++;199199 tsk = waiter->task;200200201201- wake_q_add(wake_q, tsk);201201+ get_task_struct(tsk);202202 list_del(&waiter->list);203203 /*204204- * Ensure that the last operation is setting the reader204204+ * Ensure calling get_task_struct() before setting the reader205205 * waiter to nil such that rwsem_down_read_failed() cannot206206 * race with do_exit() by always holding a reference count207207 * to the task to wakeup.208208 */209209 smp_store_release(&waiter->task, NULL);210210+ /*211211+ * Ensure issuing the wakeup (either by us or someone else)212212+ * after setting the reader waiter to nil.213213+ */214214+ wake_q_add(wake_q, tsk);215215+ /* wake_q_add() already take the task ref */216216+ put_task_struct(tsk);210217 }211218212219 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
+16-3
kernel/sched/core.c
···396396#endif397397#endif398398399399+/**400400+ * wake_q_add() - queue a wakeup for 'later' waking.401401+ * @head: the wake_q_head to add @task to402402+ * @task: the task to queue for 'later' wakeup403403+ *404404+ * Queue a task for later wakeup, most likely by the wake_up_q() call in the405405+ * same context, _HOWEVER_ this is not guaranteed, the wakeup can come406406+ * instantly.407407+ *408408+ * This function must be used as-if it were wake_up_process(); IOW the task409409+ * must be ready to be woken at this location.410410+ */399411void wake_q_add(struct wake_q_head *head, struct task_struct *task)400412{401413 struct wake_q_node *node = &task->wake_q;···417405 * its already queued (either by us or someone else) and will get the418406 * wakeup due to that.419407 *420420- * This cmpxchg() executes a full barrier, which pairs with the full421421- * barrier executed by the wakeup in wake_up_q().408408+ * In order to ensure that a pending wakeup will observe our pending409409+ * state, even in the failed case, an explicit smp_mb() must be used.422410 */423423- if (cmpxchg(&node->next, NULL, WAKE_Q_TAIL))411411+ smp_mb__before_atomic();412412+ if (cmpxchg_relaxed(&node->next, NULL, WAKE_Q_TAIL))424413 return;425414426415 get_task_struct(task);
+1
kernel/time/posix-cpu-timers.c
···685685 * set up the signal and overrun bookkeeping.686686 */687687 timer->it.cpu.incr = timespec64_to_ns(&new->it_interval);688688+ timer->it_interval = ns_to_ktime(timer->it.cpu.incr);688689689690 /*690691 * This acts as a modification timestamp for the timer,
-12
mm/page_alloc.c
···57015701 cond_resched();57025702 }57035703 }57045704-#ifdef CONFIG_SPARSEMEM57055705- /*57065706- * If the zone does not span the rest of the section then57075707- * we should at least initialize those pages. Otherwise we57085708- * could blow up on a poisoned page in some paths which depend57095709- * on full sections being initialized (e.g. memory hotplug).57105710- */57115711- while (end_pfn % PAGES_PER_SECTION) {57125712- __init_single_page(pfn_to_page(end_pfn), end_pfn, zone, nid);57135713- end_pfn++;57145714- }57155715-#endif57165704}5717570557185706#ifdef CONFIG_ZONE_DEVICE
+6-3
net/bridge/netfilter/ebtables.c
···2293229322942294 xt_compat_lock(NFPROTO_BRIDGE);2295229522962296- ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);22972297- if (ret < 0)22982298- goto out_unlock;22962296+ if (tmp.nentries) {22972297+ ret = xt_compat_init_offsets(NFPROTO_BRIDGE, tmp.nentries);22982298+ if (ret < 0)22992299+ goto out_unlock;23002300+ }23012301+22992302 ret = compat_copy_entries(entries_tmp, tmp.entries_size, &state);23002303 if (ret < 0)23012304 goto out_unlock;
+3
net/core/dev.c
···87128712 set_bit(__LINK_STATE_PRESENT, &dev->state);87138713 set_bit(__LINK_STATE_START, &dev->state);8714871487158715+ /* napi_busy_loop stats accounting wants this */87168716+ dev_net_set(dev, &init_net);87178717+87158718 /* Note : We dont allocate pcpu_refcnt for dummy devices,87168719 * because users of this 'device' dont need to change87178720 * its refcount.
···850850851851/*852852 * Route a frame to an appropriate AX.25 connection.853853+ * A NULL ax25_cb indicates an internally generated frame.853854 */854855int rose_route_frame(struct sk_buff *skb, ax25_cb *ax25)855856{···868867869868 if (skb->len < ROSE_MIN_LEN)870869 return res;870870+871871+ if (!ax25)872872+ return rose_loopback_queue(skb, NULL);873873+871874 frametype = skb->data[2];872875 lci = ((skb->data[0] << 8) & 0xF00) + ((skb->data[1] << 0) & 0x0FF);873876 if (frametype == ROSE_CALL_REQUEST &&
+5-1
net/tls/tls_sw.c
···447447 struct scatterlist *sge = sk_msg_elem(msg_en, start);448448 int rc;449449450450+ memcpy(rec->iv_data, tls_ctx->tx.iv, sizeof(rec->iv_data));451451+450452 sge->offset += tls_ctx->tx.prepend_size;451453 sge->length -= tls_ctx->tx.prepend_size;452454···458456 aead_request_set_ad(aead_req, TLS_AAD_SPACE_SIZE);459457 aead_request_set_crypt(aead_req, rec->sg_aead_in,460458 rec->sg_aead_out,461461- data_len, tls_ctx->tx.iv);459459+ data_len, rec->iv_data);462460463461 aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,464462 tls_encrypt_done, sk);···19031901 if (atomic_read(&ctx->encrypt_pending))19041902 crypto_wait_req(-EINPROGRESS, &ctx->async_wait);1905190319041904+ release_sock(sk);19061905 cancel_delayed_work_sync(&ctx->tx_work.work);19061906+ lock_sock(sk);1907190719081908 /* Tx whatever records we can transmit and abandon the rest */19091909 tls_tx_records(sk, -1);
+33-30
net/xfrm/xfrm_policy.c
···680680 mutex_unlock(&hash_resize_mutex);681681}682682683683-static void xfrm_hash_reset_inexact_table(struct net *net)684684-{685685- struct xfrm_pol_inexact_bin *b;686686-687687- lockdep_assert_held(&net->xfrm.xfrm_policy_lock);688688-689689- list_for_each_entry(b, &net->xfrm.inexact_bins, inexact_bins)690690- INIT_HLIST_HEAD(&b->hhead);691691-}692692-693683/* Make sure *pol can be inserted into fastbin.694684 * Useful to check that later insert requests will be sucessful695685 * (provided xfrm_policy_lock is held throughout).···823833 u16 family)824834{825835 unsigned int matched_s, matched_d;826826- struct hlist_node *newpos = NULL;827836 struct xfrm_policy *policy, *p;828837829838 matched_s = 0;830839 matched_d = 0;831840832841 list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {842842+ struct hlist_node *newpos = NULL;833843 bool matches_s, matches_d;834844835845 if (!policy->bydst_reinsert)···839849840850 policy->bydst_reinsert = false;841851 hlist_for_each_entry(p, &n->hhead, bydst) {842842- if (policy->priority >= p->priority)852852+ if (policy->priority > p->priority)853853+ newpos = &p->bydst;854854+ else if (policy->priority == p->priority &&855855+ policy->pos > p->pos)843856 newpos = &p->bydst;844857 else845858 break;846859 }847860848861 if (newpos)849849- hlist_add_behind(&policy->bydst, newpos);862862+ hlist_add_behind_rcu(&policy->bydst, newpos);850863 else851851- hlist_add_head(&policy->bydst, &n->hhead);864864+ hlist_add_head_rcu(&policy->bydst, &n->hhead);852865853866 /* paranoia checks follow.854867 * Check that the reinserted policy matches at least···886893 struct rb_root *new,887894 u16 family)888895{889889- struct rb_node **p, *parent = NULL;890896 struct xfrm_pol_inexact_node *node;897897+ struct rb_node **p, *parent;891898892899 /* we should not have another subtree here */893900 WARN_ON_ONCE(!RB_EMPTY_ROOT(&n->root));894894-901901+restart:902902+ parent = NULL;895903 p = &new->rb_node;896904 while (*p) {897905 u8 prefixlen;···912918 } else {913919 struct xfrm_policy *tmp;914920915915- hlist_for_each_entry(tmp, &node->hhead, bydst)921921+ hlist_for_each_entry(tmp, &n->hhead, bydst) {916922 tmp->bydst_reinsert = true;917917- hlist_for_each_entry(tmp, &n->hhead, bydst)918918- tmp->bydst_reinsert = true;923923+ hlist_del_rcu(&tmp->bydst);924924+ }919925920920- INIT_HLIST_HEAD(&node->hhead);921926 xfrm_policy_inexact_list_reinsert(net, node, family);922927923928 if (node->prefixlen == n->prefixlen) {···928935 kfree_rcu(n, rcu);929936 n = node;930937 n->prefixlen = prefixlen;931931- *p = new->rb_node;932932- parent = NULL;938938+ goto restart;933939 }934940 }935941···957965 family);958966 }959967960960- hlist_for_each_entry(tmp, &v->hhead, bydst)968968+ hlist_for_each_entry(tmp, &v->hhead, bydst) {961969 tmp->bydst_reinsert = true;962962- hlist_for_each_entry(tmp, &n->hhead, bydst)963963- tmp->bydst_reinsert = true;970970+ hlist_del_rcu(&tmp->bydst);971971+ }964972965965- INIT_HLIST_HEAD(&n->hhead);966973 xfrm_policy_inexact_list_reinsert(net, n, family);967974}968975···12261235 } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));1227123612281237 spin_lock_bh(&net->xfrm.xfrm_policy_lock);12381238+ write_seqcount_begin(&xfrm_policy_hash_generation);1229123912301240 /* make sure that we can insert the indirect policies again before12311241 * we start with destructive action.···12701278 }1271127912721280 /* reset the bydst and inexact table in all directions */12731273- xfrm_hash_reset_inexact_table(net);12741274-12751281 for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {12761276- INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);12821282+ struct hlist_node *n;12831283+12841284+ hlist_for_each_entry_safe(policy, n,12851285+ &net->xfrm.policy_inexact[dir],12861286+ bydst_inexact_list)12871287+ hlist_del_init(&policy->bydst_inexact_list);12881288+12771289 hmask = net->xfrm.policy_bydst[dir].hmask;12781290 odst = net->xfrm.policy_bydst[dir].table;12791291 for (i = hmask; i >= 0; i--)···13091313 newpos = NULL;13101314 chain = policy_hash_bysel(net, &policy->selector,13111315 policy->family, dir);13161316+13171317+ hlist_del_rcu(&policy->bydst);13181318+13121319 if (!chain) {13131320 void *p = xfrm_policy_inexact_insert(policy, dir, 0);13141321···1333133413341335out_unlock:13351336 __xfrm_policy_inexact_flush(net);13371337+ write_seqcount_end(&xfrm_policy_hash_generation);13361338 spin_unlock_bh(&net->xfrm.xfrm_policy_lock);1337133913381340 mutex_unlock(&hash_resize_mutex);···26002600 dst_copy_metrics(dst1, dst);2601260126022602 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {26032603- __u32 mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);26032603+ __u32 mark = 0;26042604+26052605+ if (xfrm[i]->props.smark.v || xfrm[i]->props.smark.m)26062606+ mark = xfrm_smark_get(fl->flowi_mark, xfrm[i]);2604260726052608 family = xfrm[i]->props.family;26062609 dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
+9-4
net/xfrm/xfrm_user.c
···14881488 if (!ut[i].family)14891489 ut[i].family = family;1490149014911491- if ((ut[i].mode == XFRM_MODE_TRANSPORT) &&14921492- (ut[i].family != prev_family))14931493- return -EINVAL;14941494-14911491+ switch (ut[i].mode) {14921492+ case XFRM_MODE_TUNNEL:14931493+ case XFRM_MODE_BEET:14941494+ break;14951495+ default:14961496+ if (ut[i].family != prev_family)14971497+ return -EINVAL;14981498+ break;14991499+ }14951500 if (ut[i].mode >= XFRM_MODE_MAX)14961501 return -EINVAL;14971502
+131-22
tools/testing/selftests/net/xfrm_policy.sh
···2828SPI1=0x12929SPI2=0x230303131+do_esp_policy() {3232+ local ns=$13333+ local me=$23434+ local remote=$33535+ local lnet=$43636+ local rnet=$53737+3838+ # to encrypt packets as they go out (includes forwarded packets that need encapsulation)3939+ ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow4040+ # to fwd decrypted packets after esp processing:4141+ ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow4242+}4343+3144do_esp() {3245 local ns=$13346 local me=$2···5340 ip -net $ns xfrm state add src $remote dst $me proto esp spi $spi_in enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $rnet dst $lnet5441 ip -net $ns xfrm state add src $me dst $remote proto esp spi $spi_out enc aes $KEY_AES auth sha1 $KEY_SHA mode tunnel sel src $lnet dst $rnet55425656- # to encrypt packets as they go out (includes forwarded packets that need encapsulation)5757- ip -net $ns xfrm policy add src $lnet dst $rnet dir out tmpl src $me dst $remote proto esp mode tunnel priority 100 action allow5858- # to fwd decrypted packets after esp processing:5959- ip -net $ns xfrm policy add src $rnet dst $lnet dir fwd tmpl src $remote dst $me proto esp mode tunnel priority 100 action allow4343+ do_esp_policy $ns $me $remote $lnet $rnet4444+}4545+4646+# add policies with different netmasks, to make sure kernel carries4747+# the policies contained within new netmask over when search tree is4848+# re-built.4949+# peer netns that are supposed to be encapsulated via esp have addresses5050+# in the 10.0.1.0/24 and 10.0.2.0/24 subnets, respectively.5151+#5252+# Adding a policy for '10.0.1.0/23' will make it necessary to5353+# alter the prefix of 10.0.1.0 subnet.5454+# In case new prefix overlaps with existing node, the node and all5555+# policies it carries need to be merged with the existing one(s).5656+#5757+# Do that here.5858+do_overlap()5959+{6060+ local ns=$16161+6262+ # adds new nodes to tree (neither network exists yet in policy database).6363+ ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block6464+6565+ # adds a new node in the 10.0.0.0/24 tree (dst node exists).6666+ ip -net $ns xfrm policy add src 10.2.0.0/24 dst 10.0.0.0/24 dir fwd priority 200 action block6767+6868+ # adds a 10.2.0.0/23 node, but for different dst.6969+ ip -net $ns xfrm policy add src 10.2.0.0/23 dst 10.0.1.0/24 dir fwd priority 200 action block7070+7171+ # dst now overlaps with the 10.0.1.0/24 ESP policy in fwd.7272+ # kernel must 'promote' existing one (10.0.0.0/24) to 10.0.0.0/23.7373+ # But 10.0.0.0/23 also includes existing 10.0.1.0/24, so that node7474+ # also has to be merged too, including source-sorted subtrees.7575+ # old:7676+ # 10.0.0.0/24 (node 1 in dst tree of the bin)7777+ # 10.1.0.0/24 (node in src tree of dst node 1)7878+ # 10.2.0.0/24 (node in src tree of dst node 1)7979+ # 10.0.1.0/24 (node 2 in dst tree of the bin)8080+ # 10.0.2.0/24 (node in src tree of dst node 2)8181+ # 10.2.0.0/24 (node in src tree of dst node 2)8282+ #8383+ # The next 'policy add' adds dst '10.0.0.0/23', which means8484+ # that dst node 1 and dst node 2 have to be merged including8585+ # the sub-tree. As no duplicates are allowed, policies in8686+ # the two '10.0.2.0/24' are also merged.8787+ #8888+ # after the 'add', internal search tree should look like this:8989+ # 10.0.0.0/23 (node in dst tree of bin)9090+ # 10.0.2.0/24 (node in src tree of dst node)9191+ # 10.1.0.0/24 (node in src tree of dst node)9292+ # 10.2.0.0/24 (node in src tree of dst node)9393+ #9494+ # 10.0.0.0/24 and 10.0.1.0/24 nodes have been merged as 10.0.0.0/23.9595+ ip -net $ns xfrm policy add src 10.1.0.0/24 dst 10.0.0.0/23 dir fwd priority 200 action block6096}61976298do_esp_policy_get_check() {···222160 return $lret223161}224162163163+check_exceptions()164164+{165165+ logpostfix="$1"166166+ local lret=0167167+168168+ # ping to .254 should be excluded from the tunnel (exception is in place).169169+ check_xfrm 0 254170170+ if [ $? -ne 0 ]; then171171+ echo "FAIL: expected ping to .254 to fail ($logpostfix)"172172+ lret=1173173+ else174174+ echo "PASS: ping to .254 bypassed ipsec tunnel ($logpostfix)"175175+ fi176176+177177+ # ping to .253 should use use ipsec due to direct policy exception.178178+ check_xfrm 1 253179179+ if [ $? -ne 0 ]; then180180+ echo "FAIL: expected ping to .253 to use ipsec tunnel ($logpostfix)"181181+ lret=1182182+ else183183+ echo "PASS: direct policy matches ($logpostfix)"184184+ fi185185+186186+ # ping to .2 should use ipsec.187187+ check_xfrm 1 2188188+ if [ $? -ne 0 ]; then189189+ echo "FAIL: expected ping to .2 to use ipsec tunnel ($logpostfix)"190190+ lret=1191191+ else192192+ echo "PASS: policy matches ($logpostfix)"193193+ fi194194+195195+ return $lret196196+}197197+225198#check for needed privileges226199if [ "$(id -u)" -ne 0 ];then227200 echo "SKIP: Need root privileges"···367270do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96368271do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96369272370370-# ping to .254 should now be excluded from the tunnel371371-check_xfrm 0 254273273+check_exceptions "exceptions"372274if [ $? -ne 0 ]; then373373- echo "FAIL: expected ping to .254 to fail"374275 ret=1375375-else376376- echo "PASS: ping to .254 bypassed ipsec tunnel"377276fi378277379379-# ping to .253 should use use ipsec due to direct policy exception.380380-check_xfrm 1 253278278+# insert block policies with adjacent/overlapping netmasks279279+do_overlap ns3280280+281281+check_exceptions "exceptions and block policies"381282if [ $? -ne 0 ]; then382382- echo "FAIL: expected ping to .253 to use ipsec tunnel"383283 ret=1384384-else385385- echo "PASS: direct policy matches"386284fi387285388388-# ping to .2 should use ipsec.389389-check_xfrm 1 2390390-if [ $? -ne 0 ]; then391391- echo "FAIL: expected ping to .2 to use ipsec tunnel"392392- ret=1393393-else394394- echo "PASS: policy matches"395395-fi286286+for n in ns3 ns4;do287287+ ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125288288+ sleep $((RANDOM%5))289289+done290290+291291+check_exceptions "exceptions and block policies after hresh changes"292292+293293+# full flush of policy db, check everything gets freed incl. internal meta data294294+ip -net ns3 xfrm policy flush295295+296296+do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24297297+do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28298298+299299+# move inexact policies to hash table300300+ip -net ns3 xfrm policy set hthresh4 16 16301301+302302+sleep $((RANDOM%5))303303+check_exceptions "exceptions and block policies after hthresh change in ns3"304304+305305+# restore original hthresh settings -- move policies back to tables306306+for n in ns3 ns4;do307307+ ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128308308+ sleep $((RANDOM%5))309309+done310310+check_exceptions "exceptions and block policies after hresh change to normal"396311397312for i in 1 2 3 4;do ip netns del ns$i;done398313
+31-10
tools/testing/selftests/x86/protection_keys.c
···11331133 pkey_assert(err);11341134}1135113511361136+void become_child(void)11371137+{11381138+ pid_t forkret;11391139+11401140+ forkret = fork();11411141+ pkey_assert(forkret >= 0);11421142+ dprintf3("[%d] fork() ret: %d\n", getpid(), forkret);11431143+11441144+ if (!forkret) {11451145+ /* in the child */11461146+ return;11471147+ }11481148+ exit(0);11491149+}11501150+11361151/* Assumes that all pkeys other than 'pkey' are unallocated */11371152void test_pkey_alloc_exhaust(int *ptr, u16 pkey)11381153{···11561141 int nr_allocated_pkeys = 0;11571142 int i;1158114311591159- for (i = 0; i < NR_PKEYS*2; i++) {11441144+ for (i = 0; i < NR_PKEYS*3; i++) {11601145 int new_pkey;11611146 dprintf1("%s() alloc loop: %d\n", __func__, i);11621147 new_pkey = alloc_pkey();···11671152 if ((new_pkey == -1) && (errno == ENOSPC)) {11681153 dprintf2("%s() failed to allocate pkey after %d tries\n",11691154 __func__, nr_allocated_pkeys);11701170- break;11551155+ } else {11561156+ /*11571157+ * Ensure the number of successes never11581158+ * exceeds the number of keys supported11591159+ * in the hardware.11601160+ */11611161+ pkey_assert(nr_allocated_pkeys < NR_PKEYS);11621162+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;11711163 }11721172- pkey_assert(nr_allocated_pkeys < NR_PKEYS);11731173- allocated_pkeys[nr_allocated_pkeys++] = new_pkey;11641164+11651165+ /*11661166+ * Make sure that allocation state is properly11671167+ * preserved across fork().11681168+ */11691169+ if (i == NR_PKEYS*2)11701170+ become_child();11741171 }1175117211761173 dprintf3("%s()::%d\n", __func__, __LINE__);11771177-11781178- /*11791179- * ensure it did not reach the end of the loop without11801180- * failure:11811181- */11821182- pkey_assert(i < NR_PKEYS*2);1183117411841175 /*11851176 * There are 16 pkeys supported in hardware. Three are