Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net/mlx4_en: Don't use irq_affinity_notifier to track changes in IRQ affinity map

An IRQ can have only a single affinity notifier, and that one is already
used by the cpu_rmap notifier, so it can't also be used by the driver to
track changes in the IRQ affinity map.
Instead, detect IRQ affinity changes by comparing the current CPU against
the IRQ's current affinity map during NAPI poll.

CC: Thomas Gleixner <tglx@linutronix.de>
CC: Ben Hutchings <ben@decadent.org.uk>
Fixes: 2eacc23 ("net/mlx4_core: Enforce irq affinity changes immediatly")
Signed-off-by: Amir Vadai <amirv@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Amir Vadai and committed by
David S. Miller
35f6f453 1b037474

+28 -74
-2
drivers/net/ethernet/mellanox/mlx4/cq.c
··· 294 294 init_completion(&cq->free); 295 295 296 296 cq->irq = priv->eq_table.eq[cq->vector].irq; 297 - cq->irq_affinity_change = false; 298 - 299 297 return 0; 300 298 301 299 err_radix:
+4
drivers/net/ethernet/mellanox/mlx4/en_cq.c
··· 128 128 mlx4_warn(mdev, "Failed assigning an EQ to %s, falling back to legacy EQ's\n", 129 129 name); 130 130 } 131 + 132 + cq->irq_desc = 133 + irq_to_desc(mlx4_eq_get_irq(mdev->dev, 134 + cq->vector)); 131 135 } 132 136 } else { 133 137 cq->vector = (cq->ring + 1 + priv->port) %
+13 -3
drivers/net/ethernet/mellanox/mlx4/en_rx.c
··· 40 40 #include <linux/if_ether.h> 41 41 #include <linux/if_vlan.h> 42 42 #include <linux/vmalloc.h> 43 + #include <linux/irq.h> 43 44 44 45 #include "mlx4_en.h" 45 46 ··· 897 896 898 897 /* If we used up all the quota - we're probably not done yet... */ 899 898 if (done == budget) { 899 + int cpu_curr; 900 + const struct cpumask *aff; 901 + 900 902 INC_PERF_COUNTER(priv->pstats.napi_quota); 901 - if (unlikely(cq->mcq.irq_affinity_change)) { 902 - cq->mcq.irq_affinity_change = false; 903 + 904 + cpu_curr = smp_processor_id(); 905 + aff = irq_desc_get_irq_data(cq->irq_desc)->affinity; 906 + 907 + if (unlikely(!cpumask_test_cpu(cpu_curr, aff))) { 908 + /* Current cpu is not according to smp_irq_affinity - 909 + * probably affinity changed. need to stop this NAPI 910 + * poll, and restart it on the right CPU 911 + */ 903 912 napi_complete(napi); 904 913 mlx4_en_arm_cq(priv, cq); 905 914 return 0; 906 915 } 907 916 } else { 908 917 /* Done for now */ 909 - cq->mcq.irq_affinity_change = false; 910 918 napi_complete(napi); 911 919 mlx4_en_arm_cq(priv, cq); 912 920 }
-6
drivers/net/ethernet/mellanox/mlx4/en_tx.c
··· 474 474 /* If we used up all the quota - we're probably not done yet... */ 475 475 if (done < budget) { 476 476 /* Done for now */ 477 - cq->mcq.irq_affinity_change = false; 478 477 napi_complete(napi); 479 478 mlx4_en_arm_cq(priv, cq); 480 479 return done; 481 - } else if (unlikely(cq->mcq.irq_affinity_change)) { 482 - cq->mcq.irq_affinity_change = false; 483 - napi_complete(napi); 484 - mlx4_en_arm_cq(priv, cq); 485 - return 0; 486 480 } 487 481 return budget; 488 482 }
+8 -61
drivers/net/ethernet/mellanox/mlx4/eq.c
··· 53 53 MLX4_EQ_ENTRY_SIZE = 0x20 54 54 }; 55 55 56 - struct mlx4_irq_notify { 57 - void *arg; 58 - struct irq_affinity_notify notify; 59 - }; 60 - 61 56 #define MLX4_EQ_STATUS_OK ( 0 << 28) 62 57 #define MLX4_EQ_STATUS_WRITE_FAIL (10 << 28) 63 58 #define MLX4_EQ_OWNER_SW ( 0 << 24) ··· 1083 1088 iounmap(priv->clr_base); 1084 1089 } 1085 1090 1086 - static void mlx4_irq_notifier_notify(struct irq_affinity_notify *notify, 1087 - const cpumask_t *mask) 1088 - { 1089 - struct mlx4_irq_notify *n = container_of(notify, 1090 - struct mlx4_irq_notify, 1091 - notify); 1092 - struct mlx4_priv *priv = (struct mlx4_priv *)n->arg; 1093 - struct radix_tree_iter iter; 1094 - void **slot; 1095 - 1096 - radix_tree_for_each_slot(slot, &priv->cq_table.tree, &iter, 0) { 1097 - struct mlx4_cq *cq = (struct mlx4_cq *)(*slot); 1098 - 1099 - if (cq->irq == notify->irq) 1100 - cq->irq_affinity_change = true; 1101 - } 1102 - } 1103 - 1104 - static void mlx4_release_irq_notifier(struct kref *ref) 1105 - { 1106 - struct mlx4_irq_notify *n = container_of(ref, struct mlx4_irq_notify, 1107 - notify.kref); 1108 - kfree(n); 1109 - } 1110 - 1111 - static void mlx4_assign_irq_notifier(struct mlx4_priv *priv, 1112 - struct mlx4_dev *dev, int irq) 1113 - { 1114 - struct mlx4_irq_notify *irq_notifier = NULL; 1115 - int err = 0; 1116 - 1117 - irq_notifier = kzalloc(sizeof(*irq_notifier), GFP_KERNEL); 1118 - if (!irq_notifier) { 1119 - mlx4_warn(dev, "Failed to allocate irq notifier. irq %d\n", 1120 - irq); 1121 - return; 1122 - } 1123 - 1124 - irq_notifier->notify.irq = irq; 1125 - irq_notifier->notify.notify = mlx4_irq_notifier_notify; 1126 - irq_notifier->notify.release = mlx4_release_irq_notifier; 1127 - irq_notifier->arg = priv; 1128 - err = irq_set_affinity_notifier(irq, &irq_notifier->notify); 1129 - if (err) { 1130 - kfree(irq_notifier); 1131 - irq_notifier = NULL; 1132 - mlx4_warn(dev, "Failed to set irq notifier. 
irq %d\n", irq); 1133 - } 1134 - } 1135 - 1136 - 1137 1091 int mlx4_alloc_eq_table(struct mlx4_dev *dev) 1138 1092 { 1139 1093 struct mlx4_priv *priv = mlx4_priv(dev); ··· 1353 1409 continue; 1354 1410 /*we dont want to break here*/ 1355 1411 } 1356 - mlx4_assign_irq_notifier(priv, dev, 1357 - priv->eq_table.eq[vec].irq); 1358 1412 1359 1413 eq_set_ci(&priv->eq_table.eq[vec], 1); 1360 1414 } ··· 1369 1427 } 1370 1428 EXPORT_SYMBOL(mlx4_assign_eq); 1371 1429 1430 + int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec) 1431 + { 1432 + struct mlx4_priv *priv = mlx4_priv(dev); 1433 + 1434 + return priv->eq_table.eq[vec].irq; 1435 + } 1436 + EXPORT_SYMBOL(mlx4_eq_get_irq); 1437 + 1372 1438 void mlx4_release_eq(struct mlx4_dev *dev, int vec) 1373 1439 { 1374 1440 struct mlx4_priv *priv = mlx4_priv(dev); ··· 1388 1438 Belonging to a legacy EQ*/ 1389 1439 mutex_lock(&priv->msix_ctl.pool_lock); 1390 1440 if (priv->msix_ctl.pool_bm & 1ULL << i) { 1391 - irq_set_affinity_notifier( 1392 - priv->eq_table.eq[vec].irq, 1393 - NULL); 1394 1441 free_irq(priv->eq_table.eq[vec].irq, 1395 1442 &priv->eq_table.eq[vec]); 1396 1443 priv->msix_ctl.pool_bm &= ~(1ULL << i);
+1
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
··· 343 343 #define CQ_USER_PEND (MLX4_EN_CQ_STATE_POLL | MLX4_EN_CQ_STATE_POLL_YIELD) 344 344 spinlock_t poll_lock; /* protects from LLS/napi conflicts */ 345 345 #endif /* CONFIG_NET_RX_BUSY_POLL */ 346 + struct irq_desc *irq_desc; 346 347 }; 347 348 348 349 struct mlx4_en_port_profile {
+2 -2
include/linux/mlx4/device.h
··· 578 578 u32 cons_index; 579 579 580 580 u16 irq; 581 - bool irq_affinity_change; 582 - 583 581 __be32 *set_ci_db; 584 582 __be32 *arm_db; 585 583 int arm_sn; ··· 1164 1166 int mlx4_assign_eq(struct mlx4_dev *dev, char *name, struct cpu_rmap *rmap, 1165 1167 int *vector); 1166 1168 void mlx4_release_eq(struct mlx4_dev *dev, int vec); 1169 + 1170 + int mlx4_eq_get_irq(struct mlx4_dev *dev, int vec); 1167 1171 1168 1172 int mlx4_get_phys_port_id(struct mlx4_dev *dev); 1169 1173 int mlx4_wol_read(struct mlx4_dev *dev, u64 *config, int port);