Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

ice: update malicious driver detection event handling

Update the PF's VF MDD event message to rate limit to once per second and
to report the total Rx|Tx event count. Add support to print pending
MDD events that occur during the rate-limit interval. The use of
net_ratelimit did not allow for per-VF Rx|Tx granularity.

Additional PF MDD log messages are guarded by netif_msg_[rx|tx]_err().

Since VF Rx MDD events disable the queue, add the ethtool private flag
mdd-auto-reset-vf to configure a VF reset to re-enable the queue.

Disable anti-spoof detection interrupt to prevent spurious events
during a function reset.

To avoid a race condition, do not make PF MDD register reads conditional
on the global MDD result.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>

authored by

Paul Greenwalt and committed by
Jeff Kirsher
9d5c5a52 a29a912d

+151 -60
+4
drivers/net/ethernet/intel/ice/ice.h
··· 212 212 __ICE_SERVICE_SCHED, 213 213 __ICE_SERVICE_DIS, 214 214 __ICE_OICR_INTR_DIS, /* Global OICR interrupt disabled */ 215 + __ICE_MDD_VF_PRINT_PENDING, /* set when MDD event handle */ 215 216 __ICE_STATE_NBITS /* must be last */ 216 217 }; 217 218 ··· 341 340 ICE_FLAG_FW_LLDP_AGENT, 342 341 ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ 343 342 ICE_FLAG_LEGACY_RX, 343 + ICE_FLAG_MDD_AUTO_RESET_VF, 344 344 ICE_PF_FLAGS_NBITS /* must be last */ 345 345 }; 346 346 ··· 365 363 u16 num_vfs_supported; /* num VFs supported for this PF */ 366 364 u16 num_vf_qps; /* num queue pairs per VF */ 367 365 u16 num_vf_msix; /* num vectors per VF */ 366 + /* used to ratelimit the MDD event logging */ 367 + unsigned long last_printed_mdd_jiffies; 368 368 DECLARE_BITMAP(state, __ICE_STATE_NBITS); 369 369 DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); 370 370 unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */
+1
drivers/net/ethernet/intel/ice/ice_ethtool.c
··· 157 157 static const struct ice_priv_flag ice_gstrings_priv_flags[] = { 158 158 ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA), 159 159 ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT), 160 + ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF), 160 161 ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX), 161 162 }; 162 163
+2
drivers/net/ethernet/intel/ice/ice_hw_autogen.h
··· 217 217 #define VPLAN_TX_QBASE_VFNUMQ_M ICE_M(0xFF, 16) 218 218 #define VPLAN_TXQ_MAPENA(_VF) (0x00073800 + ((_VF) * 4)) 219 219 #define VPLAN_TXQ_MAPENA_TX_ENA_M BIT(0) 220 + #define GL_MDCK_TX_TDPU 0x00049348 221 + #define GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M BIT(1) 220 222 #define GL_MDET_RX 0x00294C00 221 223 #define GL_MDET_RX_QNUM_S 0 222 224 #define GL_MDET_RX_QNUM_M ICE_M(0x7FFF, 0)
+71 -57
drivers/net/ethernet/intel/ice/ice_main.c
··· 1187 1187 * ice_handle_mdd_event - handle malicious driver detect event 1188 1188 * @pf: pointer to the PF structure 1189 1189 * 1190 - * Called from service task. OICR interrupt handler indicates MDD event 1190 + * Called from service task. OICR interrupt handler indicates MDD event. 1191 + * VF MDD logging is guarded by net_ratelimit. Additional PF and VF log 1192 + * messages are wrapped by netif_msg_[rx|tx]_err. Since VF Rx MDD events 1193 + * disable the queue, the PF can be configured to reset the VF using ethtool 1194 + * private flag mdd-auto-reset-vf. 1191 1195 */ 1192 1196 static void ice_handle_mdd_event(struct ice_pf *pf) 1193 1197 { 1194 1198 struct device *dev = ice_pf_to_dev(pf); 1195 1199 struct ice_hw *hw = &pf->hw; 1196 - bool mdd_detected = false; 1197 1200 u32 reg; 1198 1201 int i; 1199 1202 1200 - if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) 1203 + if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) { 1204 + /* Since the VF MDD event logging is rate limited, check if 1205 + * there are pending MDD events. 
1206 + */ 1207 + ice_print_vfs_mdd_events(pf); 1201 1208 return; 1209 + } 1202 1210 1203 - /* find what triggered the MDD event */ 1211 + /* find what triggered an MDD event */ 1204 1212 reg = rd32(hw, GL_MDET_TX_PQM); 1205 1213 if (reg & GL_MDET_TX_PQM_VALID_M) { 1206 1214 u8 pf_num = (reg & GL_MDET_TX_PQM_PF_NUM_M) >> ··· 1224 1216 dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", 1225 1217 event, queue, pf_num, vf_num); 1226 1218 wr32(hw, GL_MDET_TX_PQM, 0xffffffff); 1227 - mdd_detected = true; 1228 1219 } 1229 1220 1230 1221 reg = rd32(hw, GL_MDET_TX_TCLAN); ··· 1241 1234 dev_info(dev, "Malicious Driver Detection event %d on TX queue %d PF# %d VF# %d\n", 1242 1235 event, queue, pf_num, vf_num); 1243 1236 wr32(hw, GL_MDET_TX_TCLAN, 0xffffffff); 1244 - mdd_detected = true; 1245 1237 } 1246 1238 1247 1239 reg = rd32(hw, GL_MDET_RX); ··· 1258 1252 dev_info(dev, "Malicious Driver Detection event %d on RX queue %d PF# %d VF# %d\n", 1259 1253 event, queue, pf_num, vf_num); 1260 1254 wr32(hw, GL_MDET_RX, 0xffffffff); 1261 - mdd_detected = true; 1262 1255 } 1263 1256 1264 - if (mdd_detected) { 1265 - bool pf_mdd_detected = false; 1266 - 1267 - reg = rd32(hw, PF_MDET_TX_PQM); 1268 - if (reg & PF_MDET_TX_PQM_VALID_M) { 1269 - wr32(hw, PF_MDET_TX_PQM, 0xFFFF); 1270 - dev_info(dev, "TX driver issue detected, PF reset issued\n"); 1271 - pf_mdd_detected = true; 1272 - } 1273 - 1274 - reg = rd32(hw, PF_MDET_TX_TCLAN); 1275 - if (reg & PF_MDET_TX_TCLAN_VALID_M) { 1276 - wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF); 1277 - dev_info(dev, "TX driver issue detected, PF reset issued\n"); 1278 - pf_mdd_detected = true; 1279 - } 1280 - 1281 - reg = rd32(hw, PF_MDET_RX); 1282 - if (reg & PF_MDET_RX_VALID_M) { 1283 - wr32(hw, PF_MDET_RX, 0xFFFF); 1284 - dev_info(dev, "RX driver issue detected, PF reset issued\n"); 1285 - pf_mdd_detected = true; 1286 - } 1287 - /* Queue belongs to the PF initiate a reset */ 1288 - if (pf_mdd_detected) { 1289 - 
set_bit(__ICE_NEEDS_RESTART, pf->state); 1290 - ice_service_task_schedule(pf); 1291 - } 1257 + /* check to see if this PF caused an MDD event */ 1258 + reg = rd32(hw, PF_MDET_TX_PQM); 1259 + if (reg & PF_MDET_TX_PQM_VALID_M) { 1260 + wr32(hw, PF_MDET_TX_PQM, 0xFFFF); 1261 + if (netif_msg_tx_err(pf)) 1262 + dev_info(dev, "Malicious Driver Detection event TX_PQM detected on PF\n"); 1292 1263 } 1293 1264 1294 - /* check to see if one of the VFs caused the MDD */ 1265 + reg = rd32(hw, PF_MDET_TX_TCLAN); 1266 + if (reg & PF_MDET_TX_TCLAN_VALID_M) { 1267 + wr32(hw, PF_MDET_TX_TCLAN, 0xFFFF); 1268 + if (netif_msg_tx_err(pf)) 1269 + dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on PF\n"); 1270 + } 1271 + 1272 + reg = rd32(hw, PF_MDET_RX); 1273 + if (reg & PF_MDET_RX_VALID_M) { 1274 + wr32(hw, PF_MDET_RX, 0xFFFF); 1275 + if (netif_msg_rx_err(pf)) 1276 + dev_info(dev, "Malicious Driver Detection event RX detected on PF\n"); 1277 + } 1278 + 1279 + /* Check to see if one of the VFs caused an MDD event, and then 1280 + * increment counters and set print pending 1281 + */ 1295 1282 ice_for_each_vf(pf, i) { 1296 1283 struct ice_vf *vf = &pf->vf[i]; 1297 - 1298 - bool vf_mdd_detected = false; 1299 1284 1300 1285 reg = rd32(hw, VP_MDET_TX_PQM(i)); 1301 1286 if (reg & VP_MDET_TX_PQM_VALID_M) { 1302 1287 wr32(hw, VP_MDET_TX_PQM(i), 0xFFFF); 1303 - vf_mdd_detected = true; 1304 - dev_info(dev, "TX driver issue detected on VF %d\n", 1305 - i); 1288 + vf->mdd_tx_events.count++; 1289 + set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); 1290 + if (netif_msg_tx_err(pf)) 1291 + dev_info(dev, "Malicious Driver Detection event TX_PQM detected on VF %d\n", 1292 + i); 1306 1293 } 1307 1294 1308 1295 reg = rd32(hw, VP_MDET_TX_TCLAN(i)); 1309 1296 if (reg & VP_MDET_TX_TCLAN_VALID_M) { 1310 1297 wr32(hw, VP_MDET_TX_TCLAN(i), 0xFFFF); 1311 - vf_mdd_detected = true; 1312 - dev_info(dev, "TX driver issue detected on VF %d\n", 1313 - i); 1298 + vf->mdd_tx_events.count++; 1299 + 
set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); 1300 + if (netif_msg_tx_err(pf)) 1301 + dev_info(dev, "Malicious Driver Detection event TX_TCLAN detected on VF %d\n", 1302 + i); 1314 1303 } 1315 1304 1316 1305 reg = rd32(hw, VP_MDET_TX_TDPU(i)); 1317 1306 if (reg & VP_MDET_TX_TDPU_VALID_M) { 1318 1307 wr32(hw, VP_MDET_TX_TDPU(i), 0xFFFF); 1319 - vf_mdd_detected = true; 1320 - dev_info(dev, "TX driver issue detected on VF %d\n", 1321 - i); 1308 + vf->mdd_tx_events.count++; 1309 + set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); 1310 + if (netif_msg_tx_err(pf)) 1311 + dev_info(dev, "Malicious Driver Detection event TX_TDPU detected on VF %d\n", 1312 + i); 1322 1313 } 1323 1314 1324 1315 reg = rd32(hw, VP_MDET_RX(i)); 1325 1316 if (reg & VP_MDET_RX_VALID_M) { 1326 1317 wr32(hw, VP_MDET_RX(i), 0xFFFF); 1327 - vf_mdd_detected = true; 1328 - dev_info(dev, "RX driver issue detected on VF %d\n", 1329 - i); 1330 - } 1318 + vf->mdd_rx_events.count++; 1319 + set_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state); 1320 + if (netif_msg_rx_err(pf)) 1321 + dev_info(dev, "Malicious Driver Detection event RX detected on VF %d\n", 1322 + i); 1331 1323 1332 - if (vf_mdd_detected) { 1333 - vf->num_mdd_events++; 1334 - if (vf->num_mdd_events && 1335 - vf->num_mdd_events <= ICE_MDD_EVENTS_THRESHOLD) 1336 - dev_info(dev, "VF %d has had %llu MDD events since last boot, Admin might need to reload AVF driver with this number of events\n", 1337 - i, vf->num_mdd_events); 1324 + /* Since the queue is disabled on VF Rx MDD events, the 1325 + * PF can be configured to reset the VF through ethtool 1326 + * private flag mdd-auto-reset-vf. 
1327 + */ 1328 + if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) 1329 + ice_reset_vf(&pf->vf[i], false); 1338 1330 } 1339 1331 } 1332 + 1333 + ice_print_vfs_mdd_events(pf); 1340 1334 } 1341 1335 1342 1336 /** ··· 2000 1994 { 2001 1995 struct ice_hw *hw = &pf->hw; 2002 1996 u32 val; 1997 + 1998 + /* Disable anti-spoof detection interrupt to prevent spurious event 1999 + * interrupts during a function reset. Anti-spoof functionally is 2000 + * still supported. 2001 + */ 2002 + val = rd32(hw, GL_MDCK_TX_TDPU); 2003 + val |= GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M; 2004 + wr32(hw, GL_MDCK_TX_TDPU, val); 2003 2005 2004 2006 /* clear things first */ 2005 2007 wr32(hw, PFINT_OICR_ENA, 0); /* disable all */
+55 -1
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
··· 171 171 } 172 172 173 173 last_vector_idx = vf->first_vector_idx + pf->num_vf_msix - 1; 174 + 175 + /* clear VF MDD event information */ 176 + memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events)); 177 + memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events)); 178 + 174 179 /* Disable interrupts so that VF starts in a known state */ 175 180 for (i = vf->first_vector_idx; i <= last_vector_idx; i++) { 176 181 wr32(&pf->hw, GLINT_DYN_CTL(i), GLINT_DYN_CTL_CLEARPBA_M); ··· 1180 1175 * 1181 1176 * Returns true if the VF is reset, false otherwise. 1182 1177 */ 1183 - static bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) 1178 + bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) 1184 1179 { 1185 1180 struct ice_pf *pf = vf->pf; 1186 1181 struct ice_vsi *vsi; ··· 3533 3528 vf_stats->tx_dropped = stats->tx_discards; 3534 3529 3535 3530 return 0; 3531 + } 3532 + 3533 + /** 3534 + * ice_print_vfs_mdd_event - print VFs malicious driver detect event 3535 + * @pf: pointer to the PF structure 3536 + * 3537 + * Called from ice_handle_mdd_event to rate limit and print VFs MDD events. 
3538 + */ 3539 + void ice_print_vfs_mdd_events(struct ice_pf *pf) 3540 + { 3541 + struct device *dev = ice_pf_to_dev(pf); 3542 + struct ice_hw *hw = &pf->hw; 3543 + int i; 3544 + 3545 + /* check that there are pending MDD events to print */ 3546 + if (!test_and_clear_bit(__ICE_MDD_VF_PRINT_PENDING, pf->state)) 3547 + return; 3548 + 3549 + /* VF MDD event logs are rate limited to one second intervals */ 3550 + if (time_is_after_jiffies(pf->last_printed_mdd_jiffies + HZ * 1)) 3551 + return; 3552 + 3553 + pf->last_printed_mdd_jiffies = jiffies; 3554 + 3555 + ice_for_each_vf(pf, i) { 3556 + struct ice_vf *vf = &pf->vf[i]; 3557 + 3558 + /* only print Rx MDD event message if there are new events */ 3559 + if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) { 3560 + vf->mdd_rx_events.last_printed = 3561 + vf->mdd_rx_events.count; 3562 + 3563 + dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n", 3564 + vf->mdd_rx_events.count, hw->pf_id, i, 3565 + vf->dflt_lan_addr.addr, 3566 + test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) 3567 + ? "on" : "off"); 3568 + } 3569 + 3570 + /* only print Tx MDD event message if there are new events */ 3571 + if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) { 3572 + vf->mdd_tx_events.last_printed = 3573 + vf->mdd_tx_events.count; 3574 + 3575 + dev_info(dev, "%d Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pM.\n", 3576 + vf->mdd_tx_events.count, hw->pf_id, i, 3577 + vf->dflt_lan_addr.addr); 3578 + } 3579 + } 3536 3580 }
+18 -2
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
··· 55 55 ICE_VIRTCHNL_VF_CAP_PRIVILEGE, 56 56 }; 57 57 58 + /* VF MDD events print structure */ 59 + struct ice_mdd_vf_events { 60 + u16 count; /* total count of Rx|Tx events */ 61 + /* count number of the last printed event */ 62 + u16 last_printed; 63 + }; 64 + 58 65 /* VF information structure */ 59 66 struct ice_vf { 60 67 struct ice_pf *pf; ··· 90 83 unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ 91 84 DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ 92 85 93 - u64 num_mdd_events; /* number of MDD events detected */ 94 86 u64 num_inval_msgs; /* number of continuous invalid msgs */ 95 87 u64 num_valid_msgs; /* number of valid msgs detected */ 96 88 unsigned long vf_caps; /* VF's adv. capabilities */ 97 89 u8 num_req_qs; /* num of queue pairs requested by VF */ 98 90 u16 num_mac; 99 91 u16 num_vf_qs; /* num of queue configured per VF */ 92 + struct ice_mdd_vf_events mdd_rx_events; 93 + struct ice_mdd_vf_events mdd_tx_events; 100 94 }; 101 95 102 96 #ifdef CONFIG_PCI_IOV ··· 112 104 void ice_vc_notify_link_state(struct ice_pf *pf); 113 105 void ice_vc_notify_reset(struct ice_pf *pf); 114 106 bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr); 107 + bool ice_reset_vf(struct ice_vf *vf, bool is_vflr); 115 108 116 109 int 117 110 ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, ··· 132 123 struct ifla_vf_stats *vf_stats); 133 124 void 134 125 ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event); 135 - 126 + void ice_print_vfs_mdd_events(struct ice_pf *pf); 136 127 #else /* CONFIG_PCI_IOV */ 137 128 #define ice_process_vflr_event(pf) do {} while (0) 138 129 #define ice_free_vfs(pf) do {} while (0) ··· 141 132 #define ice_vc_notify_reset(pf) do {} while (0) 142 133 #define ice_set_vf_state_qs_dis(vf) do {} while (0) 143 134 #define ice_vf_lan_overflow_event(pf, event) do {} while (0) 135 + #define ice_print_vfs_mdd_events(pf) do {} while (0) 144 136 145 137 static inline bool 
146 138 ice_reset_all_vfs(struct ice_pf __always_unused *pf, 147 139 bool __always_unused is_vflr) 140 + { 141 + return true; 142 + } 143 + 144 + static inline bool 145 + ice_reset_vf(struct ice_vf __always_unused *vf, bool __always_unused is_vflr) 148 146 { 149 147 return true; 150 148 }