Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

sfc: protect filter table against use-after-free

If MCDI timeouts are encountered during efx_ef10_filter_table_remove(),
an FLR will be queued, but efx->filter_state will still be kfree()d.
The queued FLR will then call efx_ef10_filter_table_restore(), which
will try to use efx->filter_state. This previously caused a panic.
This patch adds an rwsem to protect the existence of efx->filter_state,
separately from the spinlock protecting its contents. Users which can
race against efx_ef10_filter_table_remove() should down_read this rwsem.

Signed-off-by: Shradha Shah <sshah@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

authored by

Edward Cree and committed by
David S. Miller
0d322413 f1122a34

+55 -12
+19
drivers/net/ethernet/sfc/ef10.c
··· 3314 3314 return rc; 3315 3315 } 3316 3316 3317 + /* Caller must hold efx->filter_sem for read if race against 3318 + * efx_ef10_filter_table_remove() is possible 3319 + */ 3317 3320 static void efx_ef10_filter_table_restore(struct efx_nic *efx) 3318 3321 { 3319 3322 struct efx_ef10_filter_table *table = efx->filter_state; ··· 3326 3323 bool failed = false; 3327 3324 int rc; 3328 3325 3326 + WARN_ON(!rwsem_is_locked(&efx->filter_sem)); 3327 + 3329 3328 if (!nic_data->must_restore_filters) 3329 + return; 3330 + 3331 + if (!table) 3330 3332 return; 3331 3333 3332 3334 spin_lock_bh(&efx->filter_lock); ··· 3369 3361 nic_data->must_restore_filters = false; 3370 3362 } 3371 3363 3364 + /* Caller must hold efx->filter_sem for write */ 3372 3365 static void efx_ef10_filter_table_remove(struct efx_nic *efx) 3373 3366 { 3374 3367 struct efx_ef10_filter_table *table = efx->filter_state; ··· 3377 3368 struct efx_filter_spec *spec; 3378 3369 unsigned int filter_idx; 3379 3370 int rc; 3371 + 3372 + efx->filter_state = NULL; 3373 + if (!table) 3374 + return; 3380 3375 3381 3376 for (filter_idx = 0; filter_idx < HUNT_FILTER_TBL_ROWS; filter_idx++) { 3382 3377 spec = efx_ef10_filter_entry_spec(table, filter_idx); ··· 3407 3394 kfree(table); 3408 3395 } 3409 3396 3397 + /* Caller must hold efx->filter_sem for read if race against 3398 + * efx_ef10_filter_table_remove() is possible 3399 + */ 3410 3400 static void efx_ef10_filter_sync_rx_mode(struct efx_nic *efx) 3411 3401 { 3412 3402 struct efx_ef10_filter_table *table = efx->filter_state; ··· 3422 3406 int i, n, rc; 3423 3407 3424 3408 if (!efx_dev_registered(efx)) 3409 + return; 3410 + 3411 + if (!table) 3425 3412 return; 3426 3413 3427 3414 /* Mark old filters that may need to be removed */
+29 -10
drivers/net/ethernet/sfc/efx.c
··· 949 949 950 950 static void efx_fini_port(struct efx_nic *efx); 951 951 952 + /* We assume that efx->type->reconfigure_mac will always try to sync RX 953 + * filters and therefore needs to read-lock the filter table against freeing 954 + */ 955 + void efx_mac_reconfigure(struct efx_nic *efx) 956 + { 957 + down_read(&efx->filter_sem); 958 + efx->type->reconfigure_mac(efx); 959 + up_read(&efx->filter_sem); 960 + } 961 + 952 962 /* Push loopback/power/transmit disable settings to the PHY, and reconfigure 953 963 * the MAC appropriately. All other PHY configuration changes are pushed 954 964 * through phy_op->set_settings(), and pushed asynchronously to the MAC ··· 1012 1002 1013 1003 mutex_lock(&efx->mac_lock); 1014 1004 if (efx->port_enabled) 1015 - efx->type->reconfigure_mac(efx); 1005 + efx_mac_reconfigure(efx); 1016 1006 mutex_unlock(&efx->mac_lock); 1017 1007 } 1018 1008 ··· 1052 1042 1053 1043 /* Reconfigure the MAC before creating dma queues (required for 1054 1044 * Falcon/A1 where RX_INGR_EN/TX_DRAIN_EN isn't supported) */ 1055 - efx->type->reconfigure_mac(efx); 1045 + efx_mac_reconfigure(efx); 1056 1046 1057 1047 /* Ensure the PHY advertises the correct flow control settings */ 1058 1048 rc = efx->phy_op->reconfigure(efx); ··· 1078 1068 efx->port_enabled = true; 1079 1069 1080 1070 /* Ensure MAC ingress/egress is enabled */ 1081 - efx->type->reconfigure_mac(efx); 1071 + efx_mac_reconfigure(efx); 1082 1072 1083 1073 mutex_unlock(&efx->mac_lock); 1084 1074 } ··· 1682 1672 int rc; 1683 1673 1684 1674 spin_lock_init(&efx->filter_lock); 1685 - 1675 + init_rwsem(&efx->filter_sem); 1676 + down_write(&efx->filter_sem); 1686 1677 rc = efx->type->filter_table_probe(efx); 1687 1678 if (rc) 1688 - return rc; 1679 + goto out_unlock; 1689 1680 1690 1681 #ifdef CONFIG_RFS_ACCEL 1691 1682 if (efx->type->offload_features & NETIF_F_NTUPLE) { ··· 1695 1684 GFP_KERNEL); 1696 1685 if (!efx->rps_flow_id) { 1697 1686 efx->type->filter_table_remove(efx); 1698 - return -ENOMEM; 1687 + rc = -ENOMEM; 1688 + goto out_unlock; 1699 1689 } 1700 1690 } 1701 1691 #endif 1702 - 1703 - return 0; 1692 + out_unlock: 1693 + up_write(&efx->filter_sem); 1694 + return rc; 1704 1695 } 1705 1696 1706 1697 static void efx_remove_filters(struct efx_nic *efx) ··· 1710 1697 #ifdef CONFIG_RFS_ACCEL 1711 1698 kfree(efx->rps_flow_id); 1712 1699 #endif 1700 + down_write(&efx->filter_sem); 1713 1701 efx->type->filter_table_remove(efx); 1702 + up_write(&efx->filter_sem); 1714 1703 } 1715 1704 1716 1705 static void efx_restore_filters(struct efx_nic *efx) 1717 1706 { 1707 + down_read(&efx->filter_sem); 1718 1708 efx->type->filter_table_restore(efx); 1709 + up_read(&efx->filter_sem); 1719 1710 } 1720 1711 1721 1712 /************************************************************************** ··· 2200 2183 2201 2184 mutex_lock(&efx->mac_lock); 2202 2185 net_dev->mtu = new_mtu; 2203 - efx->type->reconfigure_mac(efx); 2186 + efx_mac_reconfigure(efx); 2204 2187 mutex_unlock(&efx->mac_lock); 2205 2188 2206 2189 efx_start_all(efx); ··· 2236 2219 2237 2220 /* Reconfigure the MAC */ 2238 2221 mutex_lock(&efx->mac_lock); 2239 - efx->type->reconfigure_mac(efx); 2222 + efx_mac_reconfigure(efx); 2240 2223 mutex_unlock(&efx->mac_lock); 2241 2224 2242 2225 return 0; ··· 2481 2464 " VFs may not function\n", rc); 2482 2465 #endif 2483 2466 2467 + down_read(&efx->filter_sem); 2484 2468 efx_restore_filters(efx); 2469 + up_read(&efx->filter_sem); 2485 2470 if (efx->type->sriov_reset) 2486 2471 efx->type->sriov_reset(efx); 2487 2472
+2
drivers/net/ethernet/sfc/efx.h
··· 74 74 75 75 /* Filters */ 76 76 77 + void efx_mac_reconfigure(struct efx_nic *efx); 78 + 77 79 /** 78 80 * efx_filter_insert_filter - add or replace a filter 79 81 * @efx: NIC in which to insert the filter
+1 -1
drivers/net/ethernet/sfc/ethtool.c
··· 734 734 /* Reconfigure the MAC. The PHY *may* generate a link state change event 735 735 * if the user just changed the advertised capabilities, but there's no 736 736 * harm doing this twice */ 737 - efx->type->reconfigure_mac(efx); 737 + efx_mac_reconfigure(efx); 738 738 739 739 out: 740 740 mutex_unlock(&efx->mac_lock);
+4 -1
drivers/net/ethernet/sfc/net_driver.h
··· 25 25 #include <linux/highmem.h> 26 26 #include <linux/workqueue.h> 27 27 #include <linux/mutex.h> 28 + #include <linux/rwsem.h> 28 29 #include <linux/vmalloc.h> 29 30 #include <linux/i2c.h> 30 31 #include <linux/mtd/mtd.h> ··· 897 896 * @loopback_mode: Loopback status 898 897 * @loopback_modes: Supported loopback mode bitmask 899 898 * @loopback_selftest: Offline self-test private state 900 - * @filter_lock: Filter table lock 899 + * @filter_sem: Filter table rw_semaphore, for freeing the table 900 + * @filter_lock: Filter table lock, for mere content changes 901 901 * @filter_state: Architecture-dependent filter table state 902 902 * @rps_flow_id: Flow IDs of filters allocated for accelerated RFS, 903 903 * indexed by filter ID ··· 1040 1038 1041 1039 void *loopback_selftest; 1042 1040 1041 + struct rw_semaphore filter_sem; 1043 1042 spinlock_t filter_lock; 1044 1043 void *filter_state; 1045 1044 #ifdef CONFIG_RFS_ACCEL