Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

vdpa: mlx5: synchronize driver status with CVQ

Currently, CVQ doesn't have any synchronization with the driver
status, so the CVQ emulation code may run in the middle of:

1) device reset
2) device status changed
3) map updating

This will lead to several unexpected issues, like trying to execute a
CVQ command after the driver has been torn down.

Fix this by using reslock to synchronize the CVQ emulation code with
the driver status changes:

- protect the whole device reset, status changing and set_map()
updating with reslock
- protect the CVQ handler with the reslock and check
VIRTIO_CONFIG_S_DRIVER_OK in the CVQ handler

This will guarantee that:

1) CVQ handler won't work if VIRTIO_CONFIG_S_DRIVER_OK is not set
2) CVQ handler will see a consistent state of the driver instead of
the partial one when it is running in the middle of the
teardown_driver() or setup_driver().

Fixes: 5262912ef3cfc ("vdpa/mlx5: Add support for control VQ and MAC setting")
Signed-off-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/r/20220329042109.4029-2-jasowang@redhat.com
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Eli Cohen <elic@nvidia.com>

authored by

Jason Wang and committed by
Michael S. Tsirkin
1c80cf03 55ebf0d6

+37 -14
+37 -14
drivers/vdpa/mlx5/net/mlx5_vnet.c
··· 1659 1659 mvdev = wqent->mvdev; 1660 1660 ndev = to_mlx5_vdpa_ndev(mvdev); 1661 1661 cvq = &mvdev->cvq; 1662 + 1663 + mutex_lock(&ndev->reslock); 1664 + 1665 + if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 1666 + goto out; 1667 + 1662 1668 if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))) 1663 - return; 1669 + goto out; 1664 1670 1665 1671 if (!cvq->ready) 1666 - return; 1672 + goto out; 1667 1673 1668 1674 while (true) { 1669 1675 err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head, ··· 1707 1701 queue_work(mvdev->wq, &wqent->work); 1708 1702 break; 1709 1703 } 1704 + 1705 + out: 1706 + mutex_unlock(&ndev->reslock); 1710 1707 } 1711 1708 1712 1709 static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx) ··· 2184 2175 goto err_mr; 2185 2176 2186 2177 if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK)) 2187 - return 0; 2178 + goto err_mr; 2188 2179 2189 2180 restore_channels_info(ndev); 2190 2181 err = setup_driver(mvdev); ··· 2199 2190 return err; 2200 2191 } 2201 2192 2193 + /* reslock must be held for this function */ 2202 2194 static int setup_driver(struct mlx5_vdpa_dev *mvdev) 2203 2195 { 2204 2196 struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2205 2197 int err; 2206 2198 2207 - mutex_lock(&ndev->reslock); 2199 + WARN_ON(!mutex_is_locked(&ndev->reslock)); 2200 + 2208 2201 if (ndev->setup) { 2209 2202 mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n"); 2210 2203 err = 0; ··· 2236 2225 goto err_fwd; 2237 2226 } 2238 2227 ndev->setup = true; 2239 - mutex_unlock(&ndev->reslock); 2240 2228 2241 2229 return 0; 2242 2230 ··· 2246 2236 err_rqt: 2247 2237 teardown_virtqueues(ndev); 2248 2238 out: 2249 - mutex_unlock(&ndev->reslock); 2250 2239 return err; 2251 2240 } 2252 2241 2242 + /* reslock must be held for this function */ 2253 2243 static void teardown_driver(struct mlx5_vdpa_net *ndev) 2254 2244 { 2255 - mutex_lock(&ndev->reslock); 2245 + 2246 + 
WARN_ON(!mutex_is_locked(&ndev->reslock)); 2247 + 2256 2248 if (!ndev->setup) 2257 - goto out; 2249 + return; 2258 2250 2259 2251 remove_fwd_to_tir(ndev); 2260 2252 destroy_tir(ndev); 2261 2253 destroy_rqt(ndev); 2262 2254 teardown_virtqueues(ndev); 2263 2255 ndev->setup = false; 2264 - out: 2265 - mutex_unlock(&ndev->reslock); 2266 2256 } 2267 2257 2268 2258 static void clear_vqs_ready(struct mlx5_vdpa_net *ndev) ··· 2283 2273 2284 2274 print_status(mvdev, status, true); 2285 2275 2276 + mutex_lock(&ndev->reslock); 2277 + 2286 2278 if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) { 2287 2279 if (status & VIRTIO_CONFIG_S_DRIVER_OK) { 2288 2280 err = setup_driver(mvdev); ··· 2294 2282 } 2295 2283 } else { 2296 2284 mlx5_vdpa_warn(mvdev, "did not expect DRIVER_OK to be cleared\n"); 2297 - return; 2285 + goto err_clear; 2298 2286 } 2299 2287 } 2300 2288 2301 2289 ndev->mvdev.status = status; 2290 + mutex_unlock(&ndev->reslock); 2302 2291 return; 2303 2292 2304 2293 err_setup: 2305 2294 mlx5_vdpa_destroy_mr(&ndev->mvdev); 2306 2295 ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED; 2296 + err_clear: 2297 + mutex_unlock(&ndev->reslock); 2307 2298 } 2308 2299 2309 2300 static int mlx5_vdpa_reset(struct vdpa_device *vdev) ··· 2316 2301 2317 2302 print_status(mvdev, 0, true); 2318 2303 mlx5_vdpa_info(mvdev, "performing device reset\n"); 2304 + 2305 + mutex_lock(&ndev->reslock); 2319 2306 teardown_driver(ndev); 2320 2307 clear_vqs_ready(ndev); 2321 2308 mlx5_vdpa_destroy_mr(&ndev->mvdev); ··· 2330 2313 if (mlx5_vdpa_create_mr(mvdev, NULL)) 2331 2314 mlx5_vdpa_warn(mvdev, "create MR failed\n"); 2332 2315 } 2316 + mutex_unlock(&ndev->reslock); 2333 2317 2334 2318 return 0; 2335 2319 } ··· 2366 2348 static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb) 2367 2349 { 2368 2350 struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev); 2351 + struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev); 2369 2352 bool change_map; 2370 2353 int err; 2354 + 2355 + 
mutex_lock(&ndev->reslock); 2371 2356 2372 2357 err = mlx5_vdpa_handle_set_map(mvdev, iotlb, &change_map); 2373 2358 if (err) { 2374 2359 mlx5_vdpa_warn(mvdev, "set map failed(%d)\n", err); 2375 - return err; 2360 + goto err; 2376 2361 } 2377 2362 2378 2363 if (change_map) 2379 - return mlx5_vdpa_change_map(mvdev, iotlb); 2364 + err = mlx5_vdpa_change_map(mvdev, iotlb); 2380 2365 2381 - return 0; 2366 + err: 2367 + mutex_unlock(&ndev->reslock); 2368 + return err; 2382 2369 } 2383 2370 2384 2371 static void mlx5_vdpa_free(struct vdpa_device *vdev)