Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/vmwgfx: Replace the hw mutex with a hw spinlock

Fixes a case where we call vmw_fifo_idle() from within a wait function with
task state !TASK_RUNNING, which is illegal.

In addition, make the locking fine-grained, so that it is performed once
for every read and write operation. This is of course more costly, but we
don't perform much register access in the timing-critical paths anyway. Instead
we get the extra benefit of being sure that we don't forget the hw lock around
register accesses. I think the kms code is currently quite buggy with regard to this.

This fixes Red Hat Bugzilla Bug 1180796

Cc: stable@vger.kernel.org
Signed-off-by: Thomas Hellstrom <thellstrom@vmware.com>
Reviewed-by: Jakob Bornecrantz <jakob@vmware.com>

+56 -86
+5 -23
drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
··· 406 406 if (unlikely(ret != 0)) 407 407 --dev_priv->num_3d_resources; 408 408 } else if (unhide_svga) { 409 - mutex_lock(&dev_priv->hw_mutex); 410 409 vmw_write(dev_priv, SVGA_REG_ENABLE, 411 410 vmw_read(dev_priv, SVGA_REG_ENABLE) & 412 411 ~SVGA_REG_ENABLE_HIDE); 413 - mutex_unlock(&dev_priv->hw_mutex); 414 412 } 415 413 416 414 mutex_unlock(&dev_priv->release_mutex); ··· 431 433 mutex_lock(&dev_priv->release_mutex); 432 434 if (unlikely(--dev_priv->num_3d_resources == 0)) 433 435 vmw_release_device(dev_priv); 434 - else if (hide_svga) { 435 - mutex_lock(&dev_priv->hw_mutex); 436 + else if (hide_svga) 436 437 vmw_write(dev_priv, SVGA_REG_ENABLE, 437 438 vmw_read(dev_priv, SVGA_REG_ENABLE) | 438 439 SVGA_REG_ENABLE_HIDE); 439 - mutex_unlock(&dev_priv->hw_mutex); 440 - } 441 440 442 441 n3d = (int32_t) dev_priv->num_3d_resources; 443 442 mutex_unlock(&dev_priv->release_mutex); ··· 595 600 dev_priv->dev = dev; 596 601 dev_priv->vmw_chipset = chipset; 597 602 dev_priv->last_read_seqno = (uint32_t) -100; 598 - mutex_init(&dev_priv->hw_mutex); 599 603 mutex_init(&dev_priv->cmdbuf_mutex); 600 604 mutex_init(&dev_priv->release_mutex); 601 605 mutex_init(&dev_priv->binding_mutex); 602 606 rwlock_init(&dev_priv->resource_lock); 603 607 ttm_lock_init(&dev_priv->reservation_sem); 608 + spin_lock_init(&dev_priv->hw_lock); 609 + spin_lock_init(&dev_priv->waiter_lock); 610 + spin_lock_init(&dev_priv->cap_lock); 604 611 605 612 for (i = vmw_res_context; i < vmw_res_max; ++i) { 606 613 idr_init(&dev_priv->res_idr[i]); ··· 623 626 624 627 dev_priv->enable_fb = enable_fbdev; 625 628 626 - mutex_lock(&dev_priv->hw_mutex); 627 - 628 629 vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2); 629 630 svga_id = vmw_read(dev_priv, SVGA_REG_ID); 630 631 if (svga_id != SVGA_ID_2) { 631 632 ret = -ENOSYS; 632 633 DRM_ERROR("Unsupported SVGA ID 0x%x\n", svga_id); 633 - mutex_unlock(&dev_priv->hw_mutex); 634 634 goto out_err0; 635 635 } 636 636 ··· 677 683 dev_priv->prim_bb_mem = 
dev_priv->vram_size; 678 684 679 685 ret = vmw_dma_masks(dev_priv); 680 - if (unlikely(ret != 0)) { 681 - mutex_unlock(&dev_priv->hw_mutex); 686 + if (unlikely(ret != 0)) 682 687 goto out_err0; 683 - } 684 688 685 689 /* 686 690 * Limit back buffer size to VRAM size. Remove this once ··· 686 694 */ 687 695 if (dev_priv->prim_bb_mem > dev_priv->vram_size) 688 696 dev_priv->prim_bb_mem = dev_priv->vram_size; 689 - 690 - mutex_unlock(&dev_priv->hw_mutex); 691 697 692 698 vmw_print_capabilities(dev_priv->capabilities); 693 699 ··· 1150 1160 if (unlikely(ret != 0)) 1151 1161 return ret; 1152 1162 vmw_kms_save_vga(dev_priv); 1153 - mutex_lock(&dev_priv->hw_mutex); 1154 1163 vmw_write(dev_priv, SVGA_REG_TRACES, 0); 1155 - mutex_unlock(&dev_priv->hw_mutex); 1156 1164 } 1157 1165 1158 1166 if (active) { ··· 1184 1196 if (!dev_priv->enable_fb) { 1185 1197 vmw_kms_restore_vga(dev_priv); 1186 1198 vmw_3d_resource_dec(dev_priv, true); 1187 - mutex_lock(&dev_priv->hw_mutex); 1188 1199 vmw_write(dev_priv, SVGA_REG_TRACES, 1); 1189 - mutex_unlock(&dev_priv->hw_mutex); 1190 1200 } 1191 1201 return ret; 1192 1202 } ··· 1219 1233 DRM_ERROR("Unable to clean VRAM on master drop.\n"); 1220 1234 vmw_kms_restore_vga(dev_priv); 1221 1235 vmw_3d_resource_dec(dev_priv, true); 1222 - mutex_lock(&dev_priv->hw_mutex); 1223 1236 vmw_write(dev_priv, SVGA_REG_TRACES, 1); 1224 - mutex_unlock(&dev_priv->hw_mutex); 1225 1237 } 1226 1238 1227 1239 dev_priv->active_master = &dev_priv->fbdev_master; ··· 1351 1367 struct drm_device *dev = pci_get_drvdata(pdev); 1352 1368 struct vmw_private *dev_priv = vmw_priv(dev); 1353 1369 1354 - mutex_lock(&dev_priv->hw_mutex); 1355 1370 vmw_write(dev_priv, SVGA_REG_ID, SVGA_ID_2); 1356 1371 (void) vmw_read(dev_priv, SVGA_REG_ID); 1357 - mutex_unlock(&dev_priv->hw_mutex); 1358 1372 1359 1373 /** 1360 1374 * Reclaim 3d reference held by fbdev and potentially
+21 -4
drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
··· 399 399 uint32_t memory_size; 400 400 bool has_gmr; 401 401 bool has_mob; 402 - struct mutex hw_mutex; 402 + spinlock_t hw_lock; 403 + spinlock_t cap_lock; 403 404 404 405 /* 405 406 * VGA registers. ··· 450 449 atomic_t marker_seq; 451 450 wait_queue_head_t fence_queue; 452 451 wait_queue_head_t fifo_queue; 453 - int fence_queue_waiters; /* Protected by hw_mutex */ 454 - int goal_queue_waiters; /* Protected by hw_mutex */ 452 + spinlock_t waiter_lock; 453 + int fence_queue_waiters; /* Protected by waiter_lock */ 454 + int goal_queue_waiters; /* Protected by waiter_lock */ 455 455 atomic_t fifo_queue_waiters; 456 456 uint32_t last_read_seqno; 457 457 spinlock_t irq_lock; ··· 555 553 return (struct vmw_master *) master->driver_priv; 556 554 } 557 555 556 + /* 557 + * The locking here is fine-grained, so that it is performed once 558 + * for every read- and write operation. This is of course costly, but we 559 + * don't perform much register access in the timing critical paths anyway. 560 + * Instead we have the extra benefit of being sure that we don't forget 561 + * the hw lock around register accesses. 
562 + */ 558 563 static inline void vmw_write(struct vmw_private *dev_priv, 559 564 unsigned int offset, uint32_t value) 560 565 { 566 + unsigned long irq_flags; 567 + 568 + spin_lock_irqsave(&dev_priv->hw_lock, irq_flags); 561 569 outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); 562 570 outl(value, dev_priv->io_start + VMWGFX_VALUE_PORT); 571 + spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags); 563 572 } 564 573 565 574 static inline uint32_t vmw_read(struct vmw_private *dev_priv, 566 575 unsigned int offset) 567 576 { 568 - uint32_t val; 577 + unsigned long irq_flags; 578 + u32 val; 569 579 580 + spin_lock_irqsave(&dev_priv->hw_lock, irq_flags); 570 581 outl(offset, dev_priv->io_start + VMWGFX_INDEX_PORT); 571 582 val = inl(dev_priv->io_start + VMWGFX_VALUE_PORT); 583 + spin_unlock_irqrestore(&dev_priv->hw_lock, irq_flags); 584 + 572 585 return val; 573 586 } 574 587
+2 -16
drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
··· 35 35 struct vmw_private *dev_priv; 36 36 spinlock_t lock; 37 37 struct list_head fence_list; 38 - struct work_struct work, ping_work; 38 + struct work_struct work; 39 39 u32 user_fence_size; 40 40 u32 fence_size; 41 41 u32 event_fence_action_size; ··· 134 134 return "svga"; 135 135 } 136 136 137 - static void vmw_fence_ping_func(struct work_struct *work) 138 - { 139 - struct vmw_fence_manager *fman = 140 - container_of(work, struct vmw_fence_manager, ping_work); 141 - 142 - vmw_fifo_ping_host(fman->dev_priv, SVGA_SYNC_GENERIC); 143 - } 144 - 145 137 static bool vmw_fence_enable_signaling(struct fence *f) 146 138 { 147 139 struct vmw_fence_obj *fence = ··· 147 155 if (seqno - fence->base.seqno < VMW_FENCE_WRAP) 148 156 return false; 149 157 150 - if (mutex_trylock(&dev_priv->hw_mutex)) { 151 - vmw_fifo_ping_host_locked(dev_priv, SVGA_SYNC_GENERIC); 152 - mutex_unlock(&dev_priv->hw_mutex); 153 - } else 154 - schedule_work(&fman->ping_work); 158 + vmw_fifo_ping_host(dev_priv, SVGA_SYNC_GENERIC); 155 159 156 160 return true; 157 161 } ··· 293 305 INIT_LIST_HEAD(&fman->fence_list); 294 306 INIT_LIST_HEAD(&fman->cleanup_list); 295 307 INIT_WORK(&fman->work, &vmw_fence_work_func); 296 - INIT_WORK(&fman->ping_work, &vmw_fence_ping_func); 297 308 fman->fifo_down = true; 298 309 fman->user_fence_size = ttm_round_pot(sizeof(struct vmw_user_fence)); 299 310 fman->fence_size = ttm_round_pot(sizeof(struct vmw_fence_obj)); ··· 310 323 bool lists_empty; 311 324 312 325 (void) cancel_work_sync(&fman->work); 313 - (void) cancel_work_sync(&fman->ping_work); 314 326 315 327 spin_lock_irqsave(&fman->lock, irq_flags); 316 328 lists_empty = list_empty(&fman->fence_list) &&
+15 -21
drivers/gpu/drm/vmwgfx/vmwgfx_fifo.c
··· 44 44 if (!dev_priv->has_mob) 45 45 return false; 46 46 47 - mutex_lock(&dev_priv->hw_mutex); 47 + spin_lock(&dev_priv->cap_lock); 48 48 vmw_write(dev_priv, SVGA_REG_DEV_CAP, SVGA3D_DEVCAP_3D); 49 49 result = vmw_read(dev_priv, SVGA_REG_DEV_CAP); 50 - mutex_unlock(&dev_priv->hw_mutex); 50 + spin_unlock(&dev_priv->cap_lock); 51 51 52 52 return (result != 0); 53 53 } ··· 120 120 DRM_INFO("height %d\n", vmw_read(dev_priv, SVGA_REG_HEIGHT)); 121 121 DRM_INFO("bpp %d\n", vmw_read(dev_priv, SVGA_REG_BITS_PER_PIXEL)); 122 122 123 - mutex_lock(&dev_priv->hw_mutex); 124 123 dev_priv->enable_state = vmw_read(dev_priv, SVGA_REG_ENABLE); 125 124 dev_priv->config_done_state = vmw_read(dev_priv, SVGA_REG_CONFIG_DONE); 126 125 dev_priv->traces_state = vmw_read(dev_priv, SVGA_REG_TRACES); ··· 142 143 mb(); 143 144 144 145 vmw_write(dev_priv, SVGA_REG_CONFIG_DONE, 1); 145 - mutex_unlock(&dev_priv->hw_mutex); 146 146 147 147 max = ioread32(fifo_mem + SVGA_FIFO_MAX); 148 148 min = ioread32(fifo_mem + SVGA_FIFO_MIN); ··· 158 160 return vmw_fifo_send_fence(dev_priv, &dummy); 159 161 } 160 162 161 - void vmw_fifo_ping_host_locked(struct vmw_private *dev_priv, uint32_t reason) 163 + void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason) 162 164 { 163 165 __le32 __iomem *fifo_mem = dev_priv->mmio_virt; 166 + static DEFINE_SPINLOCK(ping_lock); 167 + unsigned long irq_flags; 164 168 169 + /* 170 + * The ping_lock is needed because we don't have an atomic 171 + * test-and-set of the SVGA_FIFO_BUSY register. 
172 + */ 173 + spin_lock_irqsave(&ping_lock, irq_flags); 165 174 if (unlikely(ioread32(fifo_mem + SVGA_FIFO_BUSY) == 0)) { 166 175 iowrite32(1, fifo_mem + SVGA_FIFO_BUSY); 167 176 vmw_write(dev_priv, SVGA_REG_SYNC, reason); 168 177 } 169 - } 170 - 171 - void vmw_fifo_ping_host(struct vmw_private *dev_priv, uint32_t reason) 172 - { 173 - mutex_lock(&dev_priv->hw_mutex); 174 - 175 - vmw_fifo_ping_host_locked(dev_priv, reason); 176 - 177 - mutex_unlock(&dev_priv->hw_mutex); 178 + spin_unlock_irqrestore(&ping_lock, irq_flags); 178 179 } 179 180 180 181 void vmw_fifo_release(struct vmw_private *dev_priv, struct vmw_fifo_state *fifo) 181 182 { 182 183 __le32 __iomem *fifo_mem = dev_priv->mmio_virt; 183 - 184 - mutex_lock(&dev_priv->hw_mutex); 185 184 186 185 vmw_write(dev_priv, SVGA_REG_SYNC, SVGA_SYNC_GENERIC); 187 186 while (vmw_read(dev_priv, SVGA_REG_BUSY) != 0) ··· 193 198 vmw_write(dev_priv, SVGA_REG_TRACES, 194 199 dev_priv->traces_state); 195 200 196 - mutex_unlock(&dev_priv->hw_mutex); 197 201 vmw_marker_queue_takedown(&fifo->marker_queue); 198 202 199 203 if (likely(fifo->static_buffer != NULL)) { ··· 265 271 return vmw_fifo_wait_noirq(dev_priv, bytes, 266 272 interruptible, timeout); 267 273 268 - mutex_lock(&dev_priv->hw_mutex); 274 + spin_lock(&dev_priv->waiter_lock); 269 275 if (atomic_add_return(1, &dev_priv->fifo_queue_waiters) > 0) { 270 276 spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); 271 277 outl(SVGA_IRQFLAG_FIFO_PROGRESS, ··· 274 280 vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); 275 281 spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); 276 282 } 277 - mutex_unlock(&dev_priv->hw_mutex); 283 + spin_unlock(&dev_priv->waiter_lock); 278 284 279 285 if (interruptible) 280 286 ret = wait_event_interruptible_timeout ··· 290 296 else if (likely(ret > 0)) 291 297 ret = 0; 292 298 293 - mutex_lock(&dev_priv->hw_mutex); 299 + spin_lock(&dev_priv->waiter_lock); 294 300 if (atomic_dec_and_test(&dev_priv->fifo_queue_waiters)) { 295 301 
spin_lock_irqsave(&dev_priv->irq_lock, irq_flags); 296 302 dev_priv->irq_mask &= ~SVGA_IRQFLAG_FIFO_PROGRESS; 297 303 vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); 298 304 spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); 299 305 } 300 - mutex_unlock(&dev_priv->hw_mutex); 306 + spin_unlock(&dev_priv->waiter_lock); 301 307 302 308 return ret; 303 309 }
+4 -4
drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
··· 135 135 (pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32); 136 136 compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS; 137 137 138 - mutex_lock(&dev_priv->hw_mutex); 138 + spin_lock(&dev_priv->cap_lock); 139 139 for (i = 0; i < max_size; ++i) { 140 140 vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); 141 141 compat_cap->pairs[i][0] = i; 142 142 compat_cap->pairs[i][1] = vmw_read(dev_priv, SVGA_REG_DEV_CAP); 143 143 } 144 - mutex_unlock(&dev_priv->hw_mutex); 144 + spin_unlock(&dev_priv->cap_lock); 145 145 146 146 return 0; 147 147 } ··· 191 191 if (num > SVGA3D_DEVCAP_MAX) 192 192 num = SVGA3D_DEVCAP_MAX; 193 193 194 - mutex_lock(&dev_priv->hw_mutex); 194 + spin_lock(&dev_priv->cap_lock); 195 195 for (i = 0; i < num; ++i) { 196 196 vmw_write(dev_priv, SVGA_REG_DEV_CAP, i); 197 197 *bounce32++ = vmw_read(dev_priv, SVGA_REG_DEV_CAP); 198 198 } 199 - mutex_unlock(&dev_priv->hw_mutex); 199 + spin_unlock(&dev_priv->cap_lock); 200 200 } else if (gb_objects) { 201 201 ret = vmw_fill_compat_cap(dev_priv, bounce, size); 202 202 if (unlikely(ret != 0))
+9 -16
drivers/gpu/drm/vmwgfx/vmwgfx_irq.c
··· 62 62 63 63 static bool vmw_fifo_idle(struct vmw_private *dev_priv, uint32_t seqno) 64 64 { 65 - uint32_t busy; 66 65 67 - mutex_lock(&dev_priv->hw_mutex); 68 - busy = vmw_read(dev_priv, SVGA_REG_BUSY); 69 - mutex_unlock(&dev_priv->hw_mutex); 70 - 71 - return (busy == 0); 66 + return (vmw_read(dev_priv, SVGA_REG_BUSY) == 0); 72 67 } 73 68 74 69 void vmw_update_seqno(struct vmw_private *dev_priv, ··· 179 184 180 185 void vmw_seqno_waiter_add(struct vmw_private *dev_priv) 181 186 { 182 - mutex_lock(&dev_priv->hw_mutex); 187 + spin_lock(&dev_priv->waiter_lock); 183 188 if (dev_priv->fence_queue_waiters++ == 0) { 184 189 unsigned long irq_flags; 185 190 ··· 190 195 vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); 191 196 spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); 192 197 } 193 - mutex_unlock(&dev_priv->hw_mutex); 198 + spin_unlock(&dev_priv->waiter_lock); 194 199 } 195 200 196 201 void vmw_seqno_waiter_remove(struct vmw_private *dev_priv) 197 202 { 198 - mutex_lock(&dev_priv->hw_mutex); 203 + spin_lock(&dev_priv->waiter_lock); 199 204 if (--dev_priv->fence_queue_waiters == 0) { 200 205 unsigned long irq_flags; 201 206 ··· 204 209 vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); 205 210 spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); 206 211 } 207 - mutex_unlock(&dev_priv->hw_mutex); 212 + spin_unlock(&dev_priv->waiter_lock); 208 213 } 209 214 210 215 211 216 void vmw_goal_waiter_add(struct vmw_private *dev_priv) 212 217 { 213 - mutex_lock(&dev_priv->hw_mutex); 218 + spin_lock(&dev_priv->waiter_lock); 214 219 if (dev_priv->goal_queue_waiters++ == 0) { 215 220 unsigned long irq_flags; 216 221 ··· 221 226 vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); 222 227 spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); 223 228 } 224 - mutex_unlock(&dev_priv->hw_mutex); 229 + spin_unlock(&dev_priv->waiter_lock); 225 230 } 226 231 227 232 void vmw_goal_waiter_remove(struct vmw_private *dev_priv) 228 233 { 229 - 
mutex_lock(&dev_priv->hw_mutex); 234 + spin_lock(&dev_priv->waiter_lock); 230 235 if (--dev_priv->goal_queue_waiters == 0) { 231 236 unsigned long irq_flags; 232 237 ··· 235 240 vmw_write(dev_priv, SVGA_REG_IRQMASK, dev_priv->irq_mask); 236 241 spin_unlock_irqrestore(&dev_priv->irq_lock, irq_flags); 237 242 } 238 - mutex_unlock(&dev_priv->hw_mutex); 243 + spin_unlock(&dev_priv->waiter_lock); 239 244 } 240 245 241 246 int vmw_wait_seqno(struct vmw_private *dev_priv, ··· 310 315 if (!(dev_priv->capabilities & SVGA_CAP_IRQMASK)) 311 316 return; 312 317 313 - mutex_lock(&dev_priv->hw_mutex); 314 318 vmw_write(dev_priv, SVGA_REG_IRQMASK, 0); 315 - mutex_unlock(&dev_priv->hw_mutex); 316 319 317 320 status = inl(dev_priv->io_start + VMWGFX_IRQSTATUS_PORT); 318 321 outl(status, dev_priv->io_start + VMWGFX_IRQSTATUS_PORT);
-2
drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
··· 1828 1828 struct vmw_private *dev_priv = vmw_priv(dev); 1829 1829 struct vmw_display_unit *du = vmw_connector_to_du(connector); 1830 1830 1831 - mutex_lock(&dev_priv->hw_mutex); 1832 1831 num_displays = vmw_read(dev_priv, SVGA_REG_NUM_DISPLAYS); 1833 - mutex_unlock(&dev_priv->hw_mutex); 1834 1832 1835 1833 return ((vmw_connector_to_du(connector)->unit < num_displays && 1836 1834 du->pref_active) ?