Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/nve0: use async copy engine for ttm buffer moves if available

Kepler PFIFO lost the ability to address multiple engines from a single
channel, so we need a separate one for the copy engine.

v2: Marcin Slusarz <marcin.slusarz@gmail.com>
- regression fix: restore hw accelerated buffer copies

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>

+72 -26
+5
drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -264,6 +264,11 @@
 	abi16->handles |= (1 << init->channel);
 
 	/* create channel object and initialise dma and fence management */
+	if (device->card_type >= NV_E0) {
+		init->fb_ctxdma_handle = NVE0_CHANNEL_IND_ENGINE_GR;
+		init->tt_ctxdma_handle = 0;
+	}
+
 	ret = nouveau_channel_new(drm, cli, NVDRM_DEVICE, NVDRM_CHAN |
 				  init->channel, init->fb_ctxdma_handle,
 				  init->tt_ctxdma_handle, &chan->chan);
+25 -6
drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -571,6 +571,18 @@
 }
 
 static int
+nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
+{
+	int ret = RING_SPACE(chan, 2);
+	if (ret == 0) {
+		BEGIN_NVC0(chan, NvSubCopy, 0x0000, 1);
+		OUT_RING (chan, handle);
+		FIRE_RING (chan);
+	}
+	return ret;
+}
+
+static int
 nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
 {
@@ -1003,10 +991,8 @@
 }
 
 void
-nouveau_bo_move_init(struct nouveau_channel *chan)
+nouveau_bo_move_init(struct nouveau_drm *drm)
 {
-	struct nouveau_cli *cli = chan->cli;
-	struct nouveau_drm *drm = chan->drm;
 	static const struct {
 		const char *name;
 		int engine;
@@ -1014,7 +1004,8 @@
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
-		{  "COPY", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
+		{  "COPY", 0, 0xa0b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xa0b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{ "COPY1", 5, 0x90b8, nvc0_bo_move_copy, nvc0_bo_move_init },
 		{ "COPY0", 4, 0x90b5, nvc0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 0, 0x85b5, nva3_bo_move_copy, nv50_bo_move_init },
@@ -1031,14 +1020,22 @@
 
 	do {
 		struct nouveau_object *object;
+		struct nouveau_channel *chan;
 		u32 handle = (mthd->engine << 16) | mthd->oclass;
 
-		ret = nouveau_object_new(nv_object(cli), chan->handle, handle,
+		if (mthd->init == nve0_bo_move_init)
+			chan = drm->cechan;
+		else
+			chan = drm->channel;
+		if (chan == NULL)
+			continue;
+
+		ret = nouveau_object_new(nv_object(drm), chan->handle, handle,
					 mthd->oclass, NULL, 0, &object);
 		if (ret == 0) {
 			ret = mthd->init(chan, handle);
 			if (ret) {
-				nouveau_object_del(nv_object(cli),
+				nouveau_object_del(nv_object(drm),
 						   chan->handle, handle);
 				continue;
 			}
+1 -1
drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -61,7 +61,7 @@
 
 extern struct ttm_bo_driver nouveau_bo_driver;
 
-void nouveau_bo_move_init(struct nouveau_channel *);
+void nouveau_bo_move_init(struct nouveau_drm *);
 int nouveau_bo_new(struct drm_device *, int size, int align, u32 flags,
 		   u32 tile_mode, u32 tile_flags, struct sg_table *sg,
 		   struct nouveau_bo **);
+19 -16
drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -184,7 +184,8 @@
 
 int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nouveau_cli *cli,
-		    u32 parent, u32 handle, struct nouveau_channel **pchan)
+		    u32 parent, u32 handle, u32 engine,
+		    struct nouveau_channel **pchan)
 {
 	static const u16 oclasses[] = { 0xa06f, 0x906f, 0x826f, 0x506f, 0 };
 	const u16 *oclass = oclasses;
@@ -203,7 +202,7 @@
 	args.pushbuf = chan->push.handle;
 	args.ioffset = 0x10000 + chan->push.vma.offset;
 	args.ilength = 0x02000;
-	args.engine = NVE0_CHANNEL_IND_ENGINE_GR;
+	args.engine = engine;
 
 	do {
 		ret = nouveau_object_new(nv_object(cli), parent, handle,
@@ -262,9 +261,6 @@
 	struct nv_dma_class args;
 	int ret, i;
 
-	chan->vram = vram;
-	chan->gart = gart;
-
 	/* allocate dma objects to cover all allowed vram, and gart */
 	if (device->card_type < NV_C0) {
 		if (device->card_type >= NV_50) {
@@ -299,6 +301,9 @@
 					 0x003d, &args, sizeof(args), &object);
 		if (ret)
 			return ret;
+
+		chan->vram = vram;
+		chan->gart = gart;
 	}
 
 	/* initialise dma tracking parameters */
@@ -337,15 +336,17 @@
 	/* allocate software object class (used for fences on <= nv05, and
 	 * to signal flip completion), bind it to a subchannel.
 	 */
-	ret = nouveau_object_new(nv_object(client), chan->handle,
-				 NvSw, nouveau_abi16_swclass(chan->drm),
-				 NULL, 0, &object);
-	if (ret)
-		return ret;
+	if (chan != chan->drm->cechan) {
+		ret = nouveau_object_new(nv_object(client), chan->handle,
+					 NvSw, nouveau_abi16_swclass(chan->drm),
+					 NULL, 0, &object);
+		if (ret)
+			return ret;
 
-	swch = (void *)object->parent;
-	swch->flip = nouveau_flip_complete;
-	swch->flip_data = chan;
+		swch = (void *)object->parent;
+		swch->flip = nouveau_flip_complete;
+		swch->flip_data = chan;
+	}
 
 	if (device->card_type < NV_C0) {
 		ret = RING_SPACE(chan, 2);
@@ -365,12 +362,12 @@
 
 int
 nouveau_channel_new(struct nouveau_drm *drm, struct nouveau_cli *cli,
-		    u32 parent, u32 handle, u32 vram, u32 gart,
+		    u32 parent, u32 handle, u32 arg0, u32 arg1,
 		    struct nouveau_channel **pchan)
 {
 	int ret;
 
-	ret = nouveau_channel_ind(drm, cli, parent, handle, pchan);
+	ret = nouveau_channel_ind(drm, cli, parent, handle, arg0, pchan);
 	if (ret) {
 		NV_DEBUG(drm, "ib channel create, %d\n", ret);
 		ret = nouveau_channel_dma(drm, cli, parent, handle, pchan);
@@ -380,7 +377,7 @@
 		}
 	}
 
-	ret = nouveau_channel_init(*pchan, vram, gart);
+	ret = nouveau_channel_init(*pchan, arg0, arg1);
 	if (ret) {
 		NV_ERROR(drm, "channel failed to initialise, %d\n", ret);
 		nouveau_channel_del(pchan);
+1 -1
drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -39,7 +39,7 @@
 
 
 int nouveau_channel_new(struct nouveau_drm *, struct nouveau_cli *,
-			u32 parent, u32 handle, u32 vram, u32 gart,
+			u32 parent, u32 handle, u32 arg0, u32 arg1,
 			struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int nouveau_channel_idle(struct nouveau_channel *);
+20 -2
drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -109,6 +109,7 @@
 {
 	nouveau_gpuobj_ref(NULL, &drm->notify);
 	nouveau_channel_del(&drm->channel);
+	nouveau_channel_del(&drm->cechan);
 	if (drm->fence)
 		nouveau_fence(drm)->dtor(drm);
 }
@@ -119,6 +118,7 @@
 {
 	struct nouveau_device *device = nv_device(drm->device);
 	struct nouveau_object *object;
+	u32 arg0, arg1;
 	int ret;
 
 	if (nouveau_noaccel)
@@ -136,8 +134,24 @@
 		return;
 	}
 
+	if (device->card_type >= NV_E0) {
+		ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE,
+					  NVDRM_CHAN + 1,
+					  NVE0_CHANNEL_IND_ENGINE_CE0 |
+					  NVE0_CHANNEL_IND_ENGINE_CE1, 0,
+					  &drm->cechan);
+		if (ret)
+			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
+
+		arg0 = NVE0_CHANNEL_IND_ENGINE_GR;
+		arg1 = 0;
+	} else {
+		arg0 = NvDmaFB;
+		arg1 = NvDmaTT;
+	}
+
 	ret = nouveau_channel_new(drm, &drm->client, NVDRM_DEVICE, NVDRM_CHAN,
-				  NvDmaFB, NvDmaTT, &drm->channel);
+				  arg0, arg1, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_fini(drm);
@@ -185,7 +167,7 @@
 	}
 
 
-	nouveau_bo_move_init(drm->channel);
+	nouveau_bo_move_init(drm);
 }
 
 static int __devinit
+1
drivers/gpu/drm/nouveau/nouveau_drm.h
@@ -97,6 +97,7 @@
 	void *fence;
 
 	/* context for accelerated drm-internal operations */
+	struct nouveau_channel *cechan;
 	struct nouveau_channel *channel;
 	struct nouveau_gpuobj *notify;
 	struct nouveau_fbdev *fbcon;