drm/v3d: Introduce a new DRM driver for Broadcom V3D V3.x+

This driver will be used to support Mesa on the Broadcom 7268 and 7278
platforms.

V3D 3.3 introduces an MMU, which means we no longer need CMA or vc4's
complicated CL/shader validation scheme. This massively changes the
GEM behavior, so I've forked off to a new driver.
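
To illustrate the new GEM behavior, BO allocation from userspace now
returns both a GEM handle and a fixed address in the V3D MMU's address
space. The following is a minimal, illustrative sketch only; the struct
and ioctl names follow the handlers in v3d_bo.c below, and the exact
uapi layout lives in include/uapi/drm/v3d_drm.h (part of this series,
not shown here):

	#include <stdio.h>
	#include <xf86drm.h>		/* drmIoctl() from libdrm */
	#include "drm/v3d_drm.h"	/* uapi header added by this series */

	/* Sketch: allocate a BO and learn its GPU virtual address.
	 * Error handling is elided; flags must be zero, per
	 * v3d_create_bo_ioctl().
	 */
	static int example_create_bo(int fd)
	{
		struct drm_v3d_create_bo create = {
			.size = 64 * 1024,
		};

		if (drmIoctl(fd, DRM_IOCTL_V3D_CREATE_BO, &create))
			return -1;

		/* create.offset is the BO's fixed address in the V3D
		 * MMU's 4GB address space, assigned for the BO's whole
		 * lifetime.
		 */
		printf("handle %u at V3D address 0x%x\n",
		       create.handle, create.offset);
		return (int)create.handle;
	}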

v2: - Mark SUBMIT_CL as needing DRM_AUTH.
    - Coccinelle fixes from the kbuild test robot.
    - Drop personal git link from MAINTAINERS.
    - Don't double-map dma-buf imported BOs.
    - Add kerneldoc about needing MMU eviction.
    - Drop prime vmap/unmap stubs.
    - Delay mmap offset setup to mmap time.
    - Use drm_dev_init() instead of _alloc().
    - Use ktime_get() for wait_bo timeouts.
    - Drop drm_can_sleep() usage, since we don't modeset.
    - Switch page tables back to WC (a debug change to coherent had
      slipped in).
    - Switch drm_gem_object_unreference_unlocked() to
      drm_gem_object_put_unlocked().
    - Simplify overflow mem handling by not sharing overflow mem
      between jobs.
v3: No changes.
v4: - Align submit_cl to 64 bits (review by airlied).
    - Check zero flags in the other ioctls.

Signed-off-by: Eric Anholt <eric@anholt.net>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> (v4)
Acked-by: Dave Airlie <airlied@linux.ie> (v3, requested submit_cl change)
Link: https://patchwork.freedesktop.org/patch/msgid/20180430181058.30181-3-eric@anholt.net

+3146
+1
Documentation/gpu/drivers.rst
···
10 10    tegra
11 11    tinydrm
12 12    tve200
   13 +  v3d
13 14    vc4
14 15    bridge/dw-hdmi
15 16    xen-front
+8
MAINTAINERS
···
4795 4795  F: drivers/gpu/drm/omapdrm/
4796 4796  F: Documentation/devicetree/bindings/display/ti/
4797 4797
     4798 +  DRM DRIVERS FOR V3D
     4799 +  M: Eric Anholt <eric@anholt.net>
     4800 +  S: Supported
     4801 +  F: drivers/gpu/drm/v3d/
     4802 +  F: include/uapi/drm/v3d_drm.h
     4803 +  F: Documentation/devicetree/bindings/display/brcm,bcm-v3d.txt
     4804 +  T: git git://anongit.freedesktop.org/drm/drm-misc
     4805 +
4798 4806  DRM DRIVERS FOR VC4
4799 4807  M: Eric Anholt <eric@anholt.net>
4800 4808  T: git git://github.com/anholt/linux
+2
drivers/gpu/drm/Kconfig
···
267 267
268 268  source "drivers/gpu/drm/imx/Kconfig"
269 269
    270 +  source "drivers/gpu/drm/v3d/Kconfig"
    271 +
270 272  source "drivers/gpu/drm/vc4/Kconfig"
271 273
272 274  source "drivers/gpu/drm/etnaviv/Kconfig"
+1
drivers/gpu/drm/Makefile
···
61 61  obj-$(CONFIG_DRM_I810) += i810/
62 62  obj-$(CONFIG_DRM_I915) += i915/
63 63  obj-$(CONFIG_DRM_MGAG200) += mgag200/
   64 +  obj-$(CONFIG_DRM_V3D) += v3d/
64 65  obj-$(CONFIG_DRM_VC4) += vc4/
65 66  obj-$(CONFIG_DRM_CIRRUS_QEMU) += cirrus/
66 67  obj-$(CONFIG_DRM_SIS) += sis/
+9
drivers/gpu/drm/v3d/Kconfig
···
1 +  config DRM_V3D
2 +  	tristate "Broadcom V3D 3.x and newer"
3 +  	depends on ARCH_BCM || ARCH_BCMSTB || COMPILE_TEST
4 +  	depends on DRM
5 +  	depends on COMMON_CLK
6 +  	select DRM_SCHED
7 +  	help
8 +  	  Choose this option if you have a system that has a Broadcom
9 +  	  V3D 3.x or newer GPU, such as BCM7268.
+18
drivers/gpu/drm/v3d/Makefile
···
 1 +  # Please keep these build lists sorted!
 2 +
 3 +  # core driver code
 4 +  v3d-y := \
 5 +  	v3d_bo.o \
 6 +  	v3d_drv.o \
 7 +  	v3d_fence.o \
 8 +  	v3d_gem.o \
 9 +  	v3d_irq.o \
10 +  	v3d_mmu.o \
11 +  	v3d_trace_points.o \
12 +  	v3d_sched.o
13 +
14 +  v3d-$(CONFIG_DEBUG_FS) += v3d_debugfs.o
15 +
16 +  obj-$(CONFIG_DRM_V3D) += v3d.o
17 +
18 +  CFLAGS_v3d_trace_points.o := -I$(src)
+389
drivers/gpu/drm/v3d/v3d_bo.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2015-2018 Broadcom */ 3 + 4 + /** 5 + * DOC: V3D GEM BO management support 6 + * 7 + * Compared to VC4 (V3D 2.x), V3D 3.3 introduces an MMU between the 8 + * GPU and the bus, allowing us to use shmem objects for our storage 9 + * instead of CMA. 10 + * 11 + * Physically contiguous objects may still be imported to V3D, but the 12 + * driver doesn't allocate physically contiguous objects on its own. 13 + * Display engines requiring physically contiguous allocations should 14 + * look into Mesa's "renderonly" support (as used by the Mesa pl111 15 + * driver) for an example of how to integrate with V3D. 16 + * 17 + * Long term, we should support evicting pages from the MMU when under 18 + * memory pressure (thus the v3d_bo_get_pages() refcounting), but 19 + * that's not a high priority since our systems tend to not have swap. 20 + */ 21 + 22 + #include <linux/dma-buf.h> 23 + #include <linux/pfn_t.h> 24 + 25 + #include "v3d_drv.h" 26 + #include "uapi/drm/v3d_drm.h" 27 + 28 + /* Pins the shmem pages, fills in the .pages and .sgt fields of the BO, and maps 29 + * it for DMA. 30 + */ 31 + static int 32 + v3d_bo_get_pages(struct v3d_bo *bo) 33 + { 34 + struct drm_gem_object *obj = &bo->base; 35 + struct drm_device *dev = obj->dev; 36 + int npages = obj->size >> PAGE_SHIFT; 37 + int ret = 0; 38 + 39 + mutex_lock(&bo->lock); 40 + if (bo->pages_refcount++ != 0) 41 + goto unlock; 42 + 43 + if (!obj->import_attach) { 44 + bo->pages = drm_gem_get_pages(obj); 45 + if (IS_ERR(bo->pages)) { 46 + ret = PTR_ERR(bo->pages); 47 + goto unlock; 48 + } 49 + 50 + bo->sgt = drm_prime_pages_to_sg(bo->pages, npages); 51 + if (IS_ERR(bo->sgt)) { 52 + ret = PTR_ERR(bo->sgt); 53 + goto put_pages; 54 + } 55 + 56 + /* Map the pages for use by the GPU. */ 57 + dma_map_sg(dev->dev, bo->sgt->sgl, 58 + bo->sgt->nents, DMA_BIDIRECTIONAL); 59 + } else { 60 + bo->pages = kcalloc(npages, sizeof(*bo->pages), GFP_KERNEL); 61 + if (!bo->pages) 62 + goto put_pages; 63 + 64 + drm_prime_sg_to_page_addr_arrays(bo->sgt, bo->pages, 65 + NULL, npages); 66 + 67 + /* Note that dma-bufs come in mapped. 
*/ 68 + } 69 + 70 + mutex_unlock(&bo->lock); 71 + 72 + return 0; 73 + 74 + put_pages: 75 + drm_gem_put_pages(obj, bo->pages, true, true); 76 + bo->pages = NULL; 77 + unlock: 78 + bo->pages_refcount--; 79 + mutex_unlock(&bo->lock); 80 + return ret; 81 + } 82 + 83 + static void 84 + v3d_bo_put_pages(struct v3d_bo *bo) 85 + { 86 + struct drm_gem_object *obj = &bo->base; 87 + 88 + mutex_lock(&bo->lock); 89 + if (--bo->pages_refcount == 0) { 90 + if (!obj->import_attach) { 91 + dma_unmap_sg(obj->dev->dev, bo->sgt->sgl, 92 + bo->sgt->nents, DMA_BIDIRECTIONAL); 93 + sg_free_table(bo->sgt); 94 + kfree(bo->sgt); 95 + drm_gem_put_pages(obj, bo->pages, true, true); 96 + } else { 97 + kfree(bo->pages); 98 + } 99 + } 100 + mutex_unlock(&bo->lock); 101 + } 102 + 103 + static struct v3d_bo *v3d_bo_create_struct(struct drm_device *dev, 104 + size_t unaligned_size) 105 + { 106 + struct v3d_dev *v3d = to_v3d_dev(dev); 107 + struct drm_gem_object *obj; 108 + struct v3d_bo *bo; 109 + size_t size = roundup(unaligned_size, PAGE_SIZE); 110 + int ret; 111 + 112 + if (size == 0) 113 + return ERR_PTR(-EINVAL); 114 + 115 + bo = kzalloc(sizeof(*bo), GFP_KERNEL); 116 + if (!bo) 117 + return ERR_PTR(-ENOMEM); 118 + obj = &bo->base; 119 + 120 + INIT_LIST_HEAD(&bo->vmas); 121 + INIT_LIST_HEAD(&bo->unref_head); 122 + mutex_init(&bo->lock); 123 + 124 + ret = drm_gem_object_init(dev, obj, size); 125 + if (ret) 126 + goto free_bo; 127 + 128 + spin_lock(&v3d->mm_lock); 129 + ret = drm_mm_insert_node_generic(&v3d->mm, &bo->node, 130 + obj->size >> PAGE_SHIFT, 131 + GMP_GRANULARITY >> PAGE_SHIFT, 0, 0); 132 + spin_unlock(&v3d->mm_lock); 133 + if (ret) 134 + goto free_obj; 135 + 136 + return bo; 137 + 138 + free_obj: 139 + drm_gem_object_release(obj); 140 + free_bo: 141 + kfree(bo); 142 + return ERR_PTR(ret); 143 + } 144 + 145 + struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, 146 + size_t unaligned_size) 147 + { 148 + struct v3d_dev *v3d = to_v3d_dev(dev); 149 + struct drm_gem_object *obj; 150 + struct v3d_bo *bo; 151 + int ret; 152 + 153 + bo = v3d_bo_create_struct(dev, unaligned_size); 154 + if (IS_ERR(bo)) 155 + return bo; 156 + obj = &bo->base; 157 + 158 + bo->resv = &bo->_resv; 159 + reservation_object_init(bo->resv); 160 + 161 + ret = v3d_bo_get_pages(bo); 162 + if (ret) 163 + goto free_mm; 164 + 165 + v3d_mmu_insert_ptes(bo); 166 + 167 + mutex_lock(&v3d->bo_lock); 168 + v3d->bo_stats.num_allocated++; 169 + v3d->bo_stats.pages_allocated += obj->size >> PAGE_SHIFT; 170 + mutex_unlock(&v3d->bo_lock); 171 + 172 + return bo; 173 + 174 + free_mm: 175 + spin_lock(&v3d->mm_lock); 176 + drm_mm_remove_node(&bo->node); 177 + spin_unlock(&v3d->mm_lock); 178 + 179 + drm_gem_object_release(obj); 180 + kfree(bo); 181 + return ERR_PTR(ret); 182 + } 183 + 184 + /* Called DRM core on the last userspace/kernel unreference of the 185 + * BO. 
186 + */ 187 + void v3d_free_object(struct drm_gem_object *obj) 188 + { 189 + struct v3d_dev *v3d = to_v3d_dev(obj->dev); 190 + struct v3d_bo *bo = to_v3d_bo(obj); 191 + 192 + mutex_lock(&v3d->bo_lock); 193 + v3d->bo_stats.num_allocated--; 194 + v3d->bo_stats.pages_allocated -= obj->size >> PAGE_SHIFT; 195 + mutex_unlock(&v3d->bo_lock); 196 + 197 + reservation_object_fini(&bo->_resv); 198 + 199 + v3d_bo_put_pages(bo); 200 + 201 + if (obj->import_attach) 202 + drm_prime_gem_destroy(obj, bo->sgt); 203 + 204 + v3d_mmu_remove_ptes(bo); 205 + spin_lock(&v3d->mm_lock); 206 + drm_mm_remove_node(&bo->node); 207 + spin_unlock(&v3d->mm_lock); 208 + 209 + mutex_destroy(&bo->lock); 210 + 211 + drm_gem_object_release(obj); 212 + kfree(bo); 213 + } 214 + 215 + struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj) 216 + { 217 + struct v3d_bo *bo = to_v3d_bo(obj); 218 + 219 + return bo->resv; 220 + } 221 + 222 + static void 223 + v3d_set_mmap_vma_flags(struct vm_area_struct *vma) 224 + { 225 + vma->vm_flags &= ~VM_PFNMAP; 226 + vma->vm_flags |= VM_MIXEDMAP; 227 + vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags)); 228 + } 229 + 230 + int v3d_gem_fault(struct vm_fault *vmf) 231 + { 232 + struct vm_area_struct *vma = vmf->vma; 233 + struct drm_gem_object *obj = vma->vm_private_data; 234 + struct v3d_bo *bo = to_v3d_bo(obj); 235 + unsigned long pfn; 236 + pgoff_t pgoff; 237 + int ret; 238 + 239 + /* We don't use vmf->pgoff since that has the fake offset: */ 240 + pgoff = (vmf->address - vma->vm_start) >> PAGE_SHIFT; 241 + pfn = page_to_pfn(bo->pages[pgoff]); 242 + 243 + ret = vm_insert_mixed(vma, vmf->address, __pfn_to_pfn_t(pfn, PFN_DEV)); 244 + 245 + switch (ret) { 246 + case -EAGAIN: 247 + case 0: 248 + case -ERESTARTSYS: 249 + case -EINTR: 250 + case -EBUSY: 251 + /* 252 + * EBUSY is ok: this just means that another thread 253 + * already did the job. 
254 + */ 255 + return VM_FAULT_NOPAGE; 256 + case -ENOMEM: 257 + return VM_FAULT_OOM; 258 + default: 259 + return VM_FAULT_SIGBUS; 260 + } 261 + } 262 + 263 + int v3d_mmap(struct file *filp, struct vm_area_struct *vma) 264 + { 265 + int ret; 266 + 267 + ret = drm_gem_mmap(filp, vma); 268 + if (ret) 269 + return ret; 270 + 271 + v3d_set_mmap_vma_flags(vma); 272 + 273 + return ret; 274 + } 275 + 276 + int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) 277 + { 278 + int ret; 279 + 280 + ret = drm_gem_mmap_obj(obj, obj->size, vma); 281 + if (ret < 0) 282 + return ret; 283 + 284 + v3d_set_mmap_vma_flags(vma); 285 + 286 + return 0; 287 + } 288 + 289 + struct sg_table * 290 + v3d_prime_get_sg_table(struct drm_gem_object *obj) 291 + { 292 + struct v3d_bo *bo = to_v3d_bo(obj); 293 + int npages = obj->size >> PAGE_SHIFT; 294 + 295 + return drm_prime_pages_to_sg(bo->pages, npages); 296 + } 297 + 298 + struct drm_gem_object * 299 + v3d_prime_import_sg_table(struct drm_device *dev, 300 + struct dma_buf_attachment *attach, 301 + struct sg_table *sgt) 302 + { 303 + struct drm_gem_object *obj; 304 + struct v3d_bo *bo; 305 + 306 + bo = v3d_bo_create_struct(dev, attach->dmabuf->size); 307 + if (IS_ERR(bo)) 308 + return ERR_CAST(bo); 309 + obj = &bo->base; 310 + 311 + bo->resv = attach->dmabuf->resv; 312 + 313 + bo->sgt = sgt; 314 + v3d_bo_get_pages(bo); 315 + 316 + v3d_mmu_insert_ptes(bo); 317 + 318 + return obj; 319 + } 320 + 321 + int v3d_create_bo_ioctl(struct drm_device *dev, void *data, 322 + struct drm_file *file_priv) 323 + { 324 + struct drm_v3d_create_bo *args = data; 325 + struct v3d_bo *bo = NULL; 326 + int ret; 327 + 328 + if (args->flags != 0) { 329 + DRM_INFO("unknown create_bo flags: %d\n", args->flags); 330 + return -EINVAL; 331 + } 332 + 333 + bo = v3d_bo_create(dev, file_priv, PAGE_ALIGN(args->size)); 334 + if (IS_ERR(bo)) 335 + return PTR_ERR(bo); 336 + 337 + args->offset = bo->node.start << PAGE_SHIFT; 338 + 339 + ret = drm_gem_handle_create(file_priv, &bo->base, &args->handle); 340 + drm_gem_object_put_unlocked(&bo->base); 341 + 342 + return ret; 343 + } 344 + 345 + int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, 346 + struct drm_file *file_priv) 347 + { 348 + struct drm_v3d_mmap_bo *args = data; 349 + struct drm_gem_object *gem_obj; 350 + int ret; 351 + 352 + if (args->flags != 0) { 353 + DRM_INFO("unknown mmap_bo flags: %d\n", args->flags); 354 + return -EINVAL; 355 + } 356 + 357 + gem_obj = drm_gem_object_lookup(file_priv, args->handle); 358 + if (!gem_obj) { 359 + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); 360 + return -ENOENT; 361 + } 362 + 363 + ret = drm_gem_create_mmap_offset(gem_obj); 364 + if (ret == 0) 365 + args->offset = drm_vma_node_offset_addr(&gem_obj->vma_node); 366 + drm_gem_object_put_unlocked(gem_obj); 367 + 368 + return ret; 369 + } 370 + 371 + int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, 372 + struct drm_file *file_priv) 373 + { 374 + struct drm_v3d_get_bo_offset *args = data; 375 + struct drm_gem_object *gem_obj; 376 + struct v3d_bo *bo; 377 + 378 + gem_obj = drm_gem_object_lookup(file_priv, args->handle); 379 + if (!gem_obj) { 380 + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); 381 + return -ENOENT; 382 + } 383 + bo = to_v3d_bo(gem_obj); 384 + 385 + args->offset = bo->node.start << PAGE_SHIFT; 386 + 387 + drm_gem_object_put_unlocked(gem_obj); 388 + return 0; 389 + }
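
Illustrating the lazy mmap-offset setup noted in v2 above: userspace
first asks for the fake mmap offset via V3D_MMAP_BO, then maps through
the DRM fd. A hedged sketch (same headers as the earlier sketch, plus
<sys/mman.h>); names follow v3d_mmap_bo_ioctl() and v3d_mmap() above:

	/* Sketch: map a BO for CPU access. map.offset is the "fake"
	 * offset into the DRM file created by
	 * drm_gem_create_mmap_offset(), not a physical or GPU address;
	 * pages come back shmem-backed and write-combined per
	 * v3d_set_mmap_vma_flags().
	 */
	static void *example_map_bo(int fd, __u32 handle, size_t size)
	{
		struct drm_v3d_mmap_bo map = { .handle = handle };
		void *ptr;

		if (drmIoctl(fd, DRM_IOCTL_V3D_MMAP_BO, &map))
			return NULL;

		ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
			   fd, map.offset);
		return ptr == MAP_FAILED ? NULL : ptr;
	}
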
+191
drivers/gpu/drm/v3d/v3d_debugfs.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2014-2018 Broadcom */ 3 + 4 + #include <linux/circ_buf.h> 5 + #include <linux/ctype.h> 6 + #include <linux/debugfs.h> 7 + #include <linux/pm_runtime.h> 8 + #include <linux/seq_file.h> 9 + #include <drm/drmP.h> 10 + 11 + #include "v3d_drv.h" 12 + #include "v3d_regs.h" 13 + 14 + #define REGDEF(reg) { reg, #reg } 15 + struct v3d_reg_def { 16 + u32 reg; 17 + const char *name; 18 + }; 19 + 20 + static const struct v3d_reg_def v3d_hub_reg_defs[] = { 21 + REGDEF(V3D_HUB_AXICFG), 22 + REGDEF(V3D_HUB_UIFCFG), 23 + REGDEF(V3D_HUB_IDENT0), 24 + REGDEF(V3D_HUB_IDENT1), 25 + REGDEF(V3D_HUB_IDENT2), 26 + REGDEF(V3D_HUB_IDENT3), 27 + REGDEF(V3D_HUB_INT_STS), 28 + REGDEF(V3D_HUB_INT_MSK_STS), 29 + }; 30 + 31 + static const struct v3d_reg_def v3d_gca_reg_defs[] = { 32 + REGDEF(V3D_GCA_SAFE_SHUTDOWN), 33 + REGDEF(V3D_GCA_SAFE_SHUTDOWN_ACK), 34 + }; 35 + 36 + static const struct v3d_reg_def v3d_core_reg_defs[] = { 37 + REGDEF(V3D_CTL_IDENT0), 38 + REGDEF(V3D_CTL_IDENT1), 39 + REGDEF(V3D_CTL_IDENT2), 40 + REGDEF(V3D_CTL_MISCCFG), 41 + REGDEF(V3D_CTL_INT_STS), 42 + REGDEF(V3D_CTL_INT_MSK_STS), 43 + REGDEF(V3D_CLE_CT0CS), 44 + REGDEF(V3D_CLE_CT0CA), 45 + REGDEF(V3D_CLE_CT0EA), 46 + REGDEF(V3D_CLE_CT1CS), 47 + REGDEF(V3D_CLE_CT1CA), 48 + REGDEF(V3D_CLE_CT1EA), 49 + 50 + REGDEF(V3D_PTB_BPCA), 51 + REGDEF(V3D_PTB_BPCS), 52 + 53 + REGDEF(V3D_MMU_CTL), 54 + REGDEF(V3D_MMU_VIO_ADDR), 55 + 56 + REGDEF(V3D_GMP_STATUS), 57 + REGDEF(V3D_GMP_CFG), 58 + REGDEF(V3D_GMP_VIO_ADDR), 59 + }; 60 + 61 + static int v3d_v3d_debugfs_regs(struct seq_file *m, void *unused) 62 + { 63 + struct drm_info_node *node = (struct drm_info_node *)m->private; 64 + struct drm_device *dev = node->minor->dev; 65 + struct v3d_dev *v3d = to_v3d_dev(dev); 66 + int i, core; 67 + 68 + for (i = 0; i < ARRAY_SIZE(v3d_hub_reg_defs); i++) { 69 + seq_printf(m, "%s (0x%04x): 0x%08x\n", 70 + v3d_hub_reg_defs[i].name, v3d_hub_reg_defs[i].reg, 71 + V3D_READ(v3d_hub_reg_defs[i].reg)); 72 + } 73 + 74 + for (i = 0; i < ARRAY_SIZE(v3d_gca_reg_defs); i++) { 75 + seq_printf(m, "%s (0x%04x): 0x%08x\n", 76 + v3d_gca_reg_defs[i].name, v3d_gca_reg_defs[i].reg, 77 + V3D_GCA_READ(v3d_gca_reg_defs[i].reg)); 78 + } 79 + 80 + for (core = 0; core < v3d->cores; core++) { 81 + for (i = 0; i < ARRAY_SIZE(v3d_core_reg_defs); i++) { 82 + seq_printf(m, "core %d %s (0x%04x): 0x%08x\n", 83 + core, 84 + v3d_core_reg_defs[i].name, 85 + v3d_core_reg_defs[i].reg, 86 + V3D_CORE_READ(core, 87 + v3d_core_reg_defs[i].reg)); 88 + } 89 + } 90 + 91 + return 0; 92 + } 93 + 94 + static int v3d_v3d_debugfs_ident(struct seq_file *m, void *unused) 95 + { 96 + struct drm_info_node *node = (struct drm_info_node *)m->private; 97 + struct drm_device *dev = node->minor->dev; 98 + struct v3d_dev *v3d = to_v3d_dev(dev); 99 + u32 ident0, ident1, ident2, ident3, cores; 100 + int ret, core; 101 + 102 + ret = pm_runtime_get_sync(v3d->dev); 103 + if (ret < 0) 104 + return ret; 105 + 106 + ident0 = V3D_READ(V3D_HUB_IDENT0); 107 + ident1 = V3D_READ(V3D_HUB_IDENT1); 108 + ident2 = V3D_READ(V3D_HUB_IDENT2); 109 + ident3 = V3D_READ(V3D_HUB_IDENT3); 110 + cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); 111 + 112 + seq_printf(m, "Revision: %d.%d.%d.%d\n", 113 + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER), 114 + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV), 115 + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPREV), 116 + V3D_GET_FIELD(ident3, V3D_HUB_IDENT3_IPIDX)); 117 + seq_printf(m, "MMU: %s\n", 118 + (ident2 & V3D_HUB_IDENT2_WITH_MMU) ? 
"yes" : "no"); 119 + seq_printf(m, "TFU: %s\n", 120 + (ident1 & V3D_HUB_IDENT1_WITH_TFU) ? "yes" : "no"); 121 + seq_printf(m, "TSY: %s\n", 122 + (ident1 & V3D_HUB_IDENT1_WITH_TSY) ? "yes" : "no"); 123 + seq_printf(m, "MSO: %s\n", 124 + (ident1 & V3D_HUB_IDENT1_WITH_MSO) ? "yes" : "no"); 125 + seq_printf(m, "L3C: %s (%dkb)\n", 126 + (ident1 & V3D_HUB_IDENT1_WITH_L3C) ? "yes" : "no", 127 + V3D_GET_FIELD(ident2, V3D_HUB_IDENT2_L3C_NKB)); 128 + 129 + for (core = 0; core < cores; core++) { 130 + u32 misccfg; 131 + u32 nslc, ntmu, qups; 132 + 133 + ident0 = V3D_CORE_READ(core, V3D_CTL_IDENT0); 134 + ident1 = V3D_CORE_READ(core, V3D_CTL_IDENT1); 135 + ident2 = V3D_CORE_READ(core, V3D_CTL_IDENT2); 136 + misccfg = V3D_CORE_READ(core, V3D_CTL_MISCCFG); 137 + 138 + nslc = V3D_GET_FIELD(ident1, V3D_IDENT1_NSLC); 139 + ntmu = V3D_GET_FIELD(ident1, V3D_IDENT1_NTMU); 140 + qups = V3D_GET_FIELD(ident1, V3D_IDENT1_QUPS); 141 + 142 + seq_printf(m, "Core %d:\n", core); 143 + seq_printf(m, " Revision: %d.%d\n", 144 + V3D_GET_FIELD(ident0, V3D_IDENT0_VER), 145 + V3D_GET_FIELD(ident1, V3D_IDENT1_REV)); 146 + seq_printf(m, " Slices: %d\n", nslc); 147 + seq_printf(m, " TMUs: %d\n", nslc * ntmu); 148 + seq_printf(m, " QPUs: %d\n", nslc * qups); 149 + seq_printf(m, " Semaphores: %d\n", 150 + V3D_GET_FIELD(ident1, V3D_IDENT1_NSEM)); 151 + seq_printf(m, " BCG int: %d\n", 152 + (ident2 & V3D_IDENT2_BCG_INT) != 0); 153 + seq_printf(m, " Override TMU: %d\n", 154 + (misccfg & V3D_MISCCFG_OVRTMUOUT) != 0); 155 + } 156 + 157 + pm_runtime_mark_last_busy(v3d->dev); 158 + pm_runtime_put_autosuspend(v3d->dev); 159 + 160 + return 0; 161 + } 162 + 163 + static int v3d_debugfs_bo_stats(struct seq_file *m, void *unused) 164 + { 165 + struct drm_info_node *node = (struct drm_info_node *)m->private; 166 + struct drm_device *dev = node->minor->dev; 167 + struct v3d_dev *v3d = to_v3d_dev(dev); 168 + 169 + mutex_lock(&v3d->bo_lock); 170 + seq_printf(m, "allocated bos: %d\n", 171 + v3d->bo_stats.num_allocated); 172 + seq_printf(m, "allocated bo size (kb): %ld\n", 173 + (long)v3d->bo_stats.pages_allocated << (PAGE_SHIFT - 10)); 174 + mutex_unlock(&v3d->bo_lock); 175 + 176 + return 0; 177 + } 178 + 179 + static const struct drm_info_list v3d_debugfs_list[] = { 180 + {"v3d_ident", v3d_v3d_debugfs_ident, 0}, 181 + {"v3d_regs", v3d_v3d_debugfs_regs, 0}, 182 + {"bo_stats", v3d_debugfs_bo_stats, 0}, 183 + }; 184 + 185 + int 186 + v3d_debugfs_init(struct drm_minor *minor) 187 + { 188 + return drm_debugfs_create_files(v3d_debugfs_list, 189 + ARRAY_SIZE(v3d_debugfs_list), 190 + minor->debugfs_root, minor); 191 + }
+371
drivers/gpu/drm/v3d/v3d_drv.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2014-2018 Broadcom */ 3 + 4 + /** 5 + * DOC: Broadcom V3D Graphics Driver 6 + * 7 + * This driver supports the Broadcom V3D 3.3 and 4.1 OpenGL ES GPUs. 8 + * For V3D 2.x support, see the VC4 driver. 9 + * 10 + * Currently only single-core rendering using the binner and renderer 11 + * is supported. The TFU (texture formatting unit) and V3D 4.x's CSD 12 + * (compute shader dispatch) are not yet supported. 13 + */ 14 + 15 + #include <linux/clk.h> 16 + #include <linux/device.h> 17 + #include <linux/io.h> 18 + #include <linux/module.h> 19 + #include <linux/of_platform.h> 20 + #include <linux/platform_device.h> 21 + #include <linux/pm_runtime.h> 22 + #include <drm/drm_fb_cma_helper.h> 23 + #include <drm/drm_fb_helper.h> 24 + 25 + #include "uapi/drm/v3d_drm.h" 26 + #include "v3d_drv.h" 27 + #include "v3d_regs.h" 28 + 29 + #define DRIVER_NAME "v3d" 30 + #define DRIVER_DESC "Broadcom V3D graphics" 31 + #define DRIVER_DATE "20180419" 32 + #define DRIVER_MAJOR 1 33 + #define DRIVER_MINOR 0 34 + #define DRIVER_PATCHLEVEL 0 35 + 36 + #ifdef CONFIG_PM 37 + static int v3d_runtime_suspend(struct device *dev) 38 + { 39 + struct drm_device *drm = dev_get_drvdata(dev); 40 + struct v3d_dev *v3d = to_v3d_dev(drm); 41 + 42 + v3d_irq_disable(v3d); 43 + 44 + clk_disable_unprepare(v3d->clk); 45 + 46 + return 0; 47 + } 48 + 49 + static int v3d_runtime_resume(struct device *dev) 50 + { 51 + struct drm_device *drm = dev_get_drvdata(dev); 52 + struct v3d_dev *v3d = to_v3d_dev(drm); 53 + int ret; 54 + 55 + ret = clk_prepare_enable(v3d->clk); 56 + if (ret != 0) 57 + return ret; 58 + 59 + /* XXX: VPM base */ 60 + 61 + v3d_mmu_set_page_table(v3d); 62 + v3d_irq_enable(v3d); 63 + 64 + return 0; 65 + } 66 + #endif 67 + 68 + static const struct dev_pm_ops v3d_v3d_pm_ops = { 69 + SET_RUNTIME_PM_OPS(v3d_runtime_suspend, v3d_runtime_resume, NULL) 70 + }; 71 + 72 + static int v3d_get_param_ioctl(struct drm_device *dev, void *data, 73 + struct drm_file *file_priv) 74 + { 75 + struct v3d_dev *v3d = to_v3d_dev(dev); 76 + struct drm_v3d_get_param *args = data; 77 + int ret; 78 + static const u32 reg_map[] = { 79 + [DRM_V3D_PARAM_V3D_UIFCFG] = V3D_HUB_UIFCFG, 80 + [DRM_V3D_PARAM_V3D_HUB_IDENT1] = V3D_HUB_IDENT1, 81 + [DRM_V3D_PARAM_V3D_HUB_IDENT2] = V3D_HUB_IDENT2, 82 + [DRM_V3D_PARAM_V3D_HUB_IDENT3] = V3D_HUB_IDENT3, 83 + [DRM_V3D_PARAM_V3D_CORE0_IDENT0] = V3D_CTL_IDENT0, 84 + [DRM_V3D_PARAM_V3D_CORE0_IDENT1] = V3D_CTL_IDENT1, 85 + [DRM_V3D_PARAM_V3D_CORE0_IDENT2] = V3D_CTL_IDENT2, 86 + }; 87 + 88 + if (args->pad != 0) 89 + return -EINVAL; 90 + 91 + /* Note that DRM_V3D_PARAM_V3D_CORE0_IDENT0 is 0, so we need 92 + * to explicitly allow it in the "the register in our 93 + * parameter map" check. 94 + */ 95 + if (args->param < ARRAY_SIZE(reg_map) && 96 + (reg_map[args->param] || 97 + args->param == DRM_V3D_PARAM_V3D_CORE0_IDENT0)) { 98 + u32 offset = reg_map[args->param]; 99 + 100 + if (args->value != 0) 101 + return -EINVAL; 102 + 103 + ret = pm_runtime_get_sync(v3d->dev); 104 + if (args->param >= DRM_V3D_PARAM_V3D_CORE0_IDENT0 && 105 + args->param <= DRM_V3D_PARAM_V3D_CORE0_IDENT2) { 106 + args->value = V3D_CORE_READ(0, offset); 107 + } else { 108 + args->value = V3D_READ(offset); 109 + } 110 + pm_runtime_mark_last_busy(v3d->dev); 111 + pm_runtime_put_autosuspend(v3d->dev); 112 + return 0; 113 + } 114 + 115 + /* Any params that aren't just register reads would go here. 
*/ 116 + 117 + DRM_DEBUG("Unknown parameter %d\n", args->param); 118 + return -EINVAL; 119 + } 120 + 121 + static int 122 + v3d_open(struct drm_device *dev, struct drm_file *file) 123 + { 124 + struct v3d_dev *v3d = to_v3d_dev(dev); 125 + struct v3d_file_priv *v3d_priv; 126 + int i; 127 + 128 + v3d_priv = kzalloc(sizeof(*v3d_priv), GFP_KERNEL); 129 + if (!v3d_priv) 130 + return -ENOMEM; 131 + 132 + v3d_priv->v3d = v3d; 133 + 134 + for (i = 0; i < V3D_MAX_QUEUES; i++) { 135 + drm_sched_entity_init(&v3d->queue[i].sched, 136 + &v3d_priv->sched_entity[i], 137 + &v3d->queue[i].sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL], 138 + 32, NULL); 139 + } 140 + 141 + file->driver_priv = v3d_priv; 142 + 143 + return 0; 144 + } 145 + 146 + static void 147 + v3d_postclose(struct drm_device *dev, struct drm_file *file) 148 + { 149 + struct v3d_dev *v3d = to_v3d_dev(dev); 150 + struct v3d_file_priv *v3d_priv = file->driver_priv; 151 + enum v3d_queue q; 152 + 153 + for (q = 0; q < V3D_MAX_QUEUES; q++) { 154 + drm_sched_entity_fini(&v3d->queue[q].sched, 155 + &v3d_priv->sched_entity[q]); 156 + } 157 + 158 + kfree(v3d_priv); 159 + } 160 + 161 + static const struct file_operations v3d_drm_fops = { 162 + .owner = THIS_MODULE, 163 + .open = drm_open, 164 + .release = drm_release, 165 + .unlocked_ioctl = drm_ioctl, 166 + .mmap = v3d_mmap, 167 + .poll = drm_poll, 168 + .read = drm_read, 169 + .compat_ioctl = drm_compat_ioctl, 170 + .llseek = noop_llseek, 171 + }; 172 + 173 + /* DRM_AUTH is required on SUBMIT_CL for now, while we don't have GMP 174 + * protection between clients. Note that render nodes would be be 175 + * able to submit CLs that could access BOs from clients authenticated 176 + * with the master node. 177 + */ 178 + static const struct drm_ioctl_desc v3d_drm_ioctls[] = { 179 + DRM_IOCTL_DEF_DRV(V3D_SUBMIT_CL, v3d_submit_cl_ioctl, DRM_RENDER_ALLOW | DRM_AUTH), 180 + DRM_IOCTL_DEF_DRV(V3D_WAIT_BO, v3d_wait_bo_ioctl, DRM_RENDER_ALLOW), 181 + DRM_IOCTL_DEF_DRV(V3D_CREATE_BO, v3d_create_bo_ioctl, DRM_RENDER_ALLOW), 182 + DRM_IOCTL_DEF_DRV(V3D_MMAP_BO, v3d_mmap_bo_ioctl, DRM_RENDER_ALLOW), 183 + DRM_IOCTL_DEF_DRV(V3D_GET_PARAM, v3d_get_param_ioctl, DRM_RENDER_ALLOW), 184 + DRM_IOCTL_DEF_DRV(V3D_GET_BO_OFFSET, v3d_get_bo_offset_ioctl, DRM_RENDER_ALLOW), 185 + }; 186 + 187 + static const struct vm_operations_struct v3d_vm_ops = { 188 + .fault = v3d_gem_fault, 189 + .open = drm_gem_vm_open, 190 + .close = drm_gem_vm_close, 191 + }; 192 + 193 + static struct drm_driver v3d_drm_driver = { 194 + .driver_features = (DRIVER_GEM | 195 + DRIVER_RENDER | 196 + DRIVER_PRIME | 197 + DRIVER_SYNCOBJ), 198 + 199 + .open = v3d_open, 200 + .postclose = v3d_postclose, 201 + 202 + #if defined(CONFIG_DEBUG_FS) 203 + .debugfs_init = v3d_debugfs_init, 204 + #endif 205 + 206 + .gem_free_object_unlocked = v3d_free_object, 207 + .gem_vm_ops = &v3d_vm_ops, 208 + 209 + .prime_handle_to_fd = drm_gem_prime_handle_to_fd, 210 + .prime_fd_to_handle = drm_gem_prime_fd_to_handle, 211 + .gem_prime_import = drm_gem_prime_import, 212 + .gem_prime_export = drm_gem_prime_export, 213 + .gem_prime_res_obj = v3d_prime_res_obj, 214 + .gem_prime_get_sg_table = v3d_prime_get_sg_table, 215 + .gem_prime_import_sg_table = v3d_prime_import_sg_table, 216 + .gem_prime_mmap = v3d_prime_mmap, 217 + 218 + .ioctls = v3d_drm_ioctls, 219 + .num_ioctls = ARRAY_SIZE(v3d_drm_ioctls), 220 + .fops = &v3d_drm_fops, 221 + 222 + .name = DRIVER_NAME, 223 + .desc = DRIVER_DESC, 224 + .date = DRIVER_DATE, 225 + .major = DRIVER_MAJOR, 226 + .minor = DRIVER_MINOR, 227 + 
.patchlevel = DRIVER_PATCHLEVEL, 228 + }; 229 + 230 + static const struct of_device_id v3d_of_match[] = { 231 + { .compatible = "brcm,7268-v3d" }, 232 + { .compatible = "brcm,7278-v3d" }, 233 + {}, 234 + }; 235 + MODULE_DEVICE_TABLE(of, v3d_of_match); 236 + 237 + static int 238 + map_regs(struct v3d_dev *v3d, void __iomem **regs, const char *name) 239 + { 240 + struct resource *res = 241 + platform_get_resource_byname(v3d->pdev, IORESOURCE_MEM, name); 242 + 243 + *regs = devm_ioremap_resource(v3d->dev, res); 244 + return PTR_ERR_OR_ZERO(*regs); 245 + } 246 + 247 + static int v3d_platform_drm_probe(struct platform_device *pdev) 248 + { 249 + struct device *dev = &pdev->dev; 250 + struct drm_device *drm; 251 + struct v3d_dev *v3d; 252 + int ret; 253 + u32 ident1; 254 + 255 + dev->coherent_dma_mask = DMA_BIT_MASK(36); 256 + 257 + v3d = kzalloc(sizeof(*v3d), GFP_KERNEL); 258 + if (!v3d) 259 + return -ENOMEM; 260 + v3d->dev = dev; 261 + v3d->pdev = pdev; 262 + drm = &v3d->drm; 263 + 264 + ret = map_regs(v3d, &v3d->bridge_regs, "bridge"); 265 + if (ret) 266 + goto dev_free; 267 + 268 + ret = map_regs(v3d, &v3d->hub_regs, "hub"); 269 + if (ret) 270 + goto dev_free; 271 + 272 + ret = map_regs(v3d, &v3d->core_regs[0], "core0"); 273 + if (ret) 274 + goto dev_free; 275 + 276 + ident1 = V3D_READ(V3D_HUB_IDENT1); 277 + v3d->ver = (V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER) * 10 + 278 + V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_REV)); 279 + v3d->cores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES); 280 + WARN_ON(v3d->cores > 1); /* multicore not yet implemented */ 281 + 282 + if (v3d->ver < 41) { 283 + ret = map_regs(v3d, &v3d->gca_regs, "gca"); 284 + if (ret) 285 + goto dev_free; 286 + } 287 + 288 + v3d->mmu_scratch = dma_alloc_wc(dev, 4096, &v3d->mmu_scratch_paddr, 289 + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO); 290 + if (!v3d->mmu_scratch) { 291 + dev_err(dev, "Failed to allocate MMU scratch page\n"); 292 + ret = -ENOMEM; 293 + goto dev_free; 294 + } 295 + 296 + pm_runtime_use_autosuspend(dev); 297 + pm_runtime_set_autosuspend_delay(dev, 50); 298 + pm_runtime_enable(dev); 299 + 300 + ret = drm_dev_init(&v3d->drm, &v3d_drm_driver, dev); 301 + if (ret) 302 + goto dma_free; 303 + 304 + platform_set_drvdata(pdev, drm); 305 + drm->dev_private = v3d; 306 + 307 + ret = v3d_gem_init(drm); 308 + if (ret) 309 + goto dev_destroy; 310 + 311 + v3d_irq_init(v3d); 312 + 313 + ret = drm_dev_register(drm, 0); 314 + if (ret) 315 + goto gem_destroy; 316 + 317 + return 0; 318 + 319 + gem_destroy: 320 + v3d_gem_destroy(drm); 321 + dev_destroy: 322 + drm_dev_put(drm); 323 + dma_free: 324 + dma_free_wc(dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); 325 + dev_free: 326 + kfree(v3d); 327 + return ret; 328 + } 329 + 330 + static int v3d_platform_drm_remove(struct platform_device *pdev) 331 + { 332 + struct drm_device *drm = platform_get_drvdata(pdev); 333 + struct v3d_dev *v3d = to_v3d_dev(drm); 334 + 335 + drm_dev_unregister(drm); 336 + 337 + v3d_gem_destroy(drm); 338 + 339 + drm_dev_put(drm); 340 + 341 + dma_free_wc(v3d->dev, 4096, v3d->mmu_scratch, v3d->mmu_scratch_paddr); 342 + 343 + return 0; 344 + } 345 + 346 + static struct platform_driver v3d_platform_driver = { 347 + .probe = v3d_platform_drm_probe, 348 + .remove = v3d_platform_drm_remove, 349 + .driver = { 350 + .name = "v3d", 351 + .of_match_table = v3d_of_match, 352 + }, 353 + }; 354 + 355 + static int __init v3d_drm_register(void) 356 + { 357 + return platform_driver_register(&v3d_platform_driver); 358 + } 359 + 360 + static void __exit v3d_drm_unregister(void) 
361 + { 362 + platform_driver_unregister(&v3d_platform_driver); 363 + } 364 + 365 + module_init(v3d_drm_register); 366 + module_exit(v3d_drm_unregister); 367 + 368 + MODULE_ALIAS("platform:v3d-drm"); 369 + MODULE_DESCRIPTION("Broadcom V3D DRM Driver"); 370 + MODULE_AUTHOR("Eric Anholt <eric@anholt.net>"); 371 + MODULE_LICENSE("GPL v2");
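
Since GET_PARAM above is a straight table lookup from parameter to
ident register, probing the hardware from userspace is a one-ioctl
affair. A hedged sketch (same headers as the first sketch); names
follow the reg_map[] table in v3d_get_param_ioctl():

	/* Sketch: read HUB_IDENT1, which packs the version fields that
	 * the probe path above decodes with V3D_GET_FIELD(). value must
	 * be zero on input, as enforced by the ioctl.
	 */
	static int example_get_ident1(int fd, __u64 *value)
	{
		struct drm_v3d_get_param get = {
			.param = DRM_V3D_PARAM_V3D_HUB_IDENT1,
		};

		if (drmIoctl(fd, DRM_IOCTL_V3D_GET_PARAM, &get))
			return -1;

		*value = get.value;
		return 0;
	}
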
+294
drivers/gpu/drm/v3d/v3d_drv.h
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2015-2018 Broadcom */ 3 + 4 + #include <linux/reservation.h> 5 + #include <drm/drmP.h> 6 + #include <drm/drm_encoder.h> 7 + #include <drm/drm_gem.h> 8 + #include <drm/gpu_scheduler.h> 9 + 10 + #define GMP_GRANULARITY (128 * 1024) 11 + 12 + /* Enum for each of the V3D queues. We maintain various queue 13 + * tracking as an array because at some point we'll want to support 14 + * the TFU (texture formatting unit) as another queue. 15 + */ 16 + enum v3d_queue { 17 + V3D_BIN, 18 + V3D_RENDER, 19 + }; 20 + 21 + #define V3D_MAX_QUEUES (V3D_RENDER + 1) 22 + 23 + struct v3d_queue_state { 24 + struct drm_gpu_scheduler sched; 25 + 26 + u64 fence_context; 27 + u64 emit_seqno; 28 + u64 finished_seqno; 29 + }; 30 + 31 + struct v3d_dev { 32 + struct drm_device drm; 33 + 34 + /* Short representation (e.g. 33, 41) of the V3D tech version 35 + * and revision. 36 + */ 37 + int ver; 38 + 39 + struct device *dev; 40 + struct platform_device *pdev; 41 + void __iomem *hub_regs; 42 + void __iomem *core_regs[3]; 43 + void __iomem *bridge_regs; 44 + void __iomem *gca_regs; 45 + struct clk *clk; 46 + 47 + /* Virtual and DMA addresses of the single shared page table. */ 48 + volatile u32 *pt; 49 + dma_addr_t pt_paddr; 50 + 51 + /* Virtual and DMA addresses of the MMU's scratch page. When 52 + * a read or write is invalid in the MMU, it will be 53 + * redirected here. 54 + */ 55 + void *mmu_scratch; 56 + dma_addr_t mmu_scratch_paddr; 57 + 58 + /* Number of V3D cores. */ 59 + u32 cores; 60 + 61 + /* Allocator managing the address space. All units are in 62 + * number of pages. 63 + */ 64 + struct drm_mm mm; 65 + spinlock_t mm_lock; 66 + 67 + struct work_struct overflow_mem_work; 68 + 69 + struct v3d_exec_info *bin_job; 70 + struct v3d_exec_info *render_job; 71 + 72 + struct v3d_queue_state queue[V3D_MAX_QUEUES]; 73 + 74 + /* Spinlock used to synchronize the overflow memory 75 + * management against bin job submission. 76 + */ 77 + spinlock_t job_lock; 78 + 79 + /* Protects bo_stats */ 80 + struct mutex bo_lock; 81 + 82 + /* Lock taken when resetting the GPU, to keep multiple 83 + * processes from trying to park the scheduler threads and 84 + * reset at once. 85 + */ 86 + struct mutex reset_lock; 87 + 88 + struct { 89 + u32 num_allocated; 90 + u32 pages_allocated; 91 + } bo_stats; 92 + }; 93 + 94 + static inline struct v3d_dev * 95 + to_v3d_dev(struct drm_device *dev) 96 + { 97 + return (struct v3d_dev *)dev->dev_private; 98 + } 99 + 100 + /* The per-fd struct, which tracks the MMU mappings. 
*/ 101 + struct v3d_file_priv { 102 + struct v3d_dev *v3d; 103 + 104 + struct drm_sched_entity sched_entity[V3D_MAX_QUEUES]; 105 + }; 106 + 107 + /* Tracks a mapping of a BO into a per-fd address space */ 108 + struct v3d_vma { 109 + struct v3d_page_table *pt; 110 + struct list_head list; /* entry in v3d_bo.vmas */ 111 + }; 112 + 113 + struct v3d_bo { 114 + struct drm_gem_object base; 115 + 116 + struct mutex lock; 117 + 118 + struct drm_mm_node node; 119 + 120 + u32 pages_refcount; 121 + struct page **pages; 122 + struct sg_table *sgt; 123 + void *vaddr; 124 + 125 + struct list_head vmas; /* list of v3d_vma */ 126 + 127 + /* List entry for the BO's position in 128 + * v3d_exec_info->unref_list 129 + */ 130 + struct list_head unref_head; 131 + 132 + /* normally (resv == &_resv) except for imported bo's */ 133 + struct reservation_object *resv; 134 + struct reservation_object _resv; 135 + }; 136 + 137 + static inline struct v3d_bo * 138 + to_v3d_bo(struct drm_gem_object *bo) 139 + { 140 + return (struct v3d_bo *)bo; 141 + } 142 + 143 + struct v3d_fence { 144 + struct dma_fence base; 145 + struct drm_device *dev; 146 + /* v3d seqno for signaled() test */ 147 + u64 seqno; 148 + enum v3d_queue queue; 149 + }; 150 + 151 + static inline struct v3d_fence * 152 + to_v3d_fence(struct dma_fence *fence) 153 + { 154 + return (struct v3d_fence *)fence; 155 + } 156 + 157 + #define V3D_READ(offset) readl(v3d->hub_regs + offset) 158 + #define V3D_WRITE(offset, val) writel(val, v3d->hub_regs + offset) 159 + 160 + #define V3D_BRIDGE_READ(offset) readl(v3d->bridge_regs + offset) 161 + #define V3D_BRIDGE_WRITE(offset, val) writel(val, v3d->bridge_regs + offset) 162 + 163 + #define V3D_GCA_READ(offset) readl(v3d->gca_regs + offset) 164 + #define V3D_GCA_WRITE(offset, val) writel(val, v3d->gca_regs + offset) 165 + 166 + #define V3D_CORE_READ(core, offset) readl(v3d->core_regs[core] + offset) 167 + #define V3D_CORE_WRITE(core, offset, val) writel(val, v3d->core_regs[core] + offset) 168 + 169 + struct v3d_job { 170 + struct drm_sched_job base; 171 + 172 + struct v3d_exec_info *exec; 173 + 174 + /* An optional fence userspace can pass in for the job to depend on. */ 175 + struct dma_fence *in_fence; 176 + 177 + /* v3d fence to be signaled by IRQ handler when the job is complete. */ 178 + struct dma_fence *done_fence; 179 + 180 + /* GPU virtual addresses of the start/end of the CL job. */ 181 + u32 start, end; 182 + }; 183 + 184 + struct v3d_exec_info { 185 + struct v3d_dev *v3d; 186 + 187 + struct v3d_job bin, render; 188 + 189 + /* Fence for when the scheduler considers the binner to be 190 + * done, for render to depend on. 191 + */ 192 + struct dma_fence *bin_done_fence; 193 + 194 + struct kref refcount; 195 + 196 + /* This is the array of BOs that were looked up at the start of exec. */ 197 + struct v3d_bo **bo; 198 + u32 bo_count; 199 + 200 + /* List of overflow BOs used in the job that need to be 201 + * released once the job is complete. 202 + */ 203 + struct list_head unref_list; 204 + 205 + /* Submitted tile memory allocation start/size, tile state. */ 206 + u32 qma, qms, qts; 207 + }; 208 + 209 + /** 210 + * _wait_for - magic (register) wait macro 211 + * 212 + * Does the right thing for modeset paths when run under kdgb or similar atomic 213 + * contexts. Note that it's important that we check the condition again after 214 + * having timed out, since the timeout could be due to preemption or similar and 215 + * we've never had a chance to check the condition before the timeout. 
216 + */ 217 + #define wait_for(COND, MS) ({ \ 218 + unsigned long timeout__ = jiffies + msecs_to_jiffies(MS) + 1; \ 219 + int ret__ = 0; \ 220 + while (!(COND)) { \ 221 + if (time_after(jiffies, timeout__)) { \ 222 + if (!(COND)) \ 223 + ret__ = -ETIMEDOUT; \ 224 + break; \ 225 + } \ 226 + msleep(1); \ 227 + } \ 228 + ret__; \ 229 + }) 230 + 231 + static inline unsigned long nsecs_to_jiffies_timeout(const u64 n) 232 + { 233 + /* nsecs_to_jiffies64() does not guard against overflow */ 234 + if (NSEC_PER_SEC % HZ && 235 + div_u64(n, NSEC_PER_SEC) >= MAX_JIFFY_OFFSET / HZ) 236 + return MAX_JIFFY_OFFSET; 237 + 238 + return min_t(u64, MAX_JIFFY_OFFSET, nsecs_to_jiffies64(n) + 1); 239 + } 240 + 241 + /* v3d_bo.c */ 242 + void v3d_free_object(struct drm_gem_object *gem_obj); 243 + struct v3d_bo *v3d_bo_create(struct drm_device *dev, struct drm_file *file_priv, 244 + size_t size); 245 + int v3d_create_bo_ioctl(struct drm_device *dev, void *data, 246 + struct drm_file *file_priv); 247 + int v3d_mmap_bo_ioctl(struct drm_device *dev, void *data, 248 + struct drm_file *file_priv); 249 + int v3d_get_bo_offset_ioctl(struct drm_device *dev, void *data, 250 + struct drm_file *file_priv); 251 + int v3d_gem_fault(struct vm_fault *vmf); 252 + int v3d_mmap(struct file *filp, struct vm_area_struct *vma); 253 + struct reservation_object *v3d_prime_res_obj(struct drm_gem_object *obj); 254 + int v3d_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); 255 + struct sg_table *v3d_prime_get_sg_table(struct drm_gem_object *obj); 256 + struct drm_gem_object *v3d_prime_import_sg_table(struct drm_device *dev, 257 + struct dma_buf_attachment *attach, 258 + struct sg_table *sgt); 259 + 260 + /* v3d_debugfs.c */ 261 + int v3d_debugfs_init(struct drm_minor *minor); 262 + 263 + /* v3d_fence.c */ 264 + extern const struct dma_fence_ops v3d_fence_ops; 265 + struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue); 266 + 267 + /* v3d_gem.c */ 268 + int v3d_gem_init(struct drm_device *dev); 269 + void v3d_gem_destroy(struct drm_device *dev); 270 + int v3d_submit_cl_ioctl(struct drm_device *dev, void *data, 271 + struct drm_file *file_priv); 272 + int v3d_wait_bo_ioctl(struct drm_device *dev, void *data, 273 + struct drm_file *file_priv); 274 + void v3d_exec_put(struct v3d_exec_info *exec); 275 + void v3d_reset(struct v3d_dev *v3d); 276 + void v3d_invalidate_caches(struct v3d_dev *v3d); 277 + void v3d_flush_caches(struct v3d_dev *v3d); 278 + 279 + /* v3d_irq.c */ 280 + void v3d_irq_init(struct v3d_dev *v3d); 281 + void v3d_irq_enable(struct v3d_dev *v3d); 282 + void v3d_irq_disable(struct v3d_dev *v3d); 283 + void v3d_irq_reset(struct v3d_dev *v3d); 284 + 285 + /* v3d_mmu.c */ 286 + int v3d_mmu_get_offset(struct drm_file *file_priv, struct v3d_bo *bo, 287 + u32 *offset); 288 + int v3d_mmu_set_page_table(struct v3d_dev *v3d); 289 + void v3d_mmu_insert_ptes(struct v3d_bo *bo); 290 + void v3d_mmu_remove_ptes(struct v3d_bo *bo); 291 + 292 + /* v3d_sched.c */ 293 + int v3d_sched_init(struct v3d_dev *v3d); 294 + void v3d_sched_fini(struct v3d_dev *v3d);
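
The kerneldoc for wait_for() above stresses rechecking the condition
after a timeout, since the timeout may be an artifact of preemption. A
self-contained userspace analogue of the same pattern (illustrative
only, not part of this patch):

	#include <errno.h>
	#include <stdbool.h>
	#include <time.h>

	static int wait_for_flag(volatile bool *flag, long timeout_ms)
	{
		struct timespec now, end;

		clock_gettime(CLOCK_MONOTONIC, &end);
		end.tv_sec += timeout_ms / 1000;
		end.tv_nsec += (timeout_ms % 1000) * 1000000L;
		if (end.tv_nsec >= 1000000000L) {
			end.tv_sec++;
			end.tv_nsec -= 1000000000L;
		}

		while (!*flag) {
			clock_gettime(CLOCK_MONOTONIC, &now);
			if (now.tv_sec > end.tv_sec ||
			    (now.tv_sec == end.tv_sec &&
			     now.tv_nsec > end.tv_nsec)) {
				/* Recheck after the deadline, mirroring
				 * wait_for(): the deadline may have
				 * passed due to preemption, before the
				 * condition was ever sampled as true.
				 */
				return *flag ? 0 : -ETIMEDOUT;
			}
			nanosleep(&(struct timespec){ .tv_nsec = 1000000 },
				  NULL);
		}
		return 0;
	}
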
+58
drivers/gpu/drm/v3d/v3d_fence.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2017-2018 Broadcom */ 3 + 4 + #include "v3d_drv.h" 5 + 6 + struct dma_fence *v3d_fence_create(struct v3d_dev *v3d, enum v3d_queue queue) 7 + { 8 + struct v3d_fence *fence; 9 + 10 + fence = kzalloc(sizeof(*fence), GFP_KERNEL); 11 + if (!fence) 12 + return ERR_PTR(-ENOMEM); 13 + 14 + fence->dev = &v3d->drm; 15 + fence->queue = queue; 16 + fence->seqno = ++v3d->queue[queue].emit_seqno; 17 + dma_fence_init(&fence->base, &v3d_fence_ops, &v3d->job_lock, 18 + v3d->queue[queue].fence_context, fence->seqno); 19 + 20 + return &fence->base; 21 + } 22 + 23 + static const char *v3d_fence_get_driver_name(struct dma_fence *fence) 24 + { 25 + return "v3d"; 26 + } 27 + 28 + static const char *v3d_fence_get_timeline_name(struct dma_fence *fence) 29 + { 30 + struct v3d_fence *f = to_v3d_fence(fence); 31 + 32 + if (f->queue == V3D_BIN) 33 + return "v3d-bin"; 34 + else 35 + return "v3d-render"; 36 + } 37 + 38 + static bool v3d_fence_enable_signaling(struct dma_fence *fence) 39 + { 40 + return true; 41 + } 42 + 43 + static bool v3d_fence_signaled(struct dma_fence *fence) 44 + { 45 + struct v3d_fence *f = to_v3d_fence(fence); 46 + struct v3d_dev *v3d = to_v3d_dev(f->dev); 47 + 48 + return v3d->queue[f->queue].finished_seqno >= f->seqno; 49 + } 50 + 51 + const struct dma_fence_ops v3d_fence_ops = { 52 + .get_driver_name = v3d_fence_get_driver_name, 53 + .get_timeline_name = v3d_fence_get_timeline_name, 54 + .enable_signaling = v3d_fence_enable_signaling, 55 + .signaled = v3d_fence_signaled, 56 + .wait = dma_fence_default_wait, 57 + .release = dma_fence_free, 58 + };
+668
drivers/gpu/drm/v3d/v3d_gem.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2014-2018 Broadcom */ 3 + 4 + #include <drm/drmP.h> 5 + #include <drm/drm_syncobj.h> 6 + #include <linux/module.h> 7 + #include <linux/platform_device.h> 8 + #include <linux/pm_runtime.h> 9 + #include <linux/device.h> 10 + #include <linux/io.h> 11 + #include <linux/sched/signal.h> 12 + 13 + #include "uapi/drm/v3d_drm.h" 14 + #include "v3d_drv.h" 15 + #include "v3d_regs.h" 16 + #include "v3d_trace.h" 17 + 18 + static void 19 + v3d_init_core(struct v3d_dev *v3d, int core) 20 + { 21 + /* Set OVRTMUOUT, which means that the texture sampler uniform 22 + * configuration's tmu output type field is used, instead of 23 + * using the hardware default behavior based on the texture 24 + * type. If you want the default behavior, you can still put 25 + * "2" in the indirect texture state's output_type field. 26 + */ 27 + V3D_CORE_WRITE(core, V3D_CTL_MISCCFG, V3D_MISCCFG_OVRTMUOUT); 28 + 29 + /* Whenever we flush the L2T cache, we always want to flush 30 + * the whole thing. 31 + */ 32 + V3D_CORE_WRITE(core, V3D_CTL_L2TFLSTA, 0); 33 + V3D_CORE_WRITE(core, V3D_CTL_L2TFLEND, ~0); 34 + } 35 + 36 + /* Sets invariant state for the HW. */ 37 + static void 38 + v3d_init_hw_state(struct v3d_dev *v3d) 39 + { 40 + v3d_init_core(v3d, 0); 41 + } 42 + 43 + static void 44 + v3d_idle_axi(struct v3d_dev *v3d, int core) 45 + { 46 + V3D_CORE_WRITE(core, V3D_GMP_CFG, V3D_GMP_CFG_STOP_REQ); 47 + 48 + if (wait_for((V3D_CORE_READ(core, V3D_GMP_STATUS) & 49 + (V3D_GMP_STATUS_RD_COUNT_MASK | 50 + V3D_GMP_STATUS_WR_COUNT_MASK | 51 + V3D_GMP_STATUS_CFG_BUSY)) == 0, 100)) { 52 + DRM_ERROR("Failed to wait for safe GMP shutdown\n"); 53 + } 54 + } 55 + 56 + static void 57 + v3d_idle_gca(struct v3d_dev *v3d) 58 + { 59 + if (v3d->ver >= 41) 60 + return; 61 + 62 + V3D_GCA_WRITE(V3D_GCA_SAFE_SHUTDOWN, V3D_GCA_SAFE_SHUTDOWN_EN); 63 + 64 + if (wait_for((V3D_GCA_READ(V3D_GCA_SAFE_SHUTDOWN_ACK) & 65 + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED) == 66 + V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED, 100)) { 67 + DRM_ERROR("Failed to wait for safe GCA shutdown\n"); 68 + } 69 + } 70 + 71 + static void 72 + v3d_reset_v3d(struct v3d_dev *v3d) 73 + { 74 + int version = V3D_BRIDGE_READ(V3D_TOP_GR_BRIDGE_REVISION); 75 + 76 + if (V3D_GET_FIELD(version, V3D_TOP_GR_BRIDGE_MAJOR) == 2) { 77 + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 78 + V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT); 79 + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_0, 0); 80 + 81 + /* GFXH-1383: The SW_INIT may cause a stray write to address 0 82 + * of the unit, so reset it to its power-on value here. 83 + */ 84 + V3D_WRITE(V3D_HUB_AXICFG, V3D_HUB_AXICFG_MAX_LEN_MASK); 85 + } else { 86 + WARN_ON_ONCE(V3D_GET_FIELD(version, 87 + V3D_TOP_GR_BRIDGE_MAJOR) != 7); 88 + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 89 + V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT); 90 + V3D_BRIDGE_WRITE(V3D_TOP_GR_BRIDGE_SW_INIT_1, 0); 91 + } 92 + 93 + v3d_init_hw_state(v3d); 94 + } 95 + 96 + void 97 + v3d_reset(struct v3d_dev *v3d) 98 + { 99 + struct drm_device *dev = &v3d->drm; 100 + 101 + DRM_ERROR("Resetting GPU.\n"); 102 + trace_v3d_reset_begin(dev); 103 + 104 + /* XXX: only needed for safe powerdown, not reset. 
*/ 105 + if (false) 106 + v3d_idle_axi(v3d, 0); 107 + 108 + v3d_idle_gca(v3d); 109 + v3d_reset_v3d(v3d); 110 + 111 + v3d_mmu_set_page_table(v3d); 112 + v3d_irq_reset(v3d); 113 + 114 + trace_v3d_reset_end(dev); 115 + } 116 + 117 + static void 118 + v3d_flush_l3(struct v3d_dev *v3d) 119 + { 120 + if (v3d->ver < 41) { 121 + u32 gca_ctrl = V3D_GCA_READ(V3D_GCA_CACHE_CTRL); 122 + 123 + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, 124 + gca_ctrl | V3D_GCA_CACHE_CTRL_FLUSH); 125 + 126 + if (v3d->ver < 33) { 127 + V3D_GCA_WRITE(V3D_GCA_CACHE_CTRL, 128 + gca_ctrl & ~V3D_GCA_CACHE_CTRL_FLUSH); 129 + } 130 + } 131 + } 132 + 133 + /* Invalidates the (read-only) L2 cache. */ 134 + static void 135 + v3d_invalidate_l2(struct v3d_dev *v3d, int core) 136 + { 137 + V3D_CORE_WRITE(core, V3D_CTL_L2CACTL, 138 + V3D_L2CACTL_L2CCLR | 139 + V3D_L2CACTL_L2CENA); 140 + } 141 + 142 + static void 143 + v3d_invalidate_l1td(struct v3d_dev *v3d, int core) 144 + { 145 + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, V3D_L2TCACTL_TMUWCF); 146 + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & 147 + V3D_L2TCACTL_L2TFLS), 100)) { 148 + DRM_ERROR("Timeout waiting for L1T write combiner flush\n"); 149 + } 150 + } 151 + 152 + /* Invalidates texture L2 cachelines */ 153 + static void 154 + v3d_flush_l2t(struct v3d_dev *v3d, int core) 155 + { 156 + v3d_invalidate_l1td(v3d, core); 157 + 158 + V3D_CORE_WRITE(core, V3D_CTL_L2TCACTL, 159 + V3D_L2TCACTL_L2TFLS | 160 + V3D_SET_FIELD(V3D_L2TCACTL_FLM_FLUSH, V3D_L2TCACTL_FLM)); 161 + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & 162 + V3D_L2TCACTL_L2TFLS), 100)) { 163 + DRM_ERROR("Timeout waiting for L2T flush\n"); 164 + } 165 + } 166 + 167 + /* Invalidates the slice caches. These are read-only caches. */ 168 + static void 169 + v3d_invalidate_slices(struct v3d_dev *v3d, int core) 170 + { 171 + V3D_CORE_WRITE(core, V3D_CTL_SLCACTL, 172 + V3D_SET_FIELD(0xf, V3D_SLCACTL_TVCCS) | 173 + V3D_SET_FIELD(0xf, V3D_SLCACTL_TDCCS) | 174 + V3D_SET_FIELD(0xf, V3D_SLCACTL_UCC) | 175 + V3D_SET_FIELD(0xf, V3D_SLCACTL_ICC)); 176 + } 177 + 178 + /* Invalidates texture L2 cachelines */ 179 + static void 180 + v3d_invalidate_l2t(struct v3d_dev *v3d, int core) 181 + { 182 + V3D_CORE_WRITE(core, 183 + V3D_CTL_L2TCACTL, 184 + V3D_L2TCACTL_L2TFLS | 185 + V3D_SET_FIELD(V3D_L2TCACTL_FLM_CLEAR, V3D_L2TCACTL_FLM)); 186 + if (wait_for(!(V3D_CORE_READ(core, V3D_CTL_L2TCACTL) & 187 + V3D_L2TCACTL_L2TFLS), 100)) { 188 + DRM_ERROR("Timeout waiting for L2T invalidate\n"); 189 + } 190 + } 191 + 192 + void 193 + v3d_invalidate_caches(struct v3d_dev *v3d) 194 + { 195 + v3d_flush_l3(v3d); 196 + 197 + v3d_invalidate_l2(v3d, 0); 198 + v3d_invalidate_slices(v3d, 0); 199 + v3d_flush_l2t(v3d, 0); 200 + } 201 + 202 + void 203 + v3d_flush_caches(struct v3d_dev *v3d) 204 + { 205 + v3d_invalidate_l1td(v3d, 0); 206 + v3d_invalidate_l2t(v3d, 0); 207 + } 208 + 209 + static void 210 + v3d_attach_object_fences(struct v3d_exec_info *exec) 211 + { 212 + struct dma_fence *out_fence = &exec->render.base.s_fence->finished; 213 + struct v3d_bo *bo; 214 + int i; 215 + 216 + for (i = 0; i < exec->bo_count; i++) { 217 + bo = to_v3d_bo(&exec->bo[i]->base); 218 + 219 + /* XXX: Use shared fences for read-only objects. 
*/ 220 + reservation_object_add_excl_fence(bo->resv, out_fence); 221 + } 222 + } 223 + 224 + static void 225 + v3d_unlock_bo_reservations(struct drm_device *dev, 226 + struct v3d_exec_info *exec, 227 + struct ww_acquire_ctx *acquire_ctx) 228 + { 229 + int i; 230 + 231 + for (i = 0; i < exec->bo_count; i++) { 232 + struct v3d_bo *bo = to_v3d_bo(&exec->bo[i]->base); 233 + 234 + ww_mutex_unlock(&bo->resv->lock); 235 + } 236 + 237 + ww_acquire_fini(acquire_ctx); 238 + } 239 + 240 + /* Takes the reservation lock on all the BOs being referenced, so that 241 + * at queue submit time we can update the reservations. 242 + * 243 + * We don't lock the RCL the tile alloc/state BOs, or overflow memory 244 + * (all of which are on exec->unref_list). They're entirely private 245 + * to v3d, so we don't attach dma-buf fences to them. 246 + */ 247 + static int 248 + v3d_lock_bo_reservations(struct drm_device *dev, 249 + struct v3d_exec_info *exec, 250 + struct ww_acquire_ctx *acquire_ctx) 251 + { 252 + int contended_lock = -1; 253 + int i, ret; 254 + struct v3d_bo *bo; 255 + 256 + ww_acquire_init(acquire_ctx, &reservation_ww_class); 257 + 258 + retry: 259 + if (contended_lock != -1) { 260 + bo = to_v3d_bo(&exec->bo[contended_lock]->base); 261 + ret = ww_mutex_lock_slow_interruptible(&bo->resv->lock, 262 + acquire_ctx); 263 + if (ret) { 264 + ww_acquire_done(acquire_ctx); 265 + return ret; 266 + } 267 + } 268 + 269 + for (i = 0; i < exec->bo_count; i++) { 270 + if (i == contended_lock) 271 + continue; 272 + 273 + bo = to_v3d_bo(&exec->bo[i]->base); 274 + 275 + ret = ww_mutex_lock_interruptible(&bo->resv->lock, acquire_ctx); 276 + if (ret) { 277 + int j; 278 + 279 + for (j = 0; j < i; j++) { 280 + bo = to_v3d_bo(&exec->bo[j]->base); 281 + ww_mutex_unlock(&bo->resv->lock); 282 + } 283 + 284 + if (contended_lock != -1 && contended_lock >= i) { 285 + bo = to_v3d_bo(&exec->bo[contended_lock]->base); 286 + 287 + ww_mutex_unlock(&bo->resv->lock); 288 + } 289 + 290 + if (ret == -EDEADLK) { 291 + contended_lock = i; 292 + goto retry; 293 + } 294 + 295 + ww_acquire_done(acquire_ctx); 296 + return ret; 297 + } 298 + } 299 + 300 + ww_acquire_done(acquire_ctx); 301 + 302 + /* Reserve space for our shared (read-only) fence references, 303 + * before we commit the CL to the hardware. 304 + */ 305 + for (i = 0; i < exec->bo_count; i++) { 306 + bo = to_v3d_bo(&exec->bo[i]->base); 307 + 308 + ret = reservation_object_reserve_shared(bo->resv); 309 + if (ret) { 310 + v3d_unlock_bo_reservations(dev, exec, acquire_ctx); 311 + return ret; 312 + } 313 + } 314 + 315 + return 0; 316 + } 317 + 318 + /** 319 + * v3d_cl_lookup_bos() - Sets up exec->bo[] with the GEM objects 320 + * referenced by the job. 321 + * @dev: DRM device 322 + * @file_priv: DRM file for this fd 323 + * @exec: V3D job being set up 324 + * 325 + * The command validator needs to reference BOs by their index within 326 + * the submitted job's BO list. This does the validation of the job's 327 + * BO list and reference counting for the lifetime of the job. 328 + * 329 + * Note that this function doesn't need to unreference the BOs on 330 + * failure, because that will happen at v3d_exec_cleanup() time. 
331 + */ 332 + static int 333 + v3d_cl_lookup_bos(struct drm_device *dev, 334 + struct drm_file *file_priv, 335 + struct drm_v3d_submit_cl *args, 336 + struct v3d_exec_info *exec) 337 + { 338 + u32 *handles; 339 + int ret = 0; 340 + int i; 341 + 342 + exec->bo_count = args->bo_handle_count; 343 + 344 + if (!exec->bo_count) { 345 + /* See comment on bo_index for why we have to check 346 + * this. 347 + */ 348 + DRM_DEBUG("Rendering requires BOs\n"); 349 + return -EINVAL; 350 + } 351 + 352 + exec->bo = kvmalloc_array(exec->bo_count, 353 + sizeof(struct drm_gem_cma_object *), 354 + GFP_KERNEL | __GFP_ZERO); 355 + if (!exec->bo) { 356 + DRM_DEBUG("Failed to allocate validated BO pointers\n"); 357 + return -ENOMEM; 358 + } 359 + 360 + handles = kvmalloc_array(exec->bo_count, sizeof(u32), GFP_KERNEL); 361 + if (!handles) { 362 + ret = -ENOMEM; 363 + DRM_DEBUG("Failed to allocate incoming GEM handles\n"); 364 + goto fail; 365 + } 366 + 367 + if (copy_from_user(handles, 368 + (void __user *)(uintptr_t)args->bo_handles, 369 + exec->bo_count * sizeof(u32))) { 370 + ret = -EFAULT; 371 + DRM_DEBUG("Failed to copy in GEM handles\n"); 372 + goto fail; 373 + } 374 + 375 + spin_lock(&file_priv->table_lock); 376 + for (i = 0; i < exec->bo_count; i++) { 377 + struct drm_gem_object *bo = idr_find(&file_priv->object_idr, 378 + handles[i]); 379 + if (!bo) { 380 + DRM_DEBUG("Failed to look up GEM BO %d: %d\n", 381 + i, handles[i]); 382 + ret = -ENOENT; 383 + spin_unlock(&file_priv->table_lock); 384 + goto fail; 385 + } 386 + drm_gem_object_get(bo); 387 + exec->bo[i] = to_v3d_bo(bo); 388 + } 389 + spin_unlock(&file_priv->table_lock); 390 + 391 + fail: 392 + kvfree(handles); 393 + return ret; 394 + } 395 + 396 + static void 397 + v3d_exec_cleanup(struct kref *ref) 398 + { 399 + struct v3d_exec_info *exec = container_of(ref, struct v3d_exec_info, 400 + refcount); 401 + struct v3d_dev *v3d = exec->v3d; 402 + unsigned int i; 403 + struct v3d_bo *bo, *save; 404 + 405 + dma_fence_put(exec->bin.in_fence); 406 + dma_fence_put(exec->render.in_fence); 407 + 408 + dma_fence_put(exec->bin.done_fence); 409 + dma_fence_put(exec->render.done_fence); 410 + 411 + dma_fence_put(exec->bin_done_fence); 412 + 413 + for (i = 0; i < exec->bo_count; i++) 414 + drm_gem_object_put_unlocked(&exec->bo[i]->base); 415 + kvfree(exec->bo); 416 + 417 + list_for_each_entry_safe(bo, save, &exec->unref_list, unref_head) { 418 + drm_gem_object_put_unlocked(&bo->base); 419 + } 420 + 421 + pm_runtime_mark_last_busy(v3d->dev); 422 + pm_runtime_put_autosuspend(v3d->dev); 423 + 424 + kfree(exec); 425 + } 426 + 427 + void v3d_exec_put(struct v3d_exec_info *exec) 428 + { 429 + kref_put(&exec->refcount, v3d_exec_cleanup); 430 + } 431 + 432 + int 433 + v3d_wait_bo_ioctl(struct drm_device *dev, void *data, 434 + struct drm_file *file_priv) 435 + { 436 + int ret; 437 + struct drm_v3d_wait_bo *args = data; 438 + struct drm_gem_object *gem_obj; 439 + struct v3d_bo *bo; 440 + ktime_t start = ktime_get(); 441 + u64 delta_ns; 442 + unsigned long timeout_jiffies = 443 + nsecs_to_jiffies_timeout(args->timeout_ns); 444 + 445 + if (args->pad != 0) 446 + return -EINVAL; 447 + 448 + gem_obj = drm_gem_object_lookup(file_priv, args->handle); 449 + if (!gem_obj) { 450 + DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle); 451 + return -EINVAL; 452 + } 453 + bo = to_v3d_bo(gem_obj); 454 + 455 + ret = reservation_object_wait_timeout_rcu(bo->resv, 456 + true, true, 457 + timeout_jiffies); 458 + 459 + if (ret == 0) 460 + ret = -ETIME; 461 + else if (ret > 0) 462 + ret = 
0; 463 + 464 + /* Decrement the user's timeout, in case we got interrupted 465 + * such that the ioctl will be restarted. 466 + */ 467 + delta_ns = ktime_to_ns(ktime_sub(ktime_get(), start)); 468 + if (delta_ns < args->timeout_ns) 469 + args->timeout_ns -= delta_ns; 470 + else 471 + args->timeout_ns = 0; 472 + 473 + /* Asked to wait beyond the jiffie/scheduler precision? */ 474 + if (ret == -ETIME && args->timeout_ns) 475 + ret = -EAGAIN; 476 + 477 + drm_gem_object_put_unlocked(gem_obj); 478 + 479 + return ret; 480 + } 481 + 482 + /** 483 + * v3d_submit_cl_ioctl() - Submits a job (frame) to the V3D. 484 + * @dev: DRM device 485 + * @data: ioctl argument 486 + * @file_priv: DRM file for this fd 487 + * 488 + * This is the main entrypoint for userspace to submit a 3D frame to 489 + * the GPU. Userspace provides the binner command list (if 490 + * applicable), and the kernel sets up the render command list to draw 491 + * to the framebuffer described in the ioctl, using the command lists 492 + * that the 3D engine's binner will produce. 493 + */ 494 + int 495 + v3d_submit_cl_ioctl(struct drm_device *dev, void *data, 496 + struct drm_file *file_priv) 497 + { 498 + struct v3d_dev *v3d = to_v3d_dev(dev); 499 + struct v3d_file_priv *v3d_priv = file_priv->driver_priv; 500 + struct drm_v3d_submit_cl *args = data; 501 + struct v3d_exec_info *exec; 502 + struct ww_acquire_ctx acquire_ctx; 503 + struct drm_syncobj *sync_out; 504 + int ret = 0; 505 + 506 + if (args->pad != 0) { 507 + DRM_INFO("pad must be zero: %d\n", args->pad); 508 + return -EINVAL; 509 + } 510 + 511 + exec = kcalloc(1, sizeof(*exec), GFP_KERNEL); 512 + if (!exec) 513 + return -ENOMEM; 514 + 515 + ret = pm_runtime_get_sync(v3d->dev); 516 + if (ret < 0) { 517 + kfree(exec); 518 + return ret; 519 + } 520 + 521 + kref_init(&exec->refcount); 522 + 523 + ret = drm_syncobj_find_fence(file_priv, args->in_sync_bcl, 524 + &exec->bin.in_fence); 525 + if (ret == -EINVAL) 526 + goto fail; 527 + 528 + ret = drm_syncobj_find_fence(file_priv, args->in_sync_rcl, 529 + &exec->render.in_fence); 530 + if (ret == -EINVAL) 531 + goto fail; 532 + 533 + exec->qma = args->qma; 534 + exec->qms = args->qms; 535 + exec->qts = args->qts; 536 + exec->bin.exec = exec; 537 + exec->bin.start = args->bcl_start; 538 + exec->bin.end = args->bcl_end; 539 + exec->render.exec = exec; 540 + exec->render.start = args->rcl_start; 541 + exec->render.end = args->rcl_end; 542 + exec->v3d = v3d; 543 + INIT_LIST_HEAD(&exec->unref_list); 544 + 545 + ret = v3d_cl_lookup_bos(dev, file_priv, args, exec); 546 + if (ret) 547 + goto fail; 548 + 549 + ret = v3d_lock_bo_reservations(dev, exec, &acquire_ctx); 550 + if (ret) 551 + goto fail; 552 + 553 + if (exec->bin.start != exec->bin.end) { 554 + ret = drm_sched_job_init(&exec->bin.base, 555 + &v3d->queue[V3D_BIN].sched, 556 + &v3d_priv->sched_entity[V3D_BIN], 557 + v3d_priv); 558 + if (ret) 559 + goto fail_unreserve; 560 + 561 + exec->bin_done_fence = 562 + dma_fence_get(&exec->bin.base.s_fence->finished); 563 + 564 + kref_get(&exec->refcount); /* put by scheduler job completion */ 565 + drm_sched_entity_push_job(&exec->bin.base, 566 + &v3d_priv->sched_entity[V3D_BIN]); 567 + } 568 + 569 + ret = drm_sched_job_init(&exec->render.base, 570 + &v3d->queue[V3D_RENDER].sched, 571 + &v3d_priv->sched_entity[V3D_RENDER], 572 + v3d_priv); 573 + if (ret) 574 + goto fail_unreserve; 575 + 576 + kref_get(&exec->refcount); /* put by scheduler job completion */ 577 + drm_sched_entity_push_job(&exec->render.base, 578 + 
579 +
580 + v3d_attach_object_fences(exec);
581 +
582 + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
583 +
584 + /* Update the return sync object for the job. */
585 + sync_out = drm_syncobj_find(file_priv, args->out_sync);
586 + if (sync_out) {
587 + drm_syncobj_replace_fence(sync_out,
588 + &exec->render.base.s_fence->finished);
589 + drm_syncobj_put(sync_out);
590 + }
591 +
592 + v3d_exec_put(exec);
593 +
594 + return 0;
595 +
596 + fail_unreserve:
597 + v3d_unlock_bo_reservations(dev, exec, &acquire_ctx);
598 + fail:
599 + v3d_exec_put(exec);
600 +
601 + return ret;
602 + }
603 +
604 + int
605 + v3d_gem_init(struct drm_device *dev)
606 + {
607 + struct v3d_dev *v3d = to_v3d_dev(dev);
608 + u32 pt_size = 4096 * 1024;
609 + int ret, i;
610 +
611 + for (i = 0; i < V3D_MAX_QUEUES; i++)
612 + v3d->queue[i].fence_context = dma_fence_context_alloc(1);
613 +
614 + spin_lock_init(&v3d->mm_lock);
615 + spin_lock_init(&v3d->job_lock);
616 + mutex_init(&v3d->bo_lock);
617 + mutex_init(&v3d->reset_lock);
618 +
619 + /* Note: We don't allocate address 0. Various bits of HW
620 + * treat 0 as special, such as the occlusion query counters
621 + * where 0 means "disabled".
622 + */
623 + drm_mm_init(&v3d->mm, 1, pt_size / sizeof(u32) - 1);
624 +
625 + v3d->pt = dma_alloc_wc(v3d->dev, pt_size,
626 + &v3d->pt_paddr,
627 + GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
628 + if (!v3d->pt) {
629 + drm_mm_takedown(&v3d->mm);
630 + dev_err(v3d->dev,
631 + "Failed to allocate page tables. "
632 + "Please ensure you have CMA enabled.\n");
633 + return -ENOMEM;
634 + }
635 +
636 + v3d_init_hw_state(v3d);
637 + v3d_mmu_set_page_table(v3d);
638 +
639 + ret = v3d_sched_init(v3d);
640 + if (ret) {
641 + drm_mm_takedown(&v3d->mm);
642 + dma_free_wc(v3d->dev, pt_size, (void *)v3d->pt,
643 + v3d->pt_paddr);
644 + return ret;
645 + }
646 + return 0;
647 + }
648 +
649 + void
650 + v3d_gem_destroy(struct drm_device *dev)
651 + {
652 + struct v3d_dev *v3d = to_v3d_dev(dev);
653 + enum v3d_queue q;
654 +
655 + v3d_sched_fini(v3d);
656 +
657 + /* Waiting for exec to finish would need to be done before
658 + * unregistering V3D, so by this point the queues should be idle.
659 + */
660 + for (q = 0; q < V3D_MAX_QUEUES; q++) {
661 + WARN_ON(v3d->queue[q].emit_seqno !=
662 + v3d->queue[q].finished_seqno);
663 + }
664 +
665 + drm_mm_takedown(&v3d->mm);
666 +
667 + dma_free_wc(v3d->dev, 4096 * 1024, (void *)v3d->pt, v3d->pt_paddr);
668 + }
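A note on the timeout handling in v3d_wait_bo_ioctl() above: because the
remaining time is written back into args->timeout_ns, an interrupted or
-EAGAIN'd wait can simply be reissued without extending the total wait. A
minimal userspace sketch of that restart loop (illustrative only, not part
of this patch; error handling elided):

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include "drm/v3d_drm.h"

/* Wait for all rendering to a BO to complete, restarting the ioctl when
 * it is interrupted or runs into the jiffy-precision -EAGAIN case.
 * Assumes "fd" is an open V3D DRM fd and "handle" a GEM handle.
 */
static int v3d_wait_bo(int fd, uint32_t handle, uint64_t timeout_ns)
{
	struct drm_v3d_wait_bo wait;
	int ret;

	memset(&wait, 0, sizeof(wait));
	wait.handle = handle;
	wait.timeout_ns = timeout_ns;

	do {
		/* The kernel decrements wait.timeout_ns across restarts,
		 * so the total wait never exceeds the original request.
		 */
		ret = ioctl(fd, DRM_IOCTL_V3D_WAIT_BO, &wait);
	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));

	return ret;
}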
+206
drivers/gpu/drm/v3d/v3d_irq.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+
2 + /* Copyright (C) 2014-2018 Broadcom */
3 +
4 + /**
5 + * DOC: Interrupt management for the V3D engine
6 + *
7 + * When we take a binning or rendering flush done interrupt, we need
8 + * to signal the fence for that job so that the scheduler can queue up
9 + * the next one and unblock any waiters.
10 + *
11 + * When we take the binner out of memory interrupt, we need to
12 + * allocate some new memory and pass it to the binner so that the
13 + * current job can make progress.
14 + */
15 +
16 + #include "v3d_drv.h"
17 + #include "v3d_regs.h"
18 +
19 + #define V3D_CORE_IRQS ((u32)(V3D_INT_OUTOMEM | \
20 + V3D_INT_FLDONE | \
21 + V3D_INT_FRDONE | \
22 + V3D_INT_GMPV))
23 +
24 + #define V3D_HUB_IRQS ((u32)(V3D_HUB_INT_MMU_WRV | \
25 + V3D_HUB_INT_MMU_PTI | \
26 + V3D_HUB_INT_MMU_CAP))
27 +
28 + static void
29 + v3d_overflow_mem_work(struct work_struct *work)
30 + {
31 + struct v3d_dev *v3d =
32 + container_of(work, struct v3d_dev, overflow_mem_work);
33 + struct drm_device *dev = &v3d->drm;
34 + struct v3d_bo *bo = v3d_bo_create(dev, NULL /* XXX: GMP */, 256 * 1024);
35 + unsigned long irqflags;
36 +
37 + if (IS_ERR(bo)) {
38 + DRM_ERROR("Couldn't allocate binner overflow mem\n");
39 + return;
40 + }
41 +
42 + /* If we lost a race and our work task came in after the bin
43 + * job completed and exited, there's nobody to hand the new
44 + * memory to, so just free it below. This can happen because
45 + * the HW signals OOM before it's fully out of memory, so the
46 + * binner might just barely complete.
47 + *
48 + * If instead we came in after a new bin job got scheduled,
49 + * that's fine; we'll just give that job some binner pool anyway.
50 + */
51 + spin_lock_irqsave(&v3d->job_lock, irqflags);
52 + if (!v3d->bin_job) {
53 + spin_unlock_irqrestore(&v3d->job_lock, irqflags);
54 + goto out;
55 + }
56 +
57 + drm_gem_object_get(&bo->base);
58 + list_add_tail(&bo->unref_head, &v3d->bin_job->unref_list);
59 + spin_unlock_irqrestore(&v3d->job_lock, irqflags);
60 +
61 + V3D_CORE_WRITE(0, V3D_PTB_BPOA, bo->node.start << PAGE_SHIFT);
62 + V3D_CORE_WRITE(0, V3D_PTB_BPOS, bo->base.size);
63 +
64 + out:
65 + drm_gem_object_put_unlocked(&bo->base);
66 + }
67 +
68 + static irqreturn_t
69 + v3d_irq(int irq, void *arg)
70 + {
71 + struct v3d_dev *v3d = arg;
72 + u32 intsts;
73 + irqreturn_t status = IRQ_NONE;
74 +
75 + intsts = V3D_CORE_READ(0, V3D_CTL_INT_STS);
76 +
77 + /* Acknowledge the interrupts we're handling here. */
78 + V3D_CORE_WRITE(0, V3D_CTL_INT_CLR, intsts);
79 +
80 + if (intsts & V3D_INT_OUTOMEM) {
81 + /* Note that the OOM status is edge signaled, so the
82 + * interrupt won't happen again until we actually
83 + * add more memory.
84 + */
85 + schedule_work(&v3d->overflow_mem_work);
86 + status = IRQ_HANDLED;
87 + }
88 +
89 + if (intsts & V3D_INT_FLDONE) {
90 + v3d->queue[V3D_BIN].finished_seqno++;
91 + dma_fence_signal(v3d->bin_job->bin.done_fence);
92 + status = IRQ_HANDLED;
93 + }
94 +
95 + if (intsts & V3D_INT_FRDONE) {
96 + v3d->queue[V3D_RENDER].finished_seqno++;
97 + dma_fence_signal(v3d->render_job->render.done_fence);
98 +
99 + status = IRQ_HANDLED;
100 + }
101 +
102 + /* We shouldn't be triggering these if we have GMP in
103 + * always-allowed mode.
104 + */
105 + if (intsts & V3D_INT_GMPV)
106 + dev_err(v3d->dev, "GMP violation\n");
107 +
108 + return status;
109 + }
110 +
111 + static irqreturn_t
112 + v3d_hub_irq(int irq, void *arg)
113 + {
114 + struct v3d_dev *v3d = arg;
115 + u32 intsts;
116 + irqreturn_t status = IRQ_NONE;
117 +
118 + intsts = V3D_READ(V3D_HUB_INT_STS);
119 +
120 + /* Acknowledge the interrupts we're handling here. */
121 + V3D_WRITE(V3D_HUB_INT_CLR, intsts);
122 +
123 + if (intsts & (V3D_HUB_INT_MMU_WRV |
124 + V3D_HUB_INT_MMU_PTI |
125 + V3D_HUB_INT_MMU_CAP)) {
126 + u32 axi_id = V3D_READ(V3D_MMU_VIO_ID);
127 + u64 vio_addr = (u64)V3D_READ(V3D_MMU_VIO_ADDR) << 8;
128 +
129 + dev_err(v3d->dev, "MMU error from client %u at 0x%08llx%s%s%s\n",
130 + axi_id, (long long)vio_addr,
131 + ((intsts & V3D_HUB_INT_MMU_WRV) ?
132 + ", write violation" : ""),
133 + ((intsts & V3D_HUB_INT_MMU_PTI) ?
134 + ", pte invalid" : ""),
135 + ((intsts & V3D_HUB_INT_MMU_CAP) ?
136 + ", cap exceeded" : ""));
137 + status = IRQ_HANDLED;
138 + }
139 +
140 + return status;
141 + }
142 +
143 + void
144 + v3d_irq_init(struct v3d_dev *v3d)
145 + {
146 + int ret, core;
147 +
148 + INIT_WORK(&v3d->overflow_mem_work, v3d_overflow_mem_work);
149 +
150 + /* Clear any pending interrupts someone might have left around
151 + * for us.
152 + */
153 + for (core = 0; core < v3d->cores; core++)
154 + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
155 + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
156 +
157 + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 0),
158 + v3d_hub_irq, IRQF_SHARED, "v3d_hub", v3d);
159 + if (ret)
160 + dev_err(v3d->dev, "HUB IRQ setup failed: %d\n", ret);
161 + ret = devm_request_irq(v3d->dev, platform_get_irq(v3d->pdev, 1),
162 + v3d_irq, IRQF_SHARED, "v3d_core0", v3d);
163 + if (ret)
164 + dev_err(v3d->dev, "core IRQ setup failed: %d\n", ret);
165 +
166 + v3d_irq_enable(v3d);
167 + }
168 +
169 + void
170 + v3d_irq_enable(struct v3d_dev *v3d)
171 + {
172 + int core;
173 +
174 + /* Enable our set of interrupts, masking out any others. */
175 + for (core = 0; core < v3d->cores; core++) {
176 + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~V3D_CORE_IRQS);
177 + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_CLR, V3D_CORE_IRQS);
178 + }
179 +
180 + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~V3D_HUB_IRQS);
181 + V3D_WRITE(V3D_HUB_INT_MSK_CLR, V3D_HUB_IRQS);
182 + }
183 +
184 + void
185 + v3d_irq_disable(struct v3d_dev *v3d)
186 + {
187 + int core;
188 +
189 + /* Disable all interrupts. */
190 + for (core = 0; core < v3d->cores; core++)
191 + V3D_CORE_WRITE(core, V3D_CTL_INT_MSK_SET, ~0);
192 + V3D_WRITE(V3D_HUB_INT_MSK_SET, ~0);
193 +
194 + /* Clear any pending interrupts we might have left. */
195 + for (core = 0; core < v3d->cores; core++)
196 + V3D_CORE_WRITE(core, V3D_CTL_INT_CLR, V3D_CORE_IRQS);
197 + V3D_WRITE(V3D_HUB_INT_CLR, V3D_HUB_IRQS);
198 +
199 + cancel_work_sync(&v3d->overflow_mem_work);
200 + }
201 +
202 + /** Reinitializes interrupt registers when a GPU reset is performed. */
203 + void v3d_irq_reset(struct v3d_dev *v3d)
204 + {
205 + v3d_irq_enable(v3d);
206 + }
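When bringing up a new part it can be handy to decode a raw
V3D_CTL_INT_STS value against the bits handled above. A hypothetical
debug helper, sketched here only to show how the status bits compose
(v3d_decode_core_intsts() is not part of this patch):

/* Hypothetical helper: print which core interrupt sources are set in
 * an INT_STS value, using the definitions from v3d_regs.h.
 */
static void v3d_decode_core_intsts(struct v3d_dev *v3d, u32 intsts)
{
	dev_dbg(v3d->dev, "INT_STS 0x%08x:%s%s%s%s\n", intsts,
		(intsts & V3D_INT_OUTOMEM) ? " out-of-memory" : "",
		(intsts & V3D_INT_FLDONE) ? " bin-flush-done" : "",
		(intsts & V3D_INT_FRDONE) ? " render-done" : "",
		(intsts & V3D_INT_GMPV) ? " gmp-violation" : "");
}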
+122
drivers/gpu/drm/v3d/v3d_mmu.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2017-2018 Broadcom */ 3 + 4 + /** 5 + * DOC: Broadcom V3D MMU 6 + * 7 + * The V3D 3.x hardware (compared to VC4) now includes an MMU. It has 8 + * a single level of page tables for the V3D's 4GB address space to 9 + * map to AXI bus addresses, thus it could need up to 4MB of 10 + * physically contiguous memory to store the PTEs. 11 + * 12 + * Because the 4MB of contiguous memory for page tables is precious, 13 + * and switching between them is expensive, we load all BOs into the 14 + * same 4GB address space. 15 + * 16 + * To protect clients from each other, we should use the GMP to 17 + * quickly mask out (at 128kb granularity) what pages are available to 18 + * each client. This is not yet implemented. 19 + */ 20 + 21 + #include "v3d_drv.h" 22 + #include "v3d_regs.h" 23 + 24 + #define V3D_MMU_PAGE_SHIFT 12 25 + 26 + /* Note: All PTEs for the 1MB superpage must be filled with the 27 + * superpage bit set. 28 + */ 29 + #define V3D_PTE_SUPERPAGE BIT(31) 30 + #define V3D_PTE_WRITEABLE BIT(29) 31 + #define V3D_PTE_VALID BIT(28) 32 + 33 + static int v3d_mmu_flush_all(struct v3d_dev *v3d) 34 + { 35 + int ret; 36 + 37 + /* Make sure that another flush isn't already running when we 38 + * start this one. 39 + */ 40 + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & 41 + V3D_MMU_CTL_TLB_CLEARING), 100); 42 + if (ret) 43 + dev_err(v3d->dev, "TLB clear wait idle pre-wait failed\n"); 44 + 45 + V3D_WRITE(V3D_MMU_CTL, V3D_READ(V3D_MMU_CTL) | 46 + V3D_MMU_CTL_TLB_CLEAR); 47 + 48 + V3D_WRITE(V3D_MMUC_CONTROL, 49 + V3D_MMUC_CONTROL_FLUSH | 50 + V3D_MMUC_CONTROL_ENABLE); 51 + 52 + ret = wait_for(!(V3D_READ(V3D_MMU_CTL) & 53 + V3D_MMU_CTL_TLB_CLEARING), 100); 54 + if (ret) { 55 + dev_err(v3d->dev, "TLB clear wait idle failed\n"); 56 + return ret; 57 + } 58 + 59 + ret = wait_for(!(V3D_READ(V3D_MMUC_CONTROL) & 60 + V3D_MMUC_CONTROL_FLUSHING), 100); 61 + if (ret) 62 + dev_err(v3d->dev, "MMUC flush wait idle failed\n"); 63 + 64 + return ret; 65 + } 66 + 67 + int v3d_mmu_set_page_table(struct v3d_dev *v3d) 68 + { 69 + V3D_WRITE(V3D_MMU_PT_PA_BASE, v3d->pt_paddr >> V3D_MMU_PAGE_SHIFT); 70 + V3D_WRITE(V3D_MMU_CTL, 71 + V3D_MMU_CTL_ENABLE | 72 + V3D_MMU_CTL_PT_INVALID | 73 + V3D_MMU_CTL_PT_INVALID_ABORT | 74 + V3D_MMU_CTL_WRITE_VIOLATION_ABORT | 75 + V3D_MMU_CTL_CAP_EXCEEDED_ABORT); 76 + V3D_WRITE(V3D_MMU_ILLEGAL_ADDR, 77 + (v3d->mmu_scratch_paddr >> V3D_MMU_PAGE_SHIFT) | 78 + V3D_MMU_ILLEGAL_ADDR_ENABLE); 79 + V3D_WRITE(V3D_MMUC_CONTROL, V3D_MMUC_CONTROL_ENABLE); 80 + 81 + return v3d_mmu_flush_all(v3d); 82 + } 83 + 84 + void v3d_mmu_insert_ptes(struct v3d_bo *bo) 85 + { 86 + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); 87 + u32 page = bo->node.start; 88 + u32 page_prot = V3D_PTE_WRITEABLE | V3D_PTE_VALID; 89 + unsigned int count; 90 + struct scatterlist *sgl; 91 + 92 + for_each_sg(bo->sgt->sgl, sgl, bo->sgt->nents, count) { 93 + u32 page_address = sg_dma_address(sgl) >> V3D_MMU_PAGE_SHIFT; 94 + u32 pte = page_prot | page_address; 95 + u32 i; 96 + 97 + BUG_ON(page_address + (sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT) >= 98 + BIT(24)); 99 + 100 + for (i = 0; i < sg_dma_len(sgl) >> V3D_MMU_PAGE_SHIFT; i++) 101 + v3d->pt[page++] = pte + i; 102 + } 103 + 104 + WARN_ON_ONCE(page - bo->node.start != 105 + bo->base.size >> V3D_MMU_PAGE_SHIFT); 106 + 107 + if (v3d_mmu_flush_all(v3d)) 108 + dev_err(v3d->dev, "MMU flush timeout\n"); 109 + } 110 + 111 + void v3d_mmu_remove_ptes(struct v3d_bo *bo) 112 + { 113 + struct v3d_dev *v3d = to_v3d_dev(bo->base.dev); 114 + u32 
npages = bo->base.size >> V3D_MMU_PAGE_SHIFT; 115 + u32 page; 116 + 117 + for (page = bo->node.start; page < bo->node.start + npages; page++) 118 + v3d->pt[page] = 0; 119 + 120 + if (v3d_mmu_flush_all(v3d)) 121 + dev_err(v3d->dev, "MMU flush timeout\n"); 122 + }
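The arithmetic behind the single-level table: 4GB of address space at 4KB
pages is 1M PTEs, at 4 bytes each, giving the 4MB mentioned in the DOC
comment. Each PTE is just the bus page number ORed with the flag bits,
which the sketch below makes explicit (v3d_make_pte() is a hypothetical
name for what v3d_mmu_insert_ptes() computes inline):

/* Hypothetical helper: encode one PTE as done in v3d_mmu_insert_ptes().
 * For example, a bus address of 0x12345000 encodes to 0x30012345:
 * page number 0x12345 plus the WRITEABLE and VALID bits.
 */
static u32 v3d_make_pte(dma_addr_t dma_addr)
{
	u32 page_address = dma_addr >> V3D_MMU_PAGE_SHIFT;

	return V3D_PTE_WRITEABLE | V3D_PTE_VALID | page_address;
}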
+295
drivers/gpu/drm/v3d/v3d_regs.h
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2017-2018 Broadcom */ 3 + 4 + #ifndef V3D_REGS_H 5 + #define V3D_REGS_H 6 + 7 + #include <linux/bitops.h> 8 + 9 + #define V3D_MASK(high, low) ((u32)GENMASK(high, low)) 10 + /* Using the GNU statement expression extension */ 11 + #define V3D_SET_FIELD(value, field) \ 12 + ({ \ 13 + u32 fieldval = (value) << field##_SHIFT; \ 14 + WARN_ON((fieldval & ~field##_MASK) != 0); \ 15 + fieldval & field##_MASK; \ 16 + }) 17 + 18 + #define V3D_GET_FIELD(word, field) (((word) & field##_MASK) >> \ 19 + field##_SHIFT) 20 + 21 + /* Hub registers for shared hardware between V3D cores. */ 22 + 23 + #define V3D_HUB_AXICFG 0x00000 24 + # define V3D_HUB_AXICFG_MAX_LEN_MASK V3D_MASK(3, 0) 25 + # define V3D_HUB_AXICFG_MAX_LEN_SHIFT 0 26 + #define V3D_HUB_UIFCFG 0x00004 27 + #define V3D_HUB_IDENT0 0x00008 28 + 29 + #define V3D_HUB_IDENT1 0x0000c 30 + # define V3D_HUB_IDENT1_WITH_MSO BIT(19) 31 + # define V3D_HUB_IDENT1_WITH_TSY BIT(18) 32 + # define V3D_HUB_IDENT1_WITH_TFU BIT(17) 33 + # define V3D_HUB_IDENT1_WITH_L3C BIT(16) 34 + # define V3D_HUB_IDENT1_NHOSTS_MASK V3D_MASK(15, 12) 35 + # define V3D_HUB_IDENT1_NHOSTS_SHIFT 12 36 + # define V3D_HUB_IDENT1_NCORES_MASK V3D_MASK(11, 8) 37 + # define V3D_HUB_IDENT1_NCORES_SHIFT 8 38 + # define V3D_HUB_IDENT1_REV_MASK V3D_MASK(7, 4) 39 + # define V3D_HUB_IDENT1_REV_SHIFT 4 40 + # define V3D_HUB_IDENT1_TVER_MASK V3D_MASK(3, 0) 41 + # define V3D_HUB_IDENT1_TVER_SHIFT 0 42 + 43 + #define V3D_HUB_IDENT2 0x00010 44 + # define V3D_HUB_IDENT2_WITH_MMU BIT(8) 45 + # define V3D_HUB_IDENT2_L3C_NKB_MASK V3D_MASK(7, 0) 46 + # define V3D_HUB_IDENT2_L3C_NKB_SHIFT 0 47 + 48 + #define V3D_HUB_IDENT3 0x00014 49 + # define V3D_HUB_IDENT3_IPREV_MASK V3D_MASK(15, 8) 50 + # define V3D_HUB_IDENT3_IPREV_SHIFT 8 51 + # define V3D_HUB_IDENT3_IPIDX_MASK V3D_MASK(7, 0) 52 + # define V3D_HUB_IDENT3_IPIDX_SHIFT 0 53 + 54 + #define V3D_HUB_INT_STS 0x00050 55 + #define V3D_HUB_INT_SET 0x00054 56 + #define V3D_HUB_INT_CLR 0x00058 57 + #define V3D_HUB_INT_MSK_STS 0x0005c 58 + #define V3D_HUB_INT_MSK_SET 0x00060 59 + #define V3D_HUB_INT_MSK_CLR 0x00064 60 + # define V3D_HUB_INT_MMU_WRV BIT(5) 61 + # define V3D_HUB_INT_MMU_PTI BIT(4) 62 + # define V3D_HUB_INT_MMU_CAP BIT(3) 63 + # define V3D_HUB_INT_MSO BIT(2) 64 + # define V3D_HUB_INT_TFUC BIT(1) 65 + # define V3D_HUB_INT_TFUF BIT(0) 66 + 67 + #define V3D_GCA_CACHE_CTRL 0x0000c 68 + # define V3D_GCA_CACHE_CTRL_FLUSH BIT(0) 69 + 70 + #define V3D_GCA_SAFE_SHUTDOWN 0x000b0 71 + # define V3D_GCA_SAFE_SHUTDOWN_EN BIT(0) 72 + 73 + #define V3D_GCA_SAFE_SHUTDOWN_ACK 0x000b4 74 + # define V3D_GCA_SAFE_SHUTDOWN_ACK_ACKED 3 75 + 76 + # define V3D_TOP_GR_BRIDGE_REVISION 0x00000 77 + # define V3D_TOP_GR_BRIDGE_MAJOR_MASK V3D_MASK(15, 8) 78 + # define V3D_TOP_GR_BRIDGE_MAJOR_SHIFT 8 79 + # define V3D_TOP_GR_BRIDGE_MINOR_MASK V3D_MASK(7, 0) 80 + # define V3D_TOP_GR_BRIDGE_MINOR_SHIFT 0 81 + 82 + /* 7268 reset reg */ 83 + # define V3D_TOP_GR_BRIDGE_SW_INIT_0 0x00008 84 + # define V3D_TOP_GR_BRIDGE_SW_INIT_0_V3D_CLK_108_SW_INIT BIT(0) 85 + /* 7278 reset reg */ 86 + # define V3D_TOP_GR_BRIDGE_SW_INIT_1 0x0000c 87 + # define V3D_TOP_GR_BRIDGE_SW_INIT_1_V3D_CLK_108_SW_INIT BIT(0) 88 + 89 + /* Per-MMU registers. 
*/ 90 + 91 + #define V3D_MMUC_CONTROL 0x01000 92 + # define V3D_MMUC_CONTROL_CLEAR BIT(3) 93 + # define V3D_MMUC_CONTROL_FLUSHING BIT(2) 94 + # define V3D_MMUC_CONTROL_FLUSH BIT(1) 95 + # define V3D_MMUC_CONTROL_ENABLE BIT(0) 96 + 97 + #define V3D_MMU_CTL 0x01200 98 + # define V3D_MMU_CTL_CAP_EXCEEDED BIT(27) 99 + # define V3D_MMU_CTL_CAP_EXCEEDED_ABORT BIT(26) 100 + # define V3D_MMU_CTL_CAP_EXCEEDED_INT BIT(25) 101 + # define V3D_MMU_CTL_CAP_EXCEEDED_EXCEPTION BIT(24) 102 + # define V3D_MMU_CTL_PT_INVALID BIT(20) 103 + # define V3D_MMU_CTL_PT_INVALID_ABORT BIT(19) 104 + # define V3D_MMU_CTL_PT_INVALID_INT BIT(18) 105 + # define V3D_MMU_CTL_PT_INVALID_EXCEPTION BIT(17) 106 + # define V3D_MMU_CTL_WRITE_VIOLATION BIT(16) 107 + # define V3D_MMU_CTL_WRITE_VIOLATION_ABORT BIT(11) 108 + # define V3D_MMU_CTL_WRITE_VIOLATION_INT BIT(10) 109 + # define V3D_MMU_CTL_WRITE_VIOLATION_EXCEPTION BIT(9) 110 + # define V3D_MMU_CTL_TLB_CLEARING BIT(7) 111 + # define V3D_MMU_CTL_TLB_STATS_CLEAR BIT(3) 112 + # define V3D_MMU_CTL_TLB_CLEAR BIT(2) 113 + # define V3D_MMU_CTL_TLB_STATS_ENABLE BIT(1) 114 + # define V3D_MMU_CTL_ENABLE BIT(0) 115 + 116 + #define V3D_MMU_PT_PA_BASE 0x01204 117 + #define V3D_MMU_HIT 0x01208 118 + #define V3D_MMU_MISSES 0x0120c 119 + #define V3D_MMU_STALLS 0x01210 120 + 121 + #define V3D_MMU_ADDR_CAP 0x01214 122 + # define V3D_MMU_ADDR_CAP_ENABLE BIT(31) 123 + # define V3D_MMU_ADDR_CAP_MPAGE_MASK V3D_MASK(11, 0) 124 + # define V3D_MMU_ADDR_CAP_MPAGE_SHIFT 0 125 + 126 + #define V3D_MMU_SHOOT_DOWN 0x01218 127 + # define V3D_MMU_SHOOT_DOWN_SHOOTING BIT(29) 128 + # define V3D_MMU_SHOOT_DOWN_SHOOT BIT(28) 129 + # define V3D_MMU_SHOOT_DOWN_PAGE_MASK V3D_MASK(27, 0) 130 + # define V3D_MMU_SHOOT_DOWN_PAGE_SHIFT 0 131 + 132 + #define V3D_MMU_BYPASS_START 0x0121c 133 + #define V3D_MMU_BYPASS_END 0x01220 134 + 135 + /* AXI ID of the access that faulted */ 136 + #define V3D_MMU_VIO_ID 0x0122c 137 + 138 + /* Address for illegal PTEs to return */ 139 + #define V3D_MMU_ILLEGAL_ADDR 0x01230 140 + # define V3D_MMU_ILLEGAL_ADDR_ENABLE BIT(31) 141 + 142 + /* Address that faulted */ 143 + #define V3D_MMU_VIO_ADDR 0x01234 144 + 145 + /* Per-V3D-core registers */ 146 + 147 + #define V3D_CTL_IDENT0 0x00000 148 + # define V3D_IDENT0_VER_MASK V3D_MASK(31, 24) 149 + # define V3D_IDENT0_VER_SHIFT 24 150 + 151 + #define V3D_CTL_IDENT1 0x00004 152 + /* Multiples of 1kb */ 153 + # define V3D_IDENT1_VPM_SIZE_MASK V3D_MASK(31, 28) 154 + # define V3D_IDENT1_VPM_SIZE_SHIFT 28 155 + # define V3D_IDENT1_NSEM_MASK V3D_MASK(23, 16) 156 + # define V3D_IDENT1_NSEM_SHIFT 16 157 + # define V3D_IDENT1_NTMU_MASK V3D_MASK(15, 12) 158 + # define V3D_IDENT1_NTMU_SHIFT 12 159 + # define V3D_IDENT1_QUPS_MASK V3D_MASK(11, 8) 160 + # define V3D_IDENT1_QUPS_SHIFT 8 161 + # define V3D_IDENT1_NSLC_MASK V3D_MASK(7, 4) 162 + # define V3D_IDENT1_NSLC_SHIFT 4 163 + # define V3D_IDENT1_REV_MASK V3D_MASK(3, 0) 164 + # define V3D_IDENT1_REV_SHIFT 0 165 + 166 + #define V3D_CTL_IDENT2 0x00008 167 + # define V3D_IDENT2_BCG_INT BIT(28) 168 + 169 + #define V3D_CTL_MISCCFG 0x00018 170 + # define V3D_MISCCFG_OVRTMUOUT BIT(0) 171 + 172 + #define V3D_CTL_L2CACTL 0x00020 173 + # define V3D_L2CACTL_L2CCLR BIT(2) 174 + # define V3D_L2CACTL_L2CDIS BIT(1) 175 + # define V3D_L2CACTL_L2CENA BIT(0) 176 + 177 + #define V3D_CTL_SLCACTL 0x00024 178 + # define V3D_SLCACTL_TVCCS_MASK V3D_MASK(27, 24) 179 + # define V3D_SLCACTL_TVCCS_SHIFT 24 180 + # define V3D_SLCACTL_TDCCS_MASK V3D_MASK(19, 16) 181 + # define V3D_SLCACTL_TDCCS_SHIFT 16 182 + # define 
V3D_SLCACTL_UCC_MASK V3D_MASK(11, 8) 183 + # define V3D_SLCACTL_UCC_SHIFT 8 184 + # define V3D_SLCACTL_ICC_MASK V3D_MASK(3, 0) 185 + # define V3D_SLCACTL_ICC_SHIFT 0 186 + 187 + #define V3D_CTL_L2TCACTL 0x00030 188 + # define V3D_L2TCACTL_TMUWCF BIT(8) 189 + # define V3D_L2TCACTL_L2T_NO_WM BIT(4) 190 + # define V3D_L2TCACTL_FLM_FLUSH 0 191 + # define V3D_L2TCACTL_FLM_CLEAR 1 192 + # define V3D_L2TCACTL_FLM_CLEAN 2 193 + # define V3D_L2TCACTL_FLM_MASK V3D_MASK(2, 1) 194 + # define V3D_L2TCACTL_FLM_SHIFT 1 195 + # define V3D_L2TCACTL_L2TFLS BIT(0) 196 + #define V3D_CTL_L2TFLSTA 0x00034 197 + #define V3D_CTL_L2TFLEND 0x00038 198 + 199 + #define V3D_CTL_INT_STS 0x00050 200 + #define V3D_CTL_INT_SET 0x00054 201 + #define V3D_CTL_INT_CLR 0x00058 202 + #define V3D_CTL_INT_MSK_STS 0x0005c 203 + #define V3D_CTL_INT_MSK_SET 0x00060 204 + #define V3D_CTL_INT_MSK_CLR 0x00064 205 + # define V3D_INT_QPU_MASK V3D_MASK(27, 16) 206 + # define V3D_INT_QPU_SHIFT 16 207 + # define V3D_INT_GMPV BIT(5) 208 + # define V3D_INT_TRFB BIT(4) 209 + # define V3D_INT_SPILLUSE BIT(3) 210 + # define V3D_INT_OUTOMEM BIT(2) 211 + # define V3D_INT_FLDONE BIT(1) 212 + # define V3D_INT_FRDONE BIT(0) 213 + 214 + #define V3D_CLE_CT0CS 0x00100 215 + #define V3D_CLE_CT1CS 0x00104 216 + #define V3D_CLE_CTNCS(n) (V3D_CLE_CT0CS + 4 * n) 217 + #define V3D_CLE_CT0EA 0x00108 218 + #define V3D_CLE_CT1EA 0x0010c 219 + #define V3D_CLE_CTNEA(n) (V3D_CLE_CT0EA + 4 * n) 220 + #define V3D_CLE_CT0CA 0x00110 221 + #define V3D_CLE_CT1CA 0x00114 222 + #define V3D_CLE_CTNCA(n) (V3D_CLE_CT0CA + 4 * n) 223 + #define V3D_CLE_CT0RA 0x00118 224 + #define V3D_CLE_CT1RA 0x0011c 225 + #define V3D_CLE_CT0LC 0x00120 226 + #define V3D_CLE_CT1LC 0x00124 227 + #define V3D_CLE_CT0PC 0x00128 228 + #define V3D_CLE_CT1PC 0x0012c 229 + #define V3D_CLE_PCS 0x00130 230 + #define V3D_CLE_BFC 0x00134 231 + #define V3D_CLE_RFC 0x00138 232 + #define V3D_CLE_TFBC 0x0013c 233 + #define V3D_CLE_TFIT 0x00140 234 + #define V3D_CLE_CT1CFG 0x00144 235 + #define V3D_CLE_CT1TILECT 0x00148 236 + #define V3D_CLE_CT1TSKIP 0x0014c 237 + #define V3D_CLE_CT1PTCT 0x00150 238 + #define V3D_CLE_CT0SYNC 0x00154 239 + #define V3D_CLE_CT1SYNC 0x00158 240 + #define V3D_CLE_CT0QTS 0x0015c 241 + # define V3D_CLE_CT0QTS_ENABLE BIT(1) 242 + #define V3D_CLE_CT0QBA 0x00160 243 + #define V3D_CLE_CT1QBA 0x00164 244 + #define V3D_CLE_CTNQBA(n) (V3D_CLE_CT0QBA + 4 * n) 245 + #define V3D_CLE_CT0QEA 0x00168 246 + #define V3D_CLE_CT1QEA 0x0016c 247 + #define V3D_CLE_CTNQEA(n) (V3D_CLE_CT0QEA + 4 * n) 248 + #define V3D_CLE_CT0QMA 0x00170 249 + #define V3D_CLE_CT0QMS 0x00174 250 + #define V3D_CLE_CT1QCFG 0x00178 251 + /* If set without ETPROC, entirely skip tiles with no primitives. */ 252 + # define V3D_CLE_QCFG_ETFILT BIT(7) 253 + /* If set with ETFILT, just write the clear color to tiles with no 254 + * primitives. 
255 + */
256 + # define V3D_CLE_QCFG_ETPROC BIT(6)
257 + # define V3D_CLE_QCFG_ETSFLUSH BIT(1)
258 + # define V3D_CLE_QCFG_MCDIS BIT(0)
259 +
260 + #define V3D_PTB_BPCA 0x00300
261 + #define V3D_PTB_BPCS 0x00304
262 + #define V3D_PTB_BPOA 0x00308
263 + #define V3D_PTB_BPOS 0x0030c
264 +
265 + #define V3D_PTB_BXCF 0x00310
266 + # define V3D_PTB_BXCF_RWORDERDISA BIT(1)
267 + # define V3D_PTB_BXCF_CLIPDISA BIT(0)
268 +
269 + #define V3D_GMP_STATUS 0x00800
270 + # define V3D_GMP_STATUS_GMPRST BIT(31)
271 + # define V3D_GMP_STATUS_WR_COUNT_MASK V3D_MASK(30, 24)
272 + # define V3D_GMP_STATUS_WR_COUNT_SHIFT 24
273 + # define V3D_GMP_STATUS_RD_COUNT_MASK V3D_MASK(22, 16)
274 + # define V3D_GMP_STATUS_RD_COUNT_SHIFT 16
275 + # define V3D_GMP_STATUS_WR_ACTIVE BIT(5)
276 + # define V3D_GMP_STATUS_RD_ACTIVE BIT(4)
277 + # define V3D_GMP_STATUS_CFG_BUSY BIT(3)
278 + # define V3D_GMP_STATUS_CNTOVF BIT(2)
279 + # define V3D_GMP_STATUS_INVPROT BIT(1)
280 + # define V3D_GMP_STATUS_VIO BIT(0)
281 +
282 + #define V3D_GMP_CFG 0x00804
283 + # define V3D_GMP_CFG_LBURSTEN BIT(3)
284 + # define V3D_GMP_CFG_PGCRSEN BIT(2)
285 + # define V3D_GMP_CFG_STOP_REQ BIT(1)
286 + # define V3D_GMP_CFG_PROT_ENABLE BIT(0)
287 +
288 + #define V3D_GMP_VIO_ADDR 0x00808
289 + #define V3D_GMP_VIO_TYPE 0x0080c
290 + #define V3D_GMP_TABLE_ADDR 0x00810
291 + #define V3D_GMP_CLEAR_LOAD 0x00814
292 + #define V3D_GMP_PRESERVE_LOAD 0x00818
293 + #define V3D_GMP_VALID_LINES 0x00820
294 +
295 + #endif /* V3D_REGS_H */
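The V3D_GET_FIELD()/V3D_SET_FIELD() macros at the top of this header are
how consumers use these masks and shifts; pulling the core count and tech
version out of V3D_HUB_IDENT1, for instance, looks roughly like this (the
helper name is illustrative, not from this patch):

/* Illustrative only: decode a couple of fields from V3D_HUB_IDENT1. */
static void v3d_report_ident1(struct v3d_dev *v3d)
{
	u32 ident1 = V3D_READ(V3D_HUB_IDENT1);
	u32 ncores = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_NCORES);
	u32 tver = V3D_GET_FIELD(ident1, V3D_HUB_IDENT1_TVER);

	dev_info(v3d->dev, "V3D: %u core(s), tech version %u\n",
		 ncores, tver);
}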
+228
drivers/gpu/drm/v3d/v3d_sched.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+
2 + /* Copyright (C) 2018 Broadcom */
3 +
4 + /**
5 + * DOC: Broadcom V3D scheduling
6 + *
7 + * The shared DRM GPU scheduler is used to coordinate submitting jobs
8 + * to the hardware. Each DRM fd (roughly a client process) gets its
9 + * own scheduler entity, which will process jobs in order. The GPU
10 + * scheduler will round-robin between clients to submit the next job.
11 + *
12 + * For simplicity, and in order to keep latency low for interactive
13 + * jobs when bulk background jobs are queued up, we submit a new job
14 + * to the HW only when it has completed the last one, instead of
15 + * filling up the CT[01]Q FIFOs with jobs. Similarly, we use
16 + * v3d_job_dependency() to manage the dependency between bin and
17 + * render, instead of having the clients use the HW's semaphores
18 + * to interlock between them.
19 + */
20 +
21 + #include <linux/kthread.h>
22 +
23 + #include "v3d_drv.h"
24 + #include "v3d_regs.h"
25 + #include "v3d_trace.h"
26 +
27 + static struct v3d_job *
28 + to_v3d_job(struct drm_sched_job *sched_job)
29 + {
30 + return container_of(sched_job, struct v3d_job, base);
31 + }
32 +
33 + static void
34 + v3d_job_free(struct drm_sched_job *sched_job)
35 + {
36 + struct v3d_job *job = to_v3d_job(sched_job);
37 +
38 + v3d_exec_put(job->exec);
39 + }
40 +
41 + /*
42 + * Returns the fences that the bin or render job depends on, one by one.
43 + * v3d_job_run() won't be called until all of them have been signaled.
44 + */
45 + static struct dma_fence *
46 + v3d_job_dependency(struct drm_sched_job *sched_job,
47 + struct drm_sched_entity *s_entity)
48 + {
49 + struct v3d_job *job = to_v3d_job(sched_job);
50 + struct v3d_exec_info *exec = job->exec;
51 + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
52 + struct dma_fence *fence;
53 +
54 + fence = job->in_fence;
55 + if (fence) {
56 + job->in_fence = NULL;
57 + return fence;
58 + }
59 +
60 + if (q == V3D_RENDER) {
61 + /* If we had a bin job, the render job definitely depends on
62 + * it. We first have to wait for bin to be scheduled, so that
63 + * its done_fence is created.
64 + */
65 + fence = exec->bin_done_fence;
66 + if (fence) {
67 + exec->bin_done_fence = NULL;
68 + return fence;
69 + }
70 + }
71 +
72 + /* XXX: Wait on a fence for switching the GMP if necessary,
73 + * and then do so.
74 + */
75 +
76 + return NULL;
77 + }
78 +
79 + static struct dma_fence *v3d_job_run(struct drm_sched_job *sched_job)
80 + {
81 + struct v3d_job *job = to_v3d_job(sched_job);
82 + struct v3d_exec_info *exec = job->exec;
83 + enum v3d_queue q = job == &exec->bin ? V3D_BIN : V3D_RENDER;
84 + struct v3d_dev *v3d = exec->v3d;
85 + struct drm_device *dev = &v3d->drm;
86 + struct dma_fence *fence;
87 + unsigned long irqflags;
88 +
89 + if (unlikely(job->base.s_fence->finished.error))
90 + return NULL;
91 +
92 + /* Lock required around bin_job update vs
93 + * v3d_overflow_mem_work().
94 + */
95 + spin_lock_irqsave(&v3d->job_lock, irqflags);
96 + if (q == V3D_BIN) {
97 + v3d->bin_job = job->exec;
98 +
99 + /* Clear out the overflow allocation, so we don't
100 + * reuse the overflow attached to a previous job.
101 + */
102 + V3D_CORE_WRITE(0, V3D_PTB_BPOS, 0);
103 + } else {
104 + v3d->render_job = job->exec;
105 + }
106 + spin_unlock_irqrestore(&v3d->job_lock, irqflags);
107 +
108 + /* Can we avoid this flush when q==RENDER?
We need to be 109 + * careful of scheduling, though -- imagine job0 rendering to 110 + * texture and job1 reading, and them being executed as bin0, 111 + * bin1, render0, render1, so that render1's flush at bin time 112 + * wasn't enough. 113 + */ 114 + v3d_invalidate_caches(v3d); 115 + 116 + fence = v3d_fence_create(v3d, q); 117 + if (!fence) 118 + return fence; 119 + 120 + if (job->done_fence) 121 + dma_fence_put(job->done_fence); 122 + job->done_fence = dma_fence_get(fence); 123 + 124 + trace_v3d_submit_cl(dev, q == V3D_RENDER, to_v3d_fence(fence)->seqno, 125 + job->start, job->end); 126 + 127 + if (q == V3D_BIN) { 128 + if (exec->qma) { 129 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMA, exec->qma); 130 + V3D_CORE_WRITE(0, V3D_CLE_CT0QMS, exec->qms); 131 + } 132 + if (exec->qts) { 133 + V3D_CORE_WRITE(0, V3D_CLE_CT0QTS, 134 + V3D_CLE_CT0QTS_ENABLE | 135 + exec->qts); 136 + } 137 + } else { 138 + /* XXX: Set the QCFG */ 139 + } 140 + 141 + /* Set the current and end address of the control list. 142 + * Writing the end register is what starts the job. 143 + */ 144 + V3D_CORE_WRITE(0, V3D_CLE_CTNQBA(q), job->start); 145 + V3D_CORE_WRITE(0, V3D_CLE_CTNQEA(q), job->end); 146 + 147 + return fence; 148 + } 149 + 150 + static void 151 + v3d_job_timedout(struct drm_sched_job *sched_job) 152 + { 153 + struct v3d_job *job = to_v3d_job(sched_job); 154 + struct v3d_exec_info *exec = job->exec; 155 + struct v3d_dev *v3d = exec->v3d; 156 + enum v3d_queue q; 157 + 158 + mutex_lock(&v3d->reset_lock); 159 + 160 + /* block scheduler */ 161 + for (q = 0; q < V3D_MAX_QUEUES; q++) { 162 + struct drm_gpu_scheduler *sched = &v3d->queue[q].sched; 163 + 164 + kthread_park(sched->thread); 165 + drm_sched_hw_job_reset(sched, (sched_job->sched == sched ? 166 + sched_job : NULL)); 167 + } 168 + 169 + /* get the GPU back into the init state */ 170 + v3d_reset(v3d); 171 + 172 + /* Unblock schedulers and restart their jobs. */ 173 + for (q = 0; q < V3D_MAX_QUEUES; q++) { 174 + drm_sched_job_recovery(&v3d->queue[q].sched); 175 + kthread_unpark(v3d->queue[q].sched.thread); 176 + } 177 + 178 + mutex_unlock(&v3d->reset_lock); 179 + } 180 + 181 + static const struct drm_sched_backend_ops v3d_sched_ops = { 182 + .dependency = v3d_job_dependency, 183 + .run_job = v3d_job_run, 184 + .timedout_job = v3d_job_timedout, 185 + .free_job = v3d_job_free 186 + }; 187 + 188 + int 189 + v3d_sched_init(struct v3d_dev *v3d) 190 + { 191 + int hw_jobs_limit = 1; 192 + int job_hang_limit = 0; 193 + int hang_limit_ms = 500; 194 + int ret; 195 + 196 + ret = drm_sched_init(&v3d->queue[V3D_BIN].sched, 197 + &v3d_sched_ops, 198 + hw_jobs_limit, job_hang_limit, 199 + msecs_to_jiffies(hang_limit_ms), 200 + "v3d_bin"); 201 + if (ret) { 202 + dev_err(v3d->dev, "Failed to create bin scheduler: %d.", ret); 203 + return ret; 204 + } 205 + 206 + ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched, 207 + &v3d_sched_ops, 208 + hw_jobs_limit, job_hang_limit, 209 + msecs_to_jiffies(hang_limit_ms), 210 + "v3d_render"); 211 + if (ret) { 212 + dev_err(v3d->dev, "Failed to create render scheduler: %d.", 213 + ret); 214 + drm_sched_fini(&v3d->queue[V3D_BIN].sched); 215 + return ret; 216 + } 217 + 218 + return 0; 219 + } 220 + 221 + void 222 + v3d_sched_fini(struct v3d_dev *v3d) 223 + { 224 + enum v3d_queue q; 225 + 226 + for (q = 0; q < V3D_MAX_QUEUES; q++) 227 + drm_sched_fini(&v3d->queue[q].sched); 228 + }
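The ordering that v3d_job_dependency() enforces can be modeled outside the
kernel: a job first hands back its userspace in-fence, and a render job
then hands back the bin job's finished fence, so the scheduler won't call
run_job() on render until bin is done. A standalone toy model of that
dependency walk (plain userspace C, purely illustrative):

#include <stdio.h>

struct toy_job {
	const char *name;
	const char *in_fence;  /* consumed first, then cleared */
	const char **bin_done; /* render-only: bin job's finished fence */
};

/* Mirrors v3d_job_dependency(): hand back one outstanding fence per
 * call, clearing it so it is only waited on once; NULL means runnable.
 */
static const char *toy_dependency(struct toy_job *job)
{
	const char *fence = job->in_fence;

	if (fence) {
		job->in_fence = NULL;
		return fence;
	}
	if (job->bin_done && *job->bin_done) {
		fence = *job->bin_done;
		*job->bin_done = NULL;
		return fence;
	}
	return NULL;
}

int main(void)
{
	const char *bin_done = "bin-finished";
	struct toy_job render = { "render", "userspace-in-fence", &bin_done };
	const char *f;

	while ((f = toy_dependency(&render)))
		printf("%s waits on: %s\n", render.name, f);
	printf("%s runs\n", render.name);
	return 0;
}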
+82
drivers/gpu/drm/v3d/v3d_trace.h
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2015-2018 Broadcom */ 3 + 4 + #if !defined(_V3D_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ) 5 + #define _V3D_TRACE_H_ 6 + 7 + #include <linux/stringify.h> 8 + #include <linux/types.h> 9 + #include <linux/tracepoint.h> 10 + 11 + #undef TRACE_SYSTEM 12 + #define TRACE_SYSTEM v3d 13 + #define TRACE_INCLUDE_FILE v3d_trace 14 + 15 + TRACE_EVENT(v3d_submit_cl, 16 + TP_PROTO(struct drm_device *dev, bool is_render, 17 + uint64_t seqno, 18 + u32 ctnqba, u32 ctnqea), 19 + TP_ARGS(dev, is_render, seqno, ctnqba, ctnqea), 20 + 21 + TP_STRUCT__entry( 22 + __field(u32, dev) 23 + __field(bool, is_render) 24 + __field(u64, seqno) 25 + __field(u32, ctnqba) 26 + __field(u32, ctnqea) 27 + ), 28 + 29 + TP_fast_assign( 30 + __entry->dev = dev->primary->index; 31 + __entry->is_render = is_render; 32 + __entry->seqno = seqno; 33 + __entry->ctnqba = ctnqba; 34 + __entry->ctnqea = ctnqea; 35 + ), 36 + 37 + TP_printk("dev=%u, %s, seqno=%llu, 0x%08x..0x%08x", 38 + __entry->dev, 39 + __entry->is_render ? "RCL" : "BCL", 40 + __entry->seqno, 41 + __entry->ctnqba, 42 + __entry->ctnqea) 43 + ); 44 + 45 + TRACE_EVENT(v3d_reset_begin, 46 + TP_PROTO(struct drm_device *dev), 47 + TP_ARGS(dev), 48 + 49 + TP_STRUCT__entry( 50 + __field(u32, dev) 51 + ), 52 + 53 + TP_fast_assign( 54 + __entry->dev = dev->primary->index; 55 + ), 56 + 57 + TP_printk("dev=%u", 58 + __entry->dev) 59 + ); 60 + 61 + TRACE_EVENT(v3d_reset_end, 62 + TP_PROTO(struct drm_device *dev), 63 + TP_ARGS(dev), 64 + 65 + TP_STRUCT__entry( 66 + __field(u32, dev) 67 + ), 68 + 69 + TP_fast_assign( 70 + __entry->dev = dev->primary->index; 71 + ), 72 + 73 + TP_printk("dev=%u", 74 + __entry->dev) 75 + ); 76 + 77 + #endif /* _V3D_TRACE_H_ */ 78 + 79 + /* This part must be outside protection */ 80 + #undef TRACE_INCLUDE_PATH 81 + #define TRACE_INCLUDE_PATH . 82 + #include <trace/define_trace.h>
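With TRACE_SYSTEM set to "v3d", the events above land in tracefs once the
driver is loaded. A small userspace sketch of enabling v3d_submit_cl
(assumes tracefs is mounted at /sys/kernel/tracing; error handling elided):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/sys/kernel/tracing/events/v3d/v3d_submit_cl/enable",
		      O_WRONLY);

	write(fd, "1", 1);
	close(fd);
	/* Each submission now shows up in /sys/kernel/tracing/trace_pipe,
	 * formatted by TP_printk() above, e.g.
	 * "v3d_submit_cl: dev=0, BCL, seqno=1, 0x00100000..0x00100400".
	 */
	return 0;
}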
+9
drivers/gpu/drm/v3d/v3d_trace_points.c
··· 1 + // SPDX-License-Identifier: GPL-2.0+ 2 + /* Copyright (C) 2015 Broadcom */ 3 + 4 + #include "v3d_drv.h" 5 + 6 + #ifndef __CHECKER__ 7 + #define CREATE_TRACE_POINTS 8 + #include "v3d_trace.h" 9 + #endif
+194
include/uapi/drm/v3d_drm.h
··· 1 + /* 2 + * Copyright © 2014-2018 Broadcom 3 + * 4 + * Permission is hereby granted, free of charge, to any person obtaining a 5 + * copy of this software and associated documentation files (the "Software"), 6 + * to deal in the Software without restriction, including without limitation 7 + * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 + * and/or sell copies of the Software, and to permit persons to whom the 9 + * Software is furnished to do so, subject to the following conditions: 10 + * 11 + * The above copyright notice and this permission notice (including the next 12 + * paragraph) shall be included in all copies or substantial portions of the 13 + * Software. 14 + * 15 + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 + * IN THE SOFTWARE. 22 + */ 23 + 24 + #ifndef _V3D_DRM_H_ 25 + #define _V3D_DRM_H_ 26 + 27 + #include "drm.h" 28 + 29 + #if defined(__cplusplus) 30 + extern "C" { 31 + #endif 32 + 33 + #define DRM_V3D_SUBMIT_CL 0x00 34 + #define DRM_V3D_WAIT_BO 0x01 35 + #define DRM_V3D_CREATE_BO 0x02 36 + #define DRM_V3D_MMAP_BO 0x03 37 + #define DRM_V3D_GET_PARAM 0x04 38 + #define DRM_V3D_GET_BO_OFFSET 0x05 39 + 40 + #define DRM_IOCTL_V3D_SUBMIT_CL DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_SUBMIT_CL, struct drm_v3d_submit_cl) 41 + #define DRM_IOCTL_V3D_WAIT_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_WAIT_BO, struct drm_v3d_wait_bo) 42 + #define DRM_IOCTL_V3D_CREATE_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_CREATE_BO, struct drm_v3d_create_bo) 43 + #define DRM_IOCTL_V3D_MMAP_BO DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_MMAP_BO, struct drm_v3d_mmap_bo) 44 + #define DRM_IOCTL_V3D_GET_PARAM DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_PARAM, struct drm_v3d_get_param) 45 + #define DRM_IOCTL_V3D_GET_BO_OFFSET DRM_IOWR(DRM_COMMAND_BASE + DRM_V3D_GET_BO_OFFSET, struct drm_v3d_get_bo_offset) 46 + 47 + /** 48 + * struct drm_v3d_submit_cl - ioctl argument for submitting commands to the 3D 49 + * engine. 50 + * 51 + * This asks the kernel to have the GPU execute an optional binner 52 + * command list, and a render command list. 53 + */ 54 + struct drm_v3d_submit_cl { 55 + /* Pointer to the binner command list. 56 + * 57 + * This is the first set of commands executed, which runs the 58 + * coordinate shader to determine where primitives land on the screen, 59 + * then writes out the state updates and draw calls necessary per tile 60 + * to the tile allocation BO. 61 + */ 62 + __u32 bcl_start; 63 + 64 + /** End address of the BCL (first byte after the BCL) */ 65 + __u32 bcl_end; 66 + 67 + /* Offset of the render command list. 68 + * 69 + * This is the second set of commands executed, which will either 70 + * execute the tiles that have been set up by the BCL, or a fixed set 71 + * of tiles (in the case of RCL-only blits). 72 + */ 73 + __u32 rcl_start; 74 + 75 + /** End address of the RCL (first byte after the RCL) */ 76 + __u32 rcl_end; 77 + 78 + /** An optional sync object to wait on before starting the BCL. */ 79 + __u32 in_sync_bcl; 80 + /** An optional sync object to wait on before starting the RCL. 
*/ 81 + __u32 in_sync_rcl; 82 + /** An optional sync object to place the completion fence in. */ 83 + __u32 out_sync; 84 + 85 + /* Offset of the tile alloc memory 86 + * 87 + * This is optional on V3D 3.3 (where the CL can set the value) but 88 + * required on V3D 4.1. 89 + */ 90 + __u32 qma; 91 + 92 + /** Size of the tile alloc memory. */ 93 + __u32 qms; 94 + 95 + /** Offset of the tile state data array. */ 96 + __u32 qts; 97 + 98 + /* Pointer to a u32 array of the BOs that are referenced by the job. 99 + */ 100 + __u64 bo_handles; 101 + 102 + /* Number of BO handles passed in (size is that times 4). */ 103 + __u32 bo_handle_count; 104 + 105 + /* Pad, must be zero-filled. */ 106 + __u32 pad; 107 + }; 108 + 109 + /** 110 + * struct drm_v3d_wait_bo - ioctl argument for waiting for 111 + * completion of the last DRM_V3D_SUBMIT_CL on a BO. 112 + * 113 + * This is useful for cases where multiple processes might be 114 + * rendering to a BO and you want to wait for all rendering to be 115 + * completed. 116 + */ 117 + struct drm_v3d_wait_bo { 118 + __u32 handle; 119 + __u32 pad; 120 + __u64 timeout_ns; 121 + }; 122 + 123 + /** 124 + * struct drm_v3d_create_bo - ioctl argument for creating V3D BOs. 125 + * 126 + * There are currently no values for the flags argument, but it may be 127 + * used in a future extension. 128 + */ 129 + struct drm_v3d_create_bo { 130 + __u32 size; 131 + __u32 flags; 132 + /** Returned GEM handle for the BO. */ 133 + __u32 handle; 134 + /** 135 + * Returned offset for the BO in the V3D address space. This offset 136 + * is private to the DRM fd and is valid for the lifetime of the GEM 137 + * handle. 138 + * 139 + * This offset value will always be nonzero, since various HW 140 + * units treat 0 specially. 141 + */ 142 + __u32 offset; 143 + }; 144 + 145 + /** 146 + * struct drm_v3d_mmap_bo - ioctl argument for mapping V3D BOs. 147 + * 148 + * This doesn't actually perform an mmap. Instead, it returns the 149 + * offset you need to use in an mmap on the DRM device node. This 150 + * means that tools like valgrind end up knowing about the mapped 151 + * memory. 152 + * 153 + * There are currently no values for the flags argument, but it may be 154 + * used in a future extension. 155 + */ 156 + struct drm_v3d_mmap_bo { 157 + /** Handle for the object being mapped. */ 158 + __u32 handle; 159 + __u32 flags; 160 + /** offset into the drm node to use for subsequent mmap call. */ 161 + __u64 offset; 162 + }; 163 + 164 + enum drm_v3d_param { 165 + DRM_V3D_PARAM_V3D_UIFCFG, 166 + DRM_V3D_PARAM_V3D_HUB_IDENT1, 167 + DRM_V3D_PARAM_V3D_HUB_IDENT2, 168 + DRM_V3D_PARAM_V3D_HUB_IDENT3, 169 + DRM_V3D_PARAM_V3D_CORE0_IDENT0, 170 + DRM_V3D_PARAM_V3D_CORE0_IDENT1, 171 + DRM_V3D_PARAM_V3D_CORE0_IDENT2, 172 + }; 173 + 174 + struct drm_v3d_get_param { 175 + __u32 param; 176 + __u32 pad; 177 + __u64 value; 178 + }; 179 + 180 + /** 181 + * Returns the offset for the BO in the V3D address space for this DRM fd. 182 + * This is the same value returned by drm_v3d_create_bo, if that was called 183 + * from this DRM fd. 184 + */ 185 + struct drm_v3d_get_bo_offset { 186 + __u32 handle; 187 + __u32 offset; 188 + }; 189 + 190 + #if defined(__cplusplus) 191 + } 192 + #endif 193 + 194 + #endif /* _V3D_DRM_H_ */
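Putting the UAPI together, the typical allocate-and-map flow is CREATE_BO
for a handle, MMAP_BO for the fake mmap offset, then a plain mmap() on the
DRM fd. A hedged end-to-end sketch (the device node path is an assumption
about the target system; error handling elided):

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include "drm/v3d_drm.h"

int main(void)
{
	int fd = open("/dev/dri/card0", O_RDWR); /* assumed node */
	struct drm_v3d_create_bo create;
	struct drm_v3d_mmap_bo map;
	void *ptr;

	memset(&create, 0, sizeof(create));
	create.size = 4096;
	ioctl(fd, DRM_IOCTL_V3D_CREATE_BO, &create);

	/* MMAP_BO only returns a fake offset; the mmap() below is what
	 * actually maps the pages.
	 */
	memset(&map, 0, sizeof(map));
	map.handle = create.handle;
	ioctl(fd, DRM_IOCTL_V3D_MMAP_BO, &map);

	ptr = mmap(NULL, create.size, PROT_READ | PROT_WRITE, MAP_SHARED,
		   fd, map.offset);
	printf("BO handle %u at V3D address 0x%08x, CPU map %p\n",
	       create.handle, create.offset, ptr);
	return 0;
}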