Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

gpu: host1x: Add IOMMU support

Add support for the Host1x unit to be located behind
an IOMMU. This is required when gather buffers may be
allocated non-contiguously in physical memory, as can
be the case when TegraDRM is also using the IOMMU.

Signed-off-by: Mikko Perttunen <mperttunen@nvidia.com>
Signed-off-by: Thierry Reding <treding@nvidia.com>

Authored by Mikko Perttunen; committed by Thierry Reding.
404bfb78 8cadb01d

Total: +177 -39 (across the 6 files below)
+58 -16
drivers/gpu/host1x/cdma.c
··· 51 51 struct host1x_cdma *cdma = pb_to_cdma(pb); 52 52 struct host1x *host1x = cdma_to_host1x(cdma); 53 53 54 - if (pb->phys != 0) 55 - dma_free_wc(host1x->dev, pb->size_bytes + 4, pb->mapped, 56 - pb->phys); 54 + if (!pb->phys) 55 + return; 56 + 57 + if (host1x->domain) { 58 + iommu_unmap(host1x->domain, pb->dma, pb->alloc_size); 59 + free_iova(&host1x->iova, iova_pfn(&host1x->iova, pb->dma)); 60 + } 61 + 62 + dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); 57 63 58 64 pb->mapped = NULL; 59 65 pb->phys = 0; ··· 72 66 { 73 67 struct host1x_cdma *cdma = pb_to_cdma(pb); 74 68 struct host1x *host1x = cdma_to_host1x(cdma); 69 + struct iova *alloc; 70 + u32 size; 71 + int err; 75 72 76 73 pb->mapped = NULL; 77 74 pb->phys = 0; 78 - pb->size_bytes = HOST1X_PUSHBUFFER_SLOTS * 8; 75 + pb->size = HOST1X_PUSHBUFFER_SLOTS * 8; 76 + 77 + size = pb->size + 4; 79 78 80 79 /* initialize buffer pointers */ 81 - pb->fence = pb->size_bytes - 8; 80 + pb->fence = pb->size - 8; 82 81 pb->pos = 0; 83 82 84 - /* allocate and map pushbuffer memory */ 85 - pb->mapped = dma_alloc_wc(host1x->dev, pb->size_bytes + 4, &pb->phys, 86 - GFP_KERNEL); 87 - if (!pb->mapped) 88 - goto fail; 83 + if (host1x->domain) { 84 + unsigned long shift; 85 + 86 + size = iova_align(&host1x->iova, size); 87 + 88 + pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, 89 + GFP_KERNEL); 90 + if (!pb->mapped) 91 + return -ENOMEM; 92 + 93 + shift = iova_shift(&host1x->iova); 94 + alloc = alloc_iova(&host1x->iova, size >> shift, 95 + host1x->iova_end >> shift, true); 96 + if (!alloc) { 97 + err = -ENOMEM; 98 + goto iommu_free_mem; 99 + } 100 + 101 + pb->dma = iova_dma_addr(&host1x->iova, alloc); 102 + err = iommu_map(host1x->domain, pb->dma, pb->phys, size, 103 + IOMMU_READ); 104 + if (err) 105 + goto iommu_free_iova; 106 + } else { 107 + pb->mapped = dma_alloc_wc(host1x->dev, size, &pb->phys, 108 + GFP_KERNEL); 109 + if (!pb->mapped) 110 + return -ENOMEM; 111 + 112 + pb->dma = pb->phys; 113 + } 
114 + 115 + pb->alloc_size = size; 89 116 90 117 host1x_hw_pushbuffer_init(host1x, pb); 91 118 92 119 return 0; 93 120 94 - fail: 95 - host1x_pushbuffer_destroy(pb); 96 - return -ENOMEM; 121 + iommu_free_iova: 122 + __free_iova(&host1x->iova, alloc); 123 + iommu_free_mem: 124 + dma_free_wc(host1x->dev, pb->alloc_size, pb->mapped, pb->phys); 125 + 126 + return err; 97 127 } 98 128 99 129 /* ··· 143 101 WARN_ON(pb->pos == pb->fence); 144 102 *(p++) = op1; 145 103 *(p++) = op2; 146 - pb->pos = (pb->pos + 8) & (pb->size_bytes - 1); 104 + pb->pos = (pb->pos + 8) & (pb->size - 1); 147 105 } 148 106 149 107 /* ··· 153 111 static void host1x_pushbuffer_pop(struct push_buffer *pb, unsigned int slots) 154 112 { 155 113 /* Advance the next write position */ 156 - pb->fence = (pb->fence + slots * 8) & (pb->size_bytes - 1); 114 + pb->fence = (pb->fence + slots * 8) & (pb->size - 1); 157 115 } 158 116 159 117 /* ··· 161 119 */ 162 120 static u32 host1x_pushbuffer_space(struct push_buffer *pb) 163 121 { 164 - return ((pb->fence - pb->pos) & (pb->size_bytes - 1)) / 8; 122 + return ((pb->fence - pb->pos) & (pb->size - 1)) / 8; 165 123 } 166 124 167 125 /*
+4 -2
drivers/gpu/host1x/cdma.h
··· 43 43 44 44 struct push_buffer { 45 45 void *mapped; /* mapped pushbuffer memory */ 46 - dma_addr_t phys; /* physical address of pushbuffer */ 46 + dma_addr_t dma; /* device address of pushbuffer */ 47 + phys_addr_t phys; /* physical address of pushbuffer */ 47 48 u32 fence; /* index we've written */ 48 49 u32 pos; /* index to write to */ 49 - u32 size_bytes; 50 + u32 size; 51 + u32 alloc_size; 50 52 }; 51 53 52 54 struct buffer_timeout {
+39 -2
drivers/gpu/host1x/dev.c
··· 27 27 28 28 #define CREATE_TRACE_POINTS 29 29 #include <trace/events/host1x.h> 30 + #undef CREATE_TRACE_POINTS 30 31 31 32 #include "bus.h" 32 33 #include "dev.h" ··· 169 168 return err; 170 169 } 171 170 171 + if (iommu_present(&platform_bus_type)) { 172 + struct iommu_domain_geometry *geometry; 173 + unsigned long order; 174 + 175 + host->domain = iommu_domain_alloc(&platform_bus_type); 176 + if (!host->domain) 177 + return -ENOMEM; 178 + 179 + err = iommu_attach_device(host->domain, &pdev->dev); 180 + if (err) 181 + goto fail_free_domain; 182 + 183 + geometry = &host->domain->geometry; 184 + 185 + order = __ffs(host->domain->pgsize_bitmap); 186 + init_iova_domain(&host->iova, 1UL << order, 187 + geometry->aperture_start >> order, 188 + geometry->aperture_end >> order); 189 + host->iova_end = geometry->aperture_end; 190 + } 191 + 172 192 err = host1x_channel_list_init(host); 173 193 if (err) { 174 194 dev_err(&pdev->dev, "failed to initialize channel list\n"); 175 - return err; 195 + goto fail_detach_device; 176 196 } 177 197 178 198 err = clk_prepare_enable(host->clk); 179 199 if (err < 0) { 180 200 dev_err(&pdev->dev, "failed to enable clock\n"); 181 - return err; 201 + goto fail_detach_device; 182 202 } 183 203 184 204 err = host1x_syncpt_init(host); ··· 228 206 host1x_syncpt_deinit(host); 229 207 fail_unprepare_disable: 230 208 clk_disable_unprepare(host->clk); 209 + fail_detach_device: 210 + if (host->domain) { 211 + put_iova_domain(&host->iova); 212 + iommu_detach_device(host->domain, &pdev->dev); 213 + } 214 + fail_free_domain: 215 + if (host->domain) 216 + iommu_domain_free(host->domain); 217 + 231 218 return err; 232 219 } 233 220 ··· 248 217 host1x_intr_deinit(host); 249 218 host1x_syncpt_deinit(host); 250 219 clk_disable_unprepare(host->clk); 220 + 221 + if (host->domain) { 222 + put_iova_domain(&host->iova); 223 + iommu_detach_device(host->domain, &pdev->dev); 224 + iommu_domain_free(host->domain); 225 + } 251 226 252 227 return 0; 253 228 }
+6
drivers/gpu/host1x/dev.h
··· 19 19 20 20 #include <linux/platform_device.h> 21 21 #include <linux/device.h> 22 + #include <linux/iommu.h> 23 + #include <linux/iova.h> 22 24 23 25 #include "channel.h" 24 26 #include "syncpt.h" ··· 109 107 struct host1x_syncpt_base *bases; 110 108 struct device *dev; 111 109 struct clk *clk; 110 + 111 + struct iommu_domain *domain; 112 + struct iova_domain iova; 113 + dma_addr_t iova_end; 112 114 113 115 struct mutex intr_mutex; 114 116 int intr_syncpt_irq;
+7 -9
drivers/gpu/host1x/hw/cdma_hw.c
··· 30 30 */ 31 31 static void push_buffer_init(struct push_buffer *pb) 32 32 { 33 - *(u32 *)(pb->mapped + pb->size_bytes) = host1x_opcode_restart(0); 33 + *(u32 *)(pb->mapped + pb->size) = host1x_opcode_restart(0); 34 34 } 35 35 36 36 /* ··· 55 55 *(p++) = HOST1X_OPCODE_NOP; 56 56 *(p++) = HOST1X_OPCODE_NOP; 57 57 dev_dbg(host1x->dev, "%s: NOP at %pad+%#x\n", __func__, 58 - &pb->phys, getptr); 59 - getptr = (getptr + 8) & (pb->size_bytes - 1); 58 + &pb->dma, getptr); 59 + getptr = (getptr + 8) & (pb->size - 1); 60 60 } 61 61 62 62 wmb(); ··· 78 78 HOST1X_CHANNEL_DMACTRL); 79 79 80 80 /* set base, put and end pointer */ 81 - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); 81 + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); 82 82 host1x_ch_writel(ch, cdma->push_buffer.pos, HOST1X_CHANNEL_DMAPUT); 83 - host1x_ch_writel(ch, cdma->push_buffer.phys + 84 - cdma->push_buffer.size_bytes + 4, 83 + host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size + 4, 85 84 HOST1X_CHANNEL_DMAEND); 86 85 87 86 /* reset GET */ ··· 114 115 HOST1X_CHANNEL_DMACTRL); 115 116 116 117 /* set base, end pointer (all of memory) */ 117 - host1x_ch_writel(ch, cdma->push_buffer.phys, HOST1X_CHANNEL_DMASTART); 118 - host1x_ch_writel(ch, cdma->push_buffer.phys + 119 - cdma->push_buffer.size_bytes, 118 + host1x_ch_writel(ch, cdma->push_buffer.dma, HOST1X_CHANNEL_DMASTART); 119 + host1x_ch_writel(ch, cdma->push_buffer.dma + cdma->push_buffer.size, 120 120 HOST1X_CHANNEL_DMAEND); 121 121 122 122 /* set GET, by loading the value in PUT (then reset GET) */
+62 -10
drivers/gpu/host1x/job.c
··· 174 174 return 0; 175 175 } 176 176 177 - static unsigned int pin_job(struct host1x_job *job) 177 + static unsigned int pin_job(struct host1x *host, struct host1x_job *job) 178 178 { 179 179 unsigned int i; 180 + int err; 180 181 181 182 job->num_unpins = 0; 182 183 ··· 187 186 dma_addr_t phys_addr; 188 187 189 188 reloc->target.bo = host1x_bo_get(reloc->target.bo); 190 - if (!reloc->target.bo) 189 + if (!reloc->target.bo) { 190 + err = -EINVAL; 191 191 goto unpin; 192 + } 192 193 193 194 phys_addr = host1x_bo_pin(reloc->target.bo, &sgt); 194 - if (!phys_addr) 195 + if (!phys_addr) { 196 + err = -EINVAL; 195 197 goto unpin; 198 + } 196 199 197 200 job->addr_phys[job->num_unpins] = phys_addr; 198 201 job->unpins[job->num_unpins].bo = reloc->target.bo; ··· 206 201 207 202 for (i = 0; i < job->num_gathers; i++) { 208 203 struct host1x_job_gather *g = &job->gathers[i]; 204 + size_t gather_size = 0; 205 + struct scatterlist *sg; 209 206 struct sg_table *sgt; 210 207 dma_addr_t phys_addr; 208 + unsigned long shift; 209 + struct iova *alloc; 210 + unsigned int j; 211 211 212 212 g->bo = host1x_bo_get(g->bo); 213 - if (!g->bo) 213 + if (!g->bo) { 214 + err = -EINVAL; 214 215 goto unpin; 216 + } 215 217 216 218 phys_addr = host1x_bo_pin(g->bo, &sgt); 217 - if (!phys_addr) 219 + if (!phys_addr) { 220 + err = -EINVAL; 218 221 goto unpin; 222 + } 219 223 220 - job->addr_phys[job->num_unpins] = phys_addr; 224 + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { 225 + for_each_sg(sgt->sgl, sg, sgt->nents, j) 226 + gather_size += sg->length; 227 + gather_size = iova_align(&host->iova, gather_size); 228 + 229 + shift = iova_shift(&host->iova); 230 + alloc = alloc_iova(&host->iova, gather_size >> shift, 231 + host->iova_end >> shift, true); 232 + if (!alloc) { 233 + err = -ENOMEM; 234 + goto unpin; 235 + } 236 + 237 + err = iommu_map_sg(host->domain, 238 + iova_dma_addr(&host->iova, alloc), 239 + sgt->sgl, sgt->nents, IOMMU_READ); 240 + if (err == 0) { 241 + 
__free_iova(&host->iova, alloc); 242 + err = -EINVAL; 243 + goto unpin; 244 + } 245 + 246 + job->addr_phys[job->num_unpins] = 247 + iova_dma_addr(&host->iova, alloc); 248 + job->unpins[job->num_unpins].size = gather_size; 249 + } else { 250 + job->addr_phys[job->num_unpins] = phys_addr; 251 + } 252 + 253 + job->gather_addr_phys[i] = job->addr_phys[job->num_unpins]; 254 + 221 255 job->unpins[job->num_unpins].bo = g->bo; 222 256 job->unpins[job->num_unpins].sgt = sgt; 223 257 job->num_unpins++; 224 258 } 225 259 226 - return job->num_unpins; 260 + return 0; 227 261 228 262 unpin: 229 263 host1x_job_unpin(job); 230 - return 0; 264 + return err; 231 265 } 232 266 233 267 static int do_relocs(struct host1x_job *job, struct host1x_bo *cmdbuf) ··· 569 525 host1x_syncpt_load(host->syncpt + i); 570 526 571 527 /* pin memory */ 572 - err = pin_job(job); 573 - if (!err) 528 + err = pin_job(host, job); 529 + if (err) 574 530 goto out; 575 531 576 532 /* patch gathers */ ··· 616 572 617 573 void host1x_job_unpin(struct host1x_job *job) 618 574 { 575 + struct host1x *host = dev_get_drvdata(job->channel->dev->parent); 619 576 unsigned int i; 620 577 621 578 for (i = 0; i < job->num_unpins; i++) { 622 579 struct host1x_job_unpin_data *unpin = &job->unpins[i]; 580 + 581 + if (!IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) && host->domain) { 582 + iommu_unmap(host->domain, job->addr_phys[i], 583 + unpin->size); 584 + free_iova(&host->iova, 585 + iova_pfn(&host->iova, job->addr_phys[i])); 586 + } 623 587 624 588 host1x_bo_unpin(unpin->bo, unpin->sgt); 625 589 host1x_bo_put(unpin->bo);
+1
drivers/gpu/host1x/job.h
··· 44 44 struct host1x_job_unpin_data { 45 45 struct host1x_bo *bo; 46 46 struct sg_table *sgt; 47 + size_t size; 47 48 }; 48 49 49 50 /*