Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

gpu: host1x: Set DMA mask based on IOMMU setup

If the Tegra DRM clients are backed by an IOMMU, push buffers are likely
to be allocated beyond the 32-bit boundary if sufficient system memory
is available. This is problematic on earlier generations of Tegra where
host1x supports a maximum of 32 address bits for the GATHER opcode. More
recent versions of Tegra (Tegra186 and later) have a wide variant of the
GATHER opcode, which allows addressing up to 64 bits of memory.

If host1x itself is behind an IOMMU as well this doesn't matter because
the IOMMU's input address space is restricted to 32 bits on generations
without support for wide GATHER opcodes.

However, if host1x is not behind an IOMMU, it won't be able to process
push buffers beyond the 32-bit boundary on Tegra generations that don't
support wide GATHER opcodes. Restricting the DMA mask to 32 bits on these
generations prevents buffers from being allocated from beyond the 32-bit
boundary.

Signed-off-by: Thierry Reding <treding@nvidia.com>

+136 -79
+135 -79
drivers/gpu/host1x/dev.c
··· 73 73 .init = host1x01_init, 74 74 .sync_offset = 0x3000, 75 75 .dma_mask = DMA_BIT_MASK(32), 76 + .has_wide_gather = false, 76 77 .has_hypervisor = false, 77 78 .num_sid_entries = 0, 78 79 .sid_table = NULL, ··· 87 86 .init = host1x02_init, 88 87 .sync_offset = 0x3000, 89 88 .dma_mask = DMA_BIT_MASK(32), 89 + .has_wide_gather = false, 90 90 .has_hypervisor = false, 91 91 .num_sid_entries = 0, 92 92 .sid_table = NULL, ··· 101 99 .init = host1x04_init, 102 100 .sync_offset = 0x2100, 103 101 .dma_mask = DMA_BIT_MASK(34), 102 + .has_wide_gather = false, 104 103 .has_hypervisor = false, 105 104 .num_sid_entries = 0, 106 105 .sid_table = NULL, ··· 115 112 .init = host1x05_init, 116 113 .sync_offset = 0x2100, 117 114 .dma_mask = DMA_BIT_MASK(34), 115 + .has_wide_gather = false, 118 116 .has_hypervisor = false, 119 117 .num_sid_entries = 0, 120 118 .sid_table = NULL, ··· 138 134 .init = host1x06_init, 139 135 .sync_offset = 0x0, 140 136 .dma_mask = DMA_BIT_MASK(40), 137 + .has_wide_gather = true, 141 138 .has_hypervisor = true, 142 139 .num_sid_entries = ARRAY_SIZE(tegra186_sid_table), 143 140 .sid_table = tegra186_sid_table, ··· 161 156 .init = host1x07_init, 162 157 .sync_offset = 0x0, 163 158 .dma_mask = DMA_BIT_MASK(40), 159 + .has_wide_gather = true, 164 160 .has_hypervisor = true, 165 161 .num_sid_entries = ARRAY_SIZE(tegra194_sid_table), 166 162 .sid_table = tegra194_sid_table, ··· 189 183 190 184 host1x_hypervisor_writel(host, entry->offset, entry->base); 191 185 host1x_hypervisor_writel(host, entry->limit, entry->base + 4); 186 + } 187 + } 188 + 189 + static struct iommu_domain *host1x_iommu_attach(struct host1x *host) 190 + { 191 + struct iommu_domain *domain = iommu_get_domain_for_dev(host->dev); 192 + int err; 193 + 194 + /* 195 + * If the host1x firewall is enabled, there's no need to enable IOMMU 196 + * support. Similarly, if host1x is already attached to an IOMMU (via 197 + * the DMA API), don't try to attach again. 
198 + */ 199 + if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL) || domain) 200 + return domain; 201 + 202 + host->group = iommu_group_get(host->dev); 203 + if (host->group) { 204 + struct iommu_domain_geometry *geometry; 205 + dma_addr_t start, end; 206 + unsigned long order; 207 + 208 + err = iova_cache_get(); 209 + if (err < 0) 210 + goto put_group; 211 + 212 + host->domain = iommu_domain_alloc(&platform_bus_type); 213 + if (!host->domain) { 214 + err = -ENOMEM; 215 + goto put_cache; 216 + } 217 + 218 + err = iommu_attach_group(host->domain, host->group); 219 + if (err) { 220 + if (err == -ENODEV) 221 + err = 0; 222 + 223 + goto free_domain; 224 + } 225 + 226 + geometry = &host->domain->geometry; 227 + start = geometry->aperture_start & host->info->dma_mask; 228 + end = geometry->aperture_end & host->info->dma_mask; 229 + 230 + order = __ffs(host->domain->pgsize_bitmap); 231 + init_iova_domain(&host->iova, 1UL << order, start >> order); 232 + host->iova_end = end; 233 + 234 + domain = host->domain; 235 + } 236 + 237 + return domain; 238 + 239 + free_domain: 240 + iommu_domain_free(host->domain); 241 + host->domain = NULL; 242 + put_cache: 243 + iova_cache_put(); 244 + put_group: 245 + iommu_group_put(host->group); 246 + host->group = NULL; 247 + 248 + return ERR_PTR(err); 249 + } 250 + 251 + static int host1x_iommu_init(struct host1x *host) 252 + { 253 + u64 mask = host->info->dma_mask; 254 + struct iommu_domain *domain; 255 + int err; 256 + 257 + domain = host1x_iommu_attach(host); 258 + if (IS_ERR(domain)) { 259 + err = PTR_ERR(domain); 260 + dev_err(host->dev, "failed to attach to IOMMU: %d\n", err); 261 + return err; 262 + } 263 + 264 + /* 265 + * If we're not behind an IOMMU make sure we don't get push buffers 266 + * that are allocated outside of the range addressable by the GATHER 267 + * opcode. 268 + * 269 + * Newer generations of Tegra (Tegra186 and later) support a wide 270 + * variant of the GATHER opcode that allows addressing more bits. 
271 + */ 272 + if (!domain && !host->info->has_wide_gather) 273 + mask = DMA_BIT_MASK(32); 274 + 275 + err = dma_coerce_mask_and_coherent(host->dev, mask); 276 + if (err < 0) { 277 + dev_err(host->dev, "failed to set DMA mask: %d\n", err); 278 + return err; 279 + } 280 + 281 + return 0; 282 + } 283 + 284 + static void host1x_iommu_exit(struct host1x *host) 285 + { 286 + if (host->domain) { 287 + put_iova_domain(&host->iova); 288 + iommu_detach_group(host->domain, host->group); 289 + 290 + iommu_domain_free(host->domain); 291 + host->domain = NULL; 292 + 293 + iova_cache_put(); 294 + 295 + iommu_group_put(host->group); 296 + host->group = NULL; 192 297 } 193 298 } 194 299 ··· 365 248 host->dev->dma_parms = &host->dma_parms; 366 249 dma_set_max_seg_size(host->dev, UINT_MAX); 367 250 368 - dma_set_mask_and_coherent(host->dev, host->info->dma_mask); 369 - 370 251 if (host->info->init) { 371 252 err = host->info->init(host); 372 253 if (err) ··· 388 273 return err; 389 274 } 390 275 391 - if (IS_ENABLED(CONFIG_TEGRA_HOST1X_FIREWALL)) 392 - goto skip_iommu; 393 - 394 - if (iommu_get_domain_for_dev(&pdev->dev)) 395 - goto skip_iommu; 396 - 397 - host->group = iommu_group_get(&pdev->dev); 398 - if (host->group) { 399 - struct iommu_domain_geometry *geometry; 400 - u64 mask = dma_get_mask(host->dev); 401 - dma_addr_t start, end; 402 - unsigned long order; 403 - 404 - err = iova_cache_get(); 405 - if (err < 0) 406 - goto put_group; 407 - 408 - host->domain = iommu_domain_alloc(&platform_bus_type); 409 - if (!host->domain) { 410 - err = -ENOMEM; 411 - goto put_cache; 412 - } 413 - 414 - err = iommu_attach_group(host->domain, host->group); 415 - if (err) { 416 - if (err == -ENODEV) { 417 - iommu_domain_free(host->domain); 418 - host->domain = NULL; 419 - iova_cache_put(); 420 - iommu_group_put(host->group); 421 - host->group = NULL; 422 - goto skip_iommu; 423 - } 424 - 425 - goto fail_free_domain; 426 - } 427 - 428 - geometry = &host->domain->geometry; 429 - start = 
geometry->aperture_start & mask; 430 - end = geometry->aperture_end & mask; 431 - 432 - order = __ffs(host->domain->pgsize_bitmap); 433 - init_iova_domain(&host->iova, 1UL << order, start >> order); 434 - host->iova_end = end; 276 + err = host1x_iommu_init(host); 277 + if (err < 0) { 278 + dev_err(&pdev->dev, "failed to setup IOMMU: %d\n", err); 279 + return err; 435 280 } 436 281 437 - skip_iommu: 438 282 err = host1x_channel_list_init(&host->channel_list, 439 283 host->info->nb_channels); 440 284 if (err) { 441 285 dev_err(&pdev->dev, "failed to initialize channel list\n"); 442 - goto fail_detach_device; 286 + goto iommu_exit; 443 287 } 444 288 445 289 err = clk_prepare_enable(host->clk); 446 290 if (err < 0) { 447 291 dev_err(&pdev->dev, "failed to enable clock\n"); 448 - goto fail_free_channels; 292 + goto free_channels; 449 293 } 450 294 451 295 err = reset_control_deassert(host->rst); 452 296 if (err < 0) { 453 297 dev_err(&pdev->dev, "failed to deassert reset: %d\n", err); 454 - goto fail_unprepare_disable; 298 + goto unprepare_disable; 455 299 } 456 300 457 301 err = host1x_syncpt_init(host); 458 302 if (err) { 459 303 dev_err(&pdev->dev, "failed to initialize syncpts\n"); 460 - goto fail_reset_assert; 304 + goto reset_assert; 461 305 } 462 306 463 307 err = host1x_intr_init(host, syncpt_irq); 464 308 if (err) { 465 309 dev_err(&pdev->dev, "failed to initialize interrupts\n"); 466 - goto fail_deinit_syncpt; 310 + goto deinit_syncpt; 467 311 } 468 312 469 313 host1x_debug_init(host); ··· 432 358 433 359 err = host1x_register(host); 434 360 if (err < 0) 435 - goto fail_deinit_intr; 361 + goto deinit_intr; 436 362 437 363 return 0; 438 364 439 - fail_deinit_intr: 365 + deinit_intr: 440 366 host1x_intr_deinit(host); 441 - fail_deinit_syncpt: 367 + deinit_syncpt: 442 368 host1x_syncpt_deinit(host); 443 - fail_reset_assert: 369 + reset_assert: 444 370 reset_control_assert(host->rst); 445 - fail_unprepare_disable: 371 + unprepare_disable: 446 372 
clk_disable_unprepare(host->clk); 447 - fail_free_channels: 373 + free_channels: 448 374 host1x_channel_list_free(&host->channel_list); 449 - fail_detach_device: 450 - if (host->group && host->domain) { 451 - put_iova_domain(&host->iova); 452 - iommu_detach_group(host->domain, host->group); 453 - } 454 - fail_free_domain: 455 - if (host->domain) 456 - iommu_domain_free(host->domain); 457 - put_cache: 458 - if (host->group) 459 - iova_cache_put(); 460 - put_group: 461 - iommu_group_put(host->group); 375 + iommu_exit: 376 + host1x_iommu_exit(host); 462 377 463 378 return err; 464 379 } ··· 462 399 host1x_syncpt_deinit(host); 463 400 reset_control_assert(host->rst); 464 401 clk_disable_unprepare(host->clk); 465 - 466 - if (host->domain) { 467 - put_iova_domain(&host->iova); 468 - iommu_detach_group(host->domain, host->group); 469 - iommu_domain_free(host->domain); 470 - iova_cache_put(); 471 - iommu_group_put(host->group); 472 - } 402 + host1x_iommu_exit(host); 473 403 474 404 return 0; 475 405 }
+1
drivers/gpu/host1x/dev.h
··· 97 97 int (*init)(struct host1x *host1x); /* initialize per SoC ops */ 98 98 unsigned int sync_offset; /* offset of syncpoint registers */ 99 99 u64 dma_mask; /* mask of addressable memory */ 100 + bool has_wide_gather; /* supports GATHER_W opcode */ 100 101 bool has_hypervisor; /* has hypervisor registers */ 101 102 unsigned int num_sid_entries; 102 103 const struct host1x_sid_entry *sid_table;