Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

"This tag contains habanalabs driver changes for v6.1:

- Support new notifier event for device state change through eventfd.
- Add uAPI to retrieve device attestation information for Gaudi2.
- Add uAPI to retrieve the h/w status of all h/w blocks.
- Add uAPI to control the running mode of the engine cores in Gaudi2.
- Expose whether the device runs with secured firmware through the INFO ioctl
  and sysfs.
- Support trace events in DMA allocations and MMU map/unmap operations.
- Notify firmware when the device was acquired by a user process and when it
  was released. This is done as part of the RAS that the f/w performs.
- Multiple bug fixes, refactors and renames.
- Cleanup of error messages, moving some to debug level.
- Enhance log prints in case of h/w error events for Gaudi2."

* tag 'misc-habanalabs-next-2022-09-21' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux: (68 commits)
habanalabs: eliminate aggregate use warning
habanalabs/gaudi: use 8KB aligned address for TPC kernels
habanalabs: remove some f/w descriptor validations
habanalabs: build ASICs from new to old
habanalabs/gaudi2: allow user to flush PCIE by read
habanalabs: failure to open device due to reset is debug level
habanalabs/gaudi2: Remove unnecessary (void*) conversions
habanalabs/gaudi2: add secured attestation info uapi
habanalabs/gaudi2: add handling to pmmu events in eqe handler
habanalabs/gaudi: change TPC Assert to use TPC DEC instead of QMAN err
habanalabs: rename error info structure
habanalabs/gaudi2: get f/w reset status register dynamically
habanalabs/gaudi2: increase hard-reset sleep time to 2 sec
habanalabs/gaudi2: print RAZWI info upon PCIe access error
habanalabs: MMU invalidation h/w is per device
habanalabs: new notifier events for device state
habanalabs/gaudi2: free event irq if init fails
habanalabs: fix resetting the DRAM BAR
habanalabs: add support for new cpucp return codes
habanalabs/gaudi2: read F/W security indication after hard reset
...

+1859 -794
+9 -3
Documentation/ABI/testing/sysfs-driver-habanalabs
··· 16 16 17 17 What: /sys/class/habanalabs/hl<n>/clk_max_freq_mhz 18 18 Date: Jun 2019 19 - KernelVersion: not yet upstreamed 19 + KernelVersion: 5.7 20 20 Contact: ogabbay@kernel.org 21 21 Description: Allows the user to set the maximum clock frequency, in MHz. 22 22 The device clock might be set to lower value than the maximum. ··· 26 26 27 27 What: /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz 28 28 Date: Jun 2019 29 - KernelVersion: not yet upstreamed 29 + KernelVersion: 5.7 30 30 Contact: ogabbay@kernel.org 31 31 Description: Displays the current frequency, in MHz, of the device clock. 32 32 This property is valid only for the Gaudi ASIC family ··· 176 176 Contact: ogabbay@kernel.org 177 177 Description: Version of the device's preboot F/W code 178 178 179 + What: /sys/class/habanalabs/hl<n>/security_enabled 180 + Date: Oct 2022 181 + KernelVersion: 6.1 182 + Contact: obitton@habana.ai 183 + Description: Displays the device's security status 184 + 179 185 What: /sys/class/habanalabs/hl<n>/soft_reset 180 186 Date: Jan 2019 181 187 KernelVersion: 5.1 ··· 236 230 237 231 What: /sys/class/habanalabs/hl<n>/vrm_ver 238 232 Date: Jan 2022 239 - KernelVersion: not yet upstreamed 233 + KernelVersion: 5.17 240 234 Contact: ogabbay@kernel.org 241 235 Description: Version of the Device's Voltage Regulator Monitor F/W code. N/A to GOYA and GAUDI
+1
MAINTAINERS
··· 8878 8878 F: Documentation/ABI/testing/debugfs-driver-habanalabs 8879 8879 F: Documentation/ABI/testing/sysfs-driver-habanalabs 8880 8880 F: drivers/misc/habanalabs/ 8881 + F: include/trace/events/habanalabs.h 8881 8882 F: include/uapi/misc/habanalabs.h 8882 8883 8883 8884 HACKRF MEDIA DRIVER
+1
drivers/misc/habanalabs/Kconfig
··· 10 10 select HWMON 11 11 select DMA_SHARED_BUFFER 12 12 select CRC32 13 + select FW_LOADER 13 14 help 14 15 Enables PCIe card driver for Habana's AI Processors (AIP) that are 15 16 designed to accelerate Deep Learning inference and training workloads.
+4 -4
drivers/misc/habanalabs/Makefile
··· 8 8 include $(src)/common/Makefile 9 9 habanalabs-y += $(HL_COMMON_FILES) 10 10 11 - include $(src)/goya/Makefile 12 - habanalabs-y += $(HL_GOYA_FILES) 11 + include $(src)/gaudi2/Makefile 12 + habanalabs-y += $(HL_GAUDI2_FILES) 13 13 14 14 include $(src)/gaudi/Makefile 15 15 habanalabs-y += $(HL_GAUDI_FILES) 16 16 17 - include $(src)/gaudi2/Makefile 18 - habanalabs-y += $(HL_GAUDI2_FILES) 17 + include $(src)/goya/Makefile 18 + habanalabs-y += $(HL_GOYA_FILES) 19 19 20 20 habanalabs-$(CONFIG_DEBUG_FS) += common/debugfs.o
+35 -92
drivers/misc/habanalabs/common/command_buffer.c
··· 12 12 #include <linux/slab.h> 13 13 #include <linux/uaccess.h> 14 14 15 + #define CB_VA_POOL_SIZE (4UL * SZ_1G) 16 + 15 17 static int cb_map_mem(struct hl_ctx *ctx, struct hl_cb *cb) 16 18 { 17 19 struct hl_device *hdev = ctx->hdev; 18 20 struct asic_fixed_properties *prop = &hdev->asic_prop; 19 - struct hl_vm_va_block *va_block, *tmp; 20 - dma_addr_t bus_addr; 21 - u64 virt_addr; 22 21 u32 page_size = prop->pmmu.page_size; 23 - s32 offset; 24 22 int rc; 25 23 26 24 if (!hdev->supports_cb_mapping) { 27 25 dev_err_ratelimited(hdev->dev, 28 - "Cannot map CB because no VA range is allocated for CB mapping\n"); 26 + "Mapping a CB to the device's MMU is not supported\n"); 29 27 return -EINVAL; 30 28 } 31 29 ··· 33 35 return -EINVAL; 34 36 } 35 37 36 - INIT_LIST_HEAD(&cb->va_block_list); 38 + if (cb->is_mmu_mapped) 39 + return 0; 37 40 38 - for (bus_addr = cb->bus_address; 39 - bus_addr < cb->bus_address + cb->size; 40 - bus_addr += page_size) { 41 + cb->roundup_size = roundup(cb->size, page_size); 41 42 42 - virt_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, page_size); 43 - if (!virt_addr) { 44 - dev_err(hdev->dev, 45 - "Failed to allocate device virtual address for CB\n"); 46 - rc = -ENOMEM; 47 - goto err_va_pool_free; 48 - } 49 - 50 - va_block = kzalloc(sizeof(*va_block), GFP_KERNEL); 51 - if (!va_block) { 52 - rc = -ENOMEM; 53 - gen_pool_free(ctx->cb_va_pool, virt_addr, page_size); 54 - goto err_va_pool_free; 55 - } 56 - 57 - va_block->start = virt_addr; 58 - va_block->end = virt_addr + page_size - 1; 59 - va_block->size = page_size; 60 - list_add_tail(&va_block->node, &cb->va_block_list); 43 + cb->virtual_addr = (u64) gen_pool_alloc(ctx->cb_va_pool, cb->roundup_size); 44 + if (!cb->virtual_addr) { 45 + dev_err(hdev->dev, "Failed to allocate device virtual address for CB\n"); 46 + return -ENOMEM; 61 47 } 62 48 63 - mutex_lock(&ctx->mmu_lock); 64 - 65 - bus_addr = cb->bus_address; 66 - offset = 0; 67 - list_for_each_entry(va_block, &cb->va_block_list, node) { 68 - 
rc = hl_mmu_map_page(ctx, va_block->start, bus_addr, 69 - va_block->size, list_is_last(&va_block->node, 70 - &cb->va_block_list)); 71 - if (rc) { 72 - dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", 73 - va_block->start); 74 - goto err_va_umap; 75 - } 76 - 77 - bus_addr += va_block->size; 78 - offset += va_block->size; 49 + mutex_lock(&hdev->mmu_lock); 50 + rc = hl_mmu_map_contiguous(ctx, cb->virtual_addr, cb->bus_address, cb->roundup_size); 51 + if (rc) { 52 + dev_err(hdev->dev, "Failed to map VA %#llx to CB\n", cb->virtual_addr); 53 + goto err_va_umap; 79 54 } 80 - 81 55 rc = hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV); 82 - 83 - mutex_unlock(&ctx->mmu_lock); 56 + mutex_unlock(&hdev->mmu_lock); 84 57 85 58 cb->is_mmu_mapped = true; 86 - 87 59 return rc; 88 60 89 61 err_va_umap: 90 - list_for_each_entry(va_block, &cb->va_block_list, node) { 91 - if (offset <= 0) 92 - break; 93 - hl_mmu_unmap_page(ctx, va_block->start, va_block->size, 94 - offset <= va_block->size); 95 - offset -= va_block->size; 96 - } 97 - 98 - rc = hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 99 - 100 - mutex_unlock(&ctx->mmu_lock); 101 - 102 - err_va_pool_free: 103 - list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { 104 - gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); 105 - list_del(&va_block->node); 106 - kfree(va_block); 107 - } 108 - 62 + mutex_unlock(&hdev->mmu_lock); 63 + gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); 109 64 return rc; 110 65 } 111 66 112 67 static void cb_unmap_mem(struct hl_ctx *ctx, struct hl_cb *cb) 113 68 { 114 69 struct hl_device *hdev = ctx->hdev; 115 - struct hl_vm_va_block *va_block, *tmp; 116 70 117 - mutex_lock(&ctx->mmu_lock); 118 - 119 - list_for_each_entry(va_block, &cb->va_block_list, node) 120 - if (hl_mmu_unmap_page(ctx, va_block->start, va_block->size, 121 - list_is_last(&va_block->node, 122 - &cb->va_block_list))) 123 - 
dev_warn_ratelimited(hdev->dev, 124 - "Failed to unmap CB's va 0x%llx\n", 125 - va_block->start); 126 - 71 + mutex_lock(&hdev->mmu_lock); 72 + hl_mmu_unmap_contiguous(ctx, cb->virtual_addr, cb->roundup_size); 127 73 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 74 + mutex_unlock(&hdev->mmu_lock); 128 75 129 - mutex_unlock(&ctx->mmu_lock); 130 - 131 - list_for_each_entry_safe(va_block, tmp, &cb->va_block_list, node) { 132 - gen_pool_free(ctx->cb_va_pool, va_block->start, va_block->size); 133 - list_del(&va_block->node); 134 - kfree(va_block); 135 - } 76 + gen_pool_free(ctx->cb_va_pool, cb->virtual_addr, cb->roundup_size); 136 77 } 137 78 138 79 static void cb_fini(struct hl_device *hdev, struct hl_cb *cb) ··· 313 376 static int hl_cb_info(struct hl_mem_mgr *mmg, 314 377 u64 handle, u32 flags, u32 *usage_cnt, u64 *device_va) 315 378 { 316 - struct hl_vm_va_block *va_block; 317 379 struct hl_cb *cb; 318 380 int rc = 0; 319 381 ··· 324 388 } 325 389 326 390 if (flags & HL_CB_FLAGS_GET_DEVICE_VA) { 327 - va_block = list_first_entry(&cb->va_block_list, struct hl_vm_va_block, node); 328 - if (va_block) { 329 - *device_va = va_block->start; 391 + if (cb->is_mmu_mapped) { 392 + *device_va = cb->virtual_addr; 330 393 } else { 331 394 dev_err(mmg->dev, "CB is not mapped to the device's MMU\n"); 332 395 rc = -EINVAL; ··· 501 566 return -ENOMEM; 502 567 } 503 568 504 - rc = gen_pool_add(ctx->cb_va_pool, prop->cb_va_start_addr, 505 - prop->cb_va_end_addr - prop->cb_va_start_addr, -1); 569 + ctx->cb_va_pool_base = hl_reserve_va_block(hdev, ctx, HL_VA_RANGE_TYPE_HOST, 570 + CB_VA_POOL_SIZE, HL_MMU_VA_ALIGNMENT_NOT_NEEDED); 571 + if (!ctx->cb_va_pool_base) { 572 + rc = -ENOMEM; 573 + goto err_pool_destroy; 574 + } 575 + rc = gen_pool_add(ctx->cb_va_pool, ctx->cb_va_pool_base, CB_VA_POOL_SIZE, -1); 506 576 if (rc) { 507 577 dev_err(hdev->dev, 508 578 "Failed to add memory to VA gen pool for CB mapping\n"); 509 - goto err_pool_destroy; 579 + goto err_unreserve_va_block; 510 
580 } 511 581 512 582 return 0; 513 583 584 + err_unreserve_va_block: 585 + hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE); 514 586 err_pool_destroy: 515 587 gen_pool_destroy(ctx->cb_va_pool); 516 588 ··· 532 590 return; 533 591 534 592 gen_pool_destroy(ctx->cb_va_pool); 593 + hl_unreserve_va_block(hdev, ctx, ctx->cb_va_pool_base, CB_VA_POOL_SIZE); 535 594 }
+61 -14
drivers/misc/habanalabs/common/command_submission.c
··· 12 12 #include <linux/slab.h> 13 13 14 14 #define HL_CS_FLAGS_TYPE_MASK (HL_CS_FLAGS_SIGNAL | HL_CS_FLAGS_WAIT | \ 15 - HL_CS_FLAGS_COLLECTIVE_WAIT) 15 + HL_CS_FLAGS_COLLECTIVE_WAIT | HL_CS_FLAGS_RESERVE_SIGNALS_ONLY | \ 16 + HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY | HL_CS_FLAGS_ENGINE_CORE_COMMAND) 17 + 16 18 17 19 #define MAX_TS_ITER_NUM 10 18 20 ··· 826 824 } 827 825 828 826 /* Save only the first CS timeout parameters */ 829 - rc = atomic_cmpxchg(&hdev->last_error.cs_timeout.write_enable, 1, 0); 827 + rc = atomic_cmpxchg(&hdev->captured_err_info.cs_timeout.write_enable, 1, 0); 830 828 if (rc) { 831 - hdev->last_error.cs_timeout.timestamp = ktime_get(); 832 - hdev->last_error.cs_timeout.seq = cs->sequence; 829 + hdev->captured_err_info.cs_timeout.timestamp = ktime_get(); 830 + hdev->captured_err_info.cs_timeout.seq = cs->sequence; 833 831 834 832 event_mask = device_reset ? (HL_NOTIFIER_EVENT_CS_TIMEOUT | 835 833 HL_NOTIFIER_EVENT_DEVICE_RESET) : HL_NOTIFIER_EVENT_CS_TIMEOUT; ··· 1244 1242 return CS_RESERVE_SIGNALS; 1245 1243 else if (cs_type_flags & HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY) 1246 1244 return CS_UNRESERVE_SIGNALS; 1245 + else if (cs_type_flags & HL_CS_FLAGS_ENGINE_CORE_COMMAND) 1246 + return CS_TYPE_ENGINE_CORE; 1247 1247 else 1248 1248 return CS_TYPE_DEFAULT; 1249 1249 } ··· 1257 1253 u32 cs_type_flags, num_chunks; 1258 1254 enum hl_device_status status; 1259 1255 enum hl_cs_type cs_type; 1256 + bool is_sync_stream; 1260 1257 1261 1258 if (!hl_device_operational(hdev, &status)) { 1262 1259 return -EBUSY; ··· 1281 1276 cs_type = hl_cs_get_cs_type(cs_type_flags); 1282 1277 num_chunks = args->in.num_chunks_execute; 1283 1278 1284 - if (unlikely((cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || 1285 - cs_type == CS_TYPE_COLLECTIVE_WAIT) && 1286 - !hdev->supports_sync_stream)) { 1279 + is_sync_stream = (cs_type == CS_TYPE_SIGNAL || cs_type == CS_TYPE_WAIT || 1280 + cs_type == CS_TYPE_COLLECTIVE_WAIT); 1281 + 1282 + if (unlikely(is_sync_stream && 
!hdev->supports_sync_stream)) { 1287 1283 dev_err(hdev->dev, "Sync stream CS is not supported\n"); 1288 1284 return -EINVAL; 1289 1285 } ··· 1294 1288 dev_err(hdev->dev, "Got execute CS with 0 chunks, context %d\n", ctx->asid); 1295 1289 return -EINVAL; 1296 1290 } 1297 - } else if (num_chunks != 1) { 1291 + } else if (is_sync_stream && num_chunks != 1) { 1298 1292 dev_err(hdev->dev, 1299 1293 "Sync stream CS mandates one chunk only, context %d\n", 1300 1294 ctx->asid); ··· 1590 1584 struct hl_device *hdev = hpriv->hdev; 1591 1585 struct hl_ctx *ctx = hpriv->ctx; 1592 1586 bool need_soft_reset = false; 1593 - int rc = 0, do_ctx_switch; 1587 + int rc = 0, do_ctx_switch = 0; 1594 1588 void __user *chunks; 1595 1589 u32 num_chunks, tmp; 1596 1590 u16 sob_count; 1597 1591 int ret; 1598 1592 1599 - do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); 1593 + if (hdev->supports_ctx_switch) 1594 + do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0); 1600 1595 1601 1596 if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) { 1602 1597 mutex_lock(&hpriv->restore_phase_mutex); ··· 1668 1661 } 1669 1662 } 1670 1663 1671 - ctx->thread_ctx_switch_wait_token = 1; 1664 + if (hdev->supports_ctx_switch) 1665 + ctx->thread_ctx_switch_wait_token = 1; 1672 1666 1673 - } else if (!ctx->thread_ctx_switch_wait_token) { 1667 + } else if (hdev->supports_ctx_switch && !ctx->thread_ctx_switch_wait_token) { 1674 1668 rc = hl_poll_timeout_memory(hdev, 1675 1669 &ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1), 1676 1670 100, jiffies_to_usecs(hdev->timeout_jiffies), false); ··· 2359 2351 return rc; 2360 2352 } 2361 2353 2354 + static int cs_ioctl_engine_cores(struct hl_fpriv *hpriv, u64 engine_cores, 2355 + u32 num_engine_cores, u32 core_command) 2356 + { 2357 + int rc; 2358 + struct hl_device *hdev = hpriv->hdev; 2359 + void __user *engine_cores_arr; 2360 + u32 *cores; 2361 + 2362 + if (!num_engine_cores || num_engine_cores > 
hdev->asic_prop.num_engine_cores) { 2363 + dev_err(hdev->dev, "Number of engine cores %d is invalid\n", num_engine_cores); 2364 + return -EINVAL; 2365 + } 2366 + 2367 + if (core_command != HL_ENGINE_CORE_RUN && core_command != HL_ENGINE_CORE_HALT) { 2368 + dev_err(hdev->dev, "Engine core command is invalid\n"); 2369 + return -EINVAL; 2370 + } 2371 + 2372 + engine_cores_arr = (void __user *) (uintptr_t) engine_cores; 2373 + cores = kmalloc_array(num_engine_cores, sizeof(u32), GFP_KERNEL); 2374 + if (!cores) 2375 + return -ENOMEM; 2376 + 2377 + if (copy_from_user(cores, engine_cores_arr, num_engine_cores * sizeof(u32))) { 2378 + dev_err(hdev->dev, "Failed to copy core-ids array from user\n"); 2379 + kfree(cores); 2380 + return -EFAULT; 2381 + } 2382 + 2383 + rc = hdev->asic_funcs->set_engine_cores(hdev, cores, num_engine_cores, core_command); 2384 + kfree(cores); 2385 + 2386 + return rc; 2387 + } 2388 + 2362 2389 int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data) 2363 2390 { 2364 2391 union hl_cs_args *args = data; ··· 2445 2402 case CS_UNRESERVE_SIGNALS: 2446 2403 rc = cs_ioctl_unreserve_signals(hpriv, 2447 2404 args->in.encaps_sig_handle_id); 2405 + break; 2406 + case CS_TYPE_ENGINE_CORE: 2407 + rc = cs_ioctl_engine_cores(hpriv, args->in.engine_cores, 2408 + args->in.num_engine_cores, args->in.core_command); 2448 2409 break; 2449 2410 default: 2450 2411 rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq, ··· 2571 2524 ktime_t max_ktime, first_cs_time; 2572 2525 enum hl_cs_wait_status status; 2573 2526 2574 - memset(fence_ptr, 0, arr_len * sizeof(*fence_ptr)); 2527 + memset(fence_ptr, 0, arr_len * sizeof(struct hl_fence *)); 2575 2528 2576 2529 /* get all fences under the same lock */ 2577 2530 rc = hl_ctx_get_fences(mcs_data->ctx, seq_arr, fence_ptr, arr_len); ··· 2873 2826 } 2874 2827 2875 2828 /* allocate array for the fences */ 2876 - fence_arr = kmalloc_array(seq_arr_len, sizeof(*fence_arr), GFP_KERNEL); 2829 + fence_arr = kmalloc_array(seq_arr_len, 
sizeof(struct hl_fence *), GFP_KERNEL); 2877 2830 if (!fence_arr) { 2878 2831 rc = -ENOMEM; 2879 2832 goto free_seq_arr;
+28 -7
drivers/misc/habanalabs/common/debugfs.c
··· 291 291 if (ctx->asid != HL_KERNEL_ASID_ID && 292 292 !list_empty(&ctx->hw_block_mem_list)) { 293 293 seq_puts(s, "\nhw_block mappings:\n\n"); 294 - seq_puts(s, " virtual address size HW block id\n"); 295 - seq_puts(s, "-------------------------------------------\n"); 294 + seq_puts(s, 295 + " virtual address block size mapped size HW block id\n"); 296 + seq_puts(s, 297 + "---------------------------------------------------------------\n"); 296 298 mutex_lock(&ctx->hw_block_list_lock); 297 - list_for_each_entry(lnode, &ctx->hw_block_mem_list, 298 - node) { 299 + list_for_each_entry(lnode, &ctx->hw_block_mem_list, node) { 299 300 seq_printf(s, 300 - " 0x%-14lx %-6u %-9u\n", 301 - lnode->vaddr, lnode->size, lnode->id); 301 + " 0x%-14lx %-6u %-6u %-9u\n", 302 + lnode->vaddr, lnode->block_size, lnode->mapped_size, 303 + lnode->id); 302 304 } 303 305 mutex_unlock(&ctx->hw_block_list_lock); 304 306 } ··· 593 591 struct hl_debugfs_entry *entry = s->private; 594 592 struct hl_dbg_device_entry *dev_entry = entry->dev_entry; 595 593 struct hl_device *hdev = dev_entry->hdev; 594 + struct engines_data eng_data; 596 595 597 596 if (hdev->reset_info.in_reset) { 598 597 dev_warn_ratelimited(hdev->dev, ··· 601 598 return 0; 602 599 } 603 600 604 - hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s); 601 + eng_data.actual_size = 0; 602 + eng_data.allocated_buf_size = HL_ENGINES_DATA_MAX_SIZE; 603 + eng_data.buf = vmalloc(eng_data.allocated_buf_size); 604 + if (!eng_data.buf) 605 + return -ENOMEM; 606 + 607 + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data); 608 + 609 + if (eng_data.actual_size > eng_data.allocated_buf_size) { 610 + dev_err(hdev->dev, 611 + "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n", 612 + eng_data.actual_size, eng_data.allocated_buf_size); 613 + vfree(eng_data.buf); 614 + return -ENOMEM; 615 + } 616 + 617 + seq_write(s, eng_data.buf, eng_data.actual_size); 618 + 619 + vfree(eng_data.buf); 605 620 606 621 return 0; 607 622 
}
+102 -45
drivers/misc/habanalabs/common/device.c
··· 13 13 #include <linux/pci.h> 14 14 #include <linux/hwmon.h> 15 15 16 + #include <trace/events/habanalabs.h> 17 + 16 18 #define HL_RESET_DELAY_USEC 10000 /* 10ms */ 17 19 18 20 enum dma_alloc_type { ··· 28 26 /* 29 27 * hl_set_dram_bar- sets the bar to allow later access to address 30 28 * 31 - * @hdev: pointer to habanalabs device structure 29 + * @hdev: pointer to habanalabs device structure. 32 30 * @addr: the address the caller wants to access. 31 + * @region: the PCI region. 33 32 * 34 33 * @return: the old BAR base address on success, U64_MAX for failure. 35 34 * The caller should set it back to the old address after use. ··· 40 37 * This function can be called also if the bar doesn't need to be set, 41 38 * in that case it just won't change the base. 42 39 */ 43 - static uint64_t hl_set_dram_bar(struct hl_device *hdev, u64 addr) 40 + static u64 hl_set_dram_bar(struct hl_device *hdev, u64 addr, struct pci_mem_region *region) 44 41 { 45 42 struct asic_fixed_properties *prop = &hdev->asic_prop; 46 - u64 bar_base_addr; 43 + u64 bar_base_addr, old_base; 47 44 48 - bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); 45 + if (is_power_of_2(prop->dram_pci_bar_size)) 46 + bar_base_addr = addr & ~(prop->dram_pci_bar_size - 0x1ull); 47 + else 48 + bar_base_addr = DIV_ROUND_DOWN_ULL(addr, prop->dram_pci_bar_size) * 49 + prop->dram_pci_bar_size; 49 50 50 - return hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); 51 + old_base = hdev->asic_funcs->set_dram_bar_base(hdev, bar_base_addr); 52 + 53 + /* in case of success we need to update the new BAR base */ 54 + if (old_base != U64_MAX) 55 + region->region_base = bar_base_addr; 56 + 57 + return old_base; 51 58 } 52 - 53 59 54 60 static int hl_access_sram_dram_region(struct hl_device *hdev, u64 addr, u64 *val, 55 61 enum debugfs_access_type acc_type, enum pci_region region_type) 56 62 { 57 63 struct pci_mem_region *region = &hdev->pci_mem_region[region_type]; 64 + void __iomem *acc_addr; 58 65 u64 old_base 
= 0, rc; 59 66 60 67 if (region_type == PCI_REGION_DRAM) { 61 - old_base = hl_set_dram_bar(hdev, addr); 68 + old_base = hl_set_dram_bar(hdev, addr, region); 62 69 if (old_base == U64_MAX) 63 70 return -EIO; 64 71 } 65 72 73 + acc_addr = hdev->pcie_bar[region->bar_id] + addr - region->region_base + 74 + region->offset_in_bar; 66 75 switch (acc_type) { 67 76 case DEBUGFS_READ8: 68 - *val = readb(hdev->pcie_bar[region->bar_id] + 69 - addr - region->region_base + region->offset_in_bar); 77 + *val = readb(acc_addr); 70 78 break; 71 79 case DEBUGFS_WRITE8: 72 - writeb(*val, hdev->pcie_bar[region->bar_id] + 73 - addr - region->region_base + region->offset_in_bar); 80 + writeb(*val, acc_addr); 74 81 break; 75 82 case DEBUGFS_READ32: 76 - *val = readl(hdev->pcie_bar[region->bar_id] + 77 - addr - region->region_base + region->offset_in_bar); 83 + *val = readl(acc_addr); 78 84 break; 79 85 case DEBUGFS_WRITE32: 80 - writel(*val, hdev->pcie_bar[region->bar_id] + 81 - addr - region->region_base + region->offset_in_bar); 86 + writel(*val, acc_addr); 82 87 break; 83 88 case DEBUGFS_READ64: 84 - *val = readq(hdev->pcie_bar[region->bar_id] + 85 - addr - region->region_base + region->offset_in_bar); 89 + *val = readq(acc_addr); 86 90 break; 87 91 case DEBUGFS_WRITE64: 88 - writeq(*val, hdev->pcie_bar[region->bar_id] + 89 - addr - region->region_base + region->offset_in_bar); 92 + writeq(*val, acc_addr); 90 93 break; 91 94 } 92 95 93 96 if (region_type == PCI_REGION_DRAM) { 94 - rc = hl_set_dram_bar(hdev, old_base); 97 + rc = hl_set_dram_bar(hdev, old_base, region); 95 98 if (rc == U64_MAX) 96 99 return -EIO; 97 100 } ··· 106 97 } 107 98 108 99 static void *hl_dma_alloc_common(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 109 - gfp_t flag, enum dma_alloc_type alloc_type) 100 + gfp_t flag, enum dma_alloc_type alloc_type, 101 + const char *caller) 110 102 { 111 - void *ptr; 103 + void *ptr = NULL; 112 104 113 105 switch (alloc_type) { 114 106 case DMA_ALLOC_COHERENT: ··· 
123 113 break; 124 114 } 125 115 116 + if (trace_habanalabs_dma_alloc_enabled() && !ZERO_OR_NULL_PTR(ptr)) 117 + trace_habanalabs_dma_alloc(hdev->dev, (u64) (uintptr_t) ptr, *dma_handle, size, 118 + caller); 119 + 126 120 return ptr; 127 121 } 128 122 129 123 static void hl_asic_dma_free_common(struct hl_device *hdev, size_t size, void *cpu_addr, 130 - dma_addr_t dma_handle, enum dma_alloc_type alloc_type) 124 + dma_addr_t dma_handle, enum dma_alloc_type alloc_type, 125 + const char *caller) 131 126 { 132 127 switch (alloc_type) { 133 128 case DMA_ALLOC_COHERENT: ··· 145 130 hdev->asic_funcs->asic_dma_pool_free(hdev, cpu_addr, dma_handle); 146 131 break; 147 132 } 133 + 134 + trace_habanalabs_dma_free(hdev->dev, (u64) (uintptr_t) cpu_addr, dma_handle, size, caller); 148 135 } 149 136 150 - void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 151 - gfp_t flag) 137 + void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 138 + gfp_t flag, const char *caller) 152 139 { 153 - return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT); 140 + return hl_dma_alloc_common(hdev, size, dma_handle, flag, DMA_ALLOC_COHERENT, caller); 154 141 } 155 142 156 - void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, 157 - dma_addr_t dma_handle) 143 + void hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, 144 + dma_addr_t dma_handle, const char *caller) 158 145 { 159 - hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT); 146 + hl_asic_dma_free_common(hdev, size, cpu_addr, dma_handle, DMA_ALLOC_COHERENT, caller); 160 147 } 161 148 162 - void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle) 149 + void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, 150 + dma_addr_t *dma_handle, const char *caller) 163 151 { 164 - 
return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE); 152 + return hl_dma_alloc_common(hdev, size, dma_handle, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); 165 153 } 166 154 167 - void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr) 155 + void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, 156 + const char *caller) 168 157 { 169 - hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE); 158 + hl_asic_dma_free_common(hdev, size, vaddr, 0, DMA_ALLOC_CPU_ACCESSIBLE, caller); 170 159 } 171 160 172 - void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags, 173 - dma_addr_t *dma_handle) 161 + void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, 162 + dma_addr_t *dma_handle, const char *caller) 174 163 { 175 - return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL); 164 + return hl_dma_alloc_common(hdev, size, dma_handle, mem_flags, DMA_ALLOC_POOL, caller); 176 165 } 177 166 178 - void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr) 167 + void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, 168 + const char *caller) 179 169 { 180 - hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL); 170 + hl_asic_dma_free_common(hdev, 0, vaddr, dma_addr, DMA_ALLOC_POOL, caller); 181 171 } 182 172 183 173 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir) ··· 287 267 return 0; 288 268 } 289 269 270 + void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...) 271 + { 272 + va_list args; 273 + int str_size; 274 + 275 + va_start(args, fmt); 276 + /* Calculate formatted string length. 
Assuming each string is null terminated, hence 277 + * increment result by 1 278 + */ 279 + str_size = vsnprintf(NULL, 0, fmt, args) + 1; 280 + va_end(args); 281 + 282 + if ((e->actual_size + str_size) < e->allocated_buf_size) { 283 + va_start(args, fmt); 284 + vsnprintf(e->buf + e->actual_size, str_size, fmt, args); 285 + va_end(args); 286 + } 287 + 288 + /* Need to update the size even when not updating destination buffer to get the exact size 289 + * of all input strings 290 + */ 291 + e->actual_size += str_size; 292 + } 293 + 290 294 enum hl_device_status hl_device_status(struct hl_device *hdev) 291 295 { 292 296 enum hl_device_status status; ··· 365 321 hpriv = container_of(ref, struct hl_fpriv, refcount); 366 322 367 323 hdev = hpriv->hdev; 324 + 325 + hdev->asic_funcs->send_device_activity(hdev, false); 368 326 369 327 put_pid(hpriv->taskpid); 370 328 ··· 719 673 720 674 if (hdev->asic_prop.completion_queues_count) { 721 675 hdev->cq_wq = kcalloc(hdev->asic_prop.completion_queues_count, 722 - sizeof(*hdev->cq_wq), 676 + sizeof(struct workqueue_struct *), 723 677 GFP_KERNEL); 724 678 if (!hdev->cq_wq) { 725 679 rc = -ENOMEM; ··· 1137 1091 /* 'in_reset' was set to true during suspend, now we must clear it in order 1138 1092 * for hard reset to be performed 1139 1093 */ 1094 + spin_lock(&hdev->reset_info.lock); 1140 1095 hdev->reset_info.in_reset = 0; 1096 + spin_unlock(&hdev->reset_info.lock); 1141 1097 1142 1098 rc = hl_device_reset(hdev, HL_DRV_RESET_HARD); 1143 1099 if (rc) { ··· 1566 1518 */ 1567 1519 hdev->disabled = false; 1568 1520 1521 + /* F/W security enabled indication might be updated after hard-reset */ 1522 + if (hard_reset) { 1523 + rc = hl_fw_read_preboot_status(hdev); 1524 + if (rc) 1525 + goto out_err; 1526 + } 1527 + 1569 1528 rc = hdev->asic_funcs->hw_init(hdev); 1570 1529 if (rc) { 1571 1530 dev_err(hdev->dev, "failed to initialize the H/W after reset\n"); ··· 1611 1556 if (!hdev->asic_prop.fw_security_enabled) 1612 1557 
hl_fw_set_max_power(hdev); 1613 1558 } else { 1614 - rc = hdev->asic_funcs->non_hard_reset_late_init(hdev); 1559 + rc = hdev->asic_funcs->compute_reset_late_init(hdev); 1615 1560 if (rc) { 1616 1561 if (reset_upon_device_release) 1617 1562 dev_err(hdev->dev, ··· 1759 1704 char *name; 1760 1705 bool add_cdev_sysfs_on_err = false; 1761 1706 1762 - name = kasprintf(GFP_KERNEL, "hl%d", hdev->id / 2); 1707 + hdev->cdev_idx = hdev->id / 2; 1708 + 1709 + name = kasprintf(GFP_KERNEL, "hl%d", hdev->cdev_idx); 1763 1710 if (!name) { 1764 1711 rc = -ENOMEM; 1765 1712 goto out_disabled; ··· 1776 1719 if (rc) 1777 1720 goto out_disabled; 1778 1721 1779 - name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->id / 2); 1722 + name = kasprintf(GFP_KERNEL, "hl_controlD%d", hdev->cdev_idx); 1780 1723 if (!name) { 1781 1724 rc = -ENOMEM; 1782 1725 goto free_dev; ··· 1863 1806 } 1864 1807 1865 1808 hdev->shadow_cs_queue = kcalloc(hdev->asic_prop.max_pending_cs, 1866 - sizeof(*hdev->shadow_cs_queue), GFP_KERNEL); 1809 + sizeof(struct hl_cs *), GFP_KERNEL); 1867 1810 if (!hdev->shadow_cs_queue) { 1868 1811 rc = -ENOMEM; 1869 1812 goto cq_fini; ··· 2054 1997 if (hdev->pdev) 2055 1998 dev_err(&hdev->pdev->dev, 2056 1999 "Failed to initialize hl%d. Device is NOT usable !\n", 2057 - hdev->id / 2); 2000 + hdev->cdev_idx); 2058 2001 else 2059 2002 pr_err("Failed to initialize hl%d. Device is NOT usable !\n", 2060 - hdev->id / 2); 2003 + hdev->cdev_idx); 2061 2004 2062 2005 return rc; 2063 2006 }
+121 -63
drivers/misc/habanalabs/common/firmware_if.c
··· 15 15 16 16 #define FW_FILE_MAX_SIZE 0x1400000 /* maximum size of 20MB */ 17 17 18 - struct fw_binning_conf { 19 - u64 tpc_binning; 20 - u32 dec_binning; 21 - u32 hbm_binning; 22 - u32 edma_binning; 23 - u32 mme_redundancy; 24 - }; 25 - 26 18 static char *extract_fw_ver_from_str(const char *fw_str) 27 19 { 28 20 char *str, *fw_ver, *whitespace; ··· 252 260 struct cpucp_packet *pkt; 253 261 dma_addr_t pkt_dma_addr; 254 262 struct hl_bd *sent_bd; 255 - u32 tmp, expected_ack_val, pi; 263 + u32 tmp, expected_ack_val, pi, opcode; 256 264 int rc; 257 265 258 266 pkt = hl_cpu_accessible_dma_pool_alloc(hdev, len, &pkt_dma_addr); ··· 319 327 320 328 rc = (tmp & CPUCP_PKT_CTL_RC_MASK) >> CPUCP_PKT_CTL_RC_SHIFT; 321 329 if (rc) { 322 - dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", 323 - rc, (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT); 330 + opcode = (tmp & CPUCP_PKT_CTL_OPCODE_MASK) >> CPUCP_PKT_CTL_OPCODE_SHIFT; 331 + 332 + if (!prop->supports_advanced_cpucp_rc) { 333 + dev_dbg(hdev->dev, "F/W ERROR %d for CPU packet %d\n", rc, opcode); 334 + goto scrub_descriptor; 335 + } 336 + 337 + switch (rc) { 338 + case cpucp_packet_invalid: 339 + dev_err(hdev->dev, 340 + "CPU packet %d is not supported by F/W\n", opcode); 341 + break; 342 + case cpucp_packet_fault: 343 + dev_err(hdev->dev, 344 + "F/W failed processing CPU packet %d\n", opcode); 345 + break; 346 + case cpucp_packet_invalid_pkt: 347 + dev_dbg(hdev->dev, 348 + "CPU packet %d is not supported by F/W\n", opcode); 349 + break; 350 + case cpucp_packet_invalid_params: 351 + dev_err(hdev->dev, 352 + "F/W reports invalid parameters for CPU packet %d\n", opcode); 353 + break; 354 + 355 + default: 356 + dev_err(hdev->dev, 357 + "Unknown F/W ERROR %d for CPU packet %d\n", rc, opcode); 358 + } 324 359 325 360 /* propagate the return code from the f/w to the callers who want to check it */ 326 361 if (result) ··· 359 340 *result = le64_to_cpu(pkt->result); 360 341 } 361 342 343 + scrub_descriptor: 
362 344 /* Scrub previous buffer descriptor 'ctl' field which contains the 363 345 * previous PI value written during packet submission. 364 346 * We must do this or else F/W can read an old value upon queue wraparound. ··· 482 462 size); 483 463 } 484 464 465 + int hl_fw_send_device_activity(struct hl_device *hdev, bool open) 466 + { 467 + struct cpucp_packet pkt; 468 + int rc; 469 + 470 + memset(&pkt, 0, sizeof(pkt)); 471 + pkt.ctl = cpu_to_le32(CPUCP_PACKET_ACTIVE_STATUS_SET << CPUCP_PKT_CTL_OPCODE_SHIFT); 472 + pkt.value = cpu_to_le64(open); 473 + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 0, NULL); 474 + if (rc) 475 + dev_err(hdev->dev, "failed to send device activity msg(%u)\n", open); 476 + 477 + return rc; 478 + } 479 + 485 480 int hl_fw_send_heartbeat(struct hl_device *hdev) 486 481 { 487 482 struct cpucp_packet hb_pkt; ··· 616 581 dev_dbg(hdev->dev, "Device status0 %#x\n", sts_val); 617 582 618 583 /* All warnings should go here in order not to reach the unknown error validation */ 584 + if (err_val & CPU_BOOT_ERR0_EEPROM_FAIL) { 585 + dev_warn(hdev->dev, 586 + "Device boot warning - EEPROM failure detected, default settings applied\n"); 587 + /* This is a warning so we don't want it to disable the 588 + * device 589 + */ 590 + err_val &= ~CPU_BOOT_ERR0_EEPROM_FAIL; 591 + } 592 + 619 593 if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED) { 620 594 dev_warn(hdev->dev, 621 595 "Device boot warning - Skipped DRAM initialization\n"); ··· 1520 1476 */ 1521 1477 prop->hard_reset_done_by_fw = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_FW_HARD_RST_EN); 1522 1478 1479 + prop->fw_security_enabled = !!(cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_SECURITY_EN); 1480 + 1523 1481 dev_dbg(hdev->dev, "Firmware preboot boot device status0 %#x\n", 1524 1482 cpu_boot_dev_sts0); 1525 1483 ··· 1560 1514 hdev->asic_funcs->init_firmware_preload_params(hdev); 1561 1515 1562 1516 /* 1563 - * In order to determine boot method (static VS dymanic) we need to 1517 + * In 
order to determine boot method (static VS dynamic) we need to 1564 1518 * read the boot caps register 1565 1519 */ 1566 1520 rc = hl_fw_read_preboot_caps(hdev); ··· 1827 1781 * 1828 1782 * @return the CRC32 result 1829 1783 * 1830 - * NOTE: kernel's CRC32 differ's from standard CRC32 calculation. 1784 + * NOTE: kernel's CRC32 differs from standard CRC32 calculation. 1831 1785 * in order to be aligned we need to flip the bits of both the input 1832 1786 * initial CRC and kernel's CRC32 result. 1833 1787 * in addition both sides use initial CRC of 0, ··· 1844 1798 * 1845 1799 * @hdev: pointer to the habanalabs device structure 1846 1800 * @addr: device address of memory transfer 1847 - * @size: memory transter size 1801 + * @size: memory transfer size 1848 1802 * @region: PCI memory region 1849 1803 * 1850 1804 * @return 0 on success, otherwise non-zero error code ··· 1900 1854 u64 addr; 1901 1855 int rc; 1902 1856 1903 - if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) { 1904 - dev_err(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n", 1857 + if (le32_to_cpu(fw_desc->header.magic) != HL_COMMS_DESC_MAGIC) 1858 + dev_warn(hdev->dev, "Invalid magic for dynamic FW descriptor (%x)\n", 1905 1859 fw_desc->header.magic); 1906 - return -EIO; 1907 - } 1908 1860 1909 - if (fw_desc->header.version != HL_COMMS_DESC_VER) { 1910 - dev_err(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n", 1861 + if (fw_desc->header.version != HL_COMMS_DESC_VER) 1862 + dev_warn(hdev->dev, "Invalid version for dynamic FW descriptor (%x)\n", 1911 1863 fw_desc->header.version); 1912 - return -EIO; 1913 - } 1914 1864 1915 1865 /* 1916 - * calc CRC32 of data without header. 1866 + * Calc CRC32 of data without header. use the size of the descriptor 1867 + * reported by firmware, without calculating it ourself, to allow adding 1868 + * more fields to the lkd_fw_comms_desc structure. 
1917 1869 * note that no alignment/stride address issues here as all structures 1918 - * are 64 bit padded 1870 + * are 64 bit padded. 1919 1871 */ 1920 - data_size = sizeof(struct lkd_fw_comms_desc) - 1921 - sizeof(struct comms_desc_header); 1922 1872 data_ptr = (u8 *)fw_desc + sizeof(struct comms_desc_header); 1923 - 1924 - if (le16_to_cpu(fw_desc->header.size) != data_size) { 1925 - dev_err(hdev->dev, 1926 - "Invalid descriptor size 0x%x, expected size 0x%zx\n", 1927 - le16_to_cpu(fw_desc->header.size), data_size); 1928 - return -EIO; 1929 - } 1873 + data_size = le16_to_cpu(fw_desc->header.size); 1930 1874 1931 1875 data_crc32 = hl_fw_compat_crc32(data_ptr, data_size); 1932 - 1933 1876 if (data_crc32 != le32_to_cpu(fw_desc->header.crc32)) { 1934 - dev_err(hdev->dev, 1935 - "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n", 1936 - data_crc32, fw_desc->header.crc32); 1877 + dev_err(hdev->dev, "CRC32 mismatch for dynamic FW descriptor (%x:%x)\n", 1878 + data_crc32, fw_desc->header.crc32); 1937 1879 return -EIO; 1938 1880 } 1939 1881 1940 1882 /* find memory region to which to copy the image */ 1941 1883 addr = le64_to_cpu(fw_desc->img_addr); 1942 1884 region_id = hl_get_pci_memory_region(hdev, addr); 1943 - if ((region_id != PCI_REGION_SRAM) && 1944 - ((region_id != PCI_REGION_DRAM))) { 1945 - dev_err(hdev->dev, 1946 - "Invalid region to copy FW image address=%llx\n", addr); 1885 + if ((region_id != PCI_REGION_SRAM) && ((region_id != PCI_REGION_DRAM))) { 1886 + dev_err(hdev->dev, "Invalid region to copy FW image address=%llx\n", addr); 1947 1887 return -EIO; 1948 1888 } 1949 1889 ··· 1946 1914 fw_loader->dynamic_loader.fw_image_size, 1947 1915 region); 1948 1916 if (rc) { 1949 - dev_err(hdev->dev, 1950 - "invalid mem transfer request for FW image\n"); 1917 + dev_err(hdev->dev, "invalid mem transfer request for FW image\n"); 1951 1918 return rc; 1952 1919 } 1953 1920 ··· 2453 2422 msg.reset_cause = *(__u8 *) data; 2454 2423 break; 2455 2424 2456 - case 
HL_COMMS_BINNING_CONF_TYPE: 2457 - { 2458 - struct fw_binning_conf *binning_conf = (struct fw_binning_conf *) data; 2459 - 2460 - msg.tpc_binning_conf = cpu_to_le64(binning_conf->tpc_binning); 2461 - msg.dec_binning_conf = cpu_to_le32(binning_conf->dec_binning); 2462 - msg.hbm_binning_conf = cpu_to_le32(binning_conf->hbm_binning); 2463 - msg.edma_binning_conf = cpu_to_le32(binning_conf->edma_binning); 2464 - msg.mme_redundancy_conf = cpu_to_le32(binning_conf->mme_redundancy); 2465 - break; 2466 - } 2467 - 2468 2425 default: 2469 2426 dev_err(hdev->dev, 2470 2427 "Send COMMS message - invalid message type %u\n", ··· 2522 2503 */ 2523 2504 dyn_regs = &fw_loader->dynamic_loader.comm_desc.cpu_dyn_regs; 2524 2505 2525 - /* if no preboot loaded indication- wait for preboot */ 2526 - if (!(hdev->fw_loader.fw_comp_loaded & FW_TYPE_PREBOOT_CPU)) { 2527 - rc = hl_fw_wait_preboot_ready(hdev); 2528 - if (rc) 2529 - return -EIO; 2530 - } 2531 - 2532 2506 rc = hl_fw_dynamic_send_protocol_cmd(hdev, fw_loader, COMMS_RST_STATE, 2533 2507 0, true, 2534 2508 fw_loader->cpu_timeout); ··· 2559 2547 /* 2560 2548 * when testing FW load (without Linux) on PLDM we don't want to 2561 2549 * wait until boot fit is active as it may take several hours. 2562 - * instead, we load the bootfit and let it do all initializations in 2550 + * instead, we load the bootfit and let it do all initialization in 2563 2551 * the background. 
2564 2552 */ 2565 2553 if (hdev->pldm && !(hdev->fw_components & FW_TYPE_LINUX)) ··· 2972 2960 2973 2961 if (rc) 2974 2962 dev_err(hdev->dev, "Failed to set max power, error %d\n", rc); 2963 + } 2964 + 2965 + static int hl_fw_get_sec_attest_data(struct hl_device *hdev, u32 packet_id, void *data, u32 size, 2966 + u32 nonce, u32 timeout) 2967 + { 2968 + struct cpucp_packet pkt = {}; 2969 + dma_addr_t req_dma_addr; 2970 + void *req_cpu_addr; 2971 + int rc; 2972 + 2973 + req_cpu_addr = hl_cpu_accessible_dma_pool_alloc(hdev, size, &req_dma_addr); 2974 + if (!req_cpu_addr) { 2975 + dev_err(hdev->dev, 2976 + "Failed to allocate DMA memory for CPU-CP packet %u\n", packet_id); 2977 + return -ENOMEM; 2978 + } 2979 + 2980 + memset(data, 0, size); 2981 + 2982 + pkt.ctl = cpu_to_le32(packet_id << CPUCP_PKT_CTL_OPCODE_SHIFT); 2983 + pkt.addr = cpu_to_le64(req_dma_addr); 2984 + pkt.data_max_size = cpu_to_le32(size); 2985 + pkt.nonce = cpu_to_le32(nonce); 2986 + 2987 + rc = hdev->asic_funcs->send_cpu_message(hdev, (u32 *) &pkt, sizeof(pkt), 2988 + timeout, NULL); 2989 + if (rc) { 2990 + dev_err(hdev->dev, 2991 + "Failed to handle CPU-CP pkt %u, error %d\n", packet_id, rc); 2992 + goto out; 2993 + } 2994 + 2995 + memcpy(data, req_cpu_addr, size); 2996 + 2997 + out: 2998 + hl_cpu_accessible_dma_pool_free(hdev, size, req_cpu_addr); 2999 + 3000 + return rc; 3001 + } 3002 + 3003 + int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, 3004 + u32 nonce) 3005 + { 3006 + return hl_fw_get_sec_attest_data(hdev, CPUCP_PACKET_SEC_ATTEST_GET, sec_attest_info, 3007 + sizeof(struct cpucp_sec_attest_info), nonce, 3008 + HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC); 2975 3009 }
+113 -58
drivers/misc/habanalabs/common/habanalabs.h
··· 66 66 #define HL_CPUCP_INFO_TIMEOUT_USEC 10000000 /* 10s */ 67 67 #define HL_CPUCP_EEPROM_TIMEOUT_USEC 10000000 /* 10s */ 68 68 #define HL_CPUCP_MON_DUMP_TIMEOUT_USEC 10000000 /* 10s */ 69 + #define HL_CPUCP_SEC_ATTEST_INFO_TINEOUT_USEC 10000000 /* 10s */ 69 70 70 71 #define HL_FW_STATUS_POLL_INTERVAL_USEC 10000 /* 10ms */ 71 72 #define HL_FW_COMMS_STATUS_PLDM_POLL_INTERVAL_USEC 1000000 /* 1s */ ··· 95 94 #define MMU_HASH_TABLE_BITS 7 /* 1 << 7 buckets */ 96 95 97 96 /** 98 - * enum hl_mmu_page_table_locaion - mmu page table location 97 + * enum hl_mmu_page_table_location - mmu page table location 99 98 * @MMU_DR_PGT: page-table is located on device DRAM. 100 99 * @MMU_HR_PGT: page-table is located on host memory. 101 100 * @MMU_NUM_PGT_LOCATIONS: number of page-table locations currently supported. ··· 143 142 #define HL_COMPLETION_MODE_CS 1 144 143 145 144 #define HL_MAX_DCORES 8 145 + 146 + /* DMA alloc/free wrappers */ 147 + #define hl_asic_dma_alloc_coherent(hdev, size, dma_handle, flags) \ 148 + hl_asic_dma_alloc_coherent_caller(hdev, size, dma_handle, flags, __func__) 149 + 150 + #define hl_cpu_accessible_dma_pool_alloc(hdev, size, dma_handle) \ 151 + hl_cpu_accessible_dma_pool_alloc_caller(hdev, size, dma_handle, __func__) 152 + 153 + #define hl_asic_dma_pool_zalloc(hdev, size, mem_flags, dma_handle) \ 154 + hl_asic_dma_pool_zalloc_caller(hdev, size, mem_flags, dma_handle, __func__) 155 + 156 + #define hl_asic_dma_free_coherent(hdev, size, cpu_addr, dma_handle) \ 157 + hl_asic_dma_free_coherent_caller(hdev, size, cpu_addr, dma_handle, __func__) 158 + 159 + #define hl_cpu_accessible_dma_pool_free(hdev, size, vaddr) \ 160 + hl_cpu_accessible_dma_pool_free_caller(hdev, size, vaddr, __func__) 161 + 162 + #define hl_asic_dma_pool_free(hdev, vaddr, dma_addr) \ 163 + hl_asic_dma_pool_free_caller(hdev, vaddr, dma_addr, __func__) 146 164 147 165 /* 148 166 * Reset Flags ··· 228 208 * struct iterate_module_ctx - HW module iterator 229 209 * @fn: function to apply 
to each HW module instance 230 210 * @data: optional internal data to the function iterator 211 + * @rc: return code for optional use of iterator/iterator-caller 231 212 */ 232 213 struct iterate_module_ctx { 233 214 /* ··· 238 217 * @inst: HW module instance within the block 239 218 * @offset: current HW module instance offset from the 1-st HW module instance 240 219 * in the 1-st block 241 - * @data: function specific data 220 + * @ctx: the iterator context. 242 221 */ 243 - void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, void *data); 222 + void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset, 223 + struct iterate_module_ctx *ctx); 244 224 void *data; 225 + int rc; 245 226 }; 246 227 247 228 struct hl_block_glbl_sec { ··· 365 342 CS_TYPE_WAIT, 366 343 CS_TYPE_COLLECTIVE_WAIT, 367 344 CS_RESERVE_SIGNALS, 368 - CS_UNRESERVE_SIGNALS 345 + CS_UNRESERVE_SIGNALS, 346 + CS_TYPE_ENGINE_CORE 369 347 }; 370 348 371 349 /* ··· 568 544 * @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned. 569 545 * @dram_enabled_mask: which DRAMs are enabled. 570 546 * @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned. 571 - * @cb_va_start_addr: virtual start address of command buffers which are mapped 572 - * to the device's MMU. 573 - * @cb_va_end_addr: virtual end address of command buffers which are mapped to 574 - * the device's MMU. 575 547 * @dram_hints_align_mask: dram va hint addresses alignment mask which is used 576 548 * for hints validity check. 577 549 * @cfg_base_address: config space base address. ··· 634 614 * which the property supports_user_set_page_size is true 635 615 * (i.e. the DRAM supports multiple page sizes), otherwise 636 616 * it will shall be equal to dram_page_size. 
617 + * @num_engine_cores: number of engine cpu cores 637 618 * @collective_first_sob: first sync object available for collective use 638 619 * @collective_first_mon: first monitor available for collective use 639 620 * @sync_stream_first_sob: first sync object available for sync stream use ··· 679 658 * @set_max_power_on_device_init: true if need to set max power in F/W on device init. 680 659 * @supports_user_set_page_size: true if user can set the allocation page size. 681 660 * @dma_mask: the dma mask to be set for this device 661 + * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. 682 662 */ 683 663 struct asic_fixed_properties { 684 664 struct hw_queue_properties *hw_queues_props; ··· 711 689 u64 tpc_binning_mask; 712 690 u64 dram_enabled_mask; 713 691 u64 dram_binning_mask; 714 - u64 cb_va_start_addr; 715 - u64 cb_va_end_addr; 716 692 u64 dram_hints_align_mask; 717 693 u64 cfg_base_address; 718 694 u64 mmu_cache_mng_addr; ··· 754 734 u32 faulty_dram_cluster_map; 755 735 u32 xbar_edge_enabled_mask; 756 736 u32 device_mem_alloc_default_page_size; 737 + u32 num_engine_cores; 757 738 u16 collective_first_sob; 758 739 u16 collective_first_mon; 759 740 u16 sync_stream_first_sob; ··· 787 766 u8 set_max_power_on_device_init; 788 767 u8 supports_user_set_page_size; 789 768 u8 dma_mask; 769 + u8 supports_advanced_cpucp_rc; 790 770 }; 791 771 792 772 /** ··· 819 797 * @lock: spinlock to protect fence. 820 798 * @hdev: habanalabs device structure. 821 799 * @hw_sob: the H/W SOB used in this signal/wait CS. 822 - * @encaps_sig_hdl: encaps signals hanlder. 800 + * @encaps_sig_hdl: encaps signals handler. 823 801 * @cs_seq: command submission sequence number. 824 802 * @type: type of the CS - signal/wait. 825 803 * @sob_val: the SOB value that is used in this signal/wait CS. ··· 920 898 * @buf: back pointer to the parent mappable memory buffer 921 899 * @debugfs_list: node in debugfs list of command buffers. 
922 900 * @pool_list: node in pool list of command buffers. 923 - * @va_block_list: list of virtual addresses blocks of the CB if it is mapped to 924 - * the device's MMU. 925 901 * @kernel_address: Holds the CB's kernel virtual address. 902 + * @virtual_addr: Holds the CB's virtual address. 926 903 * @bus_address: Holds the CB's DMA address. 927 904 * @size: holds the CB's size. 905 + * @roundup_size: holds the cb size after roundup to page size. 928 906 * @cs_cnt: holds number of CS that this CB participates in. 929 907 * @is_pool: true if CB was acquired from the pool, false otherwise. 930 - * @is_internal: internaly allocated 908 + * @is_internal: internally allocated 931 909 * @is_mmu_mapped: true if the CB is mapped to the device's MMU. 932 910 */ 933 911 struct hl_cb { ··· 936 914 struct hl_mmap_mem_buf *buf; 937 915 struct list_head debugfs_list; 938 916 struct list_head pool_list; 939 - struct list_head va_block_list; 940 917 void *kernel_address; 918 + u64 virtual_addr; 941 919 dma_addr_t bus_address; 942 920 u32 size; 921 + u32 roundup_size; 943 922 atomic_t cs_cnt; 944 923 u8 is_pool; 945 924 u8 is_internal; ··· 1136 1113 * @fence: hl fence object for interrupt completion 1137 1114 * @cq_target_value: CQ target value 1138 1115 * @cq_kernel_addr: CQ kernel address, to be used in the cq interrupt 1139 - * handler for taget value comparison 1116 + * handler for target value comparison 1140 1117 */ 1141 1118 struct hl_user_pending_interrupt { 1142 1119 struct timestamp_reg_info ts_reg_info; ··· 1395 1372 struct hl_cs; 1396 1373 1397 1374 /** 1375 + * struct engines_data - asic engines data 1376 + * @buf: buffer for engines data in ascii 1377 + * @actual_size: actual size of data that was written by the driver to the allocated buffer 1378 + * @allocated_buf_size: total size of allocated buffer 1379 + */ 1380 + struct engines_data { 1381 + char *buf; 1382 + int actual_size; 1383 + u32 allocated_buf_size; 1384 + }; 1385 + 1386 + /** 1398 1387 * struct 
hl_asic_funcs - ASIC specific functions that are can be called from 1399 1388 * common code. 1400 1389 * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. ··· 1469 1434 * @send_heartbeat: send is-alive packet to CPU-CP and verify response. 1470 1435 * @debug_coresight: perform certain actions on Coresight for debugging. 1471 1436 * @is_device_idle: return true if device is idle, false otherwise. 1472 - * @non_hard_reset_late_init: perform certain actions needed after a reset which is not hard-reset 1437 + * @compute_reset_late_init: perform certain actions needed after a compute reset 1473 1438 * @hw_queues_lock: acquire H/W queues lock. 1474 1439 * @hw_queues_unlock: release H/W queues lock. 1475 - * @kdma_lock: acquire H/W queues lock. Relevant from GRECO ASIC 1476 - * @kdma_unlock: release H/W queues lock. Relevant from GRECO ASIC 1477 1440 * @get_pci_id: retrieve PCI ID. 1478 1441 * @get_eeprom_data: retrieve EEPROM data from F/W. 1479 1442 * @get_monitor_dump: retrieve monitor registers dump from F/W. ··· 1531 1498 * @check_if_razwi_happened: check if there was a razwi due to RR violation. 
1532 1499 * @access_dev_mem: access device memory 1533 1500 * @set_dram_bar_base: set the base of the DRAM BAR 1501 + * @set_engine_cores: set a config command to engine cores 1502 + * @send_device_activity: indication to FW about device availability 1534 1503 */ 1535 1504 struct hl_asic_funcs { 1536 1505 int (*early_init)(struct hl_device *hdev); ··· 1605 1570 int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); 1606 1571 int (*send_heartbeat)(struct hl_device *hdev); 1607 1572 int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); 1608 - bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, 1609 - u8 mask_len, struct seq_file *s); 1610 - int (*non_hard_reset_late_init)(struct hl_device *hdev); 1573 + bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 1574 + struct engines_data *e); 1575 + int (*compute_reset_late_init)(struct hl_device *hdev); 1611 1576 void (*hw_queues_lock)(struct hl_device *hdev); 1612 1577 void (*hw_queues_unlock)(struct hl_device *hdev); 1613 - void (*kdma_lock)(struct hl_device *hdev, int dcore_id); 1614 - void (*kdma_unlock)(struct hl_device *hdev, int dcore_id); 1615 1578 u32 (*get_pci_id)(struct hl_device *hdev); 1616 1579 int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); 1617 1580 int (*get_monitor_dump)(struct hl_device *hdev, void *data); ··· 1667 1634 int (*access_dev_mem)(struct hl_device *hdev, enum pci_region region_type, 1668 1635 u64 addr, u64 *val, enum debugfs_access_type acc_type); 1669 1636 u64 (*set_dram_bar_base)(struct hl_device *hdev, u64 addr); 1637 + int (*set_engine_cores)(struct hl_device *hdev, u32 *core_ids, 1638 + u32 num_cores, u32 core_command); 1639 + int (*send_device_activity)(struct hl_device *hdev, bool open); 1670 1640 }; 1671 1641 1672 1642 ··· 1763 1727 1764 1728 /** 1765 1729 * struct hl_cs_outcome_store - represents a limited store of completed CS outcomes 1766 - * @outcome_map: 
index of completed CS searcheable by sequence number 1730 + * @outcome_map: index of completed CS searchable by sequence number 1767 1731 * @used_list: list of outcome objects currently in use 1768 1732 * @free_list: list of outcome objects currently not in use 1769 - * @nodes_pool: a static pool of preallocated outcome objects 1733 + * @nodes_pool: a static pool of pre-allocated outcome objects 1770 1734 * @db_lock: any operation on the store must take this lock 1771 1735 */ 1772 1736 struct hl_cs_outcome_store { ··· 1790 1754 * @refcount: reference counter for the context. Context is released only when 1791 1755 * this hits 0l. It is incremented on CS and CS_WAIT. 1792 1756 * @cs_pending: array of hl fence objects representing pending CS. 1793 - * @outcome_store: storage data structure used to remember ouitcomes of completed 1757 + * @outcome_store: storage data structure used to remember outcomes of completed 1794 1758 * command submissions for a long time after CS id wraparound. 1795 1759 * @va_range: holds available virtual addresses for host and dram mappings. 1796 1760 * @mem_hash_lock: protects the mem_hash. 1797 - * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the 1798 - * MMU hash or walking the PGT requires talking this lock. 1799 1761 * @hw_block_list_lock: protects the HW block memory list. 1800 1762 * @debugfs_list: node in debugfs list of contexts. 1801 1763 * @hw_block_mem_list: list of HW block virtual mapped addresses. ··· 1801 1767 * @cb_va_pool: device VA pool for command buffers which are mapped to the 1802 1768 * device's MMU. 1803 1769 * @sig_mgr: encaps signals handle manager. 1770 + * @cb_va_pool_base: the base address for the device VA pool 1804 1771 * @cs_sequence: sequence number for CS. Value is assigned to a CS and passed 1805 1772 * to user so user could inquire about CS. It is used as 1806 1773 * index to cs_pending array. 
··· 1830 1795 struct hl_cs_outcome_store outcome_store; 1831 1796 struct hl_va_range *va_range[HL_VA_RANGE_TYPE_MAX]; 1832 1797 struct mutex mem_hash_lock; 1833 - struct mutex mmu_lock; 1834 1798 struct mutex hw_block_list_lock; 1835 1799 struct list_head debugfs_list; 1836 1800 struct list_head hw_block_mem_list; 1837 1801 struct hl_cs_counters_atomic cs_counters; 1838 1802 struct gen_pool *cb_va_pool; 1839 1803 struct hl_encaps_signals_mgr sig_mgr; 1804 + u64 cb_va_pool_base; 1840 1805 u64 cs_sequence; 1841 1806 u64 *dram_default_hops; 1842 1807 spinlock_t cs_lock; ··· 1856 1821 struct mutex lock; 1857 1822 struct idr handles; 1858 1823 }; 1859 - 1860 1824 1861 1825 1862 1826 /* ··· 1923 1889 * @tdr_active: true if TDR was activated for this CS (to prevent 1924 1890 * double TDR activation). 1925 1891 * @aborted: true if CS was aborted due to some device error. 1926 - * @timestamp: true if a timestmap must be captured upon completion. 1892 + * @timestamp: true if a timestamp must be captured upon completion. 1927 1893 * @staged_last: true if this is the last staged CS and needs completion. 1928 1894 * @staged_first: true if this is the first staged CS and we need to receive 1929 1895 * timeout for this CS. ··· 2081 2047 * @node: node to hang on the list in context object. 2082 2048 * @ctx: the context this node belongs to. 2083 2049 * @vaddr: virtual address of the HW block. 2084 - * @size: size of the block. 2050 + * @block_size: size of the block. 2051 + * @mapped_size: size of the block which is mapped. May change if partial un-mappings are done. 2085 2052 * @id: HW block id (handle). 2086 2053 */ 2087 2054 struct hl_vm_hw_block_list_node { 2088 2055 struct list_head node; 2089 2056 struct hl_ctx *ctx; 2090 2057 unsigned long vaddr; 2091 - u32 size; 2058 + u32 block_size; 2059 + u32 mapped_size; 2092 2060 u32 id; 2093 2061 }; 2094 2062 ··· 2250 2214 2251 2215 /** 2252 2216 * struct hl_debugfs_entry - debugfs dentry wrapper. 
2253 - * @info_ent: dentry realted ops. 2217 + * @info_ent: dentry related ops. 2254 2218 * @dev_entry: ASIC specific debugfs manager. 2255 2219 */ 2256 2220 struct hl_debugfs_entry { ··· 2528 2492 break; \ 2529 2493 (val) = __elbi_read; \ 2530 2494 } else {\ 2531 - (val) = RREG32((u32)addr); \ 2495 + (val) = RREG32((u32)(addr)); \ 2532 2496 } \ 2533 2497 if (cond) \ 2534 2498 break; \ ··· 2539 2503 break; \ 2540 2504 (val) = __elbi_read; \ 2541 2505 } else {\ 2542 - (val) = RREG32((u32)addr); \ 2506 + (val) = RREG32((u32)(addr)); \ 2543 2507 } \ 2544 2508 break; \ 2545 2509 } \ ··· 2955 2919 * struct undefined_opcode_info - info about last undefined opcode error 2956 2920 * @timestamp: timestamp of the undefined opcode error 2957 2921 * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ 2958 - * entiers. In case all streams array entries are 2922 + * entries. In case all streams array entries are 2959 2923 * filled with values, it means the execution was in Lower-CP. 2960 2924 * @cq_addr: the address of the current handled command buffer 2961 2925 * @cq_size: the size of the current handled command buffer ··· 2982 2946 }; 2983 2947 2984 2948 /** 2985 - * struct last_error_session_info - info about last session errors occurred. 2986 - * @cs_timeout: CS timeout error last information. 2987 - * @razwi: razwi last information. 2949 + * struct hl_error_info - holds information collected during an error. 2950 + * @cs_timeout: CS timeout error information. 2951 + * @razwi: razwi information. 2988 2952 * @undef_opcode: undefined opcode information 2989 2953 */ 2990 - struct last_error_session_info { 2954 + struct hl_error_info { 2991 2955 struct cs_timeout_info cs_timeout; 2992 2956 struct razwi_info razwi; 2993 2957 struct undefined_opcode_info undef_opcode; ··· 2996 2960 /** 2997 2961 * struct hl_reset_info - holds current device reset information. 2998 2962 * @lock: lock to protect critical reset flows. 
2999 - * @compute_reset_cnt: number of compte resets since the driver was loaded. 2963 + * @compute_reset_cnt: number of compute resets since the driver was loaded. 3000 2964 * @hard_reset_cnt: number of hard resets since the driver was loaded. 3001 2965 * @hard_reset_schedule_flags: hard reset is scheduled to after current compute reset, 3002 2966 * here we hold the hard reset flags. ··· 3007 2971 * @hard_reset_pending: is there a hard reset work pending. 3008 2972 * @curr_reset_cause: saves an enumerated reset cause when a hard reset is 3009 2973 * triggered, and cleared after it is shared with preboot. 3010 - * @prev_reset_trigger: saves the previous trigger which caused a reset, overidden 2974 + * @prev_reset_trigger: saves the previous trigger which caused a reset, overridden 3011 2975 * with a new value on next reset 3012 2976 * @reset_trigger_repeated: set if device reset is triggered more than once with 3013 2977 * same cause. ··· 3077 3041 * @asid_mutex: protects asid_bitmap. 3078 3042 * @send_cpu_message_lock: enforces only one message in Host <-> CPU-CP queue. 3079 3043 * @debug_lock: protects critical section of setting debug mode for device 3044 + * @mmu_lock: protects the MMU page tables and invalidation h/w. Although the 3045 + * page tables are per context, the invalidation h/w is per MMU. 3046 + * Therefore, we can't allow multiple contexts (we only have two, 3047 + * user and kernel) to access the invalidation h/w at the same time. 3048 + * In addition, any change to the PGT, modifying the MMU hash or 3049 + * walking the PGT requires taking this lock. 3080 3050 * @asic_prop: ASIC specific immutable properties. 3081 3051 * @asic_funcs: ASIC specific functions. 3082 3052 * @asic_specific: ASIC specific information to use only from ASIC files. ··· 3091 3049 * @hl_chip_info: ASIC's sensors information. 3092 3050 * @device_status_description: device status description. 3093 3051 * @hl_debugfs: device's debugfs manager. 
3094 - * @cb_pool: list of preallocated CBs. 3052 + * @cb_pool: list of pre allocated CBs. 3095 3053 * @cb_pool_lock: protects the CB pool. 3096 3054 * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. 3097 3055 * @internal_cb_pool_dma_addr: internal command buffer pool dma address. ··· 3112 3070 * @state_dump_specs: constants and dictionaries needed to dump system state. 3113 3071 * @multi_cs_completion: array of multi-CS completion. 3114 3072 * @clk_throttling: holds information about current/previous clock throttling events 3115 - * @last_error: holds information about last session in which CS timeout or razwi error occurred. 3073 + * @captured_err_info: holds information about errors. 3116 3074 * @reset_info: holds current device reset information. 3117 3075 * @stream_master_qid_arr: pointer to array with QIDs of master streams. 3118 3076 * @fw_major_version: major version of current loaded preboot. ··· 3153 3111 * @edma_binning: contains mask of edma engines that is received from the f/w which 3154 3112 * indicates which edma engines are binned-out 3155 3113 * @id: device minor. 3156 - * @id_control: minor of the control device 3114 + * @id_control: minor of the control device. 3115 + * @cdev_idx: char device index. Used for setting its name. 3157 3116 * @cpu_pci_msb_addr: 50-bit extension bits for the device CPU's 40-bit 3158 3117 * addresses. 3159 3118 * @is_in_dram_scrub: true if dram scrub operation is on going. ··· 3208 3165 * Used only for testing. 3209 3166 * @heartbeat: Controls if we want to enable the heartbeat mechanism vs. the f/w, which verifies 3210 3167 * that the f/w is always alive. Used only for testing. 3168 + * @supports_ctx_switch: true if a ctx switch is required upon first submission. 
3211 3169 */ 3212 3170 struct hl_device { 3213 3171 struct pci_dev *pdev; ··· 3248 3204 struct mutex asid_mutex; 3249 3205 struct mutex send_cpu_message_lock; 3250 3206 struct mutex debug_lock; 3207 + struct mutex mmu_lock; 3251 3208 struct asic_fixed_properties asic_prop; 3252 3209 const struct hl_asic_funcs *asic_funcs; 3253 3210 void *asic_specific; ··· 3287 3242 struct multi_cs_completion multi_cs_completion[ 3288 3243 MULTI_CS_MAX_USER_CTX]; 3289 3244 struct hl_clk_throttle clk_throttling; 3290 - struct last_error_session_info last_error; 3245 + struct hl_error_info captured_err_info; 3291 3246 3292 3247 struct hl_reset_info reset_info; 3293 3248 ··· 3316 3271 u32 edma_binning; 3317 3272 u16 id; 3318 3273 u16 id_control; 3274 + u16 cdev_idx; 3319 3275 u16 cpu_pci_msb_addr; 3320 3276 u8 is_in_dram_scrub; 3321 3277 u8 disabled; ··· 3346 3300 u8 compute_ctx_in_release; 3347 3301 u8 supports_mmu_prefetch; 3348 3302 u8 reset_upon_device_release; 3303 + u8 supports_ctx_switch; 3349 3304 3350 3305 /* Parameters for bring-up */ 3351 3306 u64 nic_ports_mask; ··· 3473 3426 } 3474 3427 3475 3428 uint64_t hl_set_dram_bar_default(struct hl_device *hdev, u64 addr); 3476 - void *hl_asic_dma_alloc_coherent(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 3477 - gfp_t flag); 3478 - void hl_asic_dma_free_coherent(struct hl_device *hdev, size_t size, void *cpu_addr, 3479 - dma_addr_t dma_handle); 3480 - void *hl_cpu_accessible_dma_pool_alloc(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle); 3481 - void hl_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size, void *vaddr); 3482 - void *hl_asic_dma_pool_zalloc(struct hl_device *hdev, size_t size, gfp_t mem_flags, 3483 - dma_addr_t *dma_handle); 3484 - void hl_asic_dma_pool_free(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr); 3429 + void *hl_asic_dma_alloc_coherent_caller(struct hl_device *hdev, size_t size, dma_addr_t *dma_handle, 3430 + gfp_t flag, const char *caller); 3431 + void 
hl_asic_dma_free_coherent_caller(struct hl_device *hdev, size_t size, void *cpu_addr, 3432 + dma_addr_t dma_handle, const char *caller); 3433 + void *hl_cpu_accessible_dma_pool_alloc_caller(struct hl_device *hdev, size_t size, 3434 + dma_addr_t *dma_handle, const char *caller); 3435 + void hl_cpu_accessible_dma_pool_free_caller(struct hl_device *hdev, size_t size, void *vaddr, 3436 + const char *caller); 3437 + void *hl_asic_dma_pool_zalloc_caller(struct hl_device *hdev, size_t size, gfp_t mem_flags, 3438 + dma_addr_t *dma_handle, const char *caller); 3439 + void hl_asic_dma_pool_free_caller(struct hl_device *hdev, void *vaddr, dma_addr_t dma_addr, 3440 + const char *caller); 3485 3441 int hl_dma_map_sgtable(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); 3486 3442 void hl_dma_unmap_sgtable(struct hl_device *hdev, struct sg_table *sgt, 3487 3443 enum dma_data_direction dir); ··· 3563 3513 3564 3514 int hl_hwmon_init(struct hl_device *hdev); 3565 3515 void hl_hwmon_fini(struct hl_device *hdev); 3516 + void hl_hwmon_release_resources(struct hl_device *hdev); 3566 3517 3567 3518 int hl_cb_create(struct hl_device *hdev, struct hl_mem_mgr *mmg, 3568 3519 struct hl_ctx *ctx, u32 cb_size, bool internal_cb, ··· 3608 3557 void hl_hw_block_mem_fini(struct hl_ctx *ctx); 3609 3558 3610 3559 u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, 3611 - enum hl_va_range_type type, u32 size, u32 alignment); 3560 + enum hl_va_range_type type, u64 size, u32 alignment); 3612 3561 int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, 3613 3562 u64 start_addr, u64 size); 3614 3563 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size, ··· 3725 3674 struct cpucp_hbm_row_info *info); 3726 3675 int hl_fw_dram_pending_row_get(struct hl_device *hdev, u32 *pend_rows_num); 3727 3676 int hl_fw_cpucp_engine_core_asid_set(struct hl_device *hdev, u32 asid); 3677 + int hl_fw_send_device_activity(struct hl_device *hdev, bool 
open); 3728 3678 int hl_pci_bars_map(struct hl_device *hdev, const char * const name[3], 3729 3679 bool is_wc[3]); 3730 3680 int hl_pci_elbi_read(struct hl_device *hdev, u64 addr, u32 *data); ··· 3749 3697 void hl_set_pwm_info(struct hl_device *hdev, int sensor_index, u32 attr, long value); 3750 3698 long hl_fw_get_max_power(struct hl_device *hdev); 3751 3699 void hl_fw_set_max_power(struct hl_device *hdev); 3700 + int hl_fw_get_sec_attest_info(struct hl_device *hdev, struct cpucp_sec_attest_info *sec_attest_info, 3701 + u32 nonce); 3752 3702 int hl_set_voltage(struct hl_device *hdev, int sensor_index, u32 attr, long value); 3753 3703 int hl_set_current(struct hl_device *hdev, int sensor_index, u32 attr, long value); 3754 3704 int hl_set_power(struct hl_device *hdev, int sensor_index, u32 attr, long value); ··· 3797 3743 hl_mmap_mem_buf_alloc(struct hl_mem_mgr *mmg, 3798 3744 struct hl_mmap_mem_buf_behavior *behavior, gfp_t gfp, 3799 3745 void *args); 3746 + __printf(2, 3) void hl_engine_data_sprintf(struct engines_data *e, const char *fmt, ...); 3800 3747 3801 3748 #ifdef CONFIG_DEBUG_FS 3802 3749
+28 -16
drivers/misc/habanalabs/common/habanalabs_drv.c
··· 14 14 #include <linux/aer.h> 15 15 #include <linux/module.h> 16 16 17 + #define CREATE_TRACE_POINTS 18 + #include <trace/events/habanalabs.h> 19 + 17 20 #define HL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team" 18 21 19 22 #define HL_DRIVER_DESC "Driver for HabanaLabs's AI Accelerators" ··· 30 27 static DEFINE_IDR(hl_devs_idr); 31 28 static DEFINE_MUTEX(hl_devs_idr_lock); 32 29 33 - static int timeout_locked = 30; 30 + #define HL_DEFAULT_TIMEOUT_LOCKED 30 /* 30 seconds */ 31 + #define GAUDI_DEFAULT_TIMEOUT_LOCKED 600 /* 10 minutes */ 32 + 33 + static int timeout_locked = HL_DEFAULT_TIMEOUT_LOCKED; 34 34 static int reset_on_lockup = 1; 35 35 static int memory_scrub; 36 36 static ulong boot_error_status_mask = ULONG_MAX; ··· 61 55 #define PCI_IDS_GAUDI_SEC 0x1010 62 56 63 57 #define PCI_IDS_GAUDI2 0x1020 64 - #define PCI_IDS_GAUDI2_SEC 0x1030 65 58 66 59 static const struct pci_device_id ids[] = { 67 60 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GOYA), }, 68 61 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI), }, 69 62 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI_SEC), }, 70 63 { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2), }, 71 - { PCI_DEVICE(PCI_VENDOR_ID_HABANALABS, PCI_IDS_GAUDI2_SEC), }, 72 64 { 0, } 73 65 }; 74 66 MODULE_DEVICE_TABLE(pci, ids); ··· 96 92 case PCI_IDS_GAUDI2: 97 93 asic_type = ASIC_GAUDI2; 98 94 break; 99 - case PCI_IDS_GAUDI2_SEC: 100 - asic_type = ASIC_GAUDI2_SEC; 101 - break; 102 95 default: 103 96 asic_type = ASIC_INVALID; 104 97 break; ··· 108 107 { 109 108 switch (asic_type) { 110 109 case ASIC_GAUDI_SEC: 111 - case ASIC_GAUDI2_SEC: 112 110 return true; 113 111 default: 114 112 return false; ··· 161 161 mutex_lock(&hdev->fpriv_list_lock); 162 162 163 163 if (!hl_device_operational(hdev, &status)) { 164 - dev_err_ratelimited(hdev->dev, 164 + dev_dbg_ratelimited(hdev->dev, 165 165 "Can't open %s because it is %s\n", 166 166 dev_name(hdev->dev), hdev->status[status]); 167 167 ··· 207 207 
list_add(&hpriv->dev_node, &hdev->fpriv_list); 208 208 mutex_unlock(&hdev->fpriv_list_lock); 209 209 210 + hdev->asic_funcs->send_device_activity(hdev, true); 211 + 210 212 hl_debugfs_add_file(hpriv); 211 213 212 - atomic_set(&hdev->last_error.cs_timeout.write_enable, 1); 213 - atomic_set(&hdev->last_error.razwi.write_enable, 1); 214 - hdev->last_error.undef_opcode.write_enable = true; 214 + atomic_set(&hdev->captured_err_info.cs_timeout.write_enable, 1); 215 + atomic_set(&hdev->captured_err_info.razwi.write_enable, 1); 216 + hdev->captured_err_info.undef_opcode.write_enable = true; 215 217 216 218 hdev->open_counter++; 217 219 hdev->last_successful_open_jif = jiffies; ··· 271 269 mutex_lock(&hdev->fpriv_ctrl_list_lock); 272 270 273 271 if (!hl_device_operational(hdev, NULL)) { 274 - dev_err_ratelimited(hdev->dev_ctrl, 272 + dev_dbg_ratelimited(hdev->dev_ctrl, 275 273 "Can't open %s because it is disabled or in reset\n", 276 274 dev_name(hdev->dev_ctrl)); 277 275 rc = -EPERM; ··· 316 314 hdev->boot_error_status_mask = boot_error_status_mask; 317 315 } 318 316 319 - static void fixup_device_params_per_asic(struct hl_device *hdev) 317 + static void fixup_device_params_per_asic(struct hl_device *hdev, int timeout) 320 318 { 321 319 switch (hdev->asic_type) { 322 - case ASIC_GOYA: 323 320 case ASIC_GAUDI: 324 321 case ASIC_GAUDI_SEC: 322 + /* If user didn't request a different timeout than the default one, we have 323 + * a different default timeout for Gaudi 324 + */ 325 + if (timeout == HL_DEFAULT_TIMEOUT_LOCKED) 326 + hdev->timeout_jiffies = msecs_to_jiffies(GAUDI_DEFAULT_TIMEOUT_LOCKED * 327 + MSEC_PER_SEC); 328 + 329 + hdev->reset_upon_device_release = 0; 330 + break; 331 + 332 + case ASIC_GOYA: 325 333 hdev->reset_upon_device_release = 0; 326 334 break; 327 335 ··· 351 339 hdev->fw_comms_poll_interval_usec = HL_FW_STATUS_POLL_INTERVAL_USEC; 352 340 353 341 if (tmp_timeout) 354 - hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * 1000); 342 + 
hdev->timeout_jiffies = msecs_to_jiffies(tmp_timeout * MSEC_PER_SEC); 355 343 else 356 344 hdev->timeout_jiffies = MAX_SCHEDULE_TIMEOUT; 357 345 ··· 372 360 if (!hdev->cpu_queues_enable) 373 361 hdev->heartbeat = 0; 374 362 375 - fixup_device_params_per_asic(hdev); 363 + fixup_device_params_per_asic(hdev, tmp_timeout); 376 364 377 365 return 0; 378 366 }
+108 -15
drivers/misc/habanalabs/common/habanalabs_ioctl.c
··· 14 14 #include <linux/fs.h> 15 15 #include <linux/uaccess.h> 16 16 #include <linux/slab.h> 17 + #include <linux/vmalloc.h> 17 18 18 19 static u32 hl_debug_struct_size[HL_DEBUG_OP_TIMESTAMP + 1] = { 19 20 [HL_DEBUG_OP_ETR] = sizeof(struct hl_debug_params_etr), ··· 104 103 105 104 hw_ip.edma_enabled_mask = prop->edma_enabled_mask; 106 105 hw_ip.server_type = prop->server_type; 106 + hw_ip.security_enabled = prop->fw_security_enabled; 107 107 108 108 return copy_to_user(out, &hw_ip, 109 109 min((size_t) size, sizeof(hw_ip))) ? -EFAULT : 0; ··· 593 591 if ((!max_size) || (!out)) 594 592 return -EINVAL; 595 593 596 - info.seq = hdev->last_error.cs_timeout.seq; 597 - info.timestamp = ktime_to_ns(hdev->last_error.cs_timeout.timestamp); 594 + info.seq = hdev->captured_err_info.cs_timeout.seq; 595 + info.timestamp = ktime_to_ns(hdev->captured_err_info.cs_timeout.timestamp); 598 596 599 597 return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; 600 598 } ··· 609 607 if ((!max_size) || (!out)) 610 608 return -EINVAL; 611 609 612 - info.timestamp = ktime_to_ns(hdev->last_error.razwi.timestamp); 613 - info.addr = hdev->last_error.razwi.addr; 614 - info.engine_id_1 = hdev->last_error.razwi.engine_id_1; 615 - info.engine_id_2 = hdev->last_error.razwi.engine_id_2; 616 - info.no_engine_id = hdev->last_error.razwi.non_engine_initiator; 617 - info.error_type = hdev->last_error.razwi.type; 610 + info.timestamp = ktime_to_ns(hdev->captured_err_info.razwi.timestamp); 611 + info.addr = hdev->captured_err_info.razwi.addr; 612 + info.engine_id_1 = hdev->captured_err_info.razwi.engine_id_1; 613 + info.engine_id_2 = hdev->captured_err_info.razwi.engine_id_2; 614 + info.no_engine_id = hdev->captured_err_info.razwi.non_engine_initiator; 615 + info.error_type = hdev->captured_err_info.razwi.type; 618 616 619 617 return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; 620 618 } ··· 629 627 if ((!max_size) || (!out)) 630 628 return -EINVAL; 631 629 632 - info.timestamp = ktime_to_ns(hdev->last_error.undef_opcode.timestamp); 633 - info.engine_id = hdev->last_error.undef_opcode.engine_id; 634 - info.cq_addr = hdev->last_error.undef_opcode.cq_addr; 635 - info.cq_size = hdev->last_error.undef_opcode.cq_size; 636 - info.stream_id = hdev->last_error.undef_opcode.stream_id; 637 - info.cb_addr_streams_len = hdev->last_error.undef_opcode.cb_addr_streams_len; 638 - memcpy(info.cb_addr_streams, hdev->last_error.undef_opcode.cb_addr_streams, 630 + info.timestamp = ktime_to_ns(hdev->captured_err_info.undef_opcode.timestamp); 631 + info.engine_id = hdev->captured_err_info.undef_opcode.engine_id; 632 + info.cq_addr = hdev->captured_err_info.undef_opcode.cq_addr; 633 + info.cq_size = hdev->captured_err_info.undef_opcode.cq_size; 634 + info.stream_id = hdev->captured_err_info.undef_opcode.stream_id; 635 + info.cb_addr_streams_len = hdev->captured_err_info.undef_opcode.cb_addr_streams_len; 636 + memcpy(info.cb_addr_streams, hdev->captured_err_info.undef_opcode.cb_addr_streams, 639 637 sizeof(info.cb_addr_streams)); 640 638 641 639 return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? -EFAULT : 0; ··· 660 658 info.page_order_bitmask = hdev->asic_prop.dmmu.supported_pages_mask; 661 659 662 660 return copy_to_user(out, &info, min_t(size_t, max_size, sizeof(info))) ? 
-EFAULT : 0; 661 + } 662 + 663 + static int sec_attest_info(struct hl_fpriv *hpriv, struct hl_info_args *args) 664 + { 665 + void __user *out = (void __user *) (uintptr_t) args->return_pointer; 666 + struct cpucp_sec_attest_info *sec_attest_info; 667 + struct hl_info_sec_attest *info; 668 + u32 max_size = args->return_size; 669 + int rc; 670 + 671 + if ((!max_size) || (!out)) 672 + return -EINVAL; 673 + 674 + sec_attest_info = kmalloc(sizeof(*sec_attest_info), GFP_KERNEL); 675 + if (!sec_attest_info) 676 + return -ENOMEM; 677 + 678 + info = kmalloc(sizeof(*info), GFP_KERNEL); 679 + if (!info) { 680 + rc = -ENOMEM; 681 + goto free_sec_attest_info; 682 + } 683 + 684 + rc = hl_fw_get_sec_attest_info(hpriv->hdev, sec_attest_info, args->sec_attest_nonce); 685 + if (rc) 686 + goto free_info; 687 + 688 + info->nonce = le32_to_cpu(sec_attest_info->nonce); 689 + info->pcr_quote_len = le16_to_cpu(sec_attest_info->pcr_quote_len); 690 + info->pub_data_len = le16_to_cpu(sec_attest_info->pub_data_len); 691 + info->certificate_len = le16_to_cpu(sec_attest_info->certificate_len); 692 + info->pcr_num_reg = sec_attest_info->pcr_num_reg; 693 + info->pcr_reg_len = sec_attest_info->pcr_reg_len; 694 + info->quote_sig_len = sec_attest_info->quote_sig_len; 695 + memcpy(&info->pcr_data, &sec_attest_info->pcr_data, sizeof(info->pcr_data)); 696 + memcpy(&info->pcr_quote, &sec_attest_info->pcr_quote, sizeof(info->pcr_quote)); 697 + memcpy(&info->public_data, &sec_attest_info->public_data, sizeof(info->public_data)); 698 + memcpy(&info->certificate, &sec_attest_info->certificate, sizeof(info->certificate)); 699 + memcpy(&info->quote_sig, &sec_attest_info->quote_sig, sizeof(info->quote_sig)); 700 + 701 + rc = copy_to_user(out, info, 702 + min_t(size_t, max_size, sizeof(*info))) ? 
-EFAULT : 0; 703 + 704 + free_info: 705 + kfree(info); 706 + free_sec_attest_info: 707 + kfree(sec_attest_info); 708 + 709 + return rc; 663 710 } 664 711 665 712 static int eventfd_register(struct hl_fpriv *hpriv, struct hl_info_args *args) ··· 746 695 hpriv->notifier_event.eventfd = NULL; 747 696 mutex_unlock(&hpriv->notifier_event.lock); 748 697 return 0; 698 + } 699 + 700 + static int engine_status_info(struct hl_fpriv *hpriv, struct hl_info_args *args) 701 + { 702 + void __user *out = (void __user *) (uintptr_t) args->return_pointer; 703 + u32 status_buf_size = args->return_size; 704 + struct hl_device *hdev = hpriv->hdev; 705 + struct engines_data eng_data; 706 + int rc; 707 + 708 + if ((status_buf_size < SZ_1K) || (status_buf_size > HL_ENGINES_DATA_MAX_SIZE) || (!out)) 709 + return -EINVAL; 710 + 711 + eng_data.actual_size = 0; 712 + eng_data.allocated_buf_size = status_buf_size; 713 + eng_data.buf = vmalloc(status_buf_size); 714 + if (!eng_data.buf) 715 + return -ENOMEM; 716 + 717 + hdev->asic_funcs->is_device_idle(hdev, NULL, 0, &eng_data); 718 + 719 + if (eng_data.actual_size > eng_data.allocated_buf_size) { 720 + dev_err(hdev->dev, 721 + "Engines data size (%d Bytes) is bigger than allocated size (%u Bytes)\n", 722 + eng_data.actual_size, status_buf_size); 723 + vfree(eng_data.buf); 724 + return -ENOMEM; 725 + } 726 + 727 + args->user_buffer_actual_size = eng_data.actual_size; 728 + rc = copy_to_user(out, eng_data.buf, min_t(size_t, status_buf_size, eng_data.actual_size)) ? 
729 + -EFAULT : 0; 730 + 731 + vfree(eng_data.buf); 732 + 733 + return rc; 749 734 } 750 735 751 736 static int _hl_info_ioctl(struct hl_fpriv *hpriv, void *data, ··· 893 806 case HL_INFO_DRAM_PENDING_ROWS: 894 807 return dram_pending_rows_info(hpriv, args); 895 808 809 + case HL_INFO_SECURED_ATTESTATION: 810 + return sec_attest_info(hpriv, args); 811 + 896 812 case HL_INFO_REGISTER_EVENTFD: 897 813 return eventfd_register(hpriv, args); 898 814 899 815 case HL_INFO_UNREGISTER_EVENTFD: 900 816 return eventfd_unregister(hpriv, args); 817 + 818 + case HL_INFO_ENGINE_STATUS: 819 + return engine_status_info(hpriv, args); 901 820 902 821 default: 903 822 dev_err(dev, "Invalid request %d\n", args->op);
+1 -3
drivers/misc/habanalabs/common/hw_queue.c
··· 826 826 827 827 q->kernel_address = p; 828 828 829 - q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, 830 - sizeof(*q->shadow_queue), 831 - GFP_KERNEL); 829 + q->shadow_queue = kmalloc_array(HL_QUEUE_LENGTH, sizeof(struct hl_cs_job *), GFP_KERNEL); 832 830 if (!q->shadow_queue) { 833 831 dev_err(hdev->dev, 834 832 "Failed to allocate shadow queue for H/W queue %d\n",
+23 -1
drivers/misc/habanalabs/common/hwmon.c
··· 194 194 curr_arr[sensors_by_type_next_index[type]++] = flags; 195 195 } 196 196 197 - channels_info = kcalloc(num_active_sensor_types + 1, sizeof(*channels_info), GFP_KERNEL); 197 + channels_info = kcalloc(num_active_sensor_types + 1, sizeof(struct hwmon_channel_info *), 198 + GFP_KERNEL); 198 199 if (!channels_info) { 199 200 rc = -ENOMEM; 200 201 goto channels_info_array_err; ··· 910 909 return; 911 910 912 911 hwmon_device_unregister(hdev->hwmon_dev); 912 + } 913 + 914 + void hl_hwmon_release_resources(struct hl_device *hdev) 915 + { 916 + const struct hwmon_channel_info **channel_info_arr; 917 + int i = 0; 918 + 919 + if (!hdev->hl_chip_info->info) 920 + return; 921 + 922 + channel_info_arr = hdev->hl_chip_info->info; 923 + 924 + while (channel_info_arr[i]) { 925 + kfree(channel_info_arr[i]->config); 926 + kfree(channel_info_arr[i]); 927 + i++; 928 + } 929 + 930 + kfree(channel_info_arr); 931 + 932 + hdev->hl_chip_info->info = NULL; 913 933 }
+34 -23
drivers/misc/habanalabs/common/memory.c
··· 457 457 prev = list_prev_entry(va_block, node); 458 458 if (&prev->node != va_list && prev->end + 1 == va_block->start) { 459 459 prev->end = va_block->end; 460 - prev->size = prev->end - prev->start; 460 + prev->size = prev->end - prev->start + 1; 461 461 list_del(&va_block->node); 462 462 kfree(va_block); 463 463 va_block = prev; ··· 466 466 next = list_next_entry(va_block, node); 467 467 if (&next->node != va_list && va_block->end + 1 == next->start) { 468 468 next->start = va_block->start; 469 - next->size = next->end - next->start; 469 + next->size = next->end - next->start + 1; 470 470 list_del(&va_block->node); 471 471 kfree(va_block); 472 472 } ··· 755 755 * - Return the start address of the virtual block. 756 756 */ 757 757 u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx, 758 - enum hl_va_range_type type, u32 size, u32 alignment) 758 + enum hl_va_range_type type, u64 size, u32 alignment) 759 759 { 760 760 return get_va_block(hdev, ctx->va_range[type], size, 0, 761 761 max(alignment, ctx->va_range[type]->page_size), ··· 1210 1210 goto va_block_err; 1211 1211 } 1212 1212 1213 - mutex_lock(&ctx->mmu_lock); 1213 + mutex_lock(&hdev->mmu_lock); 1214 1214 1215 1215 rc = map_phys_pg_pack(ctx, ret_vaddr, phys_pg_pack); 1216 1216 if (rc) { 1217 1217 dev_err(hdev->dev, "mapping page pack failed for handle %u\n", handle); 1218 - mutex_unlock(&ctx->mmu_lock); 1218 + mutex_unlock(&hdev->mmu_lock); 1219 1219 goto map_err; 1220 1220 } 1221 1221 1222 1222 rc = hl_mmu_invalidate_cache_range(hdev, false, *vm_type | MMU_OP_SKIP_LOW_CACHE_INV, 1223 1223 ctx->asid, ret_vaddr, phys_pg_pack->total_size); 1224 - mutex_unlock(&ctx->mmu_lock); 1224 + mutex_unlock(&hdev->mmu_lock); 1225 1225 if (rc) 1226 1226 goto map_err; 1227 1227 ··· 1362 1362 else 1363 1363 vaddr &= ~(((u64) phys_pg_pack->page_size) - 1); 1364 1364 1365 - mutex_lock(&ctx->mmu_lock); 1365 + mutex_lock(&hdev->mmu_lock); 1366 1366 1367 1367 unmap_phys_pg_pack(ctx, vaddr, phys_pg_pack); 1368 
1368 ··· 1375 1375 rc = hl_mmu_invalidate_cache_range(hdev, true, *vm_type, ctx->asid, vaddr, 1376 1376 phys_pg_pack->total_size); 1377 1377 1378 - mutex_unlock(&ctx->mmu_lock); 1378 + mutex_unlock(&hdev->mmu_lock); 1379 1379 1380 1380 /* 1381 1381 * If the context is closing we don't need to check for the MMU cache ··· 1418 1418 return rc; 1419 1419 } 1420 1420 1421 - static int map_block(struct hl_device *hdev, u64 address, u64 *handle, 1422 - u32 *size) 1421 + static int map_block(struct hl_device *hdev, u64 address, u64 *handle, u32 *size) 1423 1422 { 1424 - u32 block_id = 0; 1423 + u32 block_id; 1425 1424 int rc; 1426 1425 1426 + *handle = 0; 1427 + if (size) 1428 + *size = 0; 1429 + 1427 1430 rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); 1431 + if (rc) 1432 + return rc; 1428 1433 1429 1434 *handle = block_id | HL_MMAP_TYPE_BLOCK; 1430 1435 *handle <<= PAGE_SHIFT; 1431 1436 1432 - return rc; 1437 + return 0; 1433 1438 } 1434 1439 1435 1440 static void hw_block_vm_close(struct vm_area_struct *vma) ··· 1442 1437 struct hl_vm_hw_block_list_node *lnode = 1443 1438 (struct hl_vm_hw_block_list_node *) vma->vm_private_data; 1444 1439 struct hl_ctx *ctx = lnode->ctx; 1440 + long new_mmap_size; 1441 + 1442 + new_mmap_size = lnode->mapped_size - (vma->vm_end - vma->vm_start); 1443 + if (new_mmap_size > 0) { 1444 + lnode->mapped_size = new_mmap_size; 1445 + return; 1446 + } 1445 1447 1446 1448 mutex_lock(&ctx->hw_block_list_lock); 1447 1449 list_del(&lnode->node); ··· 1499 1487 if (!lnode) 1500 1488 return -ENOMEM; 1501 1489 1502 - vma->vm_ops = &hw_block_vm_ops; 1503 - vma->vm_private_data = lnode; 1504 - 1505 - hl_ctx_get(ctx); 1506 - 1507 1490 rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size); 1508 1491 if (rc) { 1509 - hl_ctx_put(ctx); 1510 1492 kfree(lnode); 1511 1493 return rc; 1512 1494 } 1513 1495 1496 + hl_ctx_get(ctx); 1497 + 1514 1498 lnode->ctx = ctx; 1515 1499 lnode->vaddr = vma->vm_start; 1516 - lnode->size = 
block_size; 1500 + lnode->block_size = block_size; 1501 + lnode->mapped_size = lnode->block_size; 1517 1502 lnode->id = block_id; 1503 + 1504 + vma->vm_private_data = lnode; 1505 + vma->vm_ops = &hw_block_vm_ops; 1518 1506 1519 1507 mutex_lock(&ctx->hw_block_list_lock); 1520 1508 list_add_tail(&lnode->node, &ctx->hw_block_mem_list); ··· 2308 2296 return -EFAULT; 2309 2297 } 2310 2298 2311 - userptr->pages = kvmalloc_array(npages, sizeof(*userptr->pages), 2312 - GFP_KERNEL); 2299 + userptr->pages = kvmalloc_array(npages, sizeof(struct page *), GFP_KERNEL); 2313 2300 if (!userptr->pages) 2314 2301 return -ENOMEM; 2315 2302 ··· 2770 2759 unmap_device_va(ctx, &args, true); 2771 2760 } 2772 2761 2773 - mutex_lock(&ctx->mmu_lock); 2762 + mutex_lock(&hdev->mmu_lock); 2774 2763 2775 2764 /* invalidate the cache once after the unmapping loop */ 2776 2765 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 2777 2766 hl_mmu_invalidate_cache(hdev, true, MMU_OP_PHYS_PACK); 2778 2767 2779 - mutex_unlock(&ctx->mmu_lock); 2768 + mutex_unlock(&hdev->mmu_lock); 2780 2769 2781 2770 INIT_LIST_HEAD(&free_list); 2782 2771
+5 -5
drivers/misc/habanalabs/common/memory_mgr.c
··· 11 11 * hl_mmap_mem_buf_get - increase the buffer refcount and return a pointer to 12 12 * the buffer descriptor. 13 13 * 14 - * @mmg: parent unifed memory manager 14 + * @mmg: parent unified memory manager 15 15 * @handle: requested buffer handle 16 16 * 17 17 * Find the buffer in the store and return a pointer to its descriptor. ··· 104 104 * hl_mmap_mem_buf_put_handle - decrease the reference to the buffer with the 105 105 * given handle. 106 106 * 107 - * @mmg: parent unifed memory manager 107 + * @mmg: parent unified memory manager 108 108 * @handle: requested buffer handle 109 109 * 110 110 * Decrease the reference to the buffer, and release it if it was the last one. ··· 137 137 /** 138 138 * hl_mmap_mem_buf_alloc - allocate a new mappable buffer 139 139 * 140 - * @mmg: parent unifed memory manager 140 + * @mmg: parent unified memory manager 141 141 * @behavior: behavior object describing this buffer polymorphic behavior 142 142 * @gfp: gfp flags to use for the memory allocations 143 143 * @args: additional args passed to behavior->alloc ··· 222 222 /** 223 223 * hl_mem_mgr_mmap - map the given buffer to the user 224 224 * 225 - * @mmg: unifed memory manager 225 + * @mmg: unified memory manager 226 226 * @vma: the vma object for which mmap was closed. 227 227 * @args: additional args passed to behavior->mmap 228 228 * ··· 322 322 /** 323 323 * hl_mem_mgr_fini - release unified memory manager 324 324 * 325 - * @mmg: parent unifed memory manager 325 + * @mmg: parent unified memory manager 326 326 * 327 327 * Release the unified memory manager. Shall be called from an interrupt context. 328 328 */
+20 -11
drivers/misc/habanalabs/common/mmu/mmu.c
··· 9 9 10 10 #include "../habanalabs.h" 11 11 12 + #include <trace/events/habanalabs.h> 13 + 12 14 /** 13 15 * hl_mmu_get_funcs() - get MMU functions structure 14 16 * @hdev: habanalabs device structure. ··· 46 44 47 45 if (!hdev->mmu_enable) 48 46 return 0; 47 + 48 + mutex_init(&hdev->mmu_lock); 49 49 50 50 if (hdev->mmu_func[MMU_DR_PGT].init != NULL) { 51 51 rc = hdev->mmu_func[MMU_DR_PGT].init(hdev); ··· 90 86 91 87 if (hdev->mmu_func[MMU_HR_PGT].fini != NULL) 92 88 hdev->mmu_func[MMU_HR_PGT].fini(hdev); 89 + 90 + mutex_destroy(&hdev->mmu_lock); 93 91 } 94 92 95 93 /** ··· 109 103 110 104 if (!hdev->mmu_enable) 111 105 return 0; 112 - 113 - mutex_init(&ctx->mmu_lock); 114 106 115 107 if (hdev->mmu_func[MMU_DR_PGT].ctx_init != NULL) { 116 108 rc = hdev->mmu_func[MMU_DR_PGT].ctx_init(ctx); ··· 153 149 154 150 if (hdev->mmu_func[MMU_HR_PGT].ctx_fini != NULL) 155 151 hdev->mmu_func[MMU_HR_PGT].ctx_fini(ctx); 156 - 157 - mutex_destroy(&ctx->mmu_lock); 158 152 } 159 153 160 154 /* ··· 261 259 if (flush_pte) 262 260 mmu_funcs->flush(ctx); 263 261 262 + if (trace_habanalabs_mmu_unmap_enabled() && !rc) 263 + trace_habanalabs_mmu_unmap(hdev->dev, virt_addr, 0, page_size, flush_pte); 264 + 264 265 return rc; 265 266 } 266 267 ··· 349 344 if (flush_pte) 350 345 mmu_funcs->flush(ctx); 351 346 347 + trace_habanalabs_mmu_map(hdev->dev, virt_addr, phys_addr, page_size, flush_pte); 348 + 352 349 return 0; 353 350 354 351 err: ··· 410 403 dev_err(hdev->dev, 411 404 "Map failed for va 0x%llx to pa 0x%llx\n", 412 405 curr_va, curr_pa); 406 + /* last mapping failed so don't try to unmap it - reduce off by page_size */ 407 + off -= page_size; 413 408 goto unmap; 414 409 } 415 410 } ··· 609 600 pgt_residency = mmu_prop->host_resident ? 
MMU_HR_PGT : MMU_DR_PGT; 610 601 mmu_funcs = hl_mmu_get_funcs(hdev, pgt_residency, is_dram_addr); 611 602 612 - mutex_lock(&ctx->mmu_lock); 603 + mutex_lock(&hdev->mmu_lock); 613 604 rc = mmu_funcs->get_tlb_info(ctx, virt_addr, hops); 614 - mutex_unlock(&ctx->mmu_lock); 605 + mutex_unlock(&hdev->mmu_lock); 615 606 616 607 if (rc) 617 608 return rc; ··· 701 692 { 702 693 struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); 703 694 struct hl_ctx *ctx = pfw->ctx; 695 + struct hl_device *hdev = ctx->hdev; 704 696 705 - if (!hl_device_operational(ctx->hdev, NULL)) 697 + if (!hl_device_operational(hdev, NULL)) 706 698 goto put_ctx; 707 699 708 - mutex_lock(&ctx->mmu_lock); 700 + mutex_lock(&hdev->mmu_lock); 709 701 710 - ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, 711 - pfw->va, pfw->size); 702 + hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, pfw->va, pfw->size); 712 703 713 - mutex_unlock(&ctx->mmu_lock); 704 + mutex_unlock(&hdev->mmu_lock); 714 705 715 706 put_ctx: 716 707 /*
+10
drivers/misc/habanalabs/common/sysfs.c
··· 375 375 return max_size; 376 376 } 377 377 378 + static ssize_t security_enabled_show(struct device *dev, 379 + struct device_attribute *attr, char *buf) 380 + { 381 + struct hl_device *hdev = dev_get_drvdata(dev); 382 + 383 + return sprintf(buf, "%d\n", hdev->asic_prop.fw_security_enabled); 384 + } 385 + 378 386 static DEVICE_ATTR_RO(armcp_kernel_ver); 379 387 static DEVICE_ATTR_RO(armcp_ver); 380 388 static DEVICE_ATTR_RO(cpld_ver); ··· 401 393 static DEVICE_ATTR_RO(thermal_ver); 402 394 static DEVICE_ATTR_RO(uboot_ver); 403 395 static DEVICE_ATTR_RO(fw_os_ver); 396 + static DEVICE_ATTR_RO(security_enabled); 404 397 405 398 static struct bin_attribute bin_attr_eeprom = { 406 399 .attr = {.name = "eeprom", .mode = (0444)}, ··· 426 417 &dev_attr_thermal_ver.attr, 427 418 &dev_attr_uboot_ver.attr, 428 419 &dev_attr_fw_os_ver.attr, 420 + &dev_attr_security_enabled.attr, 429 421 NULL, 430 422 }; 431 423
+102 -83
drivers/misc/habanalabs/gaudi/gaudi.c
··· 899 899 */ 900 900 static int gaudi_fetch_psoc_frequency(struct hl_device *hdev) 901 901 { 902 - struct asic_fixed_properties *prop = &hdev->asic_prop; 903 902 u32 nr = 0, nf = 0, od = 0, div_fctr = 0, pll_clk, div_sel; 903 + struct asic_fixed_properties *prop = &hdev->asic_prop; 904 904 u16 pll_freq_arr[HL_PLL_NUM_OUTPUTS], freq; 905 905 int rc; 906 906 907 - if (hdev->asic_prop.fw_security_enabled) { 907 + if ((hdev->fw_components & FW_TYPE_LINUX) && 908 + (prop->fw_app_cpu_boot_dev_sts0 & CPU_BOOT_DEV_STS0_PLL_INFO_EN)) { 908 909 struct gaudi_device *gaudi = hdev->asic_specific; 909 910 910 911 if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q)) ··· 940 939 else 941 940 freq = pll_clk / (div_fctr + 1); 942 941 } else { 943 - dev_warn(hdev->dev, 944 - "Received invalid div select value: %d", 945 - div_sel); 942 + dev_warn(hdev->dev, "Received invalid div select value: %#x", div_sel); 946 943 freq = 0; 947 944 } 948 945 } ··· 984 985 init_tpc_mem_pkt->ctl = cpu_to_le32(ctl); 985 986 986 987 init_tpc_mem_pkt->src_addr = cpu_to_le64(tpc_kernel_src_addr); 987 - dst_addr = (prop->sram_user_base_address & 988 - GAUDI_PKT_LIN_DMA_DST_ADDR_MASK) >> 989 - GAUDI_PKT_LIN_DMA_DST_ADDR_SHIFT; 988 + 989 + /* TPC_CMD is configured with I$ prefetch enabled, so address should be aligned to 8KB */ 990 + dst_addr = FIELD_PREP(GAUDI_PKT_LIN_DMA_DST_ADDR_MASK, 991 + round_up(prop->sram_user_base_address, SZ_8K)); 990 992 init_tpc_mem_pkt->dst_addr |= cpu_to_le64(dst_addr); 991 993 992 994 job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true); ··· 1683 1683 1684 1684 static void gaudi_late_fini(struct hl_device *hdev) 1685 1685 { 1686 - const struct hwmon_channel_info **channel_info_arr; 1687 - int i = 0; 1688 - 1689 - if (!hdev->hl_chip_info->info) 1690 - return; 1691 - 1692 - channel_info_arr = hdev->hl_chip_info->info; 1693 - 1694 - while (channel_info_arr[i]) { 1695 - kfree(channel_info_arr[i]->config); 1696 - kfree(channel_info_arr[i]); 1697 - i++; 1698 - } 1699 - 1700 - 
kfree(channel_info_arr); 1701 - 1702 - hdev->hl_chip_info->info = NULL; 1686 + hl_hwmon_release_resources(hdev); 1703 1687 } 1704 1688 1705 1689 static int gaudi_alloc_cpu_accessible_dma_mem(struct hl_device *hdev) ··· 4707 4723 addr = prop->sram_user_base_address; 4708 4724 size = hdev->pldm ? 0x10000 : prop->sram_size - SRAM_USER_BASE_OFFSET; 4709 4725 4710 - dev_dbg(hdev->dev, "Scrubing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4726 + dev_dbg(hdev->dev, "Scrubbing SRAM: 0x%09llx - 0x%09llx val: 0x%llx\n", 4711 4727 addr, addr + size, val); 4712 4728 rc = gaudi_memset_device_memory(hdev, addr, size, val); 4713 4729 if (rc) { ··· 6895 6911 stream, cq_ptr, size); 6896 6912 6897 6913 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6898 - hdev->last_error.undef_opcode.cq_addr = cq_ptr; 6899 - hdev->last_error.undef_opcode.cq_size = size; 6900 - hdev->last_error.undef_opcode.stream_id = stream; 6914 + hdev->captured_err_info.undef_opcode.cq_addr = cq_ptr; 6915 + hdev->captured_err_info.undef_opcode.cq_size = size; 6916 + hdev->captured_err_info.undef_opcode.stream_id = stream; 6901 6917 } 6902 6918 } 6903 6919 ··· 6963 6979 } 6964 6980 6965 6981 if (event_mask & HL_NOTIFIER_EVENT_UNDEFINED_OPCODE) { 6966 - struct undefined_opcode_info *undef_opcode = &hdev->last_error.undef_opcode; 6982 + struct undefined_opcode_info *undef_opcode = &hdev->captured_err_info.undef_opcode; 6967 6983 u32 arr_idx = undef_opcode->cb_addr_streams_len; 6968 6984 6969 6985 if (arr_idx == 0) { ··· 7047 7063 } 7048 7064 /* check for undefined opcode */ 7049 7065 if (glbl_sts_val & TPC0_QM_GLBL_STS1_CP_UNDEF_CMD_ERR_MASK && 7050 - hdev->last_error.undef_opcode.write_enable) { 7051 - memset(&hdev->last_error.undef_opcode, 0, 7052 - sizeof(hdev->last_error.undef_opcode)); 7066 + hdev->captured_err_info.undef_opcode.write_enable) { 7067 + memset(&hdev->captured_err_info.undef_opcode, 0, 7068 + sizeof(hdev->captured_err_info.undef_opcode)); 7053 7069 7054 - 
hdev->last_error.undef_opcode.write_enable = false; 7070 + hdev->captured_err_info.undef_opcode.write_enable = false; 7055 7071 *event_mask |= HL_NOTIFIER_EVENT_UNDEFINED_OPCODE; 7056 7072 } 7057 7073 ··· 7217 7233 7218 7234 switch (event_type) { 7219 7235 case GAUDI_EVENT_TPC0_QM ... GAUDI_EVENT_TPC7_QM: 7220 - /* In TPC QM event, notify on TPC assertion. While there isn't 7221 - * a specific event for assertion yet, the FW generates QM event. 7222 - * The SW upper layer will inspect an internal mapped area to indicate 7223 - * if the event is a tpc assertion or tpc QM. 7224 - */ 7225 - *event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7226 7236 index = event_type - GAUDI_EVENT_TPC0_QM; 7227 7237 qid_base = GAUDI_QUEUE_ID_TPC_0_0 + index * QMAN_STREAMS; 7228 7238 qman_base = mmTPC0_QM_BASE + index * TPC_QMAN_OFFSET; ··· 7327 7349 gaudi_print_and_get_mmu_error_info(hdev, &razwi_addr, &razwi_type); 7328 7350 7329 7351 /* In case it's the first razwi, save its parameters*/ 7330 - rc = atomic_cmpxchg(&hdev->last_error.razwi.write_enable, 1, 0); 7352 + rc = atomic_cmpxchg(&hdev->captured_err_info.razwi.write_enable, 1, 0); 7331 7353 if (rc) { 7332 - hdev->last_error.razwi.timestamp = ktime_get(); 7333 - hdev->last_error.razwi.addr = razwi_addr; 7334 - hdev->last_error.razwi.engine_id_1 = engine_id_1; 7335 - hdev->last_error.razwi.engine_id_2 = engine_id_2; 7354 + hdev->captured_err_info.razwi.timestamp = ktime_get(); 7355 + hdev->captured_err_info.razwi.addr = razwi_addr; 7356 + hdev->captured_err_info.razwi.engine_id_1 = engine_id_1; 7357 + hdev->captured_err_info.razwi.engine_id_2 = engine_id_2; 7336 7358 /* 7337 7359 * If first engine id holds non valid value the razwi initiator 7338 7360 * does not have engine id 7339 7361 */ 7340 - hdev->last_error.razwi.non_engine_initiator = (engine_id_1 == U16_MAX); 7341 - hdev->last_error.razwi.type = razwi_type; 7362 + hdev->captured_err_info.razwi.non_engine_initiator = 7363 + (engine_id_1 == U16_MAX); 7364 + 
hdev->captured_err_info.razwi.type = razwi_type; 7342 7365 7343 7366 } 7344 7367 } ··· 7406 7427 event_type, desc); 7407 7428 } 7408 7429 7409 - static int gaudi_non_hard_reset_late_init(struct hl_device *hdev) 7430 + static int gaudi_compute_reset_late_init(struct hl_device *hdev) 7410 7431 { 7411 7432 /* GAUDI doesn't support any reset except hard-reset */ 7412 7433 return -EPERM; ··· 7681 7702 case GAUDI_EVENT_NIC0_CS_DBG_DERR ... GAUDI_EVENT_NIC4_CS_DBG_DERR: 7682 7703 gaudi_print_irq_info(hdev, event_type, true); 7683 7704 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7705 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7684 7706 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7685 7707 goto reset_device; 7686 7708 ··· 7691 7711 case GAUDI_EVENT_PLL0 ... GAUDI_EVENT_PLL17: 7692 7712 gaudi_print_irq_info(hdev, event_type, false); 7693 7713 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7714 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7694 7715 goto reset_device; 7695 7716 7696 7717 case GAUDI_EVENT_HBM0_SPI_0: ··· 7703 7722 gaudi_hbm_event_to_dev(event_type), 7704 7723 &eq_entry->hbm_ecc_data); 7705 7724 fw_fatal_err_flag = HL_DRV_RESET_FW_FATAL_ERR; 7725 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7706 7726 goto reset_device; 7707 7727 7708 7728 case GAUDI_EVENT_HBM0_SPI_1: ··· 7715 7733 gaudi_hbm_event_to_dev(event_type), 7716 7734 &eq_entry->hbm_ecc_data); 7717 7735 hl_fw_unmask_irq(hdev, event_type); 7736 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7718 7737 break; 7719 7738 7720 7739 case GAUDI_EVENT_TPC0_DEC: ··· 7726 7743 case GAUDI_EVENT_TPC5_DEC: 7727 7744 case GAUDI_EVENT_TPC6_DEC: 7728 7745 case GAUDI_EVENT_TPC7_DEC: 7746 + /* In TPC DEC event, notify on TPC assertion. While there isn't 7747 + * a specific event for assertion yet, the FW generates TPC DEC event. 7748 + * The SW upper layer will inspect an internal mapped area to indicate 7749 + * if the event is a TPC Assertion or a "real" TPC DEC. 
7750 + */ 7751 + event_mask |= HL_NOTIFIER_EVENT_TPC_ASSERT; 7729 7752 gaudi_print_irq_info(hdev, event_type, true); 7730 7753 reset_required = gaudi_tpc_read_interrupts(hdev, 7731 7754 tpc_dec_event_to_tpc_id(event_type), 7732 7755 "AXI_SLV_DEC_Error"); 7756 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7733 7757 if (reset_required) { 7734 7758 dev_err(hdev->dev, "reset required due to %s\n", 7735 7759 gaudi_irq_map_table[event_type].name); ··· 7745 7755 goto reset_device; 7746 7756 } else { 7747 7757 hl_fw_unmask_irq(hdev, event_type); 7758 + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7748 7759 } 7749 7760 break; 7750 7761 ··· 7761 7770 reset_required = gaudi_tpc_read_interrupts(hdev, 7762 7771 tpc_krn_event_to_tpc_id(event_type), 7763 7772 "KRN_ERR"); 7773 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7764 7774 if (reset_required) { 7765 7775 dev_err(hdev->dev, "reset required due to %s\n", 7766 7776 gaudi_irq_map_table[event_type].name); ··· 7770 7778 goto reset_device; 7771 7779 } else { 7772 7780 hl_fw_unmask_irq(hdev, event_type); 7781 + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 7773 7782 } 7774 7783 break; 7775 7784 ··· 7799 7806 gaudi_print_irq_info(hdev, event_type, true); 7800 7807 gaudi_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 7801 7808 hl_fw_unmask_irq(hdev, event_type); 7809 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7802 7810 break; 7803 7811 7804 7812 case GAUDI_EVENT_PCIE_DEC: 7813 + case GAUDI_EVENT_CPU_AXI_SPLITTER: 7814 + case GAUDI_EVENT_PSOC_AXI_DEC: 7815 + case GAUDI_EVENT_PSOC_PRSTN_FALL: 7816 + gaudi_print_irq_info(hdev, event_type, true); 7817 + hl_fw_unmask_irq(hdev, event_type); 7818 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7819 + break; 7820 + 7821 + case GAUDI_EVENT_MMU_PAGE_FAULT: 7822 + case GAUDI_EVENT_MMU_WR_PERM: 7823 + gaudi_print_irq_info(hdev, event_type, true); 7824 + hl_fw_unmask_irq(hdev, event_type); 7825 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7826 + break; 7827 + 
7805 7828 case GAUDI_EVENT_MME0_WBC_RSP: 7806 7829 case GAUDI_EVENT_MME0_SBAB0_RSP: 7807 7830 case GAUDI_EVENT_MME1_WBC_RSP: ··· 7826 7817 case GAUDI_EVENT_MME2_SBAB0_RSP: 7827 7818 case GAUDI_EVENT_MME3_WBC_RSP: 7828 7819 case GAUDI_EVENT_MME3_SBAB0_RSP: 7829 - case GAUDI_EVENT_CPU_AXI_SPLITTER: 7830 - case GAUDI_EVENT_PSOC_AXI_DEC: 7831 - case GAUDI_EVENT_PSOC_PRSTN_FALL: 7832 - case GAUDI_EVENT_MMU_PAGE_FAULT: 7833 - case GAUDI_EVENT_MMU_WR_PERM: 7834 7820 case GAUDI_EVENT_RAZWI_OR_ADC: 7835 7821 case GAUDI_EVENT_MME0_QM ... GAUDI_EVENT_MME2_QM: 7836 7822 case GAUDI_EVENT_DMA0_QM ... GAUDI_EVENT_DMA7_QM: ··· 7845 7841 gaudi_print_irq_info(hdev, event_type, true); 7846 7842 gaudi_handle_qman_err(hdev, event_type, &event_mask); 7847 7843 hl_fw_unmask_irq(hdev, event_type); 7844 + event_mask |= (HL_NOTIFIER_EVENT_USER_ENGINE_ERR | HL_NOTIFIER_EVENT_DEVICE_RESET); 7848 7845 break; 7849 7846 7850 7847 case GAUDI_EVENT_RAZWI_OR_ADC_SW: 7851 7848 gaudi_print_irq_info(hdev, event_type, true); 7849 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7852 7850 goto reset_device; 7853 7851 7854 7852 case GAUDI_EVENT_TPC0_BMON_SPMU: ··· 7864 7858 case GAUDI_EVENT_DMA_BM_CH0 ... GAUDI_EVENT_DMA_BM_CH7: 7865 7859 gaudi_print_irq_info(hdev, event_type, false); 7866 7860 hl_fw_unmask_irq(hdev, event_type); 7861 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7867 7862 break; 7868 7863 7869 7864 case GAUDI_EVENT_NIC_SEI_0 ... GAUDI_EVENT_NIC_SEI_4: 7870 7865 gaudi_print_nic_axi_irq_info(hdev, event_type, &data); 7871 7866 hl_fw_unmask_irq(hdev, event_type); 7867 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7872 7868 break; 7873 7869 7874 7870 case GAUDI_EVENT_DMA_IF_SEI_0 ... 
GAUDI_EVENT_DMA_IF_SEI_3: ··· 7878 7870 gaudi_print_sm_sei_info(hdev, event_type, 7879 7871 &eq_entry->sm_sei_data); 7880 7872 rc = hl_state_dump(hdev); 7873 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7881 7874 if (rc) 7882 7875 dev_err(hdev->dev, 7883 7876 "Error during system state dump %d\n", rc); ··· 7889 7880 break; 7890 7881 7891 7882 case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E: 7883 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7892 7884 gaudi_print_clk_change_info(hdev, event_type); 7893 7885 hl_fw_unmask_irq(hdev, event_type); 7894 7886 break; ··· 7899 7889 dev_err(hdev->dev, 7900 7890 "Received high temp H/W interrupt %d (cause %d)\n", 7901 7891 event_type, cause); 7892 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 7902 7893 break; 7903 7894 7904 7895 case GAUDI_EVENT_DEV_RESET_REQ: 7905 7896 gaudi_print_irq_info(hdev, event_type, false); 7897 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7906 7898 goto reset_device; 7907 7899 7908 7900 case GAUDI_EVENT_PKT_QUEUE_OUT_SYNC: 7909 7901 gaudi_print_irq_info(hdev, event_type, false); 7910 7902 gaudi_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 7903 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7911 7904 goto reset_device; 7912 7905 7913 7906 case GAUDI_EVENT_FW_ALIVE_S: 7914 7907 gaudi_print_irq_info(hdev, event_type, false); 7915 7908 gaudi_print_fw_alive_info(hdev, &eq_entry->fw_alive); 7909 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 7916 7910 goto reset_device; 7917 7911 7918 7912 default: ··· 8080 8066 return 0; 8081 8067 } 8082 8068 8083 - static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, 8084 - u8 mask_len, struct seq_file *s) 8069 + static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 8070 + struct engines_data *e) 8085 8071 { 8086 8072 struct gaudi_device *gaudi = hdev->asic_specific; 8087 8073 const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n"; ··· 8093 8079 u64 offset; 8094 8080 int i, 
dma_id, port; 8095 8081 8096 - if (s) 8097 - seq_puts(s, 8082 + if (e) 8083 + hl_engine_data_sprintf(e, 8098 8084 "\nDMA is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 8099 8085 "--- ------- ------------ ---------- -------------\n"); 8100 8086 ··· 8111 8097 8112 8098 if (mask && !is_eng_idle) 8113 8099 set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask); 8114 - if (s) 8115 - seq_printf(s, fmt, dma_id, 8100 + if (e) 8101 + hl_engine_data_sprintf(e, fmt, dma_id, 8116 8102 is_eng_idle ? "Y" : "N", qm_glbl_sts0, 8117 8103 qm_cgm_sts, dma_core_sts0); 8118 8104 } 8119 8105 8120 - if (s) 8121 - seq_puts(s, 8106 + if (e) 8107 + hl_engine_data_sprintf(e, 8122 8108 "\nTPC is_idle QM_GLBL_STS0 QM_CGM_STS CFG_STATUS\n" 8123 8109 "--- ------- ------------ ---------- ----------\n"); 8124 8110 ··· 8133 8119 8134 8120 if (mask && !is_eng_idle) 8135 8121 set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask); 8136 - if (s) 8137 - seq_printf(s, fmt, i, 8122 + if (e) 8123 + hl_engine_data_sprintf(e, fmt, i, 8138 8124 is_eng_idle ? "Y" : "N", 8139 8125 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 8140 8126 } 8141 8127 8142 - if (s) 8143 - seq_puts(s, 8128 + if (e) 8129 + hl_engine_data_sprintf(e, 8144 8130 "\nMME is_idle QM_GLBL_STS0 QM_CGM_STS ARCH_STATUS\n" 8145 8131 "--- ------- ------------ ---------- -----------\n"); 8146 8132 ··· 8161 8147 8162 8148 if (mask && !is_eng_idle) 8163 8149 set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask); 8164 - if (s) { 8150 + if (e) { 8165 8151 if (!is_slave) 8166 - seq_printf(s, fmt, i, 8152 + hl_engine_data_sprintf(e, fmt, i, 8167 8153 is_eng_idle ? "Y" : "N", 8168 8154 qm_glbl_sts0, qm_cgm_sts, mme_arch_sts); 8169 8155 else 8170 - seq_printf(s, mme_slave_fmt, i, 8156 + hl_engine_data_sprintf(e, mme_slave_fmt, i, 8171 8157 is_eng_idle ? 
"Y" : "N", "-", 8172 8158 "-", mme_arch_sts); 8173 8159 } 8174 8160 } 8175 8161 8176 - if (s) 8177 - seq_puts(s, "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8162 + if (e) 8163 + hl_engine_data_sprintf(e, 8164 + "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 8178 8165 "--- ------- ------------ ----------\n"); 8179 8166 8180 8167 for (i = 0 ; i < (NIC_NUMBER_OF_ENGINES / 2) ; i++) { ··· 8189 8174 8190 8175 if (mask && !is_eng_idle) 8191 8176 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8192 - if (s) 8193 - seq_printf(s, nic_fmt, port, 8177 + if (e) 8178 + hl_engine_data_sprintf(e, nic_fmt, port, 8194 8179 is_eng_idle ? "Y" : "N", 8195 8180 qm_glbl_sts0, qm_cgm_sts); 8196 8181 } ··· 8204 8189 8205 8190 if (mask && !is_eng_idle) 8206 8191 set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask); 8207 - if (s) 8208 - seq_printf(s, nic_fmt, port, 8192 + if (e) 8193 + hl_engine_data_sprintf(e, nic_fmt, port, 8209 8194 is_eng_idle ? "Y" : "N", 8210 8195 qm_glbl_sts0, qm_cgm_sts); 8211 8196 } 8212 8197 } 8213 8198 8214 - if (s) 8215 - seq_puts(s, "\n"); 8199 + if (e) 8200 + hl_engine_data_sprintf(e, "\n"); 8216 8201 8217 8202 return is_idle; 8218 8203 } ··· 8407 8392 goto destroy_internal_cb_pool; 8408 8393 } 8409 8394 8410 - mutex_lock(&ctx->mmu_lock); 8395 + mutex_lock(&hdev->mmu_lock); 8411 8396 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, 8412 8397 hdev->internal_cb_pool_dma_addr, 8413 8398 HOST_SPACE_INTERNAL_CB_SZ); 8414 8399 8415 8400 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 8416 - mutex_unlock(&ctx->mmu_lock); 8401 + mutex_unlock(&hdev->mmu_lock); 8417 8402 8418 8403 if (rc) 8419 8404 goto unreserve_internal_cb_pool; ··· 8440 8425 if (!(gaudi->hw_cap_initialized & HW_CAP_MMU)) 8441 8426 return; 8442 8427 8443 - mutex_lock(&ctx->mmu_lock); 8428 + mutex_lock(&hdev->mmu_lock); 8444 8429 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, 8445 8430 HOST_SPACE_INTERNAL_CB_SZ); 8446 8431 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, 8447 8432 
HOST_SPACE_INTERNAL_CB_SZ); 8448 8433 hl_mmu_invalidate_cache(hdev, true, MMU_OP_USERPTR); 8449 - mutex_unlock(&ctx->mmu_lock); 8434 + mutex_unlock(&hdev->mmu_lock); 8450 8435 8451 8436 gen_pool_destroy(hdev->internal_cb_pool); 8452 8437 ··· 9163 9148 dev_vrm_attr_grp->attrs = gaudi_vrm_dev_attrs; 9164 9149 } 9165 9150 9151 + static int gaudi_send_device_activity(struct hl_device *hdev, bool open) 9152 + { 9153 + return 0; 9154 + } 9155 + 9166 9156 static const struct hl_asic_funcs gaudi_funcs = { 9167 9157 .early_init = gaudi_early_init, 9168 9158 .early_fini = gaudi_early_fini, ··· 9212 9192 .send_heartbeat = gaudi_send_heartbeat, 9213 9193 .debug_coresight = gaudi_debug_coresight, 9214 9194 .is_device_idle = gaudi_is_device_idle, 9215 - .non_hard_reset_late_init = gaudi_non_hard_reset_late_init, 9195 + .compute_reset_late_init = gaudi_compute_reset_late_init, 9216 9196 .hw_queues_lock = gaudi_hw_queues_lock, 9217 9197 .hw_queues_unlock = gaudi_hw_queues_unlock, 9218 - .kdma_lock = NULL, 9219 - .kdma_unlock = NULL, 9220 9198 .get_pci_id = gaudi_get_pci_id, 9221 9199 .get_eeprom_data = gaudi_get_eeprom_data, 9222 9200 .get_monitor_dump = gaudi_get_monitor_dump, ··· 9260 9242 .mmu_get_real_page_size = hl_mmu_get_real_page_size, 9261 9243 .access_dev_mem = hl_access_dev_mem, 9262 9244 .set_dram_bar_base = gaudi_set_hbm_bar_base, 9245 + .send_device_activity = gaudi_send_device_activity, 9263 9246 }; 9264 9247 9265 9248 /**
+469 -202
drivers/misc/habanalabs/gaudi2/gaudi2.c
··· 21 21 22 22 #define GAUDI2_DMA_POOL_BLK_SIZE SZ_256 /* 256 bytes */ 23 23 24 - #define GAUDI2_RESET_TIMEOUT_MSEC 500 /* 500ms */ 24 + #define GAUDI2_RESET_TIMEOUT_MSEC 2000 /* 2000ms */ 25 25 #define GAUDI2_RESET_POLL_TIMEOUT_USEC 50000 /* 50ms */ 26 26 #define GAUDI2_PLDM_HRESET_TIMEOUT_MSEC 25000 /* 25s */ 27 27 #define GAUDI2_PLDM_SRESET_TIMEOUT_MSEC 25000 /* 25s */ ··· 116 116 #define MMU_RANGE_INV_EN_SHIFT 0 117 117 #define MMU_RANGE_INV_ASID_EN_SHIFT 1 118 118 #define MMU_RANGE_INV_ASID_SHIFT 2 119 + 120 + /* The last SPI_SEI cause bit, "burst_fifo_full", is expected to be triggered in PMMU because it has 121 + * a 2 entries FIFO, and hence it is not enabled for it. 122 + */ 123 + #define GAUDI2_PMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 2, 0) 124 + #define GAUDI2_HMMU_SPI_SEI_ENABLE_MASK GENMASK(GAUDI2_NUM_OF_MMU_SPI_SEI_CAUSE - 1, 0) 119 125 120 126 #define GAUDI2_MAX_STRING_LEN 64 121 127 ··· 616 610 "qman_axi_err", 617 611 "wap sei (wbc axi err)", 618 612 "arc sei", 619 - "mme_cfg_unalign_addr", 613 + "cfg access error", 620 614 "qm_sw_err", 621 615 "sbte_dbg_intr_0", 622 616 "sbte_dbg_intr_1", ··· 1531 1525 RTR_ID_X_Y(17, 11) 1532 1526 }; 1533 1527 1528 + enum rtr_id { 1529 + DCORE0_RTR0, 1530 + DCORE0_RTR1, 1531 + DCORE0_RTR2, 1532 + DCORE0_RTR3, 1533 + DCORE0_RTR4, 1534 + DCORE0_RTR5, 1535 + DCORE0_RTR6, 1536 + DCORE0_RTR7, 1537 + DCORE1_RTR0, 1538 + DCORE1_RTR1, 1539 + DCORE1_RTR2, 1540 + DCORE1_RTR3, 1541 + DCORE1_RTR4, 1542 + DCORE1_RTR5, 1543 + DCORE1_RTR6, 1544 + DCORE1_RTR7, 1545 + DCORE2_RTR0, 1546 + DCORE2_RTR1, 1547 + DCORE2_RTR2, 1548 + DCORE2_RTR3, 1549 + DCORE2_RTR4, 1550 + DCORE2_RTR5, 1551 + DCORE2_RTR6, 1552 + DCORE2_RTR7, 1553 + DCORE3_RTR0, 1554 + DCORE3_RTR1, 1555 + DCORE3_RTR2, 1556 + DCORE3_RTR3, 1557 + DCORE3_RTR4, 1558 + DCORE3_RTR5, 1559 + DCORE3_RTR6, 1560 + DCORE3_RTR7, 1561 + }; 1562 + 1534 1563 static const u32 gaudi2_tpc_initiator_rtr_id[NUM_OF_TPC_PER_DCORE * NUM_OF_DCORES + 1] = { 1535 - 1, 1, 
2, 2, 3, 3, 14, 14, 13, 13, 12, 12, 19, 19, 18, 18, 17, 1536 - 17, 28, 28, 29, 29, 30, 30, 0 1564 + DCORE0_RTR1, DCORE0_RTR1, DCORE0_RTR2, DCORE0_RTR2, DCORE0_RTR3, DCORE0_RTR3, 1565 + DCORE1_RTR6, DCORE1_RTR6, DCORE1_RTR5, DCORE1_RTR5, DCORE1_RTR4, DCORE1_RTR4, 1566 + DCORE2_RTR3, DCORE2_RTR3, DCORE2_RTR2, DCORE2_RTR2, DCORE2_RTR1, DCORE2_RTR1, 1567 + DCORE3_RTR4, DCORE3_RTR4, DCORE3_RTR5, DCORE3_RTR5, DCORE3_RTR6, DCORE3_RTR6, 1568 + DCORE0_RTR0 1537 1569 }; 1538 1570 1539 1571 static const u32 gaudi2_dec_initiator_rtr_id[NUMBER_OF_DEC] = { 1540 - 0, 0, 15, 15, 16, 16, 31, 31, 0, 0 1572 + DCORE0_RTR0, DCORE0_RTR0, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, DCORE2_RTR0, 1573 + DCORE3_RTR7, DCORE3_RTR7, DCORE0_RTR0, DCORE0_RTR0 1541 1574 }; 1542 1575 1543 1576 static const u32 gaudi2_nic_initiator_rtr_id[NIC_NUMBER_OF_MACROS] = { 1544 - 15, 15, 15, 15, 15, 16, 16, 16, 16, 31, 31, 31 1577 + DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE1_RTR7, DCORE2_RTR0, 1578 + DCORE2_RTR0, DCORE2_RTR0, DCORE2_RTR0, DCORE3_RTR7, DCORE3_RTR7, DCORE3_RTR7 1545 1579 }; 1546 1580 1547 1581 struct sft_info { ··· 1594 1548 }; 1595 1549 1596 1550 static const u32 gaudi2_pdma_initiator_rtr_id[NUM_OF_PDMA] = { 1597 - 0, 0 1551 + DCORE0_RTR0, DCORE0_RTR0 1598 1552 }; 1599 1553 1600 1554 static const u32 gaudi2_rot_initiator_rtr_id[NUM_OF_ROT] = { 1601 - 16, 31 1555 + DCORE2_RTR0, DCORE3_RTR7 1602 1556 }; 1603 1557 1604 1558 struct mme_initiators_rtr_id { ··· 1709 1663 }; 1710 1664 1711 1665 struct gaudi2_tpc_idle_data { 1712 - struct seq_file *s; 1666 + struct engines_data *e; 1713 1667 unsigned long *mask; 1714 1668 bool *is_idle; 1715 1669 const char *tpc_fmt; ··· 1752 1706 int dcore, inst, tpc_seq; 1753 1707 u32 offset; 1754 1708 1709 + /* init the return code */ 1710 + ctx->rc = 0; 1711 + 1755 1712 for (dcore = 0; dcore < NUM_OF_DCORES; dcore++) { 1756 1713 for (inst = 0; inst < NUM_OF_TPC_PER_DCORE; inst++) { 1757 1714 tpc_seq = dcore * NUM_OF_TPC_PER_DCORE + inst; ··· 1764 1715 
1765 1716 offset = (DCORE_OFFSET * dcore) + (DCORE_TPC_OFFSET * inst); 1766 1717 1767 - ctx->fn(hdev, dcore, inst, offset, ctx->data); 1718 + ctx->fn(hdev, dcore, inst, offset, ctx); 1719 + if (ctx->rc) { 1720 + dev_err(hdev->dev, "TPC iterator failed for DCORE%d TPC%d\n", 1721 + dcore, inst); 1722 + return; 1723 + } 1768 1724 } 1769 1725 } 1770 1726 ··· 1778 1724 1779 1725 /* special check for PCI TPC (DCORE0_TPC6) */ 1780 1726 offset = DCORE_TPC_OFFSET * (NUM_DCORE0_TPC - 1); 1781 - ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx->data); 1727 + ctx->fn(hdev, 0, NUM_DCORE0_TPC - 1, offset, ctx); 1728 + if (ctx->rc) 1729 + dev_err(hdev->dev, "TPC iterator failed for DCORE0 TPC6\n"); 1782 1730 } 1783 1731 1784 1732 static bool gaudi2_host_phys_addr_valid(u64 addr) ··· 2029 1973 prop->pmmu_huge.end_addr = VA_HOST_SPACE_HPAGE_END; 2030 1974 } 2031 1975 1976 + prop->num_engine_cores = CPU_ID_MAX; 2032 1977 prop->cfg_size = CFG_SIZE; 2033 1978 prop->max_asid = MAX_ASID; 2034 1979 prop->num_of_events = GAUDI2_EVENT_SIZE; ··· 2061 2004 prop->gic_interrupts_enable = true; 2062 2005 2063 2006 prop->server_type = HL_SERVER_TYPE_UNKNOWN; 2064 - 2065 - prop->cb_va_start_addr = VA_HOST_SPACE_USER_MAPPED_CB_START; 2066 - prop->cb_va_end_addr = VA_HOST_SPACE_USER_MAPPED_CB_END; 2067 2007 2068 2008 prop->max_dec = NUMBER_OF_DEC; 2069 2009 ··· 2531 2477 struct asic_fixed_properties *prop = &hdev->asic_prop; 2532 2478 struct pci_dev *pdev = hdev->pdev; 2533 2479 resource_size_t pci_bar_size; 2534 - u32 fw_boot_status; 2535 2480 int rc; 2536 2481 2537 2482 rc = gaudi2_set_fixed_properties(hdev); ··· 2558 2505 prop->dram_pci_bar_size = pci_resource_len(pdev, DRAM_BAR_ID); 2559 2506 hdev->dram_pci_bar_start = pci_resource_start(pdev, DRAM_BAR_ID); 2560 2507 2561 - /* If FW security is enabled at this point it means no access to ELBI */ 2562 - if (hdev->asic_prop.fw_security_enabled) { 2563 - hdev->asic_prop.iatu_done_by_fw = true; 2564 - goto pci_init; 2565 - } 2566 - 2567 - rc = 
hl_pci_elbi_read(hdev, CFG_BASE + mmCPU_BOOT_DEV_STS0, &fw_boot_status); 2568 - if (rc) 2569 - goto free_queue_props; 2570 - 2571 - /* Check whether FW is configuring iATU */ 2572 - if ((fw_boot_status & CPU_BOOT_DEV_STS0_ENABLED) && 2573 - (fw_boot_status & CPU_BOOT_DEV_STS0_FW_IATU_CONF_EN)) 2508 + /* 2509 + * Only in pldm driver config iATU 2510 + */ 2511 + if (hdev->pldm) 2512 + hdev->asic_prop.iatu_done_by_fw = false; 2513 + else 2574 2514 hdev->asic_prop.iatu_done_by_fw = true; 2575 2515 2576 - pci_init: 2577 2516 rc = hl_pci_init(hdev); 2578 2517 if (rc) 2579 2518 goto free_queue_props; ··· 2721 2676 struct gaudi2_device *gaudi2 = hdev->asic_specific; 2722 2677 int rc; 2723 2678 2679 + hdev->asic_prop.supports_advanced_cpucp_rc = true; 2680 + 2724 2681 rc = hl_fw_send_pci_access_msg(hdev, CPUCP_PACKET_ENABLE_PCI_ACCESS, 2725 2682 gaudi2->virt_msix_db_dma_addr); 2726 2683 if (rc) { ··· 2750 2703 2751 2704 static void gaudi2_late_fini(struct hl_device *hdev) 2752 2705 { 2753 - const struct hwmon_channel_info **channel_info_arr; 2754 - int i = 0; 2755 - 2756 - if (!hdev->hl_chip_info->info) 2757 - return; 2758 - 2759 - channel_info_arr = hdev->hl_chip_info->info; 2760 - 2761 - while (channel_info_arr[i]) { 2762 - kfree(channel_info_arr[i]->config); 2763 - kfree(channel_info_arr[i]); 2764 - i++; 2765 - } 2766 - 2767 - kfree(channel_info_arr); 2768 - 2769 - hdev->hl_chip_info->info = NULL; 2706 + hl_hwmon_release_resources(hdev); 2770 2707 } 2771 2708 2772 2709 static void gaudi2_user_mapped_dec_init(struct gaudi2_device *gaudi2, u32 start_idx) ··· 3025 2994 } 3026 2995 3027 2996 spin_lock_init(&gaudi2->hw_queues_lock); 3028 - spin_lock_init(&gaudi2->kdma_lock); 3029 2997 3030 2998 gaudi2->scratchpad_kernel_address = hl_asic_dma_alloc_coherent(hdev, PAGE_SIZE, 3031 2999 &gaudi2->scratchpad_bus_address, ··· 3581 3551 rc = gaudi2_dec_enable_msix(hdev); 3582 3552 if (rc) { 3583 3553 dev_err(hdev->dev, "Failed to enable decoder IRQ"); 3584 - goto free_completion_irq; 
3554 + goto free_event_irq; 3585 3555 } 3586 3556 3587 3557 for (i = GAUDI2_IRQ_NUM_USER_FIRST, j = prop->user_dec_intr_count, user_irq_init_cnt = 0; ··· 3611 3581 } 3612 3582 3613 3583 gaudi2_dec_disable_msix(hdev, GAUDI2_IRQ_NUM_SHARED_DEC1_ABNRM + 1); 3584 + 3585 + free_event_irq: 3586 + irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_EVENT_QUEUE); 3587 + free_irq(irq, cq); 3614 3588 3615 3589 free_completion_irq: 3616 3590 irq = pci_irq_vector(hdev->pdev, GAUDI2_IRQ_NUM_COMPLETION); ··· 3779 3745 gaudi2_stop_pcie_dec(hdev); 3780 3746 } 3781 3747 3782 - static void gaudi2_halt_arc(struct hl_device *hdev, u32 cpu_id) 3748 + static void gaudi2_set_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3783 3749 { 3784 3750 u32 reg_base, reg_val; 3785 3751 3786 3752 reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3753 + if (run_mode == HL_ENGINE_CORE_RUN) 3754 + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 1); 3755 + else 3756 + reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 3787 3757 3788 - /* Halt ARC */ 3789 - reg_val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_HALT_REQ_MASK, 1); 3790 3758 WREG32(reg_base + ARC_HALT_REQ_OFFSET, reg_val); 3791 3759 } 3792 3760 ··· 3798 3762 3799 3763 for (arc_id = CPU_ID_SCHED_ARC0; arc_id < CPU_ID_MAX; arc_id++) { 3800 3764 if (gaudi2_is_arc_enabled(hdev, arc_id)) 3801 - gaudi2_halt_arc(hdev, arc_id); 3765 + gaudi2_set_arc_running_mode(hdev, arc_id, HL_ENGINE_CORE_HALT); 3802 3766 } 3767 + } 3768 + 3769 + static int gaudi2_verify_arc_running_mode(struct hl_device *hdev, u32 cpu_id, u32 run_mode) 3770 + { 3771 + int rc; 3772 + u32 reg_base, val, ack_mask, timeout_usec = 100000; 3773 + 3774 + if (hdev->pldm) 3775 + timeout_usec *= 100; 3776 + 3777 + reg_base = gaudi2_arc_blocks_bases[cpu_id]; 3778 + if (run_mode == HL_ENGINE_CORE_RUN) 3779 + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_RUN_ACK_MASK; 3780 + else 3781 + ack_mask = ARC_FARM_ARC0_AUX_RUN_HALT_ACK_HALT_ACK_MASK; 3782 + 
3783 + rc = hl_poll_timeout(hdev, reg_base + ARC_HALT_ACK_OFFSET, 3784 + val, ((val & ack_mask) == ack_mask), 3785 + 1000, timeout_usec); 3786 + 3787 + if (!rc) { 3788 + /* Clear */ 3789 + val = FIELD_PREP(ARC_FARM_ARC0_AUX_RUN_HALT_REQ_RUN_REQ_MASK, 0); 3790 + WREG32(reg_base + ARC_HALT_REQ_OFFSET, val); 3791 + } 3792 + 3793 + return rc; 3803 3794 } 3804 3795 3805 3796 static void gaudi2_reset_arcs(struct hl_device *hdev) ··· 3853 3790 3854 3791 queue_id = GAUDI2_QUEUE_ID_NIC_0_0; 3855 3792 3856 - for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) 3793 + for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++, queue_id += NUM_OF_PQ_PER_QMAN) { 3794 + if (!(hdev->nic_ports_mask & BIT(i))) 3795 + continue; 3796 + 3857 3797 gaudi2_qman_manual_flush_common(hdev, queue_id); 3798 + } 3799 + } 3800 + 3801 + static int gaudi2_set_engine_cores(struct hl_device *hdev, u32 *core_ids, 3802 + u32 num_cores, u32 core_command) 3803 + { 3804 + int i, rc; 3805 + 3806 + 3807 + for (i = 0 ; i < num_cores ; i++) { 3808 + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) 3809 + gaudi2_set_arc_running_mode(hdev, core_ids[i], core_command); 3810 + } 3811 + 3812 + for (i = 0 ; i < num_cores ; i++) { 3813 + if (gaudi2_is_arc_enabled(hdev, core_ids[i])) { 3814 + rc = gaudi2_verify_arc_running_mode(hdev, core_ids[i], core_command); 3815 + 3816 + if (rc) { 3817 + dev_err(hdev->dev, "failed to %s arc: %d\n", 3818 + (core_command == HL_ENGINE_CORE_HALT) ? 3819 + "HALT" : "RUN", core_ids[i]); 3820 + return -1; 3821 + } 3822 + } 3823 + } 3824 + 3825 + return 0; 3858 3826 } 3859 3827 3860 3828 static void gaudi2_halt_engines(struct hl_device *hdev, bool hard_reset, bool fw_reset) ··· 4218 4124 WREG32(reg_base + QM_GLBL_CFG2_OFFSET, 0); 4219 4125 4220 4126 /* Enable the QMAN channel. 4221 - * PDMA1 QMAN configuration is different, as we do not allow user to 4222 - * access CP2/3, it is reserved for the ARC usage. 
4127 + * PDMA QMAN configuration is different, as we do not allow user to 4128 + * access some of the CPs. 4129 + * PDMA0: CP2/3 are reserved for the ARC usage. 4130 + * PDMA1: CP1/2/3 are reserved for the ARC usage. 4223 4131 */ 4224 4132 if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_1_0]) 4225 4133 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA1_QMAN_ENABLE); 4134 + else if (reg_base == gaudi2_qm_blocks_bases[GAUDI2_QUEUE_ID_PDMA_0_0]) 4135 + WREG32(reg_base + QM_GLBL_CFG0_OFFSET, PDMA0_QMAN_ENABLE); 4226 4136 else 4227 4137 WREG32(reg_base + QM_GLBL_CFG0_OFFSET, QMAN_ENABLE); 4228 4138 } ··· 4599 4501 }; 4600 4502 4601 4503 static void gaudi2_init_tpc_config(struct hl_device *hdev, int dcore, int inst, 4602 - u32 offset, void *data) 4504 + u32 offset, struct iterate_module_ctx *ctx) 4603 4505 { 4604 4506 struct gaudi2_device *gaudi2 = hdev->asic_specific; 4605 - struct gaudi2_tpc_init_cfg_data *cfg_data = data; 4507 + struct gaudi2_tpc_init_cfg_data *cfg_data = ctx->data; 4606 4508 u32 queue_id_base; 4607 4509 u8 seq; 4608 4510 ··· 5054 4956 return 0; 5055 4957 } 5056 4958 5057 - static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, 5058 - u32 stlb_base) 4959 + static int gaudi2_mmu_init_common(struct hl_device *hdev, u32 mmu_base, u32 stlb_base) 5059 4960 { 5060 4961 u32 status, timeout_usec; 5061 4962 int rc; ··· 5082 4985 return rc; 5083 4986 5084 4987 WREG32(mmu_base + MMU_BYPASS_OFFSET, 0); 5085 - WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, 0xF); 5086 4988 5087 4989 rc = hl_poll_timeout( 5088 4990 hdev, ··· 5138 5042 DCORE0_HMMU0_MMU_STATIC_MULTI_PAGE_SIZE_CFG_8_BITS_HOP_MODE_EN_MASK); 5139 5043 } 5140 5044 5045 + WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_PMMU_SPI_SEI_ENABLE_MASK); 5046 + 5141 5047 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5142 5048 if (rc) 5143 5049 return rc; ··· 5189 5091 5190 5092 RMWREG32(stlb_base + STLB_HOP_CONFIGURATION_OFFSET, 1, 5191 5093 
STLB_HOP_CONFIGURATION_ONLY_LARGE_PAGE_MASK); 5094 + 5095 + WREG32(mmu_base + MMU_SPI_SEI_MASK_OFFSET, GAUDI2_HMMU_SPI_SEI_ENABLE_MASK); 5192 5096 5193 5097 rc = gaudi2_mmu_init_common(hdev, mmu_base, stlb_base); 5194 5098 if (rc) ··· 5439 5339 5440 5340 if (!driver_performs_reset) { 5441 5341 /* set SP to indicate reset request sent to FW */ 5442 - WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 5342 + if (dyn_regs->cpu_rst_status) 5343 + WREG32(le32_to_cpu(dyn_regs->cpu_rst_status), CPU_RST_STATUS_NA); 5344 + else 5345 + WREG32(mmCPU_RST_STATUS_TO_HOST, CPU_RST_STATUS_NA); 5443 5346 5444 5347 WREG32(le32_to_cpu(dyn_regs->gic_host_soft_rst_irq), 5445 5348 gaudi2_irq_map_table[GAUDI2_EVENT_CPU_SOFT_RESET].cpu_id); ··· 5630 5527 u64 hw_test_cap_bit = 0; 5631 5528 5632 5529 switch (hw_queue_id) { 5633 - case GAUDI2_QUEUE_ID_PDMA_0_0 ... GAUDI2_QUEUE_ID_PDMA_1_1: 5530 + case GAUDI2_QUEUE_ID_PDMA_0_0: 5531 + case GAUDI2_QUEUE_ID_PDMA_0_1: 5532 + case GAUDI2_QUEUE_ID_PDMA_1_0: 5634 5533 hw_cap_mask = HW_CAP_PDMA_MASK; 5635 5534 break; 5636 - 5637 5535 case GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0...GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3: 5638 5536 hw_test_cap_bit = HW_CAP_EDMA_SHIFT + 5639 5537 ((hw_queue_id - GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0) >> 2); ··· 6233 6129 return ret_val; 6234 6130 } 6235 6131 6236 - static int gaudi2_non_hard_reset_late_init(struct hl_device *hdev) 6132 + static int gaudi2_compute_reset_late_init(struct hl_device *hdev) 6237 6133 { 6238 6134 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6239 6135 size_t irq_arr_size; ··· 6251 6147 } 6252 6148 6253 6149 static void gaudi2_is_tpc_engine_idle(struct hl_device *hdev, int dcore, int inst, u32 offset, 6254 - void *data) 6150 + struct iterate_module_ctx *ctx) 6255 6151 { 6256 - struct gaudi2_tpc_idle_data *idle_data = (struct gaudi2_tpc_idle_data *)data; 6152 + struct gaudi2_tpc_idle_data *idle_data = ctx->data; 6257 6153 u32 tpc_cfg_sts, qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts; 6258 6154 bool is_eng_idle; 
6259 6155 int engine_idx; ··· 6276 6172 if (idle_data->mask && !is_eng_idle) 6277 6173 set_bit(engine_idx, idle_data->mask); 6278 6174 6279 - if (idle_data->s) 6280 - seq_printf(idle_data->s, idle_data->tpc_fmt, dcore, inst, 6175 + if (idle_data->e) 6176 + hl_engine_data_sprintf(idle_data->e, 6177 + idle_data->tpc_fmt, dcore, inst, 6281 6178 is_eng_idle ? "Y" : "N", 6282 6179 qm_glbl_sts0, qm_cgm_sts, tpc_cfg_sts); 6283 6180 } 6284 6181 6285 - static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, 6286 - u8 mask_len, struct seq_file *s) 6182 + static bool gaudi2_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 6183 + struct engines_data *e) 6287 6184 { 6288 6185 u32 qm_glbl_sts0, qm_glbl_sts1, qm_cgm_sts, dma_core_idle_ind_mask, 6289 6186 mme_arch_sts, dec_swreg15, dec_enabled_bit; ··· 6302 6197 6303 6198 struct gaudi2_tpc_idle_data tpc_idle_data = { 6304 6199 .tpc_fmt = "%-6d%-5d%-9s%#-14x%#-12x%#x\n", 6305 - .s = s, 6200 + .e = e, 6306 6201 .mask = mask, 6307 6202 .is_idle = &is_idle, 6308 6203 }; ··· 6314 6209 int engine_idx, i, j; 6315 6210 6316 6211 /* EDMA, Two engines per Dcore */ 6317 - if (s) 6318 - seq_puts(s, 6212 + if (e) 6213 + hl_engine_data_sprintf(e, 6319 6214 "\nCORE EDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6320 6215 "---- ---- ------- ------------ ----------------------\n"); 6321 6216 ··· 6344 6239 if (mask && !is_eng_idle) 6345 6240 set_bit(engine_idx, mask); 6346 6241 6347 - if (s) 6348 - seq_printf(s, edma_fmt, i, j, 6349 - is_eng_idle ? "Y" : "N", 6350 - qm_glbl_sts0, 6351 - dma_core_idle_ind_mask); 6242 + if (e) 6243 + hl_engine_data_sprintf(e, edma_fmt, i, j, 6244 + is_eng_idle ? 
"Y" : "N", 6245 + qm_glbl_sts0, 6246 + dma_core_idle_ind_mask); 6352 6247 } 6353 6248 } 6354 6249 6355 6250 /* PDMA, Two engines in Full chip */ 6356 - if (s) 6357 - seq_puts(s, 6358 - "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6359 - "---- ------- ------------ ----------------------\n"); 6251 + if (e) 6252 + hl_engine_data_sprintf(e, 6253 + "\nPDMA is_idle QM_GLBL_STS0 DMA_CORE_IDLE_IND_MASK\n" 6254 + "---- ------- ------------ ----------------------\n"); 6360 6255 6361 6256 for (i = 0 ; i < NUM_OF_PDMA ; i++) { 6362 6257 engine_idx = GAUDI2_ENGINE_ID_PDMA_0 + i; ··· 6374 6269 if (mask && !is_eng_idle) 6375 6270 set_bit(engine_idx, mask); 6376 6271 6377 - if (s) 6378 - seq_printf(s, pdma_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, 6379 - dma_core_idle_ind_mask); 6272 + if (e) 6273 + hl_engine_data_sprintf(e, pdma_fmt, i, is_eng_idle ? "Y" : "N", 6274 + qm_glbl_sts0, dma_core_idle_ind_mask); 6380 6275 } 6381 6276 6382 6277 /* NIC, twelve macros in Full chip */ 6383 - if (s && hdev->nic_ports_mask) 6384 - seq_puts(s, 6385 - "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 6386 - "--- ------- ------------ ----------\n"); 6278 + if (e && hdev->nic_ports_mask) 6279 + hl_engine_data_sprintf(e, 6280 + "\nNIC is_idle QM_GLBL_STS0 QM_CGM_STS\n" 6281 + "--- ------- ------------ ----------\n"); 6387 6282 6388 6283 for (i = 0 ; i < NIC_NUMBER_OF_ENGINES ; i++) { 6389 6284 if (!(i & 1)) ··· 6407 6302 if (mask && !is_eng_idle) 6408 6303 set_bit(engine_idx, mask); 6409 6304 6410 - if (s) 6411 - seq_printf(s, nic_fmt, i, is_eng_idle ? "Y" : "N", qm_glbl_sts0, 6412 - qm_cgm_sts); 6305 + if (e) 6306 + hl_engine_data_sprintf(e, nic_fmt, i, is_eng_idle ? 
"Y" : "N", 6307 + qm_glbl_sts0, qm_cgm_sts); 6413 6308 } 6414 6309 6415 - if (s) 6416 - seq_puts(s, 6417 - "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 6418 - "--- ---- ------- ------------ ---------------\n"); 6310 + if (e) 6311 + hl_engine_data_sprintf(e, 6312 + "\nMME Stub is_idle QM_GLBL_STS0 MME_ARCH_STATUS\n" 6313 + "--- ---- ------- ------------ ---------------\n"); 6419 6314 /* MME, one per Dcore */ 6420 6315 for (i = 0 ; i < NUM_OF_DCORES ; i++) { 6421 6316 engine_idx = GAUDI2_DCORE0_ENGINE_ID_MME + i * GAUDI2_ENGINE_ID_DCORE_OFFSET; ··· 6432 6327 is_eng_idle &= IS_MME_IDLE(mme_arch_sts); 6433 6328 is_idle &= is_eng_idle; 6434 6329 6435 - if (s) 6436 - seq_printf(s, mme_fmt, i, "N", 6330 + if (e) 6331 + hl_engine_data_sprintf(e, mme_fmt, i, "N", 6437 6332 is_eng_idle ? "Y" : "N", 6438 6333 qm_glbl_sts0, 6439 6334 mme_arch_sts); ··· 6445 6340 /* 6446 6341 * TPC 6447 6342 */ 6448 - if (s && prop->tpc_enabled_mask) 6449 - seq_puts(s, 6343 + if (e && prop->tpc_enabled_mask) 6344 + hl_engine_data_sprintf(e, 6450 6345 "\nCORE TPC is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_IDLE_IND_MASK\n" 6451 6346 "---- --- -------- ------------ ---------- ----------------------\n"); 6452 6347 6453 6348 gaudi2_iterate_tpcs(hdev, &tpc_iter); 6454 6349 6455 6350 /* Decoders, two each Dcore and two shared PCIe decoders */ 6456 - if (s && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 6457 - seq_puts(s, 6351 + if (e && (prop->decoder_enabled_mask & (~PCIE_DEC_EN_MASK))) 6352 + hl_engine_data_sprintf(e, 6458 6353 "\nCORE DEC is_idle VSI_CMD_SWREG15\n" 6459 6354 "---- --- ------- ---------------\n"); 6460 6355 ··· 6475 6370 if (mask && !is_eng_idle) 6476 6371 set_bit(engine_idx, mask); 6477 6372 6478 - if (s) 6479 - seq_printf(s, dec_fmt, i, j, is_eng_idle ? "Y" : "N", dec_swreg15); 6373 + if (e) 6374 + hl_engine_data_sprintf(e, dec_fmt, i, j, 6375 + is_eng_idle ? 
"Y" : "N", dec_swreg15); 6480 6376 } 6481 6377 } 6482 6378 6483 - if (s && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 6484 - seq_puts(s, 6379 + if (e && (prop->decoder_enabled_mask & PCIE_DEC_EN_MASK)) 6380 + hl_engine_data_sprintf(e, 6485 6381 "\nPCIe DEC is_idle VSI_CMD_SWREG15\n" 6486 6382 "-------- ------- ---------------\n"); 6487 6383 ··· 6501 6395 if (mask && !is_eng_idle) 6502 6396 set_bit(engine_idx, mask); 6503 6397 6504 - if (s) 6505 - seq_printf(s, pcie_dec_fmt, i, is_eng_idle ? "Y" : "N", dec_swreg15); 6398 + if (e) 6399 + hl_engine_data_sprintf(e, pcie_dec_fmt, i, 6400 + is_eng_idle ? "Y" : "N", dec_swreg15); 6506 6401 } 6507 6402 6508 - if (s) 6509 - seq_puts(s, 6403 + if (e) 6404 + hl_engine_data_sprintf(e, 6510 6405 "\nCORE ROT is_idle QM_GLBL_STS0 QM_CGM_STS DMA_CORE_STS0\n" 6511 6406 "---- ---- ------- ------------ ---------- -------------\n"); 6512 6407 ··· 6526 6419 if (mask && !is_eng_idle) 6527 6420 set_bit(engine_idx, mask); 6528 6421 6529 - if (s) 6530 - seq_printf(s, rot_fmt, i, 0, is_eng_idle ? "Y" : "N", 6422 + if (e) 6423 + hl_engine_data_sprintf(e, rot_fmt, i, 0, is_eng_idle ? 
"Y" : "N", 6531 6424 qm_glbl_sts0, qm_cgm_sts, "-"); 6532 6425 } 6533 6426 ··· 6548 6441 struct gaudi2_device *gaudi2 = hdev->asic_specific; 6549 6442 6550 6443 spin_unlock(&gaudi2->hw_queues_lock); 6551 - } 6552 - 6553 - static void gaudi2_kdma_lock(struct hl_device *hdev, int dcore_id) 6554 - __acquires(&gaudi2->kdma_lock) 6555 - { 6556 - struct gaudi2_device *gaudi2 = hdev->asic_specific; 6557 - 6558 - spin_lock(&gaudi2->kdma_lock); 6559 - } 6560 - 6561 - static void gaudi2_kdma_unlock(struct hl_device *hdev, int dcore_id) 6562 - __releases(&gaudi2->kdma_lock) 6563 - { 6564 - struct gaudi2_device *gaudi2 = hdev->asic_specific; 6565 - 6566 - spin_unlock(&gaudi2->kdma_lock); 6567 6444 } 6568 6445 6569 6446 static u32 gaudi2_get_pci_id(struct hl_device *hdev) ··· 6816 6725 } 6817 6726 6818 6727 static void gaudi2_tpc_mmu_prepare(struct hl_device *hdev, int dcore, int inst, u32 offset, 6819 - void *data) 6728 + struct iterate_module_ctx *ctx) 6820 6729 { 6821 - struct gaudi2_tpc_mmu_data *mmu_data = (struct gaudi2_tpc_mmu_data *)data; 6730 + struct gaudi2_tpc_mmu_data *mmu_data = ctx->data; 6822 6731 6823 6732 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_MMU_BP + offset, 0); 6824 6733 WREG32(mmDCORE0_TPC0_CFG_AXUSER_HB_ASID + offset, mmu_data->rw_asid); ··· 7111 7020 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_lo_reg); 7112 7021 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_aw_razwi_id_reg); 7113 7022 } 7114 - 7115 - dev_err_ratelimited(hdev->dev, 7116 - "%s-RAZWI SHARED RR HBW WR error, captured address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n", 7117 - name, razwi_hi, razwi_lo, razwi_xy); 7118 7023 } else { 7119 7024 if (read_razwi_regs) { 7120 7025 razwi_hi = RREG32(rtr_mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HI); ··· 7121 7034 razwi_lo = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_lo_reg); 7122 7035 razwi_xy = le32_to_cpu(razwi_info->hbw.rr_ar_razwi_id_reg); 7123 7036 } 7124 - 7125 - dev_err_ratelimited(hdev->dev, 7126 - "%s-RAZWI SHARED RR HBW AR error, captured 
address HI 0x%x LO 0x%x, Initiator coordinates 0x%x\n", 7127 - name, razwi_hi, razwi_lo, razwi_xy); 7128 7037 } 7038 + 7039 + dev_err_ratelimited(hdev->dev, 7040 + "%s-RAZWI SHARED RR HBW %s error, address %#llx, Initiator coordinates 0x%x\n", 7041 + name, is_write ? "WR" : "RD", (u64)razwi_hi << 32 | razwi_lo, razwi_xy); 7129 7042 } 7130 7043 7131 7044 static void gaudi2_razwi_rr_lbw_shared_printf_info(struct hl_device *hdev, ··· 7383 7296 gaudi2_ack_module_razwi_event_handler(hdev, RAZWI_ROT, mod_idx, 0, NULL); 7384 7297 } 7385 7298 7386 - static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, 7299 + static const char *gaudi2_get_initiators_name(u32 rtr_id) 7300 + { 7301 + switch (rtr_id) { 7302 + case DCORE0_RTR0: 7303 + return "DEC0/1/8/9, TPC24, PDMA0/1, PMMU, PCIE_IF, EDMA0/2, HMMU0/2/4/6, CPU"; 7304 + case DCORE0_RTR1: 7305 + return "TPC0/1"; 7306 + case DCORE0_RTR2: 7307 + return "TPC2/3"; 7308 + case DCORE0_RTR3: 7309 + return "TPC4/5"; 7310 + case DCORE0_RTR4: 7311 + return "MME0_SBTE0/1"; 7312 + case DCORE0_RTR5: 7313 + return "MME0_WAP0/SBTE2"; 7314 + case DCORE0_RTR6: 7315 + return "MME0_CTRL_WR/SBTE3"; 7316 + case DCORE0_RTR7: 7317 + return "MME0_WAP1/CTRL_RD/SBTE4"; 7318 + case DCORE1_RTR0: 7319 + return "MME1_WAP1/CTRL_RD/SBTE4"; 7320 + case DCORE1_RTR1: 7321 + return "MME1_CTRL_WR/SBTE3"; 7322 + case DCORE1_RTR2: 7323 + return "MME1_WAP0/SBTE2"; 7324 + case DCORE1_RTR3: 7325 + return "MME1_SBTE0/1"; 7326 + case DCORE1_RTR4: 7327 + return "TPC10/11"; 7328 + case DCORE1_RTR5: 7329 + return "TPC8/9"; 7330 + case DCORE1_RTR6: 7331 + return "TPC6/7"; 7332 + case DCORE1_RTR7: 7333 + return "DEC2/3, NIC0/1/2/3/4, ARC_FARM, KDMA, EDMA1/3, HMMU1/3/5/7"; 7334 + case DCORE2_RTR0: 7335 + return "DEC4/5, NIC5/6/7/8, EDMA4/6, HMMU8/10/12/14, ROT0"; 7336 + case DCORE2_RTR1: 7337 + return "TPC16/17"; 7338 + case DCORE2_RTR2: 7339 + return "TPC14/15"; 7340 + case DCORE2_RTR3: 7341 + return "TPC12/13"; 7342 + case DCORE2_RTR4: 7343 + return 
"MME2_SBTE0/1"; 7344 + case DCORE2_RTR5: 7345 + return "MME2_WAP0/SBTE2"; 7346 + case DCORE2_RTR6: 7347 + return "MME2_CTRL_WR/SBTE3"; 7348 + case DCORE2_RTR7: 7349 + return "MME2_WAP1/CTRL_RD/SBTE4"; 7350 + case DCORE3_RTR0: 7351 + return "MME3_WAP1/CTRL_RD/SBTE4"; 7352 + case DCORE3_RTR1: 7353 + return "MME3_CTRL_WR/SBTE3"; 7354 + case DCORE3_RTR2: 7355 + return "MME3_WAP0/SBTE2"; 7356 + case DCORE3_RTR3: 7357 + return "MME3_SBTE0/1"; 7358 + case DCORE3_RTR4: 7359 + return "TPC18/19"; 7360 + case DCORE3_RTR5: 7361 + return "TPC20/21"; 7362 + case DCORE3_RTR6: 7363 + return "TPC22/23"; 7364 + case DCORE3_RTR7: 7365 + return "DEC6/7, NIC9/10/11, EDMA5/7, HMMU9/11/13/15, ROT1, PSOC"; 7366 + default: 7367 + return "N/A"; 7368 + } 7369 + } 7370 + 7371 + static void gaudi2_razwi_unmapped_addr_hbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7387 7372 u64 rtr_ctrl_base_addr, bool is_write) 7388 7373 { 7389 7374 u32 razwi_hi, razwi_lo; ··· 7464 7305 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_HI); 7465 7306 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_ADDR_LO); 7466 7307 7467 - dev_err_ratelimited(hdev->dev, 7468 - "RAZWI PSOC unmapped HBW WR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n", 7469 - rtr_ctrl_base_addr, razwi_hi, razwi_lo); 7470 - 7471 7308 /* Clear set indication */ 7472 7309 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AW_SET, 0x1); 7473 7310 } else { 7474 7311 razwi_hi = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_HI); 7475 - 7476 7312 razwi_lo = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_ADDR_LO); 7477 - 7478 - dev_err_ratelimited(hdev->dev, 7479 - "RAZWI PSOC unmapped HBW AR error, ctr_base 0x%llx, captured address HI 0x%x, LO 0x%x\n", 7480 - rtr_ctrl_base_addr, razwi_hi, razwi_lo); 7481 7313 7482 7314 /* Clear set indication */ 7483 7315 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_HBW_AR_SET, 0x1); 7484 7316 } 7317 + 7318 + dev_err_ratelimited(hdev->dev, 7319 + "RAZWI PSOC unmapped HBW %s error, rtr id %u, 
address %#llx\n", 7320 + is_write ? "WR" : "RD", rtr_id, (u64)razwi_hi << 32 | razwi_lo); 7321 + 7322 + dev_err_ratelimited(hdev->dev, 7323 + "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7485 7324 } 7486 7325 7487 - static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, 7488 - u64 rtr_ctrl_base_addr, bool is_write) 7326 + static void gaudi2_razwi_unmapped_addr_lbw_printf_info(struct hl_device *hdev, u32 rtr_id, 7327 + u64 rtr_ctrl_base_addr, bool is_write) 7489 7328 { 7490 7329 u32 razwi_addr; 7491 7330 7492 7331 if (is_write) { 7493 7332 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_ADDR); 7494 7333 7495 - dev_err_ratelimited(hdev->dev, 7496 - "RAZWI PSOC unmapped LBW WR error, ctr_base 0x%llx, captured address 0x%x\n", 7497 - rtr_ctrl_base_addr, razwi_addr); 7498 - 7499 7334 /* Clear set indication */ 7500 7335 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AW_SET, 0x1); 7501 7336 } else { 7502 7337 razwi_addr = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_ADDR); 7503 7338 7504 - dev_err_ratelimited(hdev->dev, 7505 - "RAZWI PSOC unmapped LBW AR error, ctr_base 0x%llx, captured address 0x%x\n", 7506 - rtr_ctrl_base_addr, razwi_addr); 7507 - 7508 7339 /* Clear set indication */ 7509 7340 WREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET, 0x1); 7510 7341 } 7342 + 7343 + dev_err_ratelimited(hdev->dev, 7344 + "RAZWI PSOC unmapped LBW %s error, rtr id %u, address %#x\n", 7345 + is_write ? 
"WR" : "RD", rtr_id, razwi_addr); 7346 + 7347 + dev_err_ratelimited(hdev->dev, 7348 + "Initiators: %s\n", gaudi2_get_initiators_name(rtr_id)); 7511 7349 } 7512 7350 7513 7351 /* PSOC RAZWI interrupt occurs only when trying to access a bad address */ ··· 7522 7366 } 7523 7367 7524 7368 razwi_mask_info = RREG32(mmPSOC_GLOBAL_CONF_RAZWI_MASK_INFO); 7525 - 7526 - xy = (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK) 7527 - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_SHIFT; 7369 + xy = FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_L_MASK, razwi_mask_info); 7528 7370 7529 7371 dev_err_ratelimited(hdev->dev, 7530 - "PSOC RAZWI interrupt: Mask %d, WAS_AR %d, WAS_AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 7531 - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK) 7532 - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_SHIFT, 7533 - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK) 7534 - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_SHIFT, 7535 - (razwi_mask_info & PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK) 7536 - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_SHIFT, xy, 7537 - (razwi_mask_info & 7538 - PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK) 7539 - >> PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_SHIFT); 7372 + "PSOC RAZWI interrupt: Mask %d, AR %d, AW %d, AXUSER_L 0x%x AXUSER_H 0x%x\n", 7373 + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_MASK_MASK, razwi_mask_info), 7374 + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AR_MASK, razwi_mask_info), 7375 + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_WAS_AW_MASK, razwi_mask_info), 7376 + xy, 7377 + FIELD_GET(PSOC_GLOBAL_CONF_RAZWI_MASK_INFO_AXUSER_H_MASK, razwi_mask_info)); 7378 + 7540 7379 if (xy == 0) { 7541 7380 dev_err_ratelimited(hdev->dev, 7542 7381 "PSOC RAZWI interrupt: received event from 0 rtr coordinates\n"); ··· 7561 7410 lbw_ar_set = RREG32(rtr_ctrl_base_addr + DEC_RAZWI_LBW_AR_SET); 7562 7411 7563 7412 if (hbw_aw_set) 7564 - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, 
rtr_ctrl_base_addr, true); 7413 + gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7414 + rtr_ctrl_base_addr, true); 7565 7415 7566 7416 if (hbw_ar_set) 7567 - gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_ctrl_base_addr, false); 7417 + gaudi2_razwi_unmapped_addr_hbw_printf_info(hdev, rtr_id, 7418 + rtr_ctrl_base_addr, false); 7568 7419 7569 7420 if (lbw_aw_set) 7570 - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, true); 7421 + gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7422 + rtr_ctrl_base_addr, true); 7571 7423 7572 7424 if (lbw_ar_set) 7573 - gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_ctrl_base_addr, false); 7425 + gaudi2_razwi_unmapped_addr_lbw_printf_info(hdev, rtr_id, 7426 + rtr_ctrl_base_addr, false); 7574 7427 7575 7428 clear: 7576 7429 /* Clear Interrupts only on pldm or if f/w doesn't handle interrupts */ ··· 7966 7811 gaudi2_dma_core_interrupts_cause[i]); 7967 7812 } 7968 7813 7814 + static void gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(struct hl_device *hdev) 7815 + { 7816 + u32 mstr_if_base_addr = mmPCIE_MSTR_RR_MSTR_IF_RR_SHRD_HBW_BASE, razwi_happened_addr; 7817 + 7818 + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AW_RAZWI_HAPPENED; 7819 + if (RREG32(razwi_happened_addr)) { 7820 + gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, 7821 + NULL); 7822 + WREG32(razwi_happened_addr, 0x1); 7823 + } 7824 + 7825 + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_HBW_AR_RAZWI_HAPPENED; 7826 + if (RREG32(razwi_happened_addr)) { 7827 + gaudi2_razwi_rr_hbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, 7828 + NULL); 7829 + WREG32(razwi_happened_addr, 0x1); 7830 + } 7831 + 7832 + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AW_RAZWI_HAPPENED; 7833 + if (RREG32(razwi_happened_addr)) { 7834 + gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, true, "PCIE", true, 7835 + NULL); 7836 + WREG32(razwi_happened_addr, 0x1); 
7837 + } 7838 + 7839 + razwi_happened_addr = mstr_if_base_addr + RR_SHRD_LBW_AR_RAZWI_HAPPENED; 7840 + if (RREG32(razwi_happened_addr)) { 7841 + gaudi2_razwi_rr_lbw_shared_printf_info(hdev, mstr_if_base_addr, false, "PCIE", true, 7842 + NULL); 7843 + WREG32(razwi_happened_addr, 0x1); 7844 + } 7845 + } 7846 + 7969 7847 static void gaudi2_print_pcie_addr_dec_info(struct hl_device *hdev, u64 intr_cause_data) 7970 7848 { 7971 7849 int i; 7972 7850 7973 - for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE; i++) 7974 - if (intr_cause_data & BIT_ULL(i)) 7975 - dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n", 7976 - gaudi2_pcie_addr_dec_error_cause[i]); 7851 + for (i = 0 ; i < GAUDI2_NUM_OF_PCIE_ADDR_DEC_ERR_CAUSE ; i++) { 7852 + if (!(intr_cause_data & BIT_ULL(i))) 7853 + continue; 7854 + 7855 + dev_err_ratelimited(hdev->dev, "PCIE ADDR DEC Error: %s\n", 7856 + gaudi2_pcie_addr_dec_error_cause[i]); 7857 + 7858 + switch (intr_cause_data & BIT_ULL(i)) { 7859 + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK: 7860 + break; 7861 + case PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK: 7862 + gaudi2_print_pcie_mstr_rr_mstr_if_razwi_info(hdev); 7863 + break; 7864 + } 7865 + } 7977 7866 } 7978 7867 7979 7868 static void gaudi2_handle_pif_fatal(struct hl_device *hdev, u64 intr_cause_data) ··· 8357 8158 return true; 8358 8159 } 8359 8160 8360 - dev_err_ratelimited(hdev->dev, 8361 - "System Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Critical(%u). Error cause: %s\n", 8362 - hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8363 - sei_data->hdr.is_critical, hbm_mc_sei_cause[cause_idx]); 8161 + if (sei_data->hdr.is_critical) 8162 + dev_err(hdev->dev, 8163 + "System Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). 
Error cause: %s\n", 8164 + hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8165 + hbm_mc_sei_cause[cause_idx]); 8166 + 8167 + else 8168 + dev_err_ratelimited(hdev->dev, 8169 + "System Non-Critical Error Interrupt - HBM(%u) MC(%u) MC_CH(%u) MC_PC(%u). Error cause: %s\n", 8170 + hbm_id, mc_id, sei_data->hdr.mc_channel, sei_data->hdr.mc_pseudo_channel, 8171 + hbm_mc_sei_cause[cause_idx]); 8364 8172 8365 8173 /* Print error-specific info */ 8366 8174 switch (cause_idx) { ··· 8577 8371 struct gaudi2_device *gaudi2 = hdev->asic_specific; 8578 8372 bool reset_required = false, skip_reset = false; 8579 8373 int index, sbte_index; 8374 + u64 event_mask = 0; 8580 8375 u16 event_type; 8581 8376 8582 8377 ctl = le32_to_cpu(eq_entry->hdr.ctl); ··· 8599 8392 fallthrough; 8600 8393 case GAUDI2_EVENT_ROTATOR0_SERR ... GAUDI2_EVENT_ROTATOR1_DERR: 8601 8394 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8395 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8602 8396 reset_required = gaudi2_handle_ecc_event(hdev, event_type, &eq_entry->ecc_data); 8603 8397 break; 8604 8398 ··· 8609 8401 fallthrough; 8610 8402 case GAUDI2_EVENT_NIC0_QM0 ... 
GAUDI2_EVENT_NIC11_QM1: 8611 8403 gaudi2_handle_qman_err(hdev, event_type); 8404 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8612 8405 break; 8613 8406 8614 8407 case GAUDI2_EVENT_ARC_AXI_ERROR_RESPONSE_0: 8615 8408 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8616 8409 gaudi2_handle_arc_farm_sei_err(hdev); 8410 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8617 8411 break; 8618 8412 8619 8413 case GAUDI2_EVENT_CPU_AXI_ERR_RSP: 8620 8414 gaudi2_handle_cpu_sei_err(hdev); 8415 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8621 8416 break; 8622 8417 8623 8418 case GAUDI2_EVENT_PDMA_CH0_AXI_ERR_RSP: 8624 8419 case GAUDI2_EVENT_PDMA_CH1_AXI_ERR_RSP: 8625 8420 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8626 8421 gaudi2_handle_qm_sei_err(hdev, event_type, &eq_entry->razwi_info); 8422 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8627 8423 break; 8628 8424 8629 8425 case GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE: ··· 8635 8423 index = event_type - GAUDI2_EVENT_ROTATOR0_AXI_ERROR_RESPONSE; 8636 8424 gaudi2_handle_rot_err(hdev, index, &eq_entry->razwi_with_intr_cause); 8637 8425 gaudi2_handle_qm_sei_err(hdev, event_type, NULL); 8426 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8638 8427 break; 8639 8428 8640 8429 case GAUDI2_EVENT_TPC0_AXI_ERR_RSP ... GAUDI2_EVENT_TPC24_AXI_ERR_RSP: ··· 8643 8430 gaudi2_tpc_ack_interrupts(hdev, index, "AXI_ERR_RSP", 8644 8431 &eq_entry->razwi_with_intr_cause); 8645 8432 gaudi2_handle_qm_sei_err(hdev, event_type, NULL); 8433 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8646 8434 break; 8647 8435 8648 8436 case GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE ... 
GAUDI2_EVENT_DEC9_AXI_ERR_RSPONSE: 8649 8437 index = event_type - GAUDI2_EVENT_DEC0_AXI_ERR_RSPONSE; 8650 8438 gaudi2_handle_dec_err(hdev, index, "AXI_ERR_RESPONSE", &eq_entry->razwi_info); 8439 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8651 8440 break; 8652 8441 8653 8442 case GAUDI2_EVENT_TPC0_KERNEL_ERR: ··· 8680 8465 index = (event_type - GAUDI2_EVENT_TPC0_KERNEL_ERR) / 8681 8466 (GAUDI2_EVENT_TPC1_KERNEL_ERR - GAUDI2_EVENT_TPC0_KERNEL_ERR); 8682 8467 gaudi2_tpc_ack_interrupts(hdev, index, "KRN_ERR", &eq_entry->razwi_with_intr_cause); 8468 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8683 8469 break; 8684 8470 8685 8471 case GAUDI2_EVENT_DEC0_SPI: ··· 8696 8480 index = (event_type - GAUDI2_EVENT_DEC0_SPI) / 8697 8481 (GAUDI2_EVENT_DEC1_SPI - GAUDI2_EVENT_DEC0_SPI); 8698 8482 gaudi2_handle_dec_err(hdev, index, "SPI", &eq_entry->razwi_info); 8483 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8699 8484 break; 8700 8485 8701 8486 case GAUDI2_EVENT_MME0_CTRL_AXI_ERROR_RESPONSE: ··· 8709 8492 gaudi2_handle_mme_err(hdev, index, 8710 8493 "CTRL_AXI_ERROR_RESPONSE", &eq_entry->razwi_info); 8711 8494 gaudi2_handle_qm_sei_err(hdev, event_type, NULL); 8495 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8712 8496 break; 8713 8497 8714 8498 case GAUDI2_EVENT_MME0_QMAN_SW_ERROR: ··· 8720 8502 (GAUDI2_EVENT_MME1_QMAN_SW_ERROR - 8721 8503 GAUDI2_EVENT_MME0_QMAN_SW_ERROR); 8722 8504 gaudi2_handle_mme_err(hdev, index, "QMAN_SW_ERROR", &eq_entry->razwi_info); 8505 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8723 8506 break; 8724 8507 8725 8508 case GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID: ··· 8731 8512 (GAUDI2_EVENT_MME1_WAP_SOURCE_RESULT_INVALID - 8732 8513 GAUDI2_EVENT_MME0_WAP_SOURCE_RESULT_INVALID); 8733 8514 gaudi2_handle_mme_wap_err(hdev, index, &eq_entry->razwi_info); 8515 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8734 8516 break; 8735 8517 8736 8518 case GAUDI2_EVENT_KDMA_CH0_AXI_ERR_RSP: 8737 8519 case GAUDI2_EVENT_KDMA0_CORE: 
8738 8520 gaudi2_handle_kdma_core_event(hdev, 8739 8521 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8522 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8740 8523 break; 8741 8524 8742 8525 case GAUDI2_EVENT_HDMA2_CORE ... GAUDI2_EVENT_PDMA1_CORE: 8743 8526 gaudi2_handle_dma_core_event(hdev, 8744 8527 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8528 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8745 8529 break; 8746 8530 8747 8531 case GAUDI2_EVENT_PCIE_ADDR_DEC_ERR: 8748 8532 gaudi2_print_pcie_addr_dec_info(hdev, 8749 8533 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8534 + reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8535 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8750 8536 break; 8751 8537 8752 8538 case GAUDI2_EVENT_HMMU0_PAGE_FAULT_OR_WR_PERM ... GAUDI2_EVENT_HMMU12_SECURITY_ERROR: ··· 8760 8536 case GAUDI2_EVENT_PMMU_AXI_ERR_RSP_0: 8761 8537 gaudi2_handle_mmu_spi_sei_err(hdev, event_type); 8762 8538 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8539 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8763 8540 break; 8764 8541 8765 8542 case GAUDI2_EVENT_HIF0_FATAL ... GAUDI2_EVENT_HIF12_FATAL: 8766 8543 gaudi2_handle_hif_fatal(hdev, event_type, 8767 8544 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8768 8545 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8546 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8769 8547 break; 8770 8548 8771 8549 case GAUDI2_EVENT_PMMU_FATAL_0: 8772 8550 gaudi2_handle_pif_fatal(hdev, 8773 8551 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8774 8552 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8553 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8775 8554 break; 8776 8555 8777 8556 case GAUDI2_EVENT_PSOC63_RAZWI_OR_PID_MIN_MAX_INTERRUPT: 8778 8557 gaudi2_ack_psoc_razwi_event_handler(hdev); 8558 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8779 8559 break; 8780 8560 8781 8561 case GAUDI2_EVENT_HBM0_MC0_SEI_SEVERE ... 
GAUDI2_EVENT_HBM5_MC1_SEI_NON_SEVERE: 8562 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8782 8563 if (gaudi2_handle_hbm_mc_sei_err(hdev, event_type, &eq_entry->sei_data)) { 8783 8564 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8784 8565 reset_required = true; ··· 8792 8563 8793 8564 case GAUDI2_EVENT_HBM_CATTRIP_0 ... GAUDI2_EVENT_HBM_CATTRIP_5: 8794 8565 gaudi2_handle_hbm_cattrip(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8566 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8795 8567 break; 8796 8568 8797 8569 case GAUDI2_EVENT_HBM0_MC0_SPI ... GAUDI2_EVENT_HBM5_MC1_SPI: 8798 8570 gaudi2_handle_hbm_mc_spi(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8571 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8799 8572 break; 8800 8573 8801 8574 case GAUDI2_EVENT_PCIE_DRAIN_COMPLETE: 8802 8575 gaudi2_handle_pcie_drain(hdev, &eq_entry->pcie_drain_ind_data); 8576 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8803 8577 break; 8804 8578 8805 8579 case GAUDI2_EVENT_PSOC59_RPM_ERROR_OR_DRAIN: 8806 8580 gaudi2_handle_psoc_drain(hdev, le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8581 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8807 8582 break; 8808 8583 8809 8584 case GAUDI2_EVENT_CPU_AXI_ECC: 8810 8585 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8586 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8811 8587 break; 8812 8588 case GAUDI2_EVENT_CPU_L2_RAM_ECC: 8813 8589 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8590 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8814 8591 break; 8815 8592 case GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP ... GAUDI2_EVENT_MME0_SBTE4_AXI_ERR_RSP: 8816 8593 case GAUDI2_EVENT_MME1_SBTE0_AXI_ERR_RSP ... 
GAUDI2_EVENT_MME1_SBTE4_AXI_ERR_RSP: ··· 8830 8595 GAUDI2_EVENT_MME0_SBTE0_AXI_ERR_RSP); 8831 8596 gaudi2_handle_mme_sbte_err(hdev, index, sbte_index, 8832 8597 le64_to_cpu(eq_entry->intr_cause.intr_cause_data)); 8598 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8833 8599 break; 8834 8600 case GAUDI2_EVENT_VM0_ALARM_A ... GAUDI2_EVENT_VM3_ALARM_B: 8835 8601 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8602 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8836 8603 break; 8837 8604 case GAUDI2_EVENT_PSOC_AXI_ERR_RSP: 8605 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8606 + break; 8838 8607 case GAUDI2_EVENT_PSOC_PRSTN_FALL: 8608 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8839 8609 break; 8840 8610 case GAUDI2_EVENT_PCIE_APB_TIMEOUT: 8841 8611 reset_flags |= HL_DRV_RESET_FW_FATAL_ERR; 8612 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8842 8613 break; 8843 8614 case GAUDI2_EVENT_PCIE_FATAL_ERR: 8615 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8844 8616 break; 8845 8617 case GAUDI2_EVENT_TPC0_BMON_SPMU: 8846 8618 case GAUDI2_EVENT_TPC1_BMON_SPMU: ··· 8899 8657 case GAUDI2_EVENT_DEC8_BMON_SPMU: 8900 8658 case GAUDI2_EVENT_DEC9_BMON_SPMU: 8901 8659 case GAUDI2_EVENT_ROTATOR0_BMON_SPMU ... 
GAUDI2_EVENT_SM3_BMON_SPMU: 8660 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8902 8661 break; 8903 8662 8904 8663 case GAUDI2_EVENT_CPU_FIX_POWER_ENV_S: ··· 8907 8664 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_S: 8908 8665 case GAUDI2_EVENT_CPU_FIX_THERMAL_ENV_E: 8909 8666 gaudi2_print_clk_change_info(hdev, event_type); 8667 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8910 8668 break; 8911 8669 8912 8670 case GAUDI2_EVENT_CPU_PKT_QUEUE_OUT_SYNC: 8913 8671 gaudi2_print_out_of_sync_info(hdev, &eq_entry->pkt_sync_err); 8672 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8914 8673 break; 8915 8674 8916 8675 case GAUDI2_EVENT_PCIE_FLR_REQUESTED: 8676 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8917 8677 /* Do nothing- FW will handle it */ 8918 8678 break; 8919 8679 8920 8680 case GAUDI2_EVENT_PCIE_P2P_MSIX: 8921 8681 gaudi2_handle_pcie_p2p_msix(hdev); 8682 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8922 8683 break; 8923 8684 8924 8685 case GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE ... GAUDI2_EVENT_SM3_AXI_ERROR_RESPONSE: 8925 8686 index = event_type - GAUDI2_EVENT_SM0_AXI_ERROR_RESPONSE; 8926 8687 skip_reset = !gaudi2_handle_sm_err(hdev, index); 8688 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8927 8689 break; 8928 8690 8929 8691 case GAUDI2_EVENT_PSOC_MME_PLL_LOCK_ERR ... 
GAUDI2_EVENT_DCORE2_HBM_PLL_LOCK_ERR: 8692 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8930 8693 break; 8931 8694 8932 8695 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_CAUSE: 8933 8696 dev_info(hdev->dev, "CPLD shutdown cause, reset reason: 0x%llx\n", 8934 8697 le64_to_cpu(eq_entry->data[0])); 8698 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8935 8699 break; 8936 8700 case GAUDI2_EVENT_CPU_CPLD_SHUTDOWN_EVENT: 8937 8701 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 8938 8702 le64_to_cpu(eq_entry->data[0])); 8703 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8939 8704 break; 8940 8705 8941 8706 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: 8942 8707 gaudi2_print_cpu_pkt_failure_info(hdev, &eq_entry->pkt_sync_err); 8708 + event_mask |= HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 8943 8709 break; 8944 8710 8945 8711 case GAUDI2_EVENT_ARC_DCCM_FULL: 8946 8712 hl_arc_event_handle(hdev, &eq_entry->arc_data); 8713 + event_mask |= HL_NOTIFIER_EVENT_USER_ENGINE_ERR; 8947 8714 break; 8948 8715 8949 8716 default: ··· 8969 8716 if (!gaudi2_irq_map_table[event_type].msg) 8970 8717 hl_fw_unmask_irq(hdev, event_type); 8971 8718 8719 + if (event_mask) 8720 + hl_notifier_event_send_all(hdev, event_mask); 8721 + 8972 8722 return; 8973 8723 8974 8724 reset_device: 8975 8725 if (hdev->hard_reset_on_fw_events) { 8976 8726 hl_device_reset(hdev, reset_flags); 8727 + event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 8977 8728 } else { 8978 8729 if (!gaudi2_irq_map_table[event_type].msg) 8979 8730 hl_fw_unmask_irq(hdev, event_type); 8980 8731 } 8732 + 8733 + if (event_mask) 8734 + hl_notifier_event_send_all(hdev, event_mask); 8981 8735 } 8982 8736 8983 8737 static int gaudi2_memset_device_memory(struct hl_device *hdev, u64 addr, u64 size, u64 val) ··· 9350 9090 } 9351 9091 9352 9092 /* Create mapping on asic side */ 9353 - mutex_lock(&ctx->mmu_lock); 9093 + mutex_lock(&hdev->mmu_lock); 9354 9094 rc = hl_mmu_map_contiguous(ctx, reserved_va_base, host_mem_dma_addr, SZ_2M); 9355 9095 
hl_mmu_invalidate_cache_range(hdev, false, 9356 9096 MMU_OP_USERPTR | MMU_OP_SKIP_LOW_CACHE_INV, 9357 9097 ctx->asid, reserved_va_base, SZ_2M); 9358 - mutex_unlock(&ctx->mmu_lock); 9098 + mutex_unlock(&hdev->mmu_lock); 9359 9099 if (rc) { 9360 9100 dev_err(hdev->dev, "Failed to create mapping on asic mmu\n"); 9361 9101 goto unreserve_va; 9362 9102 } 9363 - 9364 - hdev->asic_funcs->kdma_lock(hdev, 0); 9365 9103 9366 9104 /* Enable MMU on KDMA */ 9367 9105 gaudi2_kdma_set_mmbp_asid(hdev, false, ctx->asid); ··· 9388 9130 9389 9131 gaudi2_kdma_set_mmbp_asid(hdev, true, HL_KERNEL_ASID_ID); 9390 9132 9391 - hdev->asic_funcs->kdma_unlock(hdev, 0); 9392 - 9393 - mutex_lock(&ctx->mmu_lock); 9133 + mutex_lock(&hdev->mmu_lock); 9394 9134 hl_mmu_unmap_contiguous(ctx, reserved_va_base, SZ_2M); 9395 9135 hl_mmu_invalidate_cache_range(hdev, false, MMU_OP_USERPTR, 9396 9136 ctx->asid, reserved_va_base, SZ_2M); 9397 - mutex_unlock(&ctx->mmu_lock); 9137 + mutex_unlock(&hdev->mmu_lock); 9398 9138 unreserve_va: 9399 9139 hl_unreserve_va_block(hdev, ctx, reserved_va_base, SZ_2M); 9400 9140 free_data_buffer: ··· 9445 9189 goto destroy_internal_cb_pool; 9446 9190 } 9447 9191 9448 - mutex_lock(&ctx->mmu_lock); 9192 + mutex_lock(&hdev->mmu_lock); 9449 9193 rc = hl_mmu_map_contiguous(ctx, hdev->internal_cb_va_base, hdev->internal_cb_pool_dma_addr, 9450 9194 HOST_SPACE_INTERNAL_CB_SZ); 9451 9195 hl_mmu_invalidate_cache(hdev, false, MMU_OP_USERPTR); 9452 - mutex_unlock(&ctx->mmu_lock); 9196 + mutex_unlock(&hdev->mmu_lock); 9453 9197 9454 9198 if (rc) 9455 9199 goto unreserve_internal_cb_pool; ··· 9474 9218 if (!(gaudi2->hw_cap_initialized & HW_CAP_PMMU)) 9475 9219 return; 9476 9220 9477 - mutex_lock(&ctx->mmu_lock); 9221 + mutex_lock(&hdev->mmu_lock); 9478 9222 hl_mmu_unmap_contiguous(ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9479 9223 hl_unreserve_va_block(hdev, ctx, hdev->internal_cb_va_base, HOST_SPACE_INTERNAL_CB_SZ); 9480 9224 hl_mmu_invalidate_cache(hdev, true, 
MMU_OP_USERPTR); 9481 - mutex_unlock(&ctx->mmu_lock); 9225 + mutex_unlock(&hdev->mmu_lock); 9482 9226 9483 9227 gen_pool_destroy(hdev->internal_cb_pool); 9484 9228 ··· 9592 9336 9593 9337 static u32 gaudi2_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id, u32 size, bool eb) 9594 9338 { 9595 - struct hl_cb *cb = (struct hl_cb *) data; 9339 + struct hl_cb *cb = data; 9596 9340 struct packet_msg_short *pkt; 9597 9341 u32 value, ctl, pkt_size = sizeof(*pkt); 9598 9342 ··· 9685 9429 9686 9430 static u32 gaudi2_gen_wait_cb(struct hl_device *hdev, struct hl_gen_wait_properties *prop) 9687 9431 { 9688 - struct hl_cb *cb = (struct hl_cb *) prop->data; 9432 + struct hl_cb *cb = prop->data; 9689 9433 void *buf = (void *) (uintptr_t) (cb->kernel_address); 9690 9434 9691 9435 u64 monitor_base, fence_addr = 0; ··· 9737 9481 9738 9482 static void gaudi2_reset_sob(struct hl_device *hdev, void *data) 9739 9483 { 9740 - struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data; 9484 + struct hl_hw_sob *hw_sob = data; 9741 9485 9742 9486 dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx, hw_sob->sob_id); 9743 9487 ··· 9980 9724 9981 9725 static void gaudi2_ack_mmu_error(struct hl_device *hdev, u64 mmu_id) 9982 9726 { 9983 - bool is_pmmu = (mmu_id == HW_CAP_PMMU ? 
true : false); 9727 + bool is_pmmu = (mmu_id == HW_CAP_PMMU); 9984 9728 struct gaudi2_device *gaudi2 = hdev->asic_specific; 9985 9729 u32 mmu_base; 9986 9730 ··· 10137 9881 return -EOPNOTSUPP; 10138 9882 } 10139 9883 9884 + int gaudi2_send_device_activity(struct hl_device *hdev, bool open) 9885 + { 9886 + struct gaudi2_device *gaudi2 = hdev->asic_specific; 9887 + 9888 + if (!(gaudi2->hw_cap_initialized & HW_CAP_CPU_Q) || hdev->fw_major_version < 37) 9889 + return 0; 9890 + 9891 + /* TODO: add check for FW version using minor ver once it's known */ 9892 + return hl_fw_send_device_activity(hdev, open); 9893 + } 9894 + 10140 9895 static const struct hl_asic_funcs gaudi2_funcs = { 10141 9896 .early_init = gaudi2_early_init, 10142 9897 .early_fini = gaudi2_early_fini, ··· 10194 9927 .send_heartbeat = gaudi2_send_heartbeat, 10195 9928 .debug_coresight = gaudi2_debug_coresight, 10196 9929 .is_device_idle = gaudi2_is_device_idle, 10197 - .non_hard_reset_late_init = gaudi2_non_hard_reset_late_init, 9930 + .compute_reset_late_init = gaudi2_compute_reset_late_init, 10198 9931 .hw_queues_lock = gaudi2_hw_queues_lock, 10199 9932 .hw_queues_unlock = gaudi2_hw_queues_unlock, 10200 - .kdma_lock = gaudi2_kdma_lock, 10201 - .kdma_unlock = gaudi2_kdma_unlock, 10202 9933 .get_pci_id = gaudi2_get_pci_id, 10203 9934 .get_eeprom_data = gaudi2_get_eeprom_data, 10204 9935 .get_monitor_dump = gaudi2_get_monitor_dump, ··· 10243 9978 .mmu_get_real_page_size = gaudi2_mmu_get_real_page_size, 10244 9979 .access_dev_mem = hl_access_dev_mem, 10245 9980 .set_dram_bar_base = gaudi2_set_hbm_bar_base, 9981 + .set_engine_cores = gaudi2_set_engine_cores, 9982 + .send_device_activity = gaudi2_send_device_activity, 10246 9983 }; 10247 9984 10248 9985 void gaudi2_set_asic_funcs(struct hl_device *hdev)
+1 -9
drivers/misc/habanalabs/gaudi2/gaudi2P.h
··· 15 15 #include "../include/gaudi2/gaudi2_packets.h" 16 16 #include "../include/gaudi2/gaudi2_fw_if.h" 17 17 #include "../include/gaudi2/gaudi2_async_events.h" 18 - #include "../include/gaudi2/gaudi2_async_virt_events.h" 19 18 20 19 #define GAUDI2_LINUX_FW_FILE "habanalabs/gaudi2/gaudi2-fit.itb" 21 20 #define GAUDI2_BOOT_FIT_FILE "habanalabs/gaudi2/gaudi2-boot-fit.itb" ··· 138 139 139 140 #define VA_HOST_SPACE_HPAGE_START 0xFFF0800000000000ull 140 141 #define VA_HOST_SPACE_HPAGE_END 0xFFF1000000000000ull /* 140TB */ 141 - 142 - #define VA_HOST_SPACE_USER_MAPPED_CB_START 0xFFF1000000000000ull 143 - #define VA_HOST_SPACE_USER_MAPPED_CB_END 0xFFF1000100000000ull /* 4GB */ 144 142 145 143 /* 140TB */ 146 144 #define VA_HOST_SPACE_PAGE_SIZE (VA_HOST_SPACE_PAGE_END - VA_HOST_SPACE_PAGE_START) ··· 454 458 * the user can map. 455 459 * @lfsr_rand_seeds: array of MME ACC random seeds to set. 456 460 * @hw_queues_lock: protects the H/W queues from concurrent access. 457 - * @kdma_lock: protects the KDMA engine from concurrent access. 458 461 * @scratchpad_kernel_address: general purpose PAGE_SIZE contiguous memory, 459 462 * this memory region should be write-only. 460 463 * currently used for HBW QMAN writes which is ··· 505 510 * @flush_db_fifo: flag to force flush DB FIFO after a write. 506 511 * @hbm_cfg: HBM subsystem settings 507 512 * @hw_queues_lock_mutex: used by simulator instead of hw_queues_lock. 508 - * @kdma_lock_mutex: used by simulator instead of kdma_lock. 
509 - * @use_deprecated_event_mappings: use old event mappings which are about to be 510 - * deprecated 511 513 */ 512 514 struct gaudi2_device { 513 515 int (*cpucp_info_get)(struct hl_device *hdev); ··· 513 521 int lfsr_rand_seeds[MME_NUM_OF_LFSR_SEEDS]; 514 522 515 523 spinlock_t hw_queues_lock; 516 - spinlock_t kdma_lock; 517 524 518 525 void *scratchpad_kernel_address; 519 526 dma_addr_t scratchpad_bus_address; ··· 553 562 u32 offended_addr); 554 563 int gaudi2_init_security(struct hl_device *hdev); 555 564 void gaudi2_ack_protection_bits_errors(struct hl_device *hdev); 565 + int gaudi2_send_device_activity(struct hl_device *hdev, bool open); 556 566 557 567 #endif /* GAUDI2P_H_ */
+20 -1
drivers/misc/habanalabs/gaudi2/gaudi2_masks.h
··· 51 51 (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ 52 52 (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) 53 53 54 - #define PDMA1_QMAN_ENABLE \ 54 + #define PDMA0_QMAN_ENABLE \ 55 55 ((0x3 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ 56 + (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ 57 + (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ 58 + (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) 59 + 60 + #define PDMA1_QMAN_ENABLE \ 61 + ((0x1 << PDMA0_QM_GLBL_CFG0_PQF_EN_SHIFT) | \ 56 62 (0x1F << PDMA0_QM_GLBL_CFG0_CQF_EN_SHIFT) | \ 57 63 (0x1F << PDMA0_QM_GLBL_CFG0_CP_EN_SHIFT) | \ 58 64 (0x1 << PDMA0_QM_GLBL_CFG0_ARC_CQF_EN_SHIFT)) ··· 143 137 144 138 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_SHIFT 15 145 139 #define DCORE0_SYNC_MNGR_OBJS_SOB_OBJ_SIGN_MASK 0x8000 140 + 141 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_SHIFT 0 142 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK 0x1 143 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_SHIFT 1 144 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK 0x2 145 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_SHIFT 2 146 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK 0x4 147 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_SHIFT 3 148 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_ERR_INTR_MASK_MASK 0x8 149 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_SHIFT 4 150 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_AXI_LBW_ERR_INTR_MASK_MASK 0x10 151 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_SHIFT 5 152 + #define PCIE_WRAP_PCIE_IC_SEI_INTR_IND_BAD_ACCESS_INTR_MASK_MASK 0x20 146 153 147 154 #endif /* GAUDI2_MASKS_H_ */
+15 -11
drivers/misc/habanalabs/gaudi2/gaudi2_security.c
··· 2559 2559 mmPCIE_WRAP_BASE, 2560 2560 }; 2561 2561 2562 + static const u32 gaudi2_pb_pcie_unsecured_regs[] = { 2563 + mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0, 2564 + }; 2565 + 2562 2566 static const u32 gaudi2_pb_thermal_sensor0[] = { 2563 2567 mmDCORE0_XFT_BASE, 2564 2568 mmDCORE0_TSTDVS_BASE, ··· 2587 2583 }; 2588 2584 2589 2585 static void gaudi2_config_tpcs_glbl_sec(struct hl_device *hdev, int dcore, int inst, u32 offset, 2590 - void *data) 2586 + struct iterate_module_ctx *ctx) 2591 2587 { 2592 - struct gaudi2_tpc_pb_data *pb_data = (struct gaudi2_tpc_pb_data *)data; 2588 + struct gaudi2_tpc_pb_data *pb_data = ctx->data; 2593 2589 2594 2590 hl_config_glbl_sec(hdev, gaudi2_pb_dcr0_tpc0, pb_data->glbl_sec, 2595 2591 offset, pb_data->block_array_size); ··· 2664 2660 struct gaudi2_tpc_arc_pb_data { 2665 2661 u32 unsecured_regs_arr_size; 2666 2662 u32 arc_regs_arr_size; 2667 - int rc; 2668 2663 }; 2669 2664 2670 2665 static void gaudi2_config_tpcs_pb_ranges(struct hl_device *hdev, int dcore, int inst, u32 offset, 2671 - void *data) 2666 + struct iterate_module_ctx *ctx) 2672 2667 { 2673 - struct gaudi2_tpc_arc_pb_data *pb_data = (struct gaudi2_tpc_arc_pb_data *)data; 2668 + struct gaudi2_tpc_arc_pb_data *pb_data = ctx->data; 2674 2669 2675 - pb_data->rc |= hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, 2670 + ctx->rc = hl_init_pb_ranges(hdev, HL_PB_SHARED, HL_PB_NA, 1, 2676 2671 offset, gaudi2_pb_dcr0_tpc0_arc, 2677 2672 pb_data->arc_regs_arr_size, 2678 2673 gaudi2_pb_dcr0_tpc0_arc_unsecured_regs, ··· 2686 2683 tpc_arc_pb_data.arc_regs_arr_size = ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc); 2687 2684 tpc_arc_pb_data.unsecured_regs_arr_size = 2688 2685 ARRAY_SIZE(gaudi2_pb_dcr0_tpc0_arc_unsecured_regs); 2689 - tpc_arc_pb_data.rc = 0; 2686 + 2690 2687 tpc_iter.fn = &gaudi2_config_tpcs_pb_ranges; 2691 2688 tpc_iter.data = &tpc_arc_pb_data; 2692 2689 gaudi2_iterate_tpcs(hdev, &tpc_iter); 2693 2690 2694 - return tpc_arc_pb_data.rc; 2691 + return tpc_iter.rc; 2695 2692 } 2696 
2693 2697 2694 static int gaudi2_init_pb_sm_objs(struct hl_device *hdev) ··· 3422 3419 rc |= hl_init_pb(hdev, HL_PB_SHARED, HL_PB_NA, 3423 3420 HL_PB_SINGLE_INSTANCE, HL_PB_NA, 3424 3421 gaudi2_pb_pcie, ARRAY_SIZE(gaudi2_pb_pcie), 3425 - NULL, HL_PB_NA); 3422 + gaudi2_pb_pcie_unsecured_regs, 3423 + ARRAY_SIZE(gaudi2_pb_pcie_unsecured_regs)); 3426 3424 3427 3425 /* Thermal Sensor. 3428 3426 * Skip when security is enabled in F/W, because the blocks are protected by privileged RR. ··· 3551 3547 }; 3552 3548 3553 3549 static void gaudi2_ack_pb_tpc_config(struct hl_device *hdev, int dcore, int inst, u32 offset, 3554 - void *data) 3550 + struct iterate_module_ctx *ctx) 3555 3551 { 3556 - struct gaudi2_ack_pb_tpc_data *pb_data = (struct gaudi2_ack_pb_tpc_data *)data; 3552 + struct gaudi2_ack_pb_tpc_data *pb_data = ctx->data; 3557 3553 3558 3554 hl_ack_pb_single_dcore(hdev, offset, HL_PB_SINGLE_INSTANCE, HL_PB_NA, 3559 3555 gaudi2_pb_dcr0_tpc0, pb_data->tpc_regs_array_size);
+26 -36
drivers/misc/habanalabs/goya/goya.c
··· 916 916 */ 917 917 void goya_late_fini(struct hl_device *hdev) 918 918 { 919 - const struct hwmon_channel_info **channel_info_arr; 920 919 struct goya_device *goya = hdev->asic_specific; 921 - int i = 0; 922 920 923 921 cancel_delayed_work_sync(&goya->goya_work->work_freq); 924 922 925 - if (!hdev->hl_chip_info->info) 926 - return; 927 - 928 - channel_info_arr = hdev->hl_chip_info->info; 929 - 930 - while (channel_info_arr[i]) { 931 - kfree(channel_info_arr[i]->config); 932 - kfree(channel_info_arr[i]); 933 - i++; 934 - } 935 - 936 - kfree(channel_info_arr); 937 - 938 - hdev->hl_chip_info->info = NULL; 923 + hl_hwmon_release_resources(hdev); 939 924 } 940 925 941 926 static void goya_set_pci_memory_regions(struct hl_device *hdev) ··· 1025 1040 hdev->asic_prop.supports_compute_reset = true; 1026 1041 hdev->asic_prop.allow_inference_soft_reset = true; 1027 1042 hdev->supports_wait_for_multi_cs = false; 1043 + hdev->supports_ctx_switch = true; 1028 1044 1029 1045 hdev->asic_funcs->set_pci_memory_regions(hdev); 1030 1046 ··· 4545 4559 return rc; 4546 4560 } 4547 4561 4548 - static int goya_non_hard_reset_late_init(struct hl_device *hdev) 4562 + static int goya_compute_reset_late_init(struct hl_device *hdev) 4549 4563 { 4550 4564 /* 4551 4565 * Unmask all IRQs since some could have been received ··· 5123 5137 return 0; 5124 5138 } 5125 5139 5126 - static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, 5127 - u8 mask_len, struct seq_file *s) 5140 + static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, 5141 + struct engines_data *e) 5128 5142 { 5129 5143 const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n"; 5130 5144 const char *dma_fmt = "%-5d%-9s%#-14x%#x\n"; ··· 5135 5149 u64 offset; 5136 5150 int i; 5137 5151 5138 - if (s) 5139 - seq_puts(s, "\nDMA is_idle QM_GLBL_STS0 DMA_CORE_STS0\n" 5140 - "--- ------- ------------ -------------\n"); 5152 + if (e) 5153 + hl_engine_data_sprintf(e, "\nDMA is_idle QM_GLBL_STS0 
DMA_CORE_STS0\n" 5154 + "--- ------- ------------ -------------\n"); 5141 5155 5142 5156 offset = mmDMA_QM_1_GLBL_STS0 - mmDMA_QM_0_GLBL_STS0; 5143 5157 ··· 5150 5164 5151 5165 if (mask && !is_eng_idle) 5152 5166 set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask); 5153 - if (s) 5154 - seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N", 5167 + if (e) 5168 + hl_engine_data_sprintf(e, dma_fmt, i, is_eng_idle ? "Y" : "N", 5155 5169 qm_glbl_sts0, dma_core_sts0); 5156 5170 } 5157 5171 5158 - if (s) 5159 - seq_puts(s, 5172 + if (e) 5173 + hl_engine_data_sprintf(e, 5160 5174 "\nTPC is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 CFG_STATUS\n" 5161 5175 "--- ------- ------------ -------------- ----------\n"); 5162 5176 ··· 5173 5187 5174 5188 if (mask && !is_eng_idle) 5175 5189 set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask); 5176 - if (s) 5177 - seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N", 5190 + if (e) 5191 + hl_engine_data_sprintf(e, fmt, i, is_eng_idle ? "Y" : "N", 5178 5192 qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts); 5179 5193 } 5180 5194 5181 - if (s) 5182 - seq_puts(s, 5195 + if (e) 5196 + hl_engine_data_sprintf(e, 5183 5197 "\nMME is_idle QM_GLBL_STS0 CMDQ_GLBL_STS0 ARCH_STATUS\n" 5184 5198 "--- ------- ------------ -------------- -----------\n"); 5185 5199 ··· 5193 5207 5194 5208 if (mask && !is_eng_idle) 5195 5209 set_bit(GOYA_ENGINE_ID_MME_0, mask); 5196 - if (s) { 5197 - seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0, 5210 + if (e) { 5211 + hl_engine_data_sprintf(e, fmt, 0, is_eng_idle ? 
"Y" : "N", qm_glbl_sts0, 5198 5212 cmdq_glbl_sts0, mme_arch_sts); 5199 - seq_puts(s, "\n"); 5213 + hl_engine_data_sprintf(e, "\n"); 5200 5214 } 5201 5215 5202 5216 return is_idle; ··· 5420 5434 return -EOPNOTSUPP; 5421 5435 } 5422 5436 5437 + static int goya_send_device_activity(struct hl_device *hdev, bool open) 5438 + { 5439 + return 0; 5440 + } 5441 + 5423 5442 static const struct hl_asic_funcs goya_funcs = { 5424 5443 .early_init = goya_early_init, 5425 5444 .early_fini = goya_early_fini, ··· 5469 5478 .send_heartbeat = goya_send_heartbeat, 5470 5479 .debug_coresight = goya_debug_coresight, 5471 5480 .is_device_idle = goya_is_device_idle, 5472 - .non_hard_reset_late_init = goya_non_hard_reset_late_init, 5481 + .compute_reset_late_init = goya_compute_reset_late_init, 5473 5482 .hw_queues_lock = goya_hw_queues_lock, 5474 5483 .hw_queues_unlock = goya_hw_queues_unlock, 5475 - .kdma_lock = NULL, 5476 - .kdma_unlock = NULL, 5477 5484 .get_pci_id = goya_get_pci_id, 5478 5485 .get_eeprom_data = goya_get_eeprom_data, 5479 5486 .get_monitor_dump = goya_get_monitor_dump, ··· 5517 5528 .mmu_get_real_page_size = hl_mmu_get_real_page_size, 5518 5529 .access_dev_mem = hl_access_dev_mem, 5519 5530 .set_dram_bar_base = goya_set_ddr_bar_base, 5531 + .send_device_activity = goya_send_device_activity, 5520 5532 }; 5521 5533 5522 5534 /*
+101 -2
drivers/misc/habanalabs/include/common/cpucp_if.h
··· 629 629 * CPUCP_PACKET_ENGINE_CORE_ASID_SET - 630 630 * Packet to perform engine core ASID configuration 631 631 * 632 + * CPUCP_PACKET_SEC_ATTEST_GET - 633 + * Get the attestaion data that is collected during various stages of the 634 + * boot sequence. the attestation data is also hashed with some unique 635 + * number (nonce) provided by the host to prevent replay attacks. 636 + * public key and certificate also provided as part of the FW response. 637 + * 632 638 * CPUCP_PACKET_MONITOR_DUMP_GET - 633 639 * Get monitors registers dump from the CpuCP kernel. 634 640 * The CPU will put the registers dump in the a buffer allocated by the driver ··· 642 636 * passes the max size it allows the CpuCP to write to the structure, to prevent 643 637 * data corruption in case of mismatched driver/FW versions. 644 638 * Relevant only to Gaudi. 639 + * 640 + * CPUCP_PACKET_ACTIVE_STATUS_SET - 641 + * LKD sends FW indication whether device is free or in use, this indication is reported 642 + * also to the BMC. 645 643 */ 646 644 647 645 enum cpucp_packet_id { ··· 697 687 CPUCP_PACKET_RESERVED, /* not used */ 698 688 CPUCP_PACKET_ENGINE_CORE_ASID_SET, /* internal */ 699 689 CPUCP_PACKET_RESERVED2, /* not used */ 690 + CPUCP_PACKET_SEC_ATTEST_GET, /* internal */ 700 691 CPUCP_PACKET_RESERVED3, /* not used */ 701 692 CPUCP_PACKET_RESERVED4, /* not used */ 702 - CPUCP_PACKET_RESERVED5, /* not used */ 703 693 CPUCP_PACKET_MONITOR_DUMP_GET, /* debugfs */ 694 + CPUCP_PACKET_RESERVED5, /* not used */ 695 + CPUCP_PACKET_RESERVED6, /* not used */ 696 + CPUCP_PACKET_RESERVED7, /* not used */ 697 + CPUCP_PACKET_RESERVED8, /* not used */ 698 + CPUCP_PACKET_RESERVED9, /* not used */ 699 + CPUCP_PACKET_ACTIVE_STATUS_SET, /* internal */ 700 + CPUCP_PACKET_ID_MAX /* must be last */ 704 701 }; 705 702 706 703 #define CPUCP_PACKET_FENCE_VAL 0xFE8CE7A5 ··· 800 783 * result cannot be used to hold general purpose data. 
801 784 */ 802 785 __le32 status_mask; 786 + 787 + /* random, used once number, for security packets */ 788 + __le32 nonce; 803 789 }; 804 790 805 791 /* For NIC requests */ ··· 833 813 CPUCP_LED2_INDEX 834 814 }; 835 815 816 + /* 817 + * enum cpucp_packet_rc - Error return code 818 + * @cpucp_packet_success -> in case of success. 819 + * @cpucp_packet_invalid -> this is to support Goya and Gaudi platform. 820 + * @cpucp_packet_fault -> in case of processing error like failing to 821 + * get device binding or semaphore etc. 822 + * @cpucp_packet_invalid_pkt -> when cpucp packet is un-supported. This is 823 + * supported Greco onwards. 824 + * @cpucp_packet_invalid_params -> when checking parameter like length of buffer 825 + * or attribute value etc. Supported Greco onwards. 826 + * @cpucp_packet_rc_max -> It indicates size of enum so should be at last. 827 + */ 836 828 enum cpucp_packet_rc { 837 829 cpucp_packet_success, 838 830 cpucp_packet_invalid, 839 - cpucp_packet_fault 831 + cpucp_packet_fault, 832 + cpucp_packet_invalid_pkt, 833 + cpucp_packet_invalid_params, 834 + cpucp_packet_rc_max 840 835 }; 841 836 842 837 /* ··· 1226 1191 enum cpu_reset_status { 1227 1192 CPU_RST_STATUS_NA = 0, 1228 1193 CPU_RST_STATUS_SOFT_RST_DONE = 1, 1194 + }; 1195 + 1196 + #define SEC_PCR_DATA_BUF_SZ 256 1197 + #define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ 1198 + #define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ 1199 + #define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ 1200 + #define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ 1201 + 1202 + /* 1203 + * struct cpucp_sec_attest_info - attestation report of the boot 1204 + * @pcr_data: raw values of the PCR registers 1205 + * @pcr_num_reg: number of PCR registers in the pcr_data array 1206 + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) 1207 + * @nonce: number only used once. random number provided by host. 
this also 1208 + * passed to the quote command as a qualifying data. 1209 + * @pcr_quote_len: length of the attestation quote data (bytes) 1210 + * @pcr_quote: attestation report data structure 1211 + * @quote_sig_len: length of the attestation report signature (bytes) 1212 + * @quote_sig: signature structure of the attestation report 1213 + * @pub_data_len: length of the public data (bytes) 1214 + * @public_data: public key for the signed attestation 1215 + * (outPublic + name + qualifiedName) 1216 + * @certificate_len: length of the certificate (bytes) 1217 + * @certificate: certificate for the attestation signing key 1218 + */ 1219 + struct cpucp_sec_attest_info { 1220 + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; 1221 + __u8 pcr_num_reg; 1222 + __u8 pcr_reg_len; 1223 + __le16 pad0; 1224 + __le32 nonce; 1225 + __le16 pcr_quote_len; 1226 + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; 1227 + __u8 quote_sig_len; 1228 + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; 1229 + __le16 pub_data_len; 1230 + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; 1231 + __le16 certificate_len; 1232 + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; 1233 + }; 1234 + 1235 + /* 1236 + * struct cpucp_dev_info_signed - device information signed by a secured device 1237 + * @info: device information structure as defined above 1238 + * @nonce: number only used once. random number provided by host. this number is 1239 + * hashed and signed along with the device information. 1240 + * @info_sig_len: length of the attestation signature (bytes) 1241 + * @info_sig: signature of the info + nonce data. 
1242 + * @pub_data_len: length of the public data (bytes) 1243 + * @public_data: public key info signed info data 1244 + * (outPublic + name + qualifiedName) 1245 + * @certificate_len: length of the certificate (bytes) 1246 + * @certificate: certificate for the signing key 1247 + */ 1248 + struct cpucp_dev_info_signed { 1249 + struct cpucp_info info; /* assumed to be 64bit aligned */ 1250 + __le32 nonce; 1251 + __le32 pad0; 1252 + __u8 info_sig_len; 1253 + __u8 info_sig[SEC_SIGNATURE_BUF_SZ]; 1254 + __le16 pub_data_len; 1255 + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; 1256 + __le16 certificate_len; 1257 + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; 1229 1258 }; 1230 1259 1231 1260 /*
+29 -8
drivers/misc/habanalabs/include/common/hl_boot_if.h
··· 34 34 CPU_BOOT_ERR_BINNING_FAIL = 19, 35 35 CPU_BOOT_ERR_TPM_FAIL = 20, 36 36 CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL = 21, 37 + CPU_BOOT_ERR_EEPROM_FAIL = 22, 37 38 CPU_BOOT_ERR_ENABLED = 31, 38 39 CPU_BOOT_ERR_SCND_EN = 63, 39 40 CPU_BOOT_ERR_LAST = 64 /* we have 2 registers of 32 bits */ ··· 116 115 * CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL Failed to set threshold for tmperature 117 116 * sensor. 118 117 * 118 + * CPU_BOOT_ERR_EEPROM_FAIL Failed reading EEPROM data. Defaults 119 + * are used. 120 + * 119 121 * CPU_BOOT_ERR0_ENABLED Error registers enabled. 120 122 * This is a main indication that the 121 123 * running FW populates the error ··· 143 139 #define CPU_BOOT_ERR0_BINNING_FAIL (1 << CPU_BOOT_ERR_BINNING_FAIL) 144 140 #define CPU_BOOT_ERR0_TPM_FAIL (1 << CPU_BOOT_ERR_TPM_FAIL) 145 141 #define CPU_BOOT_ERR0_TMP_THRESH_INIT_FAIL (1 << CPU_BOOT_ERR_TMP_THRESH_INIT_FAIL) 142 + #define CPU_BOOT_ERR0_EEPROM_FAIL (1 << CPU_BOOT_ERR_EEPROM_FAIL) 146 143 #define CPU_BOOT_ERR0_ENABLED (1 << CPU_BOOT_ERR_ENABLED) 147 144 #define CPU_BOOT_ERR1_ENABLED (1 << CPU_BOOT_ERR_ENABLED) 148 145 ··· 431 426 __le32 gic_host_ints_irq; 432 427 __le32 gic_host_soft_rst_irq; 433 428 __le32 gic_rot_qm_irq_ctrl; 434 - __le32 reserved1[22]; /* reserve for future use */ 429 + __le32 cpu_rst_status; 430 + __le32 eng_arc_irq_ctrl; 431 + __le32 reserved1[20]; /* reserve for future use */ 435 432 }; 436 433 437 434 /* TODO: remove the desc magic after the code is updated to use message */ ··· 470 463 HL_COMMS_RESET_CAUSE_TYPE = 1, 471 464 HL_COMMS_FW_CFG_SKIP_TYPE = 2, 472 465 HL_COMMS_BINNING_CONF_TYPE = 3, 466 + }; 467 + 468 + /* 469 + * Binning information shared between LKD and FW 470 + * @tpc_mask - TPC binning information 471 + * @dec_mask - Decoder binning information 472 + * @hbm_mask - HBM binning information 473 + * @edma_mask - EDMA binning information 474 + * @mme_mask_l - MME binning information lower 32 475 + * @mme_mask_h - MME binning information upper 32 476 + * @reserved - 
reserved field for 64 bit alignment 477 + */ 478 + struct lkd_fw_binning_info { 479 + __le64 tpc_mask; 480 + __le32 dec_mask; 481 + __le32 hbm_mask; 482 + __le32 edma_mask; 483 + __le32 mme_mask_l; 484 + __le32 mme_mask_h; 485 + __le32 reserved; 473 486 }; 474 487 475 488 /* TODO: remove this struct after the code is updated to use message */ ··· 552 525 struct { 553 526 __u8 fw_cfg_skip; /* 1 - skip, 0 - don't skip */ 554 527 }; 555 - struct { 556 - __le64 tpc_binning_conf; 557 - __le32 dec_binning_conf; 558 - __le32 hbm_binning_conf; 559 - __le32 edma_binning_conf; 560 - __le32 mme_redundancy_conf; /* use MME_REDUNDANT_COLUMN */ 561 - }; 528 + struct lkd_fw_binning_info binning_info; 562 529 }; 563 530 }; 564 531
+2
drivers/misc/habanalabs/include/gaudi2/asic_reg/gaudi2_regs.h
··· 132 132 #include "dcore0_mme_ctrl_lo_arch_tensor_a_regs.h" 133 133 #include "dcore0_mme_ctrl_lo_arch_tensor_b_regs.h" 134 134 #include "dcore0_mme_ctrl_lo_arch_tensor_cout_regs.h" 135 + #include "pcie_wrap_special_regs.h" 135 136 136 137 #include "pdma0_qm_masks.h" 137 138 #include "pdma0_core_masks.h" ··· 240 239 #define SFT_IF_RTR_OFFSET (mmSFT0_HBW_RTR_IF1_RTR_H3_BASE - mmSFT0_HBW_RTR_IF0_RTR_H3_BASE) 241 240 242 241 #define ARC_HALT_REQ_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_REQ - mmARC_FARM_ARC0_AUX_BASE) 242 + #define ARC_HALT_ACK_OFFSET (mmARC_FARM_ARC0_AUX_RUN_HALT_ACK - mmARC_FARM_ARC0_AUX_BASE) 243 243 244 244 #define ARC_REGION_CFG_OFFSET(region) \ 245 245 (mmARC_FARM_ARC0_AUX_ARC_REGION_CFG_0 + (region * 4) - mmARC_FARM_ARC0_AUX_BASE)
+185
drivers/misc/habanalabs/include/gaudi2/asic_reg/pcie_wrap_special_regs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * 3 + * Copyright 2016-2020 HabanaLabs, Ltd. 4 + * All Rights Reserved. 5 + * 6 + */ 7 + 8 + /************************************ 9 + ** This is an auto-generated file ** 10 + ** DO NOT EDIT BELOW ** 11 + ************************************/ 12 + 13 + #ifndef ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ 14 + #define ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ 15 + 16 + /* 17 + ***************************************** 18 + * PCIE_WRAP_SPECIAL 19 + * (Prototype: SPECIAL_REGS) 20 + ***************************************** 21 + */ 22 + 23 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_0 0x4C01E80 24 + 25 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_1 0x4C01E84 26 + 27 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_2 0x4C01E88 28 + 29 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_3 0x4C01E8C 30 + 31 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_4 0x4C01E90 32 + 33 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_5 0x4C01E94 34 + 35 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_6 0x4C01E98 36 + 37 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_7 0x4C01E9C 38 + 39 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_8 0x4C01EA0 40 + 41 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_9 0x4C01EA4 42 + 43 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_10 0x4C01EA8 44 + 45 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_11 0x4C01EAC 46 + 47 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_12 0x4C01EB0 48 + 49 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_13 0x4C01EB4 50 + 51 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_14 0x4C01EB8 52 + 53 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_15 0x4C01EBC 54 + 55 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_16 0x4C01EC0 56 + 57 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_17 0x4C01EC4 58 + 59 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_18 0x4C01EC8 60 + 61 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_19 0x4C01ECC 62 + 63 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_20 0x4C01ED0 64 + 65 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_21 0x4C01ED4 66 + 67 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_22 0x4C01ED8 68 + 69 + #define 
mmPCIE_WRAP_SPECIAL_GLBL_PRIV_23 0x4C01EDC 70 + 71 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_24 0x4C01EE0 72 + 73 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_25 0x4C01EE4 74 + 75 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_26 0x4C01EE8 76 + 77 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_27 0x4C01EEC 78 + 79 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_28 0x4C01EF0 80 + 81 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_29 0x4C01EF4 82 + 83 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_30 0x4C01EF8 84 + 85 + #define mmPCIE_WRAP_SPECIAL_GLBL_PRIV_31 0x4C01EFC 86 + 87 + #define mmPCIE_WRAP_SPECIAL_MEM_GW_DATA 0x4C01F00 88 + 89 + #define mmPCIE_WRAP_SPECIAL_MEM_GW_REQ 0x4C01F04 90 + 91 + #define mmPCIE_WRAP_SPECIAL_MEM_NUMOF 0x4C01F0C 92 + 93 + #define mmPCIE_WRAP_SPECIAL_MEM_ECC_SEL 0x4C01F10 94 + 95 + #define mmPCIE_WRAP_SPECIAL_MEM_ECC_CTL 0x4C01F14 96 + 97 + #define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_MASK 0x4C01F18 98 + 99 + #define mmPCIE_WRAP_SPECIAL_MEM_ECC_GLBL_ERR_MASK 0x4C01F1C 100 + 101 + #define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_STS 0x4C01F20 102 + 103 + #define mmPCIE_WRAP_SPECIAL_MEM_ECC_ERR_ADDR 0x4C01F24 104 + 105 + #define mmPCIE_WRAP_SPECIAL_MEM_RM 0x4C01F28 106 + 107 + #define mmPCIE_WRAP_SPECIAL_GLBL_ERR_MASK 0x4C01F40 108 + 109 + #define mmPCIE_WRAP_SPECIAL_GLBL_ERR_ADDR 0x4C01F44 110 + 111 + #define mmPCIE_WRAP_SPECIAL_GLBL_ERR_CAUSE 0x4C01F48 112 + 113 + #define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_0 0x4C01F60 114 + 115 + #define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_1 0x4C01F64 116 + 117 + #define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_2 0x4C01F68 118 + 119 + #define mmPCIE_WRAP_SPECIAL_GLBL_SPARE_3 0x4C01F6C 120 + 121 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_0 0x4C01F80 122 + 123 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_1 0x4C01F84 124 + 125 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_2 0x4C01F88 126 + 127 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_3 0x4C01F8C 128 + 129 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_4 0x4C01F90 130 + 131 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_5 0x4C01F94 132 + 133 + #define 
mmPCIE_WRAP_SPECIAL_GLBL_SEC_6 0x4C01F98 134 + 135 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_7 0x4C01F9C 136 + 137 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_8 0x4C01FA0 138 + 139 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_9 0x4C01FA4 140 + 141 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_10 0x4C01FA8 142 + 143 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_11 0x4C01FAC 144 + 145 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_12 0x4C01FB0 146 + 147 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_13 0x4C01FB4 148 + 149 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_14 0x4C01FB8 150 + 151 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_15 0x4C01FBC 152 + 153 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_16 0x4C01FC0 154 + 155 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_17 0x4C01FC4 156 + 157 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_18 0x4C01FC8 158 + 159 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_19 0x4C01FCC 160 + 161 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_20 0x4C01FD0 162 + 163 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_21 0x4C01FD4 164 + 165 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_22 0x4C01FD8 166 + 167 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_23 0x4C01FDC 168 + 169 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_24 0x4C01FE0 170 + 171 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_25 0x4C01FE4 172 + 173 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_26 0x4C01FE8 174 + 175 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_27 0x4C01FEC 176 + 177 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_28 0x4C01FF0 178 + 179 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_29 0x4C01FF4 180 + 181 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_30 0x4C01FF8 182 + 183 + #define mmPCIE_WRAP_SPECIAL_GLBL_SEC_31 0x4C01FFC 184 + 185 + #endif /* ASIC_REG_PCIE_WRAP_SPECIAL_REGS_H_ */
-57
drivers/misc/habanalabs/include/gaudi2/gaudi2_async_virt_events.h
··· 1 - /* SPDX-License-Identifier: GPL-2.0 2 - * 3 - * Copyright 2022 HabanaLabs, Ltd. 4 - * All Rights Reserved. 5 - * 6 - */ 7 - 8 - #ifndef __GAUDI2_ASYNC_VIRT_EVENTS_H_ 9 - #define __GAUDI2_ASYNC_VIRT_EVENTS_H_ 10 - 11 - enum gaudi2_async_virt_event_id { 12 - GAUDI2_EVENT_NIC3_QM1_OLD = 1206, 13 - GAUDI2_EVENT_NIC4_QM0_OLD = 1207, 14 - GAUDI2_EVENT_NIC4_QM1_OLD = 1208, 15 - GAUDI2_EVENT_NIC5_QM0_OLD = 1209, 16 - GAUDI2_EVENT_NIC5_QM1_OLD = 1210, 17 - GAUDI2_EVENT_NIC6_QM0_OLD = 1211, 18 - GAUDI2_EVENT_NIC6_QM1_OLD = 1212, 19 - GAUDI2_EVENT_NIC7_QM0_OLD = 1213, 20 - GAUDI2_EVENT_NIC7_QM1_OLD = 1214, 21 - GAUDI2_EVENT_NIC8_QM0_OLD = 1215, 22 - GAUDI2_EVENT_NIC8_QM1_OLD = 1216, 23 - GAUDI2_EVENT_NIC9_QM0_OLD = 1217, 24 - GAUDI2_EVENT_NIC9_QM1_OLD = 1218, 25 - GAUDI2_EVENT_NIC10_QM0_OLD = 1219, 26 - GAUDI2_EVENT_NIC10_QM1_OLD = 1220, 27 - GAUDI2_EVENT_NIC11_QM0_OLD = 1221, 28 - GAUDI2_EVENT_NIC11_QM1_OLD = 1222, 29 - GAUDI2_EVENT_CPU_PKT_SANITY_FAILED_OLD = 1223, 30 - GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG0_OLD = 1224, 31 - GAUDI2_EVENT_CPU0_STATUS_NIC0_ENG1_OLD = 1225, 32 - GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG0_OLD = 1226, 33 - GAUDI2_EVENT_CPU1_STATUS_NIC1_ENG1_OLD = 1227, 34 - GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG0_OLD = 1228, 35 - GAUDI2_EVENT_CPU2_STATUS_NIC2_ENG1_OLD = 1229, 36 - GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG0_OLD = 1230, 37 - GAUDI2_EVENT_CPU3_STATUS_NIC3_ENG1_OLD = 1231, 38 - GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG0_OLD = 1232, 39 - GAUDI2_EVENT_CPU4_STATUS_NIC4_ENG1_OLD = 1233, 40 - GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG0_OLD = 1234, 41 - GAUDI2_EVENT_CPU5_STATUS_NIC5_ENG1_OLD = 1235, 42 - GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG0_OLD = 1236, 43 - GAUDI2_EVENT_CPU6_STATUS_NIC6_ENG1_OLD = 1237, 44 - GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG0_OLD = 1238, 45 - GAUDI2_EVENT_CPU7_STATUS_NIC7_ENG1_OLD = 1239, 46 - GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG0_OLD = 1240, 47 - GAUDI2_EVENT_CPU8_STATUS_NIC8_ENG1_OLD = 1241, 48 - GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG0_OLD = 1242, 49 - 
GAUDI2_EVENT_CPU9_STATUS_NIC9_ENG1_OLD = 1243, 50 - GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG0_OLD = 1244, 51 - GAUDI2_EVENT_CPU10_STATUS_NIC10_ENG1_OLD = 1245, 52 - GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG0_OLD = 1246, 53 - GAUDI2_EVENT_CPU11_STATUS_NIC11_ENG1_OLD = 1247, 54 - GAUDI2_EVENT_ARC_DCCM_FULL_OLD = 1248, 55 - }; 56 - 57 - #endif /* __GAUDI2_ASYNC_VIRT_EVENTS_H_ */
+93
include/trace/events/habanalabs.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * 3 + * Copyright 2016-2021 HabanaLabs, Ltd. 4 + * All Rights Reserved. 5 + * 6 + */ 7 + 8 + #undef TRACE_SYSTEM 9 + #define TRACE_SYSTEM habanalabs 10 + 11 + #if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) 12 + #define _TRACE_HABANALABS_H 13 + 14 + #include <linux/tracepoint.h> 15 + 16 + DECLARE_EVENT_CLASS(habanalabs_mmu_template, 17 + TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte), 18 + 19 + TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte), 20 + 21 + TP_STRUCT__entry( 22 + __string(dname, dev_name(dev)) 23 + __field(u64, virt_addr) 24 + __field(u64, phys_addr) 25 + __field(u32, page_size) 26 + __field(u8, flush_pte) 27 + ), 28 + 29 + TP_fast_assign( 30 + __assign_str(dname, dev_name(dev)); 31 + __entry->virt_addr = virt_addr; 32 + __entry->phys_addr = phys_addr; 33 + __entry->page_size = page_size; 34 + __entry->flush_pte = flush_pte; 35 + ), 36 + 37 + TP_printk("%s: vaddr: %#llx, paddr: %#llx, psize: %#x, flush: %s", 38 + __get_str(dname), 39 + __entry->virt_addr, 40 + __entry->phys_addr, 41 + __entry->page_size, 42 + __entry->flush_pte ? 
"true" : "false") 43 + ); 44 + 45 + DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_map, 46 + TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte), 47 + TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte)); 48 + 49 + DEFINE_EVENT(habanalabs_mmu_template, habanalabs_mmu_unmap, 50 + TP_PROTO(struct device *dev, u64 virt_addr, u64 phys_addr, u32 page_size, bool flush_pte), 51 + TP_ARGS(dev, virt_addr, phys_addr, page_size, flush_pte)); 52 + 53 + DECLARE_EVENT_CLASS(habanalabs_dma_alloc_template, 54 + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller), 55 + 56 + TP_ARGS(dev, cpu_addr, dma_addr, size, caller), 57 + 58 + TP_STRUCT__entry( 59 + __string(dname, dev_name(dev)) 60 + __field(u64, cpu_addr) 61 + __field(u64, dma_addr) 62 + __field(u32, size) 63 + __field(const char *, caller) 64 + ), 65 + 66 + TP_fast_assign( 67 + __assign_str(dname, dev_name(dev)); 68 + __entry->cpu_addr = cpu_addr; 69 + __entry->dma_addr = dma_addr; 70 + __entry->size = size; 71 + __entry->caller = caller; 72 + ), 73 + 74 + TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#x, caller: %s", 75 + __get_str(dname), 76 + __entry->cpu_addr, 77 + __entry->dma_addr, 78 + __entry->size, 79 + __entry->caller) 80 + ); 81 + 82 + DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_alloc, 83 + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller), 84 + TP_ARGS(dev, cpu_addr, dma_addr, size, caller)); 85 + 86 + DEFINE_EVENT(habanalabs_dma_alloc_template, habanalabs_dma_free, 87 + TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller), 88 + TP_ARGS(dev, cpu_addr, dma_addr, size, caller)); 89 + 90 + #endif /* if !defined(_TRACE_HABANALABS_H) || defined(TRACE_HEADER_MULTI_READ) */ 91 + 92 + /* This part must be outside protection */ 93 + #include <trace/define_trace.h>
+112 -25
include/uapi/misc/habanalabs.h
··· 707 707 HL_SERVER_GAUDI2_HLS2 = 5 708 708 }; 709 709 710 + /* 711 + * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command 712 + * 713 + * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event 714 + * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code 715 + * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset 716 + * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error 717 + * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable 718 + * HL_NOTIFIER_EVENT_USER_ENGINE_ERR - Indicates device engine in error state 719 + * HL_NOTIFIER_EVENT_GENERAL_HW_ERR - Indicates device HW error 720 + */ 721 + #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) 722 + #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) 723 + #define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) 724 + #define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) 725 + #define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) 726 + #define HL_NOTIFIER_EVENT_USER_ENGINE_ERR (1ULL << 5) 727 + #define HL_NOTIFIER_EVENT_GENERAL_HW_ERR (1ULL << 6) 728 + 710 729 /* Opcode for management ioctl 711 730 * 712 731 * HW_IP_INFO - Receive information about different IP blocks in the ··· 773 754 * Razwi initiator. 774 755 * Razwi cause, was it a page fault or MMU access error. 775 756 * HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES - Retrieve valid page sizes for device memory allocation 757 + * HL_INFO_SECURED_ATTESTATION - Retrieve attestation report of the boot. 776 758 * HL_INFO_REGISTER_EVENTFD - Register eventfd for event notifications. 
777 759 * HL_INFO_UNREGISTER_EVENTFD - Unregister eventfd 778 760 * HL_INFO_GET_EVENTS - Retrieve the last occurred events ··· 803 783 #define HL_INFO_CS_TIMEOUT_EVENT 24 804 784 #define HL_INFO_RAZWI_EVENT 25 805 785 #define HL_INFO_DEV_MEM_ALLOC_PAGE_SIZES 26 786 + #define HL_INFO_SECURED_ATTESTATION 27 806 787 #define HL_INFO_REGISTER_EVENTFD 28 807 788 #define HL_INFO_UNREGISTER_EVENTFD 29 808 789 #define HL_INFO_GET_EVENTS 30 809 790 #define HL_INFO_UNDEFINED_OPCODE_EVENT 31 791 + #define HL_INFO_ENGINE_STATUS 32 810 792 811 793 #define HL_INFO_VERSION_MAX_LEN 128 812 794 #define HL_INFO_CARD_NAME_MAX_LEN 16 795 + 796 + /* Maximum buffer size for retrieving engines status */ 797 + #define HL_ENGINES_DATA_MAX_SIZE SZ_1M 813 798 814 799 /** 815 800 * struct hl_info_hw_ip_info - hardware information on various IPs in the ASIC ··· 846 821 * @tpc_enabled_mask: Bit-mask that represents which TPCs are enabled. Relevant 847 822 * for Goya/Gaudi only. 848 823 * @dram_enabled: Whether the DRAM is enabled. 824 + * @security_enabled: Whether security is enabled on device. 849 825 * @mme_master_slave_mode: Indicate whether the MME is working in master/slave 850 826 * configuration. Relevant for Greco and later. 851 827 * @cpucp_version: The CPUCP f/w version. ··· 878 852 __u32 psoc_pci_pll_div_factor; 879 853 __u8 tpc_enabled_mask; 880 854 __u8 dram_enabled; 881 - __u8 reserved; 855 + __u8 security_enabled; 882 856 __u8 mme_master_slave_mode; 883 857 __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN]; 884 858 __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN]; ··· 902 876 __u32 is_idle; 903 877 /* 904 878 * Bitmask of busy engines. 905 - * Bits definition is according to `enum <chip>_enging_id'. 879 + * Bits definition is according to `enum <chip>_engine_id'. 906 880 */ 907 881 __u32 busy_engines_mask; 908 882 909 883 /* 910 884 * Extended Bitmask of busy engines. 911 - * Bits definition is according to `enum <chip>_enging_id'. 
885 + * Bits definition is according to `enum <chip>_engine_id'. 912 886 */ 913 887 __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE]; 914 888 }; ··· 1104 1078 * struct hl_info_undefined_opcode_event - info about last undefined opcode error 1105 1079 * @timestamp: timestamp of the undefined opcode error 1106 1080 * @cb_addr_streams: CB addresses (per stream) that are currently exists in the PQ 1107 - * entiers. In case all streams array entries are 1081 + * entries. In case all streams array entries are 1108 1082 * filled with values, it means the execution was in Lower-CP. 1109 1083 * @cq_addr: the address of the current handled command buffer 1110 1084 * @cq_size: the size of the current handled command buffer 1111 1085 * @cb_addr_streams_len: num of streams - actual len of cb_addr_streams array. 1112 - * should be equal to 1 incase of undefined opcode 1086 + * should be equal to 1 in case of undefined opcode 1113 1087 * in Upper-CP (specific stream) and equal to 4 incase 1114 1088 * of undefined opcode in Lower-CP. 1115 1089 * @engine_id: engine-id that the error occurred on ··· 1135 1109 __u64 page_order_bitmask; 1136 1110 }; 1137 1111 1112 + #define SEC_PCR_DATA_BUF_SZ 256 1113 + #define SEC_PCR_QUOTE_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ 1114 + #define SEC_SIGNATURE_BUF_SZ 255 /* (256 - 1) 1 byte used for size */ 1115 + #define SEC_PUB_DATA_BUF_SZ 510 /* (512 - 2) 2 bytes used for size */ 1116 + #define SEC_CERTIFICATE_BUF_SZ 2046 /* (2048 - 2) 2 bytes used for size */ 1117 + 1118 + /* 1119 + * struct hl_info_sec_attest - attestation report of the boot 1120 + * @nonce: number only used once. random number provided by host. this also passed to the quote 1121 + * command as a qualifying data. 
1122 + * @pcr_quote_len: length of the attestation quote data (bytes) 1123 + * @pub_data_len: length of the public data (bytes) 1124 + * @certificate_len: length of the certificate (bytes) 1125 + * @pcr_num_reg: number of PCR registers in the pcr_data array 1126 + * @pcr_reg_len: length of each PCR register in the pcr_data array (bytes) 1127 + * @quote_sig_len: length of the attestation report signature (bytes) 1128 + * @pcr_data: raw values of the PCR registers 1129 + * @pcr_quote: attestation report data structure 1130 + * @quote_sig: signature structure of the attestation report 1131 + * @public_data: public key for the signed attestation 1132 + * (outPublic + name + qualifiedName) 1133 + * @certificate: certificate for the attestation signing key 1134 + */ 1135 + struct hl_info_sec_attest { 1136 + __u32 nonce; 1137 + __u16 pcr_quote_len; 1138 + __u16 pub_data_len; 1139 + __u16 certificate_len; 1140 + __u8 pcr_num_reg; 1141 + __u8 pcr_reg_len; 1142 + __u8 quote_sig_len; 1143 + __u8 pcr_data[SEC_PCR_DATA_BUF_SZ]; 1144 + __u8 pcr_quote[SEC_PCR_QUOTE_BUF_SZ]; 1145 + __u8 quote_sig[SEC_SIGNATURE_BUF_SZ]; 1146 + __u8 public_data[SEC_PUB_DATA_BUF_SZ]; 1147 + __u8 certificate[SEC_CERTIFICATE_BUF_SZ]; 1148 + __u8 pad0[2]; 1149 + }; 1150 + 1138 1151 enum gaudi_dcores { 1139 1152 HL_GAUDI_WS_DCORE, 1140 1153 HL_GAUDI_WN_DCORE, ··· 1195 1130 * resolution. Currently not in use. 1196 1131 * @pll_index: Index as defined in hl_<asic type>_pll_index enumeration. 1197 1132 * @eventfd: event file descriptor for event notifications. 1133 + * @user_buffer_actual_size: Actual data size which was copied to user allocated buffer by the 1134 + * driver. It is possible for the user to allocate buffer larger than 1135 + * needed, hence updating this variable so user will know the exact amount 1136 + * of bytes copied by the kernel to the buffer. 1137 + * @sec_attest_nonce: Nonce number used for attestation report. 1198 1138 * @pad: Padding to 64 bit. 
1199 1139 */ 1200 1140 struct hl_info_args { ··· 1213 1143 __u32 period_ms; 1214 1144 __u32 pll_index; 1215 1145 __u32 eventfd; 1146 + __u32 user_buffer_actual_size; 1147 + __u32 sec_attest_nonce; 1216 1148 }; 1217 1149 1218 1150 __u32 pad; ··· 1409 1337 #define HL_CS_FLAGS_RESERVE_SIGNALS_ONLY 0x1000 1410 1338 #define HL_CS_FLAGS_UNRESERVE_SIGNALS_ONLY 0x2000 1411 1339 1340 + /* 1341 + * The engine cores CS is merged into the existing CS ioctls. 1342 + * Use it to control the engine cores mode. 1343 + */ 1344 + #define HL_CS_FLAGS_ENGINE_CORE_COMMAND 0x4000 1345 + 1412 1346 #define HL_CS_STATUS_SUCCESS 0 1413 1347 1414 1348 #define HL_MAX_JOBS_PER_CS 512 1415 1349 1350 + /* HL_ENGINE_CORE_ values 1351 + * 1352 + * HL_ENGINE_CORE_HALT: engine core halt 1353 + * HL_ENGINE_CORE_RUN: engine core run 1354 + */ 1355 + #define HL_ENGINE_CORE_HALT (1 << 0) 1356 + #define HL_ENGINE_CORE_RUN (1 << 1) 1357 + 1416 1358 struct hl_cs_in { 1417 1359 1418 - /* this holds address of array of hl_cs_chunk for restore phase */ 1419 - __u64 chunks_restore; 1360 + union { 1361 + struct { 1362 + /* this holds address of array of hl_cs_chunk for restore phase */ 1363 + __u64 chunks_restore; 1420 1364 1421 - /* holds address of array of hl_cs_chunk for execution phase */ 1422 - __u64 chunks_execute; 1365 + /* holds address of array of hl_cs_chunk for execution phase */ 1366 + __u64 chunks_execute; 1367 + }; 1368 + 1369 + /* Valid only when HL_CS_FLAGS_ENGINE_CORE_COMMAND is set */ 1370 + struct { 1371 + /* this holds address of array of uint32 for engine_cores */ 1372 + __u64 engine_cores; 1373 + 1374 + /* number of engine cores in engine_cores array */ 1375 + __u32 num_engine_cores; 1376 + 1377 + /* the core command to be sent towards engine cores */ 1378 + __u32 core_command; 1379 + }; 1380 + }; 1423 1381 1424 1382 union { 1425 1383 /* ··· 1514 1412 1515 1413 /* Valid only when HL_CS_FLAGS_RESERVE_SIGNALS_ONLY is set */ 1516 1414 struct { 1517 - /* This is the resereved signal handle id 
*/ 1415 + /* This is the reserved signal handle id */ 1518 1416 __u32 handle_id; 1519 1417 1520 1418 /* This is the signals count */ ··· 1975 1873 /* Context ID - Currently not in use */ 1976 1874 __u32 ctx_id; 1977 1875 }; 1978 - 1979 - /* 1980 - * Notifier event values - for the notification mechanism and the HL_INFO_GET_EVENTS command 1981 - * 1982 - * HL_NOTIFIER_EVENT_TPC_ASSERT - Indicates TPC assert event 1983 - * HL_NOTIFIER_EVENT_UNDEFINED_OPCODE - Indicates undefined operation code 1984 - * HL_NOTIFIER_EVENT_DEVICE_RESET - Indicates device requires a reset 1985 - * HL_NOTIFIER_EVENT_CS_TIMEOUT - Indicates CS timeout error 1986 - * HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE - Indicates device is unavailable 1987 - */ 1988 - #define HL_NOTIFIER_EVENT_TPC_ASSERT (1ULL << 0) 1989 - #define HL_NOTIFIER_EVENT_UNDEFINED_OPCODE (1ULL << 1) 1990 - #define HL_NOTIFIER_EVENT_DEVICE_RESET (1ULL << 2) 1991 - #define HL_NOTIFIER_EVENT_CS_TIMEOUT (1ULL << 3) 1992 - #define HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE (1ULL << 4) 1993 1876 1994 1877 /* 1995 1878 * Various information operations such as: