Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux into char-misc-next

Oded writes:

This tag contains the following changes for 5.12-rc1:

- Improve communication protocol with device CPU CP application.
The change prevents random (rare) out-of-sync errors.

- Notify F/W to start sending events only after initialization of
the device is done. This fixes the issue where fatal events were
received but ignored.

- Fix integer handling (static analysis warning).

- Always fetch HBM ECC errors from F/W (if available).

- Minor fix in GAUDI-specific initialization code.

* tag 'misc-habanalabs-next-2021-02-08' of https://git.kernel.org/pub/scm/linux/kernel/git/ogabbay/linux:
habanalabs/gaudi: don't enable clock gating on DMA5
habanalabs: return block size + block ID
habanalabs: update security map after init CPU Qs
habanalabs: enable F/W events after init done
habanalabs/gaudi: use HBM_ECC_EN bit for ECC ERR
habanalabs: support fetching first available user CQ
habanalabs: improve communication protocol with cpucp
habanalabs: fix integer handling issue

+149 -40
+20 -5
drivers/misc/habanalabs/common/device.c
··· 1159 1159 atomic_set(&hdev->in_reset, 0); 1160 1160 hdev->needs_reset = false; 1161 1161 1162 - if (hard_reset) 1163 - hdev->hard_reset_cnt++; 1164 - else 1165 - hdev->soft_reset_cnt++; 1162 + dev_notice(hdev->dev, "Successfully finished resetting the device\n"); 1166 1163 1167 - dev_warn(hdev->dev, "Successfully finished resetting the device\n"); 1164 + if (hard_reset) { 1165 + hdev->hard_reset_cnt++; 1166 + 1167 + /* After reset is done, we are ready to receive events from 1168 + * the F/W. We can't do it before because we will ignore events 1169 + * and if those events are fatal, we won't know about it and 1170 + * the device will be operational although it shouldn't be 1171 + */ 1172 + hdev->asic_funcs->enable_events_from_fw(hdev); 1173 + } else { 1174 + hdev->soft_reset_cnt++; 1175 + } 1168 1176 1169 1177 return 0; 1170 1178 ··· 1422 1414 "Successfully added device to habanalabs driver\n"); 1423 1415 1424 1416 hdev->init_done = true; 1417 + 1418 + /* After initialization is done, we are ready to receive events from 1419 + * the F/W. We can't do it before because we will ignore events and if 1420 + * those events are fatal, we won't know about it and the device will 1421 + * be operational although it shouldn't be 1422 + */ 1423 + hdev->asic_funcs->enable_events_from_fw(hdev); 1425 1424 1426 1425 return 0; 1427 1426
+12 -2
drivers/misc/habanalabs/common/firmware_if.c
··· 90 90 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg, 91 91 u16 len, u32 timeout, u64 *result) 92 92 { 93 + struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id]; 93 94 struct cpucp_packet *pkt; 94 95 dma_addr_t pkt_dma_addr; 95 - u32 tmp; 96 + u32 tmp, expected_ack_val; 96 97 int rc = 0; 97 98 98 99 pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len, ··· 116 115 goto out; 117 116 } 118 117 118 + /* set fence to a non valid value */ 119 + pkt->fence = UINT_MAX; 120 + 119 121 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr); 120 122 if (rc) { 121 123 dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc); 122 124 goto out; 123 125 } 124 126 127 + if (hdev->asic_prop.fw_app_security_map & 128 + CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN) 129 + expected_ack_val = queue->pi; 130 + else 131 + expected_ack_val = CPUCP_PACKET_FENCE_VAL; 132 + 125 133 rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp, 126 - (tmp == CPUCP_PACKET_FENCE_VAL), 1000, 134 + (tmp == expected_ack_val), 1000, 127 135 timeout, true); 128 136 129 137 hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
+12 -3
drivers/misc/habanalabs/common/habanalabs.h
··· 411 411 * @first_available_user_mon: first monitor available for the user 412 412 * @first_available_user_msix_interrupt: first available msix interrupt 413 413 * reserved for the user 414 + * @first_available_cq: first available CQ for the user. 414 415 * @tpc_enabled_mask: which TPCs are enabled. 415 416 * @completion_queues_count: number of completion queues. 416 417 * @fw_security_disabled: true if security measures are disabled in firmware, ··· 474 473 u16 first_available_user_sob[HL_MAX_DCORES]; 475 474 u16 first_available_user_mon[HL_MAX_DCORES]; 476 475 u16 first_available_user_msix_interrupt; 476 + u16 first_available_cq[HL_MAX_DCORES]; 477 477 u8 tpc_enabled_mask; 478 478 u8 completion_queues_count; 479 479 u8 fw_security_disabled; ··· 857 855 * and place them in the relevant cs jobs 858 856 * @collective_wait_create_jobs: allocate collective wait cs jobs 859 857 * @scramble_addr: Routine to scramble the address prior of mapping it 860 - * in the MMU. 858 + * in the MMU. 861 859 * @descramble_addr: Routine to de-scramble the address prior of 862 - * showing it to users. 860 + * showing it to users. 863 861 * @ack_protection_bits_errors: ack and dump all security violations 864 862 * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it. 863 + * also returns the size of the block if caller supplies 864 + * a valid pointer for it 865 865 * @hw_block_mmap: mmap a HW block with a given id. 866 + * @enable_events_from_fw: send interrupt to firmware to notify them the 867 + * driver is ready to receive asynchronous events. 
This 868 + * function should be called during the first init and 869 + * after every hard-reset of the device 866 870 */ 867 871 struct hl_asic_funcs { 868 872 int (*early_init)(struct hl_device *hdev); ··· 982 974 u64 (*descramble_addr)(struct hl_device *hdev, u64 addr); 983 975 void (*ack_protection_bits_errors)(struct hl_device *hdev); 984 976 int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr, 985 - u32 *block_id); 977 + u32 *block_size, u32 *block_id); 986 978 int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma, 987 979 u32 block_id, u32 block_size); 980 + void (*enable_events_from_fw)(struct hl_device *hdev); 988 981 }; 989 982 990 983
+2 -1
drivers/misc/habanalabs/common/habanalabs_ioctl.c
··· 397 397 prop->first_available_user_sob[args->dcore_id]; 398 398 sm_info.first_available_monitor = 399 399 prop->first_available_user_mon[args->dcore_id]; 400 - 400 + sm_info.first_available_cq = 401 + prop->first_available_cq[args->dcore_id]; 401 402 402 403 return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size, 403 404 sizeof(sm_info))) ? -EFAULT : 0;
+11 -8
drivers/misc/habanalabs/common/memory.c
··· 1289 1289 return rc; 1290 1290 } 1291 1291 1292 - static int map_block(struct hl_device *hdev, u64 address, u64 *handle) 1292 + static int map_block(struct hl_device *hdev, u64 address, u64 *handle, 1293 + u32 *size) 1293 1294 { 1294 1295 u32 block_id = 0; 1295 1296 int rc; 1296 1297 1297 - rc = hdev->asic_funcs->get_hw_block_id(hdev, address, &block_id); 1298 + rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id); 1298 1299 1299 1300 *handle = block_id | HL_MMAP_TYPE_BLOCK; 1300 1301 *handle <<= PAGE_SHIFT; ··· 1372 1371 struct hl_device *hdev = hpriv->hdev; 1373 1372 struct hl_ctx *ctx = hpriv->ctx; 1374 1373 u64 block_handle, device_addr = 0; 1375 - u32 handle = 0; 1374 + u32 handle = 0, block_size; 1376 1375 int rc; 1377 1376 1378 1377 switch (args->in.op) { ··· 1417 1416 1418 1417 case HL_MEM_OP_MAP_BLOCK: 1419 1418 rc = map_block(hdev, args->in.map_block.block_addr, 1420 - &block_handle); 1421 - args->out.handle = block_handle; 1419 + &block_handle, &block_size); 1420 + args->out.block_handle = block_handle; 1421 + args->out.block_size = block_size; 1422 1422 break; 1423 1423 1424 1424 default: ··· 1439 1437 struct hl_device *hdev = hpriv->hdev; 1440 1438 struct hl_ctx *ctx = hpriv->ctx; 1441 1439 u64 block_handle, device_addr = 0; 1442 - u32 handle = 0; 1440 + u32 handle = 0, block_size; 1443 1441 int rc; 1444 1442 1445 1443 if (!hl_device_operational(hdev, &status)) { ··· 1526 1524 1527 1525 case HL_MEM_OP_MAP_BLOCK: 1528 1526 rc = map_block(hdev, args->in.map_block.block_addr, 1529 - &block_handle); 1530 - args->out.handle = block_handle; 1527 + &block_handle, &block_size); 1528 + args->out.block_handle = block_handle; 1529 + args->out.block_size = block_size; 1531 1530 break; 1532 1531 1533 1532 default:
+1 -1
drivers/misc/habanalabs/common/mmu/mmu.c
··· 507 507 p = (char *)p + hop0_shift_off; 508 508 p = (char *)p + ((hops->used_hops - 1) * sizeof(u64)); 509 509 hop_shift = *(u64 *)p; 510 - offset_mask = (1 << hop_shift) - 1; 510 + offset_mask = (1ull << hop_shift) - 1; 511 511 addr_mask = ~(offset_mask); 512 512 *phys_addr = (tmp_phys_addr & addr_mask) | 513 513 (virt_addr & offset_mask);
+41 -8
drivers/misc/habanalabs/gaudi/gaudi.c
··· 529 529 530 530 prop->first_available_user_msix_interrupt = USHRT_MAX; 531 531 532 + for (i = 0 ; i < HL_MAX_DCORES ; i++) 533 + prop->first_available_cq[i] = USHRT_MAX; 534 + 532 535 /* disable fw security for now, set it in a later stage */ 533 536 prop->fw_security_disabled = true; 534 537 prop->fw_security_status_valid = false; ··· 1381 1378 dev_err(hdev->dev, "Failed to enable PCI access from CPU\n"); 1382 1379 return rc; 1383 1380 } 1384 - 1385 - WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); 1386 1381 1387 1382 rc = gaudi_fetch_psoc_frequency(hdev); 1388 1383 if (rc) { ··· 3460 3459 enable = !!(hdev->clock_gating_mask & 3461 3460 (BIT_ULL(gaudi_dma_assignment[i]))); 3462 3461 3462 + /* GC sends work to DMA engine through Upper CP in DMA5 so 3463 + * we need to not enable clock gating in that DMA 3464 + */ 3465 + if (i == GAUDI_HBM_DMA_4) 3466 + enable = 0; 3467 + 3463 3468 qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET; 3464 3469 WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset, 3465 3470 enable ? 
QMAN_CGM1_PWR_GATE_EN : 0); ··· 3732 3725 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout) 3733 3726 { 3734 3727 struct gaudi_device *gaudi = hdev->asic_specific; 3728 + struct asic_fixed_properties *prop = &hdev->asic_prop; 3735 3729 struct hl_eq *eq; 3736 3730 u32 status; 3737 3731 struct hl_hw_queue *cpu_pq = ··· 3788 3780 "Failed to communicate with Device CPU (CPU-CP timeout)\n"); 3789 3781 return -EIO; 3790 3782 } 3783 + 3784 + /* update FW application security bits */ 3785 + if (prop->fw_security_status_valid) 3786 + prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0); 3791 3787 3792 3788 gaudi->hw_cap_initialized |= HW_CAP_CPU_Q; 3793 3789 return 0; ··· 4450 4438 /* ring the doorbell */ 4451 4439 WREG32(db_reg_offset, db_value); 4452 4440 4453 - if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) 4441 + if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) { 4442 + /* make sure device CPU will read latest data from host */ 4443 + mb(); 4454 4444 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, 4455 4445 GAUDI_EVENT_PI_UPDATE); 4446 + } 4456 4447 } 4457 4448 4458 4449 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe, ··· 7113 7098 u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch; 7114 7099 int err = 0; 7115 7100 7116 - if (!hdev->asic_prop.fw_security_disabled) { 7101 + if (hdev->asic_prop.fw_security_status_valid && 7102 + (hdev->asic_prop.fw_app_security_map & 7103 + CPU_BOOT_DEV_STS0_HBM_ECC_EN)) { 7117 7104 if (!hbm_ecc_data) { 7118 7105 dev_err(hdev->dev, "No FW ECC data"); 7119 7106 return 0; ··· 7137 7120 le32_to_cpu(hbm_ecc_data->hbm_ecc_info)); 7138 7121 7139 7122 dev_err(hdev->dev, 7140 - "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7141 - device, ch, type, wr_par, rd_par, ca_par, serr, derr); 7123 + "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n", 7124 + device, ch, wr_par, rd_par, ca_par, serr, derr); 7125 + dev_err(hdev->dev, 7126 + "HBM%d 
pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n", 7127 + device, ch, hbm_ecc_data->first_addr, type, 7128 + hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt, 7129 + hbm_ecc_data->dec_cnt); 7142 7130 7143 7131 err = 1; 7144 7132 7133 + return 0; 7134 + } 7135 + 7136 + if (!hdev->asic_prop.fw_security_disabled) { 7137 + dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n"); 7145 7138 return 0; 7146 7139 } 7147 7140 ··· 8496 8469 } 8497 8470 8498 8471 static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 8499 - u32 *block_id) 8472 + u32 *block_size, u32 *block_id) 8500 8473 { 8501 8474 return -EPERM; 8502 8475 } ··· 8506 8479 u32 block_id, u32 block_size) 8507 8480 { 8508 8481 return -EPERM; 8482 + } 8483 + 8484 + static void gaudi_enable_events_from_fw(struct hl_device *hdev) 8485 + { 8486 + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER); 8509 8487 } 8510 8488 8511 8489 static const struct hl_asic_funcs gaudi_funcs = { ··· 8594 8562 .descramble_addr = hl_mmu_descramble_addr, 8595 8563 .ack_protection_bits_errors = gaudi_ack_protection_bits_errors, 8596 8564 .get_hw_block_id = gaudi_get_hw_block_id, 8597 - .hw_block_mmap = gaudi_block_mmap 8565 + .hw_block_mmap = gaudi_block_mmap, 8566 + .enable_events_from_fw = gaudi_enable_events_from_fw 8598 8567 }; 8599 8568 8600 8569 /**
+21 -6
drivers/misc/habanalabs/goya/goya.c
··· 457 457 458 458 prop->first_available_user_msix_interrupt = USHRT_MAX; 459 459 460 + for (i = 0 ; i < HL_MAX_DCORES ; i++) 461 + prop->first_available_cq[i] = USHRT_MAX; 462 + 460 463 /* disable fw security for now, set it in a later stage */ 461 464 prop->fw_security_disabled = true; 462 465 prop->fw_security_status_valid = false; ··· 796 793 "Failed to enable PCI access from CPU %d\n", rc); 797 794 return rc; 798 795 } 799 - 800 - WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, 801 - GOYA_ASYNC_EVENT_ID_INTS_REGISTER); 802 796 803 797 return 0; 804 798 } ··· 1188 1188 int goya_init_cpu_queues(struct hl_device *hdev) 1189 1189 { 1190 1190 struct goya_device *goya = hdev->asic_specific; 1191 + struct asic_fixed_properties *prop = &hdev->asic_prop; 1191 1192 struct hl_eq *eq; 1192 1193 u32 status; 1193 1194 struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ]; ··· 1240 1239 "Failed to setup communication with device CPU\n"); 1241 1240 return -EIO; 1242 1241 } 1242 + 1243 + /* update FW application security bits */ 1244 + if (prop->fw_security_status_valid) 1245 + prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0); 1243 1246 1244 1247 goya->hw_cap_initialized |= HW_CAP_CPU_Q; 1245 1248 return 0; ··· 2811 2806 /* ring the doorbell */ 2812 2807 WREG32(db_reg_offset, db_value); 2813 2808 2814 - if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) 2809 + if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) { 2810 + /* make sure device CPU will read latest data from host */ 2811 + mb(); 2815 2812 WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, 2816 2813 GOYA_ASYNC_EVENT_ID_PI_UPDATE); 2814 + } 2817 2815 } 2818 2816 2819 2817 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd) ··· 5390 5382 } 5391 5383 5392 5384 static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr, 5393 - u32 *block_id) 5385 + u32 *block_size, u32 *block_id) 5394 5386 { 5395 5387 return -EPERM; 5396 5388 } ··· 5399 5391 u32 block_id, u32 block_size) 5400 5392 { 5401 
5393 return -EPERM; 5394 + } 5395 + 5396 + static void goya_enable_events_from_fw(struct hl_device *hdev) 5397 + { 5398 + WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, 5399 + GOYA_ASYNC_EVENT_ID_INTS_REGISTER); 5402 5400 } 5403 5401 5404 5402 static const struct hl_asic_funcs goya_funcs = { ··· 5488 5474 .descramble_addr = hl_mmu_descramble_addr, 5489 5475 .ack_protection_bits_errors = goya_ack_protection_bits_errors, 5490 5476 .get_hw_block_id = goya_get_hw_block_id, 5491 - .hw_block_mmap = goya_block_mmap 5477 + .hw_block_mmap = goya_block_mmap, 5478 + .enable_events_from_fw = goya_enable_events_from_fw 5492 5479 }; 5493 5480 5494 5481 /*
+5
drivers/misc/habanalabs/include/common/hl_boot_if.h
··· 166 166 * FW handles HBM ECC indications. 167 167 * Initialized in: linux 168 168 * 169 + * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN Packets ack value used in the armcpd 170 + * is set to the PI counter. 171 + * Initialized in: linux 172 + * 169 173 * CPU_BOOT_DEV_STS0_ENABLED Device status register enabled. 170 174 * This is a main indication that the 171 175 * running FW populates the device status ··· 194 190 #define CPU_BOOT_DEV_STS0_SP_SRAM_EN (1 << 12) 195 191 #define CPU_BOOT_DEV_STS0_CLK_GATE_EN (1 << 13) 196 192 #define CPU_BOOT_DEV_STS0_HBM_ECC_EN (1 << 14) 193 + #define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN (1 << 15) 197 194 #define CPU_BOOT_DEV_STS0_ENABLED (1 << 31) 198 195 199 196 enum cpu_boot_status {
+24 -6
include/uapi/misc/habanalabs.h
··· 414 414 * struct hl_info_sync_manager - sync manager information 415 415 * @first_available_sync_object: first available sob 416 416 * @first_available_monitor: first available monitor 417 + * @first_available_cq: first available cq 417 418 */ 418 419 struct hl_info_sync_manager { 419 420 __u32 first_available_sync_object; 420 421 __u32 first_available_monitor; 422 + __u32 first_available_cq; 423 + __u32 reserved; 421 424 }; 422 425 423 426 /** ··· 782 779 /* HL_MEM_OP_MAP_BLOCK - map a hw block */ 783 780 struct { 784 781 /* 785 - * HW block address to map, a handle will be returned 786 - * to the user and will be used to mmap the relevant 787 - * block. Only addresses from configuration space are 788 - * allowed. 782 + * HW block address to map, a handle and size will be 783 + * returned to the user and will be used to mmap the 784 + * relevant block. Only addresses from configuration 785 + * space are allowed. 789 786 */ 790 787 __u64 block_addr; 791 788 } map_block; ··· 816 813 __u64 device_virt_addr; 817 814 818 815 /* 819 - * Used for HL_MEM_OP_ALLOC and HL_MEM_OP_MAP_BLOCK. 816 + * Used in HL_MEM_OP_ALLOC 820 817 * This is the assigned handle for the allocated memory 821 - * or mapped block 822 818 */ 823 819 __u64 handle; 820 + 821 + struct { 822 + /* 823 + * Used in HL_MEM_OP_MAP_BLOCK. 824 + * This is the assigned handle for the mapped block 825 + */ 826 + __u64 block_handle; 827 + 828 + /* 829 + * Used in HL_MEM_OP_MAP_BLOCK 830 + * This is the size of the mapped block 831 + */ 832 + __u32 block_size; 833 + 834 + __u32 pad; 835 + }; 824 836 }; 825 837 }; 826 838