Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'drm-habanalabs-next-2025-09-25' of https://github.com/HabanaAI/drivers.accel.habanalabs.kernel into drm-next

This tag contains habanalabs driver changes for v6.18.
It continues the previous upstream work from tags/drm-habanalabs-next-2024-06-23,
including improvements in debug and visibility, alongside general code cleanups,
and new features such as vmalloc-backed coherent mmap, HLDIO infrastructure, etc.

Signed-off-by: Dave Airlie <airlied@redhat.com>
From: "Elbaz, Koby" <koby.elbaz@intel.com>
Link: https://lore.kernel.org/r/da02d370-9967-49d2-9eef-7aeaa40c987c@intel.com

+1442 -25
+23
drivers/accel/habanalabs/Kconfig
··· 27 27 28 28 To compile this driver as a module, choose M here: the 29 29 module will be called habanalabs. 30 + 31 + if DRM_ACCEL_HABANALABS 32 + 33 + config HL_HLDIO 34 + bool "Habanalabs NVMe Direct I/O (HLDIO)" 35 + depends on PCI_P2PDMA 36 + depends on BLOCK 37 + help 38 + Enable NVMe peer-to-peer direct I/O support for Habanalabs AI 39 + accelerators. 40 + 41 + This allows direct data transfers between NVMe storage devices 42 + and Habanalabs accelerators without involving system memory, 43 + using PCI peer-to-peer DMA capabilities. 44 + 45 + Requirements: 46 + - CONFIG_PCI_P2PDMA=y 47 + - NVMe device and Habanalabs accelerator under same PCI root complex 48 + - IOMMU disabled or in passthrough mode 49 + - Hardware supporting PCI P2P DMA 50 + 51 + If unsure, say N 52 + endif # DRM_ACCEL_HABANALABS
+5
drivers/accel/habanalabs/common/Makefile
··· 13 13 common/command_submission.o common/firmware_if.o \ 14 14 common/security.o common/state_dump.o \ 15 15 common/memory_mgr.o common/decoder.o 16 + 17 + # Conditionally add HLDIO support 18 + ifdef CONFIG_HL_HLDIO 19 + HL_COMMON_FILES += common/hldio.o 20 + endif
+324
drivers/accel/habanalabs/common/debugfs.c
··· 6 6 */ 7 7 8 8 #include "habanalabs.h" 9 + #include "hldio.h" 9 10 #include "../include/hw_ip/mmu/mmu_general.h" 10 11 11 12 #include <linux/pci.h> ··· 603 602 return 0; 604 603 } 605 604 605 + #ifdef CONFIG_HL_HLDIO 606 + /* DIO debugfs functions following the standard pattern */ 607 + static int dio_ssd2hl_show(struct seq_file *s, void *data) 608 + { 609 + struct hl_debugfs_entry *entry = s->private; 610 + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; 611 + struct hl_device *hdev = dev_entry->hdev; 612 + 613 + if (!hdev->asic_prop.supports_nvme) { 614 + seq_puts(s, "NVMe Direct I/O not supported\\n"); 615 + return 0; 616 + } 617 + 618 + seq_puts(s, "Usage: echo \"fd=N va=0xADDR off=N len=N\" > dio_ssd2hl\n"); 619 + seq_printf(s, "Last transfer: %zu bytes\\n", dev_entry->dio_stats.last_len_read); 620 + seq_puts(s, "Note: All parameters must be page-aligned (4KB)\\n"); 621 + 622 + return 0; 623 + } 624 + 625 + static ssize_t dio_ssd2hl_write(struct file *file, const char __user *buf, 626 + size_t count, loff_t *f_pos) 627 + { 628 + struct seq_file *s = file->private_data; 629 + struct hl_debugfs_entry *entry = s->private; 630 + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; 631 + struct hl_device *hdev = dev_entry->hdev; 632 + struct hl_ctx *ctx = hdev->kernel_ctx; 633 + char kbuf[128]; 634 + u64 device_va = 0, off_bytes = 0, len_bytes = 0; 635 + u32 fd = 0; 636 + size_t len_read = 0; 637 + int rc, parsed; 638 + 639 + if (!hdev->asic_prop.supports_nvme) 640 + return -EOPNOTSUPP; 641 + 642 + if (count >= sizeof(kbuf)) 643 + return -EINVAL; 644 + 645 + if (copy_from_user(kbuf, buf, count)) 646 + return -EFAULT; 647 + 648 + kbuf[count] = 0; 649 + 650 + /* Parse: fd=N va=0xADDR off=N len=N */ 651 + parsed = sscanf(kbuf, "fd=%u va=0x%llx off=%llu len=%llu", 652 + &fd, &device_va, &off_bytes, &len_bytes); 653 + if (parsed != 4) { 654 + dev_err(hdev->dev, "Invalid format. 
Expected: fd=N va=0xADDR off=N len=N\\n"); 655 + return -EINVAL; 656 + } 657 + 658 + /* Validate file descriptor */ 659 + if (fd == 0) { 660 + dev_err(hdev->dev, "Invalid file descriptor: %u\\n", fd); 661 + return -EINVAL; 662 + } 663 + 664 + /* Validate alignment requirements */ 665 + if (!IS_ALIGNED(device_va, PAGE_SIZE) || 666 + !IS_ALIGNED(off_bytes, PAGE_SIZE) || 667 + !IS_ALIGNED(len_bytes, PAGE_SIZE)) { 668 + dev_err(hdev->dev, 669 + "All parameters must be page-aligned (4KB)\\n"); 670 + return -EINVAL; 671 + } 672 + 673 + /* Validate transfer size */ 674 + if (len_bytes == 0 || len_bytes > SZ_1G) { 675 + dev_err(hdev->dev, "Invalid length: %llu (max 1GB)\\n", 676 + len_bytes); 677 + return -EINVAL; 678 + } 679 + 680 + dev_dbg(hdev->dev, "DIO SSD2HL: fd=%u va=0x%llx off=%llu len=%llu\\n", 681 + fd, device_va, off_bytes, len_bytes); 682 + 683 + rc = hl_dio_ssd2hl(hdev, ctx, fd, device_va, off_bytes, len_bytes, &len_read); 684 + if (rc < 0) { 685 + dev_entry->dio_stats.failed_ops++; 686 + dev_err(hdev->dev, "SSD2HL operation failed: %d\\n", rc); 687 + return rc; 688 + } 689 + 690 + /* Update statistics */ 691 + dev_entry->dio_stats.total_ops++; 692 + dev_entry->dio_stats.successful_ops++; 693 + dev_entry->dio_stats.bytes_transferred += len_read; 694 + dev_entry->dio_stats.last_len_read = len_read; 695 + 696 + dev_dbg(hdev->dev, "DIO SSD2HL completed: %zu bytes transferred\\n", len_read); 697 + 698 + return count; 699 + } 700 + 701 + static int dio_hl2ssd_show(struct seq_file *s, void *data) 702 + { 703 + seq_puts(s, "HL2SSD (device-to-SSD) transfers not implemented\\n"); 704 + return 0; 705 + } 706 + 707 + static ssize_t dio_hl2ssd_write(struct file *file, const char __user *buf, 708 + size_t count, loff_t *f_pos) 709 + { 710 + struct seq_file *s = file->private_data; 711 + struct hl_debugfs_entry *entry = s->private; 712 + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; 713 + struct hl_device *hdev = dev_entry->hdev; 714 + 715 + if 
(!hdev->asic_prop.supports_nvme) 716 + return -EOPNOTSUPP; 717 + 718 + dev_dbg(hdev->dev, "HL2SSD operation not implemented\\n"); 719 + return -EOPNOTSUPP; 720 + } 721 + 722 + static int dio_stats_show(struct seq_file *s, void *data) 723 + { 724 + struct hl_debugfs_entry *entry = s->private; 725 + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; 726 + struct hl_device *hdev = dev_entry->hdev; 727 + struct hl_dio_stats *stats = &dev_entry->dio_stats; 728 + u64 avg_bytes_per_op = 0, success_rate = 0; 729 + 730 + if (!hdev->asic_prop.supports_nvme) { 731 + seq_puts(s, "NVMe Direct I/O not supported\\n"); 732 + return 0; 733 + } 734 + 735 + if (stats->successful_ops > 0) 736 + avg_bytes_per_op = stats->bytes_transferred / stats->successful_ops; 737 + 738 + if (stats->total_ops > 0) 739 + success_rate = (stats->successful_ops * 100) / stats->total_ops; 740 + 741 + seq_puts(s, "=== Habanalabs Direct I/O Statistics ===\\n"); 742 + seq_printf(s, "Total operations: %llu\\n", stats->total_ops); 743 + seq_printf(s, "Successful ops: %llu\\n", stats->successful_ops); 744 + seq_printf(s, "Failed ops: %llu\\n", stats->failed_ops); 745 + seq_printf(s, "Success rate: %llu%%\\n", success_rate); 746 + seq_printf(s, "Total bytes: %llu\\n", stats->bytes_transferred); 747 + seq_printf(s, "Avg bytes per op: %llu\\n", avg_bytes_per_op); 748 + seq_printf(s, "Last transfer: %zu bytes\\n", stats->last_len_read); 749 + 750 + return 0; 751 + } 752 + 753 + static int dio_reset_show(struct seq_file *s, void *data) 754 + { 755 + seq_puts(s, "Write '1' to reset DIO statistics\\n"); 756 + return 0; 757 + } 758 + 759 + static ssize_t dio_reset_write(struct file *file, const char __user *buf, 760 + size_t count, loff_t *f_pos) 761 + { 762 + struct seq_file *s = file->private_data; 763 + struct hl_debugfs_entry *entry = s->private; 764 + struct hl_dbg_device_entry *dev_entry = entry->dev_entry; 765 + struct hl_device *hdev = dev_entry->hdev; 766 + char kbuf[8]; 767 + unsigned long val; 768 + 
int rc; 769 + 770 + if (!hdev->asic_prop.supports_nvme) 771 + return -EOPNOTSUPP; 772 + 773 + if (count >= sizeof(kbuf)) 774 + return -EINVAL; 775 + 776 + if (copy_from_user(kbuf, buf, count)) 777 + return -EFAULT; 778 + 779 + kbuf[count] = 0; 780 + 781 + rc = kstrtoul(kbuf, 0, &val); 782 + if (rc) 783 + return rc; 784 + 785 + if (val == 1) { 786 + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); 787 + dev_dbg(hdev->dev, "DIO statistics reset\\n"); 788 + } else { 789 + dev_err(hdev->dev, "Write '1' to reset statistics\\n"); 790 + return -EINVAL; 791 + } 792 + 793 + return count; 794 + } 795 + #endif 796 + 606 797 static ssize_t hl_memory_scrub(struct file *f, const char __user *buf, 607 798 size_t count, loff_t *ppos) 608 799 { ··· 981 788 } 982 789 } 983 790 791 + static void dump_cfg_access_entry(struct hl_device *hdev, 792 + struct hl_debugfs_cfg_access_entry *entry) 793 + { 794 + char *access_type = ""; 795 + struct tm tm; 796 + 797 + switch (entry->debugfs_type) { 798 + case DEBUGFS_READ32: 799 + access_type = "READ32 from"; 800 + break; 801 + case DEBUGFS_WRITE32: 802 + access_type = "WRITE32 to"; 803 + break; 804 + case DEBUGFS_READ64: 805 + access_type = "READ64 from"; 806 + break; 807 + case DEBUGFS_WRITE64: 808 + access_type = "WRITE64 to"; 809 + break; 810 + default: 811 + dev_err(hdev->dev, "Invalid DEBUGFS access type (%u)\n", entry->debugfs_type); 812 + return; 813 + } 814 + 815 + time64_to_tm(entry->seconds_since_epoch, 0, &tm); 816 + dev_info(hdev->dev, 817 + "%ld-%02d-%02d %02d:%02d:%02d (UTC): %s %#llx\n", tm.tm_year + 1900, tm.tm_mon + 1, 818 + tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, access_type, entry->addr); 819 + } 820 + 821 + void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) 822 + { 823 + struct hl_debugfs_cfg_access *dbgfs = &hdev->debugfs_cfg_accesses; 824 + u32 i, head, count = 0; 825 + time64_t entry_time, now; 826 + unsigned long flags; 827 + 828 + now = ktime_get_real_seconds(); 829 + 830 + 
spin_lock_irqsave(&dbgfs->lock, flags); 831 + head = dbgfs->head; 832 + if (head == 0) 833 + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; 834 + else 835 + i = head - 1; 836 + 837 + /* Walk back until timeout or invalid entry */ 838 + while (dbgfs->cfg_access_list[i].valid) { 839 + entry_time = dbgfs->cfg_access_list[i].seconds_since_epoch; 840 + /* Stop when entry is older than timeout */ 841 + if (now - entry_time > HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC) 842 + break; 843 + 844 + /* print single entry under lock */ 845 + { 846 + struct hl_debugfs_cfg_access_entry entry = dbgfs->cfg_access_list[i]; 847 + /* 848 + * We copy the entry out under lock and then print after 849 + * releasing the lock to minimize time under lock. 850 + */ 851 + spin_unlock_irqrestore(&dbgfs->lock, flags); 852 + dump_cfg_access_entry(hdev, &entry); 853 + spin_lock_irqsave(&dbgfs->lock, flags); 854 + } 855 + 856 + /* mark consumed */ 857 + dbgfs->cfg_access_list[i].valid = false; 858 + 859 + if (i == 0) 860 + i = HL_DBGFS_CFG_ACCESS_HIST_LEN - 1; 861 + else 862 + i--; 863 + count++; 864 + if (count >= HL_DBGFS_CFG_ACCESS_HIST_LEN) 865 + break; 866 + } 867 + spin_unlock_irqrestore(&dbgfs->lock, flags); 868 + } 869 + 870 + static void check_if_cfg_access_and_log(struct hl_device *hdev, u64 addr, size_t access_size, 871 + enum debugfs_access_type access_type) 872 + { 873 + struct hl_debugfs_cfg_access *dbgfs_cfg_accesses = &hdev->debugfs_cfg_accesses; 874 + struct pci_mem_region *mem_reg = &hdev->pci_mem_region[PCI_REGION_CFG]; 875 + struct hl_debugfs_cfg_access_entry *new_entry; 876 + unsigned long flags; 877 + 878 + /* Check if address is in config memory */ 879 + if (addr >= mem_reg->region_base && 880 + mem_reg->region_size >= access_size && 881 + addr <= mem_reg->region_base + mem_reg->region_size - access_size) { 882 + 883 + spin_lock_irqsave(&dbgfs_cfg_accesses->lock, flags); 884 + 885 + new_entry = &dbgfs_cfg_accesses->cfg_access_list[dbgfs_cfg_accesses->head]; 886 + 
new_entry->seconds_since_epoch = ktime_get_real_seconds(); 887 + new_entry->addr = addr; 888 + new_entry->debugfs_type = access_type; 889 + new_entry->valid = true; 890 + dbgfs_cfg_accesses->head = (dbgfs_cfg_accesses->head + 1) 891 + % HL_DBGFS_CFG_ACCESS_HIST_LEN; 892 + 893 + spin_unlock_irqrestore(&dbgfs_cfg_accesses->lock, flags); 894 + 895 + } 896 + } 897 + 984 898 static int hl_access_mem(struct hl_device *hdev, u64 addr, u64 *val, 985 899 enum debugfs_access_type acc_type) 986 900 { ··· 1105 805 return rc; 1106 806 } 1107 807 808 + check_if_cfg_access_and_log(hdev, addr, acc_size, acc_type); 1108 809 rc = hl_access_dev_mem_by_region(hdev, addr, val, acc_type, &found); 1109 810 if (rc) { 1110 811 dev_err(hdev->dev, ··· 1826 1525 {"mmu", mmu_show, mmu_asid_va_write}, 1827 1526 {"mmu_error", mmu_ack_error, mmu_ack_error_value_write}, 1828 1527 {"engines", engines_show, NULL}, 1528 + #ifdef CONFIG_HL_HLDIO 1529 + /* DIO entries - only created if NVMe is supported */ 1530 + {"dio_ssd2hl", dio_ssd2hl_show, dio_ssd2hl_write}, 1531 + {"dio_stats", dio_stats_show, NULL}, 1532 + {"dio_reset", dio_reset_show, dio_reset_write}, 1533 + {"dio_hl2ssd", dio_hl2ssd_show, dio_hl2ssd_write}, 1534 + #endif 1829 1535 }; 1830 1536 1831 1537 static int hl_debugfs_open(struct inode *inode, struct file *file) ··· 2031 1723 &hdev->asic_prop.server_type); 2032 1724 2033 1725 for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) { 1726 + /* Skip DIO entries if NVMe is not supported */ 1727 + if (strncmp(hl_debugfs_list[i].name, "dio_", 4) == 0 && 1728 + !hdev->asic_prop.supports_nvme) 1729 + continue; 1730 + 2034 1731 debugfs_create_file(hl_debugfs_list[i].name, 2035 1732 0644, 2036 1733 root, ··· 2075 1762 spin_lock_init(&dev_entry->userptr_spinlock); 2076 1763 mutex_init(&dev_entry->ctx_mem_hash_mutex); 2077 1764 1765 + spin_lock_init(&hdev->debugfs_cfg_accesses.lock); 1766 + hdev->debugfs_cfg_accesses.head = 0; /* already zero by alloc but explicit init is fine */ 
1767 + 1768 + #ifdef CONFIG_HL_HLDIO 1769 + /* Initialize DIO statistics */ 1770 + memset(&dev_entry->dio_stats, 0, sizeof(dev_entry->dio_stats)); 1771 + #endif 1772 + 2078 1773 return 0; 2079 1774 } 2080 1775 ··· 2101 1780 vfree(entry->state_dump[i]); 2102 1781 2103 1782 kfree(entry->entry_arr); 1783 + 2104 1784 } 2105 1785 2106 1786 void hl_debugfs_add_device(struct hl_device *hdev) ··· 2114 1792 2115 1793 if (!hdev->asic_prop.fw_security_enabled) 2116 1794 add_secured_nodes(dev_entry, dev_entry->root); 1795 + 2117 1796 } 2118 1797 2119 1798 void hl_debugfs_add_file(struct hl_fpriv *hpriv) ··· 2247 1924 2248 1925 up_write(&dev_entry->state_dump_sem); 2249 1926 } 1927 +
+23
drivers/accel/habanalabs/common/device.c
··· 1630 1630 from_watchdog_thread = !!(flags & HL_DRV_RESET_FROM_WD_THR); 1631 1631 reset_upon_device_release = hdev->reset_upon_device_release && from_dev_release; 1632 1632 1633 + if (hdev->cpld_shutdown) { 1634 + dev_err(hdev->dev, "Cannot reset device, cpld is shutdown! Device is NOT usable\n"); 1635 + return -EIO; 1636 + } 1637 + 1633 1638 if (!hard_reset && (hl_device_status(hdev) == HL_DEVICE_STATUS_MALFUNCTION)) { 1634 1639 dev_dbg(hdev->dev, "soft-reset isn't supported on a malfunctioning device\n"); 1635 1640 return 0; ··· 2581 2576 if (rc) 2582 2577 dev_err(hdev->dev, "hw_fini failed in device fini while removing device %d\n", rc); 2583 2578 2579 + /* Reset the H/W (if it accessible). It will be in idle state after this returns */ 2580 + if (!hdev->cpld_shutdown) { 2581 + rc = hdev->asic_funcs->hw_fini(hdev, true, false); 2582 + if (rc) 2583 + dev_err(hdev->dev, 2584 + "hw_fini failed in device fini while removing device %d\n", rc); 2585 + } 2586 + 2584 2587 hdev->fw_loader.fw_comp_loaded = FW_TYPE_NONE; 2585 2588 2586 2589 /* Release kernel context */ ··· 2955 2942 } 2956 2943 2957 2944 mutex_unlock(&clk_throttle->lock); 2945 + } 2946 + 2947 + void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask) 2948 + { 2949 + hl_handle_critical_hw_err(hdev, event_id, event_mask); 2950 + *event_mask |= HL_NOTIFIER_EVENT_DEVICE_UNAVAILABLE; 2951 + 2952 + /* Avoid any new accesses to the H/W */ 2953 + hdev->disabled = true; 2954 + hdev->cpld_shutdown = true; 2958 2955 }
+53 -3
drivers/accel/habanalabs/common/habanalabs.h
··· 90 90 #define HL_COMMON_USER_CQ_INTERRUPT_ID 0xFFF 91 91 #define HL_COMMON_DEC_INTERRUPT_ID 0xFFE 92 92 93 - #define HL_STATE_DUMP_HIST_LEN 5 93 + #define HL_STATE_DUMP_HIST_LEN 5 94 + #define HL_DBGFS_CFG_ACCESS_HIST_LEN 20 95 + #define HL_DBGFS_CFG_ACCESS_HIST_TIMEOUT_SEC 2 /* 2s */ 94 96 95 97 /* Default value for device reset trigger , an invalid value */ 96 98 #define HL_RESET_TRIGGER_DEFAULT 0xFF ··· 704 702 * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported. 705 703 * @supports_engine_modes: true if changing engines/engine_cores modes is supported. 706 704 * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw. 705 + * @supports_nvme: indicates whether the asic supports NVMe P2P DMA. 707 706 */ 708 707 struct asic_fixed_properties { 709 708 struct hw_queue_properties *hw_queues_props; ··· 825 822 u8 supports_advanced_cpucp_rc; 826 823 u8 supports_engine_modes; 827 824 u8 support_dynamic_resereved_fw_size; 825 + u8 supports_nvme; 828 826 }; 829 827 830 828 /** ··· 2278 2274 u8 init_done; 2279 2275 }; 2280 2276 2277 + #ifdef CONFIG_HL_HLDIO 2278 + #include "hldio.h" 2279 + #endif 2281 2280 2282 2281 /* 2283 2282 * DEBUG, PROFILING STRUCTURE ··· 2351 2344 struct mutex ctx_lock; 2352 2345 }; 2353 2346 2354 - 2355 2347 /* 2356 2348 * DebugFS 2357 2349 */ ··· 2377 2371 const struct hl_info_list *info_ent; 2378 2372 struct hl_dbg_device_entry *dev_entry; 2379 2373 }; 2374 + 2380 2375 2381 2376 /** 2382 2377 * struct hl_dbg_device_entry - ASIC specific debugfs manager. ··· 2410 2403 * @i2c_addr: generic u8 debugfs file for address value to use in i2c_data_read. 2411 2404 * @i2c_reg: generic u8 debugfs file for register value to use in i2c_data_read. 2412 2405 * @i2c_len: generic u8 debugfs file for length value to use in i2c_data_read. 
2406 + * @dio_stats: Direct I/O statistics 2413 2407 */ 2414 2408 struct hl_dbg_device_entry { 2415 2409 struct dentry *root; ··· 2442 2434 u8 i2c_addr; 2443 2435 u8 i2c_reg; 2444 2436 u8 i2c_len; 2437 + #ifdef CONFIG_HL_HLDIO 2438 + struct hl_dio_stats dio_stats; 2439 + #endif 2440 + }; 2441 + 2442 + /** 2443 + * struct hl_debugfs_cfg_access_entry - single debugfs config access object, member of 2444 + * hl_debugfs_cfg_access. 2445 + * @seconds_since_epoch: seconds since January 1, 1970, used for time comparisons. 2446 + * @debugfs_type: the debugfs operation requested, can be READ32, WRITE32, READ64 or WRITE64. 2447 + * @addr: the requested address to access. 2448 + * @valid: if set, this entry has valid data for dumping at interrupt time. 2449 + */ 2450 + struct hl_debugfs_cfg_access_entry { 2451 + ktime_t seconds_since_epoch; 2452 + enum debugfs_access_type debugfs_type; 2453 + u64 addr; 2454 + bool valid; 2455 + }; 2456 + 2457 + /** 2458 + * struct hl_debugfs_cfg_access - saves debugfs config region access requests history. 2459 + * @cfg_access_list: list of objects describing config region access requests. 2460 + * @head: next valid index to add new entry to in cfg_access_list. 2461 + */ 2462 + struct hl_debugfs_cfg_access { 2463 + struct hl_debugfs_cfg_access_entry cfg_access_list[HL_DBGFS_CFG_ACCESS_HIST_LEN]; 2464 + u32 head; 2465 + spinlock_t lock; /* protects head and entries */ 2445 2466 }; 2446 2467 2447 2468 /** ··· 3318 3281 * @hl_chip_info: ASIC's sensors information. 3319 3282 * @device_status_description: device status description. 3320 3283 * @hl_debugfs: device's debugfs manager. 3284 + * @debugfs_cfg_accesses: list of last debugfs config region accesses. 3321 3285 * @cb_pool: list of pre allocated CBs. 3322 3286 * @cb_pool_lock: protects the CB pool. 3323 3287 * @internal_cb_pool_virt_addr: internal command buffer pool virtual address. ··· 3343 3305 * @captured_err_info: holds information about errors. 
3344 3306 * @reset_info: holds current device reset information. 3345 3307 * @heartbeat_debug_info: counters used to debug heartbeat failures. 3308 + * @hldio: describes habanalabs direct storage interaction interface. 3346 3309 * @irq_affinity_mask: mask of available CPU cores for user and decoder interrupt handling. 3347 3310 * @stream_master_qid_arr: pointer to array with QIDs of master streams. 3348 3311 * @fw_inner_major_ver: the major of current loaded preboot inner version. ··· 3396 3357 * addresses. 3397 3358 * @is_in_dram_scrub: true if dram scrub operation is on going. 3398 3359 * @disabled: is device disabled. 3360 + * @cpld_shutdown: is cpld shutdown. 3399 3361 * @late_init_done: is late init stage was done during initialization. 3400 3362 * @hwmon_initialized: is H/W monitor sensors was initialized. 3401 3363 * @reset_on_lockup: true if a reset should be done in case of stuck CS, false ··· 3501 3461 struct hwmon_chip_info *hl_chip_info; 3502 3462 3503 3463 struct hl_dbg_device_entry hl_debugfs; 3464 + struct hl_debugfs_cfg_access debugfs_cfg_accesses; 3504 3465 3505 3466 struct list_head cb_pool; 3506 3467 spinlock_t cb_pool_lock; ··· 3537 3496 struct hl_reset_info reset_info; 3538 3497 3539 3498 struct eq_heartbeat_debug_info heartbeat_debug_info; 3540 - 3499 + #ifdef CONFIG_HL_HLDIO 3500 + struct hl_dio hldio; 3501 + #endif 3541 3502 cpumask_t irq_affinity_mask; 3542 3503 3543 3504 u32 *stream_master_qid_arr; ··· 3575 3532 u16 cpu_pci_msb_addr; 3576 3533 u8 is_in_dram_scrub; 3577 3534 u8 disabled; 3535 + u8 cpld_shutdown; 3578 3536 u8 late_init_done; 3579 3537 u8 hwmon_initialized; 3580 3538 u8 reset_on_lockup; ··· 4133 4089 void hl_set_irq_affinity(struct hl_device *hdev, int irq); 4134 4090 void hl_eq_heartbeat_event_handle(struct hl_device *hdev); 4135 4091 void hl_handle_clk_change_event(struct hl_device *hdev, u16 event_type, u64 *event_mask); 4092 + void hl_eq_cpld_shutdown_event_handle(struct hl_device *hdev, u16 event_id, u64 *event_mask); 
4136 4093 4137 4094 #ifdef CONFIG_DEBUG_FS 4138 4095 ··· 4155 4110 void hl_debugfs_remove_ctx_mem_hash(struct hl_device *hdev, struct hl_ctx *ctx); 4156 4111 void hl_debugfs_set_state_dump(struct hl_device *hdev, char *data, 4157 4112 unsigned long length); 4113 + void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev); 4158 4114 4159 4115 #else 4160 4116 ··· 4228 4182 4229 4183 static inline void hl_debugfs_set_state_dump(struct hl_device *hdev, 4230 4184 char *data, unsigned long length) 4185 + { 4186 + } 4187 + 4188 + static inline void hl_debugfs_cfg_access_history_dump(struct hl_device *hdev) 4231 4189 { 4232 4190 } 4233 4191
+6
drivers/accel/habanalabs/common/habanalabs_ioctl.c
··· 961 961 case HL_PASSTHROUGH_VERSIONS: 962 962 need_input_buff = false; 963 963 break; 964 + case HL_GET_ERR_COUNTERS_CMD: 965 + need_input_buff = true; 966 + break; 967 + case HL_GET_P_STATE: 968 + need_input_buff = false; 969 + break; 964 970 default: 965 971 return -EINVAL; 966 972 }
+437
drivers/accel/habanalabs/common/hldio.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + 3 + /* 4 + * Copyright 2024 HabanaLabs, Ltd. 5 + * All Rights Reserved. 6 + */ 7 + 8 + #include "habanalabs.h" 9 + #include "hldio.h" 10 + #include <generated/uapi/linux/version.h> 11 + #include <linux/pci-p2pdma.h> 12 + #include <linux/blkdev.h> 13 + #include <linux/vmalloc.h> 14 + 15 + /* 16 + * NVMe Direct I/O implementation for habanalabs driver 17 + * 18 + * ASSUMPTIONS 19 + * =========== 20 + * 1. No IOMMU (well, technically it can work with IOMMU, but it is *almost useless). 21 + * 2. Only READ operations (can extend in the future). 22 + * 3. No sparse files (can overcome this in the future). 23 + * 4. Kernel version >= 6.9 24 + * 5. Requiring page alignment is OK (I don't see a solution to this one right, 25 + * now, how do we read partial pages?) 26 + * 6. Kernel compiled with CONFIG_PCI_P2PDMA. This requires a CUSTOM kernel. 27 + * Theoretically I have a slight idea on how this could be solvable, but it 28 + * is probably inacceptable for the upstream. Also may not work in the end. 29 + * 7. Either make sure our cards and disks are under the same PCI bridge, or 30 + * compile a custom kernel to hack around this. 31 + */ 32 + 33 + #define IO_STABILIZE_TIMEOUT 10000000 /* 10 seconds in microseconds */ 34 + 35 + /* 36 + * This struct contains all the useful data I could milk out of the file handle 37 + * provided by the user. 38 + * @TODO: right now it is retrieved on each IO, but can be done once with some 39 + * dedicated IOCTL, call it for example HL_REGISTER_HANDLE. 
40 + */ 41 + struct hl_dio_fd { 42 + /* Back pointer in case we need it in async completion */ 43 + struct hl_ctx *ctx; 44 + /* Associated fd struct */ 45 + struct file *filp; 46 + }; 47 + 48 + /* 49 + * This is a single IO descriptor 50 + */ 51 + struct hl_direct_io { 52 + struct hl_dio_fd f; 53 + struct kiocb kio; 54 + struct bio_vec *bv; 55 + struct iov_iter iter; 56 + u64 device_va; 57 + u64 off_bytes; 58 + u64 len_bytes; 59 + u32 type; 60 + }; 61 + 62 + bool hl_device_supports_nvme(struct hl_device *hdev) 63 + { 64 + return hdev->asic_prop.supports_nvme; 65 + } 66 + 67 + static int hl_dio_fd_register(struct hl_ctx *ctx, int fd, struct hl_dio_fd *f) 68 + { 69 + struct hl_device *hdev = ctx->hdev; 70 + struct block_device *bd; 71 + struct super_block *sb; 72 + struct inode *inode; 73 + struct gendisk *gd; 74 + struct device *disk_dev; 75 + int rc; 76 + 77 + f->filp = fget(fd); 78 + if (!f->filp) { 79 + rc = -ENOENT; 80 + goto out; 81 + } 82 + 83 + if (!(f->filp->f_flags & O_DIRECT)) { 84 + dev_err(hdev->dev, "file is not in the direct mode\n"); 85 + rc = -EINVAL; 86 + goto fput; 87 + } 88 + 89 + if (!f->filp->f_op->read_iter) { 90 + dev_err(hdev->dev, "read iter is not supported, need to fall back to legacy\n"); 91 + rc = -EINVAL; 92 + goto fput; 93 + } 94 + 95 + inode = file_inode(f->filp); 96 + sb = inode->i_sb; 97 + bd = sb->s_bdev; 98 + gd = bd->bd_disk; 99 + 100 + if (inode->i_blocks << sb->s_blocksize_bits < i_size_read(inode)) { 101 + dev_err(hdev->dev, "sparse files are not currently supported\n"); 102 + rc = -EINVAL; 103 + goto fput; 104 + } 105 + 106 + if (!bd || !gd) { 107 + dev_err(hdev->dev, "invalid block device\n"); 108 + rc = -ENODEV; 109 + goto fput; 110 + } 111 + /* Get the underlying device from the block device */ 112 + disk_dev = disk_to_dev(gd); 113 + if (!dma_pci_p2pdma_supported(disk_dev)) { 114 + dev_err(hdev->dev, "device does not support PCI P2P DMA\n"); 115 + rc = -EOPNOTSUPP; 116 + goto fput; 117 + } 118 + 119 + /* 120 + * @TODO: 
Maybe we need additional checks here 121 + */ 122 + 123 + f->ctx = ctx; 124 + rc = 0; 125 + 126 + goto out; 127 + fput: 128 + fput(f->filp); 129 + out: 130 + return rc; 131 + } 132 + 133 + static void hl_dio_fd_unregister(struct hl_dio_fd *f) 134 + { 135 + fput(f->filp); 136 + } 137 + 138 + static long hl_dio_count_io(struct hl_device *hdev) 139 + { 140 + s64 sum = 0; 141 + int i; 142 + 143 + for_each_possible_cpu(i) 144 + sum += per_cpu(*hdev->hldio.inflight_ios, i); 145 + 146 + return sum; 147 + } 148 + 149 + static bool hl_dio_get_iopath(struct hl_ctx *ctx) 150 + { 151 + struct hl_device *hdev = ctx->hdev; 152 + 153 + if (hdev->hldio.io_enabled) { 154 + this_cpu_inc(*hdev->hldio.inflight_ios); 155 + 156 + /* Avoid race conditions */ 157 + if (!hdev->hldio.io_enabled) { 158 + this_cpu_dec(*hdev->hldio.inflight_ios); 159 + return false; 160 + } 161 + 162 + hl_ctx_get(ctx); 163 + 164 + return true; 165 + } 166 + 167 + return false; 168 + } 169 + 170 + static void hl_dio_put_iopath(struct hl_ctx *ctx) 171 + { 172 + struct hl_device *hdev = ctx->hdev; 173 + 174 + hl_ctx_put(ctx); 175 + this_cpu_dec(*hdev->hldio.inflight_ios); 176 + } 177 + 178 + static void hl_dio_set_io_enabled(struct hl_device *hdev, bool enabled) 179 + { 180 + hdev->hldio.io_enabled = enabled; 181 + } 182 + 183 + static bool hl_dio_validate_io(struct hl_device *hdev, struct hl_direct_io *io) 184 + { 185 + if ((u64)io->device_va & ~PAGE_MASK) { 186 + dev_dbg(hdev->dev, "device address must be 4K aligned\n"); 187 + return false; 188 + } 189 + 190 + if (io->len_bytes & ~PAGE_MASK) { 191 + dev_dbg(hdev->dev, "IO length must be 4K aligned\n"); 192 + return false; 193 + } 194 + 195 + if (io->off_bytes & ~PAGE_MASK) { 196 + dev_dbg(hdev->dev, "IO offset must be 4K aligned\n"); 197 + return false; 198 + } 199 + 200 + return true; 201 + } 202 + 203 + static struct page *hl_dio_va2page(struct hl_device *hdev, struct hl_ctx *ctx, u64 device_va) 204 + { 205 + struct hl_dio *hldio = &hdev->hldio; 206 + u64 
device_pa; 207 + int rc, i; 208 + 209 + rc = hl_mmu_va_to_pa(ctx, device_va, &device_pa); 210 + if (rc) { 211 + dev_err(hdev->dev, "device virtual address translation error: %#llx (%d)", 212 + device_va, rc); 213 + return NULL; 214 + } 215 + 216 + for (i = 0 ; i < hldio->np2prs ; ++i) { 217 + if (device_pa >= hldio->p2prs[i].device_pa && 218 + device_pa < hldio->p2prs[i].device_pa + hldio->p2prs[i].size) 219 + return hldio->p2prs[i].p2ppages[(device_pa - hldio->p2prs[i].device_pa) >> 220 + PAGE_SHIFT]; 221 + } 222 + 223 + return NULL; 224 + } 225 + 226 + static ssize_t hl_direct_io(struct hl_device *hdev, struct hl_direct_io *io) 227 + { 228 + u64 npages, device_va; 229 + ssize_t rc; 230 + int i; 231 + 232 + if (!hl_dio_validate_io(hdev, io)) 233 + return -EINVAL; 234 + 235 + if (!hl_dio_get_iopath(io->f.ctx)) { 236 + dev_info(hdev->dev, "can't schedule a new IO, IO is disabled\n"); 237 + return -ESHUTDOWN; 238 + } 239 + 240 + init_sync_kiocb(&io->kio, io->f.filp); 241 + io->kio.ki_pos = io->off_bytes; 242 + 243 + npages = (io->len_bytes >> PAGE_SHIFT); 244 + 245 + /* @TODO: this can be implemented smarter, vmalloc in iopath is not 246 + * ideal. Maybe some variation of genpool. Number of pages may differ 247 + * greatly, so maybe even use pools of different sizes and chose the 248 + * closest one. 
249 + */ 250 + io->bv = vzalloc(npages * sizeof(struct bio_vec)); 251 + if (!io->bv) 252 + return -ENOMEM; 253 + 254 + for (i = 0, device_va = io->device_va; i < npages ; ++i, device_va += PAGE_SIZE) { 255 + io->bv[i].bv_page = hl_dio_va2page(hdev, io->f.ctx, device_va); 256 + if (!io->bv[i].bv_page) { 257 + dev_err(hdev->dev, "error getting page struct for device va %#llx", 258 + device_va); 259 + rc = -EFAULT; 260 + goto cleanup; 261 + } 262 + io->bv[i].bv_offset = 0; 263 + io->bv[i].bv_len = PAGE_SIZE; 264 + } 265 + 266 + iov_iter_bvec(&io->iter, io->type, io->bv, 1, io->len_bytes); 267 + if (io->f.filp->f_op && io->f.filp->f_op->read_iter) 268 + rc = io->f.filp->f_op->read_iter(&io->kio, &io->iter); 269 + else 270 + rc = -EINVAL; 271 + 272 + cleanup: 273 + vfree(io->bv); 274 + hl_dio_put_iopath(io->f.ctx); 275 + 276 + dev_dbg(hdev->dev, "IO ended with %ld\n", rc); 277 + 278 + return rc; 279 + } 280 + 281 + /* 282 + * @TODO: This function can be used as a callback for io completion under 283 + * kio->ki_complete in order to implement async IO. 284 + * Note that on more recent kernels there is no ret2. 285 + */ 286 + __maybe_unused static void hl_direct_io_complete(struct kiocb *kio, long ret, long ret2) 287 + { 288 + struct hl_direct_io *io = container_of(kio, struct hl_direct_io, kio); 289 + 290 + dev_dbg(io->f.ctx->hdev->dev, "IO completed with %ld\n", ret); 291 + 292 + /* Do something to copy result to user / notify completion */ 293 + 294 + hl_dio_put_iopath(io->f.ctx); 295 + 296 + hl_dio_fd_unregister(&io->f); 297 + } 298 + 299 + /* 300 + * DMA disk to ASIC, wait for results. 
Must be invoked from the user context 301 + */ 302 + int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, 303 + u64 device_va, off_t off_bytes, size_t len_bytes, 304 + size_t *len_read) 305 + { 306 + struct hl_direct_io *io; 307 + ssize_t rc; 308 + 309 + dev_dbg(hdev->dev, "SSD2HL fd=%d va=%#llx len=%#lx\n", fd, device_va, len_bytes); 310 + 311 + io = kzalloc(sizeof(*io), GFP_KERNEL); 312 + if (!io) { 313 + rc = -ENOMEM; 314 + goto out; 315 + } 316 + 317 + *io = (struct hl_direct_io){ 318 + .device_va = device_va, 319 + .len_bytes = len_bytes, 320 + .off_bytes = off_bytes, 321 + .type = READ, 322 + }; 323 + 324 + rc = hl_dio_fd_register(ctx, fd, &io->f); 325 + if (rc) 326 + goto kfree_io; 327 + 328 + rc = hl_direct_io(hdev, io); 329 + if (rc >= 0) { 330 + *len_read = rc; 331 + rc = 0; 332 + } 333 + 334 + /* This shall be called only in the case of a sync IO */ 335 + hl_dio_fd_unregister(&io->f); 336 + kfree_io: 337 + kfree(io); 338 + out: 339 + return rc; 340 + } 341 + 342 + static void hl_p2p_region_fini(struct hl_device *hdev, struct hl_p2p_region *p2pr) 343 + { 344 + if (p2pr->p2ppages) { 345 + vfree(p2pr->p2ppages); 346 + p2pr->p2ppages = NULL; 347 + } 348 + 349 + if (p2pr->p2pmem) { 350 + dev_dbg(hdev->dev, "freeing P2P mem from %p, size=%#llx\n", 351 + p2pr->p2pmem, p2pr->size); 352 + pci_free_p2pmem(hdev->pdev, p2pr->p2pmem, p2pr->size); 353 + p2pr->p2pmem = NULL; 354 + } 355 + } 356 + 357 + void hl_p2p_region_fini_all(struct hl_device *hdev) 358 + { 359 + int i; 360 + 361 + for (i = 0 ; i < hdev->hldio.np2prs ; ++i) 362 + hl_p2p_region_fini(hdev, &hdev->hldio.p2prs[i]); 363 + 364 + kvfree(hdev->hldio.p2prs); 365 + hdev->hldio.p2prs = NULL; 366 + hdev->hldio.np2prs = 0; 367 + } 368 + 369 + int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr) 370 + { 371 + void *addr; 372 + int rc, i; 373 + 374 + /* Start by publishing our p2p memory */ 375 + rc = pci_p2pdma_add_resource(hdev->pdev, p2pr->bar, p2pr->size, 
p2pr->bar_offset); 376 + if (rc) { 377 + dev_err(hdev->dev, "error adding p2p resource: %d\n", rc); 378 + goto err; 379 + } 380 + 381 + /* Alloc all p2p mem */ 382 + p2pr->p2pmem = pci_alloc_p2pmem(hdev->pdev, p2pr->size); 383 + if (!p2pr->p2pmem) { 384 + dev_err(hdev->dev, "error allocating p2p memory\n"); 385 + rc = -ENOMEM; 386 + goto err; 387 + } 388 + 389 + p2pr->p2ppages = vmalloc((p2pr->size >> PAGE_SHIFT) * sizeof(struct page *)); 390 + if (!p2pr->p2ppages) { 391 + rc = -ENOMEM; 392 + goto err; 393 + } 394 + 395 + for (i = 0, addr = p2pr->p2pmem ; i < (p2pr->size >> PAGE_SHIFT) ; ++i, addr += PAGE_SIZE) { 396 + p2pr->p2ppages[i] = virt_to_page(addr); 397 + if (!p2pr->p2ppages[i]) { 398 + rc = -EFAULT; 399 + goto err; 400 + } 401 + } 402 + 403 + return 0; 404 + err: 405 + hl_p2p_region_fini(hdev, p2pr); 406 + return rc; 407 + } 408 + 409 + int hl_dio_start(struct hl_device *hdev) 410 + { 411 + dev_dbg(hdev->dev, "initializing HLDIO\n"); 412 + 413 + /* Initialize the IO counter and enable IO */ 414 + hdev->hldio.inflight_ios = alloc_percpu(s64); 415 + if (!hdev->hldio.inflight_ios) 416 + return -ENOMEM; 417 + 418 + hl_dio_set_io_enabled(hdev, true); 419 + 420 + return 0; 421 + } 422 + 423 + void hl_dio_stop(struct hl_device *hdev) 424 + { 425 + dev_dbg(hdev->dev, "deinitializing HLDIO\n"); 426 + 427 + if (hdev->hldio.io_enabled) { 428 + /* Wait for all the IO to finish */ 429 + hl_dio_set_io_enabled(hdev, false); 430 + hl_poll_timeout_condition(hdev, !hl_dio_count_io(hdev), 1000, IO_STABILIZE_TIMEOUT); 431 + } 432 + 433 + if (hdev->hldio.inflight_ios) { 434 + free_percpu(hdev->hldio.inflight_ios); 435 + hdev->hldio.inflight_ios = NULL; 436 + } 437 + }
+146
drivers/accel/habanalabs/common/hldio.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0-only */ 2 + /* 3 + * hldio.h - NVMe Direct I/O (HLDIO) infrastructure for Habana Labs Driver 4 + * 5 + * This feature requires specific hardware setup and must not be built 6 + * under COMPILE_TEST. 7 + */ 8 + 9 + #ifndef __HL_HLDIO_H__ 10 + #define __HL_HLDIO_H__ 11 + 12 + #include <linux/types.h> 13 + #include <linux/fs.h> 14 + #include <linux/seq_file.h> 15 + #include <linux/ktime.h> /* ktime functions */ 16 + #include <linux/delay.h> /* usleep_range */ 17 + #include <linux/kernel.h> /* might_sleep_if */ 18 + #include <linux/errno.h> /* error codes */ 19 + 20 + /* Forward declarations */ 21 + struct hl_device; 22 + struct file; 23 + 24 + /* Enable only if Kconfig selected */ 25 + #ifdef CONFIG_HL_HLDIO 26 + /** 27 + * struct hl_p2p_region - describes a single P2P memory region 28 + * @p2ppages: array of page structs for the P2P memory 29 + * @p2pmem: virtual address of the P2P memory region 30 + * @device_pa: physical address on the device 31 + * @bar_offset: offset within the BAR 32 + * @size: size of the region in bytes 33 + * @bar: BAR number containing this region 34 + */ 35 + struct hl_p2p_region { 36 + struct page **p2ppages; 37 + void *p2pmem; 38 + u64 device_pa; 39 + u64 bar_offset; 40 + u64 size; 41 + int bar; 42 + }; 43 + 44 + /** 45 + * struct hl_dio_stats - Direct I/O statistics 46 + * @total_ops: total number of operations attempted 47 + * @successful_ops: number of successful operations 48 + * @failed_ops: number of failed operations 49 + * @bytes_transferred: total bytes successfully transferred 50 + * @last_len_read: length of the last read operation 51 + */ 52 + struct hl_dio_stats { 53 + u64 total_ops; 54 + u64 successful_ops; 55 + u64 failed_ops; 56 + u64 bytes_transferred; 57 + size_t last_len_read; 58 + }; 59 + 60 + /** 61 + * struct hl_dio - describes habanalabs direct storage interaction interface 62 + * @p2prs: array of p2p regions 63 + * @inflight_ios: percpu counter for inflight ios 64 + * 
@np2prs: number of elements in p2prs 65 + * @io_enabled: 1 if io is enabled 0 otherwise 66 + */ 67 + struct hl_dio { 68 + struct hl_p2p_region *p2prs; 69 + s64 __percpu *inflight_ios; 70 + u8 np2prs; 71 + u8 io_enabled; 72 + }; 73 + 74 + int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, 75 + u64 device_va, off_t off_bytes, size_t len_bytes, 76 + size_t *len_read); 77 + void hl_p2p_region_fini_all(struct hl_device *hdev); 78 + int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr); 79 + int hl_dio_start(struct hl_device *hdev); 80 + void hl_dio_stop(struct hl_device *hdev); 81 + 82 + /* Init/teardown */ 83 + int hl_hldio_init(struct hl_device *hdev); 84 + void hl_hldio_fini(struct hl_device *hdev); 85 + 86 + /* File operations */ 87 + long hl_hldio_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); 88 + 89 + /* DebugFS hooks */ 90 + #ifdef CONFIG_DEBUG_FS 91 + void hl_hldio_debugfs_init(struct hl_device *hdev); 92 + void hl_hldio_debugfs_fini(struct hl_device *hdev); 93 + #else 94 + static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { } 95 + static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { } 96 + #endif 97 + 98 + #else /* !CONFIG_HL_HLDIO */ 99 + 100 + struct hl_p2p_region; 101 + /* Stubs when HLDIO is disabled */ 102 + static inline int hl_dio_ssd2hl(struct hl_device *hdev, struct hl_ctx *ctx, int fd, 103 + u64 device_va, off_t off_bytes, size_t len_bytes, 104 + size_t *len_read) 105 + { return -EOPNOTSUPP; } 106 + static inline void hl_p2p_region_fini_all(struct hl_device *hdev) {} 107 + static inline int hl_p2p_region_init(struct hl_device *hdev, struct hl_p2p_region *p2pr) 108 + { return -EOPNOTSUPP; } 109 + static inline int hl_dio_start(struct hl_device *hdev) { return -EOPNOTSUPP; } 110 + static inline void hl_dio_stop(struct hl_device *hdev) {} 111 + 112 + static inline int hl_hldio_init(struct hl_device *hdev) { return 0; } 113 + static inline void hl_hldio_fini(struct 
hl_device *hdev) { } 114 + static inline long hl_hldio_ioctl(struct file *f, unsigned int c, 115 + unsigned long a) 116 + { return -ENOTTY; } 117 + static inline void hl_hldio_debugfs_init(struct hl_device *hdev) { } 118 + static inline void hl_hldio_debugfs_fini(struct hl_device *hdev) { } 119 + 120 + #endif /* CONFIG_HL_HLDIO */ 121 + 122 + /* Simplified polling macro for HLDIO (no simulator support) */ 123 + #define hl_poll_timeout_condition(hdev, cond, sleep_us, timeout_us) \ 124 + ({ \ 125 + ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \ 126 + might_sleep_if(sleep_us); \ 127 + (void)(hdev); /* keep signature consistent, hdev unused */ \ 128 + for (;;) { \ 129 + mb(); /* ensure ordering of memory operations */ \ 130 + if (cond) \ 131 + break; \ 132 + if (timeout_us && ktime_compare(ktime_get(), __timeout) > 0) \ 133 + break; \ 134 + if (sleep_us) \ 135 + usleep_range((sleep_us >> 2) + 1, sleep_us); \ 136 + } \ 137 + (cond) ? 0 : -ETIMEDOUT; \ 138 + }) 139 + 140 + #ifdef CONFIG_HL_HLDIO 141 + bool hl_device_supports_nvme(struct hl_device *hdev); 142 + #else 143 + static inline bool hl_device_supports_nvme(struct hl_device *hdev) { return false; } 144 + #endif 145 + 146 + #endif /* __HL_HLDIO_H__ */
+7 -2
drivers/accel/habanalabs/common/memory.c
··· 1837 1837 atomic_dec(&ctx->hdev->dmabuf_export_cnt); 1838 1838 hl_ctx_put(ctx); 1839 1839 1840 - /* Paired with get_file() in export_dmabuf() */ 1840 + /* 1841 + * Paired with get_file() in export_dmabuf(). 1842 + * 'ctx' can be still used here to get the file pointer, even after hl_ctx_put() was called, 1843 + * because releasing the compute device file involves another reference decrement, and it 1844 + * would be possible only after calling fput(). 1845 + */ 1841 1846 fput(ctx->hpriv->file_priv->filp); 1842 1847 1843 1848 kfree(hl_dmabuf); ··· 2337 2332 if (rc < 0) 2338 2333 goto destroy_pages; 2339 2334 npages = rc; 2340 - rc = -EFAULT; 2335 + rc = -ENOMEM; 2341 2336 goto put_pages; 2342 2337 } 2343 2338 userptr->npages = npages;
-5
drivers/accel/habanalabs/common/memory_mgr.c
··· 259 259 goto put_mem; 260 260 } 261 261 262 - #ifdef _HAS_TYPE_ARG_IN_ACCESS_OK 263 - if (!access_ok(VERIFY_WRITE, (void __user *)(uintptr_t)vma->vm_start, 264 - user_mem_size)) { 265 - #else 266 262 if (!access_ok((void __user *)(uintptr_t)vma->vm_start, 267 263 user_mem_size)) { 268 - #endif 269 264 dev_err(mmg->dev, "%s: User pointer is invalid - 0x%lx\n", 270 265 buf->behavior->topic, vma->vm_start); 271 266
+9 -2
drivers/accel/habanalabs/common/sysfs.c
··· 96 96 infineon_second_stage_third_instance = 97 97 (infineon_second_stage_version >> 16) & mask; 98 98 99 - if (cpucp_info->infineon_second_stage_version) 99 + if (cpucp_info->infineon_version && cpucp_info->infineon_second_stage_version) 100 100 return sprintf(buf, "%#04x %#04x:%#04x:%#04x\n", 101 101 le32_to_cpu(cpucp_info->infineon_version), 102 102 infineon_second_stage_first_instance, 103 103 infineon_second_stage_second_instance, 104 104 infineon_second_stage_third_instance); 105 - else 105 + else if (cpucp_info->infineon_second_stage_version) 106 + return sprintf(buf, "%#04x:%#04x:%#04x\n", 107 + infineon_second_stage_first_instance, 108 + infineon_second_stage_second_instance, 109 + infineon_second_stage_third_instance); 110 + else if (cpucp_info->infineon_version) 106 111 return sprintf(buf, "%#04x\n", le32_to_cpu(cpucp_info->infineon_version)); 112 + 113 + return 0; 107 114 } 108 115 109 116 static DEVICE_ATTR_RO(vrm_ver);
+19
drivers/accel/habanalabs/gaudi/gaudi.c
··· 4168 4168 vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP | 4169 4169 VM_DONTCOPY | VM_NORESERVE); 4170 4170 4171 + #ifdef _HAS_DMA_MMAP_COHERENT 4172 + /* 4173 + * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP 4174 + * so vm_insert_page() can handle it safely. Without this, the kernel 4175 + * may BUG_ON due to VM_PFNMAP. 4176 + */ 4177 + if (is_vmalloc_addr(cpu_addr)) 4178 + vm_flags_set(vma, VM_MIXEDMAP); 4179 + 4171 4180 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, 4172 4181 (dma_addr - HOST_PHYS_BASE), size); 4173 4182 if (rc) 4174 4183 dev_err(hdev->dev, "dma_mmap_coherent error %d", rc); 4184 + #else 4185 + 4186 + rc = remap_pfn_range(vma, vma->vm_start, 4187 + virt_to_phys(cpu_addr) >> PAGE_SHIFT, 4188 + size, vma->vm_page_prot); 4189 + if (rc) 4190 + dev_err(hdev->dev, "remap_pfn_range error %d", rc); 4191 + 4192 + #endif 4193 + 4175 4194 4176 4195 return rc; 4177 4196 }
+375 -11
drivers/accel/habanalabs/gaudi2/gaudi2.c
··· 728 728 [DMA_CORE_ID_KDMA] = GAUDI2_EVENT_KDMA0_CORE, 729 729 }; 730 730 731 + const char *gaudi2_engine_id_str[] = { 732 + __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_0), 733 + __stringify(GAUDI2_DCORE0_ENGINE_ID_EDMA_1), 734 + __stringify(GAUDI2_DCORE0_ENGINE_ID_MME), 735 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_0), 736 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_1), 737 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_2), 738 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_3), 739 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_4), 740 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_5), 741 + __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_0), 742 + __stringify(GAUDI2_DCORE0_ENGINE_ID_DEC_1), 743 + __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_0), 744 + __stringify(GAUDI2_DCORE1_ENGINE_ID_EDMA_1), 745 + __stringify(GAUDI2_DCORE1_ENGINE_ID_MME), 746 + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_0), 747 + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_1), 748 + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_2), 749 + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_3), 750 + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_4), 751 + __stringify(GAUDI2_DCORE1_ENGINE_ID_TPC_5), 752 + __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_0), 753 + __stringify(GAUDI2_DCORE1_ENGINE_ID_DEC_1), 754 + __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_0), 755 + __stringify(GAUDI2_DCORE2_ENGINE_ID_EDMA_1), 756 + __stringify(GAUDI2_DCORE2_ENGINE_ID_MME), 757 + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_0), 758 + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_1), 759 + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_2), 760 + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_3), 761 + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_4), 762 + __stringify(GAUDI2_DCORE2_ENGINE_ID_TPC_5), 763 + __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_0), 764 + __stringify(GAUDI2_DCORE2_ENGINE_ID_DEC_1), 765 + __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_0), 766 + __stringify(GAUDI2_DCORE3_ENGINE_ID_EDMA_1), 767 + __stringify(GAUDI2_DCORE3_ENGINE_ID_MME), 768 + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_0), 769 + 
__stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_1), 770 + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_2), 771 + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_3), 772 + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_4), 773 + __stringify(GAUDI2_DCORE3_ENGINE_ID_TPC_5), 774 + __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_0), 775 + __stringify(GAUDI2_DCORE3_ENGINE_ID_DEC_1), 776 + __stringify(GAUDI2_DCORE0_ENGINE_ID_TPC_6), 777 + __stringify(GAUDI2_ENGINE_ID_PDMA_0), 778 + __stringify(GAUDI2_ENGINE_ID_PDMA_1), 779 + __stringify(GAUDI2_ENGINE_ID_ROT_0), 780 + __stringify(GAUDI2_ENGINE_ID_ROT_1), 781 + __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_0), 782 + __stringify(GAUDI2_PCIE_ENGINE_ID_DEC_1), 783 + __stringify(GAUDI2_ENGINE_ID_NIC0_0), 784 + __stringify(GAUDI2_ENGINE_ID_NIC0_1), 785 + __stringify(GAUDI2_ENGINE_ID_NIC1_0), 786 + __stringify(GAUDI2_ENGINE_ID_NIC1_1), 787 + __stringify(GAUDI2_ENGINE_ID_NIC2_0), 788 + __stringify(GAUDI2_ENGINE_ID_NIC2_1), 789 + __stringify(GAUDI2_ENGINE_ID_NIC3_0), 790 + __stringify(GAUDI2_ENGINE_ID_NIC3_1), 791 + __stringify(GAUDI2_ENGINE_ID_NIC4_0), 792 + __stringify(GAUDI2_ENGINE_ID_NIC4_1), 793 + __stringify(GAUDI2_ENGINE_ID_NIC5_0), 794 + __stringify(GAUDI2_ENGINE_ID_NIC5_1), 795 + __stringify(GAUDI2_ENGINE_ID_NIC6_0), 796 + __stringify(GAUDI2_ENGINE_ID_NIC6_1), 797 + __stringify(GAUDI2_ENGINE_ID_NIC7_0), 798 + __stringify(GAUDI2_ENGINE_ID_NIC7_1), 799 + __stringify(GAUDI2_ENGINE_ID_NIC8_0), 800 + __stringify(GAUDI2_ENGINE_ID_NIC8_1), 801 + __stringify(GAUDI2_ENGINE_ID_NIC9_0), 802 + __stringify(GAUDI2_ENGINE_ID_NIC9_1), 803 + __stringify(GAUDI2_ENGINE_ID_NIC10_0), 804 + __stringify(GAUDI2_ENGINE_ID_NIC10_1), 805 + __stringify(GAUDI2_ENGINE_ID_NIC11_0), 806 + __stringify(GAUDI2_ENGINE_ID_NIC11_1), 807 + __stringify(GAUDI2_ENGINE_ID_PCIE), 808 + __stringify(GAUDI2_ENGINE_ID_PSOC), 809 + __stringify(GAUDI2_ENGINE_ID_ARC_FARM), 810 + __stringify(GAUDI2_ENGINE_ID_KDMA), 811 + __stringify(GAUDI2_ENGINE_ID_SIZE), 812 + }; 813 + 814 + const char *gaudi2_queue_id_str[] = { 
815 + __stringify(GAUDI2_QUEUE_ID_PDMA_0_0), 816 + __stringify(GAUDI2_QUEUE_ID_PDMA_0_1), 817 + __stringify(GAUDI2_QUEUE_ID_PDMA_0_2), 818 + __stringify(GAUDI2_QUEUE_ID_PDMA_0_3), 819 + __stringify(GAUDI2_QUEUE_ID_PDMA_1_0), 820 + __stringify(GAUDI2_QUEUE_ID_PDMA_1_1), 821 + __stringify(GAUDI2_QUEUE_ID_PDMA_1_2), 822 + __stringify(GAUDI2_QUEUE_ID_PDMA_1_3), 823 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_0), 824 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_1), 825 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_2), 826 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_0_3), 827 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_0), 828 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_1), 829 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_2), 830 + __stringify(GAUDI2_QUEUE_ID_DCORE0_EDMA_1_3), 831 + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_0), 832 + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_1), 833 + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_2), 834 + __stringify(GAUDI2_QUEUE_ID_DCORE0_MME_0_3), 835 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_0), 836 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_1), 837 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_2), 838 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_0_3), 839 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_0), 840 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_1), 841 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_2), 842 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_1_3), 843 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_0), 844 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_1), 845 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_2), 846 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_2_3), 847 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_0), 848 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_1), 849 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_2), 850 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_3_3), 851 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_0), 852 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_1), 853 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_2), 854 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_4_3), 
855 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_0), 856 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_1), 857 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_2), 858 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_5_3), 859 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_0), 860 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_1), 861 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_2), 862 + __stringify(GAUDI2_QUEUE_ID_DCORE0_TPC_6_3), 863 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_0), 864 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_1), 865 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_2), 866 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_0_3), 867 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_0), 868 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_1), 869 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_2), 870 + __stringify(GAUDI2_QUEUE_ID_DCORE1_EDMA_1_3), 871 + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_0), 872 + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_1), 873 + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_2), 874 + __stringify(GAUDI2_QUEUE_ID_DCORE1_MME_0_3), 875 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_0), 876 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_1), 877 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_2), 878 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_0_3), 879 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_0), 880 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_1), 881 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_2), 882 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_1_3), 883 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_0), 884 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_1), 885 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_2), 886 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_2_3), 887 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_0), 888 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_1), 889 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_2), 890 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_3_3), 891 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_0), 892 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_1), 893 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_2), 
894 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_4_3), 895 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_0), 896 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_1), 897 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_2), 898 + __stringify(GAUDI2_QUEUE_ID_DCORE1_TPC_5_3), 899 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_0), 900 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_1), 901 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_2), 902 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_0_3), 903 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_0), 904 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_1), 905 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_2), 906 + __stringify(GAUDI2_QUEUE_ID_DCORE2_EDMA_1_3), 907 + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_0), 908 + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_1), 909 + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_2), 910 + __stringify(GAUDI2_QUEUE_ID_DCORE2_MME_0_3), 911 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_0), 912 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_1), 913 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_2), 914 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_0_3), 915 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_0), 916 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_1), 917 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_2), 918 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_1_3), 919 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_0), 920 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_1), 921 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_2), 922 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_2_3), 923 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_0), 924 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_1), 925 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_2), 926 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_3_3), 927 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_0), 928 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_1), 929 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_2), 930 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_4_3), 931 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_0), 932 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_1), 
933 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_2), 934 + __stringify(GAUDI2_QUEUE_ID_DCORE2_TPC_5_3), 935 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_0), 936 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_1), 937 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_2), 938 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_0_3), 939 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_0), 940 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_1), 941 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_2), 942 + __stringify(GAUDI2_QUEUE_ID_DCORE3_EDMA_1_3), 943 + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_0), 944 + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_1), 945 + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_2), 946 + __stringify(GAUDI2_QUEUE_ID_DCORE3_MME_0_3), 947 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_0), 948 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_1), 949 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_2), 950 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_0_3), 951 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_0), 952 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_1), 953 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_2), 954 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_1_3), 955 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_0), 956 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_1), 957 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_2), 958 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_2_3), 959 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_0), 960 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_1), 961 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_2), 962 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_3_3), 963 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_0), 964 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_1), 965 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_2), 966 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_4_3), 967 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_0), 968 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_1), 969 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_2), 970 + __stringify(GAUDI2_QUEUE_ID_DCORE3_TPC_5_3), 971 + __stringify(GAUDI2_QUEUE_ID_NIC_0_0), 972 + 
__stringify(GAUDI2_QUEUE_ID_NIC_0_1), 973 + __stringify(GAUDI2_QUEUE_ID_NIC_0_2), 974 + __stringify(GAUDI2_QUEUE_ID_NIC_0_3), 975 + __stringify(GAUDI2_QUEUE_ID_NIC_1_0), 976 + __stringify(GAUDI2_QUEUE_ID_NIC_1_1), 977 + __stringify(GAUDI2_QUEUE_ID_NIC_1_2), 978 + __stringify(GAUDI2_QUEUE_ID_NIC_1_3), 979 + __stringify(GAUDI2_QUEUE_ID_NIC_2_0), 980 + __stringify(GAUDI2_QUEUE_ID_NIC_2_1), 981 + __stringify(GAUDI2_QUEUE_ID_NIC_2_2), 982 + __stringify(GAUDI2_QUEUE_ID_NIC_2_3), 983 + __stringify(GAUDI2_QUEUE_ID_NIC_3_0), 984 + __stringify(GAUDI2_QUEUE_ID_NIC_3_1), 985 + __stringify(GAUDI2_QUEUE_ID_NIC_3_2), 986 + __stringify(GAUDI2_QUEUE_ID_NIC_3_3), 987 + __stringify(GAUDI2_QUEUE_ID_NIC_4_0), 988 + __stringify(GAUDI2_QUEUE_ID_NIC_4_1), 989 + __stringify(GAUDI2_QUEUE_ID_NIC_4_2), 990 + __stringify(GAUDI2_QUEUE_ID_NIC_4_3), 991 + __stringify(GAUDI2_QUEUE_ID_NIC_5_0), 992 + __stringify(GAUDI2_QUEUE_ID_NIC_5_1), 993 + __stringify(GAUDI2_QUEUE_ID_NIC_5_2), 994 + __stringify(GAUDI2_QUEUE_ID_NIC_5_3), 995 + __stringify(GAUDI2_QUEUE_ID_NIC_6_0), 996 + __stringify(GAUDI2_QUEUE_ID_NIC_6_1), 997 + __stringify(GAUDI2_QUEUE_ID_NIC_6_2), 998 + __stringify(GAUDI2_QUEUE_ID_NIC_6_3), 999 + __stringify(GAUDI2_QUEUE_ID_NIC_7_0), 1000 + __stringify(GAUDI2_QUEUE_ID_NIC_7_1), 1001 + __stringify(GAUDI2_QUEUE_ID_NIC_7_2), 1002 + __stringify(GAUDI2_QUEUE_ID_NIC_7_3), 1003 + __stringify(GAUDI2_QUEUE_ID_NIC_8_0), 1004 + __stringify(GAUDI2_QUEUE_ID_NIC_8_1), 1005 + __stringify(GAUDI2_QUEUE_ID_NIC_8_2), 1006 + __stringify(GAUDI2_QUEUE_ID_NIC_8_3), 1007 + __stringify(GAUDI2_QUEUE_ID_NIC_9_0), 1008 + __stringify(GAUDI2_QUEUE_ID_NIC_9_1), 1009 + __stringify(GAUDI2_QUEUE_ID_NIC_9_2), 1010 + __stringify(GAUDI2_QUEUE_ID_NIC_9_3), 1011 + __stringify(GAUDI2_QUEUE_ID_NIC_10_0), 1012 + __stringify(GAUDI2_QUEUE_ID_NIC_10_1), 1013 + __stringify(GAUDI2_QUEUE_ID_NIC_10_2), 1014 + __stringify(GAUDI2_QUEUE_ID_NIC_10_3), 1015 + __stringify(GAUDI2_QUEUE_ID_NIC_11_0), 1016 + __stringify(GAUDI2_QUEUE_ID_NIC_11_1), 
1017 + __stringify(GAUDI2_QUEUE_ID_NIC_11_2), 1018 + __stringify(GAUDI2_QUEUE_ID_NIC_11_3), 1019 + __stringify(GAUDI2_QUEUE_ID_NIC_12_0), 1020 + __stringify(GAUDI2_QUEUE_ID_NIC_12_1), 1021 + __stringify(GAUDI2_QUEUE_ID_NIC_12_2), 1022 + __stringify(GAUDI2_QUEUE_ID_NIC_12_3), 1023 + __stringify(GAUDI2_QUEUE_ID_NIC_13_0), 1024 + __stringify(GAUDI2_QUEUE_ID_NIC_13_1), 1025 + __stringify(GAUDI2_QUEUE_ID_NIC_13_2), 1026 + __stringify(GAUDI2_QUEUE_ID_NIC_13_3), 1027 + __stringify(GAUDI2_QUEUE_ID_NIC_14_0), 1028 + __stringify(GAUDI2_QUEUE_ID_NIC_14_1), 1029 + __stringify(GAUDI2_QUEUE_ID_NIC_14_2), 1030 + __stringify(GAUDI2_QUEUE_ID_NIC_14_3), 1031 + __stringify(GAUDI2_QUEUE_ID_NIC_15_0), 1032 + __stringify(GAUDI2_QUEUE_ID_NIC_15_1), 1033 + __stringify(GAUDI2_QUEUE_ID_NIC_15_2), 1034 + __stringify(GAUDI2_QUEUE_ID_NIC_15_3), 1035 + __stringify(GAUDI2_QUEUE_ID_NIC_16_0), 1036 + __stringify(GAUDI2_QUEUE_ID_NIC_16_1), 1037 + __stringify(GAUDI2_QUEUE_ID_NIC_16_2), 1038 + __stringify(GAUDI2_QUEUE_ID_NIC_16_3), 1039 + __stringify(GAUDI2_QUEUE_ID_NIC_17_0), 1040 + __stringify(GAUDI2_QUEUE_ID_NIC_17_1), 1041 + __stringify(GAUDI2_QUEUE_ID_NIC_17_2), 1042 + __stringify(GAUDI2_QUEUE_ID_NIC_17_3), 1043 + __stringify(GAUDI2_QUEUE_ID_NIC_18_0), 1044 + __stringify(GAUDI2_QUEUE_ID_NIC_18_1), 1045 + __stringify(GAUDI2_QUEUE_ID_NIC_18_2), 1046 + __stringify(GAUDI2_QUEUE_ID_NIC_18_3), 1047 + __stringify(GAUDI2_QUEUE_ID_NIC_19_0), 1048 + __stringify(GAUDI2_QUEUE_ID_NIC_19_1), 1049 + __stringify(GAUDI2_QUEUE_ID_NIC_19_2), 1050 + __stringify(GAUDI2_QUEUE_ID_NIC_19_3), 1051 + __stringify(GAUDI2_QUEUE_ID_NIC_20_0), 1052 + __stringify(GAUDI2_QUEUE_ID_NIC_20_1), 1053 + __stringify(GAUDI2_QUEUE_ID_NIC_20_2), 1054 + __stringify(GAUDI2_QUEUE_ID_NIC_20_3), 1055 + __stringify(GAUDI2_QUEUE_ID_NIC_21_0), 1056 + __stringify(GAUDI2_QUEUE_ID_NIC_21_1), 1057 + __stringify(GAUDI2_QUEUE_ID_NIC_21_2), 1058 + __stringify(GAUDI2_QUEUE_ID_NIC_21_3), 1059 + __stringify(GAUDI2_QUEUE_ID_NIC_22_0), 1060 + 
__stringify(GAUDI2_QUEUE_ID_NIC_22_1), 1061 + __stringify(GAUDI2_QUEUE_ID_NIC_22_2), 1062 + __stringify(GAUDI2_QUEUE_ID_NIC_22_3), 1063 + __stringify(GAUDI2_QUEUE_ID_NIC_23_0), 1064 + __stringify(GAUDI2_QUEUE_ID_NIC_23_1), 1065 + __stringify(GAUDI2_QUEUE_ID_NIC_23_2), 1066 + __stringify(GAUDI2_QUEUE_ID_NIC_23_3), 1067 + __stringify(GAUDI2_QUEUE_ID_ROT_0_0), 1068 + __stringify(GAUDI2_QUEUE_ID_ROT_0_1), 1069 + __stringify(GAUDI2_QUEUE_ID_ROT_0_2), 1070 + __stringify(GAUDI2_QUEUE_ID_ROT_0_3), 1071 + __stringify(GAUDI2_QUEUE_ID_ROT_1_0), 1072 + __stringify(GAUDI2_QUEUE_ID_ROT_1_1), 1073 + __stringify(GAUDI2_QUEUE_ID_ROT_1_2), 1074 + __stringify(GAUDI2_QUEUE_ID_ROT_1_3), 1075 + __stringify(GAUDI2_QUEUE_ID_CPU_PQ), 1076 + __stringify(GAUDI2_QUEUE_ID_SIZE), 1077 + }; 1078 + 731 1079 static const char * const gaudi2_qm_sei_error_cause[GAUDI2_NUM_OF_QM_SEI_ERR_CAUSE] = { 732 1080 "qman sei intr", 733 1081 "arc sei intr" ··· 3498 3150 rc = hl_fw_read_preboot_status(hdev); 3499 3151 if (rc) { 3500 3152 if (hdev->reset_on_preboot_fail) 3501 - /* we are already on failure flow, so don't check if hw_fini fails. 
*/ 3502 3153 hdev->asic_funcs->hw_fini(hdev, true, false); 3503 3154 goto pci_fini; 3504 3155 } ··· 3507 3160 rc = hdev->asic_funcs->hw_fini(hdev, true, false); 3508 3161 if (rc) { 3509 3162 dev_err(hdev->dev, "failed to reset HW in dirty state (%d)\n", rc); 3163 + goto pci_fini; 3164 + } 3165 + 3166 + rc = hl_fw_read_preboot_status(hdev); 3167 + if (rc) { 3168 + if (hdev->reset_on_preboot_fail) 3169 + hdev->asic_funcs->hw_fini(hdev, true, false); 3510 3170 goto pci_fini; 3511 3171 } 3512 3172 } ··· 5190 4836 else 5191 4837 wait_timeout_ms = GAUDI2_RESET_WAIT_MSEC; 5192 4838 5193 - if (fw_reset) 4839 + if (fw_reset || hdev->cpld_shutdown) 5194 4840 goto skip_engines; 5195 4841 5196 4842 gaudi2_stop_dma_qmans(hdev); ··· 6838 6484 VM_DONTCOPY | VM_NORESERVE); 6839 6485 6840 6486 #ifdef _HAS_DMA_MMAP_COHERENT 6487 + /* 6488 + * If dma_alloc_coherent() returns a vmalloc address, set VM_MIXEDMAP 6489 + * so vm_insert_page() can handle it safely. Without this, the kernel 6490 + * may BUG_ON due to VM_PFNMAP. 
6491 + */ 6492 + if (is_vmalloc_addr(cpu_addr)) 6493 + vm_flags_set(vma, VM_MIXEDMAP); 6841 6494 6842 6495 rc = dma_mmap_coherent(hdev->dev, vma, cpu_addr, dma_addr, size); 6843 6496 if (rc) ··· 7135 6774 struct gaudi2_device *gaudi2 = hdev->asic_specific; 7136 6775 7137 6776 if (!gaudi2_is_queue_enabled(hdev, parser->hw_queue_id)) { 7138 - dev_err(hdev->dev, "h/w queue %d is disabled\n", parser->hw_queue_id); 6777 + dev_err(hdev->dev, "h/w queue %s is disabled\n", 6778 + GAUDI2_QUEUE_ID_TO_STR(parser->hw_queue_id)); 7139 6779 return -EINVAL; 7140 6780 } 7141 6781 ··· 7388 7026 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, msg_info->dma_addr); 7389 7027 if (rc) 7390 7028 dev_err(hdev->dev, 7391 - "Failed to send msg_short packet to H/W queue %d\n", hw_queue_id); 7029 + "Failed to send msg_short packet to H/W queue %s\n", 7030 + GAUDI2_QUEUE_ID_TO_STR(hw_queue_id)); 7392 7031 7393 7032 return rc; 7394 7033 } ··· 7415 7052 timeout_usec); 7416 7053 7417 7054 if (rc == -ETIMEDOUT) { 7418 - dev_err(hdev->dev, "H/W queue %d test failed (SOB_OBJ_0 == 0x%x)\n", 7419 - hw_queue_id, tmp); 7055 + dev_err(hdev->dev, "H/W queue %s test failed (SOB_OBJ_0 == 0x%x)\n", 7056 + GAUDI2_QUEUE_ID_TO_STR(hw_queue_id), tmp); 7420 7057 rc = -EIO; 7421 7058 } 7422 7059 ··· 9966 9603 q = (struct hl_engine_arc_dccm_queue_full_irq *) &payload; 9967 9604 9968 9605 gaudi2_print_event(hdev, event_type, true, 9969 - "ARC DCCM Full event: EngId: %u, Intr_type: %u, Qidx: %u", 9970 - engine_id, intr_type, q->queue_index); 9606 + "ARC DCCM Full event: Eng: %s, Intr_type: %u, Qidx: %u", 9607 + GAUDI2_ENG_ID_TO_STR(engine_id), intr_type, q->queue_index); 9971 9608 return 1; 9972 9609 default: 9973 9610 gaudi2_print_event(hdev, event_type, true, "Unknown ARC event type"); ··· 10535 10172 dev_err(hdev->dev, "CPLD shutdown event, reset reason: 0x%llx\n", 10536 10173 le64_to_cpu(eq_entry->data[0])); 10537 10174 error_count = GAUDI2_NA_EVENT_CAUSE; 10538 - event_mask |= 
HL_NOTIFIER_EVENT_GENERAL_HW_ERR; 10175 + hl_eq_cpld_shutdown_event_handle(hdev, event_type, &event_mask); 10539 10176 break; 10540 10177 10541 10178 case GAUDI2_EVENT_CPU_PKT_SANITY_FAILED: ··· 10623 10260 if (event_mask & HL_NOTIFIER_EVENT_GENERAL_HW_ERR) 10624 10261 hl_handle_critical_hw_err(hdev, event_type, &event_mask); 10625 10262 10263 + hl_debugfs_cfg_access_history_dump(hdev); 10626 10264 event_mask |= HL_NOTIFIER_EVENT_DEVICE_RESET; 10627 10265 hl_device_cond_reset(hdev, reset_flags, event_mask); 10628 10266 } ··· 10660 10296 10661 10297 rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, pkt_size, phys_addr); 10662 10298 if (rc) 10663 - dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %d\n", 10664 - hw_queue_id); 10299 + dev_err(hdev->dev, "Failed to send lin_dma packet to H/W queue %s\n", 10300 + GAUDI2_QUEUE_ID_TO_STR(hw_queue_id)); 10665 10301 10666 10302 return rc; 10667 10303 }
+9
drivers/accel/habanalabs/gaudi2/gaudi2P.h
··· 240 240 #define GAUDI2_NUM_TESTED_QS (GAUDI2_QUEUE_ID_CPU_PQ - GAUDI2_QUEUE_ID_PDMA_0_0) 241 241 242 242 243 + extern const char *gaudi2_engine_id_str[]; 244 + extern const char *gaudi2_queue_id_str[]; 245 + 246 + #define GAUDI2_ENG_ID_TO_STR(initiator) ((initiator) >= GAUDI2_ENGINE_ID_SIZE ? "not found" : \ 247 + gaudi2_engine_id_str[initiator]) 248 + 249 + #define GAUDI2_QUEUE_ID_TO_STR(initiator) ((initiator) >= GAUDI2_QUEUE_ID_SIZE ? "not found" : \ 250 + gaudi2_queue_id_str[initiator]) 251 + 243 252 enum gaudi2_reserved_sob_id { 244 253 GAUDI2_RESERVED_SOB_CS_COMPLETION_FIRST, 245 254 GAUDI2_RESERVED_SOB_CS_COMPLETION_LAST =
+1 -1
drivers/accel/habanalabs/gaudi2/gaudi2_coresight.c
··· 2426 2426 WREG32(base_reg + mmBMON_ADDRH_E3_OFFSET, 0); 2427 2427 WREG32(base_reg + mmBMON_REDUCTION_OFFSET, 0); 2428 2428 WREG32(base_reg + mmBMON_STM_TRC_OFFSET, 0x7 | (0xA << 8)); 2429 - WREG32(base_reg + mmBMON_CR_OFFSET, 0x77 | 0xf << 24); 2429 + WREG32(base_reg + mmBMON_CR_OFFSET, 0x41); 2430 2430 } 2431 2431 2432 2432 return 0;
+4
include/linux/habanalabs/cpucp_if.h
··· 1425 1425 * from "pkt_subidx" field in struct cpucp_packet. 1426 1426 * 1427 1427 * HL_PASSTHROUGHT_VERSIONS - Fetch all firmware versions. 1428 + * HL_GET_ERR_COUNTERS_CMD - Command to get error counters 1429 + * HL_GET_P_STATE - get performance state 1428 1430 */ 1429 1431 enum hl_passthrough_type { 1430 1432 HL_PASSTHROUGH_VERSIONS, 1433 + HL_GET_ERR_COUNTERS_CMD, 1434 + HL_GET_P_STATE, 1431 1435 }; 1432 1436 1433 1437 #endif /* CPUCP_IF_H */
+1 -1
include/trace/events/habanalabs.h
··· 145 145 __entry->op_str = op_str; 146 146 ), 147 147 148 - TP_printk("%s: cms: %s", 148 + TP_printk("%s: cmd: %s", 149 149 __get_str(dname), 150 150 __entry->op_str) 151 151 );