drm/amdgpu: add configurable grace period for unmap queues

The HWS scheduler allows a grace period for wave completion prior to
preemption, which improves performance by avoiding CWSR on waves that
can potentially complete quickly. The debugger, on the other hand, will
want to inspect wave status immediately after it actively triggers
preemption (a suspend function to be provided).

To minimize latency between preemption and debugger wave inspection, allow
immediate preemption by setting the grace period to 0.

Note that setting the preemption grace period to 0 results in an
infinite grace period being set due to a CP FW bug, so set it to 1 for now.
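
To make the workaround concrete: both the gfx9 and gfx10 packet-info
helpers below clamp a requested grace period of 0 up to 1. A standalone
sketch of that clamp (effective_grace_period() and the test harness are
illustrative, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Demonstrates the FW-bug workaround applied by
 * kgd_gfx_v9/v10_build_grace_period_packet_info(): a requested grace
 * period of 0 would program an infinite grace period in CP FW, so it
 * is silently raised to 1.
 */
static uint32_t effective_grace_period(uint32_t requested)
{
	return requested == 0 ? 1 : requested;
}

int main(void)
{
	assert(effective_grace_period(0) == 1); /* debugger asks for "immediate" */
	assert(effective_grace_period(5) == 5); /* normal values pass through */
	return 0;
}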

Signed-off-by: Jonathan Kim <jonathan.kim@amd.com>
Reviewed-by: Felix Kuehling <felix.kuehling@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>

Authored by Jonathan Kim, committed by Alex Deucher · 7cee6a68 33f3437a

14 files changed: +295 -20
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_aldebaran.c
···
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.enable_debug_trap = kgd_aldebaran_enable_debug_trap,
 	.disable_debug_trap = kgd_aldebaran_disable_debug_trap,
+	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c
···
 		kgd_gfx_v9_set_vm_context_page_table_base,
 	.enable_debug_trap = kgd_arcturus_enable_debug_trap,
 	.disable_debug_trap = kgd_arcturus_disable_debug_trap,
+	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings
 };
+43
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.c
···
 	return 0;
 }
 
+/* kgd_gfx_v10_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ *	ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
+ *	atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ *	wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
+ *	gws_wait_time            -- Wait Count for Global Wave Syncs.
+ *	que_sleep_wait_time      -- Wait Count for Dequeue Retry.
+ *	sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
+ *	sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
+ *	deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev,
+					uint32_t *wait_times)
+{
+	*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+					uint32_t wait_times,
+					uint32_t grace_period,
+					uint32_t *reg_offset,
+					uint32_t *reg_data)
+{
+	*reg_data = wait_times;
+
+	/*
+	 * The CP cannot handle a 0 grace period input and will result in
+	 * an infinite grace period being set so set it to 1 to prevent this.
+	 */
+	if (grace_period == 0)
+		grace_period = 1;
+
+	*reg_data = REG_SET_FIELD(*reg_data,
+			CP_IQ_WAIT_TIME2,
+			SCH_WAVE,
+			grace_period);
+
+	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
 static void program_trap_handler_settings(struct amdgpu_device *adev,
 		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr,
 		uint32_t inst)
···
 	.set_vm_context_page_table_base = set_vm_context_page_table_base,
 	.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
 	.disable_debug_trap = kgd_gfx_v10_disable_debug_trap,
+	.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+	.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
 	.program_trap_handler_settings = program_trap_handler_settings,
 };
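
An aside on the helper used above: REG_SET_FIELD() is the amdgpu
register macro that read-modify-writes one named bit-field inside a
cached register value, leaving the other wait counts packed into
CP_IQ_WAIT_TIME2 untouched. An open-coded equivalent (the shift/mask
values here are illustrative placeholders, not the real SCH_WAVE layout
from the generated register headers):

#include <stdint.h>

/* Illustrative field layout only -- the real CP_IQ_WAIT_TIME2__SCH_WAVE
 * shift and mask come from the generated GC register headers.
 */
#define SCH_WAVE_SHIFT	8
#define SCH_WAVE_MASK	(0xffu << SCH_WAVE_SHIFT)

/* Equivalent of REG_SET_FIELD(reg_data, CP_IQ_WAIT_TIME2, SCH_WAVE, gp):
 * clear the field, then OR in the new value.
 */
static uint32_t set_sch_wave(uint32_t reg_data, uint32_t grace_period)
{
	reg_data &= ~SCH_WAVE_MASK;
	reg_data |= (grace_period << SCH_WAVE_SHIFT) & SCH_WAVE_MASK;
	return reg_data;
}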
+6
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10.h
···
 uint32_t kgd_gfx_v10_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid);
+void kgd_gfx_v10_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
+void kgd_gfx_v10_build_grace_period_packet_info(struct amdgpu_device *adev,
+					uint32_t wait_times,
+					uint32_t grace_period,
+					uint32_t *reg_offset,
+					uint32_t *reg_data);
+2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v10_3.c
···
 	.get_atc_vmid_pasid_mapping_info = get_atc_vmid_pasid_mapping_info_v10_3,
 	.set_vm_context_page_table_base = set_vm_context_page_table_base_v10_3,
 	.program_trap_handler_settings = program_trap_handler_settings_v10_3,
+	.get_iq_wait_times = kgd_gfx_v10_get_iq_wait_times,
+	.build_grace_period_packet_info = kgd_gfx_v10_build_grace_period_packet_info,
 	.enable_debug_trap = kgd_gfx_v10_enable_debug_trap,
 	.disable_debug_trap = kgd_gfx_v10_disable_debug_trap
 };
+43
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
···
 	return 0;
 }
 
+/* kgd_gfx_v9_get_iq_wait_times: Returns the mmCP_IQ_WAIT_TIME1/2 values
+ * The values read are:
+ *	ib_offload_wait_time     -- Wait Count for Indirect Buffer Offloads.
+ *	atomic_offload_wait_time -- Wait Count for L2 and GDS Atomics Offloads.
+ *	wrm_offload_wait_time    -- Wait Count for WAIT_REG_MEM Offloads.
+ *	gws_wait_time            -- Wait Count for Global Wave Syncs.
+ *	que_sleep_wait_time      -- Wait Count for Dequeue Retry.
+ *	sch_wave_wait_time       -- Wait Count for Scheduling Wave Message.
+ *	sem_rearm_wait_time      -- Wait Count for Semaphore re-arm.
+ *	deq_retry_wait_time      -- Wait Count for Global Wave Syncs.
+ */
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev,
+					uint32_t *wait_times)
+{
+	*wait_times = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2));
+}
+
 void kgd_gfx_v9_set_vm_context_page_table_base(struct amdgpu_device *adev,
 		uint32_t vmid, uint64_t page_table_base)
 {
···
 		adev->gfx.cu_info.max_waves_per_simd;
 }
 
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+		uint32_t wait_times,
+		uint32_t grace_period,
+		uint32_t *reg_offset,
+		uint32_t *reg_data)
+{
+	*reg_data = wait_times;
+
+	/*
+	 * The CP cannot handle a 0 grace period input and will result in
+	 * an infinite grace period being set so set it to 1 to prevent this.
+	 */
+	if (grace_period == 0)
+		grace_period = 1;
+
+	*reg_data = REG_SET_FIELD(*reg_data,
+			CP_IQ_WAIT_TIME2,
+			SCH_WAVE,
+			grace_period);
+
+	*reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_IQ_WAIT_TIME2);
+}
+
 void kgd_gfx_v9_program_trap_handler_settings(struct amdgpu_device *adev,
 		uint32_t vmid, uint64_t tba_addr, uint64_t tma_addr, uint32_t inst)
 {
···
 	.set_vm_context_page_table_base = kgd_gfx_v9_set_vm_context_page_table_base,
 	.enable_debug_trap = kgd_gfx_v9_enable_debug_trap,
 	.disable_debug_trap = kgd_gfx_v9_disable_debug_trap,
+	.get_iq_wait_times = kgd_gfx_v9_get_iq_wait_times,
+	.build_grace_period_packet_info = kgd_gfx_v9_build_grace_period_packet_info,
 	.get_cu_occupancy = kgd_gfx_v9_get_cu_occupancy,
 	.program_trap_handler_settings = kgd_gfx_v9_program_trap_handler_settings,
 };
+6 -2
drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.h
···
  * OTHER DEALINGS IN THE SOFTWARE.
  */
 
-
-
 void kgd_gfx_v9_program_sh_mem_settings(struct amdgpu_device *adev, uint32_t vmid,
 		uint32_t sh_mem_config,
 		uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
···
 uint32_t kgd_gfx_v9_disable_debug_trap(struct amdgpu_device *adev,
 					bool keep_trap_enabled,
 					uint32_t vmid);
+void kgd_gfx_v9_get_iq_wait_times(struct amdgpu_device *adev, uint32_t *wait_times);
+void kgd_gfx_v9_build_grace_period_packet_info(struct amdgpu_device *adev,
+		uint32_t wait_times,
+		uint32_t grace_period,
+		uint32_t *reg_offset,
+		uint32_t *reg_data);
+45 -18
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
···
 
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
-				uint32_t filter_param);
+				uint32_t filter_param,
+				uint32_t grace_period);
 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
-				uint32_t filter_param, bool reset);
+				uint32_t filter_param,
+				uint32_t grace_period,
+				bool reset);
 
 static int map_queues_cpsch(struct device_queue_manager *dqm);
 
···
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
 		if (!dqm->dev->kfd->shared_resources.enable_mes)
 			retval = unmap_queues_cpsch(dqm,
-					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
+					KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
 		else if (prev_active)
 			retval = remove_queue_mes(dqm, q, &pdd->qpd);
 
···
 	retval = execute_queues_cpsch(dqm,
 			qpd->is_debug ?
 			KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
-			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD);
 
 out:
 	dqm_unlock(dqm);
···
 	}
 	if (!dqm->dev->kfd->shared_resources.enable_mes)
 		retval = execute_queues_cpsch(dqm,
-				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
-
+				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
 	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
 	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
 vm_not_acquired:
···
 
 	init_sdma_bitmaps(dqm);
 
+	if (dqm->dev->kfd2kgd->get_iq_wait_times)
+		dqm->dev->kfd2kgd->get_iq_wait_times(dqm->dev->adev,
+					&dqm->wait_times);
 	return 0;
 }
···
 	dqm->is_hws_hang = false;
 	dqm->is_resetting = false;
 	dqm->sched_running = true;
+
 	if (!dqm->dev->kfd->shared_resources.enable_mes)
-		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
 	dqm_unlock(dqm);
 
 	return 0;
···
 
 	if (!dqm->is_hws_hang) {
 		if (!dqm->dev->kfd->shared_resources.enable_mes)
-			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
+			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD, false);
 		else
 			remove_all_queues_mes(dqm);
 	}
···
 	list_add(&kq->list, &qpd->priv_queue_list);
 	increment_queue_count(dqm, qpd, kq->queue);
 	qpd->is_debug = true;
-	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD);
 	dqm_unlock(dqm);
 
 	return 0;
···
 	list_del(&kq->list);
 	decrement_queue_count(dqm, qpd, kq->queue);
 	qpd->is_debug = false;
-	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0,
+			USE_DEFAULT_GRACE_PERIOD);
 	/*
 	 * Unconditionally decrement this counter, regardless of the queue's
 	 * type.
···
 
 	if (!dqm->dev->kfd->shared_resources.enable_mes)
 		retval = execute_queues_cpsch(dqm,
-				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, USE_DEFAULT_GRACE_PERIOD);
 	else
 		retval = add_queue_mes(dqm, q, qpd);
 	if (retval)
···
 /* dqm->lock mutex has to be locked before calling this function */
 static int unmap_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
-				uint32_t filter_param, bool reset)
+				uint32_t filter_param,
+				uint32_t grace_period,
+				bool reset)
 {
 	int retval = 0;
 	struct mqd_manager *mqd_mgr;
···
 		return -EIO;
 	if (!dqm->active_runlist)
 		return retval;
+
+	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+		retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
+		if (retval)
+			return retval;
+	}
 
 	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
 	if (retval)
···
 		return -ETIME;
 	}
 
+	/* We need to reset the grace period value for this device */
+	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
+		if (pm_update_grace_period(&dqm->packet_mgr,
+					USE_DEFAULT_GRACE_PERIOD))
+			pr_err("Failed to reset grace period\n");
+	}
+
 	pm_release_ib(&dqm->packet_mgr);
 	dqm->active_runlist = false;
 
···
 	dqm_lock(dqm);
 
 	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
-			pasid, true);
+			pasid, USE_DEFAULT_GRACE_PERIOD, true);
 
 	dqm_unlock(dqm);
 	return retval;
···
 /* dqm->lock mutex has to be locked before calling this function */
 static int execute_queues_cpsch(struct device_queue_manager *dqm,
 				enum kfd_unmap_queues_filter filter,
-				uint32_t filter_param)
+				uint32_t filter_param,
+				uint32_t grace_period)
 {
 	int retval;
 
 	if (dqm->is_hws_hang)
 		return -EIO;
-	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
+	retval = unmap_queues_cpsch(dqm, filter, filter_param, grace_period, false);
 	if (retval)
 		return retval;
 
···
 	if (!dqm->dev->kfd->shared_resources.enable_mes) {
 		decrement_queue_count(dqm, qpd, q);
 		retval = execute_queues_cpsch(dqm,
-				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
+				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0,
+				USE_DEFAULT_GRACE_PERIOD);
 		if (retval == -ETIME)
 			qpd->reset_wavefronts = true;
 	} else {
···
 	}
 
 	if (!dqm->dev->kfd->shared_resources.enable_mes)
-		retval = execute_queues_cpsch(dqm, filter, 0);
+		retval = execute_queues_cpsch(dqm, filter, 0, USE_DEFAULT_GRACE_PERIOD);
 
 	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
 		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
···
 		return r;
 	}
 	dqm->active_runlist = true;
-	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
+	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES,
+			0, USE_DEFAULT_GRACE_PERIOD);
 	dqm_unlock(dqm);
 
 	return r;
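
Condensed, the control flow these hunks add to unmap_queues_cpsch() is:
override, unmap, restore. A sketch (unmap_with_grace_period() and
send_unmap_and_wait_for_fence() are stand-in names for exposition, not
functions in the patch):

/* Sketch of the grace-period handling added to unmap_queues_cpsch().
 * send_unmap_and_wait_for_fence() stands in for the existing
 * pm_send_unmap_queue()/fence-wait sequence.
 */
static int unmap_with_grace_period(struct device_queue_manager *dqm,
				   uint32_t grace_period)
{
	int retval = 0;

	if (grace_period != USE_DEFAULT_GRACE_PERIOD) {
		retval = pm_update_grace_period(&dqm->packet_mgr, grace_period);
		if (retval)
			return retval;
	}

	retval = send_unmap_and_wait_for_fence(dqm);
	if (retval)
		return retval;

	/* Back on the success path, restore the device default. */
	if (grace_period != USE_DEFAULT_GRACE_PERIOD &&
	    pm_update_grace_period(&dqm->packet_mgr, USE_DEFAULT_GRACE_PERIOD))
		pr_err("Failed to reset grace period\n");

	return 0;
}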
+3
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
···
 
 #define KFD_MES_PROCESS_QUANTUM		100000
 #define KFD_MES_GANG_QUANTUM		10000
+#define USE_DEFAULT_GRACE_PERIOD	0xffffffff
 
 struct device_process_node {
 	struct qcm_process_device *qpd;
···
 
 	/* used for GFX 9.4.3 only */
 	uint32_t current_logical_xcc_start;
+
+	uint32_t wait_times;
 };
 
 void device_queue_manager_init_cik(
+32
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
···
 	return retval;
 }
 
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+{
+	int retval = 0;
+	uint32_t *buffer, size;
+
+	size = pm->pmf->set_grace_period_size;
+
+	mutex_lock(&pm->lock);
+
+	if (size) {
+		kq_acquire_packet_buffer(pm->priv_queue,
+				size / sizeof(uint32_t),
+				(unsigned int **)&buffer);
+
+		if (!buffer) {
+			pr_err("Failed to allocate buffer on kernel queue\n");
+			retval = -ENOMEM;
+			goto out;
+		}
+
+		retval = pm->pmf->set_grace_period(pm, buffer, grace_period);
+		if (!retval)
+			kq_submit_packet(pm->priv_queue);
+		else
+			kq_rollback_packet(pm->priv_queue);
+	}
+
+out:
+	mutex_unlock(&pm->lock);
+	return retval;
+}
+
 int pm_send_unmap_queue(struct packet_manager *pm,
 			enum kfd_unmap_queues_filter filter,
 			uint32_t filter_param, bool reset)
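
A usage sketch: a (hypothetical) debugger suspend prologue could call
the new helper directly. Note that pm_update_grace_period() only builds
a packet when pm->pmf->set_grace_period_size is non-zero, so on ASICs
without the packet (the VI packet manager below sets it to 0) the call
is a silent no-op. kfd_dbg_request_immediate_preempt() is illustrative,
not part of the patch:

/* Hypothetical debugger path -- not in this patch. Requests immediate
 * preemption; the v9 packet builder clamps the 0 to 1 because of the
 * CP firmware bug noted in the commit message.
 */
static int kfd_dbg_request_immediate_preempt(struct device_queue_manager *dqm)
{
	return pm_update_grace_period(&dqm->packet_mgr, 0);
}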
+39
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
···
 	return 0;
 }
 
+static int pm_set_grace_period_v9(struct packet_manager *pm,
+		uint32_t *buffer,
+		uint32_t grace_period)
+{
+	struct pm4_mec_write_data_mmio *packet;
+	uint32_t reg_offset = 0;
+	uint32_t reg_data = 0;
+
+	pm->dqm->dev->kfd2kgd->build_grace_period_packet_info(
+			pm->dqm->dev->adev,
+			pm->dqm->wait_times,
+			grace_period,
+			&reg_offset,
+			&reg_data);
+
+	if (grace_period == USE_DEFAULT_GRACE_PERIOD)
+		reg_data = pm->dqm->wait_times;
+
+	packet = (struct pm4_mec_write_data_mmio *)buffer;
+	memset(buffer, 0, sizeof(struct pm4_mec_write_data_mmio));
+
+	packet->header.u32All = pm_build_pm4_header(IT_WRITE_DATA,
+					sizeof(struct pm4_mec_write_data_mmio));
+
+	packet->bitfields2.dst_sel = dst_sel___write_data__mem_mapped_register;
+	packet->bitfields2.addr_incr =
+			addr_incr___write_data__do_not_increment_address;
+
+	packet->bitfields3.dst_mmreg_addr = reg_offset;
+
+	packet->data = reg_data;
+
+	return 0;
+}
+
 static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 			enum kfd_unmap_queues_filter filter,
 			uint32_t filter_param, bool reset)
···
 	.set_resources = pm_set_resources_v9,
 	.map_queues = pm_map_queues_v9,
 	.unmap_queues = pm_unmap_queues_v9,
+	.set_grace_period = pm_set_grace_period_v9,
 	.query_status = pm_query_status_v9,
 	.release_mem = NULL,
 	.map_process_size = sizeof(struct pm4_mes_map_process),
···
 	.set_resources_size = sizeof(struct pm4_mes_set_resources),
 	.map_queues_size = sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+	.set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
 	.query_status_size = sizeof(struct pm4_mes_query_status),
 	.release_mem_size = 0,
 };
···
 	.set_resources = pm_set_resources_v9,
 	.map_queues = pm_map_queues_v9,
 	.unmap_queues = pm_unmap_queues_v9,
+	.set_grace_period = pm_set_grace_period_v9,
 	.query_status = pm_query_status_v9,
 	.release_mem = NULL,
 	.map_process_size = sizeof(struct pm4_mes_map_process_aldebaran),
···
 	.set_resources_size = sizeof(struct pm4_mes_set_resources),
 	.map_queues_size = sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+	.set_grace_period_size = sizeof(struct pm4_mec_write_data_mmio),
 	.query_status_size = sizeof(struct pm4_mes_query_status),
 	.release_mem_size = 0,
 };
+2
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
···
 	.set_resources = pm_set_resources_vi,
 	.map_queues = pm_map_queues_vi,
 	.unmap_queues = pm_unmap_queues_vi,
+	.set_grace_period = NULL,
 	.query_status = pm_query_status_vi,
 	.release_mem = pm_release_mem_vi,
 	.map_process_size = sizeof(struct pm4_mes_map_process),
···
 	.set_resources_size = sizeof(struct pm4_mes_set_resources),
 	.map_queues_size = sizeof(struct pm4_mes_map_queues),
 	.unmap_queues_size = sizeof(struct pm4_mes_unmap_queues),
+	.set_grace_period_size = 0,
 	.query_status_size = sizeof(struct pm4_mes_query_status),
 	.release_mem_size = sizeof(struct pm4_mec_release_mem)
 };
+65
drivers/gpu/drm/amd/amdkfd/kfd_pm4_headers_ai.h
···
 
 #endif
 
+#ifndef PM4_MEC_WRITE_DATA_DEFINED
+#define PM4_MEC_WRITE_DATA_DEFINED
+
+enum WRITE_DATA_dst_sel_enum {
+	dst_sel___write_data__mem_mapped_register = 0,
+	dst_sel___write_data__tc_l2 = 2,
+	dst_sel___write_data__gds = 3,
+	dst_sel___write_data__memory = 5,
+	dst_sel___write_data__memory_mapped_adc_persistent_state = 6,
+};
+
+enum WRITE_DATA_addr_incr_enum {
+	addr_incr___write_data__increment_address = 0,
+	addr_incr___write_data__do_not_increment_address = 1
+};
+
+enum WRITE_DATA_wr_confirm_enum {
+	wr_confirm___write_data__do_not_wait_for_write_confirmation = 0,
+	wr_confirm___write_data__wait_for_write_confirmation = 1
+};
+
+enum WRITE_DATA_cache_policy_enum {
+	cache_policy___write_data__lru = 0,
+	cache_policy___write_data__stream = 1
+};
+
+struct pm4_mec_write_data_mmio {
+	union {
+		union PM4_MES_TYPE_3_HEADER header;	/* header */
+		unsigned int ordinal1;
+	};
+
+	union {
+		struct {
+			unsigned int reserved1:8;
+			unsigned int dst_sel:4;
+			unsigned int reserved2:4;
+			unsigned int addr_incr:1;
+			unsigned int reserved3:2;
+			unsigned int resume_vf:1;
+			unsigned int wr_confirm:1;
+			unsigned int reserved4:4;
+			unsigned int cache_policy:2;
+			unsigned int reserved5:5;
+		} bitfields2;
+		unsigned int ordinal2;
+	};
+
+	union {
+		struct {
+			unsigned int dst_mmreg_addr:18;
+			unsigned int reserved6:14;
+		} bitfields3;
+		unsigned int ordinal3;
+	};
+
+	uint32_t reserved7;
+
+	uint32_t data;
+};
+
+#endif
+
 enum {
 	CACHE_FLUSH_AND_INV_TS_EVENT = 0x00000014
 };
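
The packet works out to five DWORDs (header, two bitfield ordinals, a
reserved word, and the data word), which is the size that
.set_grace_period_size above relies on. A compile-time check, not in
the original patch, could document that assumption (using the kernel's
static_assert from <linux/build_bug.h>):

/* Not in the original patch: makes the 5-DWORD size assumption behind
 * .set_grace_period_size explicit at compile time.
 */
static_assert(sizeof(struct pm4_mec_write_data_mmio) == 5 * sizeof(uint32_t),
	      "PM4 MEC WRITE_DATA packet must be exactly 5 DWORDs");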
+5
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
···
 	int (*unmap_queues)(struct packet_manager *pm, uint32_t *buffer,
 			enum kfd_unmap_queues_filter mode,
 			uint32_t filter_param, bool reset);
+	int (*set_grace_period)(struct packet_manager *pm, uint32_t *buffer,
+			uint32_t grace_period);
 	int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
 			uint64_t fence_address, uint64_t fence_value);
 	int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
···
 	int set_resources_size;
 	int map_queues_size;
 	int unmap_queues_size;
+	int set_grace_period_size;
 	int query_status_size;
 	int release_mem_size;
 };
···
 		uint32_t filter_param, bool reset);
 
 void pm_release_ib(struct packet_manager *pm);
+
+int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period);
 
 /* Following PM funcs can be shared among VI and AI */
 unsigned int pm_build_pm4_header(unsigned int opcode, size_t packet_size);