Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/amdgpu/mes: keep enforce isolation up to date

Re-send the enforce isolation message to MES on resume to
make sure the MES state is up to date.

Fixes: 8521e3c5f058 ("drm/amd/amdgpu: limit single process inside MES")
Acked-by: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
Cc: Shaoyun Liu <shaoyun.liu@amd.com>
Cc: Srinivasan Shanmugam <srinivasan.shanmugam@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
(cherry picked from commit 27b791514789844e80da990c456c2465325e0851)

+32 -11
+4 -9
drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
··· 1638 1638 } 1639 1639 1640 1640 mutex_lock(&adev->enforce_isolation_mutex); 1641 - 1642 1641 for (i = 0; i < num_partitions; i++) { 1643 - if (adev->enforce_isolation[i] && !partition_values[i]) { 1642 + if (adev->enforce_isolation[i] && !partition_values[i]) 1644 1643 /* Going from enabled to disabled */ 1645 1644 amdgpu_vmid_free_reserved(adev, AMDGPU_GFXHUB(i)); 1646 - if (adev->enable_mes && adev->gfx.enable_cleaner_shader) 1647 - amdgpu_mes_set_enforce_isolation(adev, i, false); 1648 - } else if (!adev->enforce_isolation[i] && partition_values[i]) { 1645 + else if (!adev->enforce_isolation[i] && partition_values[i]) 1649 1646 /* Going from disabled to enabled */ 1650 1647 amdgpu_vmid_alloc_reserved(adev, AMDGPU_GFXHUB(i)); 1651 - if (adev->enable_mes && adev->gfx.enable_cleaner_shader) 1652 - amdgpu_mes_set_enforce_isolation(adev, i, true); 1653 - } 1654 1648 adev->enforce_isolation[i] = partition_values[i]; 1655 1649 } 1656 - 1657 1650 mutex_unlock(&adev->enforce_isolation_mutex); 1651 + 1652 + amdgpu_mes_update_enforce_isolation(adev); 1658 1653 1659 1654 return count; 1660 1655 }
+19 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
··· 1681 1681 } 1682 1682 1683 1683 /* Fix me -- node_id is used to identify the correct MES instances in the future */ 1684 - int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable) 1684 + static int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, 1685 + uint32_t node_id, bool enable) 1685 1686 { 1686 1687 struct mes_misc_op_input op_input = {0}; 1687 1688 int r; ··· 1701 1700 dev_err(adev->dev, "failed to change_config.\n"); 1702 1701 1703 1702 error: 1703 + return r; 1704 + } 1705 + 1706 + int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev) 1707 + { 1708 + int i, r = 0; 1709 + 1710 + if (adev->enable_mes && adev->gfx.enable_cleaner_shader) { 1711 + mutex_lock(&adev->enforce_isolation_mutex); 1712 + for (i = 0; i < (adev->xcp_mgr ? adev->xcp_mgr->num_xcps : 1); i++) { 1713 + if (adev->enforce_isolation[i]) 1714 + r |= amdgpu_mes_set_enforce_isolation(adev, i, true); 1715 + else 1716 + r |= amdgpu_mes_set_enforce_isolation(adev, i, false); 1717 + } 1718 + mutex_unlock(&adev->enforce_isolation_mutex); 1719 + } 1704 1720 return r; 1705 1721 } 1706 1722
+1 -1
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
··· 534 534 535 535 bool amdgpu_mes_suspend_resume_all_supported(struct amdgpu_device *adev); 536 536 537 - int amdgpu_mes_set_enforce_isolation(struct amdgpu_device *adev, uint32_t node_id, bool enable); 537 + int amdgpu_mes_update_enforce_isolation(struct amdgpu_device *adev); 538 538 539 539 #endif /* __AMDGPU_MES_H__ */
+4
drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
··· 1633 1633 goto failure; 1634 1634 } 1635 1635 1636 + r = amdgpu_mes_update_enforce_isolation(adev); 1637 + if (r) 1638 + goto failure; 1639 + 1636 1640 out: 1637 1641 /* 1638 1642 * Disable KIQ ring usage from the driver once MES is enabled.
+4
drivers/gpu/drm/amd/amdgpu/mes_v12_0.c
··· 1743 1743 goto failure; 1744 1744 } 1745 1745 1746 + r = amdgpu_mes_update_enforce_isolation(adev); 1747 + if (r) 1748 + goto failure; 1749 + 1746 1750 out: 1747 1751 /* 1748 1752 * Disable KIQ ring usage from the driver once MES is enabled.