Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/oa/uapi: Make OA buffer size configurable

Add a new property called DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE to
allow OA buffer size to be configurable from userspace.

With this, the OA buffer size can be configured to any power-of-2
size between 128KB and 128MB; it defaults to 16MB when no size is
supplied.

v2:
- Rebase
v3:
- Add oa buffer size to capabilities [Ashutosh]
- Address several nitpicks [Ashutosh]
- Fix commit message/subject [Ashutosh]

BSpec: 61100, 61228
Signed-off-by: Sai Teja Pottumuttu <sai.teja.pottumuttu@intel.com>
Reviewed-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20241205041913.883767-2-sai.teja.pottumuttu@intel.com

authored by

Sai Teja Pottumuttu and committed by
Ashutosh Dixit
720f63a8 cb57c750

+56 -22
+1 -8
drivers/gpu/drm/xe/regs/xe_oa_regs.h
··· 41 41 42 42 #define OAG_OABUFFER XE_REG(0xdb08) 43 43 #define OABUFFER_SIZE_MASK REG_GENMASK(5, 3) 44 - #define OABUFFER_SIZE_128K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 0) 45 - #define OABUFFER_SIZE_256K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 1) 46 - #define OABUFFER_SIZE_512K REG_FIELD_PREP(OABUFFER_SIZE_MASK, 2) 47 - #define OABUFFER_SIZE_1M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 3) 48 - #define OABUFFER_SIZE_2M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 4) 49 - #define OABUFFER_SIZE_4M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 5) 50 - #define OABUFFER_SIZE_8M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 6) 51 - #define OABUFFER_SIZE_16M REG_FIELD_PREP(OABUFFER_SIZE_MASK, 7) 52 44 #define OAG_OABUFFER_MEMORY_SELECT REG_BIT(0) /* 0: PPGTT, 1: GGTT */ 53 45 54 46 #define OAG_OACONTROL XE_REG(0xdaf4) ··· 55 63 #define OAG_OA_DEBUG XE_REG(0xdaf8, XE_REG_OPTION_MASKED) 56 64 #define OAG_OA_DEBUG_DISABLE_MMIO_TRG REG_BIT(14) 57 65 #define OAG_OA_DEBUG_START_TRIGGER_SCOPE_CONTROL REG_BIT(13) 66 + #define OAG_OA_DEBUG_BUF_SIZE_SELECT REG_BIT(12) 58 67 #define OAG_OA_DEBUG_DISABLE_START_TRG_2_COUNT_QUAL REG_BIT(8) 59 68 #define OAG_OA_DEBUG_DISABLE_START_TRG_1_COUNT_QUAL REG_BIT(7) 60 69 #define OAG_OA_DEBUG_INCLUDE_CLK_RATIO REG_BIT(6)
+43 -12
drivers/gpu/drm/xe/xe_oa.c
··· 96 96 struct drm_xe_sync __user *syncs_user; 97 97 int num_syncs; 98 98 struct xe_sync_entry *syncs; 99 + size_t oa_buffer_size; 99 100 }; 100 101 101 102 struct xe_oa_config_bo { ··· 404 403 405 404 static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) 406 405 { 407 - struct xe_mmio *mmio = &stream->gt->mmio; 408 406 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 409 - u32 oa_buf = gtt_offset | OABUFFER_SIZE_16M | OAG_OABUFFER_MEMORY_SELECT; 407 + int size_exponent = __ffs(stream->oa_buffer.bo->size); 408 + u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT; 409 + struct xe_mmio *mmio = &stream->gt->mmio; 410 410 unsigned long flags; 411 + 412 + /* 413 + * If oa buffer size is more than 16MB (exponent greater than 24), the 414 + * oa buffer size field is multiplied by 8 in xe_oa_enable_metric_set. 415 + */ 416 + oa_buf |= REG_FIELD_PREP(OABUFFER_SIZE_MASK, 417 + size_exponent > 24 ? size_exponent - 20 : size_exponent - 17); 411 418 412 419 spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 413 420 ··· 910 901 xe_file_put(stream->xef); 911 902 } 912 903 913 - static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream) 904 + static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size) 914 905 { 915 906 struct xe_bo *bo; 916 907 917 - BUILD_BUG_ON_NOT_POWER_OF_2(XE_OA_BUFFER_SIZE); 918 - BUILD_BUG_ON(XE_OA_BUFFER_SIZE < SZ_128K || XE_OA_BUFFER_SIZE > SZ_16M); 919 - 920 908 bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL, 921 - XE_OA_BUFFER_SIZE, ttm_bo_type_kernel, 909 + size, ttm_bo_type_kernel, 922 910 XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT); 923 911 if (IS_ERR(bo)) 924 912 return PTR_ERR(bo); ··· 1093 1087 0 : OAG_OA_DEBUG_DISABLE_CTX_SWITCH_REPORTS); 1094 1088 } 1095 1089 1090 + static u32 oag_buf_size_select(const struct xe_oa_stream *stream) 1091 + { 1092 + return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT, 1093 + stream->oa_buffer.bo->size > SZ_16M ? 
1094 + OAG_OA_DEBUG_BUF_SIZE_SELECT : 0); 1095 + } 1096 + 1096 1097 static int xe_oa_enable_metric_set(struct xe_oa_stream *stream) 1097 1098 { 1098 1099 struct xe_mmio *mmio = &stream->gt->mmio; ··· 1132 1119 xe_mmio_write32(mmio, __oa_regs(stream)->oa_debug, 1133 1120 _MASKED_BIT_ENABLE(oa_debug) | 1134 1121 oag_report_ctx_switches(stream) | 1122 + oag_buf_size_select(stream) | 1135 1123 oag_configure_mmio_trigger(stream, true)); 1136 1124 1137 1125 xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ? ··· 1274 1260 return 0; 1275 1261 } 1276 1262 1263 + static int xe_oa_set_prop_oa_buffer_size(struct xe_oa *oa, u64 value, 1264 + struct xe_oa_open_param *param) 1265 + { 1266 + if (!is_power_of_2(value) || value < SZ_128K || value > SZ_128M) { 1267 + drm_dbg(&oa->xe->drm, "OA buffer size invalid %llu\n", value); 1268 + return -EINVAL; 1269 + } 1270 + param->oa_buffer_size = value; 1271 + return 0; 1272 + } 1273 + 1277 1274 static int xe_oa_set_prop_ret_inval(struct xe_oa *oa, u64 value, 1278 1275 struct xe_oa_open_param *param) 1279 1276 { ··· 1305 1280 [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_no_preempt, 1306 1281 [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1307 1282 [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1283 + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_oa_buffer_size, 1308 1284 }; 1309 1285 1310 1286 static const xe_oa_set_property_fn xe_oa_set_property_funcs_config[] = { ··· 1320 1294 [DRM_XE_OA_PROPERTY_NO_PREEMPT] = xe_oa_set_prop_ret_inval, 1321 1295 [DRM_XE_OA_PROPERTY_NUM_SYNCS] = xe_oa_set_prop_num_syncs, 1322 1296 [DRM_XE_OA_PROPERTY_SYNCS] = xe_oa_set_prop_syncs_user, 1297 + [DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE] = xe_oa_set_prop_ret_inval, 1323 1298 }; 1324 1299 1325 1300 static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_from from, ··· 1580 1553 1581 1554 static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg) 1582 1555 { 1583 - struct 
drm_xe_oa_stream_info info = { .oa_buf_size = XE_OA_BUFFER_SIZE, }; 1556 + struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, }; 1584 1557 void __user *uaddr = (void __user *)arg; 1585 1558 1586 1559 if (copy_to_user(uaddr, &info, sizeof(info))) ··· 1666 1639 } 1667 1640 1668 1641 /* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */ 1669 - if (vma->vm_end - vma->vm_start != XE_OA_BUFFER_SIZE) { 1642 + if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) { 1670 1643 drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n"); 1671 1644 return -EINVAL; 1672 1645 } ··· 1810 1783 if (GRAPHICS_VER(stream->oa->xe) >= 20 && 1811 1784 stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) 1812 1785 stream->oa_buffer.circ_size = 1813 - XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; 1786 + param->oa_buffer_size - 1787 + param->oa_buffer_size % stream->oa_buffer.format->size; 1814 1788 else 1815 - stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; 1789 + stream->oa_buffer.circ_size = param->oa_buffer_size; 1816 1790 1817 1791 if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { 1818 1792 /* If we don't find the context offset, just return error */ ··· 1856 1828 goto err_fw_put; 1857 1829 } 1858 1830 1859 - ret = xe_oa_alloc_oa_buffer(stream); 1831 + ret = xe_oa_alloc_oa_buffer(stream, param->oa_buffer_size); 1860 1832 if (ret) 1861 1833 goto err_fw_put; 1862 1834 ··· 2152 2124 oa_freq_hz = div64_u64(NSEC_PER_SEC, oa_period); 2153 2125 drm_dbg(&oa->xe->drm, "Using periodic sampling freq %lld Hz\n", oa_freq_hz); 2154 2126 } 2127 + 2128 + if (!param.oa_buffer_size) 2129 + param.oa_buffer_size = DEFAULT_XE_OA_BUFFER_SIZE; 2155 2130 2156 2131 ret = xe_oa_parse_syncs(oa, &param); 2157 2132 if (ret)
+1 -1
drivers/gpu/drm/xe/xe_oa_types.h
··· 15 15 #include "regs/xe_reg_defs.h" 16 16 #include "xe_hw_engine_types.h" 17 17 18 - #define XE_OA_BUFFER_SIZE SZ_16M 18 + #define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M 19 19 20 20 enum xe_oa_report_header { 21 21 HDR_32_BIT = 0,
+2 -1
drivers/gpu/drm/xe/xe_query.c
··· 671 671 du->oa_unit_id = u->oa_unit_id; 672 672 du->oa_unit_type = u->type; 673 673 du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt); 674 - du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS; 674 + du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS | 675 + DRM_XE_OA_CAPS_OA_BUFFER_SIZE; 675 676 676 677 j = 0; 677 678 for_each_hw_engine(hwe, gt, hwe_id) {
+9
include/uapi/drm/xe_drm.h
··· 1486 1486 __u64 capabilities; 1487 1487 #define DRM_XE_OA_CAPS_BASE (1 << 0) 1488 1488 #define DRM_XE_OA_CAPS_SYNCS (1 << 1) 1489 + #define DRM_XE_OA_CAPS_OA_BUFFER_SIZE (1 << 2) 1489 1490 1490 1491 /** @oa_timestamp_freq: OA timestamp freq */ 1491 1492 __u64 oa_timestamp_freq; ··· 1652 1651 * to the VM bind case. 1653 1652 */ 1654 1653 DRM_XE_OA_PROPERTY_SYNCS, 1654 + 1655 + /** 1656 + * @DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE: Size of OA buffer to be 1657 + * allocated by the driver in bytes. Supported sizes are powers of 1658 + * 2 from 128 KiB to 128 MiB. When not specified, a 16 MiB OA 1659 + * buffer is allocated by default. 1660 + */ 1661 + DRM_XE_OA_PROPERTY_OA_BUFFER_SIZE, 1655 1662 }; 1656 1663 1657 1664 /**