Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/xe/oa/uapi: Read file_operation

Implement the OA stream read file_operation. Both blocking and non-blocking
reads are supported. As part of the read system call, the read copies OA perf
data from the OA buffer to the user buffer, after appending packet headers
for status and data packets.

v2: Drop OA report headers, implement DRM_XE_PERF_IOCTL_STATUS (Umesh)
v3: Introduce 'struct drm_xe_oa_stream_status'
v4: Define oa_status register bitfields (Umesh)
v5: Add extensions to 'struct drm_xe_oa_stream_status'
v6: Minor cleanup, eliminate report32 variable
v7: Use -EIO to signal to userspace to read OASTATUS using
DRM_XE_PERF_IOCTL_STATUS, change previous sites returning -EIO to
return -EINVAL
Make drm_xe_oa_stream_status bits contiguous (Jose, Umesh)
rmw oa_status bits (Umesh)

Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Acked-by: José Roberto de Souza <jose.souza@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-10-ashutosh.dixit@intel.com

+224
+201
drivers/gpu/drm/xe/xe_oa.c
··· 164 164 return oa_report_header_64bit(stream) ? *(u64 *)report : *(u32 *)report; 165 165 } 166 166 167 + static void oa_report_id_clear(struct xe_oa_stream *stream, u32 *report) 168 + { 169 + if (oa_report_header_64bit(stream)) 170 + *(u64 *)report = 0; 171 + else 172 + *report = 0; 173 + } 174 + 167 175 static u64 oa_timestamp(struct xe_oa_stream *stream, void *report) 168 176 { 169 177 return oa_report_header_64bit(stream) ? 170 178 *((u64 *)report + 1) : 171 179 *((u32 *)report + 1); 180 + } 181 + 182 + static void oa_timestamp_clear(struct xe_oa_stream *stream, u32 *report) 183 + { 184 + if (oa_report_header_64bit(stream)) 185 + *(u64 *)&report[2] = 0; 186 + else 187 + report[1] = 0; 172 188 } 173 189 174 190 static bool xe_oa_buffer_check_unlocked(struct xe_oa_stream *stream) ··· 261 245 return HRTIMER_RESTART; 262 246 } 263 247 248 + static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, 249 + size_t count, size_t *offset, const u8 *report) 250 + { 251 + int report_size = stream->oa_buffer.format->size; 252 + int report_size_partial; 253 + u8 *oa_buf_end; 254 + 255 + if ((count - *offset) < report_size) 256 + return -ENOSPC; 257 + 258 + buf += *offset; 259 + 260 + oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; 261 + report_size_partial = oa_buf_end - report; 262 + 263 + if (report_size_partial < report_size) { 264 + if (copy_to_user(buf, report, report_size_partial)) 265 + return -EFAULT; 266 + buf += report_size_partial; 267 + 268 + if (copy_to_user(buf, stream->oa_buffer.vaddr, 269 + report_size - report_size_partial)) 270 + return -EFAULT; 271 + } else if (copy_to_user(buf, report, report_size)) { 272 + return -EFAULT; 273 + } 274 + 275 + *offset += report_size; 276 + 277 + return 0; 278 + } 279 + 280 + static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, 281 + size_t count, size_t *offset) 282 + { 283 + int report_size = stream->oa_buffer.format->size; 284 + u8 *oa_buf_base = 
stream->oa_buffer.vaddr; 285 + u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); 286 + u32 mask = (XE_OA_BUFFER_SIZE - 1); 287 + size_t start_offset = *offset; 288 + unsigned long flags; 289 + u32 head, tail; 290 + int ret = 0; 291 + 292 + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 293 + head = stream->oa_buffer.head; 294 + tail = stream->oa_buffer.tail; 295 + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 296 + 297 + xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); 298 + 299 + for (; OA_TAKEN(tail, head); head = (head + report_size) & mask) { 300 + u8 *report = oa_buf_base + head; 301 + 302 + ret = xe_oa_append_report(stream, buf, count, offset, report); 303 + if (ret) 304 + break; 305 + 306 + if (is_power_of_2(report_size)) { 307 + /* Clear out report id and timestamp to detect unlanded reports */ 308 + oa_report_id_clear(stream, (void *)report); 309 + oa_timestamp_clear(stream, (void *)report); 310 + } else { 311 + u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; 312 + u32 part = oa_buf_end - report; 313 + 314 + /* Zero out the entire report */ 315 + if (report_size <= part) { 316 + memset(report, 0, report_size); 317 + } else { 318 + memset(report, 0, part); 319 + memset(oa_buf_base, 0, report_size - part); 320 + } 321 + } 322 + } 323 + 324 + if (start_offset != *offset) { 325 + struct xe_reg oaheadptr = __oa_regs(stream)->oa_head_ptr; 326 + 327 + spin_lock_irqsave(&stream->oa_buffer.ptr_lock, flags); 328 + xe_mmio_write32(stream->gt, oaheadptr, 329 + (head + gtt_offset) & OAG_OAHEADPTR_MASK); 330 + stream->oa_buffer.head = head; 331 + spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); 332 + } 333 + 334 + return ret; 335 + } 336 + 264 337 static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) 265 338 { 266 339 u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); ··· 421 316 drm_err(&stream->oa->xe->drm, 422 317 "wait for OA tlb invalidate timed out\n"); 423 318 } 319 
+ } 320 + 321 + static int xe_oa_wait_unlocked(struct xe_oa_stream *stream) 322 + { 323 + /* We might wait indefinitely if periodic sampling is not enabled */ 324 + if (!stream->periodic) 325 + return -EINVAL; 326 + 327 + return wait_event_interruptible(stream->poll_wq, 328 + xe_oa_buffer_check_unlocked(stream)); 329 + } 330 + 331 + #define OASTATUS_RELEVANT_BITS (OASTATUS_MMIO_TRG_Q_FULL | OASTATUS_COUNTER_OVERFLOW | \ 332 + OASTATUS_BUFFER_OVERFLOW | OASTATUS_REPORT_LOST) 333 + 334 + static int __xe_oa_read(struct xe_oa_stream *stream, char __user *buf, 335 + size_t count, size_t *offset) 336 + { 337 + /* Only clear our bits to avoid side-effects */ 338 + stream->oa_status = xe_mmio_rmw32(stream->gt, __oa_regs(stream)->oa_status, 339 + OASTATUS_RELEVANT_BITS, 0); 340 + /* 341 + * Signal to userspace that there is non-zero OA status to read via 342 + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl 343 + */ 344 + if (stream->oa_status & OASTATUS_RELEVANT_BITS) 345 + return -EIO; 346 + 347 + return xe_oa_append_reports(stream, buf, count, offset); 348 + } 349 + 350 + static ssize_t xe_oa_read(struct file *file, char __user *buf, 351 + size_t count, loff_t *ppos) 352 + { 353 + struct xe_oa_stream *stream = file->private_data; 354 + size_t offset = 0; 355 + int ret; 356 + 357 + /* Can't read from disabled streams */ 358 + if (!stream->enabled || !stream->sample) 359 + return -EINVAL; 360 + 361 + if (!(file->f_flags & O_NONBLOCK)) { 362 + do { 363 + ret = xe_oa_wait_unlocked(stream); 364 + if (ret) 365 + return ret; 366 + 367 + mutex_lock(&stream->stream_lock); 368 + ret = __xe_oa_read(stream, buf, count, &offset); 369 + mutex_unlock(&stream->stream_lock); 370 + } while (!offset && !ret); 371 + } else { 372 + mutex_lock(&stream->stream_lock); 373 + ret = __xe_oa_read(stream, buf, count, &offset); 374 + mutex_unlock(&stream->stream_lock); 375 + } 376 + 377 + /* 378 + * Typically we clear pollin here in order to wait for the new hrtimer callback 379 + * before unblocking. 
The exception to this is if __xe_oa_read returns -ENOSPC, 380 + * which means that more OA data is available than could fit in the user provided 381 + * buffer. In this case we want the next poll() call to not block. 382 + * 383 + * Also in case of -EIO, we have already waited for data before returning 384 + * -EIO, so need to wait again 385 + */ 386 + if (ret != -ENOSPC && ret != -EIO) 387 + stream->pollin = false; 388 + 389 + /* Possible values for ret are 0, -EFAULT, -ENOSPC, -EIO, -EINVAL, ... */ 390 + return offset ?: (ret ?: -EAGAIN); 424 391 } 425 392 426 393 static __poll_t xe_oa_poll_locked(struct xe_oa_stream *stream, ··· 857 680 return ret; 858 681 } 859 682 683 + static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg) 684 + { 685 + struct drm_xe_oa_stream_status status = {}; 686 + void __user *uaddr = (void __user *)arg; 687 + 688 + /* Map from register to uapi bits */ 689 + if (stream->oa_status & OASTATUS_REPORT_LOST) 690 + status.oa_status |= DRM_XE_OASTATUS_REPORT_LOST; 691 + if (stream->oa_status & OASTATUS_BUFFER_OVERFLOW) 692 + status.oa_status |= DRM_XE_OASTATUS_BUFFER_OVERFLOW; 693 + if (stream->oa_status & OASTATUS_COUNTER_OVERFLOW) 694 + status.oa_status |= DRM_XE_OASTATUS_COUNTER_OVERFLOW; 695 + if (stream->oa_status & OASTATUS_MMIO_TRG_Q_FULL) 696 + status.oa_status |= DRM_XE_OASTATUS_MMIO_TRG_Q_FULL; 697 + 698 + if (copy_to_user(uaddr, &status, sizeof(status))) 699 + return -EFAULT; 700 + 701 + return 0; 702 + } 703 + 860 704 static long xe_oa_ioctl_locked(struct xe_oa_stream *stream, 861 705 unsigned int cmd, 862 706 unsigned long arg) ··· 891 693 return 0; 892 694 case DRM_XE_PERF_IOCTL_CONFIG: 893 695 return xe_oa_config_locked(stream, arg); 696 + case DRM_XE_PERF_IOCTL_STATUS: 697 + return xe_oa_status_locked(stream, arg); 894 698 } 895 699 896 700 return -EINVAL; ··· 945 745 .llseek = no_llseek, 946 746 .release = xe_oa_release, 947 747 .poll = xe_oa_poll, 748 + .read = xe_oa_read, 948 749 .unlocked_ioctl = 
xe_oa_ioctl, 949 750 }; 950 751
+3
drivers/gpu/drm/xe/xe_oa_types.h
··· 222 222 223 223 /** @poll_period_ns: hrtimer period for checking OA buffer for available data */ 224 224 u64 poll_period_ns; 225 + 226 + /** @oa_status: temporary storage for oa_status register value */ 227 + u32 oa_status; 225 228 }; 226 229 #endif
+20
include/uapi/drm/xe_drm.h
··· 1570 1570 __u64 regs_ptr; 1571 1571 }; 1572 1572 1573 + /** 1574 + * struct drm_xe_oa_stream_status - OA stream status returned from 1575 + * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to 1576 + * query stream status in response to EIO errno from perf fd read(). 1577 + */ 1578 + struct drm_xe_oa_stream_status { 1579 + /** @extensions: Pointer to the first extension struct, if any */ 1580 + __u64 extensions; 1581 + 1582 + /** @oa_status: OA stream status (see Bspec 46717/61226) */ 1583 + __u64 oa_status; 1584 + #define DRM_XE_OASTATUS_MMIO_TRG_Q_FULL (1 << 3) 1585 + #define DRM_XE_OASTATUS_COUNTER_OVERFLOW (1 << 2) 1586 + #define DRM_XE_OASTATUS_BUFFER_OVERFLOW (1 << 1) 1587 + #define DRM_XE_OASTATUS_REPORT_LOST (1 << 0) 1588 + 1589 + /** @reserved: reserved for future use */ 1590 + __u64 reserved[3]; 1591 + }; 1592 + 1573 1593 #if defined(__cplusplus) 1574 1594 } 1575 1595 #endif