Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch 'support-exposing-raw-cycle-counters-in-ptp-and-mlx5'

Tariq Toukan says:

====================
Support exposing raw cycle counters in PTP and mlx5

This series by Carolina adds support in ptp and usage in mlx5 for
exposing the raw free-running cycle counter of PTP hardware clocks.

This is V2. Find previous one here:
https://lore.kernel.org/all/1752556533-39218-1-git-send-email-tariqt@nvidia.com/

Find detailed description by Carolina below [1].

[1]
This patch series introduces support for exposing the raw free-running
cycle counter of PTP hardware clocks. When the device is in free-running
mode, it emits timestamps as raw cycle values instead of nanoseconds.
These values may be passed directly to user space through:

- fwctl: exposes internal device event records that include raw
cycle-based timestamps.

- DPDK: retrieves CQEs that contain raw cycle counters, which are passed
to user space unmodified.

On their own, these raw cycle values cannot be related to host time. To
address this, the series introduces two new ioctl commands that allow
user space to query the device's raw cycle counter together with host
time:

- PTP_SYS_OFFSET_PRECISE_CYCLES

- PTP_SYS_OFFSET_EXTENDED_CYCLES

These commands work like their existing counterparts but return the
device timestamp in cycle units instead of real-time nanoseconds. This
allows user space to collect (cycle, time) pairs and build a mapping
between the device’s free-running clock and host time.

This can also be useful in the XDP fast path: if a driver inserts the
raw cycle value into metadata instead of a real-time timestamp, it can
avoid the overhead of converting cycles to time in the kernel. User
space can then resolve the cycle-to-time mapping using the new ioctls
when needed.

The new ioctls enable user space to correlate these raw cycle values
with host time, without requiring the PHC to be synchronized, so long as
the drift remains stable during collection.

Adds the new PTP ioctls and integrates support in ptp_ioctl():
- ptp: Add ioctl commands to expose raw cycle counter values

Support for exposing raw cycles in mlx5:
- net/mlx5: Extract MTCTR register read logic into helper function
- net/mlx5: Support getcyclesx and getcrosscycles
====================

Link: https://patch.msgid.link/1755008228-88881-1-git-send-email-tariqt@nvidia.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

+130 -21
+100 -13
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
··· 247 247 return !!MLX5_GET(mtptm_reg, out, psta); 248 248 } 249 249 250 - static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, 251 - struct system_counterval_t *sys_counterval, 252 - void *ctx) 250 + static int mlx5_mtctr_read(struct mlx5_core_dev *mdev, 251 + bool real_time_mode, 252 + struct system_counterval_t *sys_counterval, 253 + u64 *device) 253 254 { 254 255 u32 out[MLX5_ST_SZ_DW(mtctr_reg)] = {0}; 255 256 u32 in[MLX5_ST_SZ_DW(mtctr_reg)] = {0}; 256 - struct mlx5_core_dev *mdev = ctx; 257 - bool real_time_mode; 258 - u64 host, device; 257 + u64 host; 259 258 int err; 260 - 261 - real_time_mode = mlx5_real_time_mode(mdev); 262 259 263 260 MLX5_SET(mtctr_reg, in, first_clock_timestamp_request, 264 261 MLX5_MTCTR_REQUEST_PTM_ROOT_CLOCK); 265 262 MLX5_SET(mtctr_reg, in, second_clock_timestamp_request, 266 263 real_time_mode ? MLX5_MTCTR_REQUEST_REAL_TIME_CLOCK : 267 - MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER); 264 + MLX5_MTCTR_REQUEST_FREE_RUNNING_COUNTER); 268 265 269 - err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), MLX5_REG_MTCTR, 270 - 0, 0); 266 + err = mlx5_core_access_reg(mdev, in, sizeof(in), out, sizeof(out), 267 + MLX5_REG_MTCTR, 0, 0); 271 268 if (err) 272 269 return err; 273 270 ··· 278 281 .cs_id = CSID_X86_ART, 279 282 .use_nsecs = true, 280 283 }; 284 + *device = MLX5_GET64(mtctr_reg, out, second_clock_timestamp); 281 285 282 - device = MLX5_GET64(mtctr_reg, out, second_clock_timestamp); 286 + return 0; 287 + } 288 + 289 + static int mlx5_mtctr_syncdevicetime(ktime_t *device_time, 290 + struct system_counterval_t *sys_counterval, 291 + void *ctx) 292 + { 293 + struct mlx5_core_dev *mdev = ctx; 294 + bool real_time_mode; 295 + u64 device; 296 + int err; 297 + 298 + real_time_mode = mlx5_real_time_mode(mdev); 299 + 300 + err = mlx5_mtctr_read(mdev, real_time_mode, sys_counterval, &device); 301 + if (err) 302 + return err; 303 + 283 304 if (real_time_mode) 284 305 *device_time = ns_to_ktime(REAL_TIME_TO_NS(device >> 32, 
device & U32_MAX)); 285 306 else 286 307 *device_time = mlx5_timecounter_cyc2time(mdev->clock, device); 308 + 309 + return 0; 310 + } 311 + 312 + static int 313 + mlx5_mtctr_syncdevicecyclestime(ktime_t *device_time, 314 + struct system_counterval_t *sys_counterval, 315 + void *ctx) 316 + { 317 + struct mlx5_core_dev *mdev = ctx; 318 + u64 device; 319 + int err; 320 + 321 + err = mlx5_mtctr_read(mdev, false, sys_counterval, &device); 322 + if (err) 323 + return err; 324 + *device_time = ns_to_ktime(device); 287 325 288 326 return 0; 289 327 } ··· 343 311 344 312 err = get_device_system_crosststamp(mlx5_mtctr_syncdevicetime, mdev, 345 313 &history_begin, cts); 314 + unlock: 315 + mlx5_clock_unlock(clock); 316 + return err; 317 + } 318 + 319 + static int mlx5_ptp_getcrosscycles(struct ptp_clock_info *ptp, 320 + struct system_device_crosststamp *cts) 321 + { 322 + struct mlx5_clock *clock = 323 + container_of(ptp, struct mlx5_clock, ptp_info); 324 + struct system_time_snapshot history_begin = {0}; 325 + struct mlx5_core_dev *mdev; 326 + int err; 327 + 328 + mlx5_clock_lock(clock); 329 + mdev = mlx5_clock_mdev_get(clock); 330 + 331 + if (!mlx5_is_ptm_source_time_available(mdev)) { 332 + err = -EBUSY; 333 + goto unlock; 334 + } 335 + 336 + ktime_get_snapshot(&history_begin); 337 + 338 + err = get_device_system_crosststamp(mlx5_mtctr_syncdevicecyclestime, 339 + mdev, &history_begin, cts); 346 340 unlock: 347 341 mlx5_clock_unlock(clock); 348 342 return err; ··· 567 509 ns = mlx5_timecounter_cyc2time(clock, cycles); 568 510 *ts = ns_to_timespec64(ns); 569 511 out: 512 + mlx5_clock_unlock(clock); 513 + return 0; 514 + } 515 + 516 + static int mlx5_ptp_getcyclesx(struct ptp_clock_info *ptp, 517 + struct timespec64 *ts, 518 + struct ptp_system_timestamp *sts) 519 + { 520 + struct mlx5_clock *clock = container_of(ptp, struct mlx5_clock, 521 + ptp_info); 522 + struct mlx5_core_dev *mdev; 523 + u64 cycles; 524 + 525 + mlx5_clock_lock(clock); 526 + mdev = 
mlx5_clock_mdev_get(clock); 527 + 528 + cycles = mlx5_read_time(mdev, sts, false); 529 + *ts = ns_to_timespec64(cycles); 570 530 mlx5_clock_unlock(clock); 571 531 return 0; 572 532 } ··· 1305 1229 static void mlx5_init_timer_clock(struct mlx5_core_dev *mdev) 1306 1230 { 1307 1231 struct mlx5_clock *clock = mdev->clock; 1232 + bool expose_cycles; 1308 1233 1309 1234 /* Configure the PHC */ 1310 1235 clock->ptp_info = mlx5_ptp_clock_info; ··· 1313 1236 if (MLX5_CAP_MCAM_REG(mdev, mtutc)) 1314 1237 mlx5_init_timer_max_freq_adjustment(mdev); 1315 1238 1239 + expose_cycles = !MLX5_CAP_GEN(mdev, disciplined_fr_counter) || 1240 + !mlx5_real_time_mode(mdev); 1241 + 1316 1242 #ifdef CONFIG_X86 1317 1243 if (MLX5_CAP_MCAM_REG3(mdev, mtptm) && 1318 - MLX5_CAP_MCAM_REG3(mdev, mtctr) && boot_cpu_has(X86_FEATURE_ART)) 1244 + MLX5_CAP_MCAM_REG3(mdev, mtctr) && boot_cpu_has(X86_FEATURE_ART)) { 1319 1245 clock->ptp_info.getcrosststamp = mlx5_ptp_getcrosststamp; 1246 + if (expose_cycles) 1247 + clock->ptp_info.getcrosscycles = 1248 + mlx5_ptp_getcrosscycles; 1249 + } 1320 1250 #endif /* CONFIG_X86 */ 1251 + 1252 + if (expose_cycles) 1253 + clock->ptp_info.getcyclesx64 = mlx5_ptp_getcyclesx; 1321 1254 1322 1255 mlx5_timecounter_init(mdev); 1323 1256 mlx5_init_clock_info(mdev);
+26 -8
drivers/ptp/ptp_chardev.c
··· 285 285 return ops->enable(ops, &req, enable); 286 286 } 287 287 288 - static long ptp_sys_offset_precise(struct ptp_clock *ptp, void __user *arg) 288 + typedef int (*ptp_crosststamp_fn)(struct ptp_clock_info *, 289 + struct system_device_crosststamp *); 290 + 291 + static long ptp_sys_offset_precise(struct ptp_clock *ptp, void __user *arg, 292 + ptp_crosststamp_fn crosststamp_fn) 289 293 { 290 294 struct ptp_sys_offset_precise precise_offset; 291 295 struct system_device_crosststamp xtstamp; 292 296 struct timespec64 ts; 293 297 int err; 294 298 295 - if (!ptp->info->getcrosststamp) 299 + if (!crosststamp_fn) 296 300 return -EOPNOTSUPP; 297 301 298 - err = ptp->info->getcrosststamp(ptp->info, &xtstamp); 302 + err = crosststamp_fn(ptp->info, &xtstamp); 299 303 if (err) 300 304 return err; 301 305 ··· 317 313 return copy_to_user(arg, &precise_offset, sizeof(precise_offset)) ? -EFAULT : 0; 318 314 } 319 315 320 - static long ptp_sys_offset_extended(struct ptp_clock *ptp, void __user *arg) 316 + typedef int (*ptp_gettimex_fn)(struct ptp_clock_info *, 317 + struct timespec64 *, 318 + struct ptp_system_timestamp *); 319 + 320 + static long ptp_sys_offset_extended(struct ptp_clock *ptp, void __user *arg, 321 + ptp_gettimex_fn gettimex_fn) 321 322 { 322 323 struct ptp_sys_offset_extended *extoff __free(kfree) = NULL; 323 324 struct ptp_system_timestamp sts; 324 325 325 - if (!ptp->info->gettimex64) 326 + if (!gettimex_fn) 326 327 return -EOPNOTSUPP; 327 328 328 329 extoff = memdup_user(arg, sizeof(*extoff)); ··· 355 346 struct timespec64 ts; 356 347 int err; 357 348 358 - err = ptp->info->gettimex64(ptp->info, &ts, &sts); 349 + err = gettimex_fn(ptp->info, &ts, &sts); 359 350 if (err) 360 351 return err; 361 352 ··· 506 497 507 498 case PTP_SYS_OFFSET_PRECISE: 508 499 case PTP_SYS_OFFSET_PRECISE2: 509 - return ptp_sys_offset_precise(ptp, argptr); 500 + return ptp_sys_offset_precise(ptp, argptr, 501 + ptp->info->getcrosststamp); 510 502 511 503 case 
PTP_SYS_OFFSET_EXTENDED: 512 504 case PTP_SYS_OFFSET_EXTENDED2: 513 - return ptp_sys_offset_extended(ptp, argptr); 505 + return ptp_sys_offset_extended(ptp, argptr, 506 + ptp->info->gettimex64); 514 507 515 508 case PTP_SYS_OFFSET: 516 509 case PTP_SYS_OFFSET2: ··· 534 523 case PTP_MASK_EN_SINGLE: 535 524 return ptp_mask_en_single(pccontext->private_clkdata, argptr); 536 525 526 + case PTP_SYS_OFFSET_PRECISE_CYCLES: 527 + return ptp_sys_offset_precise(ptp, argptr, 528 + ptp->info->getcrosscycles); 529 + 530 + case PTP_SYS_OFFSET_EXTENDED_CYCLES: 531 + return ptp_sys_offset_extended(ptp, argptr, 532 + ptp->info->getcyclesx64); 537 533 default: 538 534 return -ENOTTY; 539 535 }
+4
include/uapi/linux/ptp_clock.h
··· 245 245 _IOWR(PTP_CLK_MAGIC, 18, struct ptp_sys_offset_extended) 246 246 #define PTP_MASK_CLEAR_ALL _IO(PTP_CLK_MAGIC, 19) 247 247 #define PTP_MASK_EN_SINGLE _IOW(PTP_CLK_MAGIC, 20, unsigned int) 248 + #define PTP_SYS_OFFSET_PRECISE_CYCLES \ 249 + _IOWR(PTP_CLK_MAGIC, 21, struct ptp_sys_offset_precise) 250 + #define PTP_SYS_OFFSET_EXTENDED_CYCLES \ 251 + _IOWR(PTP_CLK_MAGIC, 22, struct ptp_sys_offset_extended) 248 252 249 253 struct ptp_extts_event { 250 254 struct ptp_clock_time t; /* Time event occurred. */