Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge branch '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue

Tony Nguyen says:

====================
1GbE Intel Wired LAN Driver Updates 2022-07-28

Jacob Keller says:

Convert all of the Intel drivers with PTP support to the newer .adjfine
callback, which takes its adjustment in scaled parts per million (ppm with a
16-bit binary fractional field).

This improves the precision of the frequency adjustments by taking advantage
of the full scaled parts per million input coming from user space.

In addition, all implementations are converted to use the
mul_u64_u64_div_u64 function, which better handles the intermediate value.
This function uses architecture-specific instructions where possible to
avoid loss of precision when a normal 64-bit multiplication would overflow.

Of note, the i40e implementation is now able to avoid loss of precision on
slower link speeds by using mul_u64_u64_div_u64 to multiply by the link speed
factor first. This results in a significantly more precise adjustment because
the calculation can now affect the lower bits of the increment value.

This also gets us a step closer to being able to remove the .adjfreq
callback entirely by removing its use from many drivers.

I plan to follow this up with a series to update the drivers from other
vendors and drop the .adjfreq implementation entirely.

* '1GbE' of git://git.kernel.org/pub/scm/linux/kernel/git/tnguy/next-queue:
igb: convert .adjfreq to .adjfine
ixgbe: convert .adjfreq to .adjfine
i40e: convert .adjfreq to .adjfine
i40e: use mul_u64_u64_div_u64 for PTP frequency calculation
e1000e: convert .adjfreq to .adjfine
e1000e: remove unnecessary range check in e1000e_phc_adjfreq
ice: implement adjfine with mul_u64_u64_div_u64
====================

Link: https://lore.kernel.org/r/20220728181836.3387862-1-anthony.l.nguyen@intel.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+75 -88
+1 -1
drivers/net/ethernet/intel/e1000e/e1000.h
··· 329 329 struct ptp_clock *ptp_clock; 330 330 struct ptp_clock_info ptp_clock_info; 331 331 struct pm_qos_request pm_qos_req; 332 - s32 ptp_delta; 332 + long ptp_delta; 333 333 334 334 u16 eee_advert; 335 335 };
+2 -2
drivers/net/ethernet/intel/e1000e/netdev.c
··· 3922 3922 if (!(adapter->flags & FLAG_HAS_HW_TIMESTAMP)) 3923 3923 return; 3924 3924 3925 - if (info->adjfreq) { 3925 + if (info->adjfine) { 3926 3926 /* restore the previous ptp frequency delta */ 3927 - ret_val = info->adjfreq(info, adapter->ptp_delta); 3927 + ret_val = info->adjfine(info, adapter->ptp_delta); 3928 3928 } else { 3929 3929 /* set the default base frequency if no adjustment possible */ 3930 3930 ret_val = e1000e_get_base_timinca(adapter, &timinca);
+8 -10
drivers/net/ethernet/intel/e1000e/ptp.c
··· 15 15 #endif 16 16 17 17 /** 18 - * e1000e_phc_adjfreq - adjust the frequency of the hardware clock 18 + * e1000e_phc_adjfine - adjust the frequency of the hardware clock 19 19 * @ptp: ptp clock structure 20 - * @delta: Desired frequency change in parts per billion 20 + * @delta: Desired frequency chance in scaled parts per million 21 21 * 22 22 * Adjust the frequency of the PHC cycle counter by the indicated delta from 23 23 * the base frequency. 24 + * 25 + * Scaled parts per million is ppm but with a 16 bit binary fractional field. 24 26 **/ 25 - static int e1000e_phc_adjfreq(struct ptp_clock_info *ptp, s32 delta) 27 + static int e1000e_phc_adjfine(struct ptp_clock_info *ptp, long delta) 26 28 { 27 29 struct e1000_adapter *adapter = container_of(ptp, struct e1000_adapter, 28 30 ptp_clock_info); ··· 34 32 u64 adjustment; 35 33 u32 timinca, incvalue; 36 34 s32 ret_val; 37 - 38 - if ((delta > ptp->max_adj) || (delta <= -1000000000)) 39 - return -EINVAL; 40 35 41 36 if (delta < 0) { 42 37 neg_adj = true; ··· 49 50 50 51 incvalue = timinca & E1000_TIMINCA_INCVALUE_MASK; 51 52 52 - adjustment = incvalue; 53 - adjustment *= delta; 54 - adjustment = div_u64(adjustment, 1000000000); 53 + adjustment = mul_u64_u64_div_u64(incvalue, (u64)delta, 54 + 1000000ULL << 16); 55 55 56 56 incvalue = neg_adj ? (incvalue - adjustment) : (incvalue + adjustment); 57 57 ··· 258 260 .n_per_out = 0, 259 261 .n_pins = 0, 260 262 .pps = 0, 261 - .adjfreq = e1000e_phc_adjfreq, 263 + .adjfine = e1000e_phc_adjfine, 262 264 .adjtime = e1000e_phc_adjtime, 263 265 .gettimex64 = e1000e_phc_gettimex, 264 266 .settime64 = e1000e_phc_settime,
+14 -21
drivers/net/ethernet/intel/i40e/i40e_ptp.c
··· 334 334 } 335 335 336 336 /** 337 - * i40e_ptp_adjfreq - Adjust the PHC frequency 337 + * i40e_ptp_adjfine - Adjust the PHC frequency 338 338 * @ptp: The PTP clock structure 339 - * @ppb: Parts per billion adjustment from the base 339 + * @scaled_ppm: Scaled parts per million adjustment from base 340 340 * 341 - * Adjust the frequency of the PHC by the indicated parts per billion from the 342 - * base frequency. 341 + * Adjust the frequency of the PHC by the indicated delta from the base 342 + * frequency. 343 + * 344 + * Scaled parts per million is ppm with a 16 bit binary fractional field. 343 345 **/ 344 - static int i40e_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb) 346 + static int i40e_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm) 345 347 { 346 348 struct i40e_pf *pf = container_of(ptp, struct i40e_pf, ptp_caps); 347 349 struct i40e_hw *hw = &pf->hw; 348 350 u64 adj, freq, diff; 349 351 int neg_adj = 0; 350 352 351 - if (ppb < 0) { 353 + if (scaled_ppm < 0) { 352 354 neg_adj = 1; 353 - ppb = -ppb; 355 + scaled_ppm = -scaled_ppm; 354 356 } 355 357 356 - freq = I40E_PTP_40GB_INCVAL; 357 - freq *= ppb; 358 - diff = div_u64(freq, 1000000000ULL); 358 + smp_mb(); /* Force any pending update before accessing. */ 359 + freq = I40E_PTP_40GB_INCVAL * READ_ONCE(pf->ptp_adj_mult); 360 + diff = mul_u64_u64_div_u64(freq, (u64)scaled_ppm, 361 + 1000000ULL << 16); 359 362 360 363 if (neg_adj) 361 364 adj = I40E_PTP_40GB_INCVAL - diff; 362 365 else 363 366 adj = I40E_PTP_40GB_INCVAL + diff; 364 - 365 - /* At some link speeds, the base incval is so large that directly 366 - * multiplying by ppb would result in arithmetic overflow even when 367 - * using a u64. Avoid this by instead calculating the new incval 368 - * always in terms of the 40GbE clock rate and then multiplying by the 369 - * link speed factor afterwards. This does result in slightly lower 370 - * precision at lower link speeds, but it is fairly minor. 
371 - */ 372 - smp_mb(); /* Force any pending update before accessing. */ 373 - adj *= READ_ONCE(pf->ptp_adj_mult); 374 367 375 368 wr32(hw, I40E_PRTTSYN_INC_L, adj & 0xFFFFFFFF); 376 369 wr32(hw, I40E_PRTTSYN_INC_H, adj >> 32); ··· 1394 1401 sizeof(pf->ptp_caps.name) - 1); 1395 1402 pf->ptp_caps.owner = THIS_MODULE; 1396 1403 pf->ptp_caps.max_adj = 999999999; 1397 - pf->ptp_caps.adjfreq = i40e_ptp_adjfreq; 1404 + pf->ptp_caps.adjfine = i40e_ptp_adjfine; 1398 1405 pf->ptp_caps.adjtime = i40e_ptp_adjtime; 1399 1406 pf->ptp_caps.gettimex64 = i40e_ptp_gettimex; 1400 1407 pf->ptp_caps.settime64 = i40e_ptp_settime;
+3 -13
drivers/net/ethernet/intel/ice/ice_ptp.c
··· 1102 1102 static int ice_ptp_adjfine(struct ptp_clock_info *info, long scaled_ppm) 1103 1103 { 1104 1104 struct ice_pf *pf = ptp_info_to_pf(info); 1105 - u64 freq, divisor = 1000000ULL; 1106 1105 struct ice_hw *hw = &pf->hw; 1107 - s64 incval, diff; 1106 + u64 incval, diff; 1108 1107 int neg_adj = 0; 1109 1108 int err; 1110 1109 ··· 1114 1115 scaled_ppm = -scaled_ppm; 1115 1116 } 1116 1117 1117 - while ((u64)scaled_ppm > div64_u64(U64_MAX, incval)) { 1118 - /* handle overflow by scaling down the scaled_ppm and 1119 - * the divisor, losing some precision 1120 - */ 1121 - scaled_ppm >>= 2; 1122 - divisor >>= 2; 1123 - } 1124 - 1125 - freq = (incval * (u64)scaled_ppm) >> 16; 1126 - diff = div_u64(freq, divisor); 1127 - 1118 + diff = mul_u64_u64_div_u64(incval, (u64)scaled_ppm, 1119 + 1000000ULL << 16); 1128 1120 if (neg_adj) 1129 1121 incval -= diff; 1130 1122 else
+7 -8
drivers/net/ethernet/intel/igb/igb_ptp.c
··· 190 190 } 191 191 192 192 /* PTP clock operations */ 193 - static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) 193 + static int igb_ptp_adjfine_82576(struct ptp_clock_info *ptp, long scaled_ppm) 194 194 { 195 195 struct igb_adapter *igb = container_of(ptp, struct igb_adapter, 196 196 ptp_caps); ··· 199 199 u64 rate; 200 200 u32 incvalue; 201 201 202 - if (ppb < 0) { 202 + if (scaled_ppm < 0) { 203 203 neg_adj = 1; 204 - ppb = -ppb; 204 + scaled_ppm = -scaled_ppm; 205 205 } 206 - rate = ppb; 207 - rate <<= 14; 208 - rate = div_u64(rate, 1953125); 209 206 210 - incvalue = 16 << IGB_82576_TSYNC_SHIFT; 207 + incvalue = INCVALUE_82576; 208 + rate = mul_u64_u64_div_u64(incvalue, (u64)scaled_ppm, 209 + 1000000ULL << 16); 211 210 212 211 if (neg_adj) 213 212 incvalue -= rate; ··· 1346 1347 adapter->ptp_caps.max_adj = 999999881; 1347 1348 adapter->ptp_caps.n_ext_ts = 0; 1348 1349 adapter->ptp_caps.pps = 0; 1349 - adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; 1350 + adapter->ptp_caps.adjfine = igb_ptp_adjfine_82576; 1350 1351 adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; 1351 1352 adapter->ptp_caps.gettimex64 = igb_ptp_gettimex_82576; 1352 1353 adapter->ptp_caps.settime64 = igb_ptp_settime_82576;
+40 -33
drivers/net/ethernet/intel/ixgbe/ixgbe_ptp.c
··· 113 113 * the sign bit. This register enables software to calculate frequency 114 114 * adjustments and apply them directly to the clock rate. 115 115 * 116 - * The math for converting ppb into TIMINCA values is fairly straightforward. 117 - * TIMINCA value = ( Base_Frequency * ppb ) / 1000000000ULL 116 + * The math for converting scaled_ppm into TIMINCA values is fairly 117 + * straightforward. 118 118 * 119 - * This assumes that ppb is never high enough to create a value bigger than 120 - * TIMINCA's 31 bits can store. This is ensured by the stack. Calculating this 121 - * value is also simple. 119 + * TIMINCA value = ( Base_Frequency * scaled_ppm ) / 1000000ULL << 16 120 + * 121 + * To avoid overflow, we simply use mul_u64_u64_div_u64. 122 + * 123 + * This assumes that scaled_ppm is never high enough to create a value bigger 124 + * than TIMINCA's 31 bits can store. This is ensured by the stack, and is 125 + * measured in parts per billion. Calculating this value is also simple. 122 126 * Max ppb = ( Max Adjustment / Base Frequency ) / 1000000000ULL 123 127 * 124 128 * For the X550, the Max adjustment is +/- 0.5 ns, and the base frequency is ··· 437 433 } 438 434 439 435 /** 440 - * ixgbe_ptp_adjfreq_82599 436 + * ixgbe_ptp_adjfine_82599 441 437 * @ptp: the ptp clock structure 442 - * @ppb: parts per billion adjustment from base 438 + * @scaled_ppm: scaled parts per million adjustment from base 443 439 * 444 - * adjust the frequency of the ptp cycle counter by the 445 - * indicated ppb from the base frequency. 440 + * Adjust the frequency of the ptp cycle counter by the 441 + * indicated scaled_ppm from the base frequency. 442 + * 443 + * Scaled parts per million is ppm with a 16-bit binary fractional field. 
446 444 */ 447 - static int ixgbe_ptp_adjfreq_82599(struct ptp_clock_info *ptp, s32 ppb) 445 + static int ixgbe_ptp_adjfine_82599(struct ptp_clock_info *ptp, long scaled_ppm) 448 446 { 449 447 struct ixgbe_adapter *adapter = 450 448 container_of(ptp, struct ixgbe_adapter, ptp_caps); 451 449 struct ixgbe_hw *hw = &adapter->hw; 452 - u64 freq, incval; 453 - u32 diff; 450 + u64 incval, diff; 454 451 int neg_adj = 0; 455 452 456 - if (ppb < 0) { 453 + if (scaled_ppm < 0) { 457 454 neg_adj = 1; 458 - ppb = -ppb; 455 + scaled_ppm = -scaled_ppm; 459 456 } 460 457 461 458 smp_mb(); 462 459 incval = READ_ONCE(adapter->base_incval); 463 460 464 - freq = incval; 465 - freq *= ppb; 466 - diff = div_u64(freq, 1000000000ULL); 461 + diff = mul_u64_u64_div_u64(incval, scaled_ppm, 462 + 1000000ULL << 16); 467 463 468 464 incval = neg_adj ? (incval - diff) : (incval + diff); 469 465 470 466 switch (hw->mac.type) { 471 467 case ixgbe_mac_X540: 472 468 if (incval > 0xFFFFFFFFULL) 473 - e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); 469 + e_dev_warn("PTP scaled_ppm adjusted SYSTIME rate overflowed!\n"); 474 470 IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, (u32)incval); 475 471 break; 476 472 case ixgbe_mac_82599EB: 477 473 if (incval > 0x00FFFFFFULL) 478 - e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); 474 + e_dev_warn("PTP scaled_ppm adjusted SYSTIME rate overflowed!\n"); 479 475 IXGBE_WRITE_REG(hw, IXGBE_TIMINCA, 480 476 BIT(IXGBE_INCPER_SHIFT_82599) | 481 477 ((u32)incval & 0x00FFFFFFUL)); ··· 488 484 } 489 485 490 486 /** 491 - * ixgbe_ptp_adjfreq_X550 487 + * ixgbe_ptp_adjfine_X550 492 488 * @ptp: the ptp clock structure 493 - * @ppb: parts per billion adjustment from base 489 + * @scaled_ppm: scaled parts per million adjustment from base 494 490 * 495 - * adjust the frequency of the SYSTIME registers by the indicated ppb from base 496 - * frequency 491 + * Adjust the frequency of the SYSTIME registers by the indicated scaled_ppm 492 + * from base frequency. 
493 + * 494 + * Scaled parts per million is ppm with a 16-bit binary fractional field. 497 495 */ 498 - static int ixgbe_ptp_adjfreq_X550(struct ptp_clock_info *ptp, s32 ppb) 496 + static int ixgbe_ptp_adjfine_X550(struct ptp_clock_info *ptp, long scaled_ppm) 499 497 { 500 498 struct ixgbe_adapter *adapter = 501 499 container_of(ptp, struct ixgbe_adapter, ptp_caps); 502 500 struct ixgbe_hw *hw = &adapter->hw; 503 501 int neg_adj = 0; 504 - u64 rate = IXGBE_X550_BASE_PERIOD; 502 + u64 rate; 505 503 u32 inca; 506 504 507 - if (ppb < 0) { 505 + if (scaled_ppm < 0) { 508 506 neg_adj = 1; 509 - ppb = -ppb; 507 + scaled_ppm = -scaled_ppm; 510 508 } 511 - rate *= ppb; 512 - rate = div_u64(rate, 1000000000ULL); 509 + 510 + rate = mul_u64_u64_div_u64(IXGBE_X550_BASE_PERIOD, scaled_ppm, 511 + 1000000ULL << 16); 513 512 514 513 /* warn if rate is too large */ 515 514 if (rate >= INCVALUE_MASK) 516 - e_dev_warn("PTP ppb adjusted SYSTIME rate overflowed!\n"); 515 + e_dev_warn("PTP scaled_ppm adjusted SYSTIME rate overflowed!\n"); 517 516 518 517 inca = rate & INCVALUE_MASK; 519 518 if (neg_adj) ··· 1362 1355 adapter->ptp_caps.n_ext_ts = 0; 1363 1356 adapter->ptp_caps.n_per_out = 0; 1364 1357 adapter->ptp_caps.pps = 1; 1365 - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; 1358 + adapter->ptp_caps.adjfine = ixgbe_ptp_adjfine_82599; 1366 1359 adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; 1367 1360 adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; 1368 1361 adapter->ptp_caps.settime64 = ixgbe_ptp_settime; ··· 1379 1372 adapter->ptp_caps.n_ext_ts = 0; 1380 1373 adapter->ptp_caps.n_per_out = 0; 1381 1374 adapter->ptp_caps.pps = 0; 1382 - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_82599; 1375 + adapter->ptp_caps.adjfine = ixgbe_ptp_adjfine_82599; 1383 1376 adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; 1384 1377 adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; 1385 1378 adapter->ptp_caps.settime64 = ixgbe_ptp_settime; ··· 1395 1388 adapter->ptp_caps.n_ext_ts = 0; 1396 
1389 adapter->ptp_caps.n_per_out = 0; 1397 1390 adapter->ptp_caps.pps = 1; 1398 - adapter->ptp_caps.adjfreq = ixgbe_ptp_adjfreq_X550; 1391 + adapter->ptp_caps.adjfine = ixgbe_ptp_adjfine_X550; 1399 1392 adapter->ptp_caps.adjtime = ixgbe_ptp_adjtime; 1400 1393 adapter->ptp_caps.gettimex64 = ixgbe_ptp_gettimex; 1401 1394 adapter->ptp_caps.settime64 = ixgbe_ptp_settime;