Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/nva3/clk: better pll calculation when no fractional fb div available

The core/mem/shader clocks don't support the fractional feedback divider,
causing our calculated clocks to be off by quite a lot in some cases. To
solve this we will switch to a search-based algorithm when fN is NULL.

For my NVA8 at PL3, this actually generates identical coefficients to
the binary driver. Hopefully that's a good sign, and that does not
break VPLL calculation for someone.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>

+43 -37
+2 -2
drivers/gpu/drm/nouveau/nouveau_drv.h
··· 1353 1353 /* nv50_calc. */ 1354 1354 int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk, 1355 1355 int *N1, int *M1, int *N2, int *M2, int *P); 1356 - int nv50_calc_pll2(struct drm_device *, struct pll_lims *, 1357 - int clk, int *N, int *fN, int *M, int *P); 1356 + int nva3_calc_pll(struct drm_device *, struct pll_lims *, 1357 + int clk, int *N, int *fN, int *M, int *P); 1358 1358 1359 1359 #ifndef ioread32_native 1360 1360 #ifdef __BIG_ENDIAN
+37 -31
drivers/gpu/drm/nouveau/nv50_calc.c
··· 23 23 */ 24 24 25 25 #include "drmP.h" 26 - #include "drm_fixed.h" 27 26 #include "nouveau_drv.h" 28 27 #include "nouveau_hw.h" 29 28 ··· 46 47 } 47 48 48 49 int 49 - nv50_calc_pll2(struct drm_device *dev, struct pll_lims *pll, int clk, 50 - int *N, int *fN, int *M, int *P) 50 + nva3_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk, 51 + int *pN, int *pfN, int *pM, int *P) 51 52 { 52 - fixed20_12 fb_div, a, b; 53 - u32 refclk = pll->refclk / 10; 54 - u32 max_vco_freq = pll->vco1.maxfreq / 10; 55 - u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10; 56 - clk /= 10; 53 + u32 best_err = ~0, err; 54 + int M, lM, hM, N, fN; 57 55 58 - *P = max_vco_freq / clk; 56 + *P = pll->vco1.maxfreq / clk; 59 57 if (*P > pll->max_p) 60 58 *P = pll->max_p; 61 59 if (*P < pll->min_p) 62 60 *P = pll->min_p; 63 61 64 - /* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */ 65 - a.full = dfixed_const(refclk + max_vco_inputfreq); 66 - b.full = dfixed_const(max_vco_inputfreq); 67 - a.full = dfixed_div(a, b); 68 - a.full = dfixed_floor(a); 69 - *M = dfixed_trunc(a); 62 + lM = (pll->refclk + pll->vco1.max_inputfreq) / pll->vco1.max_inputfreq; 63 + lM = max(lM, (int)pll->vco1.min_m); 64 + hM = (pll->refclk + pll->vco1.min_inputfreq) / pll->vco1.min_inputfreq; 65 + hM = min(hM, (int)pll->vco1.max_m); 70 66 71 - /* fb_div = (vco * *M) / refclk; */ 72 - fb_div.full = dfixed_const(clk * *P); 73 - fb_div.full = dfixed_mul(fb_div, a); 74 - a.full = dfixed_const(refclk); 75 - fb_div.full = dfixed_div(fb_div, a); 67 + for (M = lM; M <= hM; M++) { 68 + u32 tmp = clk * *P * M; 69 + N = tmp / pll->refclk; 70 + fN = tmp % pll->refclk; 71 + if (!pfN && fN >= pll->refclk / 2) 72 + N++; 76 73 77 - /* *N = floor(fb_div); */ 78 - a.full = dfixed_floor(fb_div); 79 - *N = dfixed_trunc(fb_div); 74 + if (N < pll->vco1.min_n) 75 + continue; 76 + if (N > pll->vco1.max_n) 77 + break; 80 78 81 - /* *fN = (fmod(fb_div, 1.0) * 8192) - 4096; */ 82 - b.full = dfixed_const(8192); 83 - 
a.full = dfixed_mul(a, b); 84 - fb_div.full = dfixed_mul(fb_div, b); 85 - fb_div.full = fb_div.full - a.full; 86 - *fN = dfixed_trunc(fb_div) - 4096; 87 - *fN &= 0xffff; 79 + err = abs(clk - (pll->refclk * N / M / *P)); 80 + if (err < best_err) { 81 + best_err = err; 82 + *pN = N; 83 + *pM = M; 84 + } 88 85 89 - return clk; 86 + if (pfN) { 87 + *pfN = (((fN << 13) / pll->refclk) - 4096) & 0xffff; 88 + return clk; 89 + } 90 + } 91 + 92 + if (unlikely(best_err == ~0)) { 93 + NV_ERROR(dev, "unable to find matching pll values\n"); 94 + return -EINVAL; 95 + } 96 + 97 + return pll->refclk * *pN / *pM / *P; 90 98 }
+2 -2
drivers/gpu/drm/nouveau/nv50_crtc.c
··· 286 286 nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2); 287 287 } else 288 288 if (dev_priv->chipset < NV_C0) { 289 - ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P); 289 + ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P); 290 290 if (ret <= 0) 291 291 return 0; 292 292 ··· 298 298 nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1); 299 299 nv_wr32(dev, pll.reg + 8, N2); 300 300 } else { 301 - ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P); 301 + ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P); 302 302 if (ret <= 0) 303 303 return 0; 304 304
+2 -2
drivers/gpu/drm/nouveau/nva3_pm.c
··· 104 104 { 105 105 struct nva3_pm_state *pll; 106 106 struct pll_lims limits; 107 - int N, fN, M, P, diff; 107 + int N, M, P, diff; 108 108 int ret, off; 109 109 110 110 ret = get_pll_limits(dev, id, &limits); ··· 136 136 } 137 137 138 138 if (!pll->new_div) { 139 - ret = nv50_calc_pll2(dev, &limits, khz, &N, &fN, &M, &P); 139 + ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P); 140 140 if (ret < 0) 141 141 return ERR_PTR(ret); 142 142