Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

drm/nvc0/pm: initial engine reclocking

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>

+243
+2
drivers/gpu/drm/nouveau/nouveau_pm.h
··· 72 72 73 73 /* nvc0_pm.c */ 74 74 int nvc0_pm_clocks_get(struct drm_device *, struct nouveau_pm_level *); 75 + void *nvc0_pm_clocks_pre(struct drm_device *, struct nouveau_pm_level *); 76 + int nvc0_pm_clocks_set(struct drm_device *, void *); 75 77 76 78 /* nouveau_temp.c */ 77 79 void nouveau_temp_init(struct drm_device *dev);
+4
drivers/gpu/drm/nouveau/nouveau_state.c
··· 417 417 engine->vram.flags_valid = nvc0_vram_flags_valid; 418 418 engine->pm.temp_get = nv84_temp_get; 419 419 engine->pm.clocks_get = nvc0_pm_clocks_get; 420 + engine->pm.clocks_pre = nvc0_pm_clocks_pre; 421 + engine->pm.clocks_set = nvc0_pm_clocks_set; 420 422 engine->pm.voltage_get = nouveau_voltage_gpio_get; 421 423 engine->pm.voltage_set = nouveau_voltage_gpio_set; 422 424 engine->pm.pwm_get = nv50_pm_pwm_get; ··· 470 468 engine->vram.flags_valid = nvc0_vram_flags_valid; 471 469 engine->pm.temp_get = nv84_temp_get; 472 470 engine->pm.clocks_get = nvc0_pm_clocks_get; 471 + engine->pm.clocks_pre = nvc0_pm_clocks_pre; 472 + engine->pm.clocks_set = nvc0_pm_clocks_set; 473 473 engine->pm.voltage_get = nouveau_voltage_gpio_get; 474 474 engine->pm.voltage_set = nouveau_voltage_gpio_set; 475 475 break;
+237
drivers/gpu/drm/nouveau/nvc0_pm.c
··· 153 153 perflvl->vdec = read_clk(dev, 0x0e); 154 154 return 0; 155 155 } 156 + 157 + struct nvc0_pm_clock { 158 + u32 freq; 159 + u32 ssel; 160 + u32 mdiv; 161 + u32 dsrc; 162 + u32 ddiv; 163 + u32 coef; 164 + }; 165 + 166 + struct nvc0_pm_state { 167 + struct nvc0_pm_clock eng[16]; 168 + }; 169 + 170 + static u32 171 + calc_div(struct drm_device *dev, int clk, u32 ref, u32 freq, u32 *ddiv) 172 + { 173 + u32 div = min((ref * 2) / freq, (u32)65); 174 + if (div < 2) 175 + div = 2; 176 + 177 + *ddiv = div - 2; 178 + return (ref * 2) / div; 179 + } 180 + 181 + static u32 182 + calc_src(struct drm_device *dev, int clk, u32 freq, u32 *dsrc, u32 *ddiv) 183 + { 184 + u32 sclk; 185 + 186 + /* use one of the fixed frequencies if possible */ 187 + *ddiv = 0x00000000; 188 + switch (freq) { 189 + case 27000: 190 + case 108000: 191 + *dsrc = 0x00000000; 192 + if (freq == 108000) 193 + *dsrc |= 0x00030000; 194 + return freq; 195 + case 100000: 196 + *dsrc = 0x00000002; 197 + return freq; 198 + default: 199 + *dsrc = 0x00000003; 200 + break; 201 + } 202 + 203 + /* otherwise, calculate the closest divider */ 204 + sclk = read_vco(dev, clk); 205 + if (clk < 7) 206 + sclk = calc_div(dev, clk, sclk, freq, ddiv); 207 + return sclk; 208 + } 209 + 210 + static u32 211 + calc_pll(struct drm_device *dev, int clk, u32 freq, u32 *coef) 212 + { 213 + struct pll_lims limits; 214 + int N, M, P, ret; 215 + 216 + ret = get_pll_limits(dev, 0x137000 + (clk * 0x20), &limits); 217 + if (ret) 218 + return 0; 219 + 220 + limits.refclk = read_div(dev, clk, 0x137120, 0x137140); 221 + if (!limits.refclk) 222 + return 0; 223 + 224 + ret = nva3_calc_pll(dev, &limits, freq, &N, NULL, &M, &P); 225 + if (ret <= 0) 226 + return 0; 227 + 228 + *coef = (P << 16) | (N << 8) | M; 229 + return ret; 230 + } 231 + 232 + /* A (likely rather simplified and incomplete) view of the clock tree 233 + * 234 + * Key: 235 + * 236 + * S: source select 237 + * D: divider 238 + * P: pll 239 + * F: switch 240 + * 241 + * Engine clocks: 242 + * 243 + * 137250(D) ---- 137100(F0) ---- 137160(S)/1371d0(D) ------------------- ref 244 + * (F1) ---- 1370X0(P) ---- 137120(S)/137140(D) ---- ref 245 + * 246 + * Not all registers exist for all clocks. For example: clocks >= 8 don't 247 + * have their own PLL (all tied to clock 7's PLL when in PLL mode), nor do 248 + * they have the divider at 1371d0, though the source selection at 137160 249 + * still exists. You must use the divider at 137250 for these instead. 250 + * 251 + * Memory clock: 252 + * 253 + * TBD, read_mem() above is likely very wrong... 254 + * 255 + */ 256 + 257 + static int 258 + calc_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info, u32 freq) 259 + { 260 + u32 src0, div0, div1D, div1P = 0; 261 + u32 clk0, clk1 = 0; 262 + 263 + /* invalid clock domain */ 264 + if (!freq) 265 + return 0; 266 + 267 + /* first possible path, using only dividers */ 268 + clk0 = calc_src(dev, clk, freq, &src0, &div0); 269 + clk0 = calc_div(dev, clk, clk0, freq, &div1D); 270 + 271 + /* see if we can get any closer using PLLs */ 272 + if (clk0 != freq) { 273 + if (clk < 7) 274 + clk1 = calc_pll(dev, clk, freq, &info->coef); 275 + else 276 + clk1 = read_pll(dev, 0x1370e0); 277 + clk1 = calc_div(dev, clk, clk1, freq, &div1P); 278 + } 279 + 280 + /* select the method which gets closest to target freq */ 281 + if (abs((int)freq - clk0) <= abs((int)freq - clk1)) { 282 + info->dsrc = src0; 283 + if (div0) { 284 + info->ddiv |= 0x80000000; 285 + info->ddiv |= div0 << 8; 286 + info->ddiv |= div0; 287 + } 288 + if (div1D) { 289 + info->mdiv |= 0x80000000; 290 + info->mdiv |= div1D; 291 + } 292 + info->ssel = 0; 293 + info->freq = clk0; 294 + } else { 295 + if (div1P) { 296 + info->mdiv |= 0x80000000; 297 + info->mdiv |= div1P << 8; 298 + } 299 + info->ssel = (1 << clk); 300 + info->freq = clk1; 301 + } 302 + 303 + return 0; 304 + } 305 + 306 + void * 307 + nvc0_pm_clocks_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl) 308 + { 309 + struct drm_nouveau_private *dev_priv = dev->dev_private; 310 + struct nvc0_pm_state *info; 311 + int ret; 312 + 313 + info = kzalloc(sizeof(*info), GFP_KERNEL); 314 + if (!info) 315 + return ERR_PTR(-ENOMEM); 316 + 317 + /* NFI why this is still in the performance table, the ROPCs appear 318 + * to get their clock from clock 2 ("hub07", actually hub05 on this 319 + * chip, but, anyway...) as well. nvatiming confirms hub05 and ROP 320 + * are always the same freq with the binary driver even when the 321 + * performance table says they should differ. 322 + */ 323 + if (dev_priv->chipset == 0xd9) 324 + perflvl->rop = 0; 325 + 326 + if ((ret = calc_clk(dev, 0x00, &info->eng[0x00], perflvl->shader)) || 327 + (ret = calc_clk(dev, 0x01, &info->eng[0x01], perflvl->rop)) || 328 + (ret = calc_clk(dev, 0x02, &info->eng[0x02], perflvl->hub07)) || 329 + (ret = calc_clk(dev, 0x07, &info->eng[0x07], perflvl->hub06)) || 330 + (ret = calc_clk(dev, 0x08, &info->eng[0x08], perflvl->hub01)) || 331 + (ret = calc_clk(dev, 0x09, &info->eng[0x09], perflvl->copy)) || 332 + (ret = calc_clk(dev, 0x0c, &info->eng[0x0c], perflvl->daemon)) || 333 + (ret = calc_clk(dev, 0x0e, &info->eng[0x0e], perflvl->vdec))) { 334 + kfree(info); 335 + return ERR_PTR(ret); 336 + } 337 + 338 + return info; 339 + } 340 + 341 + static void 342 + prog_clk(struct drm_device *dev, int clk, struct nvc0_pm_clock *info) 343 + { 344 + /* program dividers at 137160/1371d0 first */ 345 + if (clk < 7 && !info->ssel) { 346 + nv_mask(dev, 0x1371d0 + (clk * 0x04), 0x80003f3f, info->ddiv); 347 + nv_wr32(dev, 0x137160 + (clk * 0x04), info->dsrc); 348 + } 349 + 350 + /* switch clock to non-pll mode */ 351 + nv_mask(dev, 0x137100, (1 << clk), 0x00000000); 352 + nv_wait(dev, 0x137100, (1 << clk), 0x00000000); 353 + 354 + /* reprogram pll */ 355 + if (clk < 7) { 356 + /* make sure it's disabled first... */ 357 + u32 base = 0x137000 + (clk * 0x20); 358 + u32 ctrl = nv_rd32(dev, base + 0x00); 359 + if (ctrl & 0x00000001) { 360 + nv_mask(dev, base + 0x00, 0x00000004, 0x00000000); 361 + nv_mask(dev, base + 0x00, 0x00000001, 0x00000000); 362 + } 363 + /* program it to new values, if necessary */ 364 + if (info->ssel) { 365 + nv_wr32(dev, base + 0x04, info->coef); 366 + nv_mask(dev, base + 0x00, 0x00000001, 0x00000001); 367 + nv_wait(dev, base + 0x00, 0x00020000, 0x00020000); 368 + nv_mask(dev, base + 0x00, 0x00020004, 0x00000004); 369 + } 370 + } 371 + 372 + /* select pll/non-pll mode, and program final clock divider */ 373 + nv_mask(dev, 0x137100, (1 << clk), info->ssel); 374 + nv_wait(dev, 0x137100, (1 << clk), info->ssel); 375 + nv_mask(dev, 0x137250 + (clk * 0x04), 0x00003f3f, info->mdiv); 376 + } 377 + 378 + int 379 + nvc0_pm_clocks_set(struct drm_device *dev, void *data) 380 + { 381 + struct nvc0_pm_state *info = data; 382 + int i; 383 + 384 + for (i = 0; i < 16; i++) { 385 + if (!info->eng[i].freq) 386 + continue; 387 + prog_clk(dev, i, &info->eng[i]); 388 + } 389 + 390 + kfree(info); 391 + return 0; 392 + }