Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: pm-cps: Use GPR number macros

Use GPR number macros in uasm code generation parts to
reduce code duplication.

There are functional changes due to differences in register
symbolic names between OABI and NABI, while the existing code
is only using definitions from OABI.

Code pieces were carefully inspected to ensure register
usage is safe on NABI as well.

We changed the register allocation of r_pcohctl from T7 to T8,
as T7 is not available on NABI and we just want a caller-saved
scratch register here.

Signed-off-by: Jiaxun Yang <jiaxun.yang@flygoat.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>

authored by

Jiaxun Yang and committed by
Thomas Bogendoerfer
6d74e0fc 881c8e05

+64 -70
+64 -70
arch/mips/kernel/pm-cps.c
··· 18 18 #include <asm/mipsmtregs.h> 19 19 #include <asm/pm.h> 20 20 #include <asm/pm-cps.h> 21 + #include <asm/regdef.h> 21 22 #include <asm/smp-cps.h> 22 23 #include <asm/uasm.h> 23 24 ··· 69 68 /* A somewhat arbitrary number of labels & relocs for uasm */ 70 69 static struct uasm_label labels[32]; 71 70 static struct uasm_reloc relocs[32]; 72 - 73 - enum mips_reg { 74 - zero, at, v0, v1, a0, a1, a2, a3, 75 - t0, t1, t2, t3, t4, t5, t6, t7, 76 - s0, s1, s2, s3, s4, s5, s6, s7, 77 - t8, t9, k0, k1, gp, sp, fp, ra, 78 - }; 79 71 80 72 bool cps_pm_support_state(enum cps_pm_state state) 81 73 { ··· 197 203 return; 198 204 199 205 /* Load base address */ 200 - UASM_i_LA(pp, t0, (long)CKSEG0); 206 + UASM_i_LA(pp, GPR_T0, (long)CKSEG0); 201 207 202 208 /* Calculate end address */ 203 209 if (cache_size < 0x8000) 204 - uasm_i_addiu(pp, t1, t0, cache_size); 210 + uasm_i_addiu(pp, GPR_T1, GPR_T0, cache_size); 205 211 else 206 - UASM_i_LA(pp, t1, (long)(CKSEG0 + cache_size)); 212 + UASM_i_LA(pp, GPR_T1, (long)(CKSEG0 + cache_size)); 207 213 208 214 /* Start of cache op loop */ 209 215 uasm_build_label(pl, *pp, lbl); ··· 211 217 /* Generate the cache ops */ 212 218 for (i = 0; i < unroll_lines; i++) { 213 219 if (cpu_has_mips_r6) { 214 - uasm_i_cache(pp, op, 0, t0); 215 - uasm_i_addiu(pp, t0, t0, cache->linesz); 220 + uasm_i_cache(pp, op, 0, GPR_T0); 221 + uasm_i_addiu(pp, GPR_T0, GPR_T0, cache->linesz); 216 222 } else { 217 - uasm_i_cache(pp, op, i * cache->linesz, t0); 223 + uasm_i_cache(pp, op, i * cache->linesz, GPR_T0); 218 224 } 219 225 } 220 226 221 227 if (!cpu_has_mips_r6) 222 228 /* Update the base address */ 223 - uasm_i_addiu(pp, t0, t0, unroll_lines * cache->linesz); 229 + uasm_i_addiu(pp, GPR_T0, GPR_T0, unroll_lines * cache->linesz); 224 230 225 231 /* Loop if we haven't reached the end address yet */ 226 - uasm_il_bne(pp, pr, t0, t1, lbl); 232 + uasm_il_bne(pp, pr, GPR_T0, GPR_T1, lbl); 227 233 uasm_i_nop(pp); 228 234 } 229 235 ··· 269 275 */ 270 276 271 277 
/* Preserve perf counter setup */ 272 - uasm_i_mfc0(pp, t2, 25, (perf_counter * 2) + 0); /* PerfCtlN */ 273 - uasm_i_mfc0(pp, t3, 25, (perf_counter * 2) + 1); /* PerfCntN */ 278 + uasm_i_mfc0(pp, GPR_T2, 25, (perf_counter * 2) + 0); /* PerfCtlN */ 279 + uasm_i_mfc0(pp, GPR_T3, 25, (perf_counter * 2) + 1); /* PerfCntN */ 274 280 275 281 /* Setup perf counter to count FSB full pipeline stalls */ 276 - uasm_i_addiu(pp, t0, zero, (perf_event << 5) | 0xf); 277 - uasm_i_mtc0(pp, t0, 25, (perf_counter * 2) + 0); /* PerfCtlN */ 282 + uasm_i_addiu(pp, GPR_T0, GPR_ZERO, (perf_event << 5) | 0xf); 283 + uasm_i_mtc0(pp, GPR_T0, 25, (perf_counter * 2) + 0); /* PerfCtlN */ 278 284 uasm_i_ehb(pp); 279 - uasm_i_mtc0(pp, zero, 25, (perf_counter * 2) + 1); /* PerfCntN */ 285 + uasm_i_mtc0(pp, GPR_ZERO, 25, (perf_counter * 2) + 1); /* PerfCntN */ 280 286 uasm_i_ehb(pp); 281 287 282 288 /* Base address for loads */ 283 - UASM_i_LA(pp, t0, (long)CKSEG0); 289 + UASM_i_LA(pp, GPR_T0, (long)CKSEG0); 284 290 285 291 /* Start of clear loop */ 286 292 uasm_build_label(pl, *pp, lbl); 287 293 288 294 /* Perform some loads to fill the FSB */ 289 295 for (i = 0; i < num_loads; i++) 290 - uasm_i_lw(pp, zero, i * line_size * line_stride, t0); 296 + uasm_i_lw(pp, GPR_ZERO, i * line_size * line_stride, GPR_T0); 291 297 292 298 /* 293 299 * Invalidate the new D-cache entries so that the cache will need ··· 295 301 */ 296 302 for (i = 0; i < num_loads; i++) { 297 303 uasm_i_cache(pp, Hit_Invalidate_D, 298 - i * line_size * line_stride, t0); 304 + i * line_size * line_stride, GPR_T0); 299 305 uasm_i_cache(pp, Hit_Writeback_Inv_SD, 300 - i * line_size * line_stride, t0); 306 + i * line_size * line_stride, GPR_T0); 301 307 } 302 308 303 309 /* Barrier ensuring previous cache invalidates are complete */ ··· 305 311 uasm_i_ehb(pp); 306 312 307 313 /* Check whether the pipeline stalled due to the FSB being full */ 308 - uasm_i_mfc0(pp, t1, 25, (perf_counter * 2) + 1); /* PerfCntN */ 314 + uasm_i_mfc0(pp, 
GPR_T1, 25, (perf_counter * 2) + 1); /* PerfCntN */ 309 315 310 316 /* Loop if it didn't */ 311 - uasm_il_beqz(pp, pr, t1, lbl); 317 + uasm_il_beqz(pp, pr, GPR_T1, lbl); 312 318 uasm_i_nop(pp); 313 319 314 320 /* Restore perf counter 1. The count may well now be wrong... */ 315 - uasm_i_mtc0(pp, t2, 25, (perf_counter * 2) + 0); /* PerfCtlN */ 321 + uasm_i_mtc0(pp, GPR_T2, 25, (perf_counter * 2) + 0); /* PerfCtlN */ 316 322 uasm_i_ehb(pp); 317 - uasm_i_mtc0(pp, t3, 25, (perf_counter * 2) + 1); /* PerfCntN */ 323 + uasm_i_mtc0(pp, GPR_T3, 25, (perf_counter * 2) + 1); /* PerfCntN */ 318 324 uasm_i_ehb(pp); 319 325 320 326 return 0; ··· 324 330 struct uasm_reloc **pr, 325 331 unsigned r_addr, int lbl) 326 332 { 327 - uasm_i_lui(pp, t0, uasm_rel_hi(0x80000000)); 333 + uasm_i_lui(pp, GPR_T0, uasm_rel_hi(0x80000000)); 328 334 uasm_build_label(pl, *pp, lbl); 329 - uasm_i_ll(pp, t1, 0, r_addr); 330 - uasm_i_or(pp, t1, t1, t0); 331 - uasm_i_sc(pp, t1, 0, r_addr); 332 - uasm_il_beqz(pp, pr, t1, lbl); 335 + uasm_i_ll(pp, GPR_T1, 0, r_addr); 336 + uasm_i_or(pp, GPR_T1, GPR_T1, GPR_T0); 337 + uasm_i_sc(pp, GPR_T1, 0, r_addr); 338 + uasm_il_beqz(pp, pr, GPR_T1, lbl); 333 339 uasm_i_nop(pp); 334 340 } 335 341 ··· 338 344 struct uasm_label *l = labels; 339 345 struct uasm_reloc *r = relocs; 340 346 u32 *buf, *p; 341 - const unsigned r_online = a0; 342 - const unsigned r_nc_count = a1; 343 - const unsigned r_pcohctl = t7; 347 + const unsigned r_online = GPR_A0; 348 + const unsigned r_nc_count = GPR_A1; 349 + const unsigned r_pcohctl = GPR_T8; 344 350 const unsigned max_instrs = 256; 345 351 unsigned cpc_cmd; 346 352 int err; ··· 377 383 * with the return address placed in v0 to avoid clobbering 378 384 * the ra register before it is saved. 
379 385 */ 380 - UASM_i_LA(&p, t0, (long)mips_cps_pm_save); 381 - uasm_i_jalr(&p, v0, t0); 386 + UASM_i_LA(&p, GPR_T0, (long)mips_cps_pm_save); 387 + uasm_i_jalr(&p, GPR_V0, GPR_T0); 382 388 uasm_i_nop(&p); 383 389 } 384 390 ··· 393 399 /* Increment ready_count */ 394 400 uasm_i_sync(&p, __SYNC_mb); 395 401 uasm_build_label(&l, p, lbl_incready); 396 - uasm_i_ll(&p, t1, 0, r_nc_count); 397 - uasm_i_addiu(&p, t2, t1, 1); 398 - uasm_i_sc(&p, t2, 0, r_nc_count); 399 - uasm_il_beqz(&p, &r, t2, lbl_incready); 400 - uasm_i_addiu(&p, t1, t1, 1); 402 + uasm_i_ll(&p, GPR_T1, 0, r_nc_count); 403 + uasm_i_addiu(&p, GPR_T2, GPR_T1, 1); 404 + uasm_i_sc(&p, GPR_T2, 0, r_nc_count); 405 + uasm_il_beqz(&p, &r, GPR_T2, lbl_incready); 406 + uasm_i_addiu(&p, GPR_T1, GPR_T1, 1); 401 407 402 408 /* Barrier ensuring all CPUs see the updated r_nc_count value */ 403 409 uasm_i_sync(&p, __SYNC_mb); ··· 406 412 * If this is the last VPE to become ready for non-coherence 407 413 * then it should branch below. 408 414 */ 409 - uasm_il_beq(&p, &r, t1, r_online, lbl_disable_coherence); 415 + uasm_il_beq(&p, &r, GPR_T1, r_online, lbl_disable_coherence); 410 416 uasm_i_nop(&p); 411 417 412 418 if (state < CPS_PM_POWER_GATED) { ··· 416 422 * has been disabled before proceeding, which it will do 417 423 * by polling for the top bit of ready_count being set. 
418 424 */ 419 - uasm_i_addiu(&p, t1, zero, -1); 425 + uasm_i_addiu(&p, GPR_T1, GPR_ZERO, -1); 420 426 uasm_build_label(&l, p, lbl_poll_cont); 421 - uasm_i_lw(&p, t0, 0, r_nc_count); 422 - uasm_il_bltz(&p, &r, t0, lbl_secondary_cont); 427 + uasm_i_lw(&p, GPR_T0, 0, r_nc_count); 428 + uasm_il_bltz(&p, &r, GPR_T0, lbl_secondary_cont); 423 429 uasm_i_ehb(&p); 424 430 if (cpu_has_mipsmt) 425 - uasm_i_yield(&p, zero, t1); 431 + uasm_i_yield(&p, GPR_ZERO, GPR_T1); 426 432 uasm_il_b(&p, &r, lbl_poll_cont); 427 433 uasm_i_nop(&p); 428 434 } else { ··· 432 438 */ 433 439 if (cpu_has_mipsmt) { 434 440 /* Halt the VPE via C0 tchalt register */ 435 - uasm_i_addiu(&p, t0, zero, TCHALT_H); 436 - uasm_i_mtc0(&p, t0, 2, 4); 441 + uasm_i_addiu(&p, GPR_T0, GPR_ZERO, TCHALT_H); 442 + uasm_i_mtc0(&p, GPR_T0, 2, 4); 437 443 } else if (cpu_has_vp) { 438 444 /* Halt the VP via the CPC VP_STOP register */ 439 445 unsigned int vpe_id; 440 446 441 447 vpe_id = cpu_vpe_id(&cpu_data[cpu]); 442 - uasm_i_addiu(&p, t0, zero, 1 << vpe_id); 443 - UASM_i_LA(&p, t1, (long)addr_cpc_cl_vp_stop()); 444 - uasm_i_sw(&p, t0, 0, t1); 448 + uasm_i_addiu(&p, GPR_T0, GPR_ZERO, 1 << vpe_id); 449 + UASM_i_LA(&p, GPR_T1, (long)addr_cpc_cl_vp_stop()); 450 + uasm_i_sw(&p, GPR_T0, 0, GPR_T1); 445 451 } else { 446 452 BUG(); 447 453 } ··· 476 482 * defined by the interAptiv & proAptiv SUMs as ensuring that the 477 483 * operation resulting from the preceding store is complete. 
478 484 */ 479 - uasm_i_addiu(&p, t0, zero, 1 << cpu_core(&cpu_data[cpu])); 480 - uasm_i_sw(&p, t0, 0, r_pcohctl); 481 - uasm_i_lw(&p, t0, 0, r_pcohctl); 485 + uasm_i_addiu(&p, GPR_T0, GPR_ZERO, 1 << cpu_core(&cpu_data[cpu])); 486 + uasm_i_sw(&p, GPR_T0, 0, r_pcohctl); 487 + uasm_i_lw(&p, GPR_T0, 0, r_pcohctl); 482 488 483 489 /* Barrier to ensure write to coherence control is complete */ 484 490 uasm_i_sync(&p, __SYNC_full); ··· 486 492 } 487 493 488 494 /* Disable coherence */ 489 - uasm_i_sw(&p, zero, 0, r_pcohctl); 490 - uasm_i_lw(&p, t0, 0, r_pcohctl); 495 + uasm_i_sw(&p, GPR_ZERO, 0, r_pcohctl); 496 + uasm_i_lw(&p, GPR_T0, 0, r_pcohctl); 491 497 492 498 if (state >= CPS_PM_CLOCK_GATED) { 493 499 err = cps_gen_flush_fsb(&p, &l, &r, &cpu_data[cpu], ··· 509 515 } 510 516 511 517 /* Issue the CPC command */ 512 - UASM_i_LA(&p, t0, (long)addr_cpc_cl_cmd()); 513 - uasm_i_addiu(&p, t1, zero, cpc_cmd); 514 - uasm_i_sw(&p, t1, 0, t0); 518 + UASM_i_LA(&p, GPR_T0, (long)addr_cpc_cl_cmd()); 519 + uasm_i_addiu(&p, GPR_T1, GPR_ZERO, cpc_cmd); 520 + uasm_i_sw(&p, GPR_T1, 0, GPR_T0); 515 521 516 522 if (state == CPS_PM_POWER_GATED) { 517 523 /* If anything goes wrong just hang */ ··· 558 564 * will run this. The first will actually re-enable coherence & the 559 565 * rest will just be performing a rather unusual nop. 560 566 */ 561 - uasm_i_addiu(&p, t0, zero, mips_cm_revision() < CM_REV_CM3 567 + uasm_i_addiu(&p, GPR_T0, GPR_ZERO, mips_cm_revision() < CM_REV_CM3 562 568 ? 
CM_GCR_Cx_COHERENCE_COHDOMAINEN 563 569 : CM3_GCR_Cx_COHERENCE_COHEN); 564 570 565 - uasm_i_sw(&p, t0, 0, r_pcohctl); 566 - uasm_i_lw(&p, t0, 0, r_pcohctl); 571 + uasm_i_sw(&p, GPR_T0, 0, r_pcohctl); 572 + uasm_i_lw(&p, GPR_T0, 0, r_pcohctl); 567 573 568 574 /* Barrier to ensure write to coherence control is complete */ 569 575 uasm_i_sync(&p, __SYNC_full); ··· 573 579 /* Decrement ready_count */ 574 580 uasm_build_label(&l, p, lbl_decready); 575 581 uasm_i_sync(&p, __SYNC_mb); 576 - uasm_i_ll(&p, t1, 0, r_nc_count); 577 - uasm_i_addiu(&p, t2, t1, -1); 578 - uasm_i_sc(&p, t2, 0, r_nc_count); 579 - uasm_il_beqz(&p, &r, t2, lbl_decready); 580 - uasm_i_andi(&p, v0, t1, (1 << fls(smp_num_siblings)) - 1); 582 + uasm_i_ll(&p, GPR_T1, 0, r_nc_count); 583 + uasm_i_addiu(&p, GPR_T2, GPR_T1, -1); 584 + uasm_i_sc(&p, GPR_T2, 0, r_nc_count); 585 + uasm_il_beqz(&p, &r, GPR_T2, lbl_decready); 586 + uasm_i_andi(&p, GPR_V0, GPR_T1, (1 << fls(smp_num_siblings)) - 1); 581 587 582 588 /* Barrier ensuring all CPUs see the updated r_nc_count value */ 583 589 uasm_i_sync(&p, __SYNC_mb); ··· 606 612 } 607 613 608 614 /* The core is coherent, time to return to C code */ 609 - uasm_i_jr(&p, ra); 615 + uasm_i_jr(&p, GPR_RA); 610 616 uasm_i_nop(&p); 611 617 612 618 gen_done: