Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

MIPS: Fix MSA assembly with big thread offsets

When lockdep is enabled on a 64-bit kernel the FPR offset into the
thread structure exceeds the maximum range of the MSA ld.d/st.d
instructions. For example THREAD_FPR31 = 4644 (instead of 2448), while
the signed immediate field is only 10 bits with an implicit multiply by
8, giving a maximum offset of 511*8 = 4088.

This isn't a problem when the toolchain doesn't support MSA, as the
ld_*/st_* macros perform the addition separately into $1 with [d]addiu,
which has a 16-bit signed immediate field.

Fix the case where the toolchain does support MSA by doing a single
addition of THREAD_FPR0 into $1 with [d]addiu, and doing the ld_*/st_*
accesses relative to that base.

Signed-off-by: James Hogan <james.hogan@imgtec.com>
Cc: Paul Burton <paul.burton@imgtec.com>
Cc: linux-mips@linux-mips.org
Patchwork: https://patchwork.linux-mips.org/patch/13064/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>

authored by

James Hogan and committed by
Ralf Baechle
143e93d7 ea168857

+82 -65
+82 -65
arch/mips/include/asm/asmmacro.h
··· 496 496 .endm 497 497 #endif 498 498 499 + #ifdef TOOLCHAIN_SUPPORTS_MSA 500 + #define FPR_BASE_OFFS THREAD_FPR0 501 + #define FPR_BASE $1 502 + #else 503 + #define FPR_BASE_OFFS 0 504 + #define FPR_BASE \thread 505 + #endif 506 + 499 507 .macro msa_save_all thread 500 - st_d 0, THREAD_FPR0, \thread 501 - st_d 1, THREAD_FPR1, \thread 502 - st_d 2, THREAD_FPR2, \thread 503 - st_d 3, THREAD_FPR3, \thread 504 - st_d 4, THREAD_FPR4, \thread 505 - st_d 5, THREAD_FPR5, \thread 506 - st_d 6, THREAD_FPR6, \thread 507 - st_d 7, THREAD_FPR7, \thread 508 - st_d 8, THREAD_FPR8, \thread 509 - st_d 9, THREAD_FPR9, \thread 510 - st_d 10, THREAD_FPR10, \thread 511 - st_d 11, THREAD_FPR11, \thread 512 - st_d 12, THREAD_FPR12, \thread 513 - st_d 13, THREAD_FPR13, \thread 514 - st_d 14, THREAD_FPR14, \thread 515 - st_d 15, THREAD_FPR15, \thread 516 - st_d 16, THREAD_FPR16, \thread 517 - st_d 17, THREAD_FPR17, \thread 518 - st_d 18, THREAD_FPR18, \thread 519 - st_d 19, THREAD_FPR19, \thread 520 - st_d 20, THREAD_FPR20, \thread 521 - st_d 21, THREAD_FPR21, \thread 522 - st_d 22, THREAD_FPR22, \thread 523 - st_d 23, THREAD_FPR23, \thread 524 - st_d 24, THREAD_FPR24, \thread 525 - st_d 25, THREAD_FPR25, \thread 526 - st_d 26, THREAD_FPR26, \thread 527 - st_d 27, THREAD_FPR27, \thread 528 - st_d 28, THREAD_FPR28, \thread 529 - st_d 29, THREAD_FPR29, \thread 530 - st_d 30, THREAD_FPR30, \thread 531 - st_d 31, THREAD_FPR31, \thread 532 508 .set push 533 509 .set noat 510 + #ifdef TOOLCHAIN_SUPPORTS_MSA 511 + PTR_ADDU FPR_BASE, \thread, FPR_BASE_OFFS 512 + #endif 513 + st_d 0, THREAD_FPR0 - FPR_BASE_OFFS, FPR_BASE 514 + st_d 1, THREAD_FPR1 - FPR_BASE_OFFS, FPR_BASE 515 + st_d 2, THREAD_FPR2 - FPR_BASE_OFFS, FPR_BASE 516 + st_d 3, THREAD_FPR3 - FPR_BASE_OFFS, FPR_BASE 517 + st_d 4, THREAD_FPR4 - FPR_BASE_OFFS, FPR_BASE 518 + st_d 5, THREAD_FPR5 - FPR_BASE_OFFS, FPR_BASE 519 + st_d 6, THREAD_FPR6 - FPR_BASE_OFFS, FPR_BASE 520 + st_d 7, THREAD_FPR7 - FPR_BASE_OFFS, FPR_BASE 521 + st_d 8, 
THREAD_FPR8 - FPR_BASE_OFFS, FPR_BASE 522 + st_d 9, THREAD_FPR9 - FPR_BASE_OFFS, FPR_BASE 523 + st_d 10, THREAD_FPR10 - FPR_BASE_OFFS, FPR_BASE 524 + st_d 11, THREAD_FPR11 - FPR_BASE_OFFS, FPR_BASE 525 + st_d 12, THREAD_FPR12 - FPR_BASE_OFFS, FPR_BASE 526 + st_d 13, THREAD_FPR13 - FPR_BASE_OFFS, FPR_BASE 527 + st_d 14, THREAD_FPR14 - FPR_BASE_OFFS, FPR_BASE 528 + st_d 15, THREAD_FPR15 - FPR_BASE_OFFS, FPR_BASE 529 + st_d 16, THREAD_FPR16 - FPR_BASE_OFFS, FPR_BASE 530 + st_d 17, THREAD_FPR17 - FPR_BASE_OFFS, FPR_BASE 531 + st_d 18, THREAD_FPR18 - FPR_BASE_OFFS, FPR_BASE 532 + st_d 19, THREAD_FPR19 - FPR_BASE_OFFS, FPR_BASE 533 + st_d 20, THREAD_FPR20 - FPR_BASE_OFFS, FPR_BASE 534 + st_d 21, THREAD_FPR21 - FPR_BASE_OFFS, FPR_BASE 535 + st_d 22, THREAD_FPR22 - FPR_BASE_OFFS, FPR_BASE 536 + st_d 23, THREAD_FPR23 - FPR_BASE_OFFS, FPR_BASE 537 + st_d 24, THREAD_FPR24 - FPR_BASE_OFFS, FPR_BASE 538 + st_d 25, THREAD_FPR25 - FPR_BASE_OFFS, FPR_BASE 539 + st_d 26, THREAD_FPR26 - FPR_BASE_OFFS, FPR_BASE 540 + st_d 27, THREAD_FPR27 - FPR_BASE_OFFS, FPR_BASE 541 + st_d 28, THREAD_FPR28 - FPR_BASE_OFFS, FPR_BASE 542 + st_d 29, THREAD_FPR29 - FPR_BASE_OFFS, FPR_BASE 543 + st_d 30, THREAD_FPR30 - FPR_BASE_OFFS, FPR_BASE 544 + st_d 31, THREAD_FPR31 - FPR_BASE_OFFS, FPR_BASE 534 545 SET_HARDFLOAT 535 546 _cfcmsa $1, MSA_CSR 536 547 sw $1, THREAD_MSA_CSR(\thread) ··· 554 543 SET_HARDFLOAT 555 544 lw $1, THREAD_MSA_CSR(\thread) 556 545 _ctcmsa MSA_CSR, $1 557 - .set pop 558 - ld_d 0, THREAD_FPR0, \thread 559 - ld_d 1, THREAD_FPR1, \thread 560 - ld_d 2, THREAD_FPR2, \thread 561 - ld_d 3, THREAD_FPR3, \thread 562 - ld_d 4, THREAD_FPR4, \thread 563 - ld_d 5, THREAD_FPR5, \thread 564 - ld_d 6, THREAD_FPR6, \thread 565 - ld_d 7, THREAD_FPR7, \thread 566 - ld_d 8, THREAD_FPR8, \thread 567 - ld_d 9, THREAD_FPR9, \thread 568 - ld_d 10, THREAD_FPR10, \thread 569 - ld_d 11, THREAD_FPR11, \thread 570 - ld_d 12, THREAD_FPR12, \thread 571 - ld_d 13, THREAD_FPR13, \thread 572 - ld_d 14, 
THREAD_FPR14, \thread 573 - ld_d 15, THREAD_FPR15, \thread 574 - ld_d 16, THREAD_FPR16, \thread 575 - ld_d 17, THREAD_FPR17, \thread 576 - ld_d 18, THREAD_FPR18, \thread 577 - ld_d 19, THREAD_FPR19, \thread 578 - ld_d 20, THREAD_FPR20, \thread 579 - ld_d 21, THREAD_FPR21, \thread 580 - ld_d 22, THREAD_FPR22, \thread 581 - ld_d 23, THREAD_FPR23, \thread 582 - ld_d 24, THREAD_FPR24, \thread 583 - ld_d 25, THREAD_FPR25, \thread 584 - ld_d 26, THREAD_FPR26, \thread 585 - ld_d 27, THREAD_FPR27, \thread 586 - ld_d 28, THREAD_FPR28, \thread 587 - ld_d 29, THREAD_FPR29, \thread 588 - ld_d 30, THREAD_FPR30, \thread 589 - ld_d 31, THREAD_FPR31, \thread 546 + #ifdef TOOLCHAIN_SUPPORTS_MSA 547 + PTR_ADDU FPR_BASE, \thread, FPR_BASE_OFFS 548 + #endif 549 + ld_d 0, THREAD_FPR0 - FPR_BASE_OFFS, FPR_BASE 550 + ld_d 1, THREAD_FPR1 - FPR_BASE_OFFS, FPR_BASE 551 + ld_d 2, THREAD_FPR2 - FPR_BASE_OFFS, FPR_BASE 552 + ld_d 3, THREAD_FPR3 - FPR_BASE_OFFS, FPR_BASE 553 + ld_d 4, THREAD_FPR4 - FPR_BASE_OFFS, FPR_BASE 554 + ld_d 5, THREAD_FPR5 - FPR_BASE_OFFS, FPR_BASE 555 + ld_d 6, THREAD_FPR6 - FPR_BASE_OFFS, FPR_BASE 556 + ld_d 7, THREAD_FPR7 - FPR_BASE_OFFS, FPR_BASE 557 + ld_d 8, THREAD_FPR8 - FPR_BASE_OFFS, FPR_BASE 558 + ld_d 9, THREAD_FPR9 - FPR_BASE_OFFS, FPR_BASE 559 + ld_d 10, THREAD_FPR10 - FPR_BASE_OFFS, FPR_BASE 560 + ld_d 11, THREAD_FPR11 - FPR_BASE_OFFS, FPR_BASE 561 + ld_d 12, THREAD_FPR12 - FPR_BASE_OFFS, FPR_BASE 562 + ld_d 13, THREAD_FPR13 - FPR_BASE_OFFS, FPR_BASE 563 + ld_d 14, THREAD_FPR14 - FPR_BASE_OFFS, FPR_BASE 564 + ld_d 15, THREAD_FPR15 - FPR_BASE_OFFS, FPR_BASE 565 + ld_d 16, THREAD_FPR16 - FPR_BASE_OFFS, FPR_BASE 566 + ld_d 17, THREAD_FPR17 - FPR_BASE_OFFS, FPR_BASE 567 + ld_d 18, THREAD_FPR18 - FPR_BASE_OFFS, FPR_BASE 568 + ld_d 19, THREAD_FPR19 - FPR_BASE_OFFS, FPR_BASE 569 + ld_d 20, THREAD_FPR20 - FPR_BASE_OFFS, FPR_BASE 570 + ld_d 21, THREAD_FPR21 - FPR_BASE_OFFS, FPR_BASE 571 + ld_d 22, THREAD_FPR22 - FPR_BASE_OFFS, FPR_BASE 572 + ld_d 23, THREAD_FPR23 - 
FPR_BASE_OFFS, FPR_BASE 573 + ld_d 24, THREAD_FPR24 - FPR_BASE_OFFS, FPR_BASE 574 + ld_d 25, THREAD_FPR25 - FPR_BASE_OFFS, FPR_BASE 575 + ld_d 26, THREAD_FPR26 - FPR_BASE_OFFS, FPR_BASE 576 + ld_d 27, THREAD_FPR27 - FPR_BASE_OFFS, FPR_BASE 577 + ld_d 28, THREAD_FPR28 - FPR_BASE_OFFS, FPR_BASE 578 + ld_d 29, THREAD_FPR29 - FPR_BASE_OFFS, FPR_BASE 579 + ld_d 30, THREAD_FPR30 - FPR_BASE_OFFS, FPR_BASE 580 + ld_d 31, THREAD_FPR31 - FPR_BASE_OFFS, FPR_BASE 581 + .set pop 590 582 .endm 583 + 584 + #undef FPR_BASE_OFFS 585 + #undef FPR_BASE 591 586 592 587 .macro msa_init_upper wd 593 588 #ifdef CONFIG_64BIT