[SPARC64]: Fix userland FPU state corruption.

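We need to use stricter memory barriers around the block
load (ldda) and block store (stda) instructions we use to save
and restore the FPU register file.  Issue a full "membar #Sync"
immediately before each group of block transfers, replacing the
weaker "#StoreLoad | #LoadLoad" and "#StoreStore | #LoadStore"
barriers.  In entry.S, also move the trailing "membar #Sync" so
that it directly follows the block loads instead of coming after
the FPU instructions that follow them.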

Signed-off-by: David S. Miller <davem@davemloft.net>

+30 -24
+21 -18
arch/sparc64/kernel/entry.S
··· 33 33 /* This is trivial with the new code... */ 34 34 .globl do_fpdis 35 35 do_fpdis: 36 - sethi %hi(TSTATE_PEF), %g4 ! IEU0 36 + sethi %hi(TSTATE_PEF), %g4 37 37 rdpr %tstate, %g5 38 38 andcc %g5, %g4, %g0 39 39 be,pt %xcc, 1f ··· 50 50 add %g0, %g0, %g0 51 51 ba,a,pt %xcc, rtrap_clr_l6 52 52 53 - 1: ldub [%g6 + TI_FPSAVED], %g5 ! Load Group 54 - wr %g0, FPRS_FEF, %fprs ! LSU Group+4bubbles 55 - andcc %g5, FPRS_FEF, %g0 ! IEU1 Group 56 - be,a,pt %icc, 1f ! CTI 57 - clr %g7 ! IEU0 58 - ldx [%g6 + TI_GSR], %g7 ! Load Group 59 - 1: andcc %g5, FPRS_DL, %g0 ! IEU1 60 - bne,pn %icc, 2f ! CTI 61 - fzero %f0 ! FPA 62 - andcc %g5, FPRS_DU, %g0 ! IEU1 Group 63 - bne,pn %icc, 1f ! CTI 64 - fzero %f2 ! FPA 53 + 1: ldub [%g6 + TI_FPSAVED], %g5 54 + wr %g0, FPRS_FEF, %fprs 55 + andcc %g5, FPRS_FEF, %g0 56 + be,a,pt %icc, 1f 57 + clr %g7 58 + ldx [%g6 + TI_GSR], %g7 59 + 1: andcc %g5, FPRS_DL, %g0 60 + bne,pn %icc, 2f 61 + fzero %f0 62 + andcc %g5, FPRS_DU, %g0 63 + bne,pn %icc, 1f 64 + fzero %f2 65 65 faddd %f0, %f2, %f4 66 66 fmuld %f0, %f2, %f6 67 67 faddd %f0, %f2, %f8 ··· 104 104 add %g6, TI_FPREGS + 0xc0, %g2 105 105 faddd %f0, %f2, %f8 106 106 fmuld %f0, %f2, %f10 107 - ldda [%g1] ASI_BLK_S, %f32 ! grrr, where is ASI_BLK_NUCLEUS 8-( 107 + membar #Sync 108 + ldda [%g1] ASI_BLK_S, %f32 108 109 ldda [%g2] ASI_BLK_S, %f48 110 + membar #Sync 109 111 faddd %f0, %f2, %f12 110 112 fmuld %f0, %f2, %f14 111 113 faddd %f0, %f2, %f16 ··· 118 116 fmuld %f0, %f2, %f26 119 117 faddd %f0, %f2, %f28 120 118 fmuld %f0, %f2, %f30 121 - membar #Sync 122 119 b,pt %xcc, fpdis_exit 123 120 nop 124 121 2: andcc %g5, FPRS_DU, %g0 ··· 134 133 add %g6, TI_FPREGS + 0x40, %g2 135 134 faddd %f32, %f34, %f36 136 135 fmuld %f32, %f34, %f38 137 - ldda [%g1] ASI_BLK_S, %f0 ! grrr, where is ASI_BLK_NUCLEUS 8-( 136 + membar #Sync 137 + ldda [%g1] ASI_BLK_S, %f0 138 138 ldda [%g2] ASI_BLK_S, %f16 139 + membar #Sync 139 140 faddd %f32, %f34, %f40 140 141 fmuld %f32, %f34, %f42 141 142 faddd %f32, %f34, %f44 ··· 150 147 fmuld %f32, %f34, %f58 151 148 faddd %f32, %f34, %f60 152 149 fmuld %f32, %f34, %f62 153 - membar #Sync 154 150 ba,pt %xcc, fpdis_exit 155 151 nop 156 152 3: mov SECONDARY_CONTEXT, %g3 ··· 160 158 stxa %g2, [%g3] ASI_DMMU 161 159 membar #Sync 162 160 mov 0x40, %g2 163 - ldda [%g1] ASI_BLK_S, %f0 ! grrr, where is ASI_BLK_NUCLEUS 8-( 161 + membar #Sync 162 + ldda [%g1] ASI_BLK_S, %f0 164 163 ldda [%g1 + %g2] ASI_BLK_S, %f16 165 164 add %g1, 0x80, %g1 166 165 ldda [%g1] ASI_BLK_S, %f32
+4 -3
arch/sparc64/kernel/rtrap.S
··· 312 312 wr %g1, FPRS_FEF, %fprs 313 313 ldx [%o1 + %o5], %g1 314 314 add %g6, TI_XFSR, %o1 315 - membar #StoreLoad | #LoadLoad 316 315 sll %o0, 8, %o2 317 316 add %g6, TI_FPREGS, %o3 318 317 brz,pn %l6, 1f 319 318 add %g6, TI_FPREGS+0x40, %o4 320 319 320 + membar #Sync 321 321 ldda [%o3 + %o2] ASI_BLK_P, %f0 322 322 ldda [%o4 + %o2] ASI_BLK_P, %f16 323 + membar #Sync 323 324 1: andcc %l2, FPRS_DU, %g0 324 325 be,pn %icc, 1f 325 326 wr %g1, 0, %gsr 326 327 add %o2, 0x80, %o2 328 + membar #Sync 327 329 ldda [%o3 + %o2] ASI_BLK_P, %f32 328 330 ldda [%o4 + %o2] ASI_BLK_P, %f48 329 - 330 331 1: membar #Sync 331 332 ldx [%o1 + %o5], %fsr 332 333 2: stb %l5, [%g6 + TI_FPDEPTH] 333 334 ba,pt %xcc, rt_continue 334 335 nop 335 336 5: wr %g0, FPRS_FEF, %fprs 336 - membar #StoreLoad | #LoadLoad 337 337 sll %o0, 8, %o2 338 338 339 339 add %g6, TI_FPREGS+0x80, %o3 340 340 add %g6, TI_FPREGS+0xc0, %o4 341 + membar #Sync 341 342 ldda [%o3 + %o2] ASI_BLK_P, %f32 342 343 ldda [%o4 + %o2] ASI_BLK_P, %f48 343 344 membar #Sync
+5 -3
arch/sparc64/lib/VISsave.S
··· 59 59 be,pn %icc, 9b 60 60 add %g6, TI_FPREGS, %g2 61 61 andcc %o5, FPRS_DL, %g0 62 - membar #StoreStore | #LoadStore 63 62 64 63 be,pn %icc, 4f 65 64 add %g6, TI_FPREGS+0x40, %g3 65 + membar #Sync 66 66 stda %f0, [%g2 + %g1] ASI_BLK_P 67 67 stda %f16, [%g3 + %g1] ASI_BLK_P 68 + membar #Sync 68 69 andcc %o5, FPRS_DU, %g0 69 70 be,pn %icc, 5f 70 71 4: add %g1, 128, %g1 72 + membar #Sync 71 73 stda %f32, [%g2 + %g1] ASI_BLK_P 72 74 73 75 stda %f48, [%g3 + %g1] ASI_BLK_P ··· 89 87 sll %g1, 5, %g1 90 88 add %g6, TI_FPREGS+0xc0, %g3 91 89 wr %g0, FPRS_FEF, %fprs 92 - membar #StoreStore | #LoadStore 90 + membar #Sync 93 91 stda %f32, [%g2 + %g1] ASI_BLK_P 94 92 stda %f48, [%g3 + %g1] ASI_BLK_P 95 93 membar #Sync ··· 130 128 be,pn %icc, 4f 131 129 add %g6, TI_FPREGS, %g2 132 130 133 - membar #StoreStore | #LoadStore 134 131 add %g6, TI_FPREGS+0x40, %g3 132 + membar #Sync 135 133 stda %f0, [%g2 + %g1] ASI_BLK_P 136 134 stda %f16, [%g3 + %g1] ASI_BLK_P 137 135 membar #Sync