Merge tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux

Pull ACCESS_ONCE cleanup preparation from Christian Borntraeger:
"kernel: Provide READ_ONCE and ASSIGN_ONCE

As discussed on LKML (http://marc.info/?i=54611D86.4040306%40de.ibm.com),
ACCESS_ONCE might fail with specific compilers for non-scalar accesses.

Here is a set of patches to tackle that problem.

The first patch introduces READ_ONCE and ASSIGN_ONCE. If the data
structure is larger than the machine word size, memcpy is used and a
compile-time warning is emitted. The next patches fix up several
in-tree users of ACCESS_ONCE on non-scalar types.

This does not yet contain a patch that forces ACCESS_ONCE to work only
on scalar types. That change is targeted for the next merge window, as
linux-next already contains new offenders regarding ACCESS_ONCE vs.
non-scalar types"
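
For readers who did not follow the LKML thread, the sketch below is a plain
user-space illustration, not kernel code; the struct and the contended()
helper are made-up stand-ins for the spinlock types touched later in the
series. ACCESS_ONCE works by casting the lvalue to a volatile-qualified
version of its own type, and for non-scalar types that volatile cast is
reportedly what GCC 4.6/4.7 can mishandle during scalar replacement of
aggregates, silently losing the read-exactly-once guarantee.

/* ACCESS_ONCE as defined in include/linux/compiler.h */
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))

/* Illustrative two-field lock type, similar to the ticket locks below. */
struct __raw_tickets { unsigned short owner, next; };

int contended(struct __raw_tickets *t)
{
        /*
         * For a scalar this expands to a single volatile load.  For an
         * aggregate, the volatile qualifier on the cast is exactly what
         * the affected compilers may discard, so the load can be merged
         * or hoisted like any ordinary memory access.
         */
        struct __raw_tickets snap = ACCESS_ONCE(*t);

        return (snap.next - snap.owner) > 1;
}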

* tag 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/borntraeger/linux:
s390/kvm: REPLACE barrier fixup with READ_ONCE
arm/spinlock: Replace ACCESS_ONCE with READ_ONCE
arm64/spinlock: Replace ACCESS_ONCE with READ_ONCE
mips/gup: Replace ACCESS_ONCE with READ_ONCE
x86/gup: Replace ACCESS_ONCE with READ_ONCE
x86/spinlock: Replace ACCESS_ONCE with READ_ONCE
mm: replace ACCESS_ONCE with READ_ONCE or barriers
kernel: Provide READ_ONCE and ASSIGN_ONCE

Changed files: 10 files changed, 103 insertions(+), 25 deletions(-)

arch/arm/include/asm/spinlock.h (+2 -2)
···
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-        return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+        return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-        struct __raw_tickets tickets = ACCESS_ONCE(lock->tickets);
+        struct __raw_tickets tickets = READ_ONCE(lock->tickets);
         return (tickets.next - tickets.owner) > 1;
 }
 #define arch_spin_is_contended  arch_spin_is_contended

arch/arm64/include/asm/spinlock.h (+2 -2)
···
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-        return !arch_spin_value_unlocked(ACCESS_ONCE(*lock));
+        return !arch_spin_value_unlocked(READ_ONCE(*lock));
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-        arch_spinlock_t lockval = ACCESS_ONCE(*lock);
+        arch_spinlock_t lockval = READ_ONCE(*lock);
         return (lockval.next - lockval.owner) > 1;
 }
 #define arch_spin_is_contended  arch_spin_is_contended

arch/mips/mm/gup.c (+1 -1)
···
 
         return pte;
 #else
-        return ACCESS_ONCE(*ptep);
+        return READ_ONCE(*ptep);
 #endif
 }
 

arch/s390/kvm/gaccess.c (+6 -12)
···
                 goto out;
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-                old = *ic;
-                barrier();
+                old = READ_ONCE(*ic);
                 while (old.k) {
                         cond_resched();
-                        old = *ic;
-                        barrier();
+                        old = READ_ONCE(*ic);
                 }
                 new = old;
                 new.k = 1;
···
                 goto out;
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-                old = *ic;
-                barrier();
+                old = READ_ONCE(*ic);
                 new = old;
                 new.k = 0;
         } while (cmpxchg(&ic->val, old.val, new.val) != old.val);
···
 
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-                old = *ic;
-                barrier();
+                old = READ_ONCE(*ic);
                 while (old.kg) {
                         cond_resched();
-                        old = *ic;
-                        barrier();
+                        old = READ_ONCE(*ic);
                 }
                 new = old;
                 new.k = 1;
···
 
         ic = &vcpu->kvm->arch.sca->ipte_control;
         do {
-                old = *ic;
-                barrier();
+                old = READ_ONCE(*ic);
                 new = old;
                 new.kh--;
                 if (!new.kh)
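
All four hunks above follow the same snapshot-and-retry shape. Below is a
self-contained user-space approximation of that pattern; the control-word
layout, K_BIT and the function name are invented for illustration, and
GCC's __sync_val_compare_and_swap stands in for the kernel's cmpxchg().

#include <stdint.h>

#define K_BIT 0x80000000u       /* stand-in for the ipte "k" lock bit */

/* Word-sized read the compiler must perform exactly once per call. */
static inline uint32_t read_once_u32(const uint32_t *p)
{
        return *(const volatile uint32_t *)p;
}

void ipte_lock_sketch(uint32_t *ctl)
{
        uint32_t old, new;

        do {
                old = read_once_u32(ctl);       /* fresh snapshot per iteration */
                while (old & K_BIT)             /* wait until the lock bit clears */
                        old = read_once_u32(ctl);
                new = old | K_BIT;
        } while (__sync_val_compare_and_swap(ctl, old, new) != old);
}

The replaced plain load plus barrier() achieved the same effect indirectly;
READ_ONCE expresses the intent at the access itself.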

arch/x86/include/asm/spinlock.h (+4 -4)
···
                 unsigned count = SPIN_THRESHOLD;
 
                 do {
-                        if (ACCESS_ONCE(lock->tickets.head) == inc.tail)
+                        if (READ_ONCE(lock->tickets.head) == inc.tail)
                                 goto out;
                         cpu_relax();
                 } while (--count);
···
 {
         arch_spinlock_t old, new;
 
-        old.tickets = ACCESS_ONCE(lock->tickets);
+        old.tickets = READ_ONCE(lock->tickets);
         if (old.tickets.head != (old.tickets.tail & ~TICKET_SLOWPATH_FLAG))
                 return 0;
 
···
 
 static inline int arch_spin_is_locked(arch_spinlock_t *lock)
 {
-        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+        struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
         return tmp.tail != tmp.head;
 }
 
 static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 {
-        struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+        struct __raw_tickets tmp = READ_ONCE(lock->tickets);
 
         return (__ticket_t)(tmp.tail - tmp.head) > TICKET_LOCK_INC;
 }
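
Note that these helpers read the whole head/tail pair with one READ_ONCE
rather than loading the two halves separately: a single word-sized load
yields one consistent snapshot of both fields. A rough user-space
equivalent is sketched below; the struct and helper names are invented,
the type-punning mirrors what __read_once_size() does for the 4-byte case,
and it assumes -fno-strict-aliasing as the kernel itself uses.

#include <stdint.h>

struct tickets { uint16_t head, tail; };        /* fits in one 32-bit word */

static inline struct tickets read_tickets_once(const struct tickets *t)
{
        union { uint32_t raw; struct tickets s; } u;

        /* One 32-bit volatile load covers both fields at the same instant,
         * so head and tail cannot come from two different moments. */
        u.raw = *(const volatile uint32_t *)t;
        return u.s;
}

int is_locked_sketch(const struct tickets *t)
{
        struct tickets snap = read_tickets_once(t);

        return snap.tail != snap.head;
}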

arch/x86/mm/gup.c (+1 -1)
···
 static inline pte_t gup_get_pte(pte_t *ptep)
 {
 #ifndef CONFIG_X86_PAE
-        return ACCESS_ONCE(*ptep);
+        return READ_ONCE(*ptep);
 #else
         /*
          * With get_user_pages_fast, we walk down the pagetables without taking

include/linux/compiler.h (+74)
···
 # define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __LINE__)
 #endif
 
+#include <uapi/linux/types.h>
+
+static __always_inline void data_access_exceeds_word_size(void)
+#ifdef __compiletime_warning
+__compiletime_warning("data access exceeds word size and won't be atomic")
+#endif
+;
+
+static __always_inline void data_access_exceeds_word_size(void)
+{
+}
+
+static __always_inline void __read_once_size(volatile void *p, void *res, int size)
+{
+        switch (size) {
+        case 1: *(__u8 *)res = *(volatile __u8 *)p; break;
+        case 2: *(__u16 *)res = *(volatile __u16 *)p; break;
+        case 4: *(__u32 *)res = *(volatile __u32 *)p; break;
+#ifdef CONFIG_64BIT
+        case 8: *(__u64 *)res = *(volatile __u64 *)p; break;
+#endif
+        default:
+                barrier();
+                __builtin_memcpy((void *)res, (const void *)p, size);
+                data_access_exceeds_word_size();
+                barrier();
+        }
+}
+
+static __always_inline void __assign_once_size(volatile void *p, void *res, int size)
+{
+        switch (size) {
+        case 1: *(volatile __u8 *)p = *(__u8 *)res; break;
+        case 2: *(volatile __u16 *)p = *(__u16 *)res; break;
+        case 4: *(volatile __u32 *)p = *(__u32 *)res; break;
+#ifdef CONFIG_64BIT
+        case 8: *(volatile __u64 *)p = *(__u64 *)res; break;
+#endif
+        default:
+                barrier();
+                __builtin_memcpy((void *)p, (const void *)res, size);
+                data_access_exceeds_word_size();
+                barrier();
+        }
+}
+
+/*
+ * Prevent the compiler from merging or refetching reads or writes. The
+ * compiler is also forbidden from reordering successive instances of
+ * READ_ONCE, ASSIGN_ONCE and ACCESS_ONCE (see below), but only when the
+ * compiler is aware of some particular ordering. One way to make the
+ * compiler aware of ordering is to put the two invocations of READ_ONCE,
+ * ASSIGN_ONCE or ACCESS_ONCE() in different C statements.
+ *
+ * In contrast to ACCESS_ONCE these two macros will also work on aggregate
+ * data types like structs or unions. If the size of the accessed data
+ * type exceeds the word size of the machine (e.g., 32 bits or 64 bits)
+ * READ_ONCE() and ASSIGN_ONCE() will fall back to memcpy and print a
+ * compile-time warning.
+ *
+ * Their two major use cases are: (1) Mediating communication between
+ * process-level code and irq/NMI handlers, all running on the same CPU,
+ * and (2) Ensuring that the compiler does not fold, spindle, or otherwise
+ * mutilate accesses that either do not require ordering or that interact
+ * with an explicit memory barrier or atomic instruction that provides the
+ * required ordering.
+ */
+
+#define READ_ONCE(x) \
+        ({ typeof(x) __val; __read_once_size(&x, &__val, sizeof(__val)); __val; })
+
+#define ASSIGN_ONCE(val, x) \
+        ({ typeof(x) __val; __val = val; __assign_once_size(&x, &__val, sizeof(__val)); __val; })
+
 #endif /* __KERNEL__ */
 
 #endif /* __ASSEMBLY__ */
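
Call sites then look like the sketch below; the struct, the variable names
and the surrounding function are illustrative only and assume the
READ_ONCE/ASSIGN_ONCE definitions added above are in scope. A scalar or
word-sized object goes through one of the sized cases in
__read_once_size()/__assign_once_size(); anything wider falls into the
default branch, i.e. a __builtin_memcpy() plus the "data access exceeds
word size" compile-time warning.

struct wide { unsigned long a, b; };    /* wider than one machine word */

static unsigned long sample_once(unsigned long *flags, struct wide *src,
                                 struct wide *dst)
{
        /* Word-sized scalar: a single volatile load. */
        unsigned long f = READ_ONCE(*flags);

        /* Aggregate wider than a word: memcpy fallback, triggers the
         * compile-time warning and is not atomic. */
        *dst = READ_ONCE(*src);

        /* Word-sized store, performed exactly once. */
        ASSIGN_ONCE(f | 1UL, *flags);

        return f;
}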

mm/gup.c (+1 -1)
···
 
         pudp = pud_offset(&pgd, addr);
         do {
-                pud_t pud = ACCESS_ONCE(*pudp);
+                pud_t pud = READ_ONCE(*pudp);
 
                 next = pud_addr_end(addr, end);
                 if (pud_none(pud))

mm/memory.c (+10 -1)
···
         pte_t entry;
         spinlock_t *ptl;
 
-        entry = ACCESS_ONCE(*pte);
+        /*
+         * some architectures can have larger ptes than wordsize,
+         * e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and CONFIG_32BIT=y,
+         * so READ_ONCE or ACCESS_ONCE cannot guarantee atomic accesses.
+         * The code below just needs a consistent view for the ifs and
+         * we later double check anyway with the ptl lock held. So here
+         * a barrier will do.
+         */
+        entry = *pte;
+        barrier();
         if (!pte_present(entry)) {
                 if (pte_none(entry)) {
                         if (vma->vm_ops) {
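
The hunk above deliberately avoids READ_ONCE: when pte_t is wider than the
machine word (the ppc44x example in the new comment), no compiler
annotation can make the load atomic, so the code settles for "do not
refetch" and re-validates later with the ptl lock held. A user-space
illustration of the underlying torn-read risk follows; the function name
is made up, and the asm statement is the compiler-barrier idiom that
barrier() expands to.

#include <stdint.h>

/* On a 32-bit build this 64-bit read is performed as two 32-bit loads, so
 * a concurrent writer can be observed half old / half new. Neither
 * volatile nor READ_ONCE changes that; only a later recheck under a lock
 * makes the value trustworthy. */
uint64_t peek_wide_pte(const uint64_t *p)
{
        uint64_t v = *p;

        /* barrier(): stop the compiler from re-reading *p later instead
         * of using the value captured in v. */
        __asm__ __volatile__("" : : : "memory");
        return v;
}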

mm/rmap.c (+2 -1)
···
          * without holding anon_vma lock for write. So when looking for a
          * genuine pmde (in which to find pte), test present and !THP together.
          */
-        pmde = ACCESS_ONCE(*pmd);
+        pmde = *pmd;
+        barrier();
         if (!pmd_present(pmde) || pmd_trans_huge(pmde))
                 pmd = NULL;
 out: