Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

[PATCH] Directed yield: direct yield of spinlocks for s390.

Use the new diagnose 0x9c in the spinlock implementation for s390. It
yields the remaining timeslice of the virtual cpu that tries to acquire a
lock to the virtual cpu that is the current holder of the lock.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>

Authored by Martin Schwidefsky; committed by Linus Torvalds.
3c1fcfe2 cdc39363

+87 -35
+11
arch/s390/kernel/head31.S
··· 254 254 oi 3(%r12),0x80 # set IDTE flag 255 255 .Lchkidte: 256 256 257 + # 258 + # find out if the diag 0x9c is available 259 + # 260 + mvc __LC_PGM_NEW_PSW(8),.Lpcdiag9c-.LPG1(%r13) 261 + stap __LC_CPUID+4 # store cpu address 262 + lh %r1,__LC_CPUID+4 263 + diag %r1,0,0x9c # test diag 0x9c 264 + oi 2(%r12),1 # set diag9c flag 265 + .Lchkdiag9c: 266 + 257 267 lpsw .Lentry-.LPG1(13) # jump to _stext in primary-space, 258 268 # virtual and never return ... 259 269 .align 8 ··· 291 281 .Lpccsp:.long 0x00080000,0x80000000 + .Lchkcsp 292 282 .Lpcmvpg:.long 0x00080000,0x80000000 + .Lchkmvpg 293 283 .Lpcidte:.long 0x00080000,0x80000000 + .Lchkidte 284 + .Lpcdiag9c:.long 0x00080000,0x80000000 + .Lchkdiag9c 294 285 .Lmemsize:.long memory_size 295 286 .Lmchunk:.long memory_chunk 296 287 .Lmflags:.long machine_flags
+11
arch/s390/kernel/head64.S
··· 251 251 0: 252 252 253 253 # 254 + # find out if the diag 0x9c is available 255 + # 256 + la %r1,0f-.LPG1(%r13) # set program check address 257 + stg %r1,__LC_PGM_NEW_PSW+8 258 + stap __LC_CPUID+4 # store cpu address 259 + lh %r1,__LC_CPUID+4 260 + diag %r1,0,0x9c # test diag 0x9c 261 + oi 6(%r12),1 # set diag9c flag 262 + 0: 263 + 264 + # 254 265 # find out if we have the MVCOS instruction 255 266 # 256 267 la %r1,0f-.LPG1(%r13) # set program check address
+39 -23
arch/s390/lib/spinlock.c
··· 24 24 } 25 25 __setup("spin_retry=", spin_retry_setup); 26 26 27 - static inline void 28 - _diag44(void) 27 + static inline void _raw_yield(void) 29 28 { 30 - #ifdef CONFIG_64BIT 31 29 if (MACHINE_HAS_DIAG44) 32 - #endif 33 30 asm volatile("diag 0,0,0x44"); 34 31 } 35 32 36 - void 37 - _raw_spin_lock_wait(raw_spinlock_t *lp, unsigned int pc) 33 + static inline void _raw_yield_cpu(int cpu) 34 + { 35 + if (MACHINE_HAS_DIAG9C) 36 + asm volatile("diag %0,0,0x9c" 37 + : : "d" (__cpu_logical_map[cpu])); 38 + else 39 + _raw_yield(); 40 + } 41 + 42 + void _raw_spin_lock_wait(raw_spinlock_t *lp, unsigned int pc) 38 43 { 39 44 int count = spin_retry; 45 + unsigned int cpu = ~smp_processor_id(); 40 46 41 47 while (1) { 42 48 if (count-- <= 0) { 43 - _diag44(); 49 + unsigned int owner = lp->owner_cpu; 50 + if (owner != 0) 51 + _raw_yield_cpu(~owner); 44 52 count = spin_retry; 45 53 } 46 54 if (__raw_spin_is_locked(lp)) 47 55 continue; 48 - if (_raw_compare_and_swap(&lp->lock, 0, pc) == 0) 56 + if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) { 57 + lp->owner_pc = pc; 49 58 return; 59 + } 50 60 } 51 61 } 52 62 EXPORT_SYMBOL(_raw_spin_lock_wait); 53 63 54 - int 55 - _raw_spin_trylock_retry(raw_spinlock_t *lp, unsigned int pc) 64 + int _raw_spin_trylock_retry(raw_spinlock_t *lp, unsigned int pc) 56 65 { 57 - int count = spin_retry; 66 + unsigned int cpu = ~smp_processor_id(); 67 + int count; 58 68 59 - while (count-- > 0) { 69 + for (count = spin_retry; count > 0; count--) { 60 70 if (__raw_spin_is_locked(lp)) 61 71 continue; 62 - if (_raw_compare_and_swap(&lp->lock, 0, pc) == 0) 72 + if (_raw_compare_and_swap(&lp->owner_cpu, 0, cpu) == 0) { 73 + lp->owner_pc = pc; 63 74 return 1; 75 + } 64 76 } 65 77 return 0; 66 78 } 67 79 EXPORT_SYMBOL(_raw_spin_trylock_retry); 68 80 69 - void 70 - _raw_read_lock_wait(raw_rwlock_t *rw) 81 + void _raw_spin_relax(raw_spinlock_t *lock) 82 + { 83 + unsigned int cpu = lock->owner_cpu; 84 + if (cpu != 0) 85 + _raw_yield_cpu(~cpu); 86 + } 
87 + EXPORT_SYMBOL(_raw_spin_relax); 88 + 89 + void _raw_read_lock_wait(raw_rwlock_t *rw) 71 90 { 72 91 unsigned int old; 73 92 int count = spin_retry; 74 93 75 94 while (1) { 76 95 if (count-- <= 0) { 77 - _diag44(); 96 + _raw_yield(); 78 97 count = spin_retry; 79 98 } 80 99 if (!__raw_read_can_lock(rw)) ··· 105 86 } 106 87 EXPORT_SYMBOL(_raw_read_lock_wait); 107 88 108 - int 109 - _raw_read_trylock_retry(raw_rwlock_t *rw) 89 + int _raw_read_trylock_retry(raw_rwlock_t *rw) 110 90 { 111 91 unsigned int old; 112 92 int count = spin_retry; ··· 121 103 } 122 104 EXPORT_SYMBOL(_raw_read_trylock_retry); 123 105 124 - void 125 - _raw_write_lock_wait(raw_rwlock_t *rw) 106 + void _raw_write_lock_wait(raw_rwlock_t *rw) 126 107 { 127 108 int count = spin_retry; 128 109 129 110 while (1) { 130 111 if (count-- <= 0) { 131 - _diag44(); 112 + _raw_yield(); 132 113 count = spin_retry; 133 114 } 134 115 if (!__raw_write_can_lock(rw)) ··· 138 121 } 139 122 EXPORT_SYMBOL(_raw_write_lock_wait); 140 123 141 - int 142 - _raw_write_trylock_retry(raw_rwlock_t *rw) 124 + int _raw_write_trylock_retry(raw_rwlock_t *rw) 143 125 { 144 126 int count = spin_retry; 145 127
+1
include/asm-s390/setup.h
··· 39 39 #define MACHINE_IS_P390 (machine_flags & 4) 40 40 #define MACHINE_HAS_MVPG (machine_flags & 16) 41 41 #define MACHINE_HAS_IDTE (machine_flags & 128) 42 + #define MACHINE_HAS_DIAG9C (machine_flags & 256) 42 43 43 44 #ifndef __s390x__ 44 45 #define MACHINE_HAS_IEEE (machine_flags & 2)
+22 -9
include/asm-s390/spinlock.h
··· 13 13 14 14 #if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ > 2) 15 15 16 + #include <linux/smp.h> 17 + 16 18 static inline int 17 19 _raw_compare_and_swap(volatile unsigned int *lock, 18 20 unsigned int old, unsigned int new) ··· 52 50 * (the type definitions are in asm/spinlock_types.h) 53 51 */ 54 52 55 - #define __raw_spin_is_locked(x) ((x)->lock != 0) 53 + #define __raw_spin_is_locked(x) ((x)->owner_cpu != 0) 56 54 #define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) 57 55 #define __raw_spin_unlock_wait(lock) \ 58 - do { while (__raw_spin_is_locked(lock)) cpu_relax(); } while (0) 56 + do { while (__raw_spin_is_locked(lock)) \ 57 + _raw_spin_relax(lock); } while (0) 59 58 60 - extern void _raw_spin_lock_wait(raw_spinlock_t *lp, unsigned int pc); 61 - extern int _raw_spin_trylock_retry(raw_spinlock_t *lp, unsigned int pc); 59 + extern void _raw_spin_lock_wait(raw_spinlock_t *, unsigned int pc); 60 + extern int _raw_spin_trylock_retry(raw_spinlock_t *, unsigned int pc); 61 + extern void _raw_spin_relax(raw_spinlock_t *lock); 62 62 63 63 static inline void __raw_spin_lock(raw_spinlock_t *lp) 64 64 { 65 65 unsigned long pc = 1 | (unsigned long) __builtin_return_address(0); 66 + int old; 66 67 67 - if (unlikely(_raw_compare_and_swap(&lp->lock, 0, pc) != 0)) 68 - _raw_spin_lock_wait(lp, pc); 68 + old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); 69 + if (likely(old == 0)) { 70 + lp->owner_pc = pc; 71 + return; 72 + } 73 + _raw_spin_lock_wait(lp, pc); 69 74 } 70 75 71 76 static inline int __raw_spin_trylock(raw_spinlock_t *lp) 72 77 { 73 78 unsigned long pc = 1 | (unsigned long) __builtin_return_address(0); 79 + int old; 74 80 75 - if (likely(_raw_compare_and_swap(&lp->lock, 0, pc) == 0)) 81 + old = _raw_compare_and_swap(&lp->owner_cpu, 0, ~smp_processor_id()); 82 + if (likely(old == 0)) { 83 + lp->owner_pc = pc; 76 84 return 1; 85 + } 77 86 return _raw_spin_trylock_retry(lp, pc); 78 87 } 79 88 80 89 static inline void 
__raw_spin_unlock(raw_spinlock_t *lp) 81 90 { 82 - _raw_compare_and_swap(&lp->lock, lp->lock, 0); 91 + lp->owner_pc = 0; 92 + _raw_compare_and_swap(&lp->owner_cpu, lp->owner_cpu, 0); 83 93 } 84 94 85 95 /* ··· 168 154 return _raw_write_trylock_retry(rw); 169 155 } 170 156 171 - #define _raw_spin_relax(lock) cpu_relax() 172 157 #define _raw_read_relax(lock) cpu_relax() 173 158 #define _raw_write_relax(lock) cpu_relax() 174 159
+3 -3
include/asm-s390/spinlock_types.h
··· 6 6 #endif 7 7 8 8 typedef struct { 9 - volatile unsigned int lock; 9 + volatile unsigned int owner_cpu; 10 + volatile unsigned int owner_pc; 10 11 } __attribute__ ((aligned (4))) raw_spinlock_t; 11 12 12 13 #define __RAW_SPIN_LOCK_UNLOCKED { 0 } 13 14 14 15 typedef struct { 15 16 volatile unsigned int lock; 16 - volatile unsigned int owner_pc; 17 17 } raw_rwlock_t; 18 18 19 - #define __RAW_RW_LOCK_UNLOCKED { 0, 0 } 19 + #define __RAW_RW_LOCK_UNLOCKED { 0 } 20 20 21 21 #endif