IA64: Slim down __clear_bit_unlock

__clear_bit_unlock does not need to perform atomic operations on the
variable. Avoid a cmpxchg and simply do a store with release semantics.
Add a compiler barrier before the store, to be safe, so that the compiler
does not reorder memory accesses around it.

Tony: Use intrinsic rather than inline assembler

Signed-off-by: Christoph Lameter <clameter@sgi.com>
Acked-by: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Tony Luck <tony.luck@intel.com>

authored by Christoph Lameter and committed by Tony Luck a3ebdb6c c63a1190

+22 -3
+14 -3
include/asm-ia64/bitops.h
··· 124 124 /** 125 125 * __clear_bit_unlock - Non-atomically clear a bit with release 126 126 * 127 - * This is like clear_bit_unlock, but the implementation may use a non-atomic 128 - * store (this one uses an atomic, however). 127 + * This is like clear_bit_unlock, but the implementation uses a store 128 + * with release semantics. See also __raw_spin_unlock(). 129 129 */ 130 - #define __clear_bit_unlock clear_bit_unlock 130 + static __inline__ void 131 + __clear_bit_unlock(int nr, volatile void *addr) 132 + { 133 + __u32 mask, new; 134 + volatile __u32 *m; 135 + 136 + m = (volatile __u32 *)addr + (nr >> 5); 137 + mask = ~(1 << (nr & 31)); 138 + new = *m & mask; 139 + barrier(); 140 + ia64_st4_rel_nta(m, new); 141 + } 131 142 132 143 /** 133 144 * __clear_bit - Clears a bit in memory (non-atomic version)
+5
include/asm-ia64/gcc_intrin.h
··· 191 191 asm volatile ("ldf.fill %0=[%1]" :"=f"(__f__): "r"(x)); \ 192 192 }) 193 193 194 + #define ia64_st4_rel_nta(m, val) \ 195 + ({ \ 196 + asm volatile ("st4.rel.nta [%0] = %1\n\t" :: "r"(m), "r"(val)); \ 197 + }) 198 + 194 199 #define ia64_stfs(x, regnum) \ 195 200 ({ \ 196 201 register double __f__ asm ("f"#regnum); \
+3
include/asm-ia64/intel_intrin.h
··· 110 110 #define ia64_st4_rel __st4_rel 111 111 #define ia64_st8_rel __st8_rel 112 112 113 + /* FIXME: need st4.rel.nta intrinsic */ 114 + #define ia64_st4_rel_nta __st4_rel 115 + 113 116 #define ia64_ld1_acq __ld1_acq 114 117 #define ia64_ld2_acq __ld2_acq 115 118 #define ia64_ld4_acq __ld4_acq