Merge branch 'ras-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS fix from Ingo Molnar:
"Fix an RCU warning that triggers when /dev/mcelog is used"

* 'ras-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/mcelog: Get rid of RCU remnants

Changed files: +25 -92

arch/x86/kernel/cpu/mcheck/dev-mcelog.c (+25 -92)
···
 static char mce_helper[128];
 static char *mce_helper_argv[2] = { mce_helper, NULL };
 
-#define mce_log_get_idx_check(p) \
-({ \
-	RCU_LOCKDEP_WARN(!rcu_read_lock_sched_held() && \
-			 !lockdep_is_held(&mce_chrdev_read_mutex), \
-			 "suspicious mce_log_get_idx_check() usage"); \
-	smp_load_acquire(&(p)); \
-})
-
 /*
  * Lockless MCE logging infrastructure.
  * This avoids deadlocks on printk locks without having to break locks. Also
···
 			    void *data)
 {
 	struct mce *mce = (struct mce *)data;
-	unsigned int next, entry;
+	unsigned int entry;
 
-	wmb();
-	for (;;) {
-		entry = mce_log_get_idx_check(mcelog.next);
-		for (;;) {
+	mutex_lock(&mce_chrdev_read_mutex);
 
-			/*
-			 * When the buffer fills up discard new entries.
-			 * Assume that the earlier errors are the more
-			 * interesting ones:
-			 */
-			if (entry >= MCE_LOG_LEN) {
-				set_bit(MCE_OVERFLOW,
-					(unsigned long *)&mcelog.flags);
-				return NOTIFY_OK;
-			}
-			/* Old left over entry. Skip: */
-			if (mcelog.entry[entry].finished) {
-				entry++;
-				continue;
-			}
-			break;
-		}
-		smp_rmb();
-		next = entry + 1;
-		if (cmpxchg(&mcelog.next, entry, next) == entry)
-			break;
+	entry = mcelog.next;
+
+	/*
+	 * When the buffer fills up discard new entries. Assume that the
+	 * earlier errors are the more interesting ones:
+	 */
+	if (entry >= MCE_LOG_LEN) {
+		set_bit(MCE_OVERFLOW, (unsigned long *)&mcelog.flags);
+		goto unlock;
 	}
+
+	mcelog.next = entry + 1;
+
 	memcpy(mcelog.entry + entry, mce, sizeof(struct mce));
-	wmb();
 	mcelog.entry[entry].finished = 1;
-	wmb();
 
 	/* wake processes polling /dev/mcelog */
 	wake_up_interruptible(&mce_chrdev_wait);
+
+unlock:
+	mutex_unlock(&mce_chrdev_read_mutex);
 
 	return NOTIFY_OK;
 }
···
 	return 0;
 }
 
-static void collect_tscs(void *data)
-{
-	unsigned long *cpu_tsc = (unsigned long *)data;
-
-	cpu_tsc[smp_processor_id()] = rdtsc();
-}
-
 static int mce_apei_read_done;
 
 /* Collect MCE record of previous boot in persistent storage via APEI ERST.
  */
···
 			       size_t usize, loff_t *off)
 {
 	char __user *buf = ubuf;
-	unsigned long *cpu_tsc;
-	unsigned prev, next;
+	unsigned next;
 	int i, err;
-
-	cpu_tsc = kmalloc(nr_cpu_ids * sizeof(long), GFP_KERNEL);
-	if (!cpu_tsc)
-		return -ENOMEM;
 
 	mutex_lock(&mce_chrdev_read_mutex);
···
 		goto out;
 	}
 
-	next = mce_log_get_idx_check(mcelog.next);
-
 	/* Only supports full reads right now */
 	err = -EINVAL;
 	if (*off != 0 || usize < MCE_LOG_LEN*sizeof(struct mce))
 		goto out;
 
+	next = mcelog.next;
 	err = 0;
-	prev = 0;
-	do {
-		for (i = prev; i < next; i++) {
-			unsigned long start = jiffies;
-			struct mce *m = &mcelog.entry[i];
-
-			while (!m->finished) {
-				if (time_after_eq(jiffies, start + 2)) {
-					memset(m, 0, sizeof(*m));
-					goto timeout;
-				}
-				cpu_relax();
-			}
-			smp_rmb();
-			err |= copy_to_user(buf, m, sizeof(*m));
-			buf += sizeof(*m);
-timeout:
-			;
-		}
-
-		memset(mcelog.entry + prev, 0,
-		       (next - prev) * sizeof(struct mce));
-		prev = next;
-		next = cmpxchg(&mcelog.next, prev, 0);
-	} while (next != prev);
-
-	synchronize_sched();
-
-	/*
-	 * Collect entries that were still getting written before the
-	 * synchronize.
-	 */
-	on_each_cpu(collect_tscs, cpu_tsc, 1);
-
-	for (i = next; i < MCE_LOG_LEN; i++) {
+	for (i = 0; i < next; i++) {
 		struct mce *m = &mcelog.entry[i];
 
-		if (m->finished && m->tsc < cpu_tsc[m->cpu]) {
-			err |= copy_to_user(buf, m, sizeof(*m));
-			smp_rmb();
-			buf += sizeof(*m);
-			memset(m, 0, sizeof(*m));
-		}
+		err |= copy_to_user(buf, m, sizeof(*m));
+		buf += sizeof(*m);
 	}
+
+	memset(mcelog.entry, 0, next * sizeof(struct mce));
+	mcelog.next = 0;
 
 	if (err)
 		err = -EFAULT;
 
 out:
 	mutex_unlock(&mce_chrdev_read_mutex);
-	kfree(cpu_tsc);
 
 	return err ? err : buf - ubuf;
 }
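
For readers who want the net effect without tracing every hunk: both the producer side (the notifier that appends to mcelog) and the consumer side (mce_chrdev_read()) now simply serialize on mce_chrdev_read_mutex. That is what lets the cmpxchg() reservation loop, the wmb()/smp_rmb() pairs, the finished-flag polling and the per-CPU TSC collection all go away; the mutex_lock() in the notifier also implies the callback now runs in a context that may sleep. Below is a minimal, self-contained userspace sketch of the resulting scheme, assuming a fixed-size log that is drained in full; the names (log_append(), log_drain(), LOG_LEN, struct record) are hypothetical stand-ins for the mcelog machinery, not kernel API:

/*
 * Hypothetical userspace sketch (pthreads) of the locking scheme the
 * patch ends up with. Nothing here is kernel API.
 */
#include <pthread.h>
#include <stdio.h>
#include <string.h>

#define LOG_LEN 32

struct record {
	int data;
	int finished;
};

static struct record log_buf[LOG_LEN];
static unsigned int log_next;		/* first free slot */
static int log_overflowed;
static pthread_mutex_t log_lock = PTHREAD_MUTEX_INITIALIZER;

/* Producer: plays the role of the dev_mce_log() notifier. */
static int log_append(int data)
{
	int ret = 0;

	pthread_mutex_lock(&log_lock);

	/*
	 * Buffer full: discard new entries and flag the overflow,
	 * keeping the earlier (more interesting) records.
	 */
	if (log_next >= LOG_LEN) {
		log_overflowed = 1;
		ret = -1;
		goto unlock;
	}

	log_buf[log_next].data = data;
	log_buf[log_next].finished = 1;
	log_next++;
unlock:
	pthread_mutex_unlock(&log_lock);
	return ret;
}

/* Consumer: plays the role of mce_chrdev_read(); full drain, then reset. */
static unsigned int log_drain(struct record *out)
{
	unsigned int n;

	pthread_mutex_lock(&log_lock);
	n = log_next;
	memcpy(out, log_buf, n * sizeof(*out));
	memset(log_buf, 0, n * sizeof(*log_buf));
	log_next = 0;
	pthread_mutex_unlock(&log_lock);

	return n;
}

int main(void)
{
	struct record out[LOG_LEN];
	unsigned int i, n;

	log_append(1);
	log_append(2);

	n = log_drain(out);
	for (i = 0; i < n; i++)
		printf("record %u: data=%d\n", i, out[i].data);

	if (log_overflowed)
		puts("log overflowed; oldest records were kept");

	return 0;
}

The trade-off is plain mutual exclusion instead of lockless progress, which is acceptable here because logging through /dev/mcelog is a slow path; and with no lockless readers left, there is nothing for the removed mce_log_get_idx_check() lockdep annotation to warn about.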