Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'for-linus-5.12b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip

Pull more xen updates from Juergen Gross:

- A small series for Xen event channels adding some sysfs nodes for per
pv-device settings and statistics, and two fixes of theoretical
problems.

- Two minor fixes (one for an unlikely error path, one for a comment).

* tag 'for-linus-5.12b-rc1-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/xen/tip:
xen-front-pgdir-shbuf: don't record wrong grant handle upon error
xen: Replace lkml.org links with lore
xen/evtchn: use READ/WRITE_ONCE() for accessing ring indices
xen/evtchn: use smp barriers for user event ring
xen/events: add per-xenbus device event statistics and settings

+168 -16
+41
Documentation/ABI/testing/sysfs-devices-xenbus
··· 1 + What: /sys/devices/*/xenbus/event_channels 2 + Date: February 2021 3 + Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org> 4 + Description: 5 + Number of Xen event channels associated with a kernel based 6 + paravirtualized device frontend or backend. 7 + 8 + What: /sys/devices/*/xenbus/events 9 + Date: February 2021 10 + Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org> 11 + Description: 12 + Total number of Xen events received for a Xen pv device 13 + frontend or backend. 14 + 15 + What: /sys/devices/*/xenbus/jiffies_eoi_delayed 16 + Date: February 2021 17 + Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org> 18 + Description: 19 + Summed up time in jiffies the EOI of an interrupt for a Xen 20 + pv device has been delayed in order to avoid stalls due to 21 + event storms. This value rising is a first sign for a rogue 22 + other end of the pv device. 23 + 24 + What: /sys/devices/*/xenbus/spurious_events 25 + Date: February 2021 26 + Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org> 27 + Description: 28 + Number of events received for a Xen pv device which did not 29 + require any action. Too many spurious events in a row will 30 + trigger delayed EOI processing. 31 + 32 + What: /sys/devices/*/xenbus/spurious_threshold 33 + Date: February 2021 34 + Contact: Xen Developers mailing list <xen-devel@lists.xenproject.org> 35 + Description: 36 + Controls the tolerated number of subsequent spurious events 37 + before delayed EOI processing is triggered for a Xen pv 38 + device. Default is 1. This can be modified in case the other 39 + end of the pv device is issuing spurious events on a regular 40 + basis and is known not to be malicious on purpose. Raising 41 + the value for such cases can improve pv device performance.
+25 -2
drivers/xen/events/events_base.c
··· 323 323 324 324 ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0); 325 325 info->u.interdomain = dev; 326 + if (dev) 327 + atomic_inc(&dev->event_channels); 326 328 327 329 return ret; 328 330 } ··· 570 568 return; 571 569 572 570 if (spurious) { 571 + struct xenbus_device *dev = info->u.interdomain; 572 + unsigned int threshold = 1; 573 + 574 + if (dev && dev->spurious_threshold) 575 + threshold = dev->spurious_threshold; 576 + 573 577 if ((1 << info->spurious_cnt) < (HZ << 2)) { 574 578 if (info->spurious_cnt != 0xFF) 575 579 info->spurious_cnt++; 576 580 } 577 - if (info->spurious_cnt > 1) { 578 - delay = 1 << (info->spurious_cnt - 2); 581 + if (info->spurious_cnt > threshold) { 582 + delay = 1 << (info->spurious_cnt - 1 - threshold); 579 583 if (delay > HZ) 580 584 delay = HZ; 581 585 if (!info->eoi_time) 582 586 info->eoi_cpu = smp_processor_id(); 583 587 info->eoi_time = get_jiffies_64() + delay; 588 + if (dev) 589 + atomic_add(delay, &dev->jiffies_eoi_delayed); 584 590 } 591 + if (dev) 592 + atomic_inc(&dev->spurious_events); 585 593 } else { 586 594 info->spurious_cnt = 0; 587 595 } ··· 920 908 921 909 if (VALID_EVTCHN(evtchn)) { 922 910 unsigned int cpu = cpu_from_irq(irq); 911 + struct xenbus_device *dev; 923 912 924 913 xen_evtchn_close(evtchn); 925 914 ··· 930 917 break; 931 918 case IRQT_IPI: 932 919 per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1; 920 + break; 921 + case IRQT_EVTCHN: 922 + dev = info->u.interdomain; 923 + if (dev) 924 + atomic_dec(&dev->event_channels); 933 925 break; 934 926 default: 935 927 break; ··· 1599 1581 { 1600 1582 int irq; 1601 1583 struct irq_info *info; 1584 + struct xenbus_device *dev; 1602 1585 1603 1586 irq = get_evtchn_to_irq(port); 1604 1587 if (irq == -1) ··· 1628 1609 } 1629 1610 1630 1611 info = info_for_irq(irq); 1612 + 1613 + dev = (info->type == IRQT_EVTCHN) ? 
info->u.interdomain : NULL; 1614 + if (dev) 1615 + atomic_inc(&dev->events); 1631 1616 1632 1617 if (ctrl->defer_eoi) { 1633 1618 info->eoi_cpu = smp_processor_id();
+18 -11
drivers/xen/evtchn.c
··· 162 162 { 163 163 struct user_evtchn *evtchn = data; 164 164 struct per_user_data *u = evtchn->user; 165 + unsigned int prod, cons; 165 166 166 167 WARN(!evtchn->enabled, 167 168 "Interrupt for port %u, but apparently not enabled; per-user %p\n", ··· 172 171 173 172 spin_lock(&u->ring_prod_lock); 174 173 175 - if ((u->ring_prod - u->ring_cons) < u->ring_size) { 176 - *evtchn_ring_entry(u, u->ring_prod) = evtchn->port; 177 - wmb(); /* Ensure ring contents visible */ 178 - if (u->ring_cons == u->ring_prod++) { 174 + prod = READ_ONCE(u->ring_prod); 175 + cons = READ_ONCE(u->ring_cons); 176 + 177 + if ((prod - cons) < u->ring_size) { 178 + *evtchn_ring_entry(u, prod) = evtchn->port; 179 + smp_wmb(); /* Ensure ring contents visible */ 180 + WRITE_ONCE(u->ring_prod, prod + 1); 181 + if (cons == prod) { 179 182 wake_up_interruptible(&u->evtchn_wait); 180 183 kill_fasync(&u->evtchn_async_queue, 181 184 SIGIO, POLL_IN); ··· 215 210 if (u->ring_overflow) 216 211 goto unlock_out; 217 212 218 - c = u->ring_cons; 219 - p = u->ring_prod; 213 + c = READ_ONCE(u->ring_cons); 214 + p = READ_ONCE(u->ring_prod); 220 215 if (c != p) 221 216 break; 222 217 ··· 226 221 return -EAGAIN; 227 222 228 223 rc = wait_event_interruptible(u->evtchn_wait, 229 - u->ring_cons != u->ring_prod); 224 + READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod)); 230 225 if (rc) 231 226 return rc; 232 227 } ··· 250 245 } 251 246 252 247 rc = -EFAULT; 253 - rmb(); /* Ensure that we see the port before we copy it. */ 248 + smp_rmb(); /* Ensure that we see the port before we copy it. */ 254 249 if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) || 255 250 ((bytes2 != 0) && 256 251 copy_to_user(&buf[bytes1], &u->ring[0], bytes2))) 257 252 goto unlock_out; 258 253 259 - u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t); 254 + WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t)); 260 255 rc = bytes1 + bytes2; 261 256 262 257 unlock_out: ··· 557 552 /* Initialise the ring to empty. 
Clear errors. */ 558 553 mutex_lock(&u->ring_cons_mutex); 559 554 spin_lock_irq(&u->ring_prod_lock); 560 - u->ring_cons = u->ring_prod = u->ring_overflow = 0; 555 + WRITE_ONCE(u->ring_cons, 0); 556 + WRITE_ONCE(u->ring_prod, 0); 557 + u->ring_overflow = 0; 561 558 spin_unlock_irq(&u->ring_prod_lock); 562 559 mutex_unlock(&u->ring_cons_mutex); 563 560 rc = 0; ··· 602 595 struct per_user_data *u = file->private_data; 603 596 604 597 poll_wait(file, &u->evtchn_wait, wait); 605 - if (u->ring_cons != u->ring_prod) 598 + if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod)) 606 599 mask |= EPOLLIN | EPOLLRDNORM; 607 600 if (u->ring_overflow) 608 601 mask = EPOLLERR;
+2 -1
drivers/xen/xen-acpi-processor.c
··· 3 3 * Copyright 2012 by Oracle Inc 4 4 * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com> 5 5 * 6 - * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249 6 + * This code borrows ideas from 7 + * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com 7 8 * so many thanks go to Kevin Tian <kevin.tian@intel.com> 8 9 * and Yu Ke <ke.yu@intel.com>. 9 10 */
+9 -2
drivers/xen/xen-front-pgdir-shbuf.c
··· 305 305 306 306 /* Save handles even if error, so we can unmap. */ 307 307 for (cur_page = 0; cur_page < buf->num_pages; cur_page++) { 308 - buf->backend_map_handles[cur_page] = map_ops[cur_page].handle; 309 - if (unlikely(map_ops[cur_page].status != GNTST_okay)) 308 + if (likely(map_ops[cur_page].status == GNTST_okay)) { 309 + buf->backend_map_handles[cur_page] = 310 + map_ops[cur_page].handle; 311 + } else { 312 + buf->backend_map_handles[cur_page] = 313 + INVALID_GRANT_HANDLE; 314 + if (!ret) 315 + ret = -ENXIO; 310 316 dev_err(&buf->xb_dev->dev, 311 317 "Failed to map page %d: %d\n", 312 318 cur_page, map_ops[cur_page].status); 319 + } 313 320 } 314 321 315 322 if (ret) {
+66
drivers/xen/xenbus/xenbus_probe.c
··· 206 206 } 207 207 EXPORT_SYMBOL_GPL(xenbus_otherend_changed); 208 208 209 + #define XENBUS_SHOW_STAT(name) \ 210 + static ssize_t show_##name(struct device *_dev, \ 211 + struct device_attribute *attr, \ 212 + char *buf) \ 213 + { \ 214 + struct xenbus_device *dev = to_xenbus_device(_dev); \ 215 + \ 216 + return sprintf(buf, "%d\n", atomic_read(&dev->name)); \ 217 + } \ 218 + static DEVICE_ATTR(name, 0444, show_##name, NULL) 219 + 220 + XENBUS_SHOW_STAT(event_channels); 221 + XENBUS_SHOW_STAT(events); 222 + XENBUS_SHOW_STAT(spurious_events); 223 + XENBUS_SHOW_STAT(jiffies_eoi_delayed); 224 + 225 + static ssize_t show_spurious_threshold(struct device *_dev, 226 + struct device_attribute *attr, 227 + char *buf) 228 + { 229 + struct xenbus_device *dev = to_xenbus_device(_dev); 230 + 231 + return sprintf(buf, "%d\n", dev->spurious_threshold); 232 + } 233 + 234 + static ssize_t set_spurious_threshold(struct device *_dev, 235 + struct device_attribute *attr, 236 + const char *buf, size_t count) 237 + { 238 + struct xenbus_device *dev = to_xenbus_device(_dev); 239 + unsigned int val; 240 + ssize_t ret; 241 + 242 + ret = kstrtouint(buf, 0, &val); 243 + if (ret) 244 + return ret; 245 + 246 + dev->spurious_threshold = val; 247 + 248 + return count; 249 + } 250 + 251 + static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold, 252 + set_spurious_threshold); 253 + 254 + static struct attribute *xenbus_attrs[] = { 255 + &dev_attr_event_channels.attr, 256 + &dev_attr_events.attr, 257 + &dev_attr_spurious_events.attr, 258 + &dev_attr_jiffies_eoi_delayed.attr, 259 + &dev_attr_spurious_threshold.attr, 260 + NULL 261 + }; 262 + 263 + static const struct attribute_group xenbus_group = { 264 + .name = "xenbus", 265 + .attrs = xenbus_attrs, 266 + }; 267 + 209 268 int xenbus_dev_probe(struct device *_dev) 210 269 { 211 270 struct xenbus_device *dev = to_xenbus_device(_dev); ··· 312 253 return err; 313 254 } 314 255 256 + dev->spurious_threshold = 1; 257 + if 
(sysfs_create_group(&dev->dev.kobj, &xenbus_group)) 258 + dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n", 259 + dev->nodename); 260 + 315 261 return 0; 316 262 fail_put: 317 263 module_put(drv->driver.owner); ··· 332 268 struct xenbus_driver *drv = to_xenbus_driver(_dev->driver); 333 269 334 270 DPRINTK("%s", dev->nodename); 271 + 272 + sysfs_remove_group(&dev->dev.kobj, &xenbus_group); 335 273 336 274 free_otherend_watch(dev); 337 275
+7
include/xen/xenbus.h
··· 88 88 struct completion down; 89 89 struct work_struct work; 90 90 struct semaphore reclaim_sem; 91 + 92 + /* Event channel based statistics and settings. */ 93 + atomic_t event_channels; 94 + atomic_t events; 95 + atomic_t spurious_events; 96 + atomic_t jiffies_eoi_delayed; 97 + unsigned int spurious_threshold; 91 98 }; 92 99 93 100 static inline struct xenbus_device *to_xenbus_device(struct device *dev)