Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

xen/evtchn: improve scalability by using per-user locks

The global array of port users and the port_user_lock limits
scalability of the evtchn device. Instead of the global array lookup,
use a per-user (per-fd) tree of event channels bound by that user and
protect the tree with a per-user lock.

This is also a prerequisite for extending the number of supported event
channels, by removing the fixed size, per-event channel array.

Signed-off-by: David Vrabel <david.vrabel@citrix.com>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>

authored by

David Vrabel and committed by
Konrad Rzeszutek Wilk
73cc4bb0 65a45fa2

+112 -80
+112 -80
drivers/xen/evtchn.c
··· 57 57 58 58 struct per_user_data { 59 59 struct mutex bind_mutex; /* serialize bind/unbind operations */ 60 + struct rb_root evtchns; 60 61 61 62 /* Notification ring, accessed via /dev/xen/evtchn. */ 62 63 #define EVTCHN_RING_SIZE (PAGE_SIZE / sizeof(evtchn_port_t)) ··· 65 64 evtchn_port_t *ring; 66 65 unsigned int ring_cons, ring_prod, ring_overflow; 67 66 struct mutex ring_cons_mutex; /* protect against concurrent readers */ 67 + spinlock_t ring_prod_lock; /* product against concurrent interrupts */ 68 68 69 69 /* Processes wait on this queue when ring is empty. */ 70 70 wait_queue_head_t evtchn_wait; ··· 73 71 const char *name; 74 72 }; 75 73 76 - /* 77 - * Who's bound to each port? This is logically an array of struct 78 - * per_user_data *, but we encode the current enabled-state in bit 0. 79 - */ 80 - static unsigned long *port_user; 81 - static DEFINE_SPINLOCK(port_user_lock); /* protects port_user[] and ring_prod */ 74 + struct user_evtchn { 75 + struct rb_node node; 76 + struct per_user_data *user; 77 + unsigned port; 78 + bool enabled; 79 + }; 82 80 83 - static inline struct per_user_data *get_port_user(unsigned port) 81 + static int add_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) 84 82 { 85 - return (struct per_user_data *)(port_user[port] & ~1); 83 + struct rb_node **new = &(u->evtchns.rb_node), *parent = NULL; 84 + 85 + while (*new) { 86 + struct user_evtchn *this; 87 + 88 + this = container_of(*new, struct user_evtchn, node); 89 + 90 + parent = *new; 91 + if (this->port < evtchn->port) 92 + new = &((*new)->rb_left); 93 + else if (this->port > evtchn->port) 94 + new = &((*new)->rb_right); 95 + else 96 + return -EEXIST; 97 + } 98 + 99 + /* Add new node and rebalance tree. 
*/ 100 + rb_link_node(&evtchn->node, parent, new); 101 + rb_insert_color(&evtchn->node, &u->evtchns); 102 + 103 + return 0; 86 104 } 87 105 88 - static inline void set_port_user(unsigned port, struct per_user_data *u) 106 + static void del_evtchn(struct per_user_data *u, struct user_evtchn *evtchn) 89 107 { 90 - port_user[port] = (unsigned long)u; 108 + rb_erase(&evtchn->node, &u->evtchns); 109 + kfree(evtchn); 91 110 } 92 111 93 - static inline bool get_port_enabled(unsigned port) 112 + static struct user_evtchn *find_evtchn(struct per_user_data *u, unsigned port) 94 113 { 95 - return port_user[port] & 1; 96 - } 114 + struct rb_node *node = u->evtchns.rb_node; 97 115 98 - static inline void set_port_enabled(unsigned port, bool enabled) 99 - { 100 - if (enabled) 101 - port_user[port] |= 1; 102 - else 103 - port_user[port] &= ~1; 116 + while (node) { 117 + struct user_evtchn *evtchn; 118 + 119 + evtchn = container_of(node, struct user_evtchn, node); 120 + 121 + if (evtchn->port < port) 122 + node = node->rb_left; 123 + else if (evtchn->port > port) 124 + node = node->rb_right; 125 + else 126 + return evtchn; 127 + } 128 + return NULL; 104 129 } 105 130 106 131 static irqreturn_t evtchn_interrupt(int irq, void *data) 107 132 { 108 - unsigned int port = (unsigned long)data; 109 - struct per_user_data *u; 133 + struct user_evtchn *evtchn = data; 134 + struct per_user_data *u = evtchn->user; 110 135 111 - spin_lock(&port_user_lock); 112 - 113 - u = get_port_user(port); 114 - 115 - WARN(!get_port_enabled(port), 136 + WARN(!evtchn->enabled, 116 137 "Interrupt for port %d, but apparently not enabled; per-user %p\n", 117 - port, u); 138 + evtchn->port, u); 118 139 119 140 disable_irq_nosync(irq); 120 - set_port_enabled(port, false); 141 + evtchn->enabled = false; 142 + 143 + spin_lock(&u->ring_prod_lock); 121 144 122 145 if ((u->ring_prod - u->ring_cons) < EVTCHN_RING_SIZE) { 123 - u->ring[EVTCHN_RING_MASK(u->ring_prod)] = port; 146 + u->ring[EVTCHN_RING_MASK(u->ring_prod)] 
= evtchn->port; 124 147 wmb(); /* Ensure ring contents visible */ 125 148 if (u->ring_cons == u->ring_prod++) { 126 149 wake_up_interruptible(&u->evtchn_wait); ··· 155 128 } else 156 129 u->ring_overflow = 1; 157 130 158 - spin_unlock(&port_user_lock); 131 + spin_unlock(&u->ring_prod_lock); 159 132 160 133 return IRQ_HANDLED; 161 134 } ··· 256 229 if (copy_from_user(kbuf, buf, count) != 0) 257 230 goto out; 258 231 259 - spin_lock_irq(&port_user_lock); 232 + mutex_lock(&u->bind_mutex); 260 233 261 234 for (i = 0; i < (count/sizeof(evtchn_port_t)); i++) { 262 235 unsigned port = kbuf[i]; 236 + struct user_evtchn *evtchn; 263 237 264 - if (port < NR_EVENT_CHANNELS && 265 - get_port_user(port) == u && 266 - !get_port_enabled(port)) { 267 - set_port_enabled(port, true); 238 + evtchn = find_evtchn(u, port); 239 + if (evtchn && !evtchn->enabled) { 240 + evtchn->enabled = true; 268 241 enable_irq(irq_from_evtchn(port)); 269 242 } 270 243 } 271 244 272 - spin_unlock_irq(&port_user_lock); 245 + mutex_unlock(&u->bind_mutex); 273 246 274 247 rc = count; 275 248 ··· 280 253 281 254 static int evtchn_bind_to_user(struct per_user_data *u, int port) 282 255 { 256 + struct user_evtchn *evtchn; 257 + struct evtchn_close close; 283 258 int rc = 0; 284 259 285 260 /* ··· 292 263 * interrupt handler yet, and our caller has already 293 264 * serialized bind operations.) 
294 265 */ 295 - BUG_ON(get_port_user(port) != NULL); 296 - set_port_user(port, u); 297 - set_port_enabled(port, true); /* start enabled */ 266 + 267 + evtchn = kzalloc(sizeof(*evtchn), GFP_KERNEL); 268 + if (!evtchn) 269 + return -ENOMEM; 270 + 271 + evtchn->user = u; 272 + evtchn->port = port; 273 + evtchn->enabled = true; /* start enabled */ 274 + 275 + rc = add_evtchn(u, evtchn); 276 + if (rc < 0) 277 + goto err; 298 278 299 279 rc = bind_evtchn_to_irqhandler(port, evtchn_interrupt, IRQF_DISABLED, 300 - u->name, (void *)(unsigned long)port); 301 - if (rc >= 0) 302 - rc = evtchn_make_refcounted(port); 303 - else { 304 - /* bind failed, should close the port now */ 305 - struct evtchn_close close; 306 - close.port = port; 307 - if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 308 - BUG(); 309 - set_port_user(port, NULL); 310 - } 280 + u->name, evtchn); 281 + if (rc < 0) 282 + goto err; 311 283 284 + rc = evtchn_make_refcounted(port); 285 + return rc; 286 + 287 + err: 288 + /* bind failed, should close the port now */ 289 + close.port = port; 290 + if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close) != 0) 291 + BUG(); 292 + del_evtchn(u, evtchn); 293 + kfree(evtchn); 312 294 return rc; 313 295 } 314 296 315 - static void evtchn_unbind_from_user(struct per_user_data *u, int port) 297 + static void evtchn_unbind_from_user(struct per_user_data *u, 298 + struct user_evtchn *evtchn) 316 299 { 317 - int irq = irq_from_evtchn(port); 300 + int irq = irq_from_evtchn(evtchn->port); 318 301 319 302 BUG_ON(irq < 0); 320 303 321 - unbind_from_irqhandler(irq, (void *)(unsigned long)port); 304 + unbind_from_irqhandler(irq, evtchn); 322 305 323 - set_port_user(port, NULL); 306 + del_evtchn(u, evtchn); 324 307 } 325 308 326 309 static long evtchn_ioctl(struct file *file, ··· 411 370 412 371 case IOCTL_EVTCHN_UNBIND: { 413 372 struct ioctl_evtchn_unbind unbind; 373 + struct user_evtchn *evtchn; 414 374 415 375 rc = -EFAULT; 416 376 if (copy_from_user(&unbind, uarg, 
sizeof(unbind))) ··· 422 380 break; 423 381 424 382 rc = -ENOTCONN; 425 - if (get_port_user(unbind.port) != u) 383 + evtchn = find_evtchn(u, unbind.port); 384 + if (!evtchn) 426 385 break; 427 386 428 387 disable_irq(irq_from_evtchn(unbind.port)); 429 - 430 - evtchn_unbind_from_user(u, unbind.port); 431 - 388 + evtchn_unbind_from_user(u, evtchn); 432 389 rc = 0; 433 390 break; 434 391 } 435 392 436 393 case IOCTL_EVTCHN_NOTIFY: { 437 394 struct ioctl_evtchn_notify notify; 395 + struct user_evtchn *evtchn; 438 396 439 397 rc = -EFAULT; 440 398 if (copy_from_user(&notify, uarg, sizeof(notify))) 441 399 break; 442 400 443 - if (notify.port >= NR_EVENT_CHANNELS) { 444 - rc = -EINVAL; 445 - } else if (get_port_user(notify.port) != u) { 446 - rc = -ENOTCONN; 447 - } else { 401 + rc = -ENOTCONN; 402 + evtchn = find_evtchn(u, notify.port); 403 + if (evtchn) { 448 404 notify_remote_via_evtchn(notify.port); 449 405 rc = 0; 450 406 } ··· 452 412 case IOCTL_EVTCHN_RESET: { 453 413 /* Initialise the ring to empty. Clear errors. 
*/ 454 414 mutex_lock(&u->ring_cons_mutex); 455 - spin_lock_irq(&port_user_lock); 415 + spin_lock_irq(&u->ring_prod_lock); 456 416 u->ring_cons = u->ring_prod = u->ring_overflow = 0; 457 - spin_unlock_irq(&port_user_lock); 417 + spin_unlock_irq(&u->ring_prod_lock); 458 418 mutex_unlock(&u->ring_cons_mutex); 459 419 rc = 0; 460 420 break; ··· 513 473 514 474 mutex_init(&u->bind_mutex); 515 475 mutex_init(&u->ring_cons_mutex); 476 + spin_lock_init(&u->ring_prod_lock); 516 477 517 478 filp->private_data = u; 518 479 ··· 522 481 523 482 static int evtchn_release(struct inode *inode, struct file *filp) 524 483 { 525 - int i; 526 484 struct per_user_data *u = filp->private_data; 485 + struct rb_node *node; 527 486 528 - for (i = 0; i < NR_EVENT_CHANNELS; i++) { 529 - if (get_port_user(i) != u) 530 - continue; 487 + while ((node = u->evtchns.rb_node)) { 488 + struct user_evtchn *evtchn; 531 489 532 - disable_irq(irq_from_evtchn(i)); 533 - evtchn_unbind_from_user(get_port_user(i), i); 490 + evtchn = rb_entry(node, struct user_evtchn, node); 491 + disable_irq(irq_from_evtchn(evtchn->port)); 492 + evtchn_unbind_from_user(u, evtchn); 534 493 } 535 494 536 495 free_page((unsigned long)u->ring); ··· 564 523 if (!xen_domain()) 565 524 return -ENODEV; 566 525 567 - port_user = kcalloc(NR_EVENT_CHANNELS, sizeof(*port_user), GFP_KERNEL); 568 - if (port_user == NULL) 569 - return -ENOMEM; 570 - 571 - spin_lock_init(&port_user_lock); 572 - 573 526 /* Create '/dev/xen/evtchn'. */ 574 527 err = misc_register(&evtchn_miscdev); 575 528 if (err != 0) { ··· 578 543 579 544 static void __exit evtchn_cleanup(void) 580 545 { 581 - kfree(port_user); 582 - port_user = NULL; 583 - 584 546 misc_deregister(&evtchn_miscdev); 585 547 } 586 548