Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

fs/epoll: use faster rb_first_cached()

Use rb_first_cached() so that we can avoid the tree walks to get the node
with the smallest key. This is semantically the same as the previously used
rb_first(), but O(1). The main overhead is the extra footprint for the cached
rb_node pointer, which should not matter for epoll.

Link: http://lkml.kernel.org/r/20170719014603.19029-15-dave@stgolabs.net
Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

Authored by Davidlohr Bueso and committed by Linus Torvalds
b2ac2ea6 410bd5ec

+16 -14
+16 -14
fs/eventpoll.c
···
205 205 struct list_head rdllist;
206 206
207 207 /* RB tree root used to store monitored fd structs */
208 - struct rb_root rbr;
208 + struct rb_root_cached rbr;
209 209
210 210 /*
211 211 * This is a single linked list that chains all the "struct epitem" that
···
796 796 list_del_rcu(&epi->fllink);
797 797 spin_unlock(&file->f_lock);
798 798
799 - rb_erase(&epi->rbn, &ep->rbr);
799 + rb_erase_cached(&epi->rbn, &ep->rbr);
800 800
801 801 spin_lock_irqsave(&ep->lock, flags);
802 802 if (ep_is_linked(&epi->rdllink))
···
840 840 /*
841 841 * Walks through the whole tree by unregistering poll callbacks.
842 842 */
843 - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
843 + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
844 844 epi = rb_entry(rbp, struct epitem, rbn);
845 845
846 846 ep_unregister_pollwait(ep, epi);
···
856 856 * a lockdep warning.
857 857 */
858 858 mutex_lock(&ep->mtx);
859 - while ((rbp = rb_first(&ep->rbr)) != NULL) {
859 + while ((rbp = rb_first_cached(&ep->rbr)) != NULL) {
860 860 epi = rb_entry(rbp, struct epitem, rbn);
861 861 ep_remove(ep, epi);
862 862 cond_resched();
···
963 963 struct rb_node *rbp;
964 964
965 965 mutex_lock(&ep->mtx);
966 - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
966 + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
967 967 struct epitem *epi = rb_entry(rbp, struct epitem, rbn);
968 968 struct inode *inode = file_inode(epi->ffd.file);
969 969
···
1040 1040 init_waitqueue_head(&ep->wq);
1041 1041 init_waitqueue_head(&ep->poll_wait);
1042 1042 INIT_LIST_HEAD(&ep->rdllist);
1043 - ep->rbr = RB_ROOT;
1043 + ep->rbr = RB_ROOT_CACHED;
1044 1044 ep->ovflist = EP_UNACTIVE_PTR;
1045 1045 ep->user = user;
1046 1046
···
1066 1066 struct epoll_filefd ffd;
1067 1067
1068 1068 ep_set_ffd(&ffd, file, fd);
1069 - for (rbp = ep->rbr.rb_node; rbp; ) {
1069 + for (rbp = ep->rbr.rb_root.rb_node; rbp; ) {
1070 1070 epi = rb_entry(rbp, struct epitem, rbn);
1071 1071 kcmp = ep_cmp_ffd(&ffd, &epi->ffd);
1072 1072 if (kcmp > 0)
···
1088 1088 struct rb_node *rbp;
1089 1089 struct epitem *epi;
1090 1090
1091 - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1091 + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1092 1092 epi = rb_entry(rbp, struct epitem, rbn);
1093 1093 if (epi->ffd.fd == tfd) {
1094 1094 if (toff == 0)
···
1273 1273 static void ep_rbtree_insert(struct eventpoll *ep, struct epitem *epi)
1274 1274 {
1275 1275 int kcmp;
1276 - struct rb_node **p = &ep->rbr.rb_node, *parent = NULL;
1276 + struct rb_node **p = &ep->rbr.rb_root.rb_node, *parent = NULL;
1277 1277 struct epitem *epic;
1278 + bool leftmost = true;
1278 1279
1279 1280 while (*p) {
1280 1281 parent = *p;
1281 1282 epic = rb_entry(parent, struct epitem, rbn);
1282 1283 kcmp = ep_cmp_ffd(&epi->ffd, &epic->ffd);
1283 - if (kcmp > 0)
1284 + if (kcmp > 0) {
1284 1285 p = &parent->rb_right;
1285 - else
1286 + leftmost = false;
1287 + } else
1286 1288 p = &parent->rb_left;
1287 1289 }
1288 1290 rb_link_node(&epi->rbn, parent, p);
1289 - rb_insert_color(&epi->rbn, &ep->rbr);
1291 + rb_insert_color_cached(&epi->rbn, &ep->rbr, leftmost);
1290 1292 }
1291 1293
1292 1294
···
1532 1530 list_del_rcu(&epi->fllink);
1533 1531 spin_unlock(&tfile->f_lock);
1534 1532
1535 - rb_erase(&epi->rbn, &ep->rbr);
1533 + rb_erase_cached(&epi->rbn, &ep->rbr);
1536 1534
1537 1535 error_unregister:
1538 1536 ep_unregister_pollwait(ep, epi);
···
1880 1878 mutex_lock_nested(&ep->mtx, call_nests + 1);
1881 1879 ep->visited = 1;
1882 1880 list_add(&ep->visited_list_link, &visited_list);
1883 - for (rbp = rb_first(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1881 + for (rbp = rb_first_cached(&ep->rbr); rbp; rbp = rb_next(rbp)) {
1884 1882 epi = rb_entry(rbp, struct epitem, rbn);
1885 1883 if (unlikely(is_file_epoll(epi->ffd.file))) {
1886 1884 ep_tovisit = epi->ffd.file->private_data;