x86, pat: Migrate to rbtree only backend for pat memtype management

Move pat backend to fully rbtree based implementation from the existing
rbtree and linked list hybrid.

New rbtree based solution uses interval trees (augmented rbtrees) in
order to store the PAT ranges. The new code separates out the pat backend
to pat_rbtree.c file, making it cleaner. The change also makes the PAT
lookup, reserve and free operations more optimal, as we don't have to
traverse linear linked list of few tens of entries in normal case.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>
LKML-Reference: <20100210232607.GB11465@linux-os.sc.intel.com>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>

authored by Pallipadi, Venkatesh and committed by H. Peter Anvin 9e41a49a be5a0c12

+296 -205
+1
arch/x86/mm/Makefile
··· 6 6 CFLAGS_physaddr.o := $(nostackp) 7 7 CFLAGS_setup_nx.o := $(nostackp) 8 8 9 + obj-$(CONFIG_X86_PAT) += pat_rbtree.o 9 10 obj-$(CONFIG_SMP) += tlb.o 10 11 11 12 obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
+5 -204
arch/x86/mm/pat.c
··· 130 130 131 131 #undef PAT 132 132 133 - /* 134 - * The global memtype list keeps track of memory type for specific 135 - * physical memory areas. Conflicting memory types in different 136 - * mappings can cause CPU cache corruption. To avoid this we keep track. 137 - * 138 - * The list is sorted based on starting address and can contain multiple 139 - * entries for each address (this allows reference counting for overlapping 140 - * areas). All the aliases have the same cache attributes of course. 141 - * Zero attributes are represented as holes. 142 - * 143 - * The data structure is a list that is also organized as an rbtree 144 - * sorted on the start address of memtype range. 145 - * 146 - * memtype_lock protects both the linear list and rbtree. 147 - */ 148 - 149 - static struct rb_root memtype_rbroot = RB_ROOT; 150 - static LIST_HEAD(memtype_list); 151 - static DEFINE_SPINLOCK(memtype_lock); /* protects memtype list */ 152 - 153 - static struct memtype *memtype_rb_search(struct rb_root *root, u64 start) 154 - { 155 - struct rb_node *node = root->rb_node; 156 - struct memtype *last_lower = NULL; 157 - 158 - while (node) { 159 - struct memtype *data = container_of(node, struct memtype, rb); 160 - 161 - if (data->start < start) { 162 - last_lower = data; 163 - node = node->rb_right; 164 - } else if (data->start > start) { 165 - node = node->rb_left; 166 - } else 167 - return data; 168 - } 169 - 170 - /* Will return NULL if there is no entry with its start <= start */ 171 - return last_lower; 172 - } 173 - 174 - static void memtype_rb_insert(struct rb_root *root, struct memtype *data) 175 - { 176 - struct rb_node **new = &(root->rb_node); 177 - struct rb_node *parent = NULL; 178 - 179 - while (*new) { 180 - struct memtype *this = container_of(*new, struct memtype, rb); 181 - 182 - parent = *new; 183 - if (data->start <= this->start) 184 - new = &((*new)->rb_left); 185 - else if (data->start > this->start) 186 - new = &((*new)->rb_right); 187 - } 188 - 189 - 
rb_link_node(&data->rb, parent, new); 190 - rb_insert_color(&data->rb, root); 191 - } 133 + static DEFINE_SPINLOCK(memtype_lock); /* protects memtype accesses */ 192 134 193 135 /* 194 136 * Does intersection of PAT memory type and MTRR memory type and returns ··· 156 214 } 157 215 158 216 return req_type; 159 - } 160 - 161 - static int 162 - chk_conflict(struct memtype *new, struct memtype *entry, unsigned long *type) 163 - { 164 - if (new->type != entry->type) { 165 - if (type) { 166 - new->type = entry->type; 167 - *type = entry->type; 168 - } else 169 - goto conflict; 170 - } 171 - 172 - /* check overlaps with more than one entry in the list */ 173 - list_for_each_entry_continue(entry, &memtype_list, nd) { 174 - if (new->end <= entry->start) 175 - break; 176 - else if (new->type != entry->type) 177 - goto conflict; 178 - } 179 - return 0; 180 - 181 - conflict: 182 - printk(KERN_INFO "%s:%d conflicting memory types " 183 - "%Lx-%Lx %s<->%s\n", current->comm, current->pid, new->start, 184 - new->end, cattr_name(new->type), cattr_name(entry->type)); 185 - return -EBUSY; 186 217 } 187 218 188 219 static int pat_pagerange_is_ram(unsigned long start, unsigned long end) ··· 243 328 return 0; 244 329 } 245 330 246 - static int memtype_check_insert(struct memtype *new, unsigned long *new_type) 247 - { 248 - struct memtype *entry; 249 - u64 start, end; 250 - unsigned long actual_type; 251 - struct list_head *where; 252 - int err = 0; 253 - 254 - start = new->start; 255 - end = new->end; 256 - actual_type = new->type; 257 - 258 - /* Search for existing mapping that overlaps the current range */ 259 - where = NULL; 260 - list_for_each_entry(entry, &memtype_list, nd) { 261 - if (end <= entry->start) { 262 - where = entry->nd.prev; 263 - break; 264 - } else if (start <= entry->start) { /* end > entry->start */ 265 - err = chk_conflict(new, entry, new_type); 266 - if (!err) { 267 - dprintk("Overlap at 0x%Lx-0x%Lx\n", 268 - entry->start, entry->end); 269 - where = 
entry->nd.prev; 270 - } 271 - break; 272 - } else if (start < entry->end) { /* start > entry->start */ 273 - err = chk_conflict(new, entry, new_type); 274 - if (!err) { 275 - dprintk("Overlap at 0x%Lx-0x%Lx\n", 276 - entry->start, entry->end); 277 - 278 - /* 279 - * Move to right position in the linked 280 - * list to add this new entry 281 - */ 282 - list_for_each_entry_continue(entry, 283 - &memtype_list, nd) { 284 - if (start <= entry->start) { 285 - where = entry->nd.prev; 286 - break; 287 - } 288 - } 289 - } 290 - break; 291 - } 292 - } 293 - if (!err) { 294 - if (where) 295 - list_add(&new->nd, where); 296 - else 297 - list_add_tail(&new->nd, &memtype_list); 298 - 299 - memtype_rb_insert(&memtype_rbroot, new); 300 - } 301 - return err; 302 - } 303 - 304 331 /* 305 332 * req_type typically has one of the: 306 333 * - _PAGE_CACHE_WB ··· 316 459 317 460 spin_lock(&memtype_lock); 318 461 319 - err = memtype_check_insert(new, new_type); 462 + err = rbt_memtype_check_insert(new, new_type); 320 463 if (err) { 321 464 printk(KERN_INFO "reserve_memtype failed 0x%Lx-0x%Lx, " 322 465 "track %s, req %s\n", ··· 338 481 339 482 int free_memtype(u64 start, u64 end) 340 483 { 341 - struct memtype *entry, *saved_entry; 342 484 int err = -EINVAL; 343 485 int is_range_ram; 344 486 ··· 361 505 } 362 506 363 507 spin_lock(&memtype_lock); 364 - 365 - entry = memtype_rb_search(&memtype_rbroot, start); 366 - if (unlikely(entry == NULL)) 367 - goto unlock_ret; 368 - 369 - /* 370 - * Saved entry points to an entry with start same or less than what 371 - * we searched for. 
Now go through the list in both directions to look 372 - * for the entry that matches with both start and end, with list stored 373 - * in sorted start address 374 - */ 375 - saved_entry = entry; 376 - list_for_each_entry_from(entry, &memtype_list, nd) { 377 - if (entry->start == start && entry->end == end) { 378 - rb_erase(&entry->rb, &memtype_rbroot); 379 - list_del(&entry->nd); 380 - kfree(entry); 381 - err = 0; 382 - break; 383 - } else if (entry->start > start) { 384 - break; 385 - } 386 - } 387 - 388 - if (!err) 389 - goto unlock_ret; 390 - 391 - entry = saved_entry; 392 - list_for_each_entry_reverse(entry, &memtype_list, nd) { 393 - if (entry->start == start && entry->end == end) { 394 - rb_erase(&entry->rb, &memtype_rbroot); 395 - list_del(&entry->nd); 396 - kfree(entry); 397 - err = 0; 398 - break; 399 - } else if (entry->start < start) { 400 - break; 401 - } 402 - } 403 - unlock_ret: 508 + err = rbt_memtype_erase(start, end); 404 509 spin_unlock(&memtype_lock); 405 510 406 511 if (err) { ··· 410 593 411 594 spin_lock(&memtype_lock); 412 595 413 - entry = memtype_rb_search(&memtype_rbroot, paddr); 596 + entry = rbt_memtype_lookup(paddr); 414 597 if (entry != NULL) 415 598 rettype = entry->type; 416 599 else ··· 747 930 748 931 #if defined(CONFIG_DEBUG_FS) && defined(CONFIG_X86_PAT) 749 932 750 - /* get Nth element of the linked list */ 751 - static int copy_memtype_nth_element(struct memtype *out, loff_t pos) 752 - { 753 - struct memtype *list_node; 754 - int i = 1; 755 - 756 - list_for_each_entry(list_node, &memtype_list, nd) { 757 - if (pos == i) { 758 - *out = *list_node; 759 - return 0; 760 - } 761 - ++i; 762 - } 763 - return 1; 764 - } 765 - 766 933 static struct memtype *memtype_get_idx(loff_t pos) 767 934 { 768 935 struct memtype *print_entry; ··· 757 956 return NULL; 758 957 759 958 spin_lock(&memtype_lock); 760 - ret = copy_memtype_nth_element(print_entry, pos); 959 + ret = rbt_memtype_copy_nth_element(print_entry, pos); 761 960 
spin_unlock(&memtype_lock); 762 961 763 962 if (!ret) {
+19 -1
arch/x86/mm/pat_internal.h
··· 9 9 struct memtype { 10 10 u64 start; 11 11 u64 end; 12 + u64 subtree_max_end; 12 13 unsigned long type; 13 - struct list_head nd; 14 14 struct rb_node rb; 15 15 }; 16 16 ··· 24 24 default: return "broken"; 25 25 } 26 26 } 27 + 28 + #ifdef CONFIG_X86_PAT 29 + extern int rbt_memtype_check_insert(struct memtype *new, 30 + unsigned long *new_type); 31 + extern int rbt_memtype_erase(u64 start, u64 end); 32 + extern struct memtype *rbt_memtype_lookup(u64 addr); 33 + extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); 34 + #else 35 + static inline int rbt_memtype_check_insert(struct memtype *new, 36 + unsigned long *new_type) 37 + { return 0; } 38 + static inline int rbt_memtype_erase(u64 start, u64 end) 39 + { return 0; } 40 + static inline struct memtype *rbt_memtype_lookup(u64 addr) 41 + { return NULL; } 42 + static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) 43 + { return 0; } 44 + #endif 27 45 28 46 #endif /* __PAT_INTERNAL_H_ */
+271
arch/x86/mm/pat_rbtree.c
··· 1 + /* 2 + * Handle caching attributes in page tables (PAT) 3 + * 4 + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> 5 + * Suresh B Siddha <suresh.b.siddha@intel.com> 6 + * 7 + * Interval tree (augmented rbtree) used to store the PAT memory type 8 + * reservations. 9 + */ 10 + 11 + #include <linux/seq_file.h> 12 + #include <linux/debugfs.h> 13 + #include <linux/kernel.h> 14 + #include <linux/module.h> 15 + #include <linux/rbtree.h> 16 + #include <linux/sched.h> 17 + #include <linux/gfp.h> 18 + 19 + #include <asm/pgtable.h> 20 + #include <asm/pat.h> 21 + 22 + #include "pat_internal.h" 23 + 24 + /* 25 + * The memtype tree keeps track of memory type for specific 26 + * physical memory areas. Without proper tracking, conflicting memory 27 + * types in different mappings can cause CPU cache corruption. 28 + * 29 + * The tree is an interval tree (augmented rbtree) with tree ordered 30 + * on starting address. Tree can contain multiple entries for 31 + * different regions which overlap. All the aliases have the same 32 + * cache attributes of course. 33 + * 34 + * memtype_lock protects the rbtree. 
35 + */ 36 + 37 + static void memtype_rb_augment_cb(struct rb_node *node); 38 + static struct rb_root memtype_rbroot = RB_AUGMENT_ROOT(&memtype_rb_augment_cb); 39 + 40 + static int is_node_overlap(struct memtype *node, u64 start, u64 end) 41 + { 42 + if (node->start >= end || node->end <= start) 43 + return 0; 44 + 45 + return 1; 46 + } 47 + 48 + static u64 get_subtree_max_end(struct rb_node *node) 49 + { 50 + u64 ret = 0; 51 + if (node) { 52 + struct memtype *data = container_of(node, struct memtype, rb); 53 + ret = data->subtree_max_end; 54 + } 55 + return ret; 56 + } 57 + 58 + /* Update 'subtree_max_end' for a node, based on node and its children */ 59 + static void update_node_max_end(struct rb_node *node) 60 + { 61 + struct memtype *data; 62 + u64 max_end, child_max_end; 63 + 64 + if (!node) 65 + return; 66 + 67 + data = container_of(node, struct memtype, rb); 68 + max_end = data->end; 69 + 70 + child_max_end = get_subtree_max_end(node->rb_right); 71 + if (child_max_end > max_end) 72 + max_end = child_max_end; 73 + 74 + child_max_end = get_subtree_max_end(node->rb_left); 75 + if (child_max_end > max_end) 76 + max_end = child_max_end; 77 + 78 + data->subtree_max_end = max_end; 79 + } 80 + 81 + /* Update 'subtree_max_end' for a node and all its ancestors */ 82 + static void update_path_max_end(struct rb_node *node) 83 + { 84 + u64 old_max_end, new_max_end; 85 + 86 + while (node) { 87 + struct memtype *data = container_of(node, struct memtype, rb); 88 + 89 + old_max_end = data->subtree_max_end; 90 + update_node_max_end(node); 91 + new_max_end = data->subtree_max_end; 92 + 93 + if (new_max_end == old_max_end) 94 + break; 95 + 96 + node = rb_parent(node); 97 + } 98 + } 99 + 100 + /* Find the first (lowest start addr) overlapping range from rb tree */ 101 + static struct memtype *memtype_rb_lowest_match(struct rb_root *root, 102 + u64 start, u64 end) 103 + { 104 + struct rb_node *node = root->rb_node; 105 + struct memtype *last_lower = NULL; 106 + 107 + while (node) 
{ 108 + struct memtype *data = container_of(node, struct memtype, rb); 109 + 110 + if (get_subtree_max_end(node->rb_left) > start) { 111 + /* Lowest overlap if any must be on left side */ 112 + node = node->rb_left; 113 + } else if (is_node_overlap(data, start, end)) { 114 + last_lower = data; 115 + break; 116 + } else if (start >= data->start) { 117 + /* Lowest overlap if any must be on right side */ 118 + node = node->rb_right; 119 + } else { 120 + break; 121 + } 122 + } 123 + return last_lower; /* Returns NULL if there is no overlap */ 124 + } 125 + 126 + static struct memtype *memtype_rb_exact_match(struct rb_root *root, 127 + u64 start, u64 end) 128 + { 129 + struct memtype *match; 130 + 131 + match = memtype_rb_lowest_match(root, start, end); 132 + while (match != NULL && match->start < end) { 133 + struct rb_node *node; 134 + 135 + if (match->start == start && match->end == end) 136 + return match; 137 + 138 + node = rb_next(&match->rb); 139 + if (node) 140 + match = container_of(node, struct memtype, rb); 141 + else 142 + match = NULL; 143 + } 144 + 145 + return NULL; /* Returns NULL if there is no exact match */ 146 + } 147 + 148 + static int memtype_rb_check_conflict(struct rb_root *root, 149 + u64 start, u64 end, 150 + unsigned long reqtype, unsigned long *newtype) 151 + { 152 + struct rb_node *node; 153 + struct memtype *match; 154 + int found_type = reqtype; 155 + 156 + match = memtype_rb_lowest_match(&memtype_rbroot, start, end); 157 + if (match == NULL) 158 + goto success; 159 + 160 + if (match->type != found_type && newtype == NULL) 161 + goto failure; 162 + 163 + dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); 164 + found_type = match->type; 165 + 166 + node = rb_next(&match->rb); 167 + while (node) { 168 + match = container_of(node, struct memtype, rb); 169 + 170 + if (match->start >= end) /* Checked all possible matches */ 171 + goto success; 172 + 173 + if (is_node_overlap(match, start, end) && 174 + match->type != found_type) { 
175 + goto failure; 176 + } 177 + 178 + node = rb_next(&match->rb); 179 + } 180 + success: 181 + if (newtype) 182 + *newtype = found_type; 183 + 184 + return 0; 185 + 186 + failure: 187 + printk(KERN_INFO "%s:%d conflicting memory types " 188 + "%Lx-%Lx %s<->%s\n", current->comm, current->pid, start, 189 + end, cattr_name(found_type), cattr_name(match->type)); 190 + return -EBUSY; 191 + } 192 + 193 + static void memtype_rb_augment_cb(struct rb_node *node) 194 + { 195 + if (node) 196 + update_path_max_end(node); 197 + } 198 + 199 + static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) 200 + { 201 + struct rb_node **node = &(root->rb_node); 202 + struct rb_node *parent = NULL; 203 + 204 + while (*node) { 205 + struct memtype *data = container_of(*node, struct memtype, rb); 206 + 207 + parent = *node; 208 + if (newdata->start <= data->start) 209 + node = &((*node)->rb_left); 210 + else if (newdata->start > data->start) 211 + node = &((*node)->rb_right); 212 + } 213 + 214 + rb_link_node(&newdata->rb, parent, node); 215 + rb_insert_color(&newdata->rb, root); 216 + } 217 + 218 + int rbt_memtype_check_insert(struct memtype *new, unsigned long *ret_type) 219 + { 220 + int err = 0; 221 + 222 + err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, 223 + new->type, ret_type); 224 + 225 + if (!err) { 226 + new->type = *ret_type; 227 + memtype_rb_insert(&memtype_rbroot, new); 228 + } 229 + return err; 230 + } 231 + 232 + int rbt_memtype_erase(u64 start, u64 end) 233 + { 234 + struct memtype *data; 235 + 236 + data = memtype_rb_exact_match(&memtype_rbroot, start, end); 237 + if (!data) 238 + return -EINVAL; 239 + 240 + rb_erase(&data->rb, &memtype_rbroot); 241 + return 0; 242 + } 243 + 244 + struct memtype *rbt_memtype_lookup(u64 addr) 245 + { 246 + struct memtype *data; 247 + data = memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); 248 + return data; 249 + } 250 + 251 + #if defined(CONFIG_DEBUG_FS) 252 + int 
rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) 253 + { 254 + struct rb_node *node; 255 + int i = 1; 256 + 257 + node = rb_first(&memtype_rbroot); 258 + while (node && pos != i) { 259 + node = rb_next(node); 260 + i++; 261 + } 262 + 263 + if (node) { /* pos == i */ 264 + struct memtype *this = container_of(node, struct memtype, rb); 265 + *out = *this; 266 + return 0; 267 + } else { 268 + return 1; 269 + } 270 + } 271 + #endif