// SPDX-License-Identifier: GPL-2.0
/*
 * Lockless hierarchical page accounting & limiting
 *
 * Copyright (C) 2014 Red Hat, Inc., Johannes Weiner
 */

#include <linux/page_counter.h>
#include <linux/atomic.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/sched.h>
#include <linux/bug.h>
#include <asm/page.h>

static void propagate_protected_usage(struct page_counter *c,
                                      unsigned long usage)
{
        unsigned long protected, old_protected;
        long delta;

        if (!c->parent)
                return;

        if (c->min || atomic_long_read(&c->min_usage)) {
                if (usage <= c->min)
                        protected = usage;
                else
                        protected = 0;

                old_protected = atomic_long_xchg(&c->min_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_min_usage);
        }

        if (c->low || atomic_long_read(&c->low_usage)) {
                if (usage <= c->low)
                        protected = usage;
                else
                        protected = 0;

                old_protected = atomic_long_xchg(&c->low_usage, protected);
                delta = protected - old_protected;
                if (delta)
                        atomic_long_add(delta, &c->parent->children_low_usage);
        }
}

/**
 * page_counter_cancel - take pages out of the local counter
 * @counter: counter
 * @nr_pages: number of pages to cancel
 */
void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages)
{
        long new;

        new = atomic_long_sub_return(nr_pages, &counter->usage);
        propagate_protected_usage(counter, new);
        /* More uncharges than charges? */
        WARN_ON_ONCE(new < 0);
}

/**
 * page_counter_charge - hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 *
 * NOTE: This does not consider any configured counter limits.
 */
void page_counter_charge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;

                new = atomic_long_add_return(nr_pages, &c->usage);
                propagate_protected_usage(c, new);
                /*
                 * This is indeed racy, but we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
}

/**
 * page_counter_try_charge - try to hierarchically charge pages
 * @counter: counter
 * @nr_pages: number of pages to charge
 * @fail: points first counter to hit its limit, if any
 *
 * Returns %true on success, or %false and @fail if the counter or one
 * of its ancestors has hit its configured limit.
 */
bool page_counter_try_charge(struct page_counter *counter,
                             unsigned long nr_pages,
                             struct page_counter **fail)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent) {
                long new;
                /*
                 * Charge speculatively to avoid an expensive CAS. If
                 * a bigger charge fails, it might falsely lock out a
                 * racing smaller charge and send it into reclaim
                 * early, but the error is limited to the difference
                 * between the two sizes, which is less than 2M/4M in
                 * case of a THP locking out a regular page charge.
                 *
                 * The atomic_long_add_return() implies a full memory
                 * barrier between incrementing the count and reading
                 * the limit. When racing with page_counter_set_max(),
                 * we either see the new limit or the setter sees the
                 * counter has changed and retries.
                 */
                new = atomic_long_add_return(nr_pages, &c->usage);
                if (new > c->max) {
                        atomic_long_sub(nr_pages, &c->usage);
                        propagate_protected_usage(c, new);
                        /*
                         * This is racy, but we can live with some
                         * inaccuracy in the failcnt.
                         */
                        c->failcnt++;
                        *fail = c;
                        goto failed;
                }
                propagate_protected_usage(c, new);
                /*
                 * Just like with failcnt, we can live with some
                 * inaccuracy in the watermark.
                 */
                if (new > c->watermark)
                        c->watermark = new;
        }
        return true;

failed:
        for (c = counter; c != *fail; c = c->parent)
                page_counter_cancel(c, nr_pages);

        return false;
}
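
/*
 * Illustrative sketch, not part of the original file: one plausible
 * charge/uncharge cycle against a two-level hierarchy. The "demo_"
 * name and the PAGE_COUNTER_DEMO guard are hypothetical; only the
 * page_counter_*() functions above and page_counter_init() from
 * <linux/page_counter.h> are real interfaces.
 */
#ifdef PAGE_COUNTER_DEMO
static int demo_try_charge(struct page_counter *parent,
                           struct page_counter *child,
                           unsigned long nr_pages)
{
        struct page_counter *fail;

        page_counter_init(parent, NULL);        /* root of the hierarchy */
        page_counter_init(child, parent);       /* child charges into parent */

        /*
         * The charge is applied speculatively to @child and each
         * ancestor; if any of them is over its limit, the partial
         * charge has already been rolled back and @fail names the
         * counter that refused it.
         */
        if (!page_counter_try_charge(child, nr_pages, &fail))
                return -ENOMEM; /* a real caller would reclaim and retry */

        /* Uncharge walks the same parent chain and drops the pages again. */
        page_counter_uncharge(child, nr_pages);
        return 0;
}
#endif /* PAGE_COUNTER_DEMO */
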
/**
 * page_counter_uncharge - hierarchically uncharge pages
 * @counter: counter
 * @nr_pages: number of pages to uncharge
 */
void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        for (c = counter; c; c = c->parent)
                page_counter_cancel(c, nr_pages);
}

/**
 * page_counter_set_max - set the maximum number of pages allowed
 * @counter: counter
 * @nr_pages: limit to set
 *
 * Returns 0 on success, -EBUSY if the current number of pages on the
 * counter already exceeds the specified limit.
 *
 * The caller must serialize invocations on the same counter.
 */
int page_counter_set_max(struct page_counter *counter, unsigned long nr_pages)
{
        for (;;) {
                unsigned long old;
                long usage;

                /*
                 * Update the limit while making sure that it's not
                 * below the concurrently-changing counter value.
                 *
                 * The xchg implies two full memory barriers before
                 * and after, so the read-swap-read is ordered and
                 * ensures coherency with page_counter_try_charge():
                 * that function modifies the count before checking
                 * the limit, so if it sees the old limit, we see the
                 * modified counter and retry.
                 */
                usage = atomic_long_read(&counter->usage);

                if (usage > nr_pages)
                        return -EBUSY;

                old = xchg(&counter->max, nr_pages);

                if (atomic_long_read(&counter->usage) <= usage)
                        return 0;

                counter->max = old;
                cond_resched();
        }
}

/**
 * page_counter_set_min - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_min(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->min = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}

/**
 * page_counter_set_low - set the amount of protected memory
 * @counter: counter
 * @nr_pages: value to set
 *
 * The caller must serialize invocations on the same counter.
 */
void page_counter_set_low(struct page_counter *counter, unsigned long nr_pages)
{
        struct page_counter *c;

        counter->low = nr_pages;

        for (c = counter; c; c = c->parent)
                propagate_protected_usage(c, atomic_long_read(&c->usage));
}
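
/*
 * Illustrative sketch, not part of the original file: applying a limit
 * and protection values the way an interface-file write might, under
 * the documented assumption that the caller serializes the setters.
 * The "demo_" name and the PAGE_COUNTER_DEMO guard are hypothetical.
 */
#ifdef PAGE_COUNTER_DEMO
static int demo_configure(struct page_counter *counter, unsigned long max,
                          unsigned long min, unsigned long low)
{
        int err;

        /*
         * Lowering the limit below the current usage fails with
         * -EBUSY rather than clamping; the caller decides whether
         * to reclaim and retry.
         */
        err = page_counter_set_max(counter, max);
        if (err)
                return err;

        /*
         * min/low only record the protection values and re-propagate
         * the currently protected usage up the hierarchy; they cannot
         * fail.
         */
        page_counter_set_min(counter, min);
        page_counter_set_low(counter, low);
        return 0;
}
#endif /* PAGE_COUNTER_DEMO */
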
/**
 * page_counter_memparse - memparse() for page counter limits
 * @buf: string to parse
 * @max: string meaning maximum possible value
 * @nr_pages: returns the result in number of pages
 *
 * Returns -EINVAL, or 0 and @nr_pages on success. @nr_pages will be
 * limited to %PAGE_COUNTER_MAX.
 */
int page_counter_memparse(const char *buf, const char *max,
                          unsigned long *nr_pages)
{
        char *end;
        u64 bytes;

        if (!strcmp(buf, max)) {
                *nr_pages = PAGE_COUNTER_MAX;
                return 0;
        }

        bytes = memparse(buf, &end);
        if (*end != '\0')
                return -EINVAL;

        *nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);

        return 0;
}
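
/*
 * Illustrative sketch, not part of the original file: parsing a limit
 * string and applying it, roughly what a cgroup interface handler
 * might do. The "demo_" name, the PAGE_COUNTER_DEMO guard and the
 * example input are hypothetical.
 */
#ifdef PAGE_COUNTER_DEMO
static int demo_write_max(struct page_counter *counter, const char *buf)
{
        unsigned long nr_pages;
        int err;

        /*
         * "max" selects PAGE_COUNTER_MAX; anything else is parsed by
         * memparse() as bytes (e.g. "512M") and converted to pages.
         */
        err = page_counter_memparse(buf, "max", &nr_pages);
        if (err)
                return err;

        return page_counter_set_max(counter, nr_pages);
}
#endif /* PAGE_COUNTER_DEMO */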