Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.33-rc1 339 lines 8.4 kB view raw
1/* 2 * Copyright (C) 2007-2009 NEC Corporation. All Rights Reserved. 3 * 4 * Module Author: Kiyoshi Ueda 5 * 6 * This file is released under the GPL. 7 * 8 * Throughput oriented path selector. 9 */ 10 11#include "dm.h" 12#include "dm-path-selector.h" 13 14#define DM_MSG_PREFIX "multipath service-time" 15#define ST_MIN_IO 1 16#define ST_MAX_RELATIVE_THROUGHPUT 100 17#define ST_MAX_RELATIVE_THROUGHPUT_SHIFT 7 18#define ST_MAX_INFLIGHT_SIZE ((size_t)-1 >> ST_MAX_RELATIVE_THROUGHPUT_SHIFT) 19#define ST_VERSION "0.2.0" 20 21struct selector { 22 struct list_head valid_paths; 23 struct list_head failed_paths; 24}; 25 26struct path_info { 27 struct list_head list; 28 struct dm_path *path; 29 unsigned repeat_count; 30 unsigned relative_throughput; 31 atomic_t in_flight_size; /* Total size of in-flight I/Os */ 32}; 33 34static struct selector *alloc_selector(void) 35{ 36 struct selector *s = kmalloc(sizeof(*s), GFP_KERNEL); 37 38 if (s) { 39 INIT_LIST_HEAD(&s->valid_paths); 40 INIT_LIST_HEAD(&s->failed_paths); 41 } 42 43 return s; 44} 45 46static int st_create(struct path_selector *ps, unsigned argc, char **argv) 47{ 48 struct selector *s = alloc_selector(); 49 50 if (!s) 51 return -ENOMEM; 52 53 ps->context = s; 54 return 0; 55} 56 57static void free_paths(struct list_head *paths) 58{ 59 struct path_info *pi, *next; 60 61 list_for_each_entry_safe(pi, next, paths, list) { 62 list_del(&pi->list); 63 kfree(pi); 64 } 65} 66 67static void st_destroy(struct path_selector *ps) 68{ 69 struct selector *s = ps->context; 70 71 free_paths(&s->valid_paths); 72 free_paths(&s->failed_paths); 73 kfree(s); 74 ps->context = NULL; 75} 76 77static int st_status(struct path_selector *ps, struct dm_path *path, 78 status_type_t type, char *result, unsigned maxlen) 79{ 80 unsigned sz = 0; 81 struct path_info *pi; 82 83 if (!path) 84 DMEMIT("0 "); 85 else { 86 pi = path->pscontext; 87 88 switch (type) { 89 case STATUSTYPE_INFO: 90 DMEMIT("%d %u ", atomic_read(&pi->in_flight_size), 91 pi->relative_throughput); 92 break; 93 case STATUSTYPE_TABLE: 94 DMEMIT("%u %u ", pi->repeat_count, 95 pi->relative_throughput); 96 break; 97 } 98 } 99 100 return sz; 101} 102 103static int st_add_path(struct path_selector *ps, struct dm_path *path, 104 int argc, char **argv, char **error) 105{ 106 struct selector *s = ps->context; 107 struct path_info *pi; 108 unsigned repeat_count = ST_MIN_IO; 109 unsigned relative_throughput = 1; 110 111 /* 112 * Arguments: [<repeat_count> [<relative_throughput>]] 113 * <repeat_count>: The number of I/Os before switching path. 114 * If not given, default (ST_MIN_IO) is used. 115 * <relative_throughput>: The relative throughput value of 116 * the path among all paths in the path-group. 117 * The valid range: 0-<ST_MAX_RELATIVE_THROUGHPUT> 118 * If not given, minimum value '1' is used. 119 * If '0' is given, the path isn't selected while 120 * other paths having a positive value are 121 * available. 122 */ 123 if (argc > 2) { 124 *error = "service-time ps: incorrect number of arguments"; 125 return -EINVAL; 126 } 127 128 if (argc && (sscanf(argv[0], "%u", &repeat_count) != 1)) { 129 *error = "service-time ps: invalid repeat count"; 130 return -EINVAL; 131 } 132 133 if ((argc == 2) && 134 (sscanf(argv[1], "%u", &relative_throughput) != 1 || 135 relative_throughput > ST_MAX_RELATIVE_THROUGHPUT)) { 136 *error = "service-time ps: invalid relative_throughput value"; 137 return -EINVAL; 138 } 139 140 /* allocate the path */ 141 pi = kmalloc(sizeof(*pi), GFP_KERNEL); 142 if (!pi) { 143 *error = "service-time ps: Error allocating path context"; 144 return -ENOMEM; 145 } 146 147 pi->path = path; 148 pi->repeat_count = repeat_count; 149 pi->relative_throughput = relative_throughput; 150 atomic_set(&pi->in_flight_size, 0); 151 152 path->pscontext = pi; 153 154 list_add_tail(&pi->list, &s->valid_paths); 155 156 return 0; 157} 158 159static void st_fail_path(struct path_selector *ps, struct dm_path *path) 160{ 161 struct selector *s = ps->context; 162 struct path_info *pi = path->pscontext; 163 164 list_move(&pi->list, &s->failed_paths); 165} 166 167static int st_reinstate_path(struct path_selector *ps, struct dm_path *path) 168{ 169 struct selector *s = ps->context; 170 struct path_info *pi = path->pscontext; 171 172 list_move_tail(&pi->list, &s->valid_paths); 173 174 return 0; 175} 176 177/* 178 * Compare the estimated service time of 2 paths, pi1 and pi2, 179 * for the incoming I/O. 180 * 181 * Returns: 182 * < 0 : pi1 is better 183 * 0 : no difference between pi1 and pi2 184 * > 0 : pi2 is better 185 * 186 * Description: 187 * Basically, the service time is estimated by: 188 * ('pi->in-flight-size' + 'incoming') / 'pi->relative_throughput' 189 * To reduce the calculation, some optimizations are made. 190 * (See comments inline) 191 */ 192static int st_compare_load(struct path_info *pi1, struct path_info *pi2, 193 size_t incoming) 194{ 195 size_t sz1, sz2, st1, st2; 196 197 sz1 = atomic_read(&pi1->in_flight_size); 198 sz2 = atomic_read(&pi2->in_flight_size); 199 200 /* 201 * Case 1: Both have same throughput value. Choose less loaded path. 202 */ 203 if (pi1->relative_throughput == pi2->relative_throughput) 204 return sz1 - sz2; 205 206 /* 207 * Case 2a: Both have same load. Choose higher throughput path. 208 * Case 2b: One path has no throughput value. Choose the other one. 209 */ 210 if (sz1 == sz2 || 211 !pi1->relative_throughput || !pi2->relative_throughput) 212 return pi2->relative_throughput - pi1->relative_throughput; 213 214 /* 215 * Case 3: Calculate service time. Choose faster path. 216 * Service time using pi1: 217 * st1 = (sz1 + incoming) / pi1->relative_throughput 218 * Service time using pi2: 219 * st2 = (sz2 + incoming) / pi2->relative_throughput 220 * 221 * To avoid the division, transform the expression to use 222 * multiplication. 223 * Because ->relative_throughput > 0 here, if st1 < st2, 224 * the expressions below are the same meaning: 225 * (sz1 + incoming) / pi1->relative_throughput < 226 * (sz2 + incoming) / pi2->relative_throughput 227 * (sz1 + incoming) * pi2->relative_throughput < 228 * (sz2 + incoming) * pi1->relative_throughput 229 * So use the later one. 230 */ 231 sz1 += incoming; 232 sz2 += incoming; 233 if (unlikely(sz1 >= ST_MAX_INFLIGHT_SIZE || 234 sz2 >= ST_MAX_INFLIGHT_SIZE)) { 235 /* 236 * Size may be too big for multiplying pi->relative_throughput 237 * and overflow. 238 * To avoid the overflow and mis-selection, shift down both. 239 */ 240 sz1 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; 241 sz2 >>= ST_MAX_RELATIVE_THROUGHPUT_SHIFT; 242 } 243 st1 = sz1 * pi2->relative_throughput; 244 st2 = sz2 * pi1->relative_throughput; 245 if (st1 != st2) 246 return st1 - st2; 247 248 /* 249 * Case 4: Service time is equal. Choose higher throughput path. 250 */ 251 return pi2->relative_throughput - pi1->relative_throughput; 252} 253 254static struct dm_path *st_select_path(struct path_selector *ps, 255 unsigned *repeat_count, size_t nr_bytes) 256{ 257 struct selector *s = ps->context; 258 struct path_info *pi = NULL, *best = NULL; 259 260 if (list_empty(&s->valid_paths)) 261 return NULL; 262 263 /* Change preferred (first in list) path to evenly balance. */ 264 list_move_tail(s->valid_paths.next, &s->valid_paths); 265 266 list_for_each_entry(pi, &s->valid_paths, list) 267 if (!best || (st_compare_load(pi, best, nr_bytes) < 0)) 268 best = pi; 269 270 if (!best) 271 return NULL; 272 273 *repeat_count = best->repeat_count; 274 275 return best->path; 276} 277 278static int st_start_io(struct path_selector *ps, struct dm_path *path, 279 size_t nr_bytes) 280{ 281 struct path_info *pi = path->pscontext; 282 283 atomic_add(nr_bytes, &pi->in_flight_size); 284 285 return 0; 286} 287 288static int st_end_io(struct path_selector *ps, struct dm_path *path, 289 size_t nr_bytes) 290{ 291 struct path_info *pi = path->pscontext; 292 293 atomic_sub(nr_bytes, &pi->in_flight_size); 294 295 return 0; 296} 297 298static struct path_selector_type st_ps = { 299 .name = "service-time", 300 .module = THIS_MODULE, 301 .table_args = 2, 302 .info_args = 2, 303 .create = st_create, 304 .destroy = st_destroy, 305 .status = st_status, 306 .add_path = st_add_path, 307 .fail_path = st_fail_path, 308 .reinstate_path = st_reinstate_path, 309 .select_path = st_select_path, 310 .start_io = st_start_io, 311 .end_io = st_end_io, 312}; 313 314static int __init dm_st_init(void) 315{ 316 int r = dm_register_path_selector(&st_ps); 317 318 if (r < 0) 319 DMERR("register failed %d", r); 320 321 DMINFO("version " ST_VERSION " loaded"); 322 323 return r; 324} 325 326static void __exit dm_st_exit(void) 327{ 328 int r = dm_unregister_path_selector(&st_ps); 329 330 if (r < 0) 331 DMERR("unregister failed %d", r); 332} 333 334module_init(dm_st_init); 335module_exit(dm_st_exit); 336 337MODULE_DESCRIPTION(DM_NAME " throughput oriented path selector"); 338MODULE_AUTHOR("Kiyoshi Ueda <k-ueda@ct.jp.nec.com>"); 339MODULE_LICENSE("GPL");