Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.14-rc2 346 lines 8.2 kB view raw
1/* 2 * Weighted random policy for multipath. 3 * 4 * 5 * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ 6 * 7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 */ 14 15#include <linux/config.h> 16#include <asm/system.h> 17#include <asm/uaccess.h> 18#include <linux/types.h> 19#include <linux/sched.h> 20#include <linux/errno.h> 21#include <linux/timer.h> 22#include <linux/mm.h> 23#include <linux/kernel.h> 24#include <linux/fcntl.h> 25#include <linux/stat.h> 26#include <linux/socket.h> 27#include <linux/in.h> 28#include <linux/inet.h> 29#include <linux/netdevice.h> 30#include <linux/inetdevice.h> 31#include <linux/igmp.h> 32#include <linux/proc_fs.h> 33#include <linux/seq_file.h> 34#include <linux/module.h> 35#include <linux/mroute.h> 36#include <linux/init.h> 37#include <net/ip.h> 38#include <net/protocol.h> 39#include <linux/skbuff.h> 40#include <net/sock.h> 41#include <net/icmp.h> 42#include <net/udp.h> 43#include <net/raw.h> 44#include <linux/notifier.h> 45#include <linux/if_arp.h> 46#include <linux/netfilter_ipv4.h> 47#include <net/ipip.h> 48#include <net/checksum.h> 49#include <net/ip_fib.h> 50#include <net/ip_mp_alg.h> 51 52#define MULTIPATH_STATE_SIZE 15 53 54struct multipath_candidate { 55 struct multipath_candidate *next; 56 int power; 57 struct rtable *rt; 58}; 59 60struct multipath_dest { 61 struct list_head list; 62 63 const struct fib_nh *nh_info; 64 __u32 netmask; 65 __u32 network; 66 unsigned char prefixlen; 67 68 struct rcu_head rcu; 69}; 70 71struct multipath_bucket { 72 struct list_head head; 73 spinlock_t lock; 74}; 75 76struct multipath_route { 77 struct list_head list; 78 79 int oif; 80 __u32 gw; 81 struct list_head dests; 82 83 struct rcu_head rcu; 84}; 85 86/* state: primarily weight per route information */ 87static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; 88 89/* interface to random number generation */ 90static unsigned int RANDOM_SEED = 93186752; 91 92static inline unsigned int random(unsigned int ubound) 93{ 94 static unsigned int a = 1588635695, 95 q = 2, 96 r = 1117695901; 97 RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q); 98 return RANDOM_SEED % ubound; 99} 100 101static unsigned char __multipath_lookup_weight(const struct flowi *fl, 102 const struct rtable *rt) 103{ 104 const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; 105 struct multipath_route *r; 106 struct multipath_route *target_route = NULL; 107 struct multipath_dest *d; 108 int weight = 1; 109 110 /* lookup the weight information for a certain route */ 111 rcu_read_lock(); 112 113 /* find state entry for gateway or add one if necessary */ 114 list_for_each_entry_rcu(r, &state[state_idx].head, list) { 115 if (r->gw == rt->rt_gateway && 116 r->oif == rt->idev->dev->ifindex) { 117 target_route = r; 118 break; 119 } 120 } 121 122 if (!target_route) { 123 /* this should not happen... but we are prepared */ 124 printk( KERN_CRIT"%s: missing state for gateway: %u and " \ 125 "device %d\n", __FUNCTION__, rt->rt_gateway, 126 rt->idev->dev->ifindex); 127 goto out; 128 } 129 130 /* find state entry for destination */ 131 list_for_each_entry_rcu(d, &target_route->dests, list) { 132 __u32 targetnetwork = fl->fl4_dst & 133 (0xFFFFFFFF >> (32 - d->prefixlen)); 134 135 if ((targetnetwork & d->netmask) == d->network) { 136 weight = d->nh_info->nh_weight; 137 goto out; 138 } 139 } 140 141out: 142 rcu_read_unlock(); 143 return weight; 144} 145 146static void wrandom_init_state(void) 147{ 148 int i; 149 150 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { 151 INIT_LIST_HEAD(&state[i].head); 152 spin_lock_init(&state[i].lock); 153 } 154} 155 156static void wrandom_select_route(const struct flowi *flp, 157 struct rtable *first, 158 struct rtable **rp) 159{ 160 struct rtable *rt; 161 struct rtable *decision; 162 struct multipath_candidate *first_mpc = NULL; 163 struct multipath_candidate *mpc, *last_mpc = NULL; 164 int power = 0; 165 int last_power; 166 int selector; 167 const size_t size_mpc = sizeof(struct multipath_candidate); 168 169 /* collect all candidates and identify their weights */ 170 for (rt = rcu_dereference(first); rt; 171 rt = rcu_dereference(rt->u.rt_next)) { 172 if ((rt->u.dst.flags & DST_BALANCED) != 0 && 173 multipath_comparekeys(&rt->fl, flp)) { 174 struct multipath_candidate* mpc = 175 (struct multipath_candidate*) 176 kmalloc(size_mpc, GFP_ATOMIC); 177 178 if (!mpc) 179 return; 180 181 power += __multipath_lookup_weight(flp, rt) * 10000; 182 183 mpc->power = power; 184 mpc->rt = rt; 185 mpc->next = NULL; 186 187 if (!first_mpc) 188 first_mpc = mpc; 189 else 190 last_mpc->next = mpc; 191 192 last_mpc = mpc; 193 } 194 } 195 196 /* choose a weighted random candidate */ 197 decision = first; 198 selector = random(power); 199 last_power = 0; 200 201 /* select candidate, adjust GC data and cleanup local state */ 202 decision = first; 203 last_mpc = NULL; 204 for (mpc = first_mpc; mpc; mpc = mpc->next) { 205 mpc->rt->u.dst.lastuse = jiffies; 206 if (last_power <= selector && selector < mpc->power) 207 decision = mpc->rt; 208 209 last_power = mpc->power; 210 if (last_mpc) 211 kfree(last_mpc); 212 213 last_mpc = mpc; 214 } 215 216 if (last_mpc) { 217 /* concurrent __multipath_flush may lead to !last_mpc */ 218 kfree(last_mpc); 219 } 220 221 decision->u.dst.__use++; 222 *rp = decision; 223} 224 225static void wrandom_set_nhinfo(__u32 network, 226 __u32 netmask, 227 unsigned char prefixlen, 228 const struct fib_nh *nh) 229{ 230 const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; 231 struct multipath_route *r, *target_route = NULL; 232 struct multipath_dest *d, *target_dest = NULL; 233 234 /* store the weight information for a certain route */ 235 spin_lock(&state[state_idx].lock); 236 237 /* find state entry for gateway or add one if necessary */ 238 list_for_each_entry_rcu(r, &state[state_idx].head, list) { 239 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { 240 target_route = r; 241 break; 242 } 243 } 244 245 if (!target_route) { 246 const size_t size_rt = sizeof(struct multipath_route); 247 target_route = (struct multipath_route *) 248 kmalloc(size_rt, GFP_ATOMIC); 249 250 target_route->gw = nh->nh_gw; 251 target_route->oif = nh->nh_oif; 252 memset(&target_route->rcu, 0, sizeof(struct rcu_head)); 253 INIT_LIST_HEAD(&target_route->dests); 254 255 list_add_rcu(&target_route->list, &state[state_idx].head); 256 } 257 258 /* find state entry for destination or add one if necessary */ 259 list_for_each_entry_rcu(d, &target_route->dests, list) { 260 if (d->nh_info == nh) { 261 target_dest = d; 262 break; 263 } 264 } 265 266 if (!target_dest) { 267 const size_t size_dst = sizeof(struct multipath_dest); 268 target_dest = (struct multipath_dest*) 269 kmalloc(size_dst, GFP_ATOMIC); 270 271 target_dest->nh_info = nh; 272 target_dest->network = network; 273 target_dest->netmask = netmask; 274 target_dest->prefixlen = prefixlen; 275 memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); 276 277 list_add_rcu(&target_dest->list, &target_route->dests); 278 } 279 /* else: we already stored this info for another destination => 280 * we are finished 281 */ 282 283 spin_unlock(&state[state_idx].lock); 284} 285 286static void __multipath_free(struct rcu_head *head) 287{ 288 struct multipath_route *rt = container_of(head, struct multipath_route, 289 rcu); 290 kfree(rt); 291} 292 293static void __multipath_free_dst(struct rcu_head *head) 294{ 295 struct multipath_dest *dst = container_of(head, 296 struct multipath_dest, 297 rcu); 298 kfree(dst); 299} 300 301static void wrandom_flush(void) 302{ 303 int i; 304 305 /* defere delete to all entries */ 306 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { 307 struct multipath_route *r; 308 309 spin_lock(&state[i].lock); 310 list_for_each_entry_rcu(r, &state[i].head, list) { 311 struct multipath_dest *d; 312 list_for_each_entry_rcu(d, &r->dests, list) { 313 list_del_rcu(&d->list); 314 call_rcu(&d->rcu, 315 __multipath_free_dst); 316 } 317 list_del_rcu(&r->list); 318 call_rcu(&r->rcu, 319 __multipath_free); 320 } 321 322 spin_unlock(&state[i].lock); 323 } 324} 325 326static struct ip_mp_alg_ops wrandom_ops = { 327 .mp_alg_select_route = wrandom_select_route, 328 .mp_alg_flush = wrandom_flush, 329 .mp_alg_set_nhinfo = wrandom_set_nhinfo, 330}; 331 332static int __init wrandom_init(void) 333{ 334 wrandom_init_state(); 335 336 return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); 337} 338 339static void __exit wrandom_exit(void) 340{ 341 multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); 342} 343 344module_init(wrandom_init); 345module_exit(wrandom_exit); 346MODULE_LICENSE("GPL");