Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.20-rc3 341 lines 8.2 kB view raw
1/* 2 * Weighted random policy for multipath. 3 * 4 * 5 * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ 6 * 7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 */ 14 15#include <asm/system.h> 16#include <asm/uaccess.h> 17#include <linux/types.h> 18#include <linux/sched.h> 19#include <linux/errno.h> 20#include <linux/timer.h> 21#include <linux/mm.h> 22#include <linux/kernel.h> 23#include <linux/fcntl.h> 24#include <linux/stat.h> 25#include <linux/socket.h> 26#include <linux/in.h> 27#include <linux/inet.h> 28#include <linux/netdevice.h> 29#include <linux/inetdevice.h> 30#include <linux/igmp.h> 31#include <linux/proc_fs.h> 32#include <linux/seq_file.h> 33#include <linux/module.h> 34#include <linux/mroute.h> 35#include <linux/init.h> 36#include <net/ip.h> 37#include <net/protocol.h> 38#include <linux/skbuff.h> 39#include <net/sock.h> 40#include <net/icmp.h> 41#include <net/udp.h> 42#include <net/raw.h> 43#include <linux/notifier.h> 44#include <linux/if_arp.h> 45#include <linux/netfilter_ipv4.h> 46#include <net/ipip.h> 47#include <net/checksum.h> 48#include <net/ip_fib.h> 49#include <net/ip_mp_alg.h> 50 51#define MULTIPATH_STATE_SIZE 15 52 53struct multipath_candidate { 54 struct multipath_candidate *next; 55 int power; 56 struct rtable *rt; 57}; 58 59struct multipath_dest { 60 struct list_head list; 61 62 const struct fib_nh *nh_info; 63 __be32 netmask; 64 __be32 network; 65 unsigned char prefixlen; 66 67 struct rcu_head rcu; 68}; 69 70struct multipath_bucket { 71 struct list_head head; 72 spinlock_t lock; 73}; 74 75struct multipath_route { 76 struct list_head list; 77 78 int oif; 79 __be32 gw; 80 struct list_head dests; 81 82 struct rcu_head rcu; 83}; 84 85/* state: primarily weight per route information */ 86static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; 87 88/* interface to random number generation */ 89static unsigned int RANDOM_SEED = 93186752; 90 91static inline unsigned int random(unsigned int ubound) 92{ 93 static unsigned int a = 1588635695, 94 q = 2, 95 r = 1117695901; 96 RANDOM_SEED = a*(RANDOM_SEED % q) - r*(RANDOM_SEED / q); 97 return RANDOM_SEED % ubound; 98} 99 100static unsigned char __multipath_lookup_weight(const struct flowi *fl, 101 const struct rtable *rt) 102{ 103 const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; 104 struct multipath_route *r; 105 struct multipath_route *target_route = NULL; 106 struct multipath_dest *d; 107 int weight = 1; 108 109 /* lookup the weight information for a certain route */ 110 rcu_read_lock(); 111 112 /* find state entry for gateway or add one if necessary */ 113 list_for_each_entry_rcu(r, &state[state_idx].head, list) { 114 if (r->gw == rt->rt_gateway && 115 r->oif == rt->idev->dev->ifindex) { 116 target_route = r; 117 break; 118 } 119 } 120 121 if (!target_route) { 122 /* this should not happen... but we are prepared */ 123 printk( KERN_CRIT"%s: missing state for gateway: %u and " \ 124 "device %d\n", __FUNCTION__, rt->rt_gateway, 125 rt->idev->dev->ifindex); 126 goto out; 127 } 128 129 /* find state entry for destination */ 130 list_for_each_entry_rcu(d, &target_route->dests, list) { 131 __be32 targetnetwork = fl->fl4_dst & 132 inet_make_mask(d->prefixlen); 133 134 if ((targetnetwork & d->netmask) == d->network) { 135 weight = d->nh_info->nh_weight; 136 goto out; 137 } 138 } 139 140out: 141 rcu_read_unlock(); 142 return weight; 143} 144 145static void wrandom_init_state(void) 146{ 147 int i; 148 149 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { 150 INIT_LIST_HEAD(&state[i].head); 151 spin_lock_init(&state[i].lock); 152 } 153} 154 155static void wrandom_select_route(const struct flowi *flp, 156 struct rtable *first, 157 struct rtable **rp) 158{ 159 struct rtable *rt; 160 struct rtable *decision; 161 struct multipath_candidate *first_mpc = NULL; 162 struct multipath_candidate *mpc, *last_mpc = NULL; 163 int power = 0; 164 int last_power; 165 int selector; 166 const size_t size_mpc = sizeof(struct multipath_candidate); 167 168 /* collect all candidates and identify their weights */ 169 for (rt = rcu_dereference(first); rt; 170 rt = rcu_dereference(rt->u.rt_next)) { 171 if ((rt->u.dst.flags & DST_BALANCED) != 0 && 172 multipath_comparekeys(&rt->fl, flp)) { 173 struct multipath_candidate* mpc = 174 (struct multipath_candidate*) 175 kmalloc(size_mpc, GFP_ATOMIC); 176 177 if (!mpc) 178 return; 179 180 power += __multipath_lookup_weight(flp, rt) * 10000; 181 182 mpc->power = power; 183 mpc->rt = rt; 184 mpc->next = NULL; 185 186 if (!first_mpc) 187 first_mpc = mpc; 188 else 189 last_mpc->next = mpc; 190 191 last_mpc = mpc; 192 } 193 } 194 195 /* choose a weighted random candidate */ 196 decision = first; 197 selector = random(power); 198 last_power = 0; 199 200 /* select candidate, adjust GC data and cleanup local state */ 201 decision = first; 202 last_mpc = NULL; 203 for (mpc = first_mpc; mpc; mpc = mpc->next) { 204 mpc->rt->u.dst.lastuse = jiffies; 205 if (last_power <= selector && selector < mpc->power) 206 decision = mpc->rt; 207 208 last_power = mpc->power; 209 kfree(last_mpc); 210 last_mpc = mpc; 211 } 212 213 /* concurrent __multipath_flush may lead to !last_mpc */ 214 kfree(last_mpc); 215 216 decision->u.dst.__use++; 217 *rp = decision; 218} 219 220static void wrandom_set_nhinfo(__be32 network, 221 __be32 netmask, 222 unsigned char prefixlen, 223 const struct fib_nh *nh) 224{ 225 const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; 226 struct multipath_route *r, *target_route = NULL; 227 struct multipath_dest *d, *target_dest = NULL; 228 229 /* store the weight information for a certain route */ 230 spin_lock_bh(&state[state_idx].lock); 231 232 /* find state entry for gateway or add one if necessary */ 233 list_for_each_entry_rcu(r, &state[state_idx].head, list) { 234 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { 235 target_route = r; 236 break; 237 } 238 } 239 240 if (!target_route) { 241 const size_t size_rt = sizeof(struct multipath_route); 242 target_route = (struct multipath_route *) 243 kmalloc(size_rt, GFP_ATOMIC); 244 245 target_route->gw = nh->nh_gw; 246 target_route->oif = nh->nh_oif; 247 memset(&target_route->rcu, 0, sizeof(struct rcu_head)); 248 INIT_LIST_HEAD(&target_route->dests); 249 250 list_add_rcu(&target_route->list, &state[state_idx].head); 251 } 252 253 /* find state entry for destination or add one if necessary */ 254 list_for_each_entry_rcu(d, &target_route->dests, list) { 255 if (d->nh_info == nh) { 256 target_dest = d; 257 break; 258 } 259 } 260 261 if (!target_dest) { 262 const size_t size_dst = sizeof(struct multipath_dest); 263 target_dest = (struct multipath_dest*) 264 kmalloc(size_dst, GFP_ATOMIC); 265 266 target_dest->nh_info = nh; 267 target_dest->network = network; 268 target_dest->netmask = netmask; 269 target_dest->prefixlen = prefixlen; 270 memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); 271 272 list_add_rcu(&target_dest->list, &target_route->dests); 273 } 274 /* else: we already stored this info for another destination => 275 * we are finished 276 */ 277 278 spin_unlock_bh(&state[state_idx].lock); 279} 280 281static void __multipath_free(struct rcu_head *head) 282{ 283 struct multipath_route *rt = container_of(head, struct multipath_route, 284 rcu); 285 kfree(rt); 286} 287 288static void __multipath_free_dst(struct rcu_head *head) 289{ 290 struct multipath_dest *dst = container_of(head, 291 struct multipath_dest, 292 rcu); 293 kfree(dst); 294} 295 296static void wrandom_flush(void) 297{ 298 int i; 299 300 /* defere delete to all entries */ 301 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { 302 struct multipath_route *r; 303 304 spin_lock_bh(&state[i].lock); 305 list_for_each_entry_rcu(r, &state[i].head, list) { 306 struct multipath_dest *d; 307 list_for_each_entry_rcu(d, &r->dests, list) { 308 list_del_rcu(&d->list); 309 call_rcu(&d->rcu, 310 __multipath_free_dst); 311 } 312 list_del_rcu(&r->list); 313 call_rcu(&r->rcu, 314 __multipath_free); 315 } 316 317 spin_unlock_bh(&state[i].lock); 318 } 319} 320 321static struct ip_mp_alg_ops wrandom_ops = { 322 .mp_alg_select_route = wrandom_select_route, 323 .mp_alg_flush = wrandom_flush, 324 .mp_alg_set_nhinfo = wrandom_set_nhinfo, 325}; 326 327static int __init wrandom_init(void) 328{ 329 wrandom_init_state(); 330 331 return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); 332} 333 334static void __exit wrandom_exit(void) 335{ 336 multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); 337} 338 339module_init(wrandom_init); 340module_exit(wrandom_exit); 341MODULE_LICENSE("GPL");