Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.22-rc7 329 lines 7.9 kB view raw
1/* 2 * Weighted random policy for multipath. 3 * 4 * 5 * Version: $Id: multipath_wrandom.c,v 1.1.2.3 2004/09/22 07:51:40 elueck Exp $ 6 * 7 * Authors: Einar Lueck <elueck@de.ibm.com><lkml@einar-lueck.de> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 */ 14 15#include <asm/system.h> 16#include <asm/uaccess.h> 17#include <linux/types.h> 18#include <linux/errno.h> 19#include <linux/timer.h> 20#include <linux/mm.h> 21#include <linux/kernel.h> 22#include <linux/fcntl.h> 23#include <linux/stat.h> 24#include <linux/socket.h> 25#include <linux/in.h> 26#include <linux/inet.h> 27#include <linux/netdevice.h> 28#include <linux/inetdevice.h> 29#include <linux/igmp.h> 30#include <linux/proc_fs.h> 31#include <linux/seq_file.h> 32#include <linux/module.h> 33#include <linux/mroute.h> 34#include <linux/init.h> 35#include <linux/random.h> 36#include <net/ip.h> 37#include <net/protocol.h> 38#include <linux/skbuff.h> 39#include <net/sock.h> 40#include <net/icmp.h> 41#include <net/udp.h> 42#include <net/raw.h> 43#include <linux/notifier.h> 44#include <linux/if_arp.h> 45#include <linux/netfilter_ipv4.h> 46#include <net/ipip.h> 47#include <net/checksum.h> 48#include <net/ip_fib.h> 49#include <net/ip_mp_alg.h> 50 51#define MULTIPATH_STATE_SIZE 15 52 53struct multipath_candidate { 54 struct multipath_candidate *next; 55 int power; 56 struct rtable *rt; 57}; 58 59struct multipath_dest { 60 struct list_head list; 61 62 const struct fib_nh *nh_info; 63 __be32 netmask; 64 __be32 network; 65 unsigned char prefixlen; 66 67 struct rcu_head rcu; 68}; 69 70struct multipath_bucket { 71 struct list_head head; 72 spinlock_t lock; 73}; 74 75struct multipath_route { 76 struct list_head list; 77 78 int oif; 79 __be32 gw; 80 struct list_head dests; 81 82 struct rcu_head rcu; 83}; 84 85/* state: primarily weight per route information */ 86static struct multipath_bucket state[MULTIPATH_STATE_SIZE]; 87 88static unsigned char __multipath_lookup_weight(const struct flowi *fl, 89 const struct rtable *rt) 90{ 91 const int state_idx = rt->idev->dev->ifindex % MULTIPATH_STATE_SIZE; 92 struct multipath_route *r; 93 struct multipath_route *target_route = NULL; 94 struct multipath_dest *d; 95 int weight = 1; 96 97 /* lookup the weight information for a certain route */ 98 rcu_read_lock(); 99 100 /* find state entry for gateway or add one if necessary */ 101 list_for_each_entry_rcu(r, &state[state_idx].head, list) { 102 if (r->gw == rt->rt_gateway && 103 r->oif == rt->idev->dev->ifindex) { 104 target_route = r; 105 break; 106 } 107 } 108 109 if (!target_route) { 110 /* this should not happen... but we are prepared */ 111 printk( KERN_CRIT"%s: missing state for gateway: %u and " \ 112 "device %d\n", __FUNCTION__, rt->rt_gateway, 113 rt->idev->dev->ifindex); 114 goto out; 115 } 116 117 /* find state entry for destination */ 118 list_for_each_entry_rcu(d, &target_route->dests, list) { 119 __be32 targetnetwork = fl->fl4_dst & 120 inet_make_mask(d->prefixlen); 121 122 if ((targetnetwork & d->netmask) == d->network) { 123 weight = d->nh_info->nh_weight; 124 goto out; 125 } 126 } 127 128out: 129 rcu_read_unlock(); 130 return weight; 131} 132 133static void wrandom_init_state(void) 134{ 135 int i; 136 137 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { 138 INIT_LIST_HEAD(&state[i].head); 139 spin_lock_init(&state[i].lock); 140 } 141} 142 143static void wrandom_select_route(const struct flowi *flp, 144 struct rtable *first, 145 struct rtable **rp) 146{ 147 struct rtable *rt; 148 struct rtable *decision; 149 struct multipath_candidate *first_mpc = NULL; 150 struct multipath_candidate *mpc, *last_mpc = NULL; 151 int power = 0; 152 int last_power; 153 int selector; 154 const size_t size_mpc = sizeof(struct multipath_candidate); 155 156 /* collect all candidates and identify their weights */ 157 for (rt = rcu_dereference(first); rt; 158 rt = rcu_dereference(rt->u.dst.rt_next)) { 159 if ((rt->u.dst.flags & DST_BALANCED) != 0 && 160 multipath_comparekeys(&rt->fl, flp)) { 161 struct multipath_candidate* mpc = 162 (struct multipath_candidate*) 163 kmalloc(size_mpc, GFP_ATOMIC); 164 165 if (!mpc) 166 return; 167 168 power += __multipath_lookup_weight(flp, rt) * 10000; 169 170 mpc->power = power; 171 mpc->rt = rt; 172 mpc->next = NULL; 173 174 if (!first_mpc) 175 first_mpc = mpc; 176 else 177 last_mpc->next = mpc; 178 179 last_mpc = mpc; 180 } 181 } 182 183 /* choose a weighted random candidate */ 184 decision = first; 185 selector = random32() % power; 186 last_power = 0; 187 188 /* select candidate, adjust GC data and cleanup local state */ 189 decision = first; 190 last_mpc = NULL; 191 for (mpc = first_mpc; mpc; mpc = mpc->next) { 192 mpc->rt->u.dst.lastuse = jiffies; 193 if (last_power <= selector && selector < mpc->power) 194 decision = mpc->rt; 195 196 last_power = mpc->power; 197 kfree(last_mpc); 198 last_mpc = mpc; 199 } 200 201 /* concurrent __multipath_flush may lead to !last_mpc */ 202 kfree(last_mpc); 203 204 decision->u.dst.__use++; 205 *rp = decision; 206} 207 208static void wrandom_set_nhinfo(__be32 network, 209 __be32 netmask, 210 unsigned char prefixlen, 211 const struct fib_nh *nh) 212{ 213 const int state_idx = nh->nh_oif % MULTIPATH_STATE_SIZE; 214 struct multipath_route *r, *target_route = NULL; 215 struct multipath_dest *d, *target_dest = NULL; 216 217 /* store the weight information for a certain route */ 218 spin_lock_bh(&state[state_idx].lock); 219 220 /* find state entry for gateway or add one if necessary */ 221 list_for_each_entry_rcu(r, &state[state_idx].head, list) { 222 if (r->gw == nh->nh_gw && r->oif == nh->nh_oif) { 223 target_route = r; 224 break; 225 } 226 } 227 228 if (!target_route) { 229 const size_t size_rt = sizeof(struct multipath_route); 230 target_route = (struct multipath_route *) 231 kmalloc(size_rt, GFP_ATOMIC); 232 233 target_route->gw = nh->nh_gw; 234 target_route->oif = nh->nh_oif; 235 memset(&target_route->rcu, 0, sizeof(struct rcu_head)); 236 INIT_LIST_HEAD(&target_route->dests); 237 238 list_add_rcu(&target_route->list, &state[state_idx].head); 239 } 240 241 /* find state entry for destination or add one if necessary */ 242 list_for_each_entry_rcu(d, &target_route->dests, list) { 243 if (d->nh_info == nh) { 244 target_dest = d; 245 break; 246 } 247 } 248 249 if (!target_dest) { 250 const size_t size_dst = sizeof(struct multipath_dest); 251 target_dest = (struct multipath_dest*) 252 kmalloc(size_dst, GFP_ATOMIC); 253 254 target_dest->nh_info = nh; 255 target_dest->network = network; 256 target_dest->netmask = netmask; 257 target_dest->prefixlen = prefixlen; 258 memset(&target_dest->rcu, 0, sizeof(struct rcu_head)); 259 260 list_add_rcu(&target_dest->list, &target_route->dests); 261 } 262 /* else: we already stored this info for another destination => 263 * we are finished 264 */ 265 266 spin_unlock_bh(&state[state_idx].lock); 267} 268 269static void __multipath_free(struct rcu_head *head) 270{ 271 struct multipath_route *rt = container_of(head, struct multipath_route, 272 rcu); 273 kfree(rt); 274} 275 276static void __multipath_free_dst(struct rcu_head *head) 277{ 278 struct multipath_dest *dst = container_of(head, 279 struct multipath_dest, 280 rcu); 281 kfree(dst); 282} 283 284static void wrandom_flush(void) 285{ 286 int i; 287 288 /* defere delete to all entries */ 289 for (i = 0; i < MULTIPATH_STATE_SIZE; ++i) { 290 struct multipath_route *r; 291 292 spin_lock_bh(&state[i].lock); 293 list_for_each_entry_rcu(r, &state[i].head, list) { 294 struct multipath_dest *d; 295 list_for_each_entry_rcu(d, &r->dests, list) { 296 list_del_rcu(&d->list); 297 call_rcu(&d->rcu, 298 __multipath_free_dst); 299 } 300 list_del_rcu(&r->list); 301 call_rcu(&r->rcu, 302 __multipath_free); 303 } 304 305 spin_unlock_bh(&state[i].lock); 306 } 307} 308 309static struct ip_mp_alg_ops wrandom_ops = { 310 .mp_alg_select_route = wrandom_select_route, 311 .mp_alg_flush = wrandom_flush, 312 .mp_alg_set_nhinfo = wrandom_set_nhinfo, 313}; 314 315static int __init wrandom_init(void) 316{ 317 wrandom_init_state(); 318 319 return multipath_alg_register(&wrandom_ops, IP_MP_ALG_WRANDOM); 320} 321 322static void __exit wrandom_exit(void) 323{ 324 multipath_alg_unregister(&wrandom_ops, IP_MP_ALG_WRANDOM); 325} 326 327module_init(wrandom_init); 328module_exit(wrandom_exit); 329MODULE_LICENSE("GPL");