Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at c9a28fa7b9ac19b676deefa0a171ce7df8755c08 429 lines 10 kB view raw
1/* 2 * ip_vs_proto_udp.c: UDP load balancing support for IPVS 3 * 4 * Version: $Id: ip_vs_proto_udp.c,v 1.3 2002/11/30 01:50:35 wensong Exp $ 5 * 6 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 7 * Julian Anastasov <ja@ssi.bg> 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU General Public License 11 * as published by the Free Software Foundation; either version 12 * 2 of the License, or (at your option) any later version. 13 * 14 * Changes: 15 * 16 */ 17 18#include <linux/in.h> 19#include <linux/ip.h> 20#include <linux/kernel.h> 21#include <linux/netfilter.h> 22#include <linux/netfilter_ipv4.h> 23#include <linux/udp.h> 24 25#include <net/ip_vs.h> 26#include <net/ip.h> 27 28static struct ip_vs_conn * 29udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, 30 const struct iphdr *iph, unsigned int proto_off, int inverse) 31{ 32 struct ip_vs_conn *cp; 33 __be16 _ports[2], *pptr; 34 35 pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); 36 if (pptr == NULL) 37 return NULL; 38 39 if (likely(!inverse)) { 40 cp = ip_vs_conn_in_get(iph->protocol, 41 iph->saddr, pptr[0], 42 iph->daddr, pptr[1]); 43 } else { 44 cp = ip_vs_conn_in_get(iph->protocol, 45 iph->daddr, pptr[1], 46 iph->saddr, pptr[0]); 47 } 48 49 return cp; 50} 51 52 53static struct ip_vs_conn * 54udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, 55 const struct iphdr *iph, unsigned int proto_off, int inverse) 56{ 57 struct ip_vs_conn *cp; 58 __be16 _ports[2], *pptr; 59 60 pptr = skb_header_pointer(skb, ip_hdrlen(skb), 61 sizeof(_ports), _ports); 62 if (pptr == NULL) 63 return NULL; 64 65 if (likely(!inverse)) { 66 cp = ip_vs_conn_out_get(iph->protocol, 67 iph->saddr, pptr[0], 68 iph->daddr, pptr[1]); 69 } else { 70 cp = ip_vs_conn_out_get(iph->protocol, 71 iph->daddr, pptr[1], 72 iph->saddr, pptr[0]); 73 } 74 75 return cp; 76} 77 78 79static int 80udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, 81 int *verdict, struct ip_vs_conn **cpp) 82{ 83 struct ip_vs_service *svc; 84 struct udphdr _udph, *uh; 85 86 uh = skb_header_pointer(skb, ip_hdrlen(skb), 87 sizeof(_udph), &_udph); 88 if (uh == NULL) { 89 *verdict = NF_DROP; 90 return 0; 91 } 92 93 if ((svc = ip_vs_service_get(skb->mark, ip_hdr(skb)->protocol, 94 ip_hdr(skb)->daddr, uh->dest))) { 95 if (ip_vs_todrop()) { 96 /* 97 * It seems that we are very loaded. 98 * We have to drop this packet :( 99 */ 100 ip_vs_service_put(svc); 101 *verdict = NF_DROP; 102 return 0; 103 } 104 105 /* 106 * Let the virtual server select a real server for the 107 * incoming connection, and create a connection entry. 108 */ 109 *cpp = ip_vs_schedule(svc, skb); 110 if (!*cpp) { 111 *verdict = ip_vs_leave(svc, skb, pp); 112 return 0; 113 } 114 ip_vs_service_put(svc); 115 } 116 return 1; 117} 118 119 120static inline void 121udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, 122 __be16 oldport, __be16 newport) 123{ 124 uhdr->check = 125 csum_fold(ip_vs_check_diff4(oldip, newip, 126 ip_vs_check_diff2(oldport, newport, 127 ~csum_unfold(uhdr->check)))); 128 if (!uhdr->check) 129 uhdr->check = CSUM_MANGLED_0; 130} 131 132static int 133udp_snat_handler(struct sk_buff *skb, 134 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 135{ 136 struct udphdr *udph; 137 const unsigned int udphoff = ip_hdrlen(skb); 138 139 /* csum_check requires unshared skb */ 140 if (!skb_make_writable(skb, udphoff+sizeof(*udph))) 141 return 0; 142 143 if (unlikely(cp->app != NULL)) { 144 /* Some checks before mangling */ 145 if (pp->csum_check && !pp->csum_check(skb, pp)) 146 return 0; 147 148 /* 149 * Call application helper if needed 150 */ 151 if (!ip_vs_app_pkt_out(cp, skb)) 152 return 0; 153 } 154 155 udph = (void *)ip_hdr(skb) + udphoff; 156 udph->source = cp->vport; 157 158 /* 159 * Adjust UDP checksums 160 */ 161 if (!cp->app && (udph->check != 0)) { 162 /* Only port and addr are changed, do fast csum update */ 163 udp_fast_csum_update(udph, cp->daddr, cp->vaddr, 164 cp->dport, cp->vport); 165 if (skb->ip_summed == CHECKSUM_COMPLETE) 166 skb->ip_summed = CHECKSUM_NONE; 167 } else { 168 /* full checksum calculation */ 169 udph->check = 0; 170 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); 171 udph->check = csum_tcpudp_magic(cp->vaddr, cp->caddr, 172 skb->len - udphoff, 173 cp->protocol, skb->csum); 174 if (udph->check == 0) 175 udph->check = CSUM_MANGLED_0; 176 IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", 177 pp->name, udph->check, 178 (char*)&(udph->check) - (char*)udph); 179 } 180 return 1; 181} 182 183 184static int 185udp_dnat_handler(struct sk_buff *skb, 186 struct ip_vs_protocol *pp, struct ip_vs_conn *cp) 187{ 188 struct udphdr *udph; 189 unsigned int udphoff = ip_hdrlen(skb); 190 191 /* csum_check requires unshared skb */ 192 if (!skb_make_writable(skb, udphoff+sizeof(*udph))) 193 return 0; 194 195 if (unlikely(cp->app != NULL)) { 196 /* Some checks before mangling */ 197 if (pp->csum_check && !pp->csum_check(skb, pp)) 198 return 0; 199 200 /* 201 * Attempt ip_vs_app call. 202 * It will fix ip_vs_conn 203 */ 204 if (!ip_vs_app_pkt_in(cp, skb)) 205 return 0; 206 } 207 208 udph = (void *)ip_hdr(skb) + udphoff; 209 udph->dest = cp->dport; 210 211 /* 212 * Adjust UDP checksums 213 */ 214 if (!cp->app && (udph->check != 0)) { 215 /* Only port and addr are changed, do fast csum update */ 216 udp_fast_csum_update(udph, cp->vaddr, cp->daddr, 217 cp->vport, cp->dport); 218 if (skb->ip_summed == CHECKSUM_COMPLETE) 219 skb->ip_summed = CHECKSUM_NONE; 220 } else { 221 /* full checksum calculation */ 222 udph->check = 0; 223 skb->csum = skb_checksum(skb, udphoff, skb->len - udphoff, 0); 224 udph->check = csum_tcpudp_magic(cp->caddr, cp->daddr, 225 skb->len - udphoff, 226 cp->protocol, skb->csum); 227 if (udph->check == 0) 228 udph->check = CSUM_MANGLED_0; 229 skb->ip_summed = CHECKSUM_UNNECESSARY; 230 } 231 return 1; 232} 233 234 235static int 236udp_csum_check(struct sk_buff *skb, struct ip_vs_protocol *pp) 237{ 238 struct udphdr _udph, *uh; 239 const unsigned int udphoff = ip_hdrlen(skb); 240 241 uh = skb_header_pointer(skb, udphoff, sizeof(_udph), &_udph); 242 if (uh == NULL) 243 return 0; 244 245 if (uh->check != 0) { 246 switch (skb->ip_summed) { 247 case CHECKSUM_NONE: 248 skb->csum = skb_checksum(skb, udphoff, 249 skb->len - udphoff, 0); 250 case CHECKSUM_COMPLETE: 251 if (csum_tcpudp_magic(ip_hdr(skb)->saddr, 252 ip_hdr(skb)->daddr, 253 skb->len - udphoff, 254 ip_hdr(skb)->protocol, 255 skb->csum)) { 256 IP_VS_DBG_RL_PKT(0, pp, skb, 0, 257 "Failed checksum for"); 258 return 0; 259 } 260 break; 261 default: 262 /* No need to checksum. */ 263 break; 264 } 265 } 266 return 1; 267} 268 269 270/* 271 * Note: the caller guarantees that only one of register_app, 272 * unregister_app or app_conn_bind is called each time. 273 */ 274 275#define UDP_APP_TAB_BITS 4 276#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS) 277#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1) 278 279static struct list_head udp_apps[UDP_APP_TAB_SIZE]; 280static DEFINE_SPINLOCK(udp_app_lock); 281 282static inline __u16 udp_app_hashkey(__be16 port) 283{ 284 return (((__force u16)port >> UDP_APP_TAB_BITS) ^ (__force u16)port) 285 & UDP_APP_TAB_MASK; 286} 287 288 289static int udp_register_app(struct ip_vs_app *inc) 290{ 291 struct ip_vs_app *i; 292 __u16 hash; 293 __be16 port = inc->port; 294 int ret = 0; 295 296 hash = udp_app_hashkey(port); 297 298 299 spin_lock_bh(&udp_app_lock); 300 list_for_each_entry(i, &udp_apps[hash], p_list) { 301 if (i->port == port) { 302 ret = -EEXIST; 303 goto out; 304 } 305 } 306 list_add(&inc->p_list, &udp_apps[hash]); 307 atomic_inc(&ip_vs_protocol_udp.appcnt); 308 309 out: 310 spin_unlock_bh(&udp_app_lock); 311 return ret; 312} 313 314 315static void 316udp_unregister_app(struct ip_vs_app *inc) 317{ 318 spin_lock_bh(&udp_app_lock); 319 atomic_dec(&ip_vs_protocol_udp.appcnt); 320 list_del(&inc->p_list); 321 spin_unlock_bh(&udp_app_lock); 322} 323 324 325static int udp_app_conn_bind(struct ip_vs_conn *cp) 326{ 327 int hash; 328 struct ip_vs_app *inc; 329 int result = 0; 330 331 /* Default binding: bind app only for NAT */ 332 if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) 333 return 0; 334 335 /* Lookup application incarnations and bind the right one */ 336 hash = udp_app_hashkey(cp->vport); 337 338 spin_lock(&udp_app_lock); 339 list_for_each_entry(inc, &udp_apps[hash], p_list) { 340 if (inc->port == cp->vport) { 341 if (unlikely(!ip_vs_app_inc_get(inc))) 342 break; 343 spin_unlock(&udp_app_lock); 344 345 IP_VS_DBG(9, "%s: Binding conn %u.%u.%u.%u:%u->" 346 "%u.%u.%u.%u:%u to app %s on port %u\n", 347 __FUNCTION__, 348 NIPQUAD(cp->caddr), ntohs(cp->cport), 349 NIPQUAD(cp->vaddr), ntohs(cp->vport), 350 inc->name, ntohs(inc->port)); 351 cp->app = inc; 352 if (inc->init_conn) 353 result = inc->init_conn(inc, cp); 354 goto out; 355 } 356 } 357 spin_unlock(&udp_app_lock); 358 359 out: 360 return result; 361} 362 363 364static int udp_timeouts[IP_VS_UDP_S_LAST+1] = { 365 [IP_VS_UDP_S_NORMAL] = 5*60*HZ, 366 [IP_VS_UDP_S_LAST] = 2*HZ, 367}; 368 369static char * udp_state_name_table[IP_VS_UDP_S_LAST+1] = { 370 [IP_VS_UDP_S_NORMAL] = "UDP", 371 [IP_VS_UDP_S_LAST] = "BUG!", 372}; 373 374 375static int 376udp_set_state_timeout(struct ip_vs_protocol *pp, char *sname, int to) 377{ 378 return ip_vs_set_state_timeout(pp->timeout_table, IP_VS_UDP_S_LAST, 379 udp_state_name_table, sname, to); 380} 381 382static const char * udp_state_name(int state) 383{ 384 if (state >= IP_VS_UDP_S_LAST) 385 return "ERR!"; 386 return udp_state_name_table[state] ? udp_state_name_table[state] : "?"; 387} 388 389static int 390udp_state_transition(struct ip_vs_conn *cp, int direction, 391 const struct sk_buff *skb, 392 struct ip_vs_protocol *pp) 393{ 394 cp->timeout = pp->timeout_table[IP_VS_UDP_S_NORMAL]; 395 return 1; 396} 397 398static void udp_init(struct ip_vs_protocol *pp) 399{ 400 IP_VS_INIT_HASH_TABLE(udp_apps); 401 pp->timeout_table = udp_timeouts; 402} 403 404static void udp_exit(struct ip_vs_protocol *pp) 405{ 406} 407 408 409struct ip_vs_protocol ip_vs_protocol_udp = { 410 .name = "UDP", 411 .protocol = IPPROTO_UDP, 412 .dont_defrag = 0, 413 .init = udp_init, 414 .exit = udp_exit, 415 .conn_schedule = udp_conn_schedule, 416 .conn_in_get = udp_conn_in_get, 417 .conn_out_get = udp_conn_out_get, 418 .snat_handler = udp_snat_handler, 419 .dnat_handler = udp_dnat_handler, 420 .csum_check = udp_csum_check, 421 .state_transition = udp_state_transition, 422 .state_name = udp_state_name, 423 .register_app = udp_register_app, 424 .unregister_app = udp_unregister_app, 425 .app_conn_bind = udp_app_conn_bind, 426 .debug_packet = ip_vs_tcpudp_debug_packet, 427 .timeout_change = NULL, 428 .set_state_timeout = udp_set_state_timeout, 429};