Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.19 902 lines 22 kB view raw
1/* 2 * IPVS An implementation of the IP virtual server support for the 3 * LINUX operating system. IPVS is now implemented as a module 4 * over the NetFilter framework. IPVS can be used to build a 5 * high-performance and highly available server based on a 6 * cluster of servers. 7 * 8 * Version: $Id: ip_vs_sync.c,v 1.13 2003/06/08 09:31:19 wensong Exp $ 9 * 10 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org> 11 * 12 * ip_vs_sync: sync connection info from master load balancer to backups 13 * through multicast 14 * 15 * Changes: 16 * Alexandre Cassen : Added master & backup support at a time. 17 * Alexandre Cassen : Added SyncID support for incoming sync 18 * messages filtering. 19 * Justin Ossevoort : Fix endian problem on sync message size. 20 */ 21 22#include <linux/module.h> 23#include <linux/slab.h> 24#include <linux/inetdevice.h> 25#include <linux/net.h> 26#include <linux/completion.h> 27#include <linux/delay.h> 28#include <linux/skbuff.h> 29#include <linux/in.h> 30#include <linux/igmp.h> /* for ip_mc_join_group */ 31#include <linux/udp.h> 32 33#include <net/ip.h> 34#include <net/sock.h> 35#include <asm/uaccess.h> /* for get_fs and set_fs */ 36 37#include <net/ip_vs.h> 38 39#define IP_VS_SYNC_GROUP 0xe0000051 /* multicast addr - 224.0.0.81 */ 40#define IP_VS_SYNC_PORT 8848 /* multicast port */ 41 42 43/* 44 * IPVS sync connection entry 45 */ 46struct ip_vs_sync_conn { 47 __u8 reserved; 48 49 /* Protocol, addresses and port numbers */ 50 __u8 protocol; /* Which protocol (TCP/UDP) */ 51 __be16 cport; 52 __be16 vport; 53 __be16 dport; 54 __be32 caddr; /* client address */ 55 __be32 vaddr; /* virtual address */ 56 __be32 daddr; /* destination address */ 57 58 /* Flags and state transition */ 59 __be16 flags; /* status flags */ 60 __be16 state; /* state info */ 61 62 /* The sequence options start here */ 63}; 64 65struct ip_vs_sync_conn_options { 66 struct ip_vs_seq in_seq; /* incoming seq. struct */ 67 struct ip_vs_seq out_seq; /* outgoing seq. struct */ 68}; 69 70#define IP_VS_SYNC_CONN_TIMEOUT (3*60*HZ) 71#define SIMPLE_CONN_SIZE (sizeof(struct ip_vs_sync_conn)) 72#define FULL_CONN_SIZE \ 73(sizeof(struct ip_vs_sync_conn) + sizeof(struct ip_vs_sync_conn_options)) 74 75 76/* 77 The master mulitcasts messages to the backup load balancers in the 78 following format. 79 80 0 1 2 3 81 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 82 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 83 | Count Conns | SyncID | Size | 84 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 85 | | 86 | IPVS Sync Connection (1) | 87 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 88 | . | 89 | . | 90 | . | 91 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 92 | | 93 | IPVS Sync Connection (n) | 94 +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 95*/ 96 97#define SYNC_MESG_HEADER_LEN 4 98 99struct ip_vs_sync_mesg { 100 __u8 nr_conns; 101 __u8 syncid; 102 __u16 size; 103 104 /* ip_vs_sync_conn entries start here */ 105}; 106 107/* the maximum length of sync (sending/receiving) message */ 108static int sync_send_mesg_maxlen; 109static int sync_recv_mesg_maxlen; 110 111struct ip_vs_sync_buff { 112 struct list_head list; 113 unsigned long firstuse; 114 115 /* pointers for the message data */ 116 struct ip_vs_sync_mesg *mesg; 117 unsigned char *head; 118 unsigned char *end; 119}; 120 121 122/* the sync_buff list head and the lock */ 123static LIST_HEAD(ip_vs_sync_queue); 124static DEFINE_SPINLOCK(ip_vs_sync_lock); 125 126/* current sync_buff for accepting new conn entries */ 127static struct ip_vs_sync_buff *curr_sb = NULL; 128static DEFINE_SPINLOCK(curr_sb_lock); 129 130/* ipvs sync daemon state */ 131volatile int ip_vs_sync_state = IP_VS_STATE_NONE; 132volatile int ip_vs_master_syncid = 0; 133volatile int ip_vs_backup_syncid = 0; 134 135/* multicast interface name */ 136char ip_vs_master_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 137char ip_vs_backup_mcast_ifn[IP_VS_IFNAME_MAXLEN]; 138 139/* multicast addr */ 140static struct sockaddr_in mcast_addr; 141 142 143static inline void sb_queue_tail(struct ip_vs_sync_buff *sb) 144{ 145 spin_lock(&ip_vs_sync_lock); 146 list_add_tail(&sb->list, &ip_vs_sync_queue); 147 spin_unlock(&ip_vs_sync_lock); 148} 149 150static inline struct ip_vs_sync_buff * sb_dequeue(void) 151{ 152 struct ip_vs_sync_buff *sb; 153 154 spin_lock_bh(&ip_vs_sync_lock); 155 if (list_empty(&ip_vs_sync_queue)) { 156 sb = NULL; 157 } else { 158 sb = list_entry(ip_vs_sync_queue.next, 159 struct ip_vs_sync_buff, 160 list); 161 list_del(&sb->list); 162 } 163 spin_unlock_bh(&ip_vs_sync_lock); 164 165 return sb; 166} 167 168static inline struct ip_vs_sync_buff * ip_vs_sync_buff_create(void) 169{ 170 struct ip_vs_sync_buff *sb; 171 172 if (!(sb=kmalloc(sizeof(struct ip_vs_sync_buff), GFP_ATOMIC))) 173 return NULL; 174 175 if (!(sb->mesg=kmalloc(sync_send_mesg_maxlen, GFP_ATOMIC))) { 176 kfree(sb); 177 return NULL; 178 } 179 sb->mesg->nr_conns = 0; 180 sb->mesg->syncid = ip_vs_master_syncid; 181 sb->mesg->size = 4; 182 sb->head = (unsigned char *)sb->mesg + 4; 183 sb->end = (unsigned char *)sb->mesg + sync_send_mesg_maxlen; 184 sb->firstuse = jiffies; 185 return sb; 186} 187 188static inline void ip_vs_sync_buff_release(struct ip_vs_sync_buff *sb) 189{ 190 kfree(sb->mesg); 191 kfree(sb); 192} 193 194/* 195 * Get the current sync buffer if it has been created for more 196 * than the specified time or the specified time is zero. 197 */ 198static inline struct ip_vs_sync_buff * 199get_curr_sync_buff(unsigned long time) 200{ 201 struct ip_vs_sync_buff *sb; 202 203 spin_lock_bh(&curr_sb_lock); 204 if (curr_sb && (time == 0 || 205 time_before(jiffies - curr_sb->firstuse, time))) { 206 sb = curr_sb; 207 curr_sb = NULL; 208 } else 209 sb = NULL; 210 spin_unlock_bh(&curr_sb_lock); 211 return sb; 212} 213 214 215/* 216 * Add an ip_vs_conn information into the current sync_buff. 217 * Called by ip_vs_in. 218 */ 219void ip_vs_sync_conn(struct ip_vs_conn *cp) 220{ 221 struct ip_vs_sync_mesg *m; 222 struct ip_vs_sync_conn *s; 223 int len; 224 225 spin_lock(&curr_sb_lock); 226 if (!curr_sb) { 227 if (!(curr_sb=ip_vs_sync_buff_create())) { 228 spin_unlock(&curr_sb_lock); 229 IP_VS_ERR("ip_vs_sync_buff_create failed.\n"); 230 return; 231 } 232 } 233 234 len = (cp->flags & IP_VS_CONN_F_SEQ_MASK) ? FULL_CONN_SIZE : 235 SIMPLE_CONN_SIZE; 236 m = curr_sb->mesg; 237 s = (struct ip_vs_sync_conn *)curr_sb->head; 238 239 /* copy members */ 240 s->protocol = cp->protocol; 241 s->cport = cp->cport; 242 s->vport = cp->vport; 243 s->dport = cp->dport; 244 s->caddr = cp->caddr; 245 s->vaddr = cp->vaddr; 246 s->daddr = cp->daddr; 247 s->flags = htons(cp->flags & ~IP_VS_CONN_F_HASHED); 248 s->state = htons(cp->state); 249 if (cp->flags & IP_VS_CONN_F_SEQ_MASK) { 250 struct ip_vs_sync_conn_options *opt = 251 (struct ip_vs_sync_conn_options *)&s[1]; 252 memcpy(opt, &cp->in_seq, sizeof(*opt)); 253 } 254 255 m->nr_conns++; 256 m->size += len; 257 curr_sb->head += len; 258 259 /* check if there is a space for next one */ 260 if (curr_sb->head+FULL_CONN_SIZE > curr_sb->end) { 261 sb_queue_tail(curr_sb); 262 curr_sb = NULL; 263 } 264 spin_unlock(&curr_sb_lock); 265 266 /* synchronize its controller if it has */ 267 if (cp->control) 268 ip_vs_sync_conn(cp->control); 269} 270 271 272/* 273 * Process received multicast message and create the corresponding 274 * ip_vs_conn entries. 275 */ 276static void ip_vs_process_message(const char *buffer, const size_t buflen) 277{ 278 struct ip_vs_sync_mesg *m = (struct ip_vs_sync_mesg *)buffer; 279 struct ip_vs_sync_conn *s; 280 struct ip_vs_sync_conn_options *opt; 281 struct ip_vs_conn *cp; 282 char *p; 283 int i; 284 285 /* Convert size back to host byte order */ 286 m->size = ntohs(m->size); 287 288 if (buflen != m->size) { 289 IP_VS_ERR("bogus message\n"); 290 return; 291 } 292 293 /* SyncID sanity check */ 294 if (ip_vs_backup_syncid != 0 && m->syncid != ip_vs_backup_syncid) { 295 IP_VS_DBG(7, "Ignoring incoming msg with syncid = %d\n", 296 m->syncid); 297 return; 298 } 299 300 p = (char *)buffer + sizeof(struct ip_vs_sync_mesg); 301 for (i=0; i<m->nr_conns; i++) { 302 unsigned flags; 303 304 s = (struct ip_vs_sync_conn *)p; 305 flags = ntohs(s->flags); 306 if (!(flags & IP_VS_CONN_F_TEMPLATE)) 307 cp = ip_vs_conn_in_get(s->protocol, 308 s->caddr, s->cport, 309 s->vaddr, s->vport); 310 else 311 cp = ip_vs_ct_in_get(s->protocol, 312 s->caddr, s->cport, 313 s->vaddr, s->vport); 314 if (!cp) { 315 cp = ip_vs_conn_new(s->protocol, 316 s->caddr, s->cport, 317 s->vaddr, s->vport, 318 s->daddr, s->dport, 319 flags, NULL); 320 if (!cp) { 321 IP_VS_ERR("ip_vs_conn_new failed\n"); 322 return; 323 } 324 cp->state = ntohs(s->state); 325 } else if (!cp->dest) { 326 /* it is an entry created by the synchronization */ 327 cp->state = ntohs(s->state); 328 cp->flags = flags | IP_VS_CONN_F_HASHED; 329 } /* Note that we don't touch its state and flags 330 if it is a normal entry. */ 331 332 if (flags & IP_VS_CONN_F_SEQ_MASK) { 333 opt = (struct ip_vs_sync_conn_options *)&s[1]; 334 memcpy(&cp->in_seq, opt, sizeof(*opt)); 335 p += FULL_CONN_SIZE; 336 } else 337 p += SIMPLE_CONN_SIZE; 338 339 atomic_set(&cp->in_pkts, sysctl_ip_vs_sync_threshold[0]); 340 cp->timeout = IP_VS_SYNC_CONN_TIMEOUT; 341 ip_vs_conn_put(cp); 342 343 if (p > buffer+buflen) { 344 IP_VS_ERR("bogus message\n"); 345 return; 346 } 347 } 348} 349 350 351/* 352 * Setup loopback of outgoing multicasts on a sending socket 353 */ 354static void set_mcast_loop(struct sock *sk, u_char loop) 355{ 356 struct inet_sock *inet = inet_sk(sk); 357 358 /* setsockopt(sock, SOL_IP, IP_MULTICAST_LOOP, &loop, sizeof(loop)); */ 359 lock_sock(sk); 360 inet->mc_loop = loop ? 1 : 0; 361 release_sock(sk); 362} 363 364/* 365 * Specify TTL for outgoing multicasts on a sending socket 366 */ 367static void set_mcast_ttl(struct sock *sk, u_char ttl) 368{ 369 struct inet_sock *inet = inet_sk(sk); 370 371 /* setsockopt(sock, SOL_IP, IP_MULTICAST_TTL, &ttl, sizeof(ttl)); */ 372 lock_sock(sk); 373 inet->mc_ttl = ttl; 374 release_sock(sk); 375} 376 377/* 378 * Specifiy default interface for outgoing multicasts 379 */ 380static int set_mcast_if(struct sock *sk, char *ifname) 381{ 382 struct net_device *dev; 383 struct inet_sock *inet = inet_sk(sk); 384 385 if ((dev = __dev_get_by_name(ifname)) == NULL) 386 return -ENODEV; 387 388 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 389 return -EINVAL; 390 391 lock_sock(sk); 392 inet->mc_index = dev->ifindex; 393 /* inet->mc_addr = 0; */ 394 release_sock(sk); 395 396 return 0; 397} 398 399 400/* 401 * Set the maximum length of sync message according to the 402 * specified interface's MTU. 403 */ 404static int set_sync_mesg_maxlen(int sync_state) 405{ 406 struct net_device *dev; 407 int num; 408 409 if (sync_state == IP_VS_STATE_MASTER) { 410 if ((dev = __dev_get_by_name(ip_vs_master_mcast_ifn)) == NULL) 411 return -ENODEV; 412 413 num = (dev->mtu - sizeof(struct iphdr) - 414 sizeof(struct udphdr) - 415 SYNC_MESG_HEADER_LEN - 20) / SIMPLE_CONN_SIZE; 416 sync_send_mesg_maxlen = 417 SYNC_MESG_HEADER_LEN + SIMPLE_CONN_SIZE * num; 418 IP_VS_DBG(7, "setting the maximum length of sync sending " 419 "message %d.\n", sync_send_mesg_maxlen); 420 } else if (sync_state == IP_VS_STATE_BACKUP) { 421 if ((dev = __dev_get_by_name(ip_vs_backup_mcast_ifn)) == NULL) 422 return -ENODEV; 423 424 sync_recv_mesg_maxlen = dev->mtu - 425 sizeof(struct iphdr) - sizeof(struct udphdr); 426 IP_VS_DBG(7, "setting the maximum length of sync receiving " 427 "message %d.\n", sync_recv_mesg_maxlen); 428 } 429 430 return 0; 431} 432 433 434/* 435 * Join a multicast group. 436 * the group is specified by a class D multicast address 224.0.0.0/8 437 * in the in_addr structure passed in as a parameter. 438 */ 439static int 440join_mcast_group(struct sock *sk, struct in_addr *addr, char *ifname) 441{ 442 struct ip_mreqn mreq; 443 struct net_device *dev; 444 int ret; 445 446 memset(&mreq, 0, sizeof(mreq)); 447 memcpy(&mreq.imr_multiaddr, addr, sizeof(struct in_addr)); 448 449 if ((dev = __dev_get_by_name(ifname)) == NULL) 450 return -ENODEV; 451 if (sk->sk_bound_dev_if && dev->ifindex != sk->sk_bound_dev_if) 452 return -EINVAL; 453 454 mreq.imr_ifindex = dev->ifindex; 455 456 lock_sock(sk); 457 ret = ip_mc_join_group(sk, &mreq); 458 release_sock(sk); 459 460 return ret; 461} 462 463 464static int bind_mcastif_addr(struct socket *sock, char *ifname) 465{ 466 struct net_device *dev; 467 __be32 addr; 468 struct sockaddr_in sin; 469 470 if ((dev = __dev_get_by_name(ifname)) == NULL) 471 return -ENODEV; 472 473 addr = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE); 474 if (!addr) 475 IP_VS_ERR("You probably need to specify IP address on " 476 "multicast interface.\n"); 477 478 IP_VS_DBG(7, "binding socket with (%s) %u.%u.%u.%u\n", 479 ifname, NIPQUAD(addr)); 480 481 /* Now bind the socket with the address of multicast interface */ 482 sin.sin_family = AF_INET; 483 sin.sin_addr.s_addr = addr; 484 sin.sin_port = 0; 485 486 return sock->ops->bind(sock, (struct sockaddr*)&sin, sizeof(sin)); 487} 488 489/* 490 * Set up sending multicast socket over UDP 491 */ 492static struct socket * make_send_sock(void) 493{ 494 struct socket *sock; 495 496 /* First create a socket */ 497 if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { 498 IP_VS_ERR("Error during creation of socket; terminating\n"); 499 return NULL; 500 } 501 502 if (set_mcast_if(sock->sk, ip_vs_master_mcast_ifn) < 0) { 503 IP_VS_ERR("Error setting outbound mcast interface\n"); 504 goto error; 505 } 506 507 set_mcast_loop(sock->sk, 0); 508 set_mcast_ttl(sock->sk, 1); 509 510 if (bind_mcastif_addr(sock, ip_vs_master_mcast_ifn) < 0) { 511 IP_VS_ERR("Error binding address of the mcast interface\n"); 512 goto error; 513 } 514 515 if (sock->ops->connect(sock, 516 (struct sockaddr*)&mcast_addr, 517 sizeof(struct sockaddr), 0) < 0) { 518 IP_VS_ERR("Error connecting to the multicast addr\n"); 519 goto error; 520 } 521 522 return sock; 523 524 error: 525 sock_release(sock); 526 return NULL; 527} 528 529 530/* 531 * Set up receiving multicast socket over UDP 532 */ 533static struct socket * make_receive_sock(void) 534{ 535 struct socket *sock; 536 537 /* First create a socket */ 538 if (sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock) < 0) { 539 IP_VS_ERR("Error during creation of socket; terminating\n"); 540 return NULL; 541 } 542 543 /* it is equivalent to the REUSEADDR option in user-space */ 544 sock->sk->sk_reuse = 1; 545 546 if (sock->ops->bind(sock, 547 (struct sockaddr*)&mcast_addr, 548 sizeof(struct sockaddr)) < 0) { 549 IP_VS_ERR("Error binding to the multicast addr\n"); 550 goto error; 551 } 552 553 /* join the multicast group */ 554 if (join_mcast_group(sock->sk, 555 (struct in_addr*)&mcast_addr.sin_addr, 556 ip_vs_backup_mcast_ifn) < 0) { 557 IP_VS_ERR("Error joining to the multicast group\n"); 558 goto error; 559 } 560 561 return sock; 562 563 error: 564 sock_release(sock); 565 return NULL; 566} 567 568 569static int 570ip_vs_send_async(struct socket *sock, const char *buffer, const size_t length) 571{ 572 struct msghdr msg = {.msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL}; 573 struct kvec iov; 574 int len; 575 576 EnterFunction(7); 577 iov.iov_base = (void *)buffer; 578 iov.iov_len = length; 579 580 len = kernel_sendmsg(sock, &msg, &iov, 1, (size_t)(length)); 581 582 LeaveFunction(7); 583 return len; 584} 585 586static void 587ip_vs_send_sync_msg(struct socket *sock, struct ip_vs_sync_mesg *msg) 588{ 589 int msize; 590 591 msize = msg->size; 592 593 /* Put size in network byte order */ 594 msg->size = htons(msg->size); 595 596 if (ip_vs_send_async(sock, (char *)msg, msize) != msize) 597 IP_VS_ERR("ip_vs_send_async error\n"); 598} 599 600static int 601ip_vs_receive(struct socket *sock, char *buffer, const size_t buflen) 602{ 603 struct msghdr msg = {NULL,}; 604 struct kvec iov; 605 int len; 606 607 EnterFunction(7); 608 609 /* Receive a packet */ 610 iov.iov_base = buffer; 611 iov.iov_len = (size_t)buflen; 612 613 len = kernel_recvmsg(sock, &msg, &iov, 1, buflen, 0); 614 615 if (len < 0) 616 return -1; 617 618 LeaveFunction(7); 619 return len; 620} 621 622 623static DECLARE_WAIT_QUEUE_HEAD(sync_wait); 624static pid_t sync_master_pid = 0; 625static pid_t sync_backup_pid = 0; 626 627static DECLARE_WAIT_QUEUE_HEAD(stop_sync_wait); 628static int stop_master_sync = 0; 629static int stop_backup_sync = 0; 630 631static void sync_master_loop(void) 632{ 633 struct socket *sock; 634 struct ip_vs_sync_buff *sb; 635 636 /* create the sending multicast socket */ 637 sock = make_send_sock(); 638 if (!sock) 639 return; 640 641 IP_VS_INFO("sync thread started: state = MASTER, mcast_ifn = %s, " 642 "syncid = %d\n", 643 ip_vs_master_mcast_ifn, ip_vs_master_syncid); 644 645 for (;;) { 646 while ((sb=sb_dequeue())) { 647 ip_vs_send_sync_msg(sock, sb->mesg); 648 ip_vs_sync_buff_release(sb); 649 } 650 651 /* check if entries stay in curr_sb for 2 seconds */ 652 if ((sb = get_curr_sync_buff(2*HZ))) { 653 ip_vs_send_sync_msg(sock, sb->mesg); 654 ip_vs_sync_buff_release(sb); 655 } 656 657 if (stop_master_sync) 658 break; 659 660 ssleep(1); 661 } 662 663 /* clean up the sync_buff queue */ 664 while ((sb=sb_dequeue())) { 665 ip_vs_sync_buff_release(sb); 666 } 667 668 /* clean up the current sync_buff */ 669 if ((sb = get_curr_sync_buff(0))) { 670 ip_vs_sync_buff_release(sb); 671 } 672 673 /* release the sending multicast socket */ 674 sock_release(sock); 675} 676 677 678static void sync_backup_loop(void) 679{ 680 struct socket *sock; 681 char *buf; 682 int len; 683 684 if (!(buf = kmalloc(sync_recv_mesg_maxlen, GFP_ATOMIC))) { 685 IP_VS_ERR("sync_backup_loop: kmalloc error\n"); 686 return; 687 } 688 689 /* create the receiving multicast socket */ 690 sock = make_receive_sock(); 691 if (!sock) 692 goto out; 693 694 IP_VS_INFO("sync thread started: state = BACKUP, mcast_ifn = %s, " 695 "syncid = %d\n", 696 ip_vs_backup_mcast_ifn, ip_vs_backup_syncid); 697 698 for (;;) { 699 /* do you have data now? */ 700 while (!skb_queue_empty(&(sock->sk->sk_receive_queue))) { 701 if ((len = 702 ip_vs_receive(sock, buf, 703 sync_recv_mesg_maxlen)) <= 0) { 704 IP_VS_ERR("receiving message error\n"); 705 break; 706 } 707 /* disable bottom half, because it accessed the data 708 shared by softirq while getting/creating conns */ 709 local_bh_disable(); 710 ip_vs_process_message(buf, len); 711 local_bh_enable(); 712 } 713 714 if (stop_backup_sync) 715 break; 716 717 ssleep(1); 718 } 719 720 /* release the sending multicast socket */ 721 sock_release(sock); 722 723 out: 724 kfree(buf); 725} 726 727 728static void set_sync_pid(int sync_state, pid_t sync_pid) 729{ 730 if (sync_state == IP_VS_STATE_MASTER) 731 sync_master_pid = sync_pid; 732 else if (sync_state == IP_VS_STATE_BACKUP) 733 sync_backup_pid = sync_pid; 734} 735 736static void set_stop_sync(int sync_state, int set) 737{ 738 if (sync_state == IP_VS_STATE_MASTER) 739 stop_master_sync = set; 740 else if (sync_state == IP_VS_STATE_BACKUP) 741 stop_backup_sync = set; 742 else { 743 stop_master_sync = set; 744 stop_backup_sync = set; 745 } 746} 747 748static int sync_thread(void *startup) 749{ 750 DECLARE_WAITQUEUE(wait, current); 751 mm_segment_t oldmm; 752 int state; 753 const char *name; 754 755 /* increase the module use count */ 756 ip_vs_use_count_inc(); 757 758 if (ip_vs_sync_state & IP_VS_STATE_MASTER && !sync_master_pid) { 759 state = IP_VS_STATE_MASTER; 760 name = "ipvs_syncmaster"; 761 } else if (ip_vs_sync_state & IP_VS_STATE_BACKUP && !sync_backup_pid) { 762 state = IP_VS_STATE_BACKUP; 763 name = "ipvs_syncbackup"; 764 } else { 765 IP_VS_BUG(); 766 ip_vs_use_count_dec(); 767 return -EINVAL; 768 } 769 770 daemonize(name); 771 772 oldmm = get_fs(); 773 set_fs(KERNEL_DS); 774 775 /* Block all signals */ 776 spin_lock_irq(&current->sighand->siglock); 777 siginitsetinv(&current->blocked, 0); 778 recalc_sigpending(); 779 spin_unlock_irq(&current->sighand->siglock); 780 781 /* set the maximum length of sync message */ 782 set_sync_mesg_maxlen(state); 783 784 /* set up multicast address */ 785 mcast_addr.sin_family = AF_INET; 786 mcast_addr.sin_port = htons(IP_VS_SYNC_PORT); 787 mcast_addr.sin_addr.s_addr = htonl(IP_VS_SYNC_GROUP); 788 789 add_wait_queue(&sync_wait, &wait); 790 791 set_sync_pid(state, current->pid); 792 complete((struct completion *)startup); 793 794 /* processing master/backup loop here */ 795 if (state == IP_VS_STATE_MASTER) 796 sync_master_loop(); 797 else if (state == IP_VS_STATE_BACKUP) 798 sync_backup_loop(); 799 else IP_VS_BUG(); 800 801 remove_wait_queue(&sync_wait, &wait); 802 803 /* thread exits */ 804 set_sync_pid(state, 0); 805 IP_VS_INFO("sync thread stopped!\n"); 806 807 set_fs(oldmm); 808 809 /* decrease the module use count */ 810 ip_vs_use_count_dec(); 811 812 set_stop_sync(state, 0); 813 wake_up(&stop_sync_wait); 814 815 return 0; 816} 817 818 819static int fork_sync_thread(void *startup) 820{ 821 pid_t pid; 822 823 /* fork the sync thread here, then the parent process of the 824 sync thread is the init process after this thread exits. */ 825 repeat: 826 if ((pid = kernel_thread(sync_thread, startup, 0)) < 0) { 827 IP_VS_ERR("could not create sync_thread due to %d... " 828 "retrying.\n", pid); 829 ssleep(1); 830 goto repeat; 831 } 832 833 return 0; 834} 835 836 837int start_sync_thread(int state, char *mcast_ifn, __u8 syncid) 838{ 839 DECLARE_COMPLETION_ONSTACK(startup); 840 pid_t pid; 841 842 if ((state == IP_VS_STATE_MASTER && sync_master_pid) || 843 (state == IP_VS_STATE_BACKUP && sync_backup_pid)) 844 return -EEXIST; 845 846 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); 847 IP_VS_DBG(7, "Each ip_vs_sync_conn entry need %Zd bytes\n", 848 sizeof(struct ip_vs_sync_conn)); 849 850 ip_vs_sync_state |= state; 851 if (state == IP_VS_STATE_MASTER) { 852 strlcpy(ip_vs_master_mcast_ifn, mcast_ifn, sizeof(ip_vs_master_mcast_ifn)); 853 ip_vs_master_syncid = syncid; 854 } else { 855 strlcpy(ip_vs_backup_mcast_ifn, mcast_ifn, sizeof(ip_vs_backup_mcast_ifn)); 856 ip_vs_backup_syncid = syncid; 857 } 858 859 repeat: 860 if ((pid = kernel_thread(fork_sync_thread, &startup, 0)) < 0) { 861 IP_VS_ERR("could not create fork_sync_thread due to %d... " 862 "retrying.\n", pid); 863 ssleep(1); 864 goto repeat; 865 } 866 867 wait_for_completion(&startup); 868 869 return 0; 870} 871 872 873int stop_sync_thread(int state) 874{ 875 DECLARE_WAITQUEUE(wait, current); 876 877 if ((state == IP_VS_STATE_MASTER && !sync_master_pid) || 878 (state == IP_VS_STATE_BACKUP && !sync_backup_pid)) 879 return -ESRCH; 880 881 IP_VS_DBG(7, "%s: pid %d\n", __FUNCTION__, current->pid); 882 IP_VS_INFO("stopping sync thread %d ...\n", 883 (state == IP_VS_STATE_MASTER) ? sync_master_pid : sync_backup_pid); 884 885 __set_current_state(TASK_UNINTERRUPTIBLE); 886 add_wait_queue(&stop_sync_wait, &wait); 887 set_stop_sync(state, 1); 888 ip_vs_sync_state -= state; 889 wake_up(&sync_wait); 890 schedule(); 891 __set_current_state(TASK_RUNNING); 892 remove_wait_queue(&stop_sync_wait, &wait); 893 894 /* Note: no need to reap the sync thread, because its parent 895 process is the init process */ 896 897 if ((state == IP_VS_STATE_MASTER && stop_master_sync) || 898 (state == IP_VS_STATE_BACKUP && stop_backup_sync)) 899 IP_VS_BUG(); 900 901 return 0; 902}