Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'dlm-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:
"This set mainly includes a change to the way the dlm uses the SCTP API
in the kernel, removing the direct dependency on the sctp module.
Other odd SCTP-related fixes are also included.

The other notable fix is for a long-standing regression in the
behavior of lock value blocks for user-space locks"

* tag 'dlm-4.3' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm:
dlm: print error from kernel_sendpage
dlm: fix lvb copy for user locks
dlm: sctp_accept_from_sock() can be static
dlm: fix reconnecting but not sending data
dlm: replace BUG_ON with a less severe handling
dlm: use sctp 1-to-1 API
dlm: fix not reconnecting on connecting error handling
dlm: fix race while closing connections
dlm: fix connection stealing if using SCTP

+309 -451
+304 -447
fs/dlm/lowcomms.c
··· 120 120 struct cbuf cb; 121 121 int retries; 122 122 #define MAX_CONNECT_RETRIES 3 123 - int sctp_assoc; 124 123 struct hlist_node list; 125 124 struct connection *othercon; 126 125 struct work_struct rwork; /* Receive workqueue */ 127 126 struct work_struct swork; /* Send workqueue */ 128 - bool try_new_addr; 127 + void (*orig_error_report)(struct sock *sk); 129 128 }; 130 129 #define sock2con(x) ((struct connection *)(x)->sk_user_data) 131 130 ··· 251 252 return con; 252 253 } 253 254 254 - /* This is a bit drastic, but only called when things go wrong */ 255 - static struct connection *assoc2con(int assoc_id) 256 - { 257 - int i; 258 - struct connection *con; 259 - 260 - mutex_lock(&connections_lock); 261 - 262 - for (i = 0 ; i < CONN_HASH_SIZE; i++) { 263 - hlist_for_each_entry(con, &connection_hash[i], list) { 264 - if (con->sctp_assoc == assoc_id) { 265 - mutex_unlock(&connections_lock); 266 - return con; 267 - } 268 - } 269 - } 270 - mutex_unlock(&connections_lock); 271 - return NULL; 272 - } 273 - 274 255 static struct dlm_node_addr *find_node_addr(int nodeid) 275 256 { 276 257 struct dlm_node_addr *na; ··· 301 322 spin_lock(&dlm_node_addrs_spin); 302 323 na = find_node_addr(nodeid); 303 324 if (na && na->addr_count) { 325 + memcpy(&sas, na->addr[na->curr_addr_index], 326 + sizeof(struct sockaddr_storage)); 327 + 304 328 if (try_new_addr) { 305 329 na->curr_addr_index++; 306 330 if (na->curr_addr_index == na->addr_count) 307 331 na->curr_addr_index = 0; 308 332 } 309 - 310 - memcpy(&sas, na->addr[na->curr_addr_index ], 311 - sizeof(struct sockaddr_storage)); 312 333 } 313 334 spin_unlock(&dlm_node_addrs_spin); 314 335 ··· 438 459 439 460 static void lowcomms_state_change(struct sock *sk) 440 461 { 441 - if (sk->sk_state == TCP_ESTABLISHED) 462 + /* SCTP layer is not calling sk_data_ready when the connection 463 + * is done, so we catch the signal through here. 
Also, it 464 + * doesn't switch socket state when entering shutdown, so we 465 + * skip the write in that case. 466 + */ 467 + if (sk->sk_shutdown) { 468 + if (sk->sk_shutdown == RCV_SHUTDOWN) 469 + lowcomms_data_ready(sk); 470 + } else if (sk->sk_state == TCP_ESTABLISHED) { 442 471 lowcomms_write_space(sk); 472 + } 443 473 } 444 474 445 475 int dlm_lowcomms_connect_node(int nodeid) 446 476 { 447 477 struct connection *con; 448 - 449 - /* with sctp there's no connecting without sending */ 450 - if (dlm_config.ci_protocol != 0) 451 - return 0; 452 478 453 479 if (nodeid == dlm_our_nodeid()) 454 480 return 0; ··· 463 479 return -ENOMEM; 464 480 lowcomms_connect_sock(con); 465 481 return 0; 482 + } 483 + 484 + static void lowcomms_error_report(struct sock *sk) 485 + { 486 + struct connection *con = sock2con(sk); 487 + struct sockaddr_storage saddr; 488 + 489 + if (nodeid_to_addr(con->nodeid, &saddr, NULL, false)) { 490 + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 491 + "sending to node %d, port %d, " 492 + "sk_err=%d/%d\n", dlm_our_nodeid(), 493 + con->nodeid, dlm_config.ci_tcp_port, 494 + sk->sk_err, sk->sk_err_soft); 495 + return; 496 + } else if (saddr.ss_family == AF_INET) { 497 + struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; 498 + 499 + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 500 + "sending to node %d at %pI4, port %d, " 501 + "sk_err=%d/%d\n", dlm_our_nodeid(), 502 + con->nodeid, &sin4->sin_addr.s_addr, 503 + dlm_config.ci_tcp_port, sk->sk_err, 504 + sk->sk_err_soft); 505 + } else { 506 + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; 507 + 508 + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 509 + "sending to node %d at %u.%u.%u.%u, " 510 + "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), 511 + con->nodeid, sin6->sin6_addr.s6_addr32[0], 512 + sin6->sin6_addr.s6_addr32[1], 513 + sin6->sin6_addr.s6_addr32[2], 514 + sin6->sin6_addr.s6_addr32[3], 515 + dlm_config.ci_tcp_port, sk->sk_err, 516 + 
sk->sk_err_soft); 517 + } 518 + con->orig_error_report(sk); 466 519 } 467 520 468 521 /* Make a socket active */ ··· 513 492 con->sock->sk->sk_state_change = lowcomms_state_change; 514 493 con->sock->sk->sk_user_data = con; 515 494 con->sock->sk->sk_allocation = GFP_NOFS; 495 + con->orig_error_report = con->sock->sk->sk_error_report; 496 + con->sock->sk->sk_error_report = lowcomms_error_report; 516 497 } 517 498 518 499 /* Add the port number to an IPv6 or 4 sockaddr and return the address ··· 537 514 } 538 515 539 516 /* Close a remote connection and tidy up */ 540 - static void close_connection(struct connection *con, bool and_other) 517 + static void close_connection(struct connection *con, bool and_other, 518 + bool tx, bool rx) 541 519 { 542 - mutex_lock(&con->sock_mutex); 520 + clear_bit(CF_CONNECT_PENDING, &con->flags); 521 + clear_bit(CF_WRITE_PENDING, &con->flags); 522 + if (tx && cancel_work_sync(&con->swork)) 523 + log_print("canceled swork for node %d", con->nodeid); 524 + if (rx && cancel_work_sync(&con->rwork)) 525 + log_print("canceled rwork for node %d", con->nodeid); 543 526 527 + mutex_lock(&con->sock_mutex); 544 528 if (con->sock) { 545 529 sock_release(con->sock); 546 530 con->sock = NULL; 547 531 } 548 532 if (con->othercon && and_other) { 549 533 /* Will only re-enter once. 
*/ 550 - close_connection(con->othercon, false); 534 + close_connection(con->othercon, false, true, true); 551 535 } 552 536 if (con->rx_page) { 553 537 __free_page(con->rx_page); ··· 563 533 564 534 con->retries = 0; 565 535 mutex_unlock(&con->sock_mutex); 566 - } 567 - 568 - /* We only send shutdown messages to nodes that are not part of the cluster */ 569 - static void sctp_send_shutdown(sctp_assoc_t associd) 570 - { 571 - static char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; 572 - struct msghdr outmessage; 573 - struct cmsghdr *cmsg; 574 - struct sctp_sndrcvinfo *sinfo; 575 - int ret; 576 - struct connection *con; 577 - 578 - con = nodeid2con(0,0); 579 - BUG_ON(con == NULL); 580 - 581 - outmessage.msg_name = NULL; 582 - outmessage.msg_namelen = 0; 583 - outmessage.msg_control = outcmsg; 584 - outmessage.msg_controllen = sizeof(outcmsg); 585 - outmessage.msg_flags = MSG_EOR; 586 - 587 - cmsg = CMSG_FIRSTHDR(&outmessage); 588 - cmsg->cmsg_level = IPPROTO_SCTP; 589 - cmsg->cmsg_type = SCTP_SNDRCV; 590 - cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); 591 - outmessage.msg_controllen = cmsg->cmsg_len; 592 - sinfo = CMSG_DATA(cmsg); 593 - memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); 594 - 595 - sinfo->sinfo_flags |= MSG_EOF; 596 - sinfo->sinfo_assoc_id = associd; 597 - 598 - ret = kernel_sendmsg(con->sock, &outmessage, NULL, 0, 0); 599 - 600 - if (ret != 0) 601 - log_print("send EOF to node failed: %d", ret); 602 - } 603 - 604 - static void sctp_init_failed_foreach(struct connection *con) 605 - { 606 - 607 - /* 608 - * Don't try to recover base con and handle race where the 609 - * other node's assoc init creates a assoc and we get that 610 - * notification, then we get a notification that our attempt 611 - * failed due. This happens when we are still trying the primary 612 - * address, but the other node has already tried secondary addrs 613 - * and found one that worked. 
614 - */ 615 - if (!con->nodeid || con->sctp_assoc) 616 - return; 617 - 618 - log_print("Retrying SCTP association init for node %d\n", con->nodeid); 619 - 620 - con->try_new_addr = true; 621 - con->sctp_assoc = 0; 622 - if (test_and_clear_bit(CF_INIT_PENDING, &con->flags)) { 623 - if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 624 - queue_work(send_workqueue, &con->swork); 625 - } 626 - } 627 - 628 - /* INIT failed but we don't know which node... 629 - restart INIT on all pending nodes */ 630 - static void sctp_init_failed(void) 631 - { 632 - mutex_lock(&connections_lock); 633 - 634 - foreach_conn(sctp_init_failed_foreach); 635 - 636 - mutex_unlock(&connections_lock); 637 - } 638 - 639 - static void retry_failed_sctp_send(struct connection *recv_con, 640 - struct sctp_send_failed *sn_send_failed, 641 - char *buf) 642 - { 643 - int len = sn_send_failed->ssf_length - sizeof(struct sctp_send_failed); 644 - struct dlm_mhandle *mh; 645 - struct connection *con; 646 - char *retry_buf; 647 - int nodeid = sn_send_failed->ssf_info.sinfo_ppid; 648 - 649 - log_print("Retry sending %d bytes to node id %d", len, nodeid); 650 - 651 - if (!nodeid) { 652 - log_print("Shouldn't resend data via listening connection."); 653 - return; 654 - } 655 - 656 - con = nodeid2con(nodeid, 0); 657 - if (!con) { 658 - log_print("Could not look up con for nodeid %d\n", 659 - nodeid); 660 - return; 661 - } 662 - 663 - mh = dlm_lowcomms_get_buffer(nodeid, len, GFP_NOFS, &retry_buf); 664 - if (!mh) { 665 - log_print("Could not allocate buf for retry."); 666 - return; 667 - } 668 - memcpy(retry_buf, buf + sizeof(struct sctp_send_failed), len); 669 - dlm_lowcomms_commit_buffer(mh); 670 - 671 - /* 672 - * If we got a assoc changed event before the send failed event then 673 - * we only need to retry the send. 
674 - */ 675 - if (con->sctp_assoc) { 676 - if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) 677 - queue_work(send_workqueue, &con->swork); 678 - } else 679 - sctp_init_failed_foreach(con); 680 - } 681 - 682 - /* Something happened to an association */ 683 - static void process_sctp_notification(struct connection *con, 684 - struct msghdr *msg, char *buf) 685 - { 686 - union sctp_notification *sn = (union sctp_notification *)buf; 687 - struct linger linger; 688 - 689 - switch (sn->sn_header.sn_type) { 690 - case SCTP_SEND_FAILED: 691 - retry_failed_sctp_send(con, &sn->sn_send_failed, buf); 692 - break; 693 - case SCTP_ASSOC_CHANGE: 694 - switch (sn->sn_assoc_change.sac_state) { 695 - case SCTP_COMM_UP: 696 - case SCTP_RESTART: 697 - { 698 - /* Check that the new node is in the lockspace */ 699 - struct sctp_prim prim; 700 - int nodeid; 701 - int prim_len, ret; 702 - int addr_len; 703 - struct connection *new_con; 704 - 705 - /* 706 - * We get this before any data for an association. 707 - * We verify that the node is in the cluster and 708 - * then peel off a socket for it. 
709 - */ 710 - if ((int)sn->sn_assoc_change.sac_assoc_id <= 0) { 711 - log_print("COMM_UP for invalid assoc ID %d", 712 - (int)sn->sn_assoc_change.sac_assoc_id); 713 - sctp_init_failed(); 714 - return; 715 - } 716 - memset(&prim, 0, sizeof(struct sctp_prim)); 717 - prim_len = sizeof(struct sctp_prim); 718 - prim.ssp_assoc_id = sn->sn_assoc_change.sac_assoc_id; 719 - 720 - ret = kernel_getsockopt(con->sock, 721 - IPPROTO_SCTP, 722 - SCTP_PRIMARY_ADDR, 723 - (char*)&prim, 724 - &prim_len); 725 - if (ret < 0) { 726 - log_print("getsockopt/sctp_primary_addr on " 727 - "new assoc %d failed : %d", 728 - (int)sn->sn_assoc_change.sac_assoc_id, 729 - ret); 730 - 731 - /* Retry INIT later */ 732 - new_con = assoc2con(sn->sn_assoc_change.sac_assoc_id); 733 - if (new_con) 734 - clear_bit(CF_CONNECT_PENDING, &con->flags); 735 - return; 736 - } 737 - make_sockaddr(&prim.ssp_addr, 0, &addr_len); 738 - if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 739 - unsigned char *b=(unsigned char *)&prim.ssp_addr; 740 - log_print("reject connect from unknown addr"); 741 - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 742 - b, sizeof(struct sockaddr_storage)); 743 - sctp_send_shutdown(prim.ssp_assoc_id); 744 - return; 745 - } 746 - 747 - new_con = nodeid2con(nodeid, GFP_NOFS); 748 - if (!new_con) 749 - return; 750 - 751 - /* Peel off a new sock */ 752 - lock_sock(con->sock->sk); 753 - ret = sctp_do_peeloff(con->sock->sk, 754 - sn->sn_assoc_change.sac_assoc_id, 755 - &new_con->sock); 756 - release_sock(con->sock->sk); 757 - if (ret < 0) { 758 - log_print("Can't peel off a socket for " 759 - "connection %d to node %d: err=%d", 760 - (int)sn->sn_assoc_change.sac_assoc_id, 761 - nodeid, ret); 762 - return; 763 - } 764 - add_sock(new_con->sock, new_con); 765 - 766 - linger.l_onoff = 1; 767 - linger.l_linger = 0; 768 - ret = kernel_setsockopt(new_con->sock, SOL_SOCKET, SO_LINGER, 769 - (char *)&linger, sizeof(linger)); 770 - if (ret < 0) 771 - log_print("set socket option SO_LINGER failed"); 772 - 
773 - log_print("connecting to %d sctp association %d", 774 - nodeid, (int)sn->sn_assoc_change.sac_assoc_id); 775 - 776 - new_con->sctp_assoc = sn->sn_assoc_change.sac_assoc_id; 777 - new_con->try_new_addr = false; 778 - /* Send any pending writes */ 779 - clear_bit(CF_CONNECT_PENDING, &new_con->flags); 780 - clear_bit(CF_INIT_PENDING, &new_con->flags); 781 - if (!test_and_set_bit(CF_WRITE_PENDING, &new_con->flags)) { 782 - queue_work(send_workqueue, &new_con->swork); 783 - } 784 - if (!test_and_set_bit(CF_READ_PENDING, &new_con->flags)) 785 - queue_work(recv_workqueue, &new_con->rwork); 786 - } 787 - break; 788 - 789 - case SCTP_COMM_LOST: 790 - case SCTP_SHUTDOWN_COMP: 791 - { 792 - con = assoc2con(sn->sn_assoc_change.sac_assoc_id); 793 - if (con) { 794 - con->sctp_assoc = 0; 795 - } 796 - } 797 - break; 798 - 799 - case SCTP_CANT_STR_ASSOC: 800 - { 801 - /* Will retry init when we get the send failed notification */ 802 - log_print("Can't start SCTP association - retrying"); 803 - } 804 - break; 805 - 806 - default: 807 - log_print("unexpected SCTP assoc change id=%d state=%d", 808 - (int)sn->sn_assoc_change.sac_assoc_id, 809 - sn->sn_assoc_change.sac_state); 810 - } 811 - default: 812 - ; /* fall through */ 813 - } 814 536 } 815 537 816 538 /* Data received from remote end */ ··· 575 793 int r; 576 794 int call_again_soon = 0; 577 795 int nvec; 578 - char incmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; 579 796 580 797 mutex_lock(&con->sock_mutex); 581 798 582 799 if (con->sock == NULL) { 583 800 ret = -EAGAIN; 801 + goto out_close; 802 + } 803 + if (con->nodeid == 0) { 804 + ret = -EINVAL; 584 805 goto out_close; 585 806 } 586 807 ··· 597 812 goto out_resched; 598 813 cbuf_init(&con->cb, PAGE_CACHE_SIZE); 599 814 } 600 - 601 - /* Only SCTP needs these really */ 602 - memset(&incmsg, 0, sizeof(incmsg)); 603 - msg.msg_control = incmsg; 604 - msg.msg_controllen = sizeof(incmsg); 605 815 606 816 /* 607 817 * iov[0] is the bit of the circular buffer between the 
current end ··· 623 843 MSG_DONTWAIT | MSG_NOSIGNAL); 624 844 if (ret <= 0) 625 845 goto out_close; 626 - 627 - /* Process SCTP notifications */ 628 - if (msg.msg_flags & MSG_NOTIFICATION) { 629 - msg.msg_control = incmsg; 630 - msg.msg_controllen = sizeof(incmsg); 631 - 632 - process_sctp_notification(con, &msg, 633 - page_address(con->rx_page) + con->cb.base); 634 - mutex_unlock(&con->sock_mutex); 635 - return 0; 636 - } 637 - BUG_ON(con->nodeid == 0); 638 - 639 - if (ret == len) 846 + else if (ret == len) 640 847 call_again_soon = 1; 848 + 641 849 cbuf_add(&con->cb, ret); 642 850 ret = dlm_process_incoming_buffer(con->nodeid, 643 851 page_address(con->rx_page), 644 852 con->cb.base, con->cb.len, 645 853 PAGE_CACHE_SIZE); 646 854 if (ret == -EBADMSG) { 647 - log_print("lowcomms: addr=%p, base=%u, len=%u, " 648 - "iov_len=%u, iov_base[0]=%p, read=%d", 649 - page_address(con->rx_page), con->cb.base, con->cb.len, 650 - len, iov[0].iov_base, r); 855 + log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d", 856 + page_address(con->rx_page), con->cb.base, 857 + con->cb.len, r); 651 858 } 652 859 if (ret < 0) 653 860 goto out_close; ··· 659 892 out_close: 660 893 mutex_unlock(&con->sock_mutex); 661 894 if (ret != -EAGAIN) { 662 - close_connection(con, false); 895 + close_connection(con, false, true, false); 663 896 /* Reconnect when there is something to send */ 664 897 } 665 898 /* Don't return success if we really got EOF */ ··· 800 1033 return result; 801 1034 } 802 1035 1036 + static int sctp_accept_from_sock(struct connection *con) 1037 + { 1038 + /* Check that the new node is in the lockspace */ 1039 + struct sctp_prim prim; 1040 + int nodeid; 1041 + int prim_len, ret; 1042 + int addr_len; 1043 + struct connection *newcon; 1044 + struct connection *addcon; 1045 + struct socket *newsock; 1046 + 1047 + mutex_lock(&connections_lock); 1048 + if (!dlm_allow_conn) { 1049 + mutex_unlock(&connections_lock); 1050 + return -1; 1051 + } 1052 + 
mutex_unlock(&connections_lock); 1053 + 1054 + mutex_lock_nested(&con->sock_mutex, 0); 1055 + 1056 + ret = kernel_accept(con->sock, &newsock, O_NONBLOCK); 1057 + if (ret < 0) 1058 + goto accept_err; 1059 + 1060 + memset(&prim, 0, sizeof(struct sctp_prim)); 1061 + prim_len = sizeof(struct sctp_prim); 1062 + 1063 + ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR, 1064 + (char *)&prim, &prim_len); 1065 + if (ret < 0) { 1066 + log_print("getsockopt/sctp_primary_addr failed: %d", ret); 1067 + goto accept_err; 1068 + } 1069 + 1070 + make_sockaddr(&prim.ssp_addr, 0, &addr_len); 1071 + if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 1072 + unsigned char *b = (unsigned char *)&prim.ssp_addr; 1073 + 1074 + log_print("reject connect from unknown addr"); 1075 + print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 1076 + b, sizeof(struct sockaddr_storage)); 1077 + goto accept_err; 1078 + } 1079 + 1080 + newcon = nodeid2con(nodeid, GFP_NOFS); 1081 + if (!newcon) { 1082 + ret = -ENOMEM; 1083 + goto accept_err; 1084 + } 1085 + 1086 + mutex_lock_nested(&newcon->sock_mutex, 1); 1087 + 1088 + if (newcon->sock) { 1089 + struct connection *othercon = newcon->othercon; 1090 + 1091 + if (!othercon) { 1092 + othercon = kmem_cache_zalloc(con_cache, GFP_NOFS); 1093 + if (!othercon) { 1094 + log_print("failed to allocate incoming socket"); 1095 + mutex_unlock(&newcon->sock_mutex); 1096 + ret = -ENOMEM; 1097 + goto accept_err; 1098 + } 1099 + othercon->nodeid = nodeid; 1100 + othercon->rx_action = receive_from_sock; 1101 + mutex_init(&othercon->sock_mutex); 1102 + INIT_WORK(&othercon->swork, process_send_sockets); 1103 + INIT_WORK(&othercon->rwork, process_recv_sockets); 1104 + set_bit(CF_IS_OTHERCON, &othercon->flags); 1105 + } 1106 + if (!othercon->sock) { 1107 + newcon->othercon = othercon; 1108 + othercon->sock = newsock; 1109 + newsock->sk->sk_user_data = othercon; 1110 + add_sock(newsock, othercon); 1111 + addcon = othercon; 1112 + } else { 1113 + printk("Extra connection from 
node %d attempted\n", nodeid); 1114 + ret = -EAGAIN; 1115 + mutex_unlock(&newcon->sock_mutex); 1116 + goto accept_err; 1117 + } 1118 + } else { 1119 + newsock->sk->sk_user_data = newcon; 1120 + newcon->rx_action = receive_from_sock; 1121 + add_sock(newsock, newcon); 1122 + addcon = newcon; 1123 + } 1124 + 1125 + log_print("connected to %d", nodeid); 1126 + 1127 + mutex_unlock(&newcon->sock_mutex); 1128 + 1129 + /* 1130 + * Add it to the active queue in case we got data 1131 + * between processing the accept adding the socket 1132 + * to the read_sockets list 1133 + */ 1134 + if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags)) 1135 + queue_work(recv_workqueue, &addcon->rwork); 1136 + mutex_unlock(&con->sock_mutex); 1137 + 1138 + return 0; 1139 + 1140 + accept_err: 1141 + mutex_unlock(&con->sock_mutex); 1142 + if (newsock) 1143 + sock_release(newsock); 1144 + if (ret != -EAGAIN) 1145 + log_print("error accepting connection from node: %d", ret); 1146 + 1147 + return ret; 1148 + } 1149 + 803 1150 static void free_entry(struct writequeue_entry *e) 804 1151 { 805 1152 __free_page(e->page); ··· 938 1057 } 939 1058 } 940 1059 1060 + /* 1061 + * sctp_bind_addrs - bind a SCTP socket to all our addresses 1062 + */ 1063 + static int sctp_bind_addrs(struct connection *con, uint16_t port) 1064 + { 1065 + struct sockaddr_storage localaddr; 1066 + int i, addr_len, result = 0; 1067 + 1068 + for (i = 0; i < dlm_local_count; i++) { 1069 + memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); 1070 + make_sockaddr(&localaddr, port, &addr_len); 1071 + 1072 + if (!i) 1073 + result = kernel_bind(con->sock, 1074 + (struct sockaddr *)&localaddr, 1075 + addr_len); 1076 + else 1077 + result = kernel_setsockopt(con->sock, SOL_SCTP, 1078 + SCTP_SOCKOPT_BINDX_ADD, 1079 + (char *)&localaddr, addr_len); 1080 + 1081 + if (result < 0) { 1082 + log_print("Can't bind to %d addr number %d, %d.\n", 1083 + port, i + 1, result); 1084 + break; 1085 + } 1086 + } 1087 + return result; 1088 + } 1089 + 
941 1090 /* Initiate an SCTP association. 942 1091 This is a special case of send_to_sock() in that we don't yet have a 943 1092 peeled-off socket for this association, so we use the listening socket 944 1093 and add the primary IP address of the remote node. 945 1094 */ 946 - static void sctp_init_assoc(struct connection *con) 1095 + static void sctp_connect_to_sock(struct connection *con) 947 1096 { 948 - struct sockaddr_storage rem_addr; 949 - char outcmsg[CMSG_SPACE(sizeof(struct sctp_sndrcvinfo))]; 950 - struct msghdr outmessage; 951 - struct cmsghdr *cmsg; 952 - struct sctp_sndrcvinfo *sinfo; 953 - struct connection *base_con; 954 - struct writequeue_entry *e; 955 - int len, offset; 956 - int ret; 957 - int addrlen; 958 - struct kvec iov[1]; 1097 + struct sockaddr_storage daddr; 1098 + int one = 1; 1099 + int result; 1100 + int addr_len; 1101 + struct socket *sock; 1102 + 1103 + if (con->nodeid == 0) { 1104 + log_print("attempt to connect sock 0 foiled"); 1105 + return; 1106 + } 959 1107 960 1108 mutex_lock(&con->sock_mutex); 961 - if (test_and_set_bit(CF_INIT_PENDING, &con->flags)) 962 - goto unlock; 963 1109 964 - if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr, 965 - con->try_new_addr)) { 1110 + /* Some odd races can cause double-connects, ignore them */ 1111 + if (con->retries++ > MAX_CONNECT_RETRIES) 1112 + goto out; 1113 + 1114 + if (con->sock) { 1115 + log_print("node %d already connected.", con->nodeid); 1116 + goto out; 1117 + } 1118 + 1119 + memset(&daddr, 0, sizeof(daddr)); 1120 + result = nodeid_to_addr(con->nodeid, &daddr, NULL, true); 1121 + if (result < 0) { 966 1122 log_print("no address for nodeid %d", con->nodeid); 967 - goto unlock; 968 - } 969 - base_con = nodeid2con(0, 0); 970 - BUG_ON(base_con == NULL); 971 - 972 - make_sockaddr(&rem_addr, dlm_config.ci_tcp_port, &addrlen); 973 - 974 - outmessage.msg_name = &rem_addr; 975 - outmessage.msg_namelen = addrlen; 976 - outmessage.msg_control = outcmsg; 977 - 
outmessage.msg_controllen = sizeof(outcmsg); 978 - outmessage.msg_flags = MSG_EOR; 979 - 980 - spin_lock(&con->writequeue_lock); 981 - 982 - if (list_empty(&con->writequeue)) { 983 - spin_unlock(&con->writequeue_lock); 984 - log_print("writequeue empty for nodeid %d", con->nodeid); 985 - goto unlock; 1123 + goto out; 986 1124 } 987 1125 988 - e = list_first_entry(&con->writequeue, struct writequeue_entry, list); 989 - len = e->len; 990 - offset = e->offset; 1126 + /* Create a socket to communicate with */ 1127 + result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1128 + SOCK_STREAM, IPPROTO_SCTP, &sock); 1129 + if (result < 0) 1130 + goto socket_err; 991 1131 992 - /* Send the first block off the write queue */ 993 - iov[0].iov_base = page_address(e->page)+offset; 994 - iov[0].iov_len = len; 995 - spin_unlock(&con->writequeue_lock); 1132 + sock->sk->sk_user_data = con; 1133 + con->rx_action = receive_from_sock; 1134 + con->connect_action = sctp_connect_to_sock; 1135 + add_sock(sock, con); 996 1136 997 - if (rem_addr.ss_family == AF_INET) { 998 - struct sockaddr_in *sin = (struct sockaddr_in *)&rem_addr; 999 - log_print("Trying to connect to %pI4", &sin->sin_addr.s_addr); 1000 - } else { 1001 - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&rem_addr; 1002 - log_print("Trying to connect to %pI6", &sin6->sin6_addr); 1003 - } 1137 + /* Bind to all addresses. 
*/ 1138 + if (sctp_bind_addrs(con, 0)) 1139 + goto bind_err; 1004 1140 1005 - cmsg = CMSG_FIRSTHDR(&outmessage); 1006 - cmsg->cmsg_level = IPPROTO_SCTP; 1007 - cmsg->cmsg_type = SCTP_SNDRCV; 1008 - cmsg->cmsg_len = CMSG_LEN(sizeof(struct sctp_sndrcvinfo)); 1009 - sinfo = CMSG_DATA(cmsg); 1010 - memset(sinfo, 0x00, sizeof(struct sctp_sndrcvinfo)); 1011 - sinfo->sinfo_ppid = cpu_to_le32(con->nodeid); 1012 - outmessage.msg_controllen = cmsg->cmsg_len; 1013 - sinfo->sinfo_flags |= SCTP_ADDR_OVER; 1141 + make_sockaddr(&daddr, dlm_config.ci_tcp_port, &addr_len); 1014 1142 1015 - ret = kernel_sendmsg(base_con->sock, &outmessage, iov, 1, len); 1016 - if (ret < 0) { 1017 - log_print("Send first packet to node %d failed: %d", 1018 - con->nodeid, ret); 1143 + log_print("connecting to %d", con->nodeid); 1019 1144 1020 - /* Try again later */ 1145 + /* Turn off Nagle's algorithm */ 1146 + kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, 1147 + sizeof(one)); 1148 + 1149 + result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len, 1150 + O_NONBLOCK); 1151 + if (result == -EINPROGRESS) 1152 + result = 0; 1153 + if (result == 0) 1154 + goto out; 1155 + 1156 + 1157 + bind_err: 1158 + con->sock = NULL; 1159 + sock_release(sock); 1160 + 1161 + socket_err: 1162 + /* 1163 + * Some errors are fatal and this list might need adjusting. For other 1164 + * errors we try again until the max number of retries is reached. 
1165 + */ 1166 + if (result != -EHOSTUNREACH && 1167 + result != -ENETUNREACH && 1168 + result != -ENETDOWN && 1169 + result != -EINVAL && 1170 + result != -EPROTONOSUPPORT) { 1171 + log_print("connect %d try %d error %d", con->nodeid, 1172 + con->retries, result); 1173 + mutex_unlock(&con->sock_mutex); 1174 + msleep(1000); 1021 1175 clear_bit(CF_CONNECT_PENDING, &con->flags); 1022 - clear_bit(CF_INIT_PENDING, &con->flags); 1023 - } 1024 - else { 1025 - spin_lock(&con->writequeue_lock); 1026 - writequeue_entry_complete(e, ret); 1027 - spin_unlock(&con->writequeue_lock); 1176 + lowcomms_connect_sock(con); 1177 + return; 1028 1178 } 1029 1179 1030 - unlock: 1180 + out: 1031 1181 mutex_unlock(&con->sock_mutex); 1182 + set_bit(CF_WRITE_PENDING, &con->flags); 1032 1183 } 1033 1184 1034 1185 /* Connect a new socket to its peer */ ··· 1149 1236 con->retries, result); 1150 1237 mutex_unlock(&con->sock_mutex); 1151 1238 msleep(1000); 1239 + clear_bit(CF_CONNECT_PENDING, &con->flags); 1152 1240 lowcomms_connect_sock(con); 1153 1241 return; 1154 1242 } 1155 1243 out: 1156 1244 mutex_unlock(&con->sock_mutex); 1245 + set_bit(CF_WRITE_PENDING, &con->flags); 1157 1246 return; 1158 1247 } 1159 1248 ··· 1240 1325 } 1241 1326 } 1242 1327 1243 - /* Bind to an IP address. 
SCTP allows multiple address so it can do 1244 - multi-homing */ 1245 - static int add_sctp_bind_addr(struct connection *sctp_con, 1246 - struct sockaddr_storage *addr, 1247 - int addr_len, int num) 1248 - { 1249 - int result = 0; 1250 - 1251 - if (num == 1) 1252 - result = kernel_bind(sctp_con->sock, 1253 - (struct sockaddr *) addr, 1254 - addr_len); 1255 - else 1256 - result = kernel_setsockopt(sctp_con->sock, SOL_SCTP, 1257 - SCTP_SOCKOPT_BINDX_ADD, 1258 - (char *)addr, addr_len); 1259 - 1260 - if (result < 0) 1261 - log_print("Can't bind to port %d addr number %d", 1262 - dlm_config.ci_tcp_port, num); 1263 - 1264 - return result; 1265 - } 1266 - 1267 1328 /* Initialise SCTP socket and bind to all interfaces */ 1268 1329 static int sctp_listen_for_all(void) 1269 1330 { 1270 1331 struct socket *sock = NULL; 1271 - struct sockaddr_storage localaddr; 1272 - struct sctp_event_subscribe subscribe; 1273 - int result = -EINVAL, num = 1, i, addr_len; 1332 + int result = -EINVAL; 1274 1333 struct connection *con = nodeid2con(0, GFP_NOFS); 1275 1334 int bufsize = NEEDED_RMEM; 1276 1335 int one = 1; ··· 1255 1366 log_print("Using SCTP for communications"); 1256 1367 1257 1368 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1258 - SOCK_SEQPACKET, IPPROTO_SCTP, &sock); 1369 + SOCK_STREAM, IPPROTO_SCTP, &sock); 1259 1370 if (result < 0) { 1260 1371 log_print("Can't create comms socket, check SCTP is loaded"); 1261 1372 goto out; 1262 1373 } 1263 1374 1264 - /* Listen for events */ 1265 - memset(&subscribe, 0, sizeof(subscribe)); 1266 - subscribe.sctp_data_io_event = 1; 1267 - subscribe.sctp_association_event = 1; 1268 - subscribe.sctp_send_failure_event = 1; 1269 - subscribe.sctp_shutdown_event = 1; 1270 - subscribe.sctp_partial_delivery_event = 1; 1271 - 1272 1375 result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE, 1273 1376 (char *)&bufsize, sizeof(bufsize)); 1274 1377 if (result) 1275 1378 log_print("Error increasing buffer space on socket %d", 
result); 1276 - 1277 - result = kernel_setsockopt(sock, SOL_SCTP, SCTP_EVENTS, 1278 - (char *)&subscribe, sizeof(subscribe)); 1279 - if (result < 0) { 1280 - log_print("Failed to set SCTP_EVENTS on socket: result=%d", 1281 - result); 1282 - goto create_delsock; 1283 - } 1284 1379 1285 1380 result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one, 1286 1381 sizeof(one)); ··· 1275 1402 sock->sk->sk_user_data = con; 1276 1403 con->sock = sock; 1277 1404 con->sock->sk->sk_data_ready = lowcomms_data_ready; 1278 - con->rx_action = receive_from_sock; 1279 - con->connect_action = sctp_init_assoc; 1405 + con->rx_action = sctp_accept_from_sock; 1406 + con->connect_action = sctp_connect_to_sock; 1280 1407 1281 - /* Bind to all interfaces. */ 1282 - for (i = 0; i < dlm_local_count; i++) { 1283 - memcpy(&localaddr, dlm_local_addr[i], sizeof(localaddr)); 1284 - make_sockaddr(&localaddr, dlm_config.ci_tcp_port, &addr_len); 1285 - 1286 - result = add_sctp_bind_addr(con, &localaddr, addr_len, num); 1287 - if (result) 1288 - goto create_delsock; 1289 - ++num; 1290 - } 1408 + /* Bind to all addresses. 
*/ 1409 + if (sctp_bind_addrs(con, dlm_config.ci_tcp_port)) 1410 + goto create_delsock; 1291 1411 1292 1412 result = sock->ops->listen(sock, 5); 1293 1413 if (result < 0) { ··· 1478 1612 1479 1613 send_error: 1480 1614 mutex_unlock(&con->sock_mutex); 1481 - close_connection(con, false); 1615 + close_connection(con, false, false, true); 1482 1616 lowcomms_connect_sock(con); 1483 1617 return; 1484 1618 1485 1619 out_connect: 1486 1620 mutex_unlock(&con->sock_mutex); 1487 - if (!test_bit(CF_INIT_PENDING, &con->flags)) 1488 - lowcomms_connect_sock(con); 1621 + lowcomms_connect_sock(con); 1489 1622 } 1490 1623 1491 1624 static void clean_one_writequeue(struct connection *con) ··· 1509 1644 log_print("closing connection to node %d", nodeid); 1510 1645 con = nodeid2con(nodeid, 0); 1511 1646 if (con) { 1512 - clear_bit(CF_CONNECT_PENDING, &con->flags); 1513 - clear_bit(CF_WRITE_PENDING, &con->flags); 1514 1647 set_bit(CF_CLOSE, &con->flags); 1515 - if (cancel_work_sync(&con->swork)) 1516 - log_print("canceled swork for node %d", nodeid); 1517 - if (cancel_work_sync(&con->rwork)) 1518 - log_print("canceled rwork for node %d", nodeid); 1648 + close_connection(con, true, true, true); 1519 1649 clean_one_writequeue(con); 1520 - close_connection(con, true); 1521 1650 } 1522 1651 1523 1652 spin_lock(&dlm_node_addrs_spin); ··· 1544 1685 { 1545 1686 struct connection *con = container_of(work, struct connection, swork); 1546 1687 1547 - if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) { 1688 + if (test_and_clear_bit(CF_CONNECT_PENDING, &con->flags)) 1548 1689 con->connect_action(con); 1549 - set_bit(CF_WRITE_PENDING, &con->flags); 1550 - } 1551 1690 if (test_and_clear_bit(CF_WRITE_PENDING, &con->flags)) 1552 1691 send_to_sock(con); 1553 1692 } ··· 1592 1735 1593 1736 static void free_conn(struct connection *con) 1594 1737 { 1595 - close_connection(con, true); 1738 + close_connection(con, true, true, true); 1596 1739 if (con->othercon) 1597 1740 kmem_cache_free(con_cache, 
con->othercon); 1598 1741 hlist_del(&con->list); ··· 1663 1806 dlm_allow_conn = 0; 1664 1807 con = nodeid2con(0,0); 1665 1808 if (con) { 1666 - close_connection(con, false); 1809 + close_connection(con, false, true, true); 1667 1810 kmem_cache_free(con_cache, con); 1668 1811 } 1669 1812 fail_destroy:
+4 -3
fs/dlm/user.c
··· 782 782 DECLARE_WAITQUEUE(wait, current); 783 783 struct dlm_callback cb; 784 784 int rv, resid, copy_lvb = 0; 785 + int old_mode, new_mode; 785 786 786 787 if (count == sizeof(struct dlm_device_version)) { 787 788 rv = copy_version_to_user(buf, count); ··· 839 838 840 839 lkb = list_entry(proc->asts.next, struct dlm_lkb, lkb_cb_list); 841 840 841 + /* rem_lkb_callback sets a new lkb_last_cast */ 842 + old_mode = lkb->lkb_last_cast.mode; 843 + 842 844 rv = dlm_rem_lkb_callback(lkb->lkb_resource->res_ls, lkb, &cb, &resid); 843 845 if (rv < 0) { 844 846 /* this shouldn't happen; lkb should have been removed from ··· 865 861 } 866 862 867 863 if (cb.flags & DLM_CB_CAST) { 868 - int old_mode, new_mode; 869 - 870 - old_mode = lkb->lkb_last_cast.mode; 871 864 new_mode = cb.mode; 872 865 873 866 if (!cb.sb_status && lkb->lkb_lksb->sb_lvbptr &&
+1 -1
include/uapi/linux/dlm_device.h
··· 26 26 /* Version of the device interface */ 27 27 #define DLM_DEVICE_VERSION_MAJOR 6 28 28 #define DLM_DEVICE_VERSION_MINOR 0 29 - #define DLM_DEVICE_VERSION_PATCH 1 29 + #define DLM_DEVICE_VERSION_PATCH 2 30 30 31 31 /* struct passed to the lock write */ 32 32 struct dlm_lock_params {