Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

dlm: fix deadlock between dlm_send and dlm_controld

A deadlock sometimes occurs between dlm_controld closing
a lowcomms connection through configfs and dlm_send looking
up the address for a new connection in configfs.

dlm_controld does a configfs rmdir, which calls
dlm_lowcomms_close, which calls cancel_work_sync and
waits for the in-progress dlm_send work to finish.

Meanwhile, the dlm_send workqueue thread has called
tcp_connect_to_sock, which calls dlm_nodeid_to_addr,
which does a configfs lookup (get_comm) and blocks on
a mutex held by dlm_controld in the rmdir path, so
neither thread can make progress.

The solution here is to save the node addresses within
the lowcomms code so that the lowcomms workqueue does
not need to step through configfs to get a node address.

dlm_controld:
wait_for_completion+0x1d/0x20
__cancel_work_timer+0x1b3/0x1e0
cancel_work_sync+0x10/0x20
dlm_lowcomms_close+0x4c/0xb0 [dlm]
drop_comm+0x22/0x60 [dlm]
client_drop_item+0x26/0x50 [configfs]
configfs_rmdir+0x180/0x230 [configfs]
vfs_rmdir+0xbd/0xf0
do_rmdir+0x103/0x120
sys_rmdir+0x16/0x20

dlm_send:
mutex_lock+0x2b/0x50
get_comm+0x34/0x140 [dlm]
dlm_nodeid_to_addr+0x18/0xd0 [dlm]
tcp_connect_to_sock+0xf4/0x2d0 [dlm]
process_send_sockets+0x1d2/0x260 [dlm]
worker_thread+0x170/0x2a0

Signed-off-by: David Teigland <teigland@redhat.com>

+200 -90
+15 -64
fs/dlm/config.c
··· 750 750 static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len) 751 751 { 752 752 struct sockaddr_storage *addr; 753 + int rv; 753 754 754 755 if (len != sizeof(struct sockaddr_storage)) 755 756 return -EINVAL; ··· 763 762 return -ENOMEM; 764 763 765 764 memcpy(addr, buf, len); 765 + 766 + rv = dlm_lowcomms_addr(cm->nodeid, addr, len); 767 + if (rv) { 768 + kfree(addr); 769 + return rv; 770 + } 771 + 766 772 cm->addr[cm->addr_count++] = addr; 767 773 return len; 768 774 } ··· 886 878 config_item_put(&sp->group.cg_item); 887 879 } 888 880 889 - static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) 890 - { 891 - switch (x->ss_family) { 892 - case AF_INET: { 893 - struct sockaddr_in *sinx = (struct sockaddr_in *)x; 894 - struct sockaddr_in *siny = (struct sockaddr_in *)y; 895 - if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) 896 - return 0; 897 - if (sinx->sin_port != siny->sin_port) 898 - return 0; 899 - break; 900 - } 901 - case AF_INET6: { 902 - struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; 903 - struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; 904 - if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) 905 - return 0; 906 - if (sinx->sin6_port != siny->sin6_port) 907 - return 0; 908 - break; 909 - } 910 - default: 911 - return 0; 912 - } 913 - return 1; 914 - } 915 - 916 - static struct dlm_comm *get_comm(int nodeid, struct sockaddr_storage *addr) 881 + static struct dlm_comm *get_comm(int nodeid) 917 882 { 918 883 struct config_item *i; 919 884 struct dlm_comm *cm = NULL; ··· 900 919 list_for_each_entry(i, &comm_list->cg_children, ci_entry) { 901 920 cm = config_item_to_comm(i); 902 921 903 - if (nodeid) { 904 - if (cm->nodeid != nodeid) 905 - continue; 906 - found = 1; 907 - config_item_get(i); 908 - break; 909 - } else { 910 - if (!cm->addr_count || !addr_compare(cm->addr[0], addr)) 911 - continue; 912 - found = 1; 913 - config_item_get(i); 914 - break; 915 - } 922 + if (cm->nodeid != 
nodeid) 923 + continue; 924 + found = 1; 925 + config_item_get(i); 926 + break; 916 927 } 917 928 mutex_unlock(&clusters_root.subsys.su_mutex); 918 929 ··· 968 995 969 996 int dlm_comm_seq(int nodeid, uint32_t *seq) 970 997 { 971 - struct dlm_comm *cm = get_comm(nodeid, NULL); 998 + struct dlm_comm *cm = get_comm(nodeid); 972 999 if (!cm) 973 1000 return -EEXIST; 974 1001 *seq = cm->seq; 975 - put_comm(cm); 976 - return 0; 977 - } 978 - 979 - int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr) 980 - { 981 - struct dlm_comm *cm = get_comm(nodeid, NULL); 982 - if (!cm) 983 - return -EEXIST; 984 - if (!cm->addr_count) 985 - return -ENOENT; 986 - memcpy(addr, cm->addr[0], sizeof(*addr)); 987 - put_comm(cm); 988 - return 0; 989 - } 990 - 991 - int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) 992 - { 993 - struct dlm_comm *cm = get_comm(0, addr); 994 - if (!cm) 995 - return -EEXIST; 996 - *nodeid = cm->nodeid; 997 1002 put_comm(cm); 998 1003 return 0; 999 1004 }
-2
fs/dlm/config.h
··· 46 46 int dlm_config_nodes(char *lsname, struct dlm_config_node **nodes_out, 47 47 int *count_out); 48 48 int dlm_comm_seq(int nodeid, uint32_t *seq); 49 - int dlm_nodeid_to_addr(int nodeid, struct sockaddr_storage *addr); 50 - int dlm_addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid); 51 49 int dlm_our_nodeid(void); 52 50 int dlm_our_addr(struct sockaddr_storage *addr, int num); 53 51
+181 -24
fs/dlm/lowcomms.c
··· 140 140 struct connection *con; 141 141 }; 142 142 143 + struct dlm_node_addr { 144 + struct list_head list; 145 + int nodeid; 146 + int addr_count; 147 + struct sockaddr_storage *addr[DLM_MAX_ADDR_COUNT]; 148 + }; 149 + 150 + static LIST_HEAD(dlm_node_addrs); 151 + static DEFINE_SPINLOCK(dlm_node_addrs_spin); 152 + 143 153 static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT]; 144 154 static int dlm_local_count; 145 155 static int dlm_allow_conn; ··· 274 264 return NULL; 275 265 } 276 266 277 - static int nodeid_to_addr(int nodeid, struct sockaddr *retaddr) 267 + static struct dlm_node_addr *find_node_addr(int nodeid) 278 268 { 279 - struct sockaddr_storage addr; 280 - int error; 269 + struct dlm_node_addr *na; 270 + 271 + list_for_each_entry(na, &dlm_node_addrs, list) { 272 + if (na->nodeid == nodeid) 273 + return na; 274 + } 275 + return NULL; 276 + } 277 + 278 + static int addr_compare(struct sockaddr_storage *x, struct sockaddr_storage *y) 279 + { 280 + switch (x->ss_family) { 281 + case AF_INET: { 282 + struct sockaddr_in *sinx = (struct sockaddr_in *)x; 283 + struct sockaddr_in *siny = (struct sockaddr_in *)y; 284 + if (sinx->sin_addr.s_addr != siny->sin_addr.s_addr) 285 + return 0; 286 + if (sinx->sin_port != siny->sin_port) 287 + return 0; 288 + break; 289 + } 290 + case AF_INET6: { 291 + struct sockaddr_in6 *sinx = (struct sockaddr_in6 *)x; 292 + struct sockaddr_in6 *siny = (struct sockaddr_in6 *)y; 293 + if (!ipv6_addr_equal(&sinx->sin6_addr, &siny->sin6_addr)) 294 + return 0; 295 + if (sinx->sin6_port != siny->sin6_port) 296 + return 0; 297 + break; 298 + } 299 + default: 300 + return 0; 301 + } 302 + return 1; 303 + } 304 + 305 + static int nodeid_to_addr(int nodeid, struct sockaddr_storage *sas_out, 306 + struct sockaddr *sa_out) 307 + { 308 + struct sockaddr_storage sas; 309 + struct dlm_node_addr *na; 281 310 282 311 if (!dlm_local_count) 283 312 return -1; 284 313 285 - error = dlm_nodeid_to_addr(nodeid, &addr); 286 - if (error) 
287 - return error; 314 + spin_lock(&dlm_node_addrs_spin); 315 + na = find_node_addr(nodeid); 316 + if (na && na->addr_count) 317 + memcpy(&sas, na->addr[0], sizeof(struct sockaddr_storage)); 318 + spin_unlock(&dlm_node_addrs_spin); 319 + 320 + if (!na) 321 + return -EEXIST; 322 + 323 + if (!na->addr_count) 324 + return -ENOENT; 325 + 326 + if (sas_out) 327 + memcpy(sas_out, &sas, sizeof(struct sockaddr_storage)); 328 + 329 + if (!sa_out) 330 + return 0; 288 331 289 332 if (dlm_local_addr[0]->ss_family == AF_INET) { 290 - struct sockaddr_in *in4 = (struct sockaddr_in *) &addr; 291 - struct sockaddr_in *ret4 = (struct sockaddr_in *) retaddr; 333 + struct sockaddr_in *in4 = (struct sockaddr_in *) &sas; 334 + struct sockaddr_in *ret4 = (struct sockaddr_in *) sa_out; 292 335 ret4->sin_addr.s_addr = in4->sin_addr.s_addr; 293 336 } else { 294 - struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &addr; 295 - struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) retaddr; 337 + struct sockaddr_in6 *in6 = (struct sockaddr_in6 *) &sas; 338 + struct sockaddr_in6 *ret6 = (struct sockaddr_in6 *) sa_out; 296 339 ret6->sin6_addr = in6->sin6_addr; 297 340 } 298 341 342 + return 0; 343 + } 344 + 345 + static int addr_to_nodeid(struct sockaddr_storage *addr, int *nodeid) 346 + { 347 + struct dlm_node_addr *na; 348 + int rv = -EEXIST; 349 + 350 + spin_lock(&dlm_node_addrs_spin); 351 + list_for_each_entry(na, &dlm_node_addrs, list) { 352 + if (!na->addr_count) 353 + continue; 354 + 355 + if (!addr_compare(na->addr[0], addr)) 356 + continue; 357 + 358 + *nodeid = na->nodeid; 359 + rv = 0; 360 + break; 361 + } 362 + spin_unlock(&dlm_node_addrs_spin); 363 + return rv; 364 + } 365 + 366 + int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len) 367 + { 368 + struct sockaddr_storage *new_addr; 369 + struct dlm_node_addr *new_node, *na; 370 + 371 + new_node = kzalloc(sizeof(struct dlm_node_addr), GFP_NOFS); 372 + if (!new_node) 373 + return -ENOMEM; 374 + 375 + new_addr = 
kzalloc(sizeof(struct sockaddr_storage), GFP_NOFS); 376 + if (!new_addr) { 377 + kfree(new_node); 378 + return -ENOMEM; 379 + } 380 + 381 + memcpy(new_addr, addr, len); 382 + 383 + spin_lock(&dlm_node_addrs_spin); 384 + na = find_node_addr(nodeid); 385 + if (!na) { 386 + new_node->nodeid = nodeid; 387 + new_node->addr[0] = new_addr; 388 + new_node->addr_count = 1; 389 + list_add(&new_node->list, &dlm_node_addrs); 390 + spin_unlock(&dlm_node_addrs_spin); 391 + return 0; 392 + } 393 + 394 + if (na->addr_count >= DLM_MAX_ADDR_COUNT) { 395 + spin_unlock(&dlm_node_addrs_spin); 396 + kfree(new_addr); 397 + kfree(new_node); 398 + return -ENOSPC; 399 + } 400 + 401 + na->addr[na->addr_count++] = new_addr; 402 + spin_unlock(&dlm_node_addrs_spin); 403 + kfree(new_node); 299 404 return 0; 300 405 } 301 406 ··· 635 510 return; 636 511 } 637 512 make_sockaddr(&prim.ssp_addr, 0, &addr_len); 638 - if (dlm_addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 513 + if (addr_to_nodeid(&prim.ssp_addr, &nodeid)) { 639 514 unsigned char *b=(unsigned char *)&prim.ssp_addr; 640 515 log_print("reject connect from unknown addr"); 641 516 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, ··· 872 747 873 748 /* Get the new node's NODEID */ 874 749 make_sockaddr(&peeraddr, 0, &len); 875 - if (dlm_addr_to_nodeid(&peeraddr, &nodeid)) { 750 + if (addr_to_nodeid(&peeraddr, &nodeid)) { 876 751 unsigned char *b=(unsigned char *)&peeraddr; 877 752 log_print("connect from non cluster node"); 878 753 print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, ··· 987 862 if (con->retries++ > MAX_CONNECT_RETRIES) 988 863 return; 989 864 990 - if (nodeid_to_addr(con->nodeid, (struct sockaddr *)&rem_addr)) { 865 + if (nodeid_to_addr(con->nodeid, NULL, (struct sockaddr *)&rem_addr)) { 991 866 log_print("no address for nodeid %d", con->nodeid); 992 867 return; 993 868 } ··· 1053 928 /* Connect a new socket to its peer */ 1054 929 static void tcp_connect_to_sock(struct connection *con) 1055 930 { 1056 - int result = -EHOSTUNREACH; 
1057 931 struct sockaddr_storage saddr, src_addr; 1058 932 int addr_len; 1059 933 struct socket *sock = NULL; 1060 934 int one = 1; 935 + int result; 1061 936 1062 937 if (con->nodeid == 0) { 1063 938 log_print("attempt to connect sock 0 foiled"); ··· 1069 944 goto out; 1070 945 1071 946 /* Some odd races can cause double-connects, ignore them */ 1072 - if (con->sock) { 1073 - result = 0; 947 + if (con->sock) 1074 948 goto out; 1075 - } 1076 949 1077 950 /* Create a socket to communicate with */ 1078 951 result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM, ··· 1079 956 goto out_err; 1080 957 1081 958 memset(&saddr, 0, sizeof(saddr)); 1082 - if (dlm_nodeid_to_addr(con->nodeid, &saddr)) 959 + result = nodeid_to_addr(con->nodeid, &saddr, NULL); 960 + if (result < 0) { 961 + log_print("no address for nodeid %d", con->nodeid); 1083 962 goto out_err; 963 + } 1084 964 1085 965 sock->sk->sk_user_data = con; 1086 966 con->rx_action = receive_from_sock; ··· 1109 983 kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one, 1110 984 sizeof(one)); 1111 985 1112 - result = 1113 - sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 986 + result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len, 1114 987 O_NONBLOCK); 1115 988 if (result == -EINPROGRESS) 1116 989 result = 0; ··· 1127 1002 * Some errors are fatal and this list might need adjusting. For other 1128 1003 * errors we try again until the max number of retries is reached. 
1129 1004 */ 1130 - if (result != -EHOSTUNREACH && result != -ENETUNREACH && 1131 - result != -ENETDOWN && result != -EINVAL 1132 - && result != -EPROTONOSUPPORT) { 1005 + if (result != -EHOSTUNREACH && 1006 + result != -ENETUNREACH && 1007 + result != -ENETDOWN && 1008 + result != -EINVAL && 1009 + result != -EPROTONOSUPPORT) { 1010 + log_print("connect %d try %d error %d", con->nodeid, 1011 + con->retries, result); 1012 + mutex_unlock(&con->sock_mutex); 1013 + msleep(1000); 1133 1014 lowcomms_connect_sock(con); 1134 - result = 0; 1015 + return; 1135 1016 } 1136 1017 out: 1137 1018 mutex_unlock(&con->sock_mutex); ··· 1545 1414 int dlm_lowcomms_close(int nodeid) 1546 1415 { 1547 1416 struct connection *con; 1417 + struct dlm_node_addr *na; 1548 1418 1549 1419 log_print("closing connection to node %d", nodeid); 1550 1420 con = nodeid2con(nodeid, 0); ··· 1560 1428 clean_one_writequeue(con); 1561 1429 close_connection(con, true); 1562 1430 } 1431 + 1432 + spin_lock(&dlm_node_addrs_spin); 1433 + na = find_node_addr(nodeid); 1434 + if (na) { 1435 + list_del(&na->list); 1436 + while (na->addr_count--) 1437 + kfree(na->addr[na->addr_count]); 1438 + kfree(na); 1439 + } 1440 + spin_unlock(&dlm_node_addrs_spin); 1441 + 1563 1442 return 0; 1564 1443 } 1565 1444 ··· 1719 1576 kmem_cache_destroy(con_cache); 1720 1577 fail: 1721 1578 return error; 1579 + } 1580 + 1581 + void dlm_lowcomms_exit(void) 1582 + { 1583 + struct dlm_node_addr *na, *safe; 1584 + 1585 + spin_lock(&dlm_node_addrs_spin); 1586 + list_for_each_entry_safe(na, safe, &dlm_node_addrs, list) { 1587 + list_del(&na->list); 1588 + while (na->addr_count--) 1589 + kfree(na->addr[na->addr_count]); 1590 + kfree(na); 1591 + } 1592 + spin_unlock(&dlm_node_addrs_spin); 1722 1593 }
+2
fs/dlm/lowcomms.h
··· 16 16 17 17 int dlm_lowcomms_start(void); 18 18 void dlm_lowcomms_stop(void); 19 + void dlm_lowcomms_exit(void); 19 20 int dlm_lowcomms_close(int nodeid); 20 21 void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc); 21 22 void dlm_lowcomms_commit_buffer(void *mh); 22 23 int dlm_lowcomms_connect_node(int nodeid); 24 + int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); 23 25 24 26 #endif /* __LOWCOMMS_DOT_H__ */ 25 27
+2
fs/dlm/main.c
··· 17 17 #include "user.h" 18 18 #include "memory.h" 19 19 #include "config.h" 20 + #include "lowcomms.h" 20 21 21 22 static int __init init_dlm(void) 22 23 { ··· 79 78 dlm_config_exit(); 80 79 dlm_memory_exit(); 81 80 dlm_lockspace_exit(); 81 + dlm_lowcomms_exit(); 82 82 dlm_unregister_debugfs(); 83 83 } 84 84