Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

RDMA/ucma: Put a lock around every call to the rdma_cm layer

The rdma_cm must be used single threaded.

This appears to be a bug in the design, as it does have lots of locking
that seems like it should allow concurrency. However, when it is all said
and done, every single place that uses the cma_exch() scheme is broken, and
all the unlocked reads of the cm_id data from the ucma are wrong too.

syzkaller has been finding endless bugs related to this.

Fixing this in any elegant way is an enormous amount of work. Take a
very big hammer and put a mutex around everything to do with the
ucma_context at the top of every syscall.

Fixes: 75216638572f ("RDMA/cma: Export rdma cm interface to userspace")
Link: https://lore.kernel.org/r/20200218210432.GA31966@ziepe.ca
Reported-by: syzbot+adb15cf8c2798e4e0db4@syzkaller.appspotmail.com
Reported-by: syzbot+e5579222b6a3edd96522@syzkaller.appspotmail.com
Reported-by: syzbot+4b628fcc748474003457@syzkaller.appspotmail.com
Reported-by: syzbot+29ee8f76017ce6cf03da@syzkaller.appspotmail.com
Reported-by: syzbot+6956235342b7317ec564@syzkaller.appspotmail.com
Reported-by: syzbot+b358909d8d01556b790b@syzkaller.appspotmail.com
Reported-by: syzbot+6b46b135602a3f3ac99e@syzkaller.appspotmail.com
Reported-by: syzbot+8458d13b13562abf6b77@syzkaller.appspotmail.com
Reported-by: syzbot+bd034f3fdc0402e942ed@syzkaller.appspotmail.com
Reported-by: syzbot+c92378b32760a4eef756@syzkaller.appspotmail.com
Reported-by: syzbot+68b44a1597636e0b342c@syzkaller.appspotmail.com
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

+47 -2
+47 -2
drivers/infiniband/core/ucma.c
··· 91 91 92 92 struct ucma_file *file; 93 93 struct rdma_cm_id *cm_id; 94 + struct mutex mutex; 94 95 u64 uid; 95 96 96 97 struct list_head list; ··· 217 216 init_completion(&ctx->comp); 218 217 INIT_LIST_HEAD(&ctx->mc_list); 219 218 ctx->file = file; 219 + mutex_init(&ctx->mutex); 220 220 221 221 if (xa_alloc(&ctx_table, &ctx->id, ctx, xa_limit_32b, GFP_KERNEL)) 222 222 goto error; ··· 591 589 } 592 590 593 591 events_reported = ctx->events_reported; 592 + mutex_destroy(&ctx->mutex); 594 593 kfree(ctx); 595 594 return events_reported; 596 595 } ··· 661 658 if (IS_ERR(ctx)) 662 659 return PTR_ERR(ctx); 663 660 661 + mutex_lock(&ctx->mutex); 664 662 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 663 + mutex_unlock(&ctx->mutex); 664 + 665 665 ucma_put_ctx(ctx); 666 666 return ret; 667 667 } ··· 687 681 if (IS_ERR(ctx)) 688 682 return PTR_ERR(ctx); 689 683 684 + mutex_lock(&ctx->mutex); 690 685 ret = rdma_bind_addr(ctx->cm_id, (struct sockaddr *) &cmd.addr); 686 + mutex_unlock(&ctx->mutex); 691 687 ucma_put_ctx(ctx); 692 688 return ret; 693 689 } ··· 713 705 if (IS_ERR(ctx)) 714 706 return PTR_ERR(ctx); 715 707 708 + mutex_lock(&ctx->mutex); 716 709 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 717 710 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 711 + mutex_unlock(&ctx->mutex); 718 712 ucma_put_ctx(ctx); 719 713 return ret; 720 714 } ··· 741 731 if (IS_ERR(ctx)) 742 732 return PTR_ERR(ctx); 743 733 734 + mutex_lock(&ctx->mutex); 744 735 ret = rdma_resolve_addr(ctx->cm_id, (struct sockaddr *) &cmd.src_addr, 745 736 (struct sockaddr *) &cmd.dst_addr, cmd.timeout_ms); 737 + mutex_unlock(&ctx->mutex); 746 738 ucma_put_ctx(ctx); 747 739 return ret; 748 740 } ··· 764 752 if (IS_ERR(ctx)) 765 753 return PTR_ERR(ctx); 766 754 755 + mutex_lock(&ctx->mutex); 767 756 ret = rdma_resolve_route(ctx->cm_id, cmd.timeout_ms); 757 + mutex_unlock(&ctx->mutex); 768 758 ucma_put_ctx(ctx); 769 759 return ret; 770 760 } ··· 855 841 if 
(IS_ERR(ctx)) 856 842 return PTR_ERR(ctx); 857 843 844 + mutex_lock(&ctx->mutex); 858 845 memset(&resp, 0, sizeof resp); 859 846 addr = (struct sockaddr *) &ctx->cm_id->route.addr.src_addr; 860 847 memcpy(&resp.src_addr, addr, addr->sa_family == AF_INET ? ··· 879 864 ucma_copy_iw_route(&resp, &ctx->cm_id->route); 880 865 881 866 out: 867 + mutex_unlock(&ctx->mutex); 882 868 if (copy_to_user(u64_to_user_ptr(cmd.response), 883 869 &resp, sizeof(resp))) 884 870 ret = -EFAULT; ··· 1030 1014 if (IS_ERR(ctx)) 1031 1015 return PTR_ERR(ctx); 1032 1016 1017 + mutex_lock(&ctx->mutex); 1033 1018 switch (cmd.option) { 1034 1019 case RDMA_USER_CM_QUERY_ADDR: 1035 1020 ret = ucma_query_addr(ctx, response, out_len); ··· 1045 1028 ret = -ENOSYS; 1046 1029 break; 1047 1030 } 1031 + mutex_unlock(&ctx->mutex); 1048 1032 1049 1033 ucma_put_ctx(ctx); 1050 1034 return ret; ··· 1086 1068 return PTR_ERR(ctx); 1087 1069 1088 1070 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1071 + mutex_lock(&ctx->mutex); 1089 1072 ret = rdma_connect(ctx->cm_id, &conn_param); 1073 + mutex_unlock(&ctx->mutex); 1090 1074 ucma_put_ctx(ctx); 1091 1075 return ret; 1092 1076 } ··· 1109 1089 1110 1090 ctx->backlog = cmd.backlog > 0 && cmd.backlog < max_backlog ? 
1111 1091 cmd.backlog : max_backlog; 1092 + mutex_lock(&ctx->mutex); 1112 1093 ret = rdma_listen(ctx->cm_id, ctx->backlog); 1094 + mutex_unlock(&ctx->mutex); 1113 1095 ucma_put_ctx(ctx); 1114 1096 return ret; 1115 1097 } ··· 1134 1112 if (cmd.conn_param.valid) { 1135 1113 ucma_copy_conn_param(ctx->cm_id, &conn_param, &cmd.conn_param); 1136 1114 mutex_lock(&file->mut); 1115 + mutex_lock(&ctx->mutex); 1137 1116 ret = __rdma_accept(ctx->cm_id, &conn_param, NULL); 1117 + mutex_unlock(&ctx->mutex); 1138 1118 if (!ret) 1139 1119 ctx->uid = cmd.uid; 1140 1120 mutex_unlock(&file->mut); 1141 - } else 1121 + } else { 1122 + mutex_lock(&ctx->mutex); 1142 1123 ret = __rdma_accept(ctx->cm_id, NULL, NULL); 1143 - 1124 + mutex_unlock(&ctx->mutex); 1125 + } 1144 1126 ucma_put_ctx(ctx); 1145 1127 return ret; 1146 1128 } ··· 1163 1137 if (IS_ERR(ctx)) 1164 1138 return PTR_ERR(ctx); 1165 1139 1140 + mutex_lock(&ctx->mutex); 1166 1141 ret = rdma_reject(ctx->cm_id, cmd.private_data, cmd.private_data_len); 1142 + mutex_unlock(&ctx->mutex); 1167 1143 ucma_put_ctx(ctx); 1168 1144 return ret; 1169 1145 } ··· 1184 1156 if (IS_ERR(ctx)) 1185 1157 return PTR_ERR(ctx); 1186 1158 1159 + mutex_lock(&ctx->mutex); 1187 1160 ret = rdma_disconnect(ctx->cm_id); 1161 + mutex_unlock(&ctx->mutex); 1188 1162 ucma_put_ctx(ctx); 1189 1163 return ret; 1190 1164 } ··· 1217 1187 resp.qp_attr_mask = 0; 1218 1188 memset(&qp_attr, 0, sizeof qp_attr); 1219 1189 qp_attr.qp_state = cmd.qp_state; 1190 + mutex_lock(&ctx->mutex); 1220 1191 ret = rdma_init_qp_attr(ctx->cm_id, &qp_attr, &resp.qp_attr_mask); 1192 + mutex_unlock(&ctx->mutex); 1221 1193 if (ret) 1222 1194 goto out; 1223 1195 ··· 1305 1273 struct sa_path_rec opa; 1306 1274 1307 1275 sa_convert_path_ib_to_opa(&opa, &sa_path); 1276 + mutex_lock(&ctx->mutex); 1308 1277 ret = rdma_set_ib_path(ctx->cm_id, &opa); 1278 + mutex_unlock(&ctx->mutex); 1309 1279 } else { 1280 + mutex_lock(&ctx->mutex); 1310 1281 ret = rdma_set_ib_path(ctx->cm_id, &sa_path); 1282 + 
mutex_unlock(&ctx->mutex); 1311 1283 } 1312 1284 if (ret) 1313 1285 return ret; ··· 1344 1308 1345 1309 switch (level) { 1346 1310 case RDMA_OPTION_ID: 1311 + mutex_lock(&ctx->mutex); 1347 1312 ret = ucma_set_option_id(ctx, optname, optval, optlen); 1313 + mutex_unlock(&ctx->mutex); 1348 1314 break; 1349 1315 case RDMA_OPTION_IB: 1350 1316 ret = ucma_set_option_ib(ctx, optname, optval, optlen); ··· 1406 1368 if (IS_ERR(ctx)) 1407 1369 return PTR_ERR(ctx); 1408 1370 1371 + mutex_lock(&ctx->mutex); 1409 1372 if (ctx->cm_id->device) 1410 1373 ret = rdma_notify(ctx->cm_id, (enum ib_event_type)cmd.event); 1374 + mutex_unlock(&ctx->mutex); 1411 1375 1412 1376 ucma_put_ctx(ctx); 1413 1377 return ret; ··· 1452 1412 mc->join_state = join_state; 1453 1413 mc->uid = cmd->uid; 1454 1414 memcpy(&mc->addr, addr, cmd->addr_size); 1415 + mutex_lock(&ctx->mutex); 1455 1416 ret = rdma_join_multicast(ctx->cm_id, (struct sockaddr *)&mc->addr, 1456 1417 join_state, mc); 1418 + mutex_unlock(&ctx->mutex); 1457 1419 if (ret) 1458 1420 goto err2; 1459 1421 ··· 1555 1513 goto out; 1556 1514 } 1557 1515 1516 + mutex_lock(&mc->ctx->mutex); 1558 1517 rdma_leave_multicast(mc->ctx->cm_id, (struct sockaddr *) &mc->addr); 1518 + mutex_unlock(&mc->ctx->mutex); 1519 + 1559 1520 mutex_lock(&mc->ctx->file->mut); 1560 1521 ucma_cleanup_mc_events(mc); 1561 1522 list_del(&mc->list);