net: fix a lockdep splat

We have two locks for each socket:

One spinlock (sk_lock.slock)
One rwlock (sk_callback_lock)

Possible scenarios are:

(A) (this pattern is used in net/sunrpc/xprtsock.c)
read_lock(&sk->sk_callback_lock) (without blocking BH)
<BH>
    spin_lock(&sk->sk_lock.slock);
    ...
    read_lock(&sk->sk_callback_lock);
    ...

(B)
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)

(C)
spin_lock_bh(&sk->sk_lock.slock)
...
write_lock_bh(&sk->sk_callback_lock)
stuff
write_unlock_bh(&sk->sk_callback_lock)
spin_unlock_bh(&sk->sk_lock.slock)

Case (C) conflicts with case (A):

CPU1 [A]                                CPU2 [C]
read_lock(callback_lock)
<BH>                                    spin_lock_bh(slock)
<wait to spin_lock(slock)>
                                        <wait to write_lock_bh(callback_lock)>

CPU1 cannot release the read-held callback_lock until its BH handler gets
slock, and CPU2 cannot release slock until its write_lock_bh(callback_lock)
succeeds: a classic AB-BA deadlock.

We have one problematic (C) use case in inet_csk_listen_stop():

local_bh_disable();
bh_lock_sock(child); // spin_lock(&sk->sk_lock.slock), BHs already disabled above
WARN_ON(sock_owned_by_user(child));
...
sock_orphan(child); // write_lock_bh(&sk->sk_callback_lock)
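
For reference, the write_lock_bh() above comes from sock_orphan(); it looks
roughly like this (sketch of the include/net/sock.h helper, the exact
teardown steps vary by kernel version):

    static inline void sock_orphan(struct sock *sk)
    {
            write_lock_bh(&sk->sk_callback_lock);
            sock_set_flag(sk, SOCK_DEAD);
            sk_set_socket(sk, NULL);
            /* ... detach the wait queue ... */
            write_unlock_bh(&sk->sk_callback_lock);
    }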

lockdep is not happy with this, as reported by Tetsuo Handa.

It seems the only way to deal with this is to use read_lock_bh(&sk->sk_callback_lock)
everywhere.
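
Concretely, the sk_callback_lock readers touched by this patch now follow
this pattern (illustrative sketch only; the callback name is made up, it is
not a function from this patch):

    static void example_data_ready(struct sock *sk, int bytes)
    {
            void *priv;

            read_lock_bh(&sk->sk_callback_lock);    /* was: read_lock() */
            priv = sk->sk_user_data;
            if (priv == NULL)                       /* teardown race */
                    goto out;
            /* ... hand the event to the upper layer ... */
    out:
            read_unlock_bh(&sk->sk_callback_lock);  /* was: read_unlock() */
    }

With BHs disabled while the read lock is held, a softirq can no longer
interrupt case (A) and spin on sk_lock.slock on the same CPU, so the
ordering conflict with case (C) goes away.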

Thanks to Jarek for pointing out a bug in my first attempt and for suggesting
this solution.
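
One side note on the xprtsock.c hunk: xs_tcp_state_change() now takes
xprt->transport_lock while already holding read_lock_bh(&sk->sk_callback_lock),
so the nested lock can drop its own _bh variant, BHs being already disabled
at that point. Roughly (excerpt-style sketch, not the complete function):

    read_lock_bh(&sk->sk_callback_lock);        /* BHs disabled from here on */
    ...
    switch (sk->sk_state) {
    case TCP_ESTABLISHED:
            spin_lock(&xprt->transport_lock);   /* was: spin_lock_bh() */
            ...
            spin_unlock(&xprt->transport_lock); /* was: spin_unlock_bh() */
            break;
    ...
    }
    ...
    read_unlock_bh(&sk->sk_callback_lock);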

Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Tested-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
CC: Jarek Poplawski <jarkao2@gmail.com>
Tested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

6 files changed, 26 insertions(+), 26 deletions(-)

net/core/sock.c (+4 -4)

@@ -1351,9 +1351,9 @@
 {
         int uid;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         uid = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : 0;
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
         return uid;
 }
 EXPORT_SYMBOL(sock_i_uid);
@@ -1362,9 +1362,9 @@
 {
         unsigned long ino;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         ino = sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_ino : 0;
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
         return ino;
 }
 EXPORT_SYMBOL(sock_i_ino);

net/rds/tcp_connect.c (+2 -2)

@@ -43,7 +43,7 @@
         struct rds_connection *conn;
         struct rds_tcp_connection *tc;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         conn = sk->sk_user_data;
         if (conn == NULL) {
                 state_change = sk->sk_state_change;
@@ -68,7 +68,7 @@
                 break;
         }
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
         state_change(sk);
 }
 

net/rds/tcp_listen.c (+2 -2)

@@ -114,7 +114,7 @@
 
         rdsdebug("listen data ready sk %p\n", sk);
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         ready = sk->sk_user_data;
         if (ready == NULL) { /* check for teardown race */
                 ready = sk->sk_data_ready;
@@ -131,7 +131,7 @@
         queue_work(rds_wq, &rds_tcp_listen_work);
 
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
         ready(sk, bytes);
 }
 

net/rds/tcp_recv.c (+2 -2)

@@ -324,7 +324,7 @@
 
         rdsdebug("data ready sk %p bytes %d\n", sk, bytes);
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         conn = sk->sk_user_data;
         if (conn == NULL) { /* check for teardown race */
                 ready = sk->sk_data_ready;
@@ -338,7 +338,7 @@
         if (rds_tcp_read_sock(conn, GFP_ATOMIC, KM_SOFTIRQ0) == -ENOMEM)
                 queue_delayed_work(rds_wq, &conn->c_recv_w, 0);
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
         ready(sk, bytes);
 }
 

net/rds/tcp_send.c (+2 -2)

@@ -224,7 +224,7 @@
         struct rds_connection *conn;
         struct rds_tcp_connection *tc;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         conn = sk->sk_user_data;
         if (conn == NULL) {
                 write_space = sk->sk_write_space;
@@ -244,7 +244,7 @@
                 queue_delayed_work(rds_wq, &conn->c_send_w, 0);
 
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 
         /*
          * write_space is only called when data leaves tcp's send queue if

net/sunrpc/xprtsock.c (+14 -14)

@@ -800,7 +800,7 @@
         u32 _xid;
         __be32 *xp;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         dprintk("RPC: xs_udp_data_ready...\n");
         if (!(xprt = xprt_from_sock(sk)))
                 goto out;
@@ -852,7 +852,7 @@
 dropit:
         skb_free_datagram(sk, skb);
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
@@ -1229,7 +1229,7 @@
 
         dprintk("RPC: xs_tcp_data_ready...\n");
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         if (!(xprt = xprt_from_sock(sk)))
                 goto out;
         if (xprt->shutdown)
@@ -1248,7 +1248,7 @@
                 read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
         } while (read > 0);
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /*
@@ -1301,7 +1301,7 @@
 {
         struct rpc_xprt *xprt;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         if (!(xprt = xprt_from_sock(sk)))
                 goto out;
         dprintk("RPC: xs_tcp_state_change client %p...\n", xprt);
@@ -1313,7 +1313,7 @@
 
         switch (sk->sk_state) {
         case TCP_ESTABLISHED:
-                spin_lock_bh(&xprt->transport_lock);
+                spin_lock(&xprt->transport_lock);
                 if (!xprt_test_and_set_connected(xprt)) {
                         struct sock_xprt *transport = container_of(xprt,
                                         struct sock_xprt, xprt);
@@ -1327,7 +1327,7 @@
 
                         xprt_wake_pending_tasks(xprt, -EAGAIN);
                 }
-                spin_unlock_bh(&xprt->transport_lock);
+                spin_unlock(&xprt->transport_lock);
                 break;
         case TCP_FIN_WAIT1:
                 /* The client initiated a shutdown of the socket */
@@ -1365,7 +1365,7 @@
                 xs_sock_mark_closed(xprt);
         }
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /**
@@ -1376,7 +1376,7 @@
 {
         struct rpc_xprt *xprt;
 
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
         if (!(xprt = xprt_from_sock(sk)))
                 goto out;
         dprintk("RPC: %s client %p...\n"
@@ -1384,7 +1384,7 @@
                         __func__, xprt, sk->sk_err);
         xprt_wake_pending_tasks(xprt, -EAGAIN);
 out:
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void xs_write_space(struct sock *sk)
@@ -1416,13 +1416,13 @@
  */
 static void xs_udp_write_space(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
 
         /* from net/core/sock.c:sock_def_write_space */
         if (sock_writeable(sk))
                 xs_write_space(sk);
 
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 }
 
 /**
@@ -1437,13 +1437,13 @@
  */
 static void xs_tcp_write_space(struct sock *sk)
 {
-        read_lock(&sk->sk_callback_lock);
+        read_lock_bh(&sk->sk_callback_lock);
 
         /* from net/core/stream.c:sk_stream_write_space */
         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                 xs_write_space(sk);
 
-        read_unlock(&sk->sk_callback_lock);
+        read_unlock_bh(&sk->sk_callback_lock);
 }
 
 static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)