Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

pppoe: remove rwlock usage

Like ppp_generic.c, convert the PPPoE socket hash table to use RCU for
lookups and a spinlock for updates. This removes rwlock usage and allows
lockless readers on the fast path.

- Mark hash table and list pointers as __rcu.
- Use spin_lock() to protect writers.
- Readers use rcu_dereference() under rcu_read_lock(). All known callers
of get_item() already hold the RCU read lock, so no additional locking
is needed.
- get_item() now uses refcount_inc_not_zero() instead of sock_hold() to
safely take a reference. This prevents crashes if a socket is already
in the process of being freed (sk_refcnt == 0).
- Set SOCK_RCU_FREE to defer socket freeing until after an RCU grace
period.
- Move skb_queue_purge() into sk_destruct callback to ensure purge
happens after an RCU grace period.

Signed-off-by: Qingfang Deng <dqfext@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Link: https://patch.msgid.link/20250828012018.15922-1-dqfext@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

authored by

Qingfang Deng and committed by
Jakub Kicinski
72cdc67e d23ad54d

+54 -42
+53 -41
drivers/net/ppp/pppoe.c
··· 100 100 * as well, moreover in case of SMP less locking 101 101 * controversy here 102 102 */ 103 - struct pppox_sock *hash_table[PPPOE_HASH_SIZE]; 104 - rwlock_t hash_lock; 103 + struct pppox_sock __rcu *hash_table[PPPOE_HASH_SIZE]; 104 + spinlock_t hash_lock; 105 105 }; 106 106 107 107 /* ··· 162 162 int hash = hash_item(sid, addr); 163 163 struct pppox_sock *ret; 164 164 165 - ret = pn->hash_table[hash]; 165 + ret = rcu_dereference(pn->hash_table[hash]); 166 166 while (ret) { 167 167 if (cmp_addr(&ret->pppoe_pa, sid, addr) && 168 168 ret->pppoe_ifindex == ifindex) 169 169 return ret; 170 170 171 - ret = ret->next; 171 + ret = rcu_dereference(ret->next); 172 172 } 173 173 174 174 return NULL; ··· 177 177 static int __set_item(struct pppoe_net *pn, struct pppox_sock *po) 178 178 { 179 179 int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); 180 - struct pppox_sock *ret; 180 + struct pppox_sock *ret, *first; 181 181 182 - ret = pn->hash_table[hash]; 182 + first = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); 183 + ret = first; 183 184 while (ret) { 184 185 if (cmp_2_addr(&ret->pppoe_pa, &po->pppoe_pa) && 185 186 ret->pppoe_ifindex == po->pppoe_ifindex) 186 187 return -EALREADY; 187 188 188 - ret = ret->next; 189 + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); 189 190 } 190 191 191 - po->next = pn->hash_table[hash]; 192 - pn->hash_table[hash] = po; 192 + RCU_INIT_POINTER(po->next, first); 193 + rcu_assign_pointer(pn->hash_table[hash], po); 193 194 194 195 return 0; 195 196 } ··· 199 198 char *addr, int ifindex) 200 199 { 201 200 int hash = hash_item(sid, addr); 202 - struct pppox_sock *ret, **src; 201 + struct pppox_sock *ret, __rcu **src; 203 202 204 - ret = pn->hash_table[hash]; 203 + ret = rcu_dereference_protected(pn->hash_table[hash], lockdep_is_held(&pn->hash_lock)); 205 204 src = &pn->hash_table[hash]; 206 205 207 206 while (ret) { 208 207 if (cmp_addr(&ret->pppoe_pa, sid, addr) && 209 208 ret->pppoe_ifindex == ifindex) { 210 - *src = ret->next; 209 + struct pppox_sock *next; 210 + 211 + next = rcu_dereference_protected(ret->next, 212 + lockdep_is_held(&pn->hash_lock)); 213 + rcu_assign_pointer(*src, next); 211 214 break; 212 215 } 213 216 214 217 src = &ret->next; 215 - ret = ret->next; 218 + ret = rcu_dereference_protected(ret->next, lockdep_is_held(&pn->hash_lock)); 216 219 } 217 220 } 218 221 ··· 230 225 { 231 226 struct pppox_sock *po; 232 227 233 - read_lock_bh(&pn->hash_lock); 234 228 po = __get_item(pn, sid, addr, ifindex); 235 - if (po) 236 - sock_hold(sk_pppox(po)); 237 - read_unlock_bh(&pn->hash_lock); 229 + if (po && !refcount_inc_not_zero(&sk_pppox(po)->sk_refcnt)) 230 + po = NULL; 238 231 239 232 return po; 240 233 } ··· 261 258 static inline void delete_item(struct pppoe_net *pn, __be16 sid, 262 259 char *addr, int ifindex) 263 260 { 264 - write_lock_bh(&pn->hash_lock); 261 + spin_lock(&pn->hash_lock); 265 262 __delete_item(pn, sid, addr, ifindex); 266 - write_unlock_bh(&pn->hash_lock); 263 + spin_unlock(&pn->hash_lock); 267 264 } 268 265 269 266 /*************************************************************************** ··· 279 276 int i; 280 277 281 278 pn = pppoe_pernet(dev_net(dev)); 282 - write_lock_bh(&pn->hash_lock); 279 + spin_lock(&pn->hash_lock); 283 280 for (i = 0; i < PPPOE_HASH_SIZE; i++) { 284 - struct pppox_sock *po = pn->hash_table[i]; 281 + struct pppox_sock *po = rcu_dereference_protected(pn->hash_table[i], 282 + lockdep_is_held(&pn->hash_lock)); 285 283 struct sock *sk; 286 284 287 285 while (po) { 288 286 while (po && po->pppoe_dev != dev) { 289 - po = po->next; 287 + po = rcu_dereference_protected(po->next, 288 + lockdep_is_held(&pn->hash_lock)); 290 289 } 291 290 292 291 if (!po) ··· 305 300 */ 306 301 307 302 sock_hold(sk); 308 - write_unlock_bh(&pn->hash_lock); 303 + spin_unlock(&pn->hash_lock); 309 304 lock_sock(sk); 310 305 311 306 if (po->pppoe_dev == dev && ··· 325 320 */ 326 321 327 322 BUG_ON(pppoe_pernet(dev_net(dev)) == NULL); 328 - write_lock_bh(&pn->hash_lock); 329 - po = pn->hash_table[i]; 323 + spin_lock(&pn->hash_lock); 324 + po = rcu_dereference_protected(pn->hash_table[i], 325 + lockdep_is_held(&pn->hash_lock)); 330 326 } 331 327 } 332 - write_unlock_bh(&pn->hash_lock); 328 + spin_unlock(&pn->hash_lock); 333 329 } 334 330 335 331 static int pppoe_device_event(struct notifier_block *this, ··· 534 528 .obj_size = sizeof(struct pppox_sock), 535 529 }; 536 530 531 + static void pppoe_destruct(struct sock *sk) 532 + { 533 + skb_queue_purge(&sk->sk_receive_queue); 534 + } 535 + 537 536 /*********************************************************************** 538 537 * 539 538 * Initialize a new struct sock. ··· 553 542 return -ENOMEM; 554 543 555 544 sock_init_data(sock, sk); 545 + sock_set_flag(sk, SOCK_RCU_FREE); 556 546 557 547 sock->state = SS_UNCONNECTED; 558 548 sock->ops = &pppoe_ops; 559 549 560 550 sk->sk_backlog_rcv = pppoe_rcv_core; 551 + sk->sk_destruct = pppoe_destruct; 561 552 sk->sk_state = PPPOX_NONE; 562 553 sk->sk_type = SOCK_STREAM; 563 554 sk->sk_family = PF_PPPOX; ··· 612 599 sock_orphan(sk); 613 600 sock->sk = NULL; 614 601 615 - skb_queue_purge(&sk->sk_receive_queue); 616 602 release_sock(sk); 617 603 sock_put(sk); 618 604 ··· 693 681 &sp->sa_addr.pppoe, 694 682 sizeof(struct pppoe_addr)); 695 683 696 - write_lock_bh(&pn->hash_lock); 684 + spin_lock(&pn->hash_lock); 697 685 error = __set_item(pn, po); 698 - write_unlock_bh(&pn->hash_lock); 686 + spin_unlock(&pn->hash_lock); 699 687 if (error < 0) 700 688 goto err_put; 701 689 ··· 1064 1052 int i; 1065 1053 1066 1054 for (i = 0; i < PPPOE_HASH_SIZE; i++) { 1067 - po = pn->hash_table[i]; 1055 + po = rcu_dereference(pn->hash_table[i]); 1068 1056 while (po) { 1069 1057 if (!pos--) 1070 1058 goto out; 1071 - po = po->next; 1059 + po = rcu_dereference(po->next); 1072 1060 } 1073 1061 } 1074 1062 ··· 1077 1065 } 1078 1066 1079 1067 static void *pppoe_seq_start(struct seq_file *seq, loff_t *pos) 1080 - __acquires(pn->hash_lock) 1068 + __acquires(RCU) 1081 1069 { 1082 1070 struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); 1083 1071 loff_t l = *pos; 1084 1072 1085 - read_lock_bh(&pn->hash_lock); 1073 + rcu_read_lock(); 1086 1074 return l ? pppoe_get_idx(pn, --l) : SEQ_START_TOKEN; 1087 1075 } 1088 1076 1089 1077 static void *pppoe_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1090 1078 { 1091 1079 struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); 1092 - struct pppox_sock *po; 1080 + struct pppox_sock *po, *next; 1093 1081 1094 1082 ++*pos; 1095 1083 if (v == SEQ_START_TOKEN) { ··· 1097 1085 goto out; 1098 1086 } 1099 1087 po = v; 1100 - if (po->next) 1101 - po = po->next; 1088 + next = rcu_dereference(po->next); 1089 + if (next) 1090 + po = next; 1102 1091 else { 1103 1092 int hash = hash_item(po->pppoe_pa.sid, po->pppoe_pa.remote); 1104 1093 1105 1094 po = NULL; 1106 1095 while (++hash < PPPOE_HASH_SIZE) { 1107 - po = pn->hash_table[hash]; 1096 + po = rcu_dereference(pn->hash_table[hash]); 1108 1097 if (po) 1109 1098 break; 1110 1099 } ··· 1116 1103 } 1117 1104 1118 1105 static void pppoe_seq_stop(struct seq_file *seq, void *v) 1119 - __releases(pn->hash_lock) 1106 + __releases(RCU) 1120 1107 { 1121 - struct pppoe_net *pn = pppoe_pernet(seq_file_net(seq)); 1122 - read_unlock_bh(&pn->hash_lock); 1108 + rcu_read_unlock(); 1123 1109 } 1124 1110 1125 1111 static const struct seq_operations pppoe_seq_ops = { ··· 1161 1149 struct pppoe_net *pn = pppoe_pernet(net); 1162 1150 struct proc_dir_entry *pde; 1163 1151 1164 - rwlock_init(&pn->hash_lock); 1152 + spin_lock_init(&pn->hash_lock); 1165 1153 1166 1154 pde = proc_create_net("pppoe", 0444, net->proc_net, 1167 1155 &pppoe_seq_ops, sizeof(struct seq_net_private));
+1 -1
include/linux/if_pppox.h
··· 43 43 /* struct sock must be the first member of pppox_sock */ 44 44 struct sock sk; 45 45 struct ppp_channel chan; 46 - struct pppox_sock *next; /* for hash table */ 46 + struct pppox_sock __rcu *next; /* for hash table */ 47 47 union { 48 48 struct pppoe_opt pppoe; 49 49 struct pptp_opt pptp;