Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

net: add proper RCU protection to /proc/net/ptype

Yin Fengwei reported an RCU stall in ptype_seq_show() and provided
a patch.

The real issue is that ptype_seq_next() and ptype_seq_show() violate
RCU rules.

ptype_seq_show() runs under rcu_read_lock(), and reads pt->dev
to get the device name without any barrier.

At the same time, concurrent writers can remove a packet_type structure
(which is correctly freed after an RCU grace period) and clear pt->dev
without an RCU grace period.

Define ptype_iter_state to carry a dev pointer alongside seq_net_private:

struct ptype_iter_state {
	struct seq_net_private	p;
	struct net_device	*dev; // added in this patch
};

We need to record the device pointer in ptype_get_idx() and
ptype_seq_next() so that ptype_seq_show() is safe against
concurrent pt->dev changes.

We also need to add full RCU protection in ptype_seq_next()
(READ_ONCE() was missing when reading list.next values).

Many thanks to Dong Chenchen for providing a repro.

Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
Fixes: 1d10f8a1f40b ("net-procfs: show net devices bound packet types")
Fixes: c353e8983e0d ("net: introduce per netns packet chains")
Reported-by: Yin Fengwei <fengwei_yin@linux.alibaba.com>
Reported-by: Dong Chenchen <dongchenchen2@huawei.com>
Closes: https://lore.kernel.org/netdev/CANn89iKRRKPnWjJmb-_3a=sq+9h6DvTQM4DBZHT5ZRGPMzQaiA@mail.gmail.com/T/#m7b80b9fc9b9267f90e0b7aad557595f686f9c50d

Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Tested-by: Yin Fengwei <fengwei_yin@linux.alibaba.com>
Link: https://patch.msgid.link/20260202205217.2881198-1-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

Authored by Eric Dumazet and committed by Jakub Kicinski
f613e8b4 78211543

+34 -16
+34 -16
net/core/net-procfs.c
··· 170 170 .show = softnet_seq_show, 171 171 }; 172 172 173 + struct ptype_iter_state { 174 + struct seq_net_private p; 175 + struct net_device *dev; 176 + }; 177 + 173 178 static void *ptype_get_idx(struct seq_file *seq, loff_t pos) 174 179 { 180 + struct ptype_iter_state *iter = seq->private; 175 181 struct list_head *ptype_list = NULL; 176 182 struct packet_type *pt = NULL; 177 183 struct net_device *dev; ··· 187 181 for_each_netdev_rcu(seq_file_net(seq), dev) { 188 182 ptype_list = &dev->ptype_all; 189 183 list_for_each_entry_rcu(pt, ptype_list, list) { 190 - if (i == pos) 184 + if (i == pos) { 185 + iter->dev = dev; 191 186 return pt; 187 + } 192 188 ++i; 193 189 } 194 190 } 191 + 192 + iter->dev = NULL; 195 193 196 194 list_for_each_entry_rcu(pt, &seq_file_net(seq)->ptype_all, list) { 197 195 if (i == pos) ··· 228 218 229 219 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos) 230 220 { 221 + struct ptype_iter_state *iter = seq->private; 231 222 struct net *net = seq_file_net(seq); 232 223 struct net_device *dev; 233 224 struct packet_type *pt; ··· 240 229 return ptype_get_idx(seq, 0); 241 230 242 231 pt = v; 243 - nxt = pt->list.next; 244 - if (pt->dev) { 245 - if (nxt != &pt->dev->ptype_all) 232 + nxt = READ_ONCE(pt->list.next); 233 + dev = iter->dev; 234 + if (dev) { 235 + if (nxt != &dev->ptype_all) 246 236 goto found; 247 237 248 - dev = pt->dev; 249 238 for_each_netdev_continue_rcu(seq_file_net(seq), dev) { 250 - if (!list_empty(&dev->ptype_all)) { 251 - nxt = dev->ptype_all.next; 239 + nxt = READ_ONCE(dev->ptype_all.next); 240 + if (nxt != &dev->ptype_all) { 241 + iter->dev = dev; 252 242 goto found; 253 243 } 254 244 } 255 - nxt = net->ptype_all.next; 245 + iter->dev = NULL; 246 + nxt = READ_ONCE(net->ptype_all.next); 256 247 goto net_ptype_all; 257 248 } 258 249 ··· 265 252 266 253 if (nxt == &net->ptype_all) { 267 254 /* continue with ->ptype_specific if it's not empty */ 268 - nxt = net->ptype_specific.next; 255 + nxt = 
READ_ONCE(net->ptype_specific.next); 269 256 if (nxt != &net->ptype_specific) 270 257 goto found; 271 258 } 272 259 273 260 hash = 0; 274 - nxt = ptype_base[0].next; 261 + nxt = READ_ONCE(ptype_base[0].next); 275 262 } else 276 263 hash = ntohs(pt->type) & PTYPE_HASH_MASK; 277 264 278 265 while (nxt == &ptype_base[hash]) { 279 266 if (++hash >= PTYPE_HASH_SIZE) 280 267 return NULL; 281 - nxt = ptype_base[hash].next; 268 + nxt = READ_ONCE(ptype_base[hash].next); 282 269 } 283 270 found: 284 271 return list_entry(nxt, struct packet_type, list); ··· 292 279 293 280 static int ptype_seq_show(struct seq_file *seq, void *v) 294 281 { 282 + struct ptype_iter_state *iter = seq->private; 295 283 struct packet_type *pt = v; 284 + struct net_device *dev; 296 285 297 - if (v == SEQ_START_TOKEN) 286 + if (v == SEQ_START_TOKEN) { 298 287 seq_puts(seq, "Type Device Function\n"); 299 - else if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && 300 - (!pt->dev || net_eq(dev_net(pt->dev), seq_file_net(seq)))) { 288 + return 0; 289 + } 290 + dev = iter->dev; 291 + if ((!pt->af_packet_net || net_eq(pt->af_packet_net, seq_file_net(seq))) && 292 + (!dev || net_eq(dev_net(dev), seq_file_net(seq)))) { 301 293 if (pt->type == htons(ETH_P_ALL)) 302 294 seq_puts(seq, "ALL "); 303 295 else 304 296 seq_printf(seq, "%04x", ntohs(pt->type)); 305 297 306 298 seq_printf(seq, " %-8s %ps\n", 307 - pt->dev ? pt->dev->name : "", pt->func); 299 + dev ? dev->name : "", pt->func); 308 300 } 309 301 310 302 return 0; ··· 333 315 &softnet_seq_ops)) 334 316 goto out_dev; 335 317 if (!proc_create_net("ptype", 0444, net->proc_net, &ptype_seq_ops, 336 - sizeof(struct seq_net_private))) 318 + sizeof(struct ptype_iter_state))) 337 319 goto out_softnet; 338 320 339 321 if (wext_proc_init(net))