Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

inet: frag: don't wait for timer deletion when evicting

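Frank reports 'NMI watchdog: BUG: soft lockup' errors when
load is high. Whenever the evictor found a queue whose timer was
already firing, it dropped the bucket lock, waited for that timer to
finish, and then restarted the bucket scan from the beginning. Instead
of these (potentially) unbounded restarts of the eviction process, just
skip to the next entry.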

One caveat is that, when a netns is exiting, a timer may still be running
by the time inet_evict_bucket returns.

We use the frag memory accounting to wait for outstanding timers,
so that when we free the percpu counter we can be sure no running
timer will trip over it.

Reported-and-tested-by: Frank Schreuder <fschreuder@transip.nl>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Florian Westphal and committed by David S. Miller.
5719b296 0e60d245

+11 -18
+11 -18
net/ipv4/inet_fragment.c
··· 131 131 unsigned int evicted = 0; 132 132 HLIST_HEAD(expired); 133 133 134 - evict_again: 135 134 spin_lock(&hb->chain_lock); 136 135 137 136 hlist_for_each_entry_safe(fq, n, &hb->chain, list) { 138 137 if (!inet_fragq_should_evict(fq)) 139 138 continue; 140 139 141 - if (!del_timer(&fq->timer)) { 142 - /* q expiring right now thus increment its refcount so 143 - * it won't be freed under us and wait until the timer 144 - * has finished executing then destroy it 145 - */ 146 - atomic_inc(&fq->refcnt); 147 - spin_unlock(&hb->chain_lock); 148 - del_timer_sync(&fq->timer); 149 - inet_frag_put(fq, f); 150 - goto evict_again; 151 - } 140 + if (!del_timer(&fq->timer)) 141 + continue; 152 142 153 143 fq->flags |= INET_FRAG_EVICTED; 154 144 hlist_add_head(&fq->list_evictor, &expired); ··· 229 239 int i; 230 240 231 241 nf->low_thresh = 0; 232 - local_bh_disable(); 233 242 234 243 evict_again: 244 + local_bh_disable(); 235 245 seq = read_seqbegin(&f->rnd_seqlock); 236 246 237 247 for (i = 0; i < INETFRAGS_HASHSZ ; i++) 238 248 inet_evict_bucket(f, &f->hash[i]); 239 249 240 - if (read_seqretry(&f->rnd_seqlock, seq)) 241 - goto evict_again; 242 - 243 250 local_bh_enable(); 251 + cond_resched(); 252 + 253 + if (read_seqretry(&f->rnd_seqlock, seq) || 254 + percpu_counter_sum(&nf->mem)) 255 + goto evict_again; 244 256 245 257 percpu_counter_destroy(&nf->mem); 246 258 } ··· 276 284 277 285 hb = get_frag_bucket_locked(fq, f); 278 286 hlist_del(&fq->list); 287 + fq->flags |= INET_FRAG_COMPLETE; 279 288 spin_unlock(&hb->chain_lock); 280 289 } 281 290 ··· 288 295 if (!(fq->flags & INET_FRAG_COMPLETE)) { 289 296 fq_unlink(fq, f); 290 297 atomic_dec(&fq->refcnt); 291 - fq->flags |= INET_FRAG_COMPLETE; 292 298 } 293 299 } 294 300 EXPORT_SYMBOL(inet_frag_kill); ··· 320 328 fp = xp; 321 329 } 322 330 sum = sum_truesize + f->qsize; 323 - sub_frag_mem_limit(q->net, sum); 324 331 325 332 if (f->destructor) 326 333 f->destructor(q); 327 334 kmem_cache_free(f->frags_cachep, q); 335 + 336 + 
sub_frag_mem_limit(nf, sum); 328 337 } 329 338 EXPORT_SYMBOL(inet_frag_destroy); 330 339