Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

inetpeer: Move ICMP rate limiting state into inet_peer entries.

Like metrics, the ICMP rate limiting bits are cached state about
a destination. So move it into the inet_peer entries.

If an inet_peer cannot be bound (the reason is memory allocation
failure or similar), the policy is to allow.

Signed-off-by: David S. Miller <davem@davemloft.net>

+108 -73
-2
include/net/dst.h
··· 78 78 atomic_t __refcnt; /* client references */ 79 79 int __use; 80 80 unsigned long lastuse; 81 - unsigned long rate_last; /* rate limiting for ICMP */ 82 - unsigned int rate_tokens; 83 81 int flags; 84 82 #define DST_HOST 0x0001 85 83 #define DST_NOXFRM 0x0002
-3
include/net/icmp.h
··· 45 45 extern int icmp_init(void); 46 46 extern void icmp_out_count(struct net *net, unsigned char type); 47 47 48 - /* Move into dst.h ? */ 49 - extern int xrlim_allow(struct dst_entry *dst, int timeout); 50 - 51 48 #endif /* _ICMP_H */
+3
include/net/inetpeer.h
··· 44 44 __u32 tcp_ts; 45 45 __u32 tcp_ts_stamp; 46 46 u32 metrics[RTAX_MAX]; 47 + u32 rate_tokens; /* rate limiting for ICMP */ 48 + unsigned long rate_last; 47 49 }; 48 50 struct rcu_head rcu; 49 51 }; ··· 83 81 84 82 /* can be called from BH context or outside */ 85 83 extern void inet_putpeer(struct inet_peer *p); 84 + extern bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout); 86 85 87 86 /* 88 87 * temporary check to make sure we dont access rid, ip_id_count, tcp_ts,
+8 -41
net/ipv4/icmp.c
··· 233 233 * Send an ICMP frame. 234 234 */ 235 235 236 - /* 237 - * Check transmit rate limitation for given message. 238 - * The rate information is held in the destination cache now. 239 - * This function is generic and could be used for other purposes 240 - * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. 241 - * 242 - * Note that the same dst_entry fields are modified by functions in 243 - * route.c too, but these work for packet destinations while xrlim_allow 244 - * works for icmp destinations. This means the rate limiting information 245 - * for one "ip object" is shared - and these ICMPs are twice limited: 246 - * by source and by destination. 247 - * 248 - * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate 249 - * SHOULD allow setting of rate limits 250 - * 251 - * Shared between ICMPv4 and ICMPv6. 252 - */ 253 - #define XRLIM_BURST_FACTOR 6 254 - int xrlim_allow(struct dst_entry *dst, int timeout) 255 - { 256 - unsigned long now, token = dst->rate_tokens; 257 - int rc = 0; 258 - 259 - now = jiffies; 260 - token += now - dst->rate_last; 261 - dst->rate_last = now; 262 - if (token > XRLIM_BURST_FACTOR * timeout) 263 - token = XRLIM_BURST_FACTOR * timeout; 264 - if (token >= timeout) { 265 - token -= timeout; 266 - rc = 1; 267 - } 268 - dst->rate_tokens = token; 269 - return rc; 270 - } 271 - EXPORT_SYMBOL(xrlim_allow); 272 - 273 - static inline int icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 236 + static inline bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, 274 237 int type, int code) 275 238 { 276 239 struct dst_entry *dst = &rt->dst; 277 - int rc = 1; 240 + bool rc = true; 278 241 279 242 if (type > NR_ICMP_TYPES) 280 243 goto out; ··· 251 288 goto out; 252 289 253 290 /* Limit if icmp type is enabled in ratemask. 
*/ 254 - if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) 255 - rc = xrlim_allow(dst, net->ipv4.sysctl_icmp_ratelimit); 291 + if ((1 << type) & net->ipv4.sysctl_icmp_ratemask) { 292 + if (!rt->peer) 293 + rt_bind_peer(rt, 1); 294 + rc = inet_peer_xrlim_allow(rt->peer, 295 + net->ipv4.sysctl_icmp_ratelimit); 296 + } 256 297 out: 257 298 return rc; 258 299 }
+43
net/ipv4/inetpeer.c
··· 513 513 atomic_set(&p->ip_id_count, secure_ip_id(daddr->a4)); 514 514 p->tcp_ts_stamp = 0; 515 515 p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; 516 + p->rate_tokens = 0; 517 + p->rate_last = 0; 516 518 INIT_LIST_HEAD(&p->unused); 517 519 518 520 ··· 582 580 local_bh_enable(); 583 581 } 584 582 EXPORT_SYMBOL_GPL(inet_putpeer); 583 + 584 + /* 585 + * Check transmit rate limitation for given message. 586 + * The rate information is held in the inet_peer entries now. 587 + * This function is generic and could be used for other purposes 588 + * too. It uses a Token bucket filter as suggested by Alexey Kuznetsov. 589 + * 590 + * Note that the same inet_peer fields are modified by functions in 591 + * route.c too, but these work for packet destinations while 592 + * inet_peer_xrlim_allow works for icmp destinations. This means the rate 593 + * limiting information for one "ip object" is shared - and these ICMPs are 594 + * twice limited: by source and by destination. 595 + * 596 + * RFC 1812: 4.3.2.8 SHOULD be able to limit error message rate 597 + * SHOULD allow setting of rate limits 598 + * 599 + * Shared between ICMPv4 and ICMPv6. 600 + */ 601 + #define XRLIM_BURST_FACTOR 6 602 + bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) 603 + { 604 + unsigned long now, token; 605 + bool rc = false; 606 + 607 + if (!peer) 608 + return true; 609 + 610 + token = peer->rate_tokens; 611 + now = jiffies; 612 + token += now - peer->rate_last; 613 + peer->rate_last = now; 614 + if (token > XRLIM_BURST_FACTOR * timeout) 615 + token = XRLIM_BURST_FACTOR * timeout; 616 + if (token >= timeout) { 617 + token -= timeout; 618 + rc = true; 619 + } 620 + peer->rate_tokens = token; 621 + return rc; 622 + } 623 + EXPORT_SYMBOL(inet_peer_xrlim_allow);
+38 -18
net/ipv4/route.c
··· 1563 1563 { 1564 1564 struct rtable *rt = skb_rtable(skb); 1565 1565 struct in_device *in_dev; 1566 + struct inet_peer *peer; 1566 1567 int log_martians; 1567 1568 1568 1569 rcu_read_lock(); ··· 1575 1574 log_martians = IN_DEV_LOG_MARTIANS(in_dev); 1576 1575 rcu_read_unlock(); 1577 1576 1577 + if (!rt->peer) 1578 + rt_bind_peer(rt, 1); 1579 + peer = rt->peer; 1580 + if (!peer) { 1581 + icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1582 + return; 1583 + } 1584 + 1578 1585 /* No redirected packets during ip_rt_redirect_silence; 1579 1586 * reset the algorithm. 1580 1587 */ 1581 - if (time_after(jiffies, rt->dst.rate_last + ip_rt_redirect_silence)) 1582 - rt->dst.rate_tokens = 0; 1588 + if (time_after(jiffies, peer->rate_last + ip_rt_redirect_silence)) 1589 + peer->rate_tokens = 0; 1583 1590 1584 1591 /* Too many ignored redirects; do not send anything 1585 1592 * set dst.rate_last to the last seen redirected packet. 1586 1593 */ 1587 - if (rt->dst.rate_tokens >= ip_rt_redirect_number) { 1588 - rt->dst.rate_last = jiffies; 1594 + if (peer->rate_tokens >= ip_rt_redirect_number) { 1595 + peer->rate_last = jiffies; 1589 1596 return; 1590 1597 } 1591 1598 1592 1599 /* Check for load limit; set rate_last to the latest sent 1593 1600 * redirect. 
1594 1601 */ 1595 - if (rt->dst.rate_tokens == 0 || 1602 + if (peer->rate_tokens == 0 || 1596 1603 time_after(jiffies, 1597 - (rt->dst.rate_last + 1598 - (ip_rt_redirect_load << rt->dst.rate_tokens)))) { 1604 + (peer->rate_last + 1605 + (ip_rt_redirect_load << peer->rate_tokens)))) { 1599 1606 icmp_send(skb, ICMP_REDIRECT, ICMP_REDIR_HOST, rt->rt_gateway); 1600 - rt->dst.rate_last = jiffies; 1601 - ++rt->dst.rate_tokens; 1607 + peer->rate_last = jiffies; 1608 + ++peer->rate_tokens; 1602 1609 #ifdef CONFIG_IP_ROUTE_VERBOSE 1603 1610 if (log_martians && 1604 - rt->dst.rate_tokens == ip_rt_redirect_number && 1611 + peer->rate_tokens == ip_rt_redirect_number && 1605 1612 net_ratelimit()) 1606 1613 printk(KERN_WARNING "host %pI4/if%d ignores redirects for %pI4 to %pI4.\n", 1607 1614 &rt->rt_src, rt->rt_iif, ··· 1621 1612 static int ip_error(struct sk_buff *skb) 1622 1613 { 1623 1614 struct rtable *rt = skb_rtable(skb); 1615 + struct inet_peer *peer; 1624 1616 unsigned long now; 1617 + bool send; 1625 1618 int code; 1626 1619 1627 1620 switch (rt->dst.error) { ··· 1643 1632 break; 1644 1633 } 1645 1634 1646 - now = jiffies; 1647 - rt->dst.rate_tokens += now - rt->dst.rate_last; 1648 - if (rt->dst.rate_tokens > ip_rt_error_burst) 1649 - rt->dst.rate_tokens = ip_rt_error_burst; 1650 - rt->dst.rate_last = now; 1651 - if (rt->dst.rate_tokens >= ip_rt_error_cost) { 1652 - rt->dst.rate_tokens -= ip_rt_error_cost; 1653 - icmp_send(skb, ICMP_DEST_UNREACH, code, 0); 1635 + if (!rt->peer) 1636 + rt_bind_peer(rt, 1); 1637 + peer = rt->peer; 1638 + 1639 + send = true; 1640 + if (peer) { 1641 + now = jiffies; 1642 + peer->rate_tokens += now - peer->rate_last; 1643 + if (peer->rate_tokens > ip_rt_error_burst) 1644 + peer->rate_tokens = ip_rt_error_burst; 1645 + peer->rate_last = now; 1646 + if (peer->rate_tokens >= ip_rt_error_cost) 1647 + peer->rate_tokens -= ip_rt_error_cost; 1648 + else 1649 + send = false; 1654 1650 } 1651 + if (send) 1652 + icmp_send(skb, ICMP_DEST_UNREACH, code, 
0); 1655 1653 1656 1654 out: kfree_skb(skb); 1657 1655 return 0;
+9 -7
net/ipv6/icmp.c
··· 157 157 /* 158 158 * Check the ICMP output rate limit 159 159 */ 160 - static inline int icmpv6_xrlim_allow(struct sock *sk, u8 type, 161 - struct flowi *fl) 160 + static inline bool icmpv6_xrlim_allow(struct sock *sk, u8 type, 161 + struct flowi *fl) 162 162 { 163 163 struct dst_entry *dst; 164 164 struct net *net = sock_net(sk); 165 - int res = 0; 165 + bool res = false; 166 166 167 167 /* Informational messages are not limited. */ 168 168 if (type & ICMPV6_INFOMSG_MASK) 169 - return 1; 169 + return true; 170 170 171 171 /* Do not limit pmtu discovery, it would break it. */ 172 172 if (type == ICMPV6_PKT_TOOBIG) 173 - return 1; 173 + return true; 174 174 175 175 /* 176 176 * Look up the output route. ··· 182 182 IP6_INC_STATS(net, ip6_dst_idev(dst), 183 183 IPSTATS_MIB_OUTNOROUTES); 184 184 } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) { 185 - res = 1; 185 + res = true; 186 186 } else { 187 187 struct rt6_info *rt = (struct rt6_info *)dst; 188 188 int tmo = net->ipv6.sysctl.icmpv6_time; ··· 191 191 if (rt->rt6i_dst.plen < 128) 192 192 tmo >>= ((128 - rt->rt6i_dst.plen)>>5); 193 193 194 - res = xrlim_allow(dst, tmo); 194 + if (!rt->rt6i_peer) 195 + rt6_bind_peer(rt, 1); 196 + res = inet_peer_xrlim_allow(rt->rt6i_peer, tmo); 195 197 } 196 198 dst_release(dst); 197 199 return res;
+4 -1
net/ipv6/ip6_output.c
··· 479 479 else 480 480 target = &hdr->daddr; 481 481 482 + if (!rt->rt6i_peer) 483 + rt6_bind_peer(rt, 1); 484 + 482 485 /* Limit redirects both by destination (here) 483 486 and by source (inside ndisc_send_redirect) 484 487 */ 485 - if (xrlim_allow(dst, 1*HZ)) 488 + if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) 486 489 ndisc_send_redirect(skb, n, target); 487 490 } else { 488 491 int addrtype = ipv6_addr_type(&hdr->saddr);
+3 -1
net/ipv6/ndisc.c
··· 1553 1553 "ICMPv6 Redirect: destination is not a neighbour.\n"); 1554 1554 goto release; 1555 1555 } 1556 - if (!xrlim_allow(dst, 1*HZ)) 1556 + if (!rt->rt6i_peer) 1557 + rt6_bind_peer(rt, 1); 1558 + if (!inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ)) 1557 1559 goto release; 1558 1560 1559 1561 if (dev->addr_len) {