Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

at v2.6.19-rc3 1844 lines 46 kB view raw
1/* 2 * DECnet An implementation of the DECnet protocol suite for the LINUX 3 * operating system. DECnet is implemented using the BSD Socket 4 * interface as the means of communication with the user level. 5 * 6 * DECnet Routing Functions (Endnode and Router) 7 * 8 * Authors: Steve Whitehouse <SteveW@ACM.org> 9 * Eduardo Marcelo Serrat <emserrat@geocities.com> 10 * 11 * Changes: 12 * Steve Whitehouse : Fixes to allow "intra-ethernet" and 13 * "return-to-sender" bits on outgoing 14 * packets. 15 * Steve Whitehouse : Timeouts for cached routes. 16 * Steve Whitehouse : Use dst cache for input routes too. 17 * Steve Whitehouse : Fixed error values in dn_send_skb. 18 * Steve Whitehouse : Rework routing functions to better fit 19 * DECnet routing design 20 * Alexey Kuznetsov : New SMP locking 21 * Steve Whitehouse : More SMP locking changes & dn_cache_dump() 22 * Steve Whitehouse : Prerouting NF hook, now really is prerouting. 23 * Fixed possible skb leak in rtnetlink funcs. 24 * Steve Whitehouse : Dave Miller's dynamic hash table sizing and 25 * Alexey Kuznetsov's finer grained locking 26 * from ipv4/route.c. 27 * Steve Whitehouse : Routing is now starting to look like a 28 * sensible set of code now, mainly due to 29 * my copying the IPv4 routing code. The 30 * hooks here are modified and will continue 31 * to evolve for a while. 32 * Steve Whitehouse : Real SMP at last :-) Also new netfilter 33 * stuff. Look out raw sockets your days 34 * are numbered! 35 * Steve Whitehouse : Added return-to-sender functions. Added 36 * backlog congestion level return codes. 37 * Steve Whitehouse : Fixed bug where routes were set up with 38 * no ref count on net devices. 39 * Steve Whitehouse : RCU for the route cache 40 * Steve Whitehouse : Preparations for the flow cache 41 * Steve Whitehouse : Prepare for nonlinear skbs 42 */ 43 44/****************************************************************************** 45 (c) 1995-1998 E.M. Serrat emserrat@geocities.com 46 47 This program is free software; you can redistribute it and/or modify 48 it under the terms of the GNU General Public License as published by 49 the Free Software Foundation; either version 2 of the License, or 50 any later version. 51 52 This program is distributed in the hope that it will be useful, 53 but WITHOUT ANY WARRANTY; without even the implied warranty of 54 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 55 GNU General Public License for more details. 56*******************************************************************************/ 57 58#include <linux/errno.h> 59#include <linux/types.h> 60#include <linux/socket.h> 61#include <linux/in.h> 62#include <linux/kernel.h> 63#include <linux/sockios.h> 64#include <linux/net.h> 65#include <linux/netdevice.h> 66#include <linux/inet.h> 67#include <linux/route.h> 68#include <linux/in_route.h> 69#include <net/sock.h> 70#include <linux/mm.h> 71#include <linux/proc_fs.h> 72#include <linux/seq_file.h> 73#include <linux/init.h> 74#include <linux/rtnetlink.h> 75#include <linux/string.h> 76#include <linux/netfilter_decnet.h> 77#include <linux/rcupdate.h> 78#include <linux/times.h> 79#include <asm/errno.h> 80#include <net/neighbour.h> 81#include <net/dst.h> 82#include <net/flow.h> 83#include <net/fib_rules.h> 84#include <net/dn.h> 85#include <net/dn_dev.h> 86#include <net/dn_nsp.h> 87#include <net/dn_route.h> 88#include <net/dn_neigh.h> 89#include <net/dn_fib.h> 90 91struct dn_rt_hash_bucket 92{ 93 struct dn_route *chain; 94 spinlock_t lock; 95} __attribute__((__aligned__(8))); 96 97extern struct neigh_table dn_neigh_table; 98 99 100static unsigned char dn_hiord_addr[6] = {0xAA,0x00,0x04,0x00,0x00,0x00}; 101 102static const int dn_rt_min_delay = 2 * HZ; 103static const int dn_rt_max_delay = 10 * HZ; 104static const int dn_rt_mtu_expires = 10 * 60 * HZ; 105 106static unsigned long dn_rt_deadline; 107 108static int dn_dst_gc(void); 109static struct dst_entry *dn_dst_check(struct dst_entry *, __u32); 110static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); 111static void dn_dst_link_failure(struct sk_buff *); 112static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu); 113static int dn_route_input(struct sk_buff *); 114static void dn_run_flush(unsigned long dummy); 115 116static struct dn_rt_hash_bucket *dn_rt_hash_table; 117static unsigned dn_rt_hash_mask; 118 119static struct timer_list dn_route_timer; 120static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0); 121int decnet_dst_gc_interval = 2; 122 123static struct dst_ops dn_dst_ops = { 124 .family = PF_DECnet, 125 .protocol = __constant_htons(ETH_P_DNA_RT), 126 .gc_thresh = 128, 127 .gc = dn_dst_gc, 128 .check = dn_dst_check, 129 .negative_advice = dn_dst_negative_advice, 130 .link_failure = dn_dst_link_failure, 131 .update_pmtu = dn_dst_update_pmtu, 132 .entry_size = sizeof(struct dn_route), 133 .entries = ATOMIC_INIT(0), 134}; 135 136static __inline__ unsigned dn_hash(__le16 src, __le16 dst) 137{ 138 __u16 tmp = (__u16 __force)(src ^ dst); 139 tmp ^= (tmp >> 3); 140 tmp ^= (tmp >> 5); 141 tmp ^= (tmp >> 10); 142 return dn_rt_hash_mask & (unsigned)tmp; 143} 144 145static inline void dnrt_free(struct dn_route *rt) 146{ 147 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 148} 149 150static inline void dnrt_drop(struct dn_route *rt) 151{ 152 dst_release(&rt->u.dst); 153 call_rcu_bh(&rt->u.dst.rcu_head, dst_rcu_free); 154} 155 156static void dn_dst_check_expire(unsigned long dummy) 157{ 158 int i; 159 struct dn_route *rt, **rtp; 160 unsigned long now = jiffies; 161 unsigned long expire = 120 * HZ; 162 163 for(i = 0; i <= dn_rt_hash_mask; i++) { 164 rtp = &dn_rt_hash_table[i].chain; 165 166 spin_lock(&dn_rt_hash_table[i].lock); 167 while((rt=*rtp) != NULL) { 168 if (atomic_read(&rt->u.dst.__refcnt) || 169 (now - rt->u.dst.lastuse) < expire) { 170 rtp = &rt->u.rt_next; 171 continue; 172 } 173 *rtp = rt->u.rt_next; 174 rt->u.rt_next = NULL; 175 dnrt_free(rt); 176 } 177 spin_unlock(&dn_rt_hash_table[i].lock); 178 179 if ((jiffies - now) > 0) 180 break; 181 } 182 183 mod_timer(&dn_route_timer, now + decnet_dst_gc_interval * HZ); 184} 185 186static int dn_dst_gc(void) 187{ 188 struct dn_route *rt, **rtp; 189 int i; 190 unsigned long now = jiffies; 191 unsigned long expire = 10 * HZ; 192 193 for(i = 0; i <= dn_rt_hash_mask; i++) { 194 195 spin_lock_bh(&dn_rt_hash_table[i].lock); 196 rtp = &dn_rt_hash_table[i].chain; 197 198 while((rt=*rtp) != NULL) { 199 if (atomic_read(&rt->u.dst.__refcnt) || 200 (now - rt->u.dst.lastuse) < expire) { 201 rtp = &rt->u.rt_next; 202 continue; 203 } 204 *rtp = rt->u.rt_next; 205 rt->u.rt_next = NULL; 206 dnrt_drop(rt); 207 break; 208 } 209 spin_unlock_bh(&dn_rt_hash_table[i].lock); 210 } 211 212 return 0; 213} 214 215/* 216 * The decnet standards don't impose a particular minimum mtu, what they 217 * do insist on is that the routing layer accepts a datagram of at least 218 * 230 bytes long. Here we have to subtract the routing header length from 219 * 230 to get the minimum acceptable mtu. If there is no neighbour, then we 220 * assume the worst and use a long header size. 221 * 222 * We update both the mtu and the advertised mss (i.e. the segment size we 223 * advertise to the other end). 224 */ 225static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu) 226{ 227 u32 min_mtu = 230; 228 struct dn_dev *dn = dst->neighbour ? 229 (struct dn_dev *)dst->neighbour->dev->dn_ptr : NULL; 230 231 if (dn && dn->use_long == 0) 232 min_mtu -= 6; 233 else 234 min_mtu -= 21; 235 236 if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) { 237 if (!(dst_metric_locked(dst, RTAX_MTU))) { 238 dst->metrics[RTAX_MTU-1] = mtu; 239 dst_set_expires(dst, dn_rt_mtu_expires); 240 } 241 if (!(dst_metric_locked(dst, RTAX_ADVMSS))) { 242 u32 mss = mtu - DN_MAX_NSP_DATA_HEADER; 243 if (dst->metrics[RTAX_ADVMSS-1] > mss) 244 dst->metrics[RTAX_ADVMSS-1] = mss; 245 } 246 } 247} 248 249/* 250 * When a route has been marked obsolete. (e.g. routing cache flush) 251 */ 252static struct dst_entry *dn_dst_check(struct dst_entry *dst, __u32 cookie) 253{ 254 return NULL; 255} 256 257static struct dst_entry *dn_dst_negative_advice(struct dst_entry *dst) 258{ 259 dst_release(dst); 260 return NULL; 261} 262 263static void dn_dst_link_failure(struct sk_buff *skb) 264{ 265 return; 266} 267 268static inline int compare_keys(struct flowi *fl1, struct flowi *fl2) 269{ 270 return ((fl1->nl_u.dn_u.daddr ^ fl2->nl_u.dn_u.daddr) | 271 (fl1->nl_u.dn_u.saddr ^ fl2->nl_u.dn_u.saddr) | 272#ifdef CONFIG_DECNET_ROUTE_FWMARK 273 (fl1->nl_u.dn_u.fwmark ^ fl2->nl_u.dn_u.fwmark) | 274#endif 275 (fl1->nl_u.dn_u.scope ^ fl2->nl_u.dn_u.scope) | 276 (fl1->oif ^ fl2->oif) | 277 (fl1->iif ^ fl2->iif)) == 0; 278} 279 280static int dn_insert_route(struct dn_route *rt, unsigned hash, struct dn_route **rp) 281{ 282 struct dn_route *rth, **rthp; 283 unsigned long now = jiffies; 284 285 rthp = &dn_rt_hash_table[hash].chain; 286 287 spin_lock_bh(&dn_rt_hash_table[hash].lock); 288 while((rth = *rthp) != NULL) { 289 if (compare_keys(&rth->fl, &rt->fl)) { 290 /* Put it first */ 291 *rthp = rth->u.rt_next; 292 rcu_assign_pointer(rth->u.rt_next, 293 dn_rt_hash_table[hash].chain); 294 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); 295 296 rth->u.dst.__use++; 297 dst_hold(&rth->u.dst); 298 rth->u.dst.lastuse = now; 299 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 300 301 dnrt_drop(rt); 302 *rp = rth; 303 return 0; 304 } 305 rthp = &rth->u.rt_next; 306 } 307 308 rcu_assign_pointer(rt->u.rt_next, dn_rt_hash_table[hash].chain); 309 rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); 310 311 dst_hold(&rt->u.dst); 312 rt->u.dst.__use++; 313 rt->u.dst.lastuse = now; 314 spin_unlock_bh(&dn_rt_hash_table[hash].lock); 315 *rp = rt; 316 return 0; 317} 318 319void dn_run_flush(unsigned long dummy) 320{ 321 int i; 322 struct dn_route *rt, *next; 323 324 for(i = 0; i < dn_rt_hash_mask; i++) { 325 spin_lock_bh(&dn_rt_hash_table[i].lock); 326 327 if ((rt = xchg(&dn_rt_hash_table[i].chain, NULL)) == NULL) 328 goto nothing_to_declare; 329 330 for(; rt; rt=next) { 331 next = rt->u.rt_next; 332 rt->u.rt_next = NULL; 333 dst_free((struct dst_entry *)rt); 334 } 335 336nothing_to_declare: 337 spin_unlock_bh(&dn_rt_hash_table[i].lock); 338 } 339} 340 341static DEFINE_SPINLOCK(dn_rt_flush_lock); 342 343void dn_rt_cache_flush(int delay) 344{ 345 unsigned long now = jiffies; 346 int user_mode = !in_interrupt(); 347 348 if (delay < 0) 349 delay = dn_rt_min_delay; 350 351 spin_lock_bh(&dn_rt_flush_lock); 352 353 if (del_timer(&dn_rt_flush_timer) && delay > 0 && dn_rt_deadline) { 354 long tmo = (long)(dn_rt_deadline - now); 355 356 if (user_mode && tmo < dn_rt_max_delay - dn_rt_min_delay) 357 tmo = 0; 358 359 if (delay > tmo) 360 delay = tmo; 361 } 362 363 if (delay <= 0) { 364 spin_unlock_bh(&dn_rt_flush_lock); 365 dn_run_flush(0); 366 return; 367 } 368 369 if (dn_rt_deadline == 0) 370 dn_rt_deadline = now + dn_rt_max_delay; 371 372 dn_rt_flush_timer.expires = now + delay; 373 add_timer(&dn_rt_flush_timer); 374 spin_unlock_bh(&dn_rt_flush_lock); 375} 376 377/** 378 * dn_return_short - Return a short packet to its sender 379 * @skb: The packet to return 380 * 381 */ 382static int dn_return_short(struct sk_buff *skb) 383{ 384 struct dn_skb_cb *cb; 385 unsigned char *ptr; 386 __le16 *src; 387 __le16 *dst; 388 __le16 tmp; 389 390 /* Add back headers */ 391 skb_push(skb, skb->data - skb->nh.raw); 392 393 if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL) 394 return NET_RX_DROP; 395 396 cb = DN_SKB_CB(skb); 397 /* Skip packet length and point to flags */ 398 ptr = skb->data + 2; 399 *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS; 400 401 dst = (__le16 *)ptr; 402 ptr += 2; 403 src = (__le16 *)ptr; 404 ptr += 2; 405 *ptr = 0; /* Zero hop count */ 406 407 /* Swap source and destination */ 408 tmp = *src; 409 *src = *dst; 410 *dst = tmp; 411 412 skb->pkt_type = PACKET_OUTGOING; 413 dn_rt_finish_output(skb, NULL, NULL); 414 return NET_RX_SUCCESS; 415} 416 417/** 418 * dn_return_long - Return a long packet to its sender 419 * @skb: The long format packet to return 420 * 421 */ 422static int dn_return_long(struct sk_buff *skb) 423{ 424 struct dn_skb_cb *cb; 425 unsigned char *ptr; 426 unsigned char *src_addr, *dst_addr; 427 unsigned char tmp[ETH_ALEN]; 428 429 /* Add back all headers */ 430 skb_push(skb, skb->data - skb->nh.raw); 431 432 if ((skb = skb_unshare(skb, GFP_ATOMIC)) == NULL) 433 return NET_RX_DROP; 434 435 cb = DN_SKB_CB(skb); 436 /* Ignore packet length and point to flags */ 437 ptr = skb->data + 2; 438 439 /* Skip padding */ 440 if (*ptr & DN_RT_F_PF) { 441 char padlen = (*ptr & ~DN_RT_F_PF); 442 ptr += padlen; 443 } 444 445 *ptr++ = (cb->rt_flags & ~DN_RT_F_RQR) | DN_RT_F_RTS; 446 ptr += 2; 447 dst_addr = ptr; 448 ptr += 8; 449 src_addr = ptr; 450 ptr += 6; 451 *ptr = 0; /* Zero hop count */ 452 453 /* Swap source and destination */ 454 memcpy(tmp, src_addr, ETH_ALEN); 455 memcpy(src_addr, dst_addr, ETH_ALEN); 456 memcpy(dst_addr, tmp, ETH_ALEN); 457 458 skb->pkt_type = PACKET_OUTGOING; 459 dn_rt_finish_output(skb, dst_addr, src_addr); 460 return NET_RX_SUCCESS; 461} 462 463/** 464 * dn_route_rx_packet - Try and find a route for an incoming packet 465 * @skb: The packet to find a route for 466 * 467 * Returns: result of input function if route is found, error code otherwise 468 */ 469static int dn_route_rx_packet(struct sk_buff *skb) 470{ 471 struct dn_skb_cb *cb = DN_SKB_CB(skb); 472 int err; 473 474 if ((err = dn_route_input(skb)) == 0) 475 return dst_input(skb); 476 477 if (decnet_debug_level & 4) { 478 char *devname = skb->dev ? skb->dev->name : "???"; 479 struct dn_skb_cb *cb = DN_SKB_CB(skb); 480 printk(KERN_DEBUG 481 "DECnet: dn_route_rx_packet: rt_flags=0x%02x dev=%s len=%d src=0x%04hx dst=0x%04hx err=%d type=%d\n", 482 (int)cb->rt_flags, devname, skb->len, 483 dn_ntohs(cb->src), dn_ntohs(cb->dst), 484 err, skb->pkt_type); 485 } 486 487 if ((skb->pkt_type == PACKET_HOST) && (cb->rt_flags & DN_RT_F_RQR)) { 488 switch(cb->rt_flags & DN_RT_PKT_MSK) { 489 case DN_RT_PKT_SHORT: 490 return dn_return_short(skb); 491 case DN_RT_PKT_LONG: 492 return dn_return_long(skb); 493 } 494 } 495 496 kfree_skb(skb); 497 return NET_RX_DROP; 498} 499 500static int dn_route_rx_long(struct sk_buff *skb) 501{ 502 struct dn_skb_cb *cb = DN_SKB_CB(skb); 503 unsigned char *ptr = skb->data; 504 505 if (!pskb_may_pull(skb, 21)) /* 20 for long header, 1 for shortest nsp */ 506 goto drop_it; 507 508 skb_pull(skb, 20); 509 skb->h.raw = skb->data; 510 511 /* Destination info */ 512 ptr += 2; 513 cb->dst = dn_eth2dn(ptr); 514 if (memcmp(ptr, dn_hiord_addr, 4) != 0) 515 goto drop_it; 516 ptr += 6; 517 518 519 /* Source info */ 520 ptr += 2; 521 cb->src = dn_eth2dn(ptr); 522 if (memcmp(ptr, dn_hiord_addr, 4) != 0) 523 goto drop_it; 524 ptr += 6; 525 /* Other junk */ 526 ptr++; 527 cb->hops = *ptr++; /* Visit Count */ 528 529 return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet); 530 531drop_it: 532 kfree_skb(skb); 533 return NET_RX_DROP; 534} 535 536 537 538static int dn_route_rx_short(struct sk_buff *skb) 539{ 540 struct dn_skb_cb *cb = DN_SKB_CB(skb); 541 unsigned char *ptr = skb->data; 542 543 if (!pskb_may_pull(skb, 6)) /* 5 for short header + 1 for shortest nsp */ 544 goto drop_it; 545 546 skb_pull(skb, 5); 547 skb->h.raw = skb->data; 548 549 cb->dst = *(__le16 *)ptr; 550 ptr += 2; 551 cb->src = *(__le16 *)ptr; 552 ptr += 2; 553 cb->hops = *ptr & 0x3f; 554 555 return NF_HOOK(PF_DECnet, NF_DN_PRE_ROUTING, skb, skb->dev, NULL, dn_route_rx_packet); 556 557drop_it: 558 kfree_skb(skb); 559 return NET_RX_DROP; 560} 561 562static int dn_route_discard(struct sk_buff *skb) 563{ 564 /* 565 * I know we drop the packet here, but thats considered success in 566 * this case 567 */ 568 kfree_skb(skb); 569 return NET_RX_SUCCESS; 570} 571 572static int dn_route_ptp_hello(struct sk_buff *skb) 573{ 574 dn_dev_hello(skb); 575 dn_neigh_pointopoint_hello(skb); 576 return NET_RX_SUCCESS; 577} 578 579int dn_route_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) 580{ 581 struct dn_skb_cb *cb; 582 unsigned char flags = 0; 583 __u16 len = dn_ntohs(*(__le16 *)skb->data); 584 struct dn_dev *dn = (struct dn_dev *)dev->dn_ptr; 585 unsigned char padlen = 0; 586 587 if (dn == NULL) 588 goto dump_it; 589 590 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) 591 goto out; 592 593 if (!pskb_may_pull(skb, 3)) 594 goto dump_it; 595 596 skb_pull(skb, 2); 597 598 if (len > skb->len) 599 goto dump_it; 600 601 skb_trim(skb, len); 602 603 flags = *skb->data; 604 605 cb = DN_SKB_CB(skb); 606 cb->stamp = jiffies; 607 cb->iif = dev->ifindex; 608 609 /* 610 * If we have padding, remove it. 611 */ 612 if (flags & DN_RT_F_PF) { 613 padlen = flags & ~DN_RT_F_PF; 614 if (!pskb_may_pull(skb, padlen + 1)) 615 goto dump_it; 616 skb_pull(skb, padlen); 617 flags = *skb->data; 618 } 619 620 skb->nh.raw = skb->data; 621 622 /* 623 * Weed out future version DECnet 624 */ 625 if (flags & DN_RT_F_VER) 626 goto dump_it; 627 628 cb->rt_flags = flags; 629 630 if (decnet_debug_level & 1) 631 printk(KERN_DEBUG 632 "dn_route_rcv: got 0x%02x from %s [%d %d %d]\n", 633 (int)flags, (dev) ? dev->name : "???", len, skb->len, 634 padlen); 635 636 if (flags & DN_RT_PKT_CNTL) { 637 if (unlikely(skb_linearize(skb))) 638 goto dump_it; 639 640 switch(flags & DN_RT_CNTL_MSK) { 641 case DN_RT_PKT_INIT: 642 dn_dev_init_pkt(skb); 643 break; 644 case DN_RT_PKT_VERI: 645 dn_dev_veri_pkt(skb); 646 break; 647 } 648 649 if (dn->parms.state != DN_DEV_S_RU) 650 goto dump_it; 651 652 switch(flags & DN_RT_CNTL_MSK) { 653 case DN_RT_PKT_HELO: 654 return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_route_ptp_hello); 655 656 case DN_RT_PKT_L1RT: 657 case DN_RT_PKT_L2RT: 658 return NF_HOOK(PF_DECnet, NF_DN_ROUTE, skb, skb->dev, NULL, dn_route_discard); 659 case DN_RT_PKT_ERTH: 660 return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_router_hello); 661 662 case DN_RT_PKT_EEDH: 663 return NF_HOOK(PF_DECnet, NF_DN_HELLO, skb, skb->dev, NULL, dn_neigh_endnode_hello); 664 } 665 } else { 666 if (dn->parms.state != DN_DEV_S_RU) 667 goto dump_it; 668 669 skb_pull(skb, 1); /* Pull flags */ 670 671 switch(flags & DN_RT_PKT_MSK) { 672 case DN_RT_PKT_LONG: 673 return dn_route_rx_long(skb); 674 case DN_RT_PKT_SHORT: 675 return dn_route_rx_short(skb); 676 } 677 } 678 679dump_it: 680 kfree_skb(skb); 681out: 682 return NET_RX_DROP; 683} 684 685static int dn_output(struct sk_buff *skb) 686{ 687 struct dst_entry *dst = skb->dst; 688 struct dn_route *rt = (struct dn_route *)dst; 689 struct net_device *dev = dst->dev; 690 struct dn_skb_cb *cb = DN_SKB_CB(skb); 691 struct neighbour *neigh; 692 693 int err = -EINVAL; 694 695 if ((neigh = dst->neighbour) == NULL) 696 goto error; 697 698 skb->dev = dev; 699 700 cb->src = rt->rt_saddr; 701 cb->dst = rt->rt_daddr; 702 703 /* 704 * Always set the Intra-Ethernet bit on all outgoing packets 705 * originated on this node. Only valid flag from upper layers 706 * is return-to-sender-requested. Set hop count to 0 too. 707 */ 708 cb->rt_flags &= ~DN_RT_F_RQR; 709 cb->rt_flags |= DN_RT_F_IE; 710 cb->hops = 0; 711 712 return NF_HOOK(PF_DECnet, NF_DN_LOCAL_OUT, skb, NULL, dev, neigh->output); 713 714error: 715 if (net_ratelimit()) 716 printk(KERN_DEBUG "dn_output: This should not happen\n"); 717 718 kfree_skb(skb); 719 720 return err; 721} 722 723static int dn_forward(struct sk_buff *skb) 724{ 725 struct dn_skb_cb *cb = DN_SKB_CB(skb); 726 struct dst_entry *dst = skb->dst; 727 struct dn_dev *dn_db = dst->dev->dn_ptr; 728 struct dn_route *rt; 729 struct neighbour *neigh = dst->neighbour; 730 int header_len; 731#ifdef CONFIG_NETFILTER 732 struct net_device *dev = skb->dev; 733#endif 734 735 if (skb->pkt_type != PACKET_HOST) 736 goto drop; 737 738 /* Ensure that we have enough space for headers */ 739 rt = (struct dn_route *)skb->dst; 740 header_len = dn_db->use_long ? 21 : 6; 741 if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+header_len)) 742 goto drop; 743 744 /* 745 * Hop count exceeded. 746 */ 747 if (++cb->hops > 30) 748 goto drop; 749 750 skb->dev = rt->u.dst.dev; 751 752 /* 753 * If packet goes out same interface it came in on, then set 754 * the Intra-Ethernet bit. This has no effect for short 755 * packets, so we don't need to test for them here. 756 */ 757 cb->rt_flags &= ~DN_RT_F_IE; 758 if (rt->rt_flags & RTCF_DOREDIRECT) 759 cb->rt_flags |= DN_RT_F_IE; 760 761 return NF_HOOK(PF_DECnet, NF_DN_FORWARD, skb, dev, skb->dev, neigh->output); 762 763drop: 764 kfree_skb(skb); 765 return NET_RX_DROP; 766} 767 768/* 769 * Drop packet. This is used for endnodes and for 770 * when we should not be forwarding packets from 771 * this dest. 772 */ 773static int dn_blackhole(struct sk_buff *skb) 774{ 775 kfree_skb(skb); 776 return NET_RX_DROP; 777} 778 779/* 780 * Used to catch bugs. This should never normally get 781 * called. 782 */ 783static int dn_rt_bug(struct sk_buff *skb) 784{ 785 if (net_ratelimit()) { 786 struct dn_skb_cb *cb = DN_SKB_CB(skb); 787 788 printk(KERN_DEBUG "dn_rt_bug: skb from:%04x to:%04x\n", 789 dn_ntohs(cb->src), dn_ntohs(cb->dst)); 790 } 791 792 kfree_skb(skb); 793 794 return NET_RX_BAD; 795} 796 797static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res) 798{ 799 struct dn_fib_info *fi = res->fi; 800 struct net_device *dev = rt->u.dst.dev; 801 struct neighbour *n; 802 unsigned mss; 803 804 if (fi) { 805 if (DN_FIB_RES_GW(*res) && 806 DN_FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) 807 rt->rt_gateway = DN_FIB_RES_GW(*res); 808 memcpy(rt->u.dst.metrics, fi->fib_metrics, 809 sizeof(rt->u.dst.metrics)); 810 } 811 rt->rt_type = res->type; 812 813 if (dev != NULL && rt->u.dst.neighbour == NULL) { 814 n = __neigh_lookup_errno(&dn_neigh_table, &rt->rt_gateway, dev); 815 if (IS_ERR(n)) 816 return PTR_ERR(n); 817 rt->u.dst.neighbour = n; 818 } 819 820 if (rt->u.dst.metrics[RTAX_MTU-1] == 0 || 821 rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu) 822 rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu; 823 mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst)); 824 if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 || 825 rt->u.dst.metrics[RTAX_ADVMSS-1] > mss) 826 rt->u.dst.metrics[RTAX_ADVMSS-1] = mss; 827 return 0; 828} 829 830static inline int dn_match_addr(__le16 addr1, __le16 addr2) 831{ 832 __u16 tmp = dn_ntohs(addr1) ^ dn_ntohs(addr2); 833 int match = 16; 834 while(tmp) { 835 tmp >>= 1; 836 match--; 837 } 838 return match; 839} 840 841static __le16 dnet_select_source(const struct net_device *dev, __le16 daddr, int scope) 842{ 843 __le16 saddr = 0; 844 struct dn_dev *dn_db = dev->dn_ptr; 845 struct dn_ifaddr *ifa; 846 int best_match = 0; 847 int ret; 848 849 read_lock(&dev_base_lock); 850 for(ifa = dn_db->ifa_list; ifa; ifa = ifa->ifa_next) { 851 if (ifa->ifa_scope > scope) 852 continue; 853 if (!daddr) { 854 saddr = ifa->ifa_local; 855 break; 856 } 857 ret = dn_match_addr(daddr, ifa->ifa_local); 858 if (ret > best_match) 859 saddr = ifa->ifa_local; 860 if (best_match == 0) 861 saddr = ifa->ifa_local; 862 } 863 read_unlock(&dev_base_lock); 864 865 return saddr; 866} 867 868static inline __le16 __dn_fib_res_prefsrc(struct dn_fib_res *res) 869{ 870 return dnet_select_source(DN_FIB_RES_DEV(*res), DN_FIB_RES_GW(*res), res->scope); 871} 872 873static inline __le16 dn_fib_rules_map_destination(__le16 daddr, struct dn_fib_res *res) 874{ 875 __le16 mask = dnet_make_mask(res->prefixlen); 876 return (daddr&~mask)|res->fi->fib_nh->nh_gw; 877} 878 879static int dn_route_output_slow(struct dst_entry **pprt, const struct flowi *oldflp, int try_hard) 880{ 881 struct flowi fl = { .nl_u = { .dn_u = 882 { .daddr = oldflp->fld_dst, 883 .saddr = oldflp->fld_src, 884 .scope = RT_SCOPE_UNIVERSE, 885#ifdef CONFIG_DECNET_ROUTE_FWMARK 886 .fwmark = oldflp->fld_fwmark 887#endif 888 } }, 889 .iif = loopback_dev.ifindex, 890 .oif = oldflp->oif }; 891 struct dn_route *rt = NULL; 892 struct net_device *dev_out = NULL; 893 struct neighbour *neigh = NULL; 894 unsigned hash; 895 unsigned flags = 0; 896 struct dn_fib_res res = { .fi = NULL, .type = RTN_UNICAST }; 897 int err; 898 int free_res = 0; 899 __le16 gateway = 0; 900 901 if (decnet_debug_level & 16) 902 printk(KERN_DEBUG 903 "dn_route_output_slow: dst=%04x src=%04x mark=%d" 904 " iif=%d oif=%d\n", dn_ntohs(oldflp->fld_dst), 905 dn_ntohs(oldflp->fld_src), 906 oldflp->fld_fwmark, loopback_dev.ifindex, oldflp->oif); 907 908 /* If we have an output interface, verify its a DECnet device */ 909 if (oldflp->oif) { 910 dev_out = dev_get_by_index(oldflp->oif); 911 err = -ENODEV; 912 if (dev_out && dev_out->dn_ptr == NULL) { 913 dev_put(dev_out); 914 dev_out = NULL; 915 } 916 if (dev_out == NULL) 917 goto out; 918 } 919 920 /* If we have a source address, verify that its a local address */ 921 if (oldflp->fld_src) { 922 err = -EADDRNOTAVAIL; 923 924 if (dev_out) { 925 if (dn_dev_islocal(dev_out, oldflp->fld_src)) 926 goto source_ok; 927 dev_put(dev_out); 928 goto out; 929 } 930 read_lock(&dev_base_lock); 931 for(dev_out = dev_base; dev_out; dev_out = dev_out->next) { 932 if (!dev_out->dn_ptr) 933 continue; 934 if (!dn_dev_islocal(dev_out, oldflp->fld_src)) 935 continue; 936 if ((dev_out->flags & IFF_LOOPBACK) && 937 oldflp->fld_dst && 938 !dn_dev_islocal(dev_out, oldflp->fld_dst)) 939 continue; 940 break; 941 } 942 read_unlock(&dev_base_lock); 943 if (dev_out == NULL) 944 goto out; 945 dev_hold(dev_out); 946source_ok: 947 ; 948 } 949 950 /* No destination? Assume its local */ 951 if (!fl.fld_dst) { 952 fl.fld_dst = fl.fld_src; 953 954 err = -EADDRNOTAVAIL; 955 if (dev_out) 956 dev_put(dev_out); 957 dev_out = &loopback_dev; 958 dev_hold(dev_out); 959 if (!fl.fld_dst) { 960 fl.fld_dst = 961 fl.fld_src = dnet_select_source(dev_out, 0, 962 RT_SCOPE_HOST); 963 if (!fl.fld_dst) 964 goto out; 965 } 966 fl.oif = loopback_dev.ifindex; 967 res.type = RTN_LOCAL; 968 goto make_route; 969 } 970 971 if (decnet_debug_level & 16) 972 printk(KERN_DEBUG 973 "dn_route_output_slow: initial checks complete." 974 " dst=%o4x src=%04x oif=%d try_hard=%d\n", 975 dn_ntohs(fl.fld_dst), dn_ntohs(fl.fld_src), 976 fl.oif, try_hard); 977 978 /* 979 * N.B. If the kernel is compiled without router support then 980 * dn_fib_lookup() will evaluate to non-zero so this if () block 981 * will always be executed. 982 */ 983 err = -ESRCH; 984 if (try_hard || (err = dn_fib_lookup(&fl, &res)) != 0) { 985 struct dn_dev *dn_db; 986 if (err != -ESRCH) 987 goto out; 988 /* 989 * Here the fallback is basically the standard algorithm for 990 * routing in endnodes which is described in the DECnet routing 991 * docs 992 * 993 * If we are not trying hard, look in neighbour cache. 994 * The result is tested to ensure that if a specific output 995 * device/source address was requested, then we honour that 996 * here 997 */ 998 if (!try_hard) { 999 neigh = neigh_lookup_nodev(&dn_neigh_table, &fl.fld_dst); 1000 if (neigh) { 1001 if ((oldflp->oif && 1002 (neigh->dev->ifindex != oldflp->oif)) || 1003 (oldflp->fld_src && 1004 (!dn_dev_islocal(neigh->dev, 1005 oldflp->fld_src)))) { 1006 neigh_release(neigh); 1007 neigh = NULL; 1008 } else { 1009 if (dev_out) 1010 dev_put(dev_out); 1011 if (dn_dev_islocal(neigh->dev, fl.fld_dst)) { 1012 dev_out = &loopback_dev; 1013 res.type = RTN_LOCAL; 1014 } else { 1015 dev_out = neigh->dev; 1016 } 1017 dev_hold(dev_out); 1018 goto select_source; 1019 } 1020 } 1021 } 1022 1023 /* Not there? Perhaps its a local address */ 1024 if (dev_out == NULL) 1025 dev_out = dn_dev_get_default(); 1026 err = -ENODEV; 1027 if (dev_out == NULL) 1028 goto out; 1029 dn_db = dev_out->dn_ptr; 1030 /* Possible improvement - check all devices for local addr */ 1031 if (dn_dev_islocal(dev_out, fl.fld_dst)) { 1032 dev_put(dev_out); 1033 dev_out = &loopback_dev; 1034 dev_hold(dev_out); 1035 res.type = RTN_LOCAL; 1036 goto select_source; 1037 } 1038 /* Not local either.... try sending it to the default router */ 1039 neigh = neigh_clone(dn_db->router); 1040 BUG_ON(neigh && neigh->dev != dev_out); 1041 1042 /* Ok then, we assume its directly connected and move on */ 1043select_source: 1044 if (neigh) 1045 gateway = ((struct dn_neigh *)neigh)->addr; 1046 if (gateway == 0) 1047 gateway = fl.fld_dst; 1048 if (fl.fld_src == 0) { 1049 fl.fld_src = dnet_select_source(dev_out, gateway, 1050 res.type == RTN_LOCAL ? 1051 RT_SCOPE_HOST : 1052 RT_SCOPE_LINK); 1053 if (fl.fld_src == 0 && res.type != RTN_LOCAL) 1054 goto e_addr; 1055 } 1056 fl.oif = dev_out->ifindex; 1057 goto make_route; 1058 } 1059 free_res = 1; 1060 1061 if (res.type == RTN_NAT) 1062 goto e_inval; 1063 1064 if (res.type == RTN_LOCAL) { 1065 if (!fl.fld_src) 1066 fl.fld_src = fl.fld_dst; 1067 if (dev_out) 1068 dev_put(dev_out); 1069 dev_out = &loopback_dev; 1070 dev_hold(dev_out); 1071 fl.oif = dev_out->ifindex; 1072 if (res.fi) 1073 dn_fib_info_put(res.fi); 1074 res.fi = NULL; 1075 goto make_route; 1076 } 1077 1078 if (res.fi->fib_nhs > 1 && fl.oif == 0) 1079 dn_fib_select_multipath(&fl, &res); 1080 1081 /* 1082 * We could add some logic to deal with default routes here and 1083 * get rid of some of the special casing above. 1084 */ 1085 1086 if (!fl.fld_src) 1087 fl.fld_src = DN_FIB_RES_PREFSRC(res); 1088 1089 if (dev_out) 1090 dev_put(dev_out); 1091 dev_out = DN_FIB_RES_DEV(res); 1092 dev_hold(dev_out); 1093 fl.oif = dev_out->ifindex; 1094 gateway = DN_FIB_RES_GW(res); 1095 1096make_route: 1097 if (dev_out->flags & IFF_LOOPBACK) 1098 flags |= RTCF_LOCAL; 1099 1100 rt = dst_alloc(&dn_dst_ops); 1101 if (rt == NULL) 1102 goto e_nobufs; 1103 1104 atomic_set(&rt->u.dst.__refcnt, 1); 1105 rt->u.dst.flags = DST_HOST; 1106 1107 rt->fl.fld_src = oldflp->fld_src; 1108 rt->fl.fld_dst = oldflp->fld_dst; 1109 rt->fl.oif = oldflp->oif; 1110 rt->fl.iif = 0; 1111#ifdef CONFIG_DECNET_ROUTE_FWMARK 1112 rt->fl.fld_fwmark = oldflp->fld_fwmark; 1113#endif 1114 1115 rt->rt_saddr = fl.fld_src; 1116 rt->rt_daddr = fl.fld_dst; 1117 rt->rt_gateway = gateway ? gateway : fl.fld_dst; 1118 rt->rt_local_src = fl.fld_src; 1119 1120 rt->rt_dst_map = fl.fld_dst; 1121 rt->rt_src_map = fl.fld_src; 1122 1123 rt->u.dst.dev = dev_out; 1124 dev_hold(dev_out); 1125 rt->u.dst.neighbour = neigh; 1126 neigh = NULL; 1127 1128 rt->u.dst.lastuse = jiffies; 1129 rt->u.dst.output = dn_output; 1130 rt->u.dst.input = dn_rt_bug; 1131 rt->rt_flags = flags; 1132 if (flags & RTCF_LOCAL) 1133 rt->u.dst.input = dn_nsp_rx; 1134 1135 err = dn_rt_set_next_hop(rt, &res); 1136 if (err) 1137 goto e_neighbour; 1138 1139 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); 1140 dn_insert_route(rt, hash, (struct dn_route **)pprt); 1141 1142done: 1143 if (neigh) 1144 neigh_release(neigh); 1145 if (free_res) 1146 dn_fib_res_put(&res); 1147 if (dev_out) 1148 dev_put(dev_out); 1149out: 1150 return err; 1151 1152e_addr: 1153 err = -EADDRNOTAVAIL; 1154 goto done; 1155e_inval: 1156 err = -EINVAL; 1157 goto done; 1158e_nobufs: 1159 err = -ENOBUFS; 1160 goto done; 1161e_neighbour: 1162 dst_free(&rt->u.dst); 1163 goto e_nobufs; 1164} 1165 1166 1167/* 1168 * N.B. The flags may be moved into the flowi at some future stage. 1169 */ 1170static int __dn_route_output_key(struct dst_entry **pprt, const struct flowi *flp, int flags) 1171{ 1172 unsigned hash = dn_hash(flp->fld_src, flp->fld_dst); 1173 struct dn_route *rt = NULL; 1174 1175 if (!(flags & MSG_TRYHARD)) { 1176 rcu_read_lock_bh(); 1177 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt; 1178 rt = rcu_dereference(rt->u.rt_next)) { 1179 if ((flp->fld_dst == rt->fl.fld_dst) && 1180 (flp->fld_src == rt->fl.fld_src) && 1181#ifdef CONFIG_DECNET_ROUTE_FWMARK 1182 (flp->fld_fwmark == rt->fl.fld_fwmark) && 1183#endif 1184 (rt->fl.iif == 0) && 1185 (rt->fl.oif == flp->oif)) { 1186 rt->u.dst.lastuse = jiffies; 1187 dst_hold(&rt->u.dst); 1188 rt->u.dst.__use++; 1189 rcu_read_unlock_bh(); 1190 *pprt = &rt->u.dst; 1191 return 0; 1192 } 1193 } 1194 rcu_read_unlock_bh(); 1195 } 1196 1197 return dn_route_output_slow(pprt, flp, flags); 1198} 1199 1200static int dn_route_output_key(struct dst_entry **pprt, struct flowi *flp, int flags) 1201{ 1202 int err; 1203 1204 err = __dn_route_output_key(pprt, flp, flags); 1205 if (err == 0 && flp->proto) { 1206 err = xfrm_lookup(pprt, flp, NULL, 0); 1207 } 1208 return err; 1209} 1210 1211int dn_route_output_sock(struct dst_entry **pprt, struct flowi *fl, struct sock *sk, int flags) 1212{ 1213 int err; 1214 1215 err = __dn_route_output_key(pprt, fl, flags & MSG_TRYHARD); 1216 if (err == 0 && fl->proto) { 1217 err = xfrm_lookup(pprt, fl, sk, !(flags & MSG_DONTWAIT)); 1218 } 1219 return err; 1220} 1221 1222static int dn_route_input_slow(struct sk_buff *skb) 1223{ 1224 struct dn_route *rt = NULL; 1225 struct dn_skb_cb *cb = DN_SKB_CB(skb); 1226 struct net_device *in_dev = skb->dev; 1227 struct net_device *out_dev = NULL; 1228 struct dn_dev *dn_db; 1229 struct neighbour *neigh = NULL; 1230 unsigned hash; 1231 int flags = 0; 1232 __le16 gateway = 0; 1233 __le16 local_src = 0; 1234 struct flowi fl = { .nl_u = { .dn_u = 1235 { .daddr = cb->dst, 1236 .saddr = cb->src, 1237 .scope = RT_SCOPE_UNIVERSE, 1238#ifdef CONFIG_DECNET_ROUTE_FWMARK 1239 .fwmark = skb->nfmark 1240#endif 1241 } }, 1242 .iif = skb->dev->ifindex }; 1243 struct dn_fib_res res = { .fi = NULL, .type = RTN_UNREACHABLE }; 1244 int err = -EINVAL; 1245 int free_res = 0; 1246 1247 dev_hold(in_dev); 1248 1249 if ((dn_db = in_dev->dn_ptr) == NULL) 1250 goto out; 1251 1252 /* Zero source addresses are not allowed */ 1253 if (fl.fld_src == 0) 1254 goto out; 1255 1256 /* 1257 * In this case we've just received a packet from a source 1258 * outside ourselves pretending to come from us. We don't 1259 * allow it any further to prevent routing loops, spoofing and 1260 * other nasties. Loopback packets already have the dst attached 1261 * so this only affects packets which have originated elsewhere. 1262 */ 1263 err = -ENOTUNIQ; 1264 if (dn_dev_islocal(in_dev, cb->src)) 1265 goto out; 1266 1267 err = dn_fib_lookup(&fl, &res); 1268 if (err) { 1269 if (err != -ESRCH) 1270 goto out; 1271 /* 1272 * Is the destination us ? 1273 */ 1274 if (!dn_dev_islocal(in_dev, cb->dst)) 1275 goto e_inval; 1276 1277 res.type = RTN_LOCAL; 1278 } else { 1279 __le16 src_map = fl.fld_src; 1280 free_res = 1; 1281 1282 out_dev = DN_FIB_RES_DEV(res); 1283 if (out_dev == NULL) { 1284 if (net_ratelimit()) 1285 printk(KERN_CRIT "Bug in dn_route_input_slow() " 1286 "No output device\n"); 1287 goto e_inval; 1288 } 1289 dev_hold(out_dev); 1290 1291 if (res.r) 1292 src_map = fl.fld_src; /* no NAT support for now */ 1293 1294 gateway = DN_FIB_RES_GW(res); 1295 if (res.type == RTN_NAT) { 1296 fl.fld_dst = dn_fib_rules_map_destination(fl.fld_dst, &res); 1297 dn_fib_res_put(&res); 1298 free_res = 0; 1299 if (dn_fib_lookup(&fl, &res)) 1300 goto e_inval; 1301 free_res = 1; 1302 if (res.type != RTN_UNICAST) 1303 goto e_inval; 1304 flags |= RTCF_DNAT; 1305 gateway = fl.fld_dst; 1306 } 1307 fl.fld_src = src_map; 1308 } 1309 1310 switch(res.type) { 1311 case RTN_UNICAST: 1312 /* 1313 * Forwarding check here, we only check for forwarding 1314 * being turned off, if you want to only forward intra 1315 * area, its up to you to set the routing tables up 1316 * correctly. 1317 */ 1318 if (dn_db->parms.forwarding == 0) 1319 goto e_inval; 1320 1321 if (res.fi->fib_nhs > 1 && fl.oif == 0) 1322 dn_fib_select_multipath(&fl, &res); 1323 1324 /* 1325 * Check for out_dev == in_dev. We use the RTCF_DOREDIRECT 1326 * flag as a hint to set the intra-ethernet bit when 1327 * forwarding. If we've got NAT in operation, we don't do 1328 * this optimisation. 1329 */ 1330 if (out_dev == in_dev && !(flags & RTCF_NAT)) 1331 flags |= RTCF_DOREDIRECT; 1332 1333 local_src = DN_FIB_RES_PREFSRC(res); 1334 1335 case RTN_BLACKHOLE: 1336 case RTN_UNREACHABLE: 1337 break; 1338 case RTN_LOCAL: 1339 flags |= RTCF_LOCAL; 1340 fl.fld_src = cb->dst; 1341 fl.fld_dst = cb->src; 1342 1343 /* Routing tables gave us a gateway */ 1344 if (gateway) 1345 goto make_route; 1346 1347 /* Packet was intra-ethernet, so we know its on-link */ 1348 if (cb->rt_flags & DN_RT_F_IE) { 1349 gateway = cb->src; 1350 flags |= RTCF_DIRECTSRC; 1351 goto make_route; 1352 } 1353 1354 /* Use the default router if there is one */ 1355 neigh = neigh_clone(dn_db->router); 1356 if (neigh) { 1357 gateway = ((struct dn_neigh *)neigh)->addr; 1358 goto make_route; 1359 } 1360 1361 /* Close eyes and pray */ 1362 gateway = cb->src; 1363 flags |= RTCF_DIRECTSRC; 1364 goto make_route; 1365 default: 1366 goto e_inval; 1367 } 1368 1369make_route: 1370 rt = dst_alloc(&dn_dst_ops); 1371 if (rt == NULL) 1372 goto e_nobufs; 1373 1374 rt->rt_saddr = fl.fld_src; 1375 rt->rt_daddr = fl.fld_dst; 1376 rt->rt_gateway = fl.fld_dst; 1377 if (gateway) 1378 rt->rt_gateway = gateway; 1379 rt->rt_local_src = local_src ? local_src : rt->rt_saddr; 1380 1381 rt->rt_dst_map = fl.fld_dst; 1382 rt->rt_src_map = fl.fld_src; 1383 1384 rt->fl.fld_src = cb->src; 1385 rt->fl.fld_dst = cb->dst; 1386 rt->fl.oif = 0; 1387 rt->fl.iif = in_dev->ifindex; 1388 rt->fl.fld_fwmark = fl.fld_fwmark; 1389 1390 rt->u.dst.flags = DST_HOST; 1391 rt->u.dst.neighbour = neigh; 1392 rt->u.dst.dev = out_dev; 1393 rt->u.dst.lastuse = jiffies; 1394 rt->u.dst.output = dn_rt_bug; 1395 switch(res.type) { 1396 case RTN_UNICAST: 1397 rt->u.dst.input = dn_forward; 1398 break; 1399 case RTN_LOCAL: 1400 rt->u.dst.output = dn_output; 1401 rt->u.dst.input = dn_nsp_rx; 1402 rt->u.dst.dev = in_dev; 1403 flags |= RTCF_LOCAL; 1404 break; 1405 default: 1406 case RTN_UNREACHABLE: 1407 case RTN_BLACKHOLE: 1408 rt->u.dst.input = dn_blackhole; 1409 } 1410 rt->rt_flags = flags; 1411 if (rt->u.dst.dev) 1412 dev_hold(rt->u.dst.dev); 1413 1414 err = dn_rt_set_next_hop(rt, &res); 1415 if (err) 1416 goto e_neighbour; 1417 1418 hash = dn_hash(rt->fl.fld_src, rt->fl.fld_dst); 1419 dn_insert_route(rt, hash, (struct dn_route **)&skb->dst); 1420 1421done: 1422 if (neigh) 1423 neigh_release(neigh); 1424 if (free_res) 1425 dn_fib_res_put(&res); 1426 dev_put(in_dev); 1427 if (out_dev) 1428 dev_put(out_dev); 1429out: 1430 return err; 1431 1432e_inval: 1433 err = -EINVAL; 1434 goto done; 1435 1436e_nobufs: 1437 err = -ENOBUFS; 1438 goto done; 1439 1440e_neighbour: 1441 dst_free(&rt->u.dst); 1442 goto done; 1443} 1444 1445int dn_route_input(struct sk_buff *skb) 1446{ 1447 struct dn_route *rt; 1448 struct dn_skb_cb *cb = DN_SKB_CB(skb); 1449 unsigned hash = dn_hash(cb->src, cb->dst); 1450 1451 if (skb->dst) 1452 return 0; 1453 1454 rcu_read_lock(); 1455 for(rt = rcu_dereference(dn_rt_hash_table[hash].chain); rt != NULL; 1456 rt = rcu_dereference(rt->u.rt_next)) { 1457 if ((rt->fl.fld_src == cb->src) && 1458 (rt->fl.fld_dst == cb->dst) && 1459 (rt->fl.oif == 0) && 1460#ifdef CONFIG_DECNET_ROUTE_FWMARK 1461 (rt->fl.fld_fwmark == skb->nfmark) && 1462#endif 1463 (rt->fl.iif == cb->iif)) { 1464 rt->u.dst.lastuse = jiffies; 1465 dst_hold(&rt->u.dst); 1466 rt->u.dst.__use++; 1467 rcu_read_unlock(); 1468 skb->dst = (struct dst_entry *)rt; 1469 return 0; 1470 } 1471 } 1472 rcu_read_unlock(); 1473 1474 return dn_route_input_slow(skb); 1475} 1476 1477static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, 1478 int event, int nowait, unsigned int flags) 1479{ 1480 struct dn_route *rt = (struct dn_route *)skb->dst; 1481 struct rtmsg *r; 1482 struct nlmsghdr *nlh; 1483 unsigned char *b = skb->tail; 1484 struct rta_cacheinfo ci; 1485 1486 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*r), flags); 1487 r = NLMSG_DATA(nlh); 1488 r->rtm_family = AF_DECnet; 1489 r->rtm_dst_len = 16; 1490 r->rtm_src_len = 0; 1491 r->rtm_tos = 0; 1492 r->rtm_table = RT_TABLE_MAIN; 1493 RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); 1494 r->rtm_type = rt->rt_type; 1495 r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; 1496 r->rtm_scope = RT_SCOPE_UNIVERSE; 1497 r->rtm_protocol = RTPROT_UNSPEC; 1498 if (rt->rt_flags & RTCF_NOTIFY) 1499 r->rtm_flags |= RTM_F_NOTIFY; 1500 RTA_PUT(skb, RTA_DST, 2, &rt->rt_daddr); 1501 if (rt->fl.fld_src) { 1502 r->rtm_src_len = 16; 1503 RTA_PUT(skb, RTA_SRC, 2, &rt->fl.fld_src); 1504 } 1505 if (rt->u.dst.dev) 1506 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex); 1507 /* 1508 * Note to self - change this if input routes reverse direction when 1509 * they deal only with inputs and not with replies like they do 1510 * currently. 1511 */ 1512 RTA_PUT(skb, RTA_PREFSRC, 2, &rt->rt_local_src); 1513 if (rt->rt_daddr != rt->rt_gateway) 1514 RTA_PUT(skb, RTA_GATEWAY, 2, &rt->rt_gateway); 1515 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0) 1516 goto rtattr_failure; 1517 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse); 1518 ci.rta_used = rt->u.dst.__use; 1519 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt); 1520 if (rt->u.dst.expires) 1521 ci.rta_expires = jiffies_to_clock_t(rt->u.dst.expires - jiffies); 1522 else 1523 ci.rta_expires = 0; 1524 ci.rta_error = rt->u.dst.error; 1525 ci.rta_id = ci.rta_ts = ci.rta_tsage = 0; 1526 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci); 1527 if (rt->fl.iif) 1528 RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif); 1529 1530 nlh->nlmsg_len = skb->tail - b; 1531 return skb->len; 1532 1533nlmsg_failure: 1534rtattr_failure: 1535 skb_trim(skb, b - skb->data); 1536 return -1; 1537} 1538 1539/* 1540 * This is called by both endnodes and routers now. 1541 */ 1542int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void *arg) 1543{ 1544 struct rtattr **rta = arg; 1545 struct rtmsg *rtm = NLMSG_DATA(nlh); 1546 struct dn_route *rt = NULL; 1547 struct dn_skb_cb *cb; 1548 int err; 1549 struct sk_buff *skb; 1550 struct flowi fl; 1551 1552 memset(&fl, 0, sizeof(fl)); 1553 fl.proto = DNPROTO_NSP; 1554 1555 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); 1556 if (skb == NULL) 1557 return -ENOBUFS; 1558 skb->mac.raw = skb->data; 1559 cb = DN_SKB_CB(skb); 1560 1561 if (rta[RTA_SRC-1]) 1562 memcpy(&fl.fld_src, RTA_DATA(rta[RTA_SRC-1]), 2); 1563 if (rta[RTA_DST-1]) 1564 memcpy(&fl.fld_dst, RTA_DATA(rta[RTA_DST-1]), 2); 1565 if (rta[RTA_IIF-1]) 1566 memcpy(&fl.iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int)); 1567 1568 if (fl.iif) { 1569 struct net_device *dev; 1570 if ((dev = dev_get_by_index(fl.iif)) == NULL) { 1571 kfree_skb(skb); 1572 return -ENODEV; 1573 } 1574 if (!dev->dn_ptr) { 1575 dev_put(dev); 1576 kfree_skb(skb); 1577 return -ENODEV; 1578 } 1579 skb->protocol = __constant_htons(ETH_P_DNA_RT); 1580 skb->dev = dev; 1581 cb->src = fl.fld_src; 1582 cb->dst = fl.fld_dst; 1583 local_bh_disable(); 1584 err = dn_route_input(skb); 1585 local_bh_enable(); 1586 memset(cb, 0, sizeof(struct dn_skb_cb)); 1587 rt = (struct dn_route *)skb->dst; 1588 if (!err && -rt->u.dst.error) 1589 err = rt->u.dst.error; 1590 } else { 1591 int oif = 0; 1592 if (rta[RTA_OIF - 1]) 1593 memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); 1594 fl.oif = oif; 1595 err = dn_route_output_key((struct dst_entry **)&rt, &fl, 0); 1596 } 1597 1598 if (skb->dev) 1599 dev_put(skb->dev); 1600 skb->dev = NULL; 1601 if (err) 1602 goto out_free; 1603 skb->dst = &rt->u.dst; 1604 if (rtm->rtm_flags & RTM_F_NOTIFY) 1605 rt->rt_flags |= RTCF_NOTIFY; 1606 1607 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; 1608 1609 err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); 1610 1611 if (err == 0) 1612 goto out_free; 1613 if (err < 0) { 1614 err = -EMSGSIZE; 1615 goto out_free; 1616 } 1617 1618 return rtnl_unicast(skb, NETLINK_CB(in_skb).pid); 1619 1620out_free: 1621 kfree_skb(skb); 1622 return err; 1623} 1624 1625/* 1626 * For routers, this is called from dn_fib_dump, but for endnodes its 1627 * called directly from the rtnetlink dispatch table. 1628 */ 1629int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) 1630{ 1631 struct dn_route *rt; 1632 int h, s_h; 1633 int idx, s_idx; 1634 1635 if (NLMSG_PAYLOAD(cb->nlh, 0) < sizeof(struct rtmsg)) 1636 return -EINVAL; 1637 if (!(((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED)) 1638 return 0; 1639 1640 s_h = cb->args[0]; 1641 s_idx = idx = cb->args[1]; 1642 for(h = 0; h <= dn_rt_hash_mask; h++) { 1643 if (h < s_h) 1644 continue; 1645 if (h > s_h) 1646 s_idx = 0; 1647 rcu_read_lock_bh(); 1648 for(rt = rcu_dereference(dn_rt_hash_table[h].chain), idx = 0; 1649 rt; 1650 rt = rcu_dereference(rt->u.rt_next), idx++) { 1651 if (idx < s_idx) 1652 continue; 1653 skb->dst = dst_clone(&rt->u.dst); 1654 if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, 1655 cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1656 1, NLM_F_MULTI) <= 0) { 1657 dst_release(xchg(&skb->dst, NULL)); 1658 rcu_read_unlock_bh(); 1659 goto done; 1660 } 1661 dst_release(xchg(&skb->dst, NULL)); 1662 } 1663 rcu_read_unlock_bh(); 1664 } 1665 1666done: 1667 cb->args[0] = h; 1668 cb->args[1] = idx; 1669 return skb->len; 1670} 1671 1672#ifdef CONFIG_PROC_FS 1673struct dn_rt_cache_iter_state { 1674 int bucket; 1675}; 1676 1677static struct dn_route *dn_rt_cache_get_first(struct seq_file *seq) 1678{ 1679 struct dn_route *rt = NULL; 1680 struct dn_rt_cache_iter_state *s = seq->private; 1681 1682 for(s->bucket = dn_rt_hash_mask; s->bucket >= 0; --s->bucket) { 1683 rcu_read_lock_bh(); 1684 rt = dn_rt_hash_table[s->bucket].chain; 1685 if (rt) 1686 break; 1687 rcu_read_unlock_bh(); 1688 } 1689 return rt; 1690} 1691 1692static struct dn_route *dn_rt_cache_get_next(struct seq_file *seq, struct dn_route *rt) 1693{ 1694 struct dn_rt_cache_iter_state *s = rcu_dereference(seq->private); 1695 1696 rt = rt->u.rt_next; 1697 while(!rt) { 1698 rcu_read_unlock_bh(); 1699 if (--s->bucket < 0) 1700 break; 1701 rcu_read_lock_bh(); 1702 rt = dn_rt_hash_table[s->bucket].chain; 1703 } 1704 return rt; 1705} 1706 1707static void *dn_rt_cache_seq_start(struct seq_file *seq, loff_t *pos) 1708{ 1709 struct dn_route *rt = dn_rt_cache_get_first(seq); 1710 1711 if (rt) { 1712 while(*pos && (rt = dn_rt_cache_get_next(seq, rt))) 1713 --*pos; 1714 } 1715 return *pos ? NULL : rt; 1716} 1717 1718static void *dn_rt_cache_seq_next(struct seq_file *seq, void *v, loff_t *pos) 1719{ 1720 struct dn_route *rt = dn_rt_cache_get_next(seq, v); 1721 ++*pos; 1722 return rt; 1723} 1724 1725static void dn_rt_cache_seq_stop(struct seq_file *seq, void *v) 1726{ 1727 if (v) 1728 rcu_read_unlock_bh(); 1729} 1730 1731static int dn_rt_cache_seq_show(struct seq_file *seq, void *v) 1732{ 1733 struct dn_route *rt = v; 1734 char buf1[DN_ASCBUF_LEN], buf2[DN_ASCBUF_LEN]; 1735 1736 seq_printf(seq, "%-8s %-7s %-7s %04d %04d %04d\n", 1737 rt->u.dst.dev ? rt->u.dst.dev->name : "*", 1738 dn_addr2asc(dn_ntohs(rt->rt_daddr), buf1), 1739 dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2), 1740 atomic_read(&rt->u.dst.__refcnt), 1741 rt->u.dst.__use, 1742 (int) dst_metric(&rt->u.dst, RTAX_RTT)); 1743 return 0; 1744} 1745 1746static struct seq_operations dn_rt_cache_seq_ops = { 1747 .start = dn_rt_cache_seq_start, 1748 .next = dn_rt_cache_seq_next, 1749 .stop = dn_rt_cache_seq_stop, 1750 .show = dn_rt_cache_seq_show, 1751}; 1752 1753static int dn_rt_cache_seq_open(struct inode *inode, struct file *file) 1754{ 1755 struct seq_file *seq; 1756 int rc = -ENOMEM; 1757 struct dn_rt_cache_iter_state *s = kmalloc(sizeof(*s), GFP_KERNEL); 1758 1759 if (!s) 1760 goto out; 1761 rc = seq_open(file, &dn_rt_cache_seq_ops); 1762 if (rc) 1763 goto out_kfree; 1764 seq = file->private_data; 1765 seq->private = s; 1766 memset(s, 0, sizeof(*s)); 1767out: 1768 return rc; 1769out_kfree: 1770 kfree(s); 1771 goto out; 1772} 1773 1774static struct file_operations dn_rt_cache_seq_fops = { 1775 .owner = THIS_MODULE, 1776 .open = dn_rt_cache_seq_open, 1777 .read = seq_read, 1778 .llseek = seq_lseek, 1779 .release = seq_release_private, 1780}; 1781 1782#endif /* CONFIG_PROC_FS */ 1783 1784void __init dn_route_init(void) 1785{ 1786 int i, goal, order; 1787 1788 dn_dst_ops.kmem_cachep = 1789 kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, 1790 SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); 1791 init_timer(&dn_route_timer); 1792 dn_route_timer.function = dn_dst_check_expire; 1793 dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; 1794 add_timer(&dn_route_timer); 1795 1796 goal = num_physpages >> (26 - PAGE_SHIFT); 1797 1798 for(order = 0; (1UL << order) < goal; order++) 1799 /* NOTHING */; 1800 1801 /* 1802 * Only want 1024 entries max, since the table is very, very unlikely 1803 * to be larger than that. 1804 */ 1805 while(order && ((((1UL << order) * PAGE_SIZE) / 1806 sizeof(struct dn_rt_hash_bucket)) >= 2048)) 1807 order--; 1808 1809 do { 1810 dn_rt_hash_mask = (1UL << order) * PAGE_SIZE / 1811 sizeof(struct dn_rt_hash_bucket); 1812 while(dn_rt_hash_mask & (dn_rt_hash_mask - 1)) 1813 dn_rt_hash_mask--; 1814 dn_rt_hash_table = (struct dn_rt_hash_bucket *) 1815 __get_free_pages(GFP_ATOMIC, order); 1816 } while (dn_rt_hash_table == NULL && --order > 0); 1817 1818 if (!dn_rt_hash_table) 1819 panic("Failed to allocate DECnet route cache hash table\n"); 1820 1821 printk(KERN_INFO 1822 "DECnet: Routing cache hash table of %u buckets, %ldKbytes\n", 1823 dn_rt_hash_mask, 1824 (long)(dn_rt_hash_mask*sizeof(struct dn_rt_hash_bucket))/1024); 1825 1826 dn_rt_hash_mask--; 1827 for(i = 0; i <= dn_rt_hash_mask; i++) { 1828 spin_lock_init(&dn_rt_hash_table[i].lock); 1829 dn_rt_hash_table[i].chain = NULL; 1830 } 1831 1832 dn_dst_ops.gc_thresh = (dn_rt_hash_mask + 1); 1833 1834 proc_net_fops_create("decnet_cache", S_IRUGO, &dn_rt_cache_seq_fops); 1835} 1836 1837void __exit dn_route_cleanup(void) 1838{ 1839 del_timer(&dn_route_timer); 1840 dn_run_flush(0); 1841 1842 proc_net_remove("decnet_cache"); 1843} 1844