Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
at v2.6.29-rc4 1723 lines 44 kB view raw
/* File veth.c created by Kyle A. Lucke on Mon Aug 7 2000. */
/*
 * IBM eServer iSeries Virtual Ethernet Device Driver
 * Copyright (C) 2001 Kyle A. Lucke (klucke@us.ibm.com), IBM Corp.
 * Substantially cleaned up by:
 * Copyright (C) 2003 David Gibson <dwg@au1.ibm.com>, IBM Corporation.
 * Copyright (C) 2004-2005 Michael Ellerman, IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 *
 * This module implements the virtual ethernet device for iSeries LPAR
 * Linux.  It uses hypervisor message passing to implement an
 * ethernet-like network device communicating between partitions on
 * the iSeries.
 *
 * The iSeries LPAR hypervisor currently allows for up to 16 different
 * virtual ethernets.  These are all dynamically configurable on
 * OS/400 partitions, but dynamic configuration is not supported under
 * Linux yet.  An ethXX network device will be created for each
 * virtual ethernet this partition is connected to.
 *
 * - This driver is responsible for routing packets to and from other
 *   partitions.  The MAC addresses used by the virtual ethernets
 *   contain meaning and must not be modified.
 *
 * - Having 2 virtual ethernets to the same remote partition DOES NOT
 *   double the available bandwidth.  The 2 devices will share the
 *   available hypervisor bandwidth.
 *
 * - If you send a packet to your own mac address, it will just be
 *   dropped, you won't get it on the receive side.
 *
 * - Multicast is implemented by sending the frame to every
 *   other partition.  It is the responsibility of the receiving
 *   partition to filter the addresses desired.
 *
 * Tunable parameters:
 *
 * VETH_NUMBUFFERS: This compile time option defaults to 120.  It
 * controls how much memory Linux will allocate per remote partition
 * it is communicating with.  It can be thought of as the maximum
 * number of packets outstanding to a remote partition at a time.
 */

#include <linux/module.h>
#include <linux/types.h>
#include <linux/errno.h>
#include <linux/ioport.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/delay.h>
#include <linux/mm.h>
#include <linux/ethtool.h>
#include <linux/if_ether.h>

#include <asm/abs_addr.h>
#include <asm/iseries/mf.h>
#include <asm/uaccess.h>
#include <asm/firmware.h>
#include <asm/iseries/hv_lp_config.h>
#include <asm/iseries/hv_types.h>
#include <asm/iseries/hv_lp_event.h>
#include <asm/iommu.h>
#include <asm/vio.h>

#undef DEBUG

MODULE_AUTHOR("Kyle Lucke <klucke@us.ibm.com>");
MODULE_DESCRIPTION("iSeries Virtual ethernet driver");
MODULE_LICENSE("GPL");

/* Event subtypes, dispatched on base_event.xSubtype by veth_handle_int()
 * and veth_handle_ack(). */
#define VETH_EVENT_CAP	(0)
#define VETH_EVENT_FRAMES	(1)
#define VETH_EVENT_MONITOR	(2)
#define VETH_EVENT_FRAMES_ACK	(3)

#define VETH_MAX_ACKS_PER_MSG	(20)
#define VETH_MAX_FRAMES_PER_MSG	(6)

/* Payload of a VETH_EVENT_FRAMES event: per-slot buffer address and
 * length.  The transmit path in this file only ever fills slot 0. */
struct veth_frames_data {
	u32 addr[VETH_MAX_FRAMES_PER_MSG];
	u16 len[VETH_MAX_FRAMES_PER_MSG];
	u32 eofmask;
};
/* The transmit path sets eofmask to (1 << VETH_EOF_SHIFT) for its
 * single frame in slot 0. */
#define VETH_EOF_SHIFT		(32-VETH_MAX_FRAMES_PER_MSG)

/* Payload of a VETH_EVENT_FRAMES_ACK event.  Entries that are not below
 * VETH_NUMBUFFERS are ignored by the receiver of the ack. */
struct veth_frames_ack_data {
	u16 token[VETH_MAX_ACKS_PER_MSG];
};

/* Payload of a VETH_EVENT_CAP event.  ack_timeout is in microseconds
 * (veth_process_caps() converts it to jiffies). */
struct veth_cap_data {
	u8 caps_version;
	u8 rsvd1;
	u16 num_buffers;
	u16 ack_threshold;
	u16 rsvd2;
	u32 ack_timeout;
	u32 rsvd3;
	u64 rsvd4[3];
};

/* A hypervisor event header followed by one of the veth payloads. */
struct veth_lpevent {
	struct HvLpEvent base_event;
	union {
		struct veth_cap_data caps_data;
		struct veth_frames_data frames_data;
		struct veth_frames_ack_data frames_ack_data;
	} u;

};

#define DRV_NAME	"iseries_veth"
#define DRV_VERSION	"2.0"

#define VETH_NUMBUFFERS		(120)
#define VETH_ACKTIMEOUT		(1000000) /* microseconds */
#define VETH_MAX_MCAST		(12)

#define VETH_MAX_MTU		(9000)

/* ACK_THRESHOLD is advertised to the remote end as
 * local_caps.ack_threshold; it scales with VETH_NUMBUFFERS. */
#if VETH_NUMBUFFERS < 10
#define ACK_THRESHOLD		(1)
#elif VETH_NUMBUFFERS < 20
#define ACK_THRESHOLD		(4)
#elif VETH_NUMBUFFERS < 40
#define ACK_THRESHOLD		(10)
#else
#define ACK_THRESHOLD		(20)
#endif

/* Bits of veth_lpar_connection.state. */
#define VETH_STATE_SHUTDOWN	(0x0001)
#define VETH_STATE_OPEN		(0x0002)
#define VETH_STATE_RESET	(0x0004)
#define VETH_STATE_SENTMON	(0x0008)
#define VETH_STATE_SENTCAPS	(0x0010)
#define VETH_STATE_GOTCAPACK	(0x0020)
#define VETH_STATE_GOTCAPS	(0x0040)
#define VETH_STATE_SENTCAPACK	(0x0080)
#define VETH_STATE_READY	(0x0100)

/* One transmit buffer slot.  token is the slot's index in the
 * connection's msgs array; free slots are chained through next. */
struct veth_msg {
	struct veth_msg *next;
	struct veth_frames_data data;
	int token;
	int in_use;
	struct sk_buff *skb;
	struct device *dev;
};

/* Per-remote-partition connection state; one entry of veth_cnx[]. */
struct veth_lpar_connection {
	HvLpIndex remote_lp;
	struct delayed_work statemachine_wq;
	struct veth_msg *msgs;
	int num_events;
	struct veth_cap_data local_caps;

	struct kobject kobject;
	struct timer_list ack_timer;

	struct timer_list reset_timer;
	unsigned int reset_timeout;
	unsigned long last_contact;
	int outstanding_tx;

	spinlock_t lock;
	unsigned long state;
	HvLpInstanceId src_inst;
	HvLpInstanceId dst_inst;
	struct veth_lpevent cap_event, cap_ack_event;
	u16 pending_acks[VETH_MAX_ACKS_PER_MSG];
	u32 num_pending_acks;

	int num_ack_events;
	struct veth_cap_data remote_caps;
	u32 ack_timeout;

	struct veth_msg *msg_stack_head;
};

/* Private data of each veth net_device (see netdev_priv() users). */
struct veth_port {
	struct device *dev;
	u64 mac_addr;
	HvLpIndexMap lpar_map;

	/* queue_lock protects the stopped_map and dev's queue. */
	spinlock_t queue_lock;
	HvLpIndexMap stopped_map;

	/* mcast_gate protects promiscuous, num_mcast & mcast_addr. */
	rwlock_t mcast_gate;
	int promiscuous;
	int num_mcast;
	u64 mcast_addr[VETH_MAX_MCAST];

	struct kobject kobject;
};

static HvLpIndex this_lp;
static struct veth_lpar_connection *veth_cnx[HVMAXARCHITECTEDLPS]; /* = 0 */
static struct net_device *veth_dev[HVMAXARCHITECTEDVIRTUALLANS]; /* = 0 */

static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev);
static void veth_recycle_msg(struct veth_lpar_connection *, struct veth_msg *);
static void veth_wake_queues(struct veth_lpar_connection *cnx);
static void veth_stop_queues(struct veth_lpar_connection *cnx);
static void veth_receive(struct veth_lpar_connection *, struct veth_lpevent *);
static void veth_release_connection(struct kobject *kobject);
static void veth_timed_ack(unsigned long ptr);
static void veth_timed_reset(unsigned long ptr);

/*
 * Utility functions
 */

#define veth_info(fmt, args...) \
	printk(KERN_INFO DRV_NAME ": " fmt, ## args)

#define veth_error(fmt, args...) \
	printk(KERN_ERR DRV_NAME ": Error: " fmt, ## args)

/* veth_debug() compiles to nothing unless DEBUG is defined above. */
#ifdef DEBUG
#define veth_debug(fmt, args...) \
	printk(KERN_DEBUG DRV_NAME ": " fmt, ## args)
#else
#define veth_debug(fmt, args...) do {} while (0)
#endif

/* You must hold the connection's lock when you call this function.
*/ 246static inline void veth_stack_push(struct veth_lpar_connection *cnx, 247 struct veth_msg *msg) 248{ 249 msg->next = cnx->msg_stack_head; 250 cnx->msg_stack_head = msg; 251} 252 253/* You must hold the connection's lock when you call this function. */ 254static inline struct veth_msg *veth_stack_pop(struct veth_lpar_connection *cnx) 255{ 256 struct veth_msg *msg; 257 258 msg = cnx->msg_stack_head; 259 if (msg) 260 cnx->msg_stack_head = cnx->msg_stack_head->next; 261 262 return msg; 263} 264 265/* You must hold the connection's lock when you call this function. */ 266static inline int veth_stack_is_empty(struct veth_lpar_connection *cnx) 267{ 268 return cnx->msg_stack_head == NULL; 269} 270 271static inline HvLpEvent_Rc 272veth_signalevent(struct veth_lpar_connection *cnx, u16 subtype, 273 HvLpEvent_AckInd ackind, HvLpEvent_AckType acktype, 274 u64 token, 275 u64 data1, u64 data2, u64 data3, u64 data4, u64 data5) 276{ 277 return HvCallEvent_signalLpEventFast(cnx->remote_lp, 278 HvLpEvent_Type_VirtualLan, 279 subtype, ackind, acktype, 280 cnx->src_inst, 281 cnx->dst_inst, 282 token, data1, data2, data3, 283 data4, data5); 284} 285 286static inline HvLpEvent_Rc veth_signaldata(struct veth_lpar_connection *cnx, 287 u16 subtype, u64 token, void *data) 288{ 289 u64 *p = (u64 *) data; 290 291 return veth_signalevent(cnx, subtype, HvLpEvent_AckInd_NoAck, 292 HvLpEvent_AckType_ImmediateAck, 293 token, p[0], p[1], p[2], p[3], p[4]); 294} 295 296struct veth_allocation { 297 struct completion c; 298 int num; 299}; 300 301static void veth_complete_allocation(void *parm, int number) 302{ 303 struct veth_allocation *vc = (struct veth_allocation *)parm; 304 305 vc->num = number; 306 complete(&vc->c); 307} 308 309static int veth_allocate_events(HvLpIndex rlp, int number) 310{ 311 struct veth_allocation vc = 312 { COMPLETION_INITIALIZER_ONSTACK(vc.c), 0 }; 313 314 mf_allocate_lp_events(rlp, HvLpEvent_Type_VirtualLan, 315 sizeof(struct veth_lpevent), number, 316 
&veth_complete_allocation, &vc); 317 wait_for_completion(&vc.c); 318 319 return vc.num; 320} 321 322/* 323 * sysfs support 324 */ 325 326struct veth_cnx_attribute { 327 struct attribute attr; 328 ssize_t (*show)(struct veth_lpar_connection *, char *buf); 329 ssize_t (*store)(struct veth_lpar_connection *, const char *buf); 330}; 331 332static ssize_t veth_cnx_attribute_show(struct kobject *kobj, 333 struct attribute *attr, char *buf) 334{ 335 struct veth_cnx_attribute *cnx_attr; 336 struct veth_lpar_connection *cnx; 337 338 cnx_attr = container_of(attr, struct veth_cnx_attribute, attr); 339 cnx = container_of(kobj, struct veth_lpar_connection, kobject); 340 341 if (!cnx_attr->show) 342 return -EIO; 343 344 return cnx_attr->show(cnx, buf); 345} 346 347#define CUSTOM_CNX_ATTR(_name, _format, _expression) \ 348static ssize_t _name##_show(struct veth_lpar_connection *cnx, char *buf)\ 349{ \ 350 return sprintf(buf, _format, _expression); \ 351} \ 352struct veth_cnx_attribute veth_cnx_attr_##_name = __ATTR_RO(_name) 353 354#define SIMPLE_CNX_ATTR(_name) \ 355 CUSTOM_CNX_ATTR(_name, "%lu\n", (unsigned long)cnx->_name) 356 357SIMPLE_CNX_ATTR(outstanding_tx); 358SIMPLE_CNX_ATTR(remote_lp); 359SIMPLE_CNX_ATTR(num_events); 360SIMPLE_CNX_ATTR(src_inst); 361SIMPLE_CNX_ATTR(dst_inst); 362SIMPLE_CNX_ATTR(num_pending_acks); 363SIMPLE_CNX_ATTR(num_ack_events); 364CUSTOM_CNX_ATTR(ack_timeout, "%d\n", jiffies_to_msecs(cnx->ack_timeout)); 365CUSTOM_CNX_ATTR(reset_timeout, "%d\n", jiffies_to_msecs(cnx->reset_timeout)); 366CUSTOM_CNX_ATTR(state, "0x%.4lX\n", cnx->state); 367CUSTOM_CNX_ATTR(last_contact, "%d\n", cnx->last_contact ? 
368 jiffies_to_msecs(jiffies - cnx->last_contact) : 0); 369 370#define GET_CNX_ATTR(_name) (&veth_cnx_attr_##_name.attr) 371 372static struct attribute *veth_cnx_default_attrs[] = { 373 GET_CNX_ATTR(outstanding_tx), 374 GET_CNX_ATTR(remote_lp), 375 GET_CNX_ATTR(num_events), 376 GET_CNX_ATTR(reset_timeout), 377 GET_CNX_ATTR(last_contact), 378 GET_CNX_ATTR(state), 379 GET_CNX_ATTR(src_inst), 380 GET_CNX_ATTR(dst_inst), 381 GET_CNX_ATTR(num_pending_acks), 382 GET_CNX_ATTR(num_ack_events), 383 GET_CNX_ATTR(ack_timeout), 384 NULL 385}; 386 387static struct sysfs_ops veth_cnx_sysfs_ops = { 388 .show = veth_cnx_attribute_show 389}; 390 391static struct kobj_type veth_lpar_connection_ktype = { 392 .release = veth_release_connection, 393 .sysfs_ops = &veth_cnx_sysfs_ops, 394 .default_attrs = veth_cnx_default_attrs 395}; 396 397struct veth_port_attribute { 398 struct attribute attr; 399 ssize_t (*show)(struct veth_port *, char *buf); 400 ssize_t (*store)(struct veth_port *, const char *buf); 401}; 402 403static ssize_t veth_port_attribute_show(struct kobject *kobj, 404 struct attribute *attr, char *buf) 405{ 406 struct veth_port_attribute *port_attr; 407 struct veth_port *port; 408 409 port_attr = container_of(attr, struct veth_port_attribute, attr); 410 port = container_of(kobj, struct veth_port, kobject); 411 412 if (!port_attr->show) 413 return -EIO; 414 415 return port_attr->show(port, buf); 416} 417 418#define CUSTOM_PORT_ATTR(_name, _format, _expression) \ 419static ssize_t _name##_show(struct veth_port *port, char *buf) \ 420{ \ 421 return sprintf(buf, _format, _expression); \ 422} \ 423struct veth_port_attribute veth_port_attr_##_name = __ATTR_RO(_name) 424 425#define SIMPLE_PORT_ATTR(_name) \ 426 CUSTOM_PORT_ATTR(_name, "%lu\n", (unsigned long)port->_name) 427 428SIMPLE_PORT_ATTR(promiscuous); 429SIMPLE_PORT_ATTR(num_mcast); 430CUSTOM_PORT_ATTR(lpar_map, "0x%X\n", port->lpar_map); 431CUSTOM_PORT_ATTR(stopped_map, "0x%X\n", port->stopped_map); 
432CUSTOM_PORT_ATTR(mac_addr, "0x%llX\n", port->mac_addr); 433 434#define GET_PORT_ATTR(_name) (&veth_port_attr_##_name.attr) 435static struct attribute *veth_port_default_attrs[] = { 436 GET_PORT_ATTR(mac_addr), 437 GET_PORT_ATTR(lpar_map), 438 GET_PORT_ATTR(stopped_map), 439 GET_PORT_ATTR(promiscuous), 440 GET_PORT_ATTR(num_mcast), 441 NULL 442}; 443 444static struct sysfs_ops veth_port_sysfs_ops = { 445 .show = veth_port_attribute_show 446}; 447 448static struct kobj_type veth_port_ktype = { 449 .sysfs_ops = &veth_port_sysfs_ops, 450 .default_attrs = veth_port_default_attrs 451}; 452 453/* 454 * LPAR connection code 455 */ 456 457static inline void veth_kick_statemachine(struct veth_lpar_connection *cnx) 458{ 459 schedule_delayed_work(&cnx->statemachine_wq, 0); 460} 461 462static void veth_take_cap(struct veth_lpar_connection *cnx, 463 struct veth_lpevent *event) 464{ 465 unsigned long flags; 466 467 spin_lock_irqsave(&cnx->lock, flags); 468 /* Receiving caps may mean the other end has just come up, so 469 * we need to reload the instance ID of the far end */ 470 cnx->dst_inst = 471 HvCallEvent_getTargetLpInstanceId(cnx->remote_lp, 472 HvLpEvent_Type_VirtualLan); 473 474 if (cnx->state & VETH_STATE_GOTCAPS) { 475 veth_error("Received a second capabilities from LPAR %d.\n", 476 cnx->remote_lp); 477 event->base_event.xRc = HvLpEvent_Rc_BufferNotAvailable; 478 HvCallEvent_ackLpEvent((struct HvLpEvent *) event); 479 } else { 480 memcpy(&cnx->cap_event, event, sizeof(cnx->cap_event)); 481 cnx->state |= VETH_STATE_GOTCAPS; 482 veth_kick_statemachine(cnx); 483 } 484 spin_unlock_irqrestore(&cnx->lock, flags); 485} 486 487static void veth_take_cap_ack(struct veth_lpar_connection *cnx, 488 struct veth_lpevent *event) 489{ 490 unsigned long flags; 491 492 spin_lock_irqsave(&cnx->lock, flags); 493 if (cnx->state & VETH_STATE_GOTCAPACK) { 494 veth_error("Received a second capabilities ack from LPAR %d.\n", 495 cnx->remote_lp); 496 } else { 497 memcpy(&cnx->cap_ack_event, 
event, 498 sizeof(&cnx->cap_ack_event)); 499 cnx->state |= VETH_STATE_GOTCAPACK; 500 veth_kick_statemachine(cnx); 501 } 502 spin_unlock_irqrestore(&cnx->lock, flags); 503} 504 505static void veth_take_monitor_ack(struct veth_lpar_connection *cnx, 506 struct veth_lpevent *event) 507{ 508 unsigned long flags; 509 510 spin_lock_irqsave(&cnx->lock, flags); 511 veth_debug("cnx %d: lost connection.\n", cnx->remote_lp); 512 513 /* Avoid kicking the statemachine once we're shutdown. 514 * It's unnecessary and it could break veth_stop_connection(). */ 515 516 if (! (cnx->state & VETH_STATE_SHUTDOWN)) { 517 cnx->state |= VETH_STATE_RESET; 518 veth_kick_statemachine(cnx); 519 } 520 spin_unlock_irqrestore(&cnx->lock, flags); 521} 522 523static void veth_handle_ack(struct veth_lpevent *event) 524{ 525 HvLpIndex rlp = event->base_event.xTargetLp; 526 struct veth_lpar_connection *cnx = veth_cnx[rlp]; 527 528 BUG_ON(! cnx); 529 530 switch (event->base_event.xSubtype) { 531 case VETH_EVENT_CAP: 532 veth_take_cap_ack(cnx, event); 533 break; 534 case VETH_EVENT_MONITOR: 535 veth_take_monitor_ack(cnx, event); 536 break; 537 default: 538 veth_error("Unknown ack type %d from LPAR %d.\n", 539 event->base_event.xSubtype, rlp); 540 }; 541} 542 543static void veth_handle_int(struct veth_lpevent *event) 544{ 545 HvLpIndex rlp = event->base_event.xSourceLp; 546 struct veth_lpar_connection *cnx = veth_cnx[rlp]; 547 unsigned long flags; 548 int i, acked = 0; 549 550 BUG_ON(! cnx); 551 552 switch (event->base_event.xSubtype) { 553 case VETH_EVENT_CAP: 554 veth_take_cap(cnx, event); 555 break; 556 case VETH_EVENT_MONITOR: 557 /* do nothing... this'll hang out here til we're dead, 558 * and the hypervisor will return it for us. 
*/ 559 break; 560 case VETH_EVENT_FRAMES_ACK: 561 spin_lock_irqsave(&cnx->lock, flags); 562 563 for (i = 0; i < VETH_MAX_ACKS_PER_MSG; ++i) { 564 u16 msgnum = event->u.frames_ack_data.token[i]; 565 566 if (msgnum < VETH_NUMBUFFERS) { 567 veth_recycle_msg(cnx, cnx->msgs + msgnum); 568 cnx->outstanding_tx--; 569 acked++; 570 } 571 } 572 573 if (acked > 0) { 574 cnx->last_contact = jiffies; 575 veth_wake_queues(cnx); 576 } 577 578 spin_unlock_irqrestore(&cnx->lock, flags); 579 break; 580 case VETH_EVENT_FRAMES: 581 veth_receive(cnx, event); 582 break; 583 default: 584 veth_error("Unknown interrupt type %d from LPAR %d.\n", 585 event->base_event.xSubtype, rlp); 586 }; 587} 588 589static void veth_handle_event(struct HvLpEvent *event) 590{ 591 struct veth_lpevent *veth_event = (struct veth_lpevent *)event; 592 593 if (hvlpevent_is_ack(event)) 594 veth_handle_ack(veth_event); 595 else 596 veth_handle_int(veth_event); 597} 598 599static int veth_process_caps(struct veth_lpar_connection *cnx) 600{ 601 struct veth_cap_data *remote_caps = &cnx->remote_caps; 602 int num_acks_needed; 603 604 /* Convert timer to jiffies */ 605 cnx->ack_timeout = remote_caps->ack_timeout * HZ / 1000000; 606 607 if ( (remote_caps->num_buffers == 0) 608 || (remote_caps->ack_threshold > VETH_MAX_ACKS_PER_MSG) 609 || (remote_caps->ack_threshold == 0) 610 || (cnx->ack_timeout == 0) ) { 611 veth_error("Received incompatible capabilities from LPAR %d.\n", 612 cnx->remote_lp); 613 return HvLpEvent_Rc_InvalidSubtypeData; 614 } 615 616 num_acks_needed = (remote_caps->num_buffers 617 / remote_caps->ack_threshold) + 1; 618 619 /* FIXME: locking on num_ack_events? 
*/ 620 if (cnx->num_ack_events < num_acks_needed) { 621 int num; 622 623 num = veth_allocate_events(cnx->remote_lp, 624 num_acks_needed-cnx->num_ack_events); 625 if (num > 0) 626 cnx->num_ack_events += num; 627 628 if (cnx->num_ack_events < num_acks_needed) { 629 veth_error("Couldn't allocate enough ack events " 630 "for LPAR %d.\n", cnx->remote_lp); 631 632 return HvLpEvent_Rc_BufferNotAvailable; 633 } 634 } 635 636 637 return HvLpEvent_Rc_Good; 638} 639 640/* FIXME: The gotos here are a bit dubious */ 641static void veth_statemachine(struct work_struct *work) 642{ 643 struct veth_lpar_connection *cnx = 644 container_of(work, struct veth_lpar_connection, 645 statemachine_wq.work); 646 int rlp = cnx->remote_lp; 647 int rc; 648 649 spin_lock_irq(&cnx->lock); 650 651 restart: 652 if (cnx->state & VETH_STATE_RESET) { 653 if (cnx->state & VETH_STATE_OPEN) 654 HvCallEvent_closeLpEventPath(cnx->remote_lp, 655 HvLpEvent_Type_VirtualLan); 656 657 /* 658 * Reset ack data. This prevents the ack_timer actually 659 * doing anything, even if it runs one more time when 660 * we drop the lock below. 661 */ 662 memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks)); 663 cnx->num_pending_acks = 0; 664 665 cnx->state &= ~(VETH_STATE_RESET | VETH_STATE_SENTMON 666 | VETH_STATE_OPEN | VETH_STATE_SENTCAPS 667 | VETH_STATE_GOTCAPACK | VETH_STATE_GOTCAPS 668 | VETH_STATE_SENTCAPACK | VETH_STATE_READY); 669 670 /* Clean up any leftover messages */ 671 if (cnx->msgs) { 672 int i; 673 for (i = 0; i < VETH_NUMBUFFERS; ++i) 674 veth_recycle_msg(cnx, cnx->msgs + i); 675 } 676 677 cnx->outstanding_tx = 0; 678 veth_wake_queues(cnx); 679 680 /* Drop the lock so we can do stuff that might sleep or 681 * take other locks. */ 682 spin_unlock_irq(&cnx->lock); 683 684 del_timer_sync(&cnx->ack_timer); 685 del_timer_sync(&cnx->reset_timer); 686 687 spin_lock_irq(&cnx->lock); 688 689 if (cnx->state & VETH_STATE_RESET) 690 goto restart; 691 692 /* Hack, wait for the other end to reset itself. 
*/ 693 if (! (cnx->state & VETH_STATE_SHUTDOWN)) { 694 schedule_delayed_work(&cnx->statemachine_wq, 5 * HZ); 695 goto out; 696 } 697 } 698 699 if (cnx->state & VETH_STATE_SHUTDOWN) 700 /* It's all over, do nothing */ 701 goto out; 702 703 if ( !(cnx->state & VETH_STATE_OPEN) ) { 704 if (! cnx->msgs || (cnx->num_events < (2 + VETH_NUMBUFFERS)) ) 705 goto cant_cope; 706 707 HvCallEvent_openLpEventPath(rlp, HvLpEvent_Type_VirtualLan); 708 cnx->src_inst = 709 HvCallEvent_getSourceLpInstanceId(rlp, 710 HvLpEvent_Type_VirtualLan); 711 cnx->dst_inst = 712 HvCallEvent_getTargetLpInstanceId(rlp, 713 HvLpEvent_Type_VirtualLan); 714 cnx->state |= VETH_STATE_OPEN; 715 } 716 717 if ( (cnx->state & VETH_STATE_OPEN) 718 && !(cnx->state & VETH_STATE_SENTMON) ) { 719 rc = veth_signalevent(cnx, VETH_EVENT_MONITOR, 720 HvLpEvent_AckInd_DoAck, 721 HvLpEvent_AckType_DeferredAck, 722 0, 0, 0, 0, 0, 0); 723 724 if (rc == HvLpEvent_Rc_Good) { 725 cnx->state |= VETH_STATE_SENTMON; 726 } else { 727 if ( (rc != HvLpEvent_Rc_PartitionDead) 728 && (rc != HvLpEvent_Rc_PathClosed) ) 729 veth_error("Error sending monitor to LPAR %d, " 730 "rc = %d\n", rlp, rc); 731 732 /* Oh well, hope we get a cap from the other 733 * end and do better when that kicks us */ 734 goto out; 735 } 736 } 737 738 if ( (cnx->state & VETH_STATE_OPEN) 739 && !(cnx->state & VETH_STATE_SENTCAPS)) { 740 u64 *rawcap = (u64 *)&cnx->local_caps; 741 742 rc = veth_signalevent(cnx, VETH_EVENT_CAP, 743 HvLpEvent_AckInd_DoAck, 744 HvLpEvent_AckType_ImmediateAck, 745 0, rawcap[0], rawcap[1], rawcap[2], 746 rawcap[3], rawcap[4]); 747 748 if (rc == HvLpEvent_Rc_Good) { 749 cnx->state |= VETH_STATE_SENTCAPS; 750 } else { 751 if ( (rc != HvLpEvent_Rc_PartitionDead) 752 && (rc != HvLpEvent_Rc_PathClosed) ) 753 veth_error("Error sending caps to LPAR %d, " 754 "rc = %d\n", rlp, rc); 755 756 /* Oh well, hope we get a cap from the other 757 * end and do better when that kicks us */ 758 goto out; 759 } 760 } 761 762 if ((cnx->state & 
VETH_STATE_GOTCAPS) 763 && !(cnx->state & VETH_STATE_SENTCAPACK)) { 764 struct veth_cap_data *remote_caps = &cnx->remote_caps; 765 766 memcpy(remote_caps, &cnx->cap_event.u.caps_data, 767 sizeof(*remote_caps)); 768 769 spin_unlock_irq(&cnx->lock); 770 rc = veth_process_caps(cnx); 771 spin_lock_irq(&cnx->lock); 772 773 /* We dropped the lock, so recheck for anything which 774 * might mess us up */ 775 if (cnx->state & (VETH_STATE_RESET|VETH_STATE_SHUTDOWN)) 776 goto restart; 777 778 cnx->cap_event.base_event.xRc = rc; 779 HvCallEvent_ackLpEvent((struct HvLpEvent *)&cnx->cap_event); 780 if (rc == HvLpEvent_Rc_Good) 781 cnx->state |= VETH_STATE_SENTCAPACK; 782 else 783 goto cant_cope; 784 } 785 786 if ((cnx->state & VETH_STATE_GOTCAPACK) 787 && (cnx->state & VETH_STATE_GOTCAPS) 788 && !(cnx->state & VETH_STATE_READY)) { 789 if (cnx->cap_ack_event.base_event.xRc == HvLpEvent_Rc_Good) { 790 /* Start the ACK timer */ 791 cnx->ack_timer.expires = jiffies + cnx->ack_timeout; 792 add_timer(&cnx->ack_timer); 793 cnx->state |= VETH_STATE_READY; 794 } else { 795 veth_error("Caps rejected by LPAR %d, rc = %d\n", 796 rlp, cnx->cap_ack_event.base_event.xRc); 797 goto cant_cope; 798 } 799 } 800 801 out: 802 spin_unlock_irq(&cnx->lock); 803 return; 804 805 cant_cope: 806 /* FIXME: we get here if something happens we really can't 807 * cope with. The link will never work once we get here, and 808 * all we can do is not lock the rest of the system up */ 809 veth_error("Unrecoverable error on connection to LPAR %d, shutting down" 810 " (state = 0x%04lx)\n", rlp, cnx->state); 811 cnx->state |= VETH_STATE_SHUTDOWN; 812 spin_unlock_irq(&cnx->lock); 813} 814 815static int veth_init_connection(u8 rlp) 816{ 817 struct veth_lpar_connection *cnx; 818 struct veth_msg *msgs; 819 int i; 820 821 if ( (rlp == this_lp) 822 || ! HvLpConfig_doLpsCommunicateOnVirtualLan(this_lp, rlp) ) 823 return 0; 824 825 cnx = kzalloc(sizeof(*cnx), GFP_KERNEL); 826 if (! 
cnx) 827 return -ENOMEM; 828 829 cnx->remote_lp = rlp; 830 spin_lock_init(&cnx->lock); 831 INIT_DELAYED_WORK(&cnx->statemachine_wq, veth_statemachine); 832 833 init_timer(&cnx->ack_timer); 834 cnx->ack_timer.function = veth_timed_ack; 835 cnx->ack_timer.data = (unsigned long) cnx; 836 837 init_timer(&cnx->reset_timer); 838 cnx->reset_timer.function = veth_timed_reset; 839 cnx->reset_timer.data = (unsigned long) cnx; 840 cnx->reset_timeout = 5 * HZ * (VETH_ACKTIMEOUT / 1000000); 841 842 memset(&cnx->pending_acks, 0xff, sizeof (cnx->pending_acks)); 843 844 veth_cnx[rlp] = cnx; 845 846 /* This gets us 1 reference, which is held on behalf of the driver 847 * infrastructure. It's released at module unload. */ 848 kobject_init(&cnx->kobject, &veth_lpar_connection_ktype); 849 850 msgs = kcalloc(VETH_NUMBUFFERS, sizeof(struct veth_msg), GFP_KERNEL); 851 if (! msgs) { 852 veth_error("Can't allocate buffers for LPAR %d.\n", rlp); 853 return -ENOMEM; 854 } 855 856 cnx->msgs = msgs; 857 858 for (i = 0; i < VETH_NUMBUFFERS; i++) { 859 msgs[i].token = i; 860 veth_stack_push(cnx, msgs + i); 861 } 862 863 cnx->num_events = veth_allocate_events(rlp, 2 + VETH_NUMBUFFERS); 864 865 if (cnx->num_events < (2 + VETH_NUMBUFFERS)) { 866 veth_error("Can't allocate enough events for LPAR %d.\n", rlp); 867 return -ENOMEM; 868 } 869 870 cnx->local_caps.num_buffers = VETH_NUMBUFFERS; 871 cnx->local_caps.ack_threshold = ACK_THRESHOLD; 872 cnx->local_caps.ack_timeout = VETH_ACKTIMEOUT; 873 874 return 0; 875} 876 877static void veth_stop_connection(struct veth_lpar_connection *cnx) 878{ 879 if (!cnx) 880 return; 881 882 spin_lock_irq(&cnx->lock); 883 cnx->state |= VETH_STATE_RESET | VETH_STATE_SHUTDOWN; 884 veth_kick_statemachine(cnx); 885 spin_unlock_irq(&cnx->lock); 886 887 /* There's a slim chance the reset code has just queued the 888 * statemachine to run in five seconds. If so we need to cancel 889 * that and requeue the work to run now. 
*/ 890 if (cancel_delayed_work(&cnx->statemachine_wq)) { 891 spin_lock_irq(&cnx->lock); 892 veth_kick_statemachine(cnx); 893 spin_unlock_irq(&cnx->lock); 894 } 895 896 /* Wait for the state machine to run. */ 897 flush_scheduled_work(); 898} 899 900static void veth_destroy_connection(struct veth_lpar_connection *cnx) 901{ 902 if (!cnx) 903 return; 904 905 if (cnx->num_events > 0) 906 mf_deallocate_lp_events(cnx->remote_lp, 907 HvLpEvent_Type_VirtualLan, 908 cnx->num_events, 909 NULL, NULL); 910 if (cnx->num_ack_events > 0) 911 mf_deallocate_lp_events(cnx->remote_lp, 912 HvLpEvent_Type_VirtualLan, 913 cnx->num_ack_events, 914 NULL, NULL); 915 916 kfree(cnx->msgs); 917 veth_cnx[cnx->remote_lp] = NULL; 918 kfree(cnx); 919} 920 921static void veth_release_connection(struct kobject *kobj) 922{ 923 struct veth_lpar_connection *cnx; 924 cnx = container_of(kobj, struct veth_lpar_connection, kobject); 925 veth_stop_connection(cnx); 926 veth_destroy_connection(cnx); 927} 928 929/* 930 * net_device code 931 */ 932 933static int veth_open(struct net_device *dev) 934{ 935 netif_start_queue(dev); 936 return 0; 937} 938 939static int veth_close(struct net_device *dev) 940{ 941 netif_stop_queue(dev); 942 return 0; 943} 944 945static int veth_change_mtu(struct net_device *dev, int new_mtu) 946{ 947 if ((new_mtu < 68) || (new_mtu > VETH_MAX_MTU)) 948 return -EINVAL; 949 dev->mtu = new_mtu; 950 return 0; 951} 952 953static void veth_set_multicast_list(struct net_device *dev) 954{ 955 struct veth_port *port = netdev_priv(dev); 956 unsigned long flags; 957 958 write_lock_irqsave(&port->mcast_gate, flags); 959 960 if ((dev->flags & IFF_PROMISC) || (dev->flags & IFF_ALLMULTI) || 961 (dev->mc_count > VETH_MAX_MCAST)) { 962 port->promiscuous = 1; 963 } else { 964 struct dev_mc_list *dmi = dev->mc_list; 965 int i; 966 967 port->promiscuous = 0; 968 969 /* Update table */ 970 port->num_mcast = 0; 971 972 for (i = 0; i < dev->mc_count; i++) { 973 u8 *addr = dmi->dmi_addr; 974 u64 xaddr = 0; 
975 976 if (addr[0] & 0x01) {/* multicast address? */ 977 memcpy(&xaddr, addr, ETH_ALEN); 978 port->mcast_addr[port->num_mcast] = xaddr; 979 port->num_mcast++; 980 } 981 dmi = dmi->next; 982 } 983 } 984 985 write_unlock_irqrestore(&port->mcast_gate, flags); 986} 987 988static void veth_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) 989{ 990 strncpy(info->driver, DRV_NAME, sizeof(info->driver) - 1); 991 info->driver[sizeof(info->driver) - 1] = '\0'; 992 strncpy(info->version, DRV_VERSION, sizeof(info->version) - 1); 993 info->version[sizeof(info->version) - 1] = '\0'; 994} 995 996static int veth_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) 997{ 998 ecmd->supported = (SUPPORTED_1000baseT_Full 999 | SUPPORTED_Autoneg | SUPPORTED_FIBRE); 1000 ecmd->advertising = (SUPPORTED_1000baseT_Full 1001 | SUPPORTED_Autoneg | SUPPORTED_FIBRE); 1002 ecmd->port = PORT_FIBRE; 1003 ecmd->transceiver = XCVR_INTERNAL; 1004 ecmd->phy_address = 0; 1005 ecmd->speed = SPEED_1000; 1006 ecmd->duplex = DUPLEX_FULL; 1007 ecmd->autoneg = AUTONEG_ENABLE; 1008 ecmd->maxtxpkt = 120; 1009 ecmd->maxrxpkt = 120; 1010 return 0; 1011} 1012 1013static u32 veth_get_link(struct net_device *dev) 1014{ 1015 return 1; 1016} 1017 1018static const struct ethtool_ops ops = { 1019 .get_drvinfo = veth_get_drvinfo, 1020 .get_settings = veth_get_settings, 1021 .get_link = veth_get_link, 1022}; 1023 1024static struct net_device *veth_probe_one(int vlan, 1025 struct vio_dev *vio_dev) 1026{ 1027 struct net_device *dev; 1028 struct veth_port *port; 1029 struct device *vdev = &vio_dev->dev; 1030 int i, rc; 1031 const unsigned char *mac_addr; 1032 1033 mac_addr = vio_get_attribute(vio_dev, "local-mac-address", NULL); 1034 if (mac_addr == NULL) 1035 mac_addr = vio_get_attribute(vio_dev, "mac-address", NULL); 1036 if (mac_addr == NULL) { 1037 veth_error("Unable to fetch MAC address from device tree.\n"); 1038 return NULL; 1039 } 1040 1041 dev = alloc_etherdev(sizeof (struct veth_port)); 
1042 if (! dev) { 1043 veth_error("Unable to allocate net_device structure!\n"); 1044 return NULL; 1045 } 1046 1047 port = netdev_priv(dev); 1048 1049 spin_lock_init(&port->queue_lock); 1050 rwlock_init(&port->mcast_gate); 1051 port->stopped_map = 0; 1052 1053 for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { 1054 HvLpVirtualLanIndexMap map; 1055 1056 if (i == this_lp) 1057 continue; 1058 map = HvLpConfig_getVirtualLanIndexMapForLp(i); 1059 if (map & (0x8000 >> vlan)) 1060 port->lpar_map |= (1 << i); 1061 } 1062 port->dev = vdev; 1063 1064 memcpy(dev->dev_addr, mac_addr, ETH_ALEN); 1065 1066 dev->mtu = VETH_MAX_MTU; 1067 1068 memcpy(&port->mac_addr, mac_addr, ETH_ALEN); 1069 1070 dev->open = veth_open; 1071 dev->hard_start_xmit = veth_start_xmit; 1072 dev->stop = veth_close; 1073 dev->change_mtu = veth_change_mtu; 1074 dev->set_mac_address = NULL; 1075 dev->set_multicast_list = veth_set_multicast_list; 1076 SET_ETHTOOL_OPS(dev, &ops); 1077 1078 SET_NETDEV_DEV(dev, vdev); 1079 1080 rc = register_netdev(dev); 1081 if (rc != 0) { 1082 veth_error("Failed registering net device for vlan%d.\n", vlan); 1083 free_netdev(dev); 1084 return NULL; 1085 } 1086 1087 kobject_init(&port->kobject, &veth_port_ktype); 1088 if (0 != kobject_add(&port->kobject, &dev->dev.kobj, "veth_port")) 1089 veth_error("Failed adding port for %s to sysfs.\n", dev->name); 1090 1091 veth_info("%s attached to iSeries vlan %d (LPAR map = 0x%.4X)\n", 1092 dev->name, vlan, port->lpar_map); 1093 1094 return dev; 1095} 1096 1097/* 1098 * Tx path 1099 */ 1100 1101static int veth_transmit_to_one(struct sk_buff *skb, HvLpIndex rlp, 1102 struct net_device *dev) 1103{ 1104 struct veth_lpar_connection *cnx = veth_cnx[rlp]; 1105 struct veth_port *port = netdev_priv(dev); 1106 HvLpEvent_Rc rc; 1107 struct veth_msg *msg = NULL; 1108 unsigned long flags; 1109 1110 if (! cnx) 1111 return 0; 1112 1113 spin_lock_irqsave(&cnx->lock, flags); 1114 1115 if (! 
(cnx->state & VETH_STATE_READY)) 1116 goto no_error; 1117 1118 if ((skb->len - ETH_HLEN) > VETH_MAX_MTU) 1119 goto drop; 1120 1121 msg = veth_stack_pop(cnx); 1122 if (! msg) 1123 goto drop; 1124 1125 msg->in_use = 1; 1126 msg->skb = skb_get(skb); 1127 1128 msg->data.addr[0] = dma_map_single(port->dev, skb->data, 1129 skb->len, DMA_TO_DEVICE); 1130 1131 if (dma_mapping_error(port->dev, msg->data.addr[0])) 1132 goto recycle_and_drop; 1133 1134 msg->dev = port->dev; 1135 msg->data.len[0] = skb->len; 1136 msg->data.eofmask = 1 << VETH_EOF_SHIFT; 1137 1138 rc = veth_signaldata(cnx, VETH_EVENT_FRAMES, msg->token, &msg->data); 1139 1140 if (rc != HvLpEvent_Rc_Good) 1141 goto recycle_and_drop; 1142 1143 /* If the timer's not already running, start it now. */ 1144 if (0 == cnx->outstanding_tx) 1145 mod_timer(&cnx->reset_timer, jiffies + cnx->reset_timeout); 1146 1147 cnx->last_contact = jiffies; 1148 cnx->outstanding_tx++; 1149 1150 if (veth_stack_is_empty(cnx)) 1151 veth_stop_queues(cnx); 1152 1153 no_error: 1154 spin_unlock_irqrestore(&cnx->lock, flags); 1155 return 0; 1156 1157 recycle_and_drop: 1158 veth_recycle_msg(cnx, msg); 1159 drop: 1160 spin_unlock_irqrestore(&cnx->lock, flags); 1161 return 1; 1162} 1163 1164static void veth_transmit_to_many(struct sk_buff *skb, 1165 HvLpIndexMap lpmask, 1166 struct net_device *dev) 1167{ 1168 int i, success, error; 1169 1170 success = error = 0; 1171 1172 for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { 1173 if ((lpmask & (1 << i)) == 0) 1174 continue; 1175 1176 if (veth_transmit_to_one(skb, i, dev)) 1177 error = 1; 1178 else 1179 success = 1; 1180 } 1181 1182 if (error) 1183 dev->stats.tx_errors++; 1184 1185 if (success) { 1186 dev->stats.tx_packets++; 1187 dev->stats.tx_bytes += skb->len; 1188 } 1189} 1190 1191static int veth_start_xmit(struct sk_buff *skb, struct net_device *dev) 1192{ 1193 unsigned char *frame = skb->data; 1194 struct veth_port *port = netdev_priv(dev); 1195 HvLpIndexMap lpmask; 1196 1197 if (! 
(frame[0] & 0x01)) { 1198 /* unicast packet */ 1199 HvLpIndex rlp = frame[5]; 1200 1201 if ( ! ((1 << rlp) & port->lpar_map) ) { 1202 dev_kfree_skb(skb); 1203 return 0; 1204 } 1205 1206 lpmask = 1 << rlp; 1207 } else { 1208 lpmask = port->lpar_map; 1209 } 1210 1211 veth_transmit_to_many(skb, lpmask, dev); 1212 1213 dev_kfree_skb(skb); 1214 1215 return 0; 1216} 1217 1218/* You must hold the connection's lock when you call this function. */ 1219static void veth_recycle_msg(struct veth_lpar_connection *cnx, 1220 struct veth_msg *msg) 1221{ 1222 u32 dma_address, dma_length; 1223 1224 if (msg->in_use) { 1225 msg->in_use = 0; 1226 dma_address = msg->data.addr[0]; 1227 dma_length = msg->data.len[0]; 1228 1229 if (!dma_mapping_error(msg->dev, dma_address)) 1230 dma_unmap_single(msg->dev, dma_address, dma_length, 1231 DMA_TO_DEVICE); 1232 1233 if (msg->skb) { 1234 dev_kfree_skb_any(msg->skb); 1235 msg->skb = NULL; 1236 } 1237 1238 memset(&msg->data, 0, sizeof(msg->data)); 1239 veth_stack_push(cnx, msg); 1240 } else if (cnx->state & VETH_STATE_OPEN) { 1241 veth_error("Non-pending frame (# %d) acked by LPAR %d.\n", 1242 cnx->remote_lp, msg->token); 1243 } 1244} 1245 1246static void veth_wake_queues(struct veth_lpar_connection *cnx) 1247{ 1248 int i; 1249 1250 for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { 1251 struct net_device *dev = veth_dev[i]; 1252 struct veth_port *port; 1253 unsigned long flags; 1254 1255 if (! dev) 1256 continue; 1257 1258 port = netdev_priv(dev); 1259 1260 if (! 
(port->lpar_map & (1<<cnx->remote_lp))) 1261 continue; 1262 1263 spin_lock_irqsave(&port->queue_lock, flags); 1264 1265 port->stopped_map &= ~(1 << cnx->remote_lp); 1266 1267 if (0 == port->stopped_map && netif_queue_stopped(dev)) { 1268 veth_debug("cnx %d: woke queue for %s.\n", 1269 cnx->remote_lp, dev->name); 1270 netif_wake_queue(dev); 1271 } 1272 spin_unlock_irqrestore(&port->queue_lock, flags); 1273 } 1274} 1275 1276static void veth_stop_queues(struct veth_lpar_connection *cnx) 1277{ 1278 int i; 1279 1280 for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { 1281 struct net_device *dev = veth_dev[i]; 1282 struct veth_port *port; 1283 1284 if (! dev) 1285 continue; 1286 1287 port = netdev_priv(dev); 1288 1289 /* If this cnx is not on the vlan for this port, continue */ 1290 if (! (port->lpar_map & (1 << cnx->remote_lp))) 1291 continue; 1292 1293 spin_lock(&port->queue_lock); 1294 1295 netif_stop_queue(dev); 1296 port->stopped_map |= (1 << cnx->remote_lp); 1297 1298 veth_debug("cnx %d: stopped queue for %s, map = 0x%x.\n", 1299 cnx->remote_lp, dev->name, port->stopped_map); 1300 1301 spin_unlock(&port->queue_lock); 1302 } 1303} 1304 1305static void veth_timed_reset(unsigned long ptr) 1306{ 1307 struct veth_lpar_connection *cnx = (struct veth_lpar_connection *)ptr; 1308 unsigned long trigger_time, flags; 1309 1310 /* FIXME is it possible this fires after veth_stop_connection()? 1311 * That would reschedule the statemachine for 5 seconds and probably 1312 * execute it after the module's been unloaded. Hmm. 
*/ 1313 1314 spin_lock_irqsave(&cnx->lock, flags); 1315 1316 if (cnx->outstanding_tx > 0) { 1317 trigger_time = cnx->last_contact + cnx->reset_timeout; 1318 1319 if (trigger_time < jiffies) { 1320 cnx->state |= VETH_STATE_RESET; 1321 veth_kick_statemachine(cnx); 1322 veth_error("%d packets not acked by LPAR %d within %d " 1323 "seconds, resetting.\n", 1324 cnx->outstanding_tx, cnx->remote_lp, 1325 cnx->reset_timeout / HZ); 1326 } else { 1327 /* Reschedule the timer */ 1328 trigger_time = jiffies + cnx->reset_timeout; 1329 mod_timer(&cnx->reset_timer, trigger_time); 1330 } 1331 } 1332 1333 spin_unlock_irqrestore(&cnx->lock, flags); 1334} 1335 1336/* 1337 * Rx path 1338 */ 1339 1340static inline int veth_frame_wanted(struct veth_port *port, u64 mac_addr) 1341{ 1342 int wanted = 0; 1343 int i; 1344 unsigned long flags; 1345 1346 if ( (mac_addr == port->mac_addr) || (mac_addr == 0xffffffffffff0000) ) 1347 return 1; 1348 1349 read_lock_irqsave(&port->mcast_gate, flags); 1350 1351 if (port->promiscuous) { 1352 wanted = 1; 1353 goto out; 1354 } 1355 1356 for (i = 0; i < port->num_mcast; ++i) { 1357 if (port->mcast_addr[i] == mac_addr) { 1358 wanted = 1; 1359 break; 1360 } 1361 } 1362 1363 out: 1364 read_unlock_irqrestore(&port->mcast_gate, flags); 1365 1366 return wanted; 1367} 1368 1369struct dma_chunk { 1370 u64 addr; 1371 u64 size; 1372}; 1373 1374#define VETH_MAX_PAGES_PER_FRAME ( (VETH_MAX_MTU+PAGE_SIZE-2)/PAGE_SIZE + 1 ) 1375 1376static inline void veth_build_dma_list(struct dma_chunk *list, 1377 unsigned char *p, unsigned long length) 1378{ 1379 unsigned long done; 1380 int i = 1; 1381 1382 /* FIXME: skbs are continguous in real addresses. Do we 1383 * really need to break it into PAGE_SIZE chunks, or can we do 1384 * it just at the granularity of iSeries real->absolute 1385 * mapping? Indeed, given the way the allocator works, can we 1386 * count on them being absolutely contiguous? 
*/ 1387 list[0].addr = iseries_hv_addr(p); 1388 list[0].size = min(length, 1389 PAGE_SIZE - ((unsigned long)p & ~PAGE_MASK)); 1390 1391 done = list[0].size; 1392 while (done < length) { 1393 list[i].addr = iseries_hv_addr(p + done); 1394 list[i].size = min(length-done, PAGE_SIZE); 1395 done += list[i].size; 1396 i++; 1397 } 1398} 1399 1400static void veth_flush_acks(struct veth_lpar_connection *cnx) 1401{ 1402 HvLpEvent_Rc rc; 1403 1404 rc = veth_signaldata(cnx, VETH_EVENT_FRAMES_ACK, 1405 0, &cnx->pending_acks); 1406 1407 if (rc != HvLpEvent_Rc_Good) 1408 veth_error("Failed acking frames from LPAR %d, rc = %d\n", 1409 cnx->remote_lp, (int)rc); 1410 1411 cnx->num_pending_acks = 0; 1412 memset(&cnx->pending_acks, 0xff, sizeof(cnx->pending_acks)); 1413} 1414 1415static void veth_receive(struct veth_lpar_connection *cnx, 1416 struct veth_lpevent *event) 1417{ 1418 struct veth_frames_data *senddata = &event->u.frames_data; 1419 int startchunk = 0; 1420 int nchunks; 1421 unsigned long flags; 1422 HvLpDma_Rc rc; 1423 1424 do { 1425 u16 length = 0; 1426 struct sk_buff *skb; 1427 struct dma_chunk local_list[VETH_MAX_PAGES_PER_FRAME]; 1428 struct dma_chunk remote_list[VETH_MAX_FRAMES_PER_MSG]; 1429 u64 dest; 1430 HvLpVirtualLanIndex vlan; 1431 struct net_device *dev; 1432 struct veth_port *port; 1433 1434 /* FIXME: do we need this? */ 1435 memset(local_list, 0, sizeof(local_list)); 1436 memset(remote_list, 0, sizeof(VETH_MAX_FRAMES_PER_MSG)); 1437 1438 /* a 0 address marks the end of the valid entries */ 1439 if (senddata->addr[startchunk] == 0) 1440 break; 1441 1442 /* make sure that we have at least 1 EOF entry in the 1443 * remaining entries */ 1444 if (! 
(senddata->eofmask >> (startchunk + VETH_EOF_SHIFT))) { 1445 veth_error("Missing EOF fragment in event " 1446 "eofmask = 0x%x startchunk = %d\n", 1447 (unsigned)senddata->eofmask, 1448 startchunk); 1449 break; 1450 } 1451 1452 /* build list of chunks in this frame */ 1453 nchunks = 0; 1454 do { 1455 remote_list[nchunks].addr = 1456 (u64) senddata->addr[startchunk+nchunks] << 32; 1457 remote_list[nchunks].size = 1458 senddata->len[startchunk+nchunks]; 1459 length += remote_list[nchunks].size; 1460 } while (! (senddata->eofmask & 1461 (1 << (VETH_EOF_SHIFT + startchunk + nchunks++)))); 1462 1463 /* length == total length of all chunks */ 1464 /* nchunks == # of chunks in this frame */ 1465 1466 if ((length - ETH_HLEN) > VETH_MAX_MTU) { 1467 veth_error("Received oversize frame from LPAR %d " 1468 "(length = %d)\n", 1469 cnx->remote_lp, length); 1470 continue; 1471 } 1472 1473 skb = alloc_skb(length, GFP_ATOMIC); 1474 if (!skb) 1475 continue; 1476 1477 veth_build_dma_list(local_list, skb->data, length); 1478 1479 rc = HvCallEvent_dmaBufList(HvLpEvent_Type_VirtualLan, 1480 event->base_event.xSourceLp, 1481 HvLpDma_Direction_RemoteToLocal, 1482 cnx->src_inst, 1483 cnx->dst_inst, 1484 HvLpDma_AddressType_RealAddress, 1485 HvLpDma_AddressType_TceIndex, 1486 iseries_hv_addr(&local_list), 1487 iseries_hv_addr(&remote_list), 1488 length); 1489 if (rc != HvLpDma_Rc_Good) { 1490 dev_kfree_skb_irq(skb); 1491 continue; 1492 } 1493 1494 vlan = skb->data[9]; 1495 dev = veth_dev[vlan]; 1496 if (! dev) { 1497 /* 1498 * Some earlier versions of the driver sent 1499 * broadcasts down all connections, even to lpars 1500 * that weren't on the relevant vlan. So ignore 1501 * packets belonging to a vlan we're not on. 1502 * We can also be here if we receive packets while 1503 * the driver is going down, because then dev is NULL. 
1504 */ 1505 dev_kfree_skb_irq(skb); 1506 continue; 1507 } 1508 1509 port = netdev_priv(dev); 1510 dest = *((u64 *) skb->data) & 0xFFFFFFFFFFFF0000; 1511 1512 if ((vlan > HVMAXARCHITECTEDVIRTUALLANS) || !port) { 1513 dev_kfree_skb_irq(skb); 1514 continue; 1515 } 1516 if (! veth_frame_wanted(port, dest)) { 1517 dev_kfree_skb_irq(skb); 1518 continue; 1519 } 1520 1521 skb_put(skb, length); 1522 skb->protocol = eth_type_trans(skb, dev); 1523 skb->ip_summed = CHECKSUM_NONE; 1524 netif_rx(skb); /* send it up */ 1525 dev->stats.rx_packets++; 1526 dev->stats.rx_bytes += length; 1527 } while (startchunk += nchunks, startchunk < VETH_MAX_FRAMES_PER_MSG); 1528 1529 /* Ack it */ 1530 spin_lock_irqsave(&cnx->lock, flags); 1531 BUG_ON(cnx->num_pending_acks > VETH_MAX_ACKS_PER_MSG); 1532 1533 cnx->pending_acks[cnx->num_pending_acks++] = 1534 event->base_event.xCorrelationToken; 1535 1536 if ( (cnx->num_pending_acks >= cnx->remote_caps.ack_threshold) 1537 || (cnx->num_pending_acks >= VETH_MAX_ACKS_PER_MSG) ) 1538 veth_flush_acks(cnx); 1539 1540 spin_unlock_irqrestore(&cnx->lock, flags); 1541} 1542 1543static void veth_timed_ack(unsigned long ptr) 1544{ 1545 struct veth_lpar_connection *cnx = (struct veth_lpar_connection *) ptr; 1546 unsigned long flags; 1547 1548 /* Ack all the events */ 1549 spin_lock_irqsave(&cnx->lock, flags); 1550 if (cnx->num_pending_acks > 0) 1551 veth_flush_acks(cnx); 1552 1553 /* Reschedule the timer */ 1554 cnx->ack_timer.expires = jiffies + cnx->ack_timeout; 1555 add_timer(&cnx->ack_timer); 1556 spin_unlock_irqrestore(&cnx->lock, flags); 1557} 1558 1559static int veth_remove(struct vio_dev *vdev) 1560{ 1561 struct veth_lpar_connection *cnx; 1562 struct net_device *dev; 1563 struct veth_port *port; 1564 int i; 1565 1566 dev = veth_dev[vdev->unit_address]; 1567 1568 if (! 
dev) 1569 return 0; 1570 1571 port = netdev_priv(dev); 1572 1573 for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { 1574 cnx = veth_cnx[i]; 1575 1576 if (cnx && (port->lpar_map & (1 << i))) { 1577 /* Drop our reference to connections on our VLAN */ 1578 kobject_put(&cnx->kobject); 1579 } 1580 } 1581 1582 veth_dev[vdev->unit_address] = NULL; 1583 kobject_del(&port->kobject); 1584 kobject_put(&port->kobject); 1585 unregister_netdev(dev); 1586 free_netdev(dev); 1587 1588 return 0; 1589} 1590 1591static int veth_probe(struct vio_dev *vdev, const struct vio_device_id *id) 1592{ 1593 int i = vdev->unit_address; 1594 struct net_device *dev; 1595 struct veth_port *port; 1596 1597 dev = veth_probe_one(i, vdev); 1598 if (dev == NULL) { 1599 veth_remove(vdev); 1600 return 1; 1601 } 1602 veth_dev[i] = dev; 1603 1604 port = (struct veth_port*)netdev_priv(dev); 1605 1606 /* Start the state machine on each connection on this vlan. If we're 1607 * the first dev to do so this will commence link negotiation */ 1608 for (i = 0; i < HVMAXARCHITECTEDLPS; i++) { 1609 struct veth_lpar_connection *cnx; 1610 1611 if (! (port->lpar_map & (1 << i))) 1612 continue; 1613 1614 cnx = veth_cnx[i]; 1615 if (!cnx) 1616 continue; 1617 1618 kobject_get(&cnx->kobject); 1619 veth_kick_statemachine(cnx); 1620 } 1621 1622 return 0; 1623} 1624 1625/** 1626 * veth_device_table: Used by vio.c to match devices that we 1627 * support. 
1628 */ 1629static struct vio_device_id veth_device_table[] __devinitdata = { 1630 { "network", "IBM,iSeries-l-lan" }, 1631 { "", "" } 1632}; 1633MODULE_DEVICE_TABLE(vio, veth_device_table); 1634 1635static struct vio_driver veth_driver = { 1636 .id_table = veth_device_table, 1637 .probe = veth_probe, 1638 .remove = veth_remove, 1639 .driver = { 1640 .name = DRV_NAME, 1641 .owner = THIS_MODULE, 1642 } 1643}; 1644 1645/* 1646 * Module initialization/cleanup 1647 */ 1648 1649static void __exit veth_module_cleanup(void) 1650{ 1651 int i; 1652 struct veth_lpar_connection *cnx; 1653 1654 /* Disconnect our "irq" to stop events coming from the Hypervisor. */ 1655 HvLpEvent_unregisterHandler(HvLpEvent_Type_VirtualLan); 1656 1657 /* Make sure any work queued from Hypervisor callbacks is finished. */ 1658 flush_scheduled_work(); 1659 1660 for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { 1661 cnx = veth_cnx[i]; 1662 1663 if (!cnx) 1664 continue; 1665 1666 /* Remove the connection from sysfs */ 1667 kobject_del(&cnx->kobject); 1668 /* Drop the driver's reference to the connection */ 1669 kobject_put(&cnx->kobject); 1670 } 1671 1672 /* Unregister the driver, which will close all the netdevs and stop 1673 * the connections when they're no longer referenced. 
*/ 1674 vio_unregister_driver(&veth_driver); 1675} 1676module_exit(veth_module_cleanup); 1677 1678static int __init veth_module_init(void) 1679{ 1680 int i; 1681 int rc; 1682 1683 if (!firmware_has_feature(FW_FEATURE_ISERIES)) 1684 return -ENODEV; 1685 1686 this_lp = HvLpConfig_getLpIndex_outline(); 1687 1688 for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { 1689 rc = veth_init_connection(i); 1690 if (rc != 0) 1691 goto error; 1692 } 1693 1694 HvLpEvent_registerHandler(HvLpEvent_Type_VirtualLan, 1695 &veth_handle_event); 1696 1697 rc = vio_register_driver(&veth_driver); 1698 if (rc != 0) 1699 goto error; 1700 1701 for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { 1702 struct kobject *kobj; 1703 1704 if (!veth_cnx[i]) 1705 continue; 1706 1707 kobj = &veth_cnx[i]->kobject; 1708 /* If the add failes, complain but otherwise continue */ 1709 if (0 != driver_add_kobj(&veth_driver.driver, kobj, 1710 "cnx%.2d", veth_cnx[i]->remote_lp)) 1711 veth_error("cnx %d: Failed adding to sysfs.\n", i); 1712 } 1713 1714 return 0; 1715 1716error: 1717 for (i = 0; i < HVMAXARCHITECTEDLPS; ++i) { 1718 veth_destroy_connection(veth_cnx[i]); 1719 } 1720 1721 return rc; 1722} 1723module_init(veth_module_init);