
drivers/net/tile/: on-chip network drivers for the tile architecture

This change adds the first network driver for the tile architecture,
supporting the on-chip XGBE and GBE shims.

The infrastructure is present for the TILE-Gx networking drivers (another
three source files in the new directory), but for now the actual
tilegx sources are waiting on hardware being released to initial customers.

Note that arch/tile/include/hv/* are "upstream" headers from the
Tilera hypervisor and will probably benefit less from LKML review.

Signed-off-by: Chris Metcalf <cmetcalf@tilera.com>

+6510 -2
+1
MAINTAINERS
 S:	Supported
 F:	arch/tile/
 F:	drivers/char/hvc_tile.c
+F:	drivers/net/tile/
 
 TLAN NETWORK DRIVER
 M:	Samuel Chessman <chessman@tux.org>
+52
arch/tile/include/asm/cacheflush.h
 	mb_incoherent();
 }
 
+/*
+ * Flush & invalidate a VA range that is homed remotely on a single core,
+ * waiting until the memory controller holds the flushed values.
+ */
+static inline void finv_buffer_remote(void *buffer, size_t size)
+{
+	char *p;
+	int i;
+
+	/*
+	 * Flush and invalidate the buffer out of the local L1/L2
+	 * and request the home cache to flush and invalidate as well.
+	 */
+	__finv_buffer(buffer, size);
+
+	/*
+	 * Wait for the home cache to acknowledge that it has processed
+	 * all the flush-and-invalidate requests.  This does not mean
+	 * that the flushed data has reached the memory controller yet,
+	 * but it does mean the home cache is processing the flushes.
+	 */
+	__insn_mf();
+
+	/*
+	 * Issue a load to the last cache line, which can't complete
+	 * until all the previously-issued flushes to the same memory
+	 * controller have also completed.  If we weren't striping
+	 * memory, that one load would be sufficient, but since we may
+	 * be, we also need to back up to the last load issued to
+	 * another memory controller, which would be the point where
+	 * we crossed an 8KB boundary (the granularity of striping
+	 * across memory controllers).  Keep backing up and doing this
+	 * until we are before the beginning of the buffer, or have
+	 * hit all the controllers.
+	 */
+	for (i = 0, p = (char *)buffer + size - 1;
+	     i < (1 << CHIP_LOG_NUM_MSHIMS()) && p >= (char *)buffer;
+	     ++i) {
+		const unsigned long STRIPE_WIDTH = 8192;
+
+		/* Force a load instruction to issue. */
+		*(volatile char *)p;
+
+		/* Jump to end of previous stripe. */
+		p -= STRIPE_WIDTH;
+		p = (char *)((unsigned long)p | (STRIPE_WIDTH - 1));
+	}
+
+	/* Wait for the loads (and thus flushes) to have completed. */
+	__insn_mf();
+}
+
 #endif /* _ASM_TILE_CACHEFLUSH_H */
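To make the new helper concrete, here is a minimal usage sketch (illustrative only, not part of the patch): my_push_buffer_to_hw() and its arguments are invented names, and the sketch assumes a buffer that is cache-homed on a remote tile.

#include <asm/cacheflush.h>

/* Hypothetical caller: flush a remotely-homed buffer all the way
 * to the memory controller before handing it to the I/O shim. */
static void my_push_buffer_to_hw(void *buf, size_t size)
{
	/* Flush & invalidate the local and home caches, and wait
	 * until the memory controller actually holds the data. */
	finv_buffer_remote(buf, size);

	/* ...the shim may now safely DMA from physical memory... */
}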
+10
arch/tile/include/asm/processor.h
 /* Are we using huge pages in the TLB for kernel data? */
 extern int kdata_huge;
 
+/* Support standard Linux prefetching. */
+#define ARCH_HAS_PREFETCH
+#define prefetch(x) __builtin_prefetch(x)
 #define PREFETCH_STRIDE CHIP_L2_LINE_SIZE()
+
+/* Bring a value into the L1D, faulting the TLB if necessary. */
+#ifdef __tilegx__
+#define prefetch_L1(x) __insn_prefetch_l1_fault((void *)(x))
+#else
+#define prefetch_L1(x) __insn_prefetch_L1((void *)(x))
+#endif
 
 #else /* __ASSEMBLY__ */
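Per the comment above, prefetch() is the compiler-builtin hint, while prefetch_L1() additionally faults the TLB so the line really lands in the L1D. A hypothetical illustration (the list type and helper below are invented for this sketch, assuming <asm/processor.h> is in scope):

struct my_node {
	struct my_node *next;
	int value;
};

/* Walk a list, pulling the next node into the L1D while the
 * current node is being processed. */
static int my_sum_list(struct my_node *n)
{
	int sum = 0;

	while (n) {
		if (n->next)
			prefetch_L1(n->next);	/* faults the TLB if needed */
		sum += n->value;
		n = n->next;
	}
	return sum;
}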
+300
arch/tile/include/hv/drv_xgbe_impl.h
··· 1 + /* 2 + * Copyright 2010 Tilera Corporation. All Rights Reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation, version 2. 7 + * 8 + * This program is distributed in the hope that it will be useful, but 9 + * WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 11 + * NON INFRINGEMENT. See the GNU General Public License for 12 + * more details. 13 + */ 14 + 15 + /** 16 + * @file drivers/xgbe/impl.h 17 + * Implementation details for the NetIO library. 18 + */ 19 + 20 + #ifndef __DRV_XGBE_IMPL_H__ 21 + #define __DRV_XGBE_IMPL_H__ 22 + 23 + #include <hv/netio_errors.h> 24 + #include <hv/netio_intf.h> 25 + #include <hv/drv_xgbe_intf.h> 26 + 27 + 28 + /** How many groups we have (log2). */ 29 + #define LOG2_NUM_GROUPS (12) 30 + /** How many groups we have. */ 31 + #define NUM_GROUPS (1 << LOG2_NUM_GROUPS) 32 + 33 + /** Number of output requests we'll buffer per tile. */ 34 + #define EPP_REQS_PER_TILE (32) 35 + 36 + /** Words used in an eDMA command without checksum acceleration. */ 37 + #define EDMA_WDS_NO_CSUM 8 38 + /** Words used in an eDMA command with checksum acceleration. */ 39 + #define EDMA_WDS_CSUM 10 40 + /** Total available words in the eDMA command FIFO. */ 41 + #define EDMA_WDS_TOTAL 128 42 + 43 + 44 + /* 45 + * FIXME: These definitions are internal and should have underscores! 46 + * NOTE: The actual numeric values here are intentional and allow us to 47 + * optimize the concept "if small ... else if large ... else ...", by 48 + * checking for the low bit being set, and then for non-zero. 49 + * These are used as array indices, so they must have the values (0, 1, 2) 50 + * in some order. 51 + */ 52 + #define SIZE_SMALL (1) /**< Small packet queue. */ 53 + #define SIZE_LARGE (2) /**< Large packet queue. */ 54 + #define SIZE_JUMBO (0) /**< Jumbo packet queue. */ 55 + 56 + /** The number of "SIZE_xxx" values. */ 57 + #define NETIO_NUM_SIZES 3 58 + 59 + 60 + /* 61 + * Default numbers of packets for IPP drivers. These values are chosen 62 + * such that CIPP1 will not overflow its L2 cache. 63 + */ 64 + 65 + /** The default number of small packets. */ 66 + #define NETIO_DEFAULT_SMALL_PACKETS 2750 67 + /** The default number of large packets. */ 68 + #define NETIO_DEFAULT_LARGE_PACKETS 2500 69 + /** The default number of jumbo packets. */ 70 + #define NETIO_DEFAULT_JUMBO_PACKETS 250 71 + 72 + 73 + /** Log2 of the size of a memory arena. */ 74 + #define NETIO_ARENA_SHIFT 24 /* 16 MB */ 75 + /** Size of a memory arena. */ 76 + #define NETIO_ARENA_SIZE (1 << NETIO_ARENA_SHIFT) 77 + 78 + 79 + /** A queue of packets. 80 + * 81 + * This structure partially defines a queue of packets waiting to be 82 + * processed. The queue as a whole is written to by an interrupt handler and 83 + * read by non-interrupt code; this data structure is what's touched by the 84 + * interrupt handler. The other part of the queue state, the read offset, is 85 + * kept in user space, not in hypervisor space, so it is in a separate data 86 + * structure. 87 + * 88 + * The read offset (__packet_receive_read in the user part of the queue 89 + * structure) points to the next packet to be read. When the read offset is 90 + * equal to the write offset, the queue is empty; therefore the queue must 91 + * contain one more slot than the required maximum queue size. 
92 + * 93 + * Here's an example of all 3 state variables and what they mean. All 94 + * pointers move left to right. 95 + * 96 + * @code 97 + * I I V V V V I I I I 98 + * 0 1 2 3 4 5 6 7 8 9 10 99 + * ^ ^ ^ ^ 100 + * | | | 101 + * | | __last_packet_plus_one 102 + * | __buffer_write 103 + * __packet_receive_read 104 + * @endcode 105 + * 106 + * This queue has 10 slots, and thus can hold 9 packets (_last_packet_plus_one 107 + * = 10). The read pointer is at 2, and the write pointer is at 6; thus, 108 + * there are valid, unread packets in slots 2, 3, 4, and 5. The remaining 109 + * slots are invalid (do not contain a packet). 110 + */ 111 + typedef struct { 112 + /** Byte offset of the next notify packet to be written: zero for the first 113 + * packet on the queue, sizeof (netio_pkt_t) for the second packet on the 114 + * queue, etc. */ 115 + volatile uint32_t __packet_write; 116 + 117 + /** Offset of the packet after the last valid packet (i.e., when any 118 + * pointer is incremented to this value, it wraps back to zero). */ 119 + uint32_t __last_packet_plus_one; 120 + } 121 + __netio_packet_queue_t; 122 + 123 + 124 + /** A queue of buffers. 125 + * 126 + * This structure partially defines a queue of empty buffers which have been 127 + * obtained via requests to the IPP. (The elements of the queue are packet 128 + * handles, which are transformed into a full netio_pkt_t when the buffer is 129 + * retrieved.) The queue as a whole is written to by an interrupt handler and 130 + * read by non-interrupt code; this data structure is what's touched by the 131 + * interrupt handler. The other parts of the queue state, the read offset and 132 + * requested write offset, are kept in user space, not in hypervisor space, so 133 + * they are in a separate data structure. 134 + * 135 + * The read offset (__buffer_read in the user part of the queue structure) 136 + * points to the next buffer to be read. When the read offset is equal to the 137 + * write offset, the queue is empty; therefore the queue must contain one more 138 + * slot than the required maximum queue size. 139 + * 140 + * The requested write offset (__buffer_requested_write in the user part of 141 + * the queue structure) points to the slot which will hold the next buffer we 142 + * request from the IPP, once we get around to sending such a request. When 143 + * the requested write offset is equal to the write offset, no requests for 144 + * new buffers are outstanding; when the requested write offset is one greater 145 + * than the read offset, no more requests may be sent. 146 + * 147 + * Note that, unlike the packet_queue, the buffer_queue places incoming 148 + * buffers at decreasing addresses. This makes the check for "is it time to 149 + * wrap the buffer pointer" cheaper in the assembly code which receives new 150 + * buffers, and means that the value which defines the queue size, 151 + * __last_buffer, is different than in the packet queue. Also, the offset 152 + * used in the packet_queue is already scaled by the size of a packet; here we 153 + * use unscaled slot indices for the offsets. (These differences are 154 + * historical, and in the future it's possible that the packet_queue will look 155 + * more like this queue.) 156 + * 157 + * @code 158 + * Here's an example of all 4 state variables and what they mean. Remember: 159 + * all pointers move right to left. 
160 + * 161 + * V V V I I R R V V V 162 + * 0 1 2 3 4 5 6 7 8 9 163 + * ^ ^ ^ ^ 164 + * | | | | 165 + * | | | __last_buffer 166 + * | | __buffer_write 167 + * | __buffer_requested_write 168 + * __buffer_read 169 + * @endcode 170 + * 171 + * This queue has 10 slots, and thus can hold 9 buffers (_last_buffer = 9). 172 + * The read pointer is at 2, and the write pointer is at 6; thus, there are 173 + * valid, unread buffers in slots 2, 1, 0, 9, 8, and 7. The requested write 174 + * pointer is at 4; thus, requests have been made to the IPP for buffers which 175 + * will be placed in slots 6 and 5 when they arrive. Finally, the remaining 176 + * slots are invalid (do not contain a buffer). 177 + */ 178 + typedef struct 179 + { 180 + /** Ordinal number of the next buffer to be written: 0 for the first slot in 181 + * the queue, 1 for the second slot in the queue, etc. */ 182 + volatile uint32_t __buffer_write; 183 + 184 + /** Ordinal number of the last buffer (i.e., when any pointer is decremented 185 + * below zero, it is reloaded with this value). */ 186 + uint32_t __last_buffer; 187 + } 188 + __netio_buffer_queue_t; 189 + 190 + 191 + /** 192 + * An object for providing Ethernet packets to a process. 193 + */ 194 + typedef struct __netio_queue_impl_t 195 + { 196 + /** The queue of packets waiting to be received. */ 197 + __netio_packet_queue_t __packet_receive_queue; 198 + /** The intr bit mask that IDs this device. */ 199 + unsigned int __intr_id; 200 + /** Offset to queues of empty buffers, one per size. */ 201 + uint32_t __buffer_queue[NETIO_NUM_SIZES]; 202 + /** The address of the first EPP tile, or -1 if no EPP. */ 203 + /* ISSUE: Actually this is always "0" or "~0". */ 204 + uint32_t __epp_location; 205 + /** The queue ID that this queue represents. */ 206 + unsigned int __queue_id; 207 + /** Number of acknowledgements received. */ 208 + volatile uint32_t __acks_received; 209 + /** Last completion number received for packet_sendv. */ 210 + volatile uint32_t __last_completion_rcv; 211 + /** Number of packets allowed to be outstanding. */ 212 + uint32_t __max_outstanding; 213 + /** First VA available for packets. */ 214 + void* __va_0; 215 + /** First VA in second range available for packets. */ 216 + void* __va_1; 217 + /** Padding to align the "__packets" field to the size of a netio_pkt_t. */ 218 + uint32_t __padding[3]; 219 + /** The packets themselves. */ 220 + netio_pkt_t __packets[0]; 221 + } 222 + netio_queue_impl_t; 223 + 224 + 225 + /** 226 + * An object for managing the user end of a NetIO queue. 227 + */ 228 + typedef struct __netio_queue_user_impl_t 229 + { 230 + /** The next incoming packet to be read. */ 231 + uint32_t __packet_receive_read; 232 + /** The next empty buffers to be read, one index per size. */ 233 + uint8_t __buffer_read[NETIO_NUM_SIZES]; 234 + /** Where the empty buffer we next request from the IPP will go, one index 235 + * per size. */ 236 + uint8_t __buffer_requested_write[NETIO_NUM_SIZES]; 237 + /** PCIe interface flag. */ 238 + uint8_t __pcie; 239 + /** Number of packets left to be received before we send a credit update. */ 240 + uint32_t __receive_credit_remaining; 241 + /** Value placed in __receive_credit_remaining when it reaches zero. */ 242 + uint32_t __receive_credit_interval; 243 + /** First fast I/O routine index. */ 244 + uint32_t __fastio_index; 245 + /** Number of acknowledgements expected. */ 246 + uint32_t __acks_outstanding; 247 + /** Last completion number requested. 
*/ 248 + uint32_t __last_completion_req; 249 + /** File descriptor for driver. */ 250 + int __fd; 251 + } 252 + netio_queue_user_impl_t; 253 + 254 + 255 + #define NETIO_GROUP_CHUNK_SIZE 64 /**< Max # groups in one IPP request */ 256 + #define NETIO_BUCKET_CHUNK_SIZE 64 /**< Max # buckets in one IPP request */ 257 + 258 + 259 + /** Internal structure used to convey packet send information to the 260 + * hypervisor. FIXME: Actually, it's not used for that anymore, but 261 + * netio_packet_send() still uses it internally. 262 + */ 263 + typedef struct 264 + { 265 + uint16_t flags; /**< Packet flags (__NETIO_SEND_FLG_xxx) */ 266 + uint16_t transfer_size; /**< Size of packet */ 267 + uint32_t va; /**< VA of start of packet */ 268 + __netio_pkt_handle_t handle; /**< Packet handle */ 269 + uint32_t csum0; /**< First checksum word */ 270 + uint32_t csum1; /**< Second checksum word */ 271 + } 272 + __netio_send_cmd_t; 273 + 274 + 275 + /** Flags used in two contexts: 276 + * - As the "flags" member in the __netio_send_cmd_t, above; used only 277 + * for netio_pkt_send_{prepare,commit}. 278 + * - As part of the flags passed to the various send packet fast I/O calls. 279 + */ 280 + 281 + /** Need acknowledgement on this packet. Note that some code in the 282 + * normal send_pkt fast I/O handler assumes that this is equal to 1. */ 283 + #define __NETIO_SEND_FLG_ACK 0x1 284 + 285 + /** Do checksum on this packet. (Only used with the __netio_send_cmd_t; 286 + * normal packet sends use a special fast I/O index to denote checksumming, 287 + * and multi-segment sends test the checksum descriptor.) */ 288 + #define __NETIO_SEND_FLG_CSUM 0x2 289 + 290 + /** Get a completion on this packet. Only used with multi-segment sends. */ 291 + #define __NETIO_SEND_FLG_COMPLETION 0x4 292 + 293 + /** Position of the number-of-extra-segments value in the flags word. 294 + Only used with multi-segment sends. */ 295 + #define __NETIO_SEND_FLG_XSEG_SHIFT 3 296 + 297 + /** Width of the number-of-extra-segments value in the flags word. */ 298 + #define __NETIO_SEND_FLG_XSEG_WIDTH 2 299 + 300 + #endif /* __DRV_XGBE_IMPL_H__ */
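The queue discipline documented above (write offset kept in hypervisor space, read offset kept in user space, one sentinel slot to distinguish full from empty) can be made concrete with a short sketch. The two helpers below are invented for illustration and are not part of the patch:

static inline int my_queue_empty(const netio_queue_impl_t *q,
				 const netio_queue_user_impl_t *u)
{
	/* Empty exactly when the user's read offset has caught up
	 * with the interrupt handler's write offset. */
	return u->__packet_receive_read ==
	       q->__packet_receive_queue.__packet_write;
}

static inline uint32_t my_advance_read_offset(const netio_queue_impl_t *q,
					      uint32_t read_off)
{
	/* Packet-queue offsets are pre-scaled by sizeof(netio_pkt_t). */
	read_off += sizeof(netio_pkt_t);
	if (read_off == q->__packet_receive_queue.__last_packet_plus_one)
		read_off = 0;	/* wrap back to the first slot */
	return read_off;
}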
+615
arch/tile/include/hv/drv_xgbe_intf.h
··· 1 + /* 2 + * Copyright 2010 Tilera Corporation. All Rights Reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation, version 2. 7 + * 8 + * This program is distributed in the hope that it will be useful, but 9 + * WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 11 + * NON INFRINGEMENT. See the GNU General Public License for 12 + * more details. 13 + */ 14 + 15 + /** 16 + * @file drv_xgbe_intf.h 17 + * Interface to the hypervisor XGBE driver. 18 + */ 19 + 20 + #ifndef __DRV_XGBE_INTF_H__ 21 + #define __DRV_XGBE_INTF_H__ 22 + 23 + /** 24 + * An object for forwarding VAs and PAs to the hypervisor. 25 + * @ingroup types 26 + * 27 + * This allows the supervisor to specify a number of areas of memory to 28 + * store packet buffers. 29 + */ 30 + typedef struct 31 + { 32 + /** The physical address of the memory. */ 33 + HV_PhysAddr pa; 34 + /** Page table entry for the memory. This is only used to derive the 35 + * memory's caching mode; the PA bits are ignored. */ 36 + HV_PTE pte; 37 + /** The virtual address of the memory. */ 38 + HV_VirtAddr va; 39 + /** Size (in bytes) of the memory area. */ 40 + int size; 41 + 42 + } 43 + netio_ipp_address_t; 44 + 45 + /** The various pread/pwrite offsets into the hypervisor-level driver. 46 + * @ingroup types 47 + */ 48 + typedef enum 49 + { 50 + /** Inform the Linux driver of the address of the NetIO arena memory. 51 + * This offset is actually only used to convey information from netio 52 + * to the Linux driver; it never makes it from there to the hypervisor. 53 + * Write-only; takes a uint32_t specifying the VA address. */ 54 + NETIO_FIXED_ADDR = 0x5000000000000000ULL, 55 + 56 + /** Inform the Linux driver of the size of the NetIO arena memory. 57 + * This offset is actually only used to convey information from netio 58 + * to the Linux driver; it never makes it from there to the hypervisor. 59 + * Write-only; takes a uint32_t specifying the VA size. */ 60 + NETIO_FIXED_SIZE = 0x5100000000000000ULL, 61 + 62 + /** Register current tile with IPP. Write then read: write, takes a 63 + * netio_input_config_t, read returns a pointer to a netio_queue_impl_t. */ 64 + NETIO_IPP_INPUT_REGISTER_OFF = 0x6000000000000000ULL, 65 + 66 + /** Unregister current tile from IPP. Write-only, takes a dummy argument. */ 67 + NETIO_IPP_INPUT_UNREGISTER_OFF = 0x6100000000000000ULL, 68 + 69 + /** Start packets flowing. Write-only, takes a dummy argument. */ 70 + NETIO_IPP_INPUT_INIT_OFF = 0x6200000000000000ULL, 71 + 72 + /** Stop packets flowing. Write-only, takes a dummy argument. */ 73 + NETIO_IPP_INPUT_UNINIT_OFF = 0x6300000000000000ULL, 74 + 75 + /** Configure group (typically we group on VLAN). Write-only: takes an 76 + * array of netio_group_t's, low 24 bits of the offset is the base group 77 + * number times the size of a netio_group_t. */ 78 + NETIO_IPP_INPUT_GROUP_CFG_OFF = 0x6400000000000000ULL, 79 + 80 + /** Configure bucket. Write-only: takes an array of netio_bucket_t's, low 81 + * 24 bits of the offset is the base bucket number times the size of a 82 + * netio_bucket_t. */ 83 + NETIO_IPP_INPUT_BUCKET_CFG_OFF = 0x6500000000000000ULL, 84 + 85 + /** Get/set a parameter. Read or write: read or write data is the parameter 86 + * value, low 32 bits of the offset is a __netio_getset_offset_t. 
*/ 87 + NETIO_IPP_PARAM_OFF = 0x6600000000000000ULL, 88 + 89 + /** Get fast I/O index. Read-only; returns a 4-byte base index value. */ 90 + NETIO_IPP_GET_FASTIO_OFF = 0x6700000000000000ULL, 91 + 92 + /** Configure hijack IP address. Packets with this IPv4 dest address 93 + * go to bucket NETIO_NUM_BUCKETS - 1. Write-only: takes an IP address 94 + * in some standard form. FIXME: Define the form! */ 95 + NETIO_IPP_INPUT_HIJACK_CFG_OFF = 0x6800000000000000ULL, 96 + 97 + /** 98 + * Offsets beyond this point are reserved for the supervisor (although that 99 + * enforcement must be done by the supervisor driver itself). 100 + */ 101 + NETIO_IPP_USER_MAX_OFF = 0x6FFFFFFFFFFFFFFFULL, 102 + 103 + /** Register I/O memory. Write-only, takes a netio_ipp_address_t. */ 104 + NETIO_IPP_IOMEM_REGISTER_OFF = 0x7000000000000000ULL, 105 + 106 + /** Unregister I/O memory. Write-only, takes a netio_ipp_address_t. */ 107 + NETIO_IPP_IOMEM_UNREGISTER_OFF = 0x7100000000000000ULL, 108 + 109 + /* Offsets greater than 0x7FFFFFFF can't be used directly from Linux 110 + * userspace code due to limitations in the pread/pwrite syscalls. */ 111 + 112 + /** Drain LIPP buffers. */ 113 + NETIO_IPP_DRAIN_OFF = 0xFA00000000000000ULL, 114 + 115 + /** Supply a netio_ipp_address_t to be used as shared memory for the 116 + * LEPP command queue. */ 117 + NETIO_EPP_SHM_OFF = 0xFB00000000000000ULL, 118 + 119 + /* 0xFC... is currently unused. */ 120 + 121 + /** Stop IPP/EPP tiles. Write-only, takes a dummy argument. */ 122 + NETIO_IPP_STOP_SHIM_OFF = 0xFD00000000000000ULL, 123 + 124 + /** Start IPP/EPP tiles. Write-only, takes a dummy argument. */ 125 + NETIO_IPP_START_SHIM_OFF = 0xFE00000000000000ULL, 126 + 127 + /** Supply packet arena. Write-only, takes an array of 128 + * netio_ipp_address_t values. */ 129 + NETIO_IPP_ADDRESS_OFF = 0xFF00000000000000ULL, 130 + } netio_hv_offset_t; 131 + 132 + /** Extract the base offset from an offset */ 133 + #define NETIO_BASE_OFFSET(off) ((off) & 0xFF00000000000000ULL) 134 + /** Extract the local offset from an offset */ 135 + #define NETIO_LOCAL_OFFSET(off) ((off) & 0x00FFFFFFFFFFFFFFULL) 136 + 137 + 138 + /** 139 + * Get/set offset. 140 + */ 141 + typedef union 142 + { 143 + struct 144 + { 145 + uint64_t addr:48; /**< Class-specific address */ 146 + unsigned int class:8; /**< Class (e.g., NETIO_PARAM) */ 147 + unsigned int opcode:8; /**< High 8 bits of NETIO_IPP_PARAM_OFF */ 148 + } 149 + bits; /**< Bitfields */ 150 + uint64_t word; /**< Aggregated value to use as the offset */ 151 + } 152 + __netio_getset_offset_t; 153 + 154 + /** 155 + * Fast I/O index offsets (must be contiguous). 156 + */ 157 + typedef enum 158 + { 159 + NETIO_FASTIO_ALLOCATE = 0, /**< Get empty packet buffer */ 160 + NETIO_FASTIO_FREE_BUFFER = 1, /**< Give buffer back to IPP */ 161 + NETIO_FASTIO_RETURN_CREDITS = 2, /**< Give credits to IPP */ 162 + NETIO_FASTIO_SEND_PKT_NOCK = 3, /**< Send a packet, no checksum */ 163 + NETIO_FASTIO_SEND_PKT_CK = 4, /**< Send a packet, with checksum */ 164 + NETIO_FASTIO_SEND_PKT_VEC = 5, /**< Send a vector of packets */ 165 + NETIO_FASTIO_SENDV_PKT = 6, /**< Sendv one packet */ 166 + NETIO_FASTIO_NUM_INDEX = 7, /**< Total number of fast I/O indices */ 167 + } netio_fastio_index_t; 168 + 169 + /** 3-word return type for Fast I/O call. */ 170 + typedef struct 171 + { 172 + int err; /**< Error code. */ 173 + uint32_t val0; /**< Value. Meaning depends upon the specific call. */ 174 + uint32_t val1; /**< Value. Meaning depends upon the specific call. 
*/ 175 + } netio_fastio_rv3_t; 176 + 177 + /** 0-argument fast I/O call */ 178 + int __netio_fastio0(uint32_t fastio_index); 179 + /** 1-argument fast I/O call */ 180 + int __netio_fastio1(uint32_t fastio_index, uint32_t arg0); 181 + /** 3-argument fast I/O call, 2-word return value */ 182 + netio_fastio_rv3_t __netio_fastio3_rv3(uint32_t fastio_index, uint32_t arg0, 183 + uint32_t arg1, uint32_t arg2); 184 + /** 4-argument fast I/O call */ 185 + int __netio_fastio4(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, 186 + uint32_t arg2, uint32_t arg3); 187 + /** 6-argument fast I/O call */ 188 + int __netio_fastio6(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, 189 + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5); 190 + /** 9-argument fast I/O call */ 191 + int __netio_fastio9(uint32_t fastio_index, uint32_t arg0, uint32_t arg1, 192 + uint32_t arg2, uint32_t arg3, uint32_t arg4, uint32_t arg5, 193 + uint32_t arg6, uint32_t arg7, uint32_t arg8); 194 + 195 + /** Allocate an empty packet. 196 + * @param fastio_index Fast I/O index. 197 + * @param size Size of the packet to allocate. 198 + */ 199 + #define __netio_fastio_allocate(fastio_index, size) \ 200 + __netio_fastio1((fastio_index) + NETIO_FASTIO_ALLOCATE, size) 201 + 202 + /** Free a buffer. 203 + * @param fastio_index Fast I/O index. 204 + * @param handle Handle for the packet to free. 205 + */ 206 + #define __netio_fastio_free_buffer(fastio_index, handle) \ 207 + __netio_fastio1((fastio_index) + NETIO_FASTIO_FREE_BUFFER, handle) 208 + 209 + /** Increment our receive credits. 210 + * @param fastio_index Fast I/O index. 211 + * @param credits Number of credits to add. 212 + */ 213 + #define __netio_fastio_return_credits(fastio_index, credits) \ 214 + __netio_fastio1((fastio_index) + NETIO_FASTIO_RETURN_CREDITS, credits) 215 + 216 + /** Send packet, no checksum. 217 + * @param fastio_index Fast I/O index. 218 + * @param ackflag Nonzero if we want an ack. 219 + * @param size Size of the packet. 220 + * @param va Virtual address of start of packet. 221 + * @param handle Packet handle. 222 + */ 223 + #define __netio_fastio_send_pkt_nock(fastio_index, ackflag, size, va, handle) \ 224 + __netio_fastio4((fastio_index) + NETIO_FASTIO_SEND_PKT_NOCK, ackflag, \ 225 + size, va, handle) 226 + 227 + /** Send packet, calculate checksum. 228 + * @param fastio_index Fast I/O index. 229 + * @param ackflag Nonzero if we want an ack. 230 + * @param size Size of the packet. 231 + * @param va Virtual address of start of packet. 232 + * @param handle Packet handle. 233 + * @param csum0 Shim checksum header. 234 + * @param csum1 Checksum seed. 235 + */ 236 + #define __netio_fastio_send_pkt_ck(fastio_index, ackflag, size, va, handle, \ 237 + csum0, csum1) \ 238 + __netio_fastio6((fastio_index) + NETIO_FASTIO_SEND_PKT_CK, ackflag, \ 239 + size, va, handle, csum0, csum1) 240 + 241 + 242 + /** Format for the "csum0" argument to the __netio_fastio_send routines 243 + * and LEPP. Note that this is currently exactly identical to the 244 + * ShimProtocolOffloadHeader. 245 + */ 246 + typedef union 247 + { 248 + struct 249 + { 250 + unsigned int start_byte:7; /**< The first byte to be checksummed */ 251 + unsigned int count:14; /**< Number of bytes to be checksummed. */ 252 + unsigned int destination_byte:7; /**< The byte to write the checksum to. */ 253 + unsigned int reserved:4; /**< Reserved. */ 254 + } bits; /**< Decomposed method of access. */ 255 + unsigned int word; /**< To send out the IDN. 
*/ 256 + } __netio_checksum_header_t; 257 + 258 + 259 + /** Sendv packet with 1 or 2 segments. 260 + * @param fastio_index Fast I/O index. 261 + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus 262 + * 1 in next 2 bits; expected checksum in high 16 bits. 263 + * @param confno Confirmation number to request, if notify flag set. 264 + * @param csum0 Checksum descriptor; if zero, no checksum. 265 + * @param va_F Virtual address of first segment. 266 + * @param va_L Virtual address of last segment, if 2 segments. 267 + * @param len_F_L Length of first segment in low 16 bits; length of last 268 + * segment, if 2 segments, in high 16 bits. 269 + */ 270 + #define __netio_fastio_sendv_pkt_1_2(fastio_index, flags, confno, csum0, \ 271 + va_F, va_L, len_F_L) \ 272 + __netio_fastio6((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ 273 + csum0, va_F, va_L, len_F_L) 274 + 275 + /** Send packet on PCIe interface. 276 + * @param fastio_index Fast I/O index. 277 + * @param flags Ack/csum/notify flags in low 3 bits. 278 + * @param confno Confirmation number to request, if notify flag set. 279 + * @param csum0 Checksum descriptor; Hard wired 0, not needed for PCIe. 280 + * @param va_F Virtual address of the packet buffer. 281 + * @param va_L Virtual address of last segment, if 2 segments. Hard wired 0. 282 + * @param len_F_L Length of the packet buffer in low 16 bits. 283 + */ 284 + #define __netio_fastio_send_pcie_pkt(fastio_index, flags, confno, csum0, \ 285 + va_F, va_L, len_F_L) \ 286 + __netio_fastio6((fastio_index) + PCIE_FASTIO_SENDV_PKT, flags, confno, \ 287 + csum0, va_F, va_L, len_F_L) 288 + 289 + /** Sendv packet with 3 or 4 segments. 290 + * @param fastio_index Fast I/O index. 291 + * @param flags Ack/csum/notify flags in low 3 bits; number of segments minus 292 + * 1 in next 2 bits; expected checksum in high 16 bits. 293 + * @param confno Confirmation number to request, if notify flag set. 294 + * @param csum0 Checksum descriptor; if zero, no checksum. 295 + * @param va_F Virtual address of first segment. 296 + * @param va_L Virtual address of last segment (third segment if 3 segments, 297 + * fourth segment if 4 segments). 298 + * @param len_F_L Length of first segment in low 16 bits; length of last 299 + * segment in high 16 bits. 300 + * @param va_M0 Virtual address of "middle 0" segment; this segment is sent 301 + * second when there are three segments, and third if there are four. 302 + * @param va_M1 Virtual address of "middle 1" segment; this segment is sent 303 + * second when there are four segments. 304 + * @param len_M0_M1 Length of middle 0 segment in low 16 bits; length of middle 305 + * 1 segment, if 4 segments, in high 16 bits. 306 + */ 307 + #define __netio_fastio_sendv_pkt_3_4(fastio_index, flags, confno, csum0, va_F, \ 308 + va_L, len_F_L, va_M0, va_M1, len_M0_M1) \ 309 + __netio_fastio9((fastio_index) + NETIO_FASTIO_SENDV_PKT, flags, confno, \ 310 + csum0, va_F, va_L, len_F_L, va_M0, va_M1, len_M0_M1) 311 + 312 + /** Send vector of packets. 313 + * @param fastio_index Fast I/O index. 314 + * @param seqno Number of packets transmitted so far on this interface; 315 + * used to decide which packets should be acknowledged. 316 + * @param nentries Number of entries in vector. 317 + * @param va Virtual address of start of vector entry array. 318 + * @return 3-word netio_fastio_rv3_t structure. The structure's err member 319 + * is an error code, or zero if no error. 
The val0 member is the 320 + * updated value of seqno; it has been incremented by 1 for each 321 + * packet sent. That increment may be less than nentries if an 322 + * error occured, or if some of the entries in the vector contain 323 + * handles equal to NETIO_PKT_HANDLE_NONE. The val1 member is the 324 + * updated value of nentries; it has been decremented by 1 for each 325 + * vector entry processed. Again, that decrement may be less than 326 + * nentries (leaving the returned value positive) if an error 327 + * occurred. 328 + */ 329 + #define __netio_fastio_send_pkt_vec(fastio_index, seqno, nentries, va) \ 330 + __netio_fastio3_rv3((fastio_index) + NETIO_FASTIO_SEND_PKT_VEC, seqno, \ 331 + nentries, va) 332 + 333 + 334 + /** An egress DMA command for LEPP. */ 335 + typedef struct 336 + { 337 + /** Is this a TSO transfer? 338 + * 339 + * NOTE: This field is always 0, to distinguish it from 340 + * lepp_tso_cmd_t. It must come first! 341 + */ 342 + uint8_t tso : 1; 343 + 344 + /** Unused padding bits. */ 345 + uint8_t _unused : 3; 346 + 347 + /** Should this packet be sent directly from caches instead of DRAM, 348 + * using hash-for-home to locate the packet data? 349 + */ 350 + uint8_t hash_for_home : 1; 351 + 352 + /** Should we compute a checksum? */ 353 + uint8_t compute_checksum : 1; 354 + 355 + /** Is this the final buffer for this packet? 356 + * 357 + * A single packet can be split over several input buffers (a "gather" 358 + * operation). This flag indicates that this is the last buffer 359 + * in a packet. 360 + */ 361 + uint8_t end_of_packet : 1; 362 + 363 + /** Should LEPP advance 'comp_busy' when this DMA is fully finished? */ 364 + uint8_t send_completion : 1; 365 + 366 + /** High bits of Client Physical Address of the start of the buffer 367 + * to be egressed. 368 + * 369 + * NOTE: Only 6 bits are actually needed here, as CPAs are 370 + * currently 38 bits. So two bits could be scavenged from this. 371 + */ 372 + uint8_t cpa_hi; 373 + 374 + /** The number of bytes to be egressed. */ 375 + uint16_t length; 376 + 377 + /** Low 32 bits of Client Physical Address of the start of the buffer 378 + * to be egressed. 379 + */ 380 + uint32_t cpa_lo; 381 + 382 + /** Checksum information (only used if 'compute_checksum'). */ 383 + __netio_checksum_header_t checksum_data; 384 + 385 + } lepp_cmd_t; 386 + 387 + 388 + /** A chunk of physical memory for a TSO egress. */ 389 + typedef struct 390 + { 391 + /** The low bits of the CPA. */ 392 + uint32_t cpa_lo; 393 + /** The high bits of the CPA. */ 394 + uint16_t cpa_hi : 15; 395 + /** Should this packet be sent directly from caches instead of DRAM, 396 + * using hash-for-home to locate the packet data? 397 + */ 398 + uint16_t hash_for_home : 1; 399 + /** The length in bytes. */ 400 + uint16_t length; 401 + } lepp_frag_t; 402 + 403 + 404 + /** An LEPP command that handles TSO. */ 405 + typedef struct 406 + { 407 + /** Is this a TSO transfer? 408 + * 409 + * NOTE: This field is always 1, to distinguish it from 410 + * lepp_cmd_t. It must come first! 411 + */ 412 + uint8_t tso : 1; 413 + 414 + /** Unused padding bits. */ 415 + uint8_t _unused : 7; 416 + 417 + /** Size of the header[] array in bytes. It must be in the range 418 + * [40, 127], which are the smallest header for a TCP packet over 419 + * Ethernet and the maximum possible prepend size supported by 420 + * hardware, respectively. Note that the array storage must be 421 + * padded out to a multiple of four bytes so that the following 422 + * LEPP command is aligned properly. 
423 + */ 424 + uint8_t header_size; 425 + 426 + /** Byte offset of the IP header in header[]. */ 427 + uint8_t ip_offset; 428 + 429 + /** Byte offset of the TCP header in header[]. */ 430 + uint8_t tcp_offset; 431 + 432 + /** The number of bytes to use for the payload of each packet, 433 + * except of course the last one, which may not have enough bytes. 434 + * This means that each Ethernet packet except the last will have a 435 + * size of header_size + payload_size. 436 + */ 437 + uint16_t payload_size; 438 + 439 + /** The length of the 'frags' array that follows this struct. */ 440 + uint16_t num_frags; 441 + 442 + /** The actual frags. */ 443 + lepp_frag_t frags[0 /* Variable-sized; num_frags entries. */]; 444 + 445 + /* 446 + * The packet header template logically follows frags[], 447 + * but you can't declare that in C. 448 + * 449 + * uint32_t header[header_size_in_words_rounded_up]; 450 + */ 451 + 452 + } lepp_tso_cmd_t; 453 + 454 + 455 + /** An LEPP completion ring entry. */ 456 + typedef void* lepp_comp_t; 457 + 458 + 459 + /** Maximum number of frags for one TSO command. This is adapted from 460 + * linux's "MAX_SKB_FRAGS", and presumably over-estimates by one, for 461 + * our page size of exactly 65536. We add one for a "body" fragment. 462 + */ 463 + #define LEPP_MAX_FRAGS (65536 / HV_PAGE_SIZE_SMALL + 2 + 1) 464 + 465 + /** Total number of bytes needed for an lepp_tso_cmd_t. */ 466 + #define LEPP_TSO_CMD_SIZE(num_frags, header_size) \ 467 + (sizeof(lepp_tso_cmd_t) + \ 468 + (num_frags) * sizeof(lepp_frag_t) + \ 469 + (((header_size) + 3) & -4)) 470 + 471 + /** The size of the lepp "cmd" queue. */ 472 + #define LEPP_CMD_QUEUE_BYTES \ 473 + (((CHIP_L2_CACHE_SIZE() - 2 * CHIP_L2_LINE_SIZE()) / \ 474 + (sizeof(lepp_cmd_t) + sizeof(lepp_comp_t))) * sizeof(lepp_cmd_t)) 475 + 476 + /** The largest possible command that can go in lepp_queue_t::cmds[]. */ 477 + #define LEPP_MAX_CMD_SIZE LEPP_TSO_CMD_SIZE(LEPP_MAX_FRAGS, 128) 478 + 479 + /** The largest possible value of lepp_queue_t::cmd_{head, tail} (inclusive). 480 + */ 481 + #define LEPP_CMD_LIMIT \ 482 + (LEPP_CMD_QUEUE_BYTES - LEPP_MAX_CMD_SIZE) 483 + 484 + /** The maximum number of completions in an LEPP queue. */ 485 + #define LEPP_COMP_QUEUE_SIZE \ 486 + ((LEPP_CMD_LIMIT + sizeof(lepp_cmd_t) - 1) / sizeof(lepp_cmd_t)) 487 + 488 + /** Increment an index modulo the queue size. */ 489 + #define LEPP_QINC(var) \ 490 + (var = __insn_mnz(var - (LEPP_COMP_QUEUE_SIZE - 1), var + 1)) 491 + 492 + /** A queue used to convey egress commands from the client to LEPP. */ 493 + typedef struct 494 + { 495 + /** Index of first completion not yet processed by user code. 496 + * If this is equal to comp_busy, there are no such completions. 497 + * 498 + * NOTE: This is only read/written by the user. 499 + */ 500 + unsigned int comp_head; 501 + 502 + /** Index of first completion record not yet completed. 503 + * If this is equal to comp_tail, there are no such completions. 504 + * This index gets advanced (modulo LEPP_QUEUE_SIZE) whenever 505 + * a command with the 'completion' bit set is finished. 506 + * 507 + * NOTE: This is only written by LEPP, only read by the user. 508 + */ 509 + volatile unsigned int comp_busy; 510 + 511 + /** Index of the first empty slot in the completion ring. 512 + * Entries from this up to but not including comp_head (in ring order) 513 + * can be filled in with completion data. 514 + * 515 + * NOTE: This is only read/written by the user. 
516 + */ 517 + unsigned int comp_tail; 518 + 519 + /** Byte index of first command enqueued for LEPP but not yet processed. 520 + * 521 + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. 522 + * 523 + * NOTE: LEPP advances this counter as soon as it no longer needs 524 + * the cmds[] storage for this entry, but the transfer is not actually 525 + * complete (i.e. the buffer pointed to by the command is no longer 526 + * needed) until comp_busy advances. 527 + * 528 + * If this is equal to cmd_tail, the ring is empty. 529 + * 530 + * NOTE: This is only written by LEPP, only read by the user. 531 + */ 532 + volatile unsigned int cmd_head; 533 + 534 + /** Byte index of first empty slot in the command ring. This field can 535 + * be incremented up to but not equal to cmd_head (because that would 536 + * mean the ring is empty). 537 + * 538 + * This is always divisible by sizeof(void*) and always <= LEPP_CMD_LIMIT. 539 + * 540 + * NOTE: This is read/written by the user, only read by LEPP. 541 + */ 542 + volatile unsigned int cmd_tail; 543 + 544 + /** A ring of variable-sized egress DMA commands. 545 + * 546 + * NOTE: Only written by the user, only read by LEPP. 547 + */ 548 + char cmds[LEPP_CMD_QUEUE_BYTES] 549 + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); 550 + 551 + /** A ring of user completion data. 552 + * NOTE: Only read/written by the user. 553 + */ 554 + lepp_comp_t comps[LEPP_COMP_QUEUE_SIZE] 555 + __attribute__((aligned(CHIP_L2_LINE_SIZE()))); 556 + } lepp_queue_t; 557 + 558 + 559 + /** An internal helper function for determining the number of entries 560 + * available in a ring buffer, given that there is one sentinel. 561 + */ 562 + static inline unsigned int 563 + _lepp_num_free_slots(unsigned int head, unsigned int tail) 564 + { 565 + /* 566 + * One entry is reserved for use as a sentinel, to distinguish 567 + * "empty" from "full". So we compute 568 + * (head - tail - 1) % LEPP_QUEUE_SIZE, but without using a slow % operation. 569 + */ 570 + return (head - tail - 1) + ((head <= tail) ? LEPP_COMP_QUEUE_SIZE : 0); 571 + } 572 + 573 + 574 + /** Returns how many new comp entries can be enqueued. */ 575 + static inline unsigned int 576 + lepp_num_free_comp_slots(const lepp_queue_t* q) 577 + { 578 + return _lepp_num_free_slots(q->comp_head, q->comp_tail); 579 + } 580 + 581 + static inline int 582 + lepp_qsub(int v1, int v2) 583 + { 584 + int delta = v1 - v2; 585 + return delta + ((delta >> 31) & LEPP_COMP_QUEUE_SIZE); 586 + } 587 + 588 + 589 + /** FIXME: Check this from linux, via a new "pwrite()" call. */ 590 + #define LIPP_VERSION 1 591 + 592 + 593 + /** We use exactly two bytes of alignment padding. */ 594 + #define LIPP_PACKET_PADDING 2 595 + 596 + /** The minimum size of a "small" buffer (including the padding). */ 597 + #define LIPP_SMALL_PACKET_SIZE 128 598 + 599 + /* 600 + * NOTE: The following two values should total to less than around 601 + * 13582, to keep the total size used for "lipp_state_t" below 64K. 602 + */ 603 + 604 + /** The maximum number of "small" buffers. 605 + * This is enough for 53 network cpus with 128 credits. Note that 606 + * if these are exhausted, we will fall back to using large buffers. 607 + */ 608 + #define LIPP_SMALL_BUFFERS 6785 609 + 610 + /** The maximum number of "large" buffers. 611 + * This is enough for 53 network cpus with 128 credits. 612 + */ 613 + #define LIPP_LARGE_BUFFERS 6785 614 + 615 + #endif /* __DRV_XGBE_INTF_H__ */
+122
arch/tile/include/hv/netio_errors.h
+/*
+ * Copyright 2010 Tilera Corporation. All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
+ * NON INFRINGEMENT. See the GNU General Public License for
+ * more details.
+ */
+
+/**
+ * Error codes returned from NetIO routines.
+ */
+
+#ifndef __NETIO_ERRORS_H__
+#define __NETIO_ERRORS_H__
+
+/**
+ * @addtogroup error
+ *
+ * @brief The error codes returned by NetIO functions.
+ *
+ * NetIO functions return 0 (defined as ::NETIO_NO_ERROR) on success, and
+ * a negative value if an error occurs.
+ *
+ * In cases where a NetIO function failed due to an error reported by
+ * system libraries, the error code will be the negation of the
+ * system errno at the time of failure.  The @ref netio_strerror()
+ * function will deliver error strings for both NetIO and system error
+ * codes.
+ *
+ * @{
+ */
+
+/** The set of all NetIO errors. */
+typedef enum
+{
+  /** Operation successfully completed. */
+  NETIO_NO_ERROR           = 0,
+
+  /** A packet was successfully retrieved from an input queue. */
+  NETIO_PKT                = 0,
+
+  /** Largest NetIO error number. */
+  NETIO_ERR_MAX            = -701,
+
+  /** The tile is not registered with the IPP. */
+  NETIO_NOT_REGISTERED     = -701,
+
+  /** No packet was available to retrieve from the input queue. */
+  NETIO_NOPKT              = -702,
+
+  /** The requested function is not implemented. */
+  NETIO_NOT_IMPLEMENTED    = -703,
+
+  /** On a registration operation, the target queue already has the maximum
+   *  number of tiles registered for it, and no more may be added.  On a
+   *  packet send operation, the output queue is full and nothing more can
+   *  be queued until some of the queued packets are actually transmitted. */
+  NETIO_QUEUE_FULL         = -704,
+
+  /** The calling process or thread is not bound to exactly one CPU. */
+  NETIO_BAD_AFFINITY       = -705,
+
+  /** Cannot allocate memory on requested controllers. */
+  NETIO_CANNOT_HOME        = -706,
+
+  /** On a registration operation, the IPP specified is not configured
+   *  to support the options requested; for instance, the application
+   *  wants a specific type of tagged headers which the configured IPP
+   *  doesn't support.  Or, the supplied configuration information is
+   *  not self-consistent, or is out of range; for instance, specifying
+   *  both NETIO_RECV and NETIO_NO_RECV, or asking for more than
+   *  NETIO_MAX_SEND_BUFFERS to be preallocated.  On a VLAN or bucket
+   *  configure operation, the number of items, or the base item, was
+   *  out of range.
+   */
+  NETIO_BAD_CONFIG         = -707,
+
+  /** Too many tiles have registered to transmit packets. */
+  NETIO_TOOMANY_XMIT       = -708,
+
+  /** Packet transmission was attempted on a queue which was registered
+      with transmit disabled. */
+  NETIO_UNREG_XMIT         = -709,
+
+  /** This tile is already registered with the IPP. */
+  NETIO_ALREADY_REGISTERED = -710,
+
+  /** The Ethernet link is down. The application should try again later. */
+  NETIO_LINK_DOWN          = -711,
+
+  /** An invalid memory buffer has been specified.  This may be an unmapped
+   * virtual address, or one which does not meet alignment requirements.
+   * For netio_input_register(), this error may be returned when multiple
+   * processes specify different memory regions to be used for NetIO
+   * buffers.  That can happen if these processes specify explicit memory
+   * regions with the ::NETIO_FIXED_BUFFER_VA flag, or if tmc_cmem_init()
+   * has not been called by a common ancestor of the processes.
+   */
+  NETIO_FAULT              = -712,
+
+  /** Cannot combine user-managed shared memory and cache coherence. */
+  NETIO_BAD_CACHE_CONFIG   = -713,
+
+  /** Smallest NetIO error number. */
+  NETIO_ERR_MIN            = -713,
+
+#ifndef __DOXYGEN__
+  /** Used internally to mean that no response is needed; never returned to
+   *  an application. */
+  NETIO_NO_RESPONSE        = 1
+#endif
+} netio_error_t;
+
+/** @} */
+
+#endif /* __NETIO_ERRORS_H__ */
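Because the NetIO error space is a contiguous negative range bracketed by NETIO_ERR_MAX and NETIO_ERR_MIN, callers can cheaply distinguish NetIO errors from negated system errno values. A hypothetical classifier using only the constants defined above (netio_strerror() remains the real facility for human-readable strings; my_classify_rc() is invented for illustration):

static const char *my_classify_rc(int rc)
{
	if (rc == NETIO_NO_ERROR)
		return "success";
	if (rc == NETIO_NOPKT)
		return "input queue empty; poll again";
	if (rc <= NETIO_ERR_MAX && rc >= NETIO_ERR_MIN)
		return "NetIO-specific error";
	if (rc < 0)
		return "negated system errno";
	return "not an error code";
}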
+2975
arch/tile/include/hv/netio_intf.h
··· 1 + /* 2 + * Copyright 2010 Tilera Corporation. All Rights Reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation, version 2. 7 + * 8 + * This program is distributed in the hope that it will be useful, but 9 + * WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 11 + * NON INFRINGEMENT. See the GNU General Public License for 12 + * more details. 13 + */ 14 + 15 + /** 16 + * NetIO interface structures and macros. 17 + */ 18 + 19 + #ifndef __NETIO_INTF_H__ 20 + #define __NETIO_INTF_H__ 21 + 22 + #include <hv/netio_errors.h> 23 + 24 + #ifdef __KERNEL__ 25 + #include <linux/types.h> 26 + #else 27 + #include <stdint.h> 28 + #endif 29 + 30 + #if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) 31 + #include <assert.h> 32 + #define netio_assert assert /**< Enable assertions from macros */ 33 + #else 34 + #define netio_assert(...) ((void)(0)) /**< Disable assertions from macros */ 35 + #endif 36 + 37 + /* 38 + * If none of these symbols are defined, we're building libnetio in an 39 + * environment where we have pthreads, so we'll enable locking. 40 + */ 41 + #if !defined(__HV__) && !defined(__BOGUX__) && !defined(__KERNEL__) && \ 42 + !defined(__NEWLIB__) 43 + #define _NETIO_PTHREAD /**< Include a mutex in netio_queue_t below */ 44 + 45 + /* 46 + * If NETIO_UNLOCKED is defined, we don't do use per-cpu locks on 47 + * per-packet NetIO operations. We still do pthread locking on things 48 + * like netio_input_register, though. This is used for building 49 + * libnetio_unlocked. 50 + */ 51 + #ifndef NETIO_UNLOCKED 52 + 53 + /* Avoid PLT overhead by using our own inlined per-cpu lock. */ 54 + #include <sched.h> 55 + typedef int _netio_percpu_mutex_t; 56 + 57 + static __inline int 58 + _netio_percpu_mutex_init(_netio_percpu_mutex_t* lock) 59 + { 60 + *lock = 0; 61 + return 0; 62 + } 63 + 64 + static __inline int 65 + _netio_percpu_mutex_lock(_netio_percpu_mutex_t* lock) 66 + { 67 + while (__builtin_expect(__insn_tns(lock), 0)) 68 + sched_yield(); 69 + return 0; 70 + } 71 + 72 + static __inline int 73 + _netio_percpu_mutex_unlock(_netio_percpu_mutex_t* lock) 74 + { 75 + *lock = 0; 76 + return 0; 77 + } 78 + 79 + #else /* NETIO_UNLOCKED */ 80 + 81 + /* Don't do any locking for per-packet NetIO operations. */ 82 + typedef int _netio_percpu_mutex_t; 83 + #define _netio_percpu_mutex_init(L) 84 + #define _netio_percpu_mutex_lock(L) 85 + #define _netio_percpu_mutex_unlock(L) 86 + 87 + #endif /* NETIO_UNLOCKED */ 88 + #endif /* !__HV__, !__BOGUX, !__KERNEL__, !__NEWLIB__ */ 89 + 90 + /** How many tiles can register for a given queue. 91 + * @ingroup setup */ 92 + #define NETIO_MAX_TILES_PER_QUEUE 64 93 + 94 + 95 + /** Largest permissible queue identifier. 96 + * @ingroup setup */ 97 + #define NETIO_MAX_QUEUE_ID 255 98 + 99 + 100 + #ifndef __DOXYGEN__ 101 + 102 + /* Metadata packet checksum/ethertype flags. */ 103 + 104 + /** The L4 checksum has not been calculated. */ 105 + #define _NETIO_PKT_NO_L4_CSUM_SHIFT 0 106 + #define _NETIO_PKT_NO_L4_CSUM_RMASK 1 107 + #define _NETIO_PKT_NO_L4_CSUM_MASK \ 108 + (_NETIO_PKT_NO_L4_CSUM_RMASK << _NETIO_PKT_NO_L4_CSUM_SHIFT) 109 + 110 + /** The L3 checksum has not been calculated. 
*/ 111 + #define _NETIO_PKT_NO_L3_CSUM_SHIFT 1 112 + #define _NETIO_PKT_NO_L3_CSUM_RMASK 1 113 + #define _NETIO_PKT_NO_L3_CSUM_MASK \ 114 + (_NETIO_PKT_NO_L3_CSUM_RMASK << _NETIO_PKT_NO_L3_CSUM_SHIFT) 115 + 116 + /** The L3 checksum is incorrect (or perhaps has not been calculated). */ 117 + #define _NETIO_PKT_BAD_L3_CSUM_SHIFT 2 118 + #define _NETIO_PKT_BAD_L3_CSUM_RMASK 1 119 + #define _NETIO_PKT_BAD_L3_CSUM_MASK \ 120 + (_NETIO_PKT_BAD_L3_CSUM_RMASK << _NETIO_PKT_BAD_L3_CSUM_SHIFT) 121 + 122 + /** The Ethernet packet type is unrecognized. */ 123 + #define _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT 3 124 + #define _NETIO_PKT_TYPE_UNRECOGNIZED_RMASK 1 125 + #define _NETIO_PKT_TYPE_UNRECOGNIZED_MASK \ 126 + (_NETIO_PKT_TYPE_UNRECOGNIZED_RMASK << \ 127 + _NETIO_PKT_TYPE_UNRECOGNIZED_SHIFT) 128 + 129 + /* Metadata packet type flags. */ 130 + 131 + /** Where the packet type bits are; this field is the index into 132 + * _netio_pkt_info. */ 133 + #define _NETIO_PKT_TYPE_SHIFT 4 134 + #define _NETIO_PKT_TYPE_RMASK 0x3F 135 + 136 + /** How many VLAN tags the packet has, and, if we have two, which one we 137 + * actually grouped on. A VLAN within a proprietary (Marvell or Broadcom) 138 + * tag is counted here. */ 139 + #define _NETIO_PKT_VLAN_SHIFT 4 140 + #define _NETIO_PKT_VLAN_RMASK 0x3 141 + #define _NETIO_PKT_VLAN_MASK \ 142 + (_NETIO_PKT_VLAN_RMASK << _NETIO_PKT_VLAN_SHIFT) 143 + #define _NETIO_PKT_VLAN_NONE 0 /* No VLAN tag. */ 144 + #define _NETIO_PKT_VLAN_ONE 1 /* One VLAN tag. */ 145 + #define _NETIO_PKT_VLAN_TWO_OUTER 2 /* Two VLAN tags, outer one used. */ 146 + #define _NETIO_PKT_VLAN_TWO_INNER 3 /* Two VLAN tags, inner one used. */ 147 + 148 + /** Which proprietary tags the packet has. */ 149 + #define _NETIO_PKT_TAG_SHIFT 6 150 + #define _NETIO_PKT_TAG_RMASK 0x3 151 + #define _NETIO_PKT_TAG_MASK \ 152 + (_NETIO_PKT_TAG_RMASK << _NETIO_PKT_TAG_SHIFT) 153 + #define _NETIO_PKT_TAG_NONE 0 /* No proprietary tags. */ 154 + #define _NETIO_PKT_TAG_MRVL 1 /* Marvell HyperG.Stack tags. */ 155 + #define _NETIO_PKT_TAG_MRVL_EXT 2 /* HyperG.Stack extended tags. */ 156 + #define _NETIO_PKT_TAG_BRCM 3 /* Broadcom HiGig tags. */ 157 + 158 + /** Whether a packet has an LLC + SNAP header. */ 159 + #define _NETIO_PKT_SNAP_SHIFT 8 160 + #define _NETIO_PKT_SNAP_RMASK 0x1 161 + #define _NETIO_PKT_SNAP_MASK \ 162 + (_NETIO_PKT_SNAP_RMASK << _NETIO_PKT_SNAP_SHIFT) 163 + 164 + /* NOTE: Bits 9 and 10 are unused. */ 165 + 166 + /** Length of any custom data before the L2 header, in words. */ 167 + #define _NETIO_PKT_CUSTOM_LEN_SHIFT 11 168 + #define _NETIO_PKT_CUSTOM_LEN_RMASK 0x1F 169 + #define _NETIO_PKT_CUSTOM_LEN_MASK \ 170 + (_NETIO_PKT_CUSTOM_LEN_RMASK << _NETIO_PKT_CUSTOM_LEN_SHIFT) 171 + 172 + /** The L4 checksum is incorrect (or perhaps has not been calculated). */ 173 + #define _NETIO_PKT_BAD_L4_CSUM_SHIFT 16 174 + #define _NETIO_PKT_BAD_L4_CSUM_RMASK 0x1 175 + #define _NETIO_PKT_BAD_L4_CSUM_MASK \ 176 + (_NETIO_PKT_BAD_L4_CSUM_RMASK << _NETIO_PKT_BAD_L4_CSUM_SHIFT) 177 + 178 + /** Length of the L2 header, in words. */ 179 + #define _NETIO_PKT_L2_LEN_SHIFT 17 180 + #define _NETIO_PKT_L2_LEN_RMASK 0x1F 181 + #define _NETIO_PKT_L2_LEN_MASK \ 182 + (_NETIO_PKT_L2_LEN_RMASK << _NETIO_PKT_L2_LEN_SHIFT) 183 + 184 + 185 + /* Flags in minimal packet metadata. */ 186 + 187 + /** We need an eDMA checksum on this packet. 
*/ 188 + #define _NETIO_PKT_NEED_EDMA_CSUM_SHIFT 0 189 + #define _NETIO_PKT_NEED_EDMA_CSUM_RMASK 1 190 + #define _NETIO_PKT_NEED_EDMA_CSUM_MASK \ 191 + (_NETIO_PKT_NEED_EDMA_CSUM_RMASK << _NETIO_PKT_NEED_EDMA_CSUM_SHIFT) 192 + 193 + /* Data within the packet information table. */ 194 + 195 + /* Note that, for efficiency, code which uses these fields assumes that none 196 + * of the shift values below are zero. See uses below for an explanation. */ 197 + 198 + /** Offset within the L2 header of the innermost ethertype (in halfwords). */ 199 + #define _NETIO_PKT_INFO_ETYPE_SHIFT 6 200 + #define _NETIO_PKT_INFO_ETYPE_RMASK 0x1F 201 + 202 + /** Offset within the L2 header of the VLAN tag (in halfwords). */ 203 + #define _NETIO_PKT_INFO_VLAN_SHIFT 11 204 + #define _NETIO_PKT_INFO_VLAN_RMASK 0x1F 205 + 206 + #endif 207 + 208 + 209 + /** The size of a memory buffer representing a small packet. 210 + * @ingroup egress */ 211 + #define SMALL_PACKET_SIZE 256 212 + 213 + /** The size of a memory buffer representing a large packet. 214 + * @ingroup egress */ 215 + #define LARGE_PACKET_SIZE 2048 216 + 217 + /** The size of a memory buffer representing a jumbo packet. 218 + * @ingroup egress */ 219 + #define JUMBO_PACKET_SIZE (12 * 1024) 220 + 221 + 222 + /* Common ethertypes. 223 + * @ingroup ingress */ 224 + /** @{ */ 225 + /** The ethertype of IPv4. */ 226 + #define ETHERTYPE_IPv4 (0x0800) 227 + /** The ethertype of ARP. */ 228 + #define ETHERTYPE_ARP (0x0806) 229 + /** The ethertype of VLANs. */ 230 + #define ETHERTYPE_VLAN (0x8100) 231 + /** The ethertype of a Q-in-Q header. */ 232 + #define ETHERTYPE_Q_IN_Q (0x9100) 233 + /** The ethertype of IPv6. */ 234 + #define ETHERTYPE_IPv6 (0x86DD) 235 + /** The ethertype of MPLS. */ 236 + #define ETHERTYPE_MPLS (0x8847) 237 + /** @} */ 238 + 239 + 240 + /** The possible return values of NETIO_PKT_STATUS. 241 + * @ingroup ingress 242 + */ 243 + typedef enum 244 + { 245 + /** No problems were detected with this packet. */ 246 + NETIO_PKT_STATUS_OK, 247 + /** The packet is undersized; this is expected behavior if the packet's 248 + * ethertype is unrecognized, but otherwise the packet is likely corrupt. */ 249 + NETIO_PKT_STATUS_UNDERSIZE, 250 + /** The packet is oversized and some trailing bytes have been discarded. 251 + This is expected behavior for short packets, since it's impossible to 252 + precisely determine the amount of padding which may have been added to 253 + them to make them meet the minimum Ethernet packet size. */ 254 + NETIO_PKT_STATUS_OVERSIZE, 255 + /** The packet was judged to be corrupt by hardware (for instance, it had 256 + a bad CRC, or part of it was discarded due to lack of buffer space in 257 + the I/O shim) and should be discarded. */ 258 + NETIO_PKT_STATUS_BAD 259 + } netio_pkt_status_t; 260 + 261 + 262 + /** Log2 of how many buckets we have. */ 263 + #define NETIO_LOG2_NUM_BUCKETS (10) 264 + 265 + /** How many buckets we have. 266 + * @ingroup ingress */ 267 + #define NETIO_NUM_BUCKETS (1 << NETIO_LOG2_NUM_BUCKETS) 268 + 269 + 270 + /** 271 + * @brief A group-to-bucket identifier. 272 + * 273 + * @ingroup setup 274 + * 275 + * This tells us what to do with a given group. 276 + */ 277 + typedef union { 278 + /** The header broken down into bits. 
*/ 279 + struct { 280 + /** Whether we should balance on L4, if available */ 281 + unsigned int __balance_on_l4:1; 282 + /** Whether we should balance on L3, if available */ 283 + unsigned int __balance_on_l3:1; 284 + /** Whether we should balance on L2, if available */ 285 + unsigned int __balance_on_l2:1; 286 + /** Reserved for future use */ 287 + unsigned int __reserved:1; 288 + /** The base bucket to use to send traffic */ 289 + unsigned int __bucket_base:NETIO_LOG2_NUM_BUCKETS; 290 + /** The mask to apply to the balancing value. This must be one less 291 + * than a power of two, e.g. 0x3 or 0xFF. 292 + */ 293 + unsigned int __bucket_mask:NETIO_LOG2_NUM_BUCKETS; 294 + /** Pad to 32 bits */ 295 + unsigned int __padding:(32 - 4 - 2 * NETIO_LOG2_NUM_BUCKETS); 296 + } bits; 297 + /** To send out the IDN. */ 298 + unsigned int word; 299 + } 300 + netio_group_t; 301 + 302 + 303 + /** 304 + * @brief A VLAN-to-bucket identifier. 305 + * 306 + * @ingroup setup 307 + * 308 + * This tells us what to do with a given VLAN. 309 + */ 310 + typedef netio_group_t netio_vlan_t; 311 + 312 + 313 + /** 314 + * A bucket-to-queue mapping. 315 + * @ingroup setup 316 + */ 317 + typedef unsigned char netio_bucket_t; 318 + 319 + 320 + /** 321 + * A packet size can always fit in a netio_size_t. 322 + * @ingroup setup 323 + */ 324 + typedef unsigned int netio_size_t; 325 + 326 + 327 + /** 328 + * @brief Ethernet standard (ingress) packet metadata. 329 + * 330 + * @ingroup ingress 331 + * 332 + * This is additional data associated with each packet. 333 + * This structure is opaque and accessed through the @ref ingress. 334 + * 335 + * Also, the buffer population operation currently assumes that standard 336 + * metadata is at least as large as minimal metadata, and will need to be 337 + * modified if that is no longer the case. 338 + */ 339 + typedef struct 340 + { 341 + #ifdef __DOXYGEN__ 342 + /** This structure is opaque. */ 343 + unsigned char opaque[24]; 344 + #else 345 + /** The overall ordinal of the packet */ 346 + unsigned int __packet_ordinal; 347 + /** The ordinal of the packet within the group */ 348 + unsigned int __group_ordinal; 349 + /** The best flow hash IPP could compute. */ 350 + unsigned int __flow_hash; 351 + /** Flags pertaining to checksum calculation, packet type, etc. */ 352 + unsigned int __flags; 353 + /** The first word of "user data". */ 354 + unsigned int __user_data_0; 355 + /** The second word of "user data". */ 356 + unsigned int __user_data_1; 357 + #endif 358 + } 359 + netio_pkt_metadata_t; 360 + 361 + 362 + /** To ensure that the L3 header is aligned mod 4, the L2 header should be 363 + * aligned mod 4 plus 2, since every supported L2 header is 4n + 2 bytes 364 + * long. The standard way to do this is to simply add 2 bytes of padding 365 + * before the L2 header. 366 + */ 367 + #define NETIO_PACKET_PADDING 2 368 + 369 + 370 + 371 + /** 372 + * @brief Ethernet minimal (egress) packet metadata. 373 + * 374 + * @ingroup egress 375 + * 376 + * This structure represents information about packets which have 377 + * been processed by @ref netio_populate_buffer() or 378 + * @ref netio_populate_prepend_buffer(). This structure is opaque 379 + * and accessed through the @ref egress. 380 + * 381 + * @internal This structure is actually copied into the memory used by 382 + * standard metadata, which is assumed to be large enough. 383 + */ 384 + typedef struct 385 + { 386 + #ifdef __DOXYGEN__ 387 + /** This structure is opaque. 
*/
388 + unsigned char opaque[14];
389 + #else
390 + /** The offset of the L2 header from the start of the packet data. */
391 + unsigned short l2_offset;
392 + /** The offset of the L3 header from the start of the packet data. */
393 + unsigned short l3_offset;
394 + /** Where to write the checksum. */
395 + unsigned char csum_location;
396 + /** Where to start checksumming from. */
397 + unsigned char csum_start;
398 + /** Flags pertaining to checksum calculation etc. */
399 + unsigned short flags;
400 + /** The L2 length of the packet. */
401 + unsigned short l2_length;
402 + /** The checksum with which to seed the checksum generator. */
403 + unsigned short csum_seed;
404 + /** How much to checksum. */
405 + unsigned short csum_length;
406 + #endif
407 + }
408 + netio_pkt_minimal_metadata_t;
409 + 
410 + 
411 + #ifndef __DOXYGEN__
412 + 
413 + /**
414 + * @brief An I/O notification header.
415 + *
416 + * This is the first word of data received from an I/O shim in a notification
417 + * packet. It contains framing and status information.
418 + */
419 + typedef union
420 + {
421 + unsigned int word; /**< The whole word. */
422 + /** The various fields. */
423 + struct
424 + {
425 + unsigned int __channel:7; /**< Resource channel. */
426 + unsigned int __type:4; /**< Type. */
427 + unsigned int __ack:1; /**< Whether an acknowledgement is needed. */
428 + unsigned int __reserved:1; /**< Reserved. */
429 + unsigned int __protocol:1; /**< A protocol-specific word is added. */
430 + unsigned int __status:2; /**< Status of the transfer. */
431 + unsigned int __framing:2; /**< Framing of the transfer. */
432 + unsigned int __transfer_size:14; /**< Transfer size in bytes (total). */
433 + } bits;
434 + }
435 + __netio_pkt_notif_t;
436 + 
437 + 
438 + /**
439 + * Returns the base address of the packet.
440 + */
441 + #define _NETIO_PKT_HANDLE_BASE(p) \
442 + ((unsigned char*)((p).word & 0xFFFFFFC0))
443 + 
444 + /**
445 + * Returns the base address of the packet.
446 + */
447 + #define _NETIO_PKT_BASE(p) \
448 + _NETIO_PKT_HANDLE_BASE(p->__packet)
449 + 
450 + /**
451 + * @brief An I/O notification packet (second word)
452 + *
453 + * This is the second word of data received from an I/O shim in a notification
454 + * packet. This is the virtual address of the packet buffer, plus some flag
455 + * bits. (The virtual address of the packet is always 256-byte aligned so we
456 + * have room for 8 bits' worth of flags in the low 8 bits.)
457 + *
458 + * @internal
459 + * NOTE: The low two bits must contain "__queue", so the "packet size"
460 + * (SIZE_SMALL, SIZE_LARGE, or SIZE_JUMBO) can be determined quickly.
461 + *
462 + * If __addr or __offset are moved, _NETIO_PKT_BASE
463 + * (defined just above) must be changed.
464 + */
465 + typedef union
466 + {
467 + unsigned int word; /**< The whole word. */
468 + /** The various fields. */
469 + struct
470 + {
471 + /** Which queue the packet will be returned to once it is sent back to
472 + the IPP. This is one of the SIZE_xxx values. */
473 + unsigned int __queue:2;
474 + 
475 + /** The IPP handle of the sending IPP. */
476 + unsigned int __ipp_handle:2;
477 + 
478 + /** Reserved for future use. */
479 + unsigned int __reserved:1;
480 + 
481 + /** If 1, this packet has minimal (egress) metadata; otherwise, it
482 + has standard (ingress) metadata. */
483 + unsigned int __minimal:1;
484 + 
485 + /** Offset of the metadata within the packet. This value is multiplied
486 + * by 64 and added to the base packet address to get the metadata
487 + * address.
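 * (Sketch of the computation: metadata address = packet base address
 * + (__offset << 6), i.e. the handle word with its low six bits
 * cleared, as _NETIO_PKT_HANDLE_BASE() above computes.)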
Note that this field is aligned within the word such that 488 + * you can easily extract the metadata address with a 26-bit mask. */ 489 + unsigned int __offset:2; 490 + 491 + /** The top 24 bits of the packet's virtual address. */ 492 + unsigned int __addr:24; 493 + } bits; 494 + } 495 + __netio_pkt_handle_t; 496 + 497 + #endif /* !__DOXYGEN__ */ 498 + 499 + 500 + /** 501 + * @brief A handle for an I/O packet's storage. 502 + * @ingroup ingress 503 + * 504 + * netio_pkt_handle_t encodes the concept of a ::netio_pkt_t with its 505 + * packet metadata removed. It is a much smaller type that exists to 506 + * facilitate applications where the full ::netio_pkt_t type is too 507 + * large, such as those that cache enormous numbers of packets or wish 508 + * to transmit packet descriptors over the UDN. 509 + * 510 + * Because there is no metadata, most ::netio_pkt_t operations cannot be 511 + * performed on a netio_pkt_handle_t. It supports only 512 + * netio_free_handle() (to free the buffer) and 513 + * NETIO_PKT_CUSTOM_DATA_H() (to access a pointer to its contents). 514 + * The application must acquire any additional metadata it wants from the 515 + * original ::netio_pkt_t and record it separately. 516 + * 517 + * A netio_pkt_handle_t can be extracted from a ::netio_pkt_t by calling 518 + * NETIO_PKT_HANDLE(). An invalid handle (analogous to NULL) can be 519 + * created by assigning the value ::NETIO_PKT_HANDLE_NONE. A handle can 520 + * be tested for validity with NETIO_PKT_HANDLE_IS_VALID(). 521 + */ 522 + typedef struct 523 + { 524 + unsigned int word; /**< Opaque bits. */ 525 + } netio_pkt_handle_t; 526 + 527 + /** 528 + * @brief A packet descriptor. 529 + * 530 + * @ingroup ingress 531 + * @ingroup egress 532 + * 533 + * This data structure represents a packet. The structure is manipulated 534 + * through the @ref ingress and the @ref egress. 535 + * 536 + * While the contents of a netio_pkt_t are opaque, the structure itself is 537 + * portable. This means that it may be shared between all tiles which have 538 + * done a netio_input_register() call for the interface on which the pkt_t 539 + * was initially received (via netio_get_packet()) or retrieved (via 540 + * netio_get_buffer()). The contents of a netio_pkt_t can be transmitted to 541 + * another tile via shared memory, or via a UDN message, or by other means. 542 + * The destination tile may then use the pkt_t as if it had originally been 543 + * received locally; it may read or write the packet's data, read its 544 + * metadata, free the packet, send the packet, transfer the netio_pkt_t to 545 + * yet another tile, and so forth. 546 + * 547 + * Once a netio_pkt_t has been transferred to a second tile, the first tile 548 + * should not reference the original copy; in particular, if more than one 549 + * tile frees or sends the same netio_pkt_t, the IPP's packet free lists will 550 + * become corrupted. Note also that each tile which reads or modifies 551 + * packet data must obey the memory coherency rules outlined in @ref input. 552 + */ 553 + typedef struct 554 + { 555 + #ifdef __DOXYGEN__ 556 + /** This structure is opaque. */ 557 + unsigned char opaque[32]; 558 + #else 559 + /** For an ingress packet (one with standard metadata), this is the 560 + * notification header we got from the I/O shim. For an egress packet 561 + * (one with minimal metadata), this word is zero if the packet has not 562 + * been populated, and nonzero if it has. 
*/
563 + __netio_pkt_notif_t __notif_header;
564 + 
565 + /** Virtual address of the packet buffer, plus state flags. */
566 + __netio_pkt_handle_t __packet;
567 + 
568 + /** Metadata associated with the packet. */
569 + netio_pkt_metadata_t __metadata;
570 + #endif
571 + }
572 + netio_pkt_t;
573 + 
574 + 
575 + #ifndef __DOXYGEN__
576 + 
577 + #define __NETIO_PKT_NOTIF_HEADER(pkt) ((pkt)->__notif_header)
578 + #define __NETIO_PKT_IPP_HANDLE(pkt) ((pkt)->__packet.bits.__ipp_handle)
579 + #define __NETIO_PKT_QUEUE(pkt) ((pkt)->__packet.bits.__queue)
580 + #define __NETIO_PKT_NOTIF_HEADER_M(mda, pkt) ((pkt)->__notif_header)
581 + #define __NETIO_PKT_IPP_HANDLE_M(mda, pkt) ((pkt)->__packet.bits.__ipp_handle)
582 + #define __NETIO_PKT_MINIMAL(pkt) ((pkt)->__packet.bits.__minimal)
583 + #define __NETIO_PKT_QUEUE_M(mda, pkt) ((pkt)->__packet.bits.__queue)
584 + #define __NETIO_PKT_FLAGS_M(mda, pkt) ((mda)->__flags)
585 + 
586 + /* Packet information table, used by the attribute access functions below. */
587 + extern const uint16_t _netio_pkt_info[];
588 + 
589 + #endif /* !__DOXYGEN__ */
590 + 
591 + 
592 + #ifndef __DOXYGEN__
593 + /* These macros are deprecated and will disappear in a future MDE release. */
594 + #define NETIO_PKT_GOOD_CHECKSUM(pkt) \
595 + NETIO_PKT_L4_CSUM_CORRECT(pkt)
596 + #define NETIO_PKT_GOOD_CHECKSUM_M(mda, pkt) \
597 + NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt)
598 + #endif /* !__DOXYGEN__ */
599 + 
600 + 
601 + /* Packet attribute access functions. */
602 + 
603 + /** Return a pointer to the metadata for a packet.
604 + * @ingroup ingress
605 + *
606 + * Calling this function once and passing the result to other retrieval
607 + * functions with a "_M" suffix usually improves performance. This
608 + * function must be called on an 'ingress' packet (i.e. one retrieved
609 + * by @ref netio_get_packet(), on which @ref netio_populate_buffer() or
610 + * @ref netio_populate_prepend_buffer() have not been called). Use of this
611 + * function on an 'egress' packet will cause an assertion failure.
612 + *
613 + * @param[in] pkt Packet on which to operate.
614 + * @return A pointer to the packet's standard metadata.
615 + */
616 + static __inline netio_pkt_metadata_t*
617 + NETIO_PKT_METADATA(netio_pkt_t* pkt)
618 + {
619 + netio_assert(!pkt->__packet.bits.__minimal);
620 + return &pkt->__metadata;
621 + }
622 + 
623 + 
624 + /** Return a pointer to the minimal metadata for a packet.
625 + * @ingroup egress
626 + *
627 + * Calling this function once and passing the result to other retrieval
628 + * functions with a "_MM" suffix usually improves performance. This
629 + * function must be called on an 'egress' packet (i.e. one on which
630 + * @ref netio_populate_buffer() or @ref netio_populate_prepend_buffer()
631 + * have been called, or one retrieved by @ref netio_get_buffer()). Use of
632 + * this function on an 'ingress' packet will cause an assertion failure.
633 + *
634 + * @param[in] pkt Packet on which to operate.
635 + * @return A pointer to the packet's minimal metadata.
636 + */
637 + static __inline netio_pkt_minimal_metadata_t*
638 + NETIO_PKT_MINIMAL_METADATA(netio_pkt_t* pkt)
639 + {
640 + netio_assert(pkt->__packet.bits.__minimal);
641 + return (netio_pkt_minimal_metadata_t*) &pkt->__metadata;
642 + }
643 + 
644 + 
645 + /** Determine whether a packet has 'minimal' metadata.
646 + * @ingroup pktfuncs
647 + *
648 + * This function will return nonzero if the packet is an 'egress'
649 + * packet (i.e.
one on which @ref netio_populate_buffer() or 650 + * @ref netio_populate_prepend_buffer() have been called, or one 651 + * retrieved by @ref netio_get_buffer()), and zero if the packet 652 + * is an 'ingress' packet (i.e. one retrieved by @ref netio_get_packet(), 653 + * which has not been converted into an 'egress' packet). 654 + * 655 + * @param[in] pkt Packet on which to operate. 656 + * @return Nonzero if the packet has minimal metadata. 657 + */ 658 + static __inline unsigned int 659 + NETIO_PKT_IS_MINIMAL(netio_pkt_t* pkt) 660 + { 661 + return pkt->__packet.bits.__minimal; 662 + } 663 + 664 + 665 + /** Return a handle for a packet's storage. 666 + * @ingroup pktfuncs 667 + * 668 + * @param[in] pkt Packet on which to operate. 669 + * @return A handle for the packet's storage. 670 + */ 671 + static __inline netio_pkt_handle_t 672 + NETIO_PKT_HANDLE(netio_pkt_t* pkt) 673 + { 674 + netio_pkt_handle_t h; 675 + h.word = pkt->__packet.word; 676 + return h; 677 + } 678 + 679 + 680 + /** A special reserved value indicating the absence of a packet handle. 681 + * 682 + * @ingroup pktfuncs 683 + */ 684 + #define NETIO_PKT_HANDLE_NONE ((netio_pkt_handle_t) { 0 }) 685 + 686 + 687 + /** Test whether a packet handle is valid. 688 + * 689 + * Applications may wish to use the reserved value NETIO_PKT_HANDLE_NONE 690 + * to indicate no packet at all. This function tests to see if a packet 691 + * handle is a real handle, not this special reserved value. 692 + * 693 + * @ingroup pktfuncs 694 + * 695 + * @param[in] handle Handle on which to operate. 696 + * @return One if the packet handle is valid, else zero. 697 + */ 698 + static __inline unsigned int 699 + NETIO_PKT_HANDLE_IS_VALID(netio_pkt_handle_t handle) 700 + { 701 + return handle.word != 0; 702 + } 703 + 704 + 705 + 706 + /** Return a pointer to the start of the packet's custom header. 707 + * A custom header may or may not be present, depending upon the IPP; its 708 + * contents and alignment are also IPP-dependent. Currently, none of the 709 + * standard IPPs supplied by Tilera produce a custom header. If present, 710 + * the custom header precedes the L2 header in the packet buffer. 711 + * @ingroup ingress 712 + * 713 + * @param[in] handle Handle on which to operate. 714 + * @return A pointer to start of the packet. 715 + */ 716 + static __inline unsigned char* 717 + NETIO_PKT_CUSTOM_DATA_H(netio_pkt_handle_t handle) 718 + { 719 + return _NETIO_PKT_HANDLE_BASE(handle) + NETIO_PACKET_PADDING; 720 + } 721 + 722 + 723 + /** Return the length of the packet's custom header. 724 + * A custom header may or may not be present, depending upon the IPP; its 725 + * contents and alignment are also IPP-dependent. Currently, none of the 726 + * standard IPPs supplied by Tilera produce a custom header. If present, 727 + * the custom header precedes the L2 header in the packet buffer. 728 + * 729 + * @ingroup ingress 730 + * 731 + * @param[in] mda Pointer to packet's standard metadata. 732 + * @param[in] pkt Packet on which to operate. 733 + * @return The length of the packet's custom header, in bytes. 734 + */ 735 + static __inline netio_size_t 736 + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 737 + { 738 + /* 739 + * Note that we effectively need to extract a quantity from the flags word 740 + * which is measured in words, and then turn it into bytes by shifting 741 + * it left by 2. We do this all at once by just shifting right two less 742 + * bits, and shifting the mask up two bits. 
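 * In other words, ((flags >> SHIFT) & RMASK) << 2 is computed in one
 * step as (flags >> (SHIFT - 2)) & (RMASK << 2); the shifted-up mask
 * still clears the two low-order bits that leak in from below the
 * field.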
743 + */ 744 + return ((mda->__flags >> (_NETIO_PKT_CUSTOM_LEN_SHIFT - 2)) & 745 + (_NETIO_PKT_CUSTOM_LEN_RMASK << 2)); 746 + } 747 + 748 + 749 + /** Return the length of the packet, starting with the custom header. 750 + * A custom header may or may not be present, depending upon the IPP; its 751 + * contents and alignment are also IPP-dependent. Currently, none of the 752 + * standard IPPs supplied by Tilera produce a custom header. If present, 753 + * the custom header precedes the L2 header in the packet buffer. 754 + * @ingroup ingress 755 + * 756 + * @param[in] mda Pointer to packet's standard metadata. 757 + * @param[in] pkt Packet on which to operate. 758 + * @return The length of the packet, in bytes. 759 + */ 760 + static __inline netio_size_t 761 + NETIO_PKT_CUSTOM_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 762 + { 763 + return (__NETIO_PKT_NOTIF_HEADER(pkt).bits.__transfer_size - 764 + NETIO_PACKET_PADDING); 765 + } 766 + 767 + 768 + /** Return a pointer to the start of the packet's custom header. 769 + * A custom header may or may not be present, depending upon the IPP; its 770 + * contents and alignment are also IPP-dependent. Currently, none of the 771 + * standard IPPs supplied by Tilera produce a custom header. If present, 772 + * the custom header precedes the L2 header in the packet buffer. 773 + * @ingroup ingress 774 + * 775 + * @param[in] mda Pointer to packet's standard metadata. 776 + * @param[in] pkt Packet on which to operate. 777 + * @return A pointer to start of the packet. 778 + */ 779 + static __inline unsigned char* 780 + NETIO_PKT_CUSTOM_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 781 + { 782 + return NETIO_PKT_CUSTOM_DATA_H(NETIO_PKT_HANDLE(pkt)); 783 + } 784 + 785 + 786 + /** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. 787 + * @ingroup ingress 788 + * 789 + * @param[in] mda Pointer to packet's standard metadata. 790 + * @param[in] pkt Packet on which to operate. 791 + * @return The length of the packet's L2 header, in bytes. 792 + */ 793 + static __inline netio_size_t 794 + NETIO_PKT_L2_HEADER_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 795 + { 796 + /* 797 + * Note that we effectively need to extract a quantity from the flags word 798 + * which is measured in words, and then turn it into bytes by shifting 799 + * it left by 2. We do this all at once by just shifting right two less 800 + * bits, and shifting the mask up two bits. We then add two bytes. 801 + */ 802 + return ((mda->__flags >> (_NETIO_PKT_L2_LEN_SHIFT - 2)) & 803 + (_NETIO_PKT_L2_LEN_RMASK << 2)) + 2; 804 + } 805 + 806 + 807 + /** Return the length of the packet, starting with the L2 (Ethernet) header. 808 + * @ingroup ingress 809 + * 810 + * @param[in] mda Pointer to packet's standard metadata. 811 + * @param[in] pkt Packet on which to operate. 812 + * @return The length of the packet, in bytes. 813 + */ 814 + static __inline netio_size_t 815 + NETIO_PKT_L2_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 816 + { 817 + return (NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt) - 818 + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda,pkt)); 819 + } 820 + 821 + 822 + /** Return a pointer to the start of the packet's L2 (Ethernet) header. 823 + * @ingroup ingress 824 + * 825 + * @param[in] mda Pointer to packet's standard metadata. 826 + * @param[in] pkt Packet on which to operate. 827 + * @return A pointer to start of the packet. 
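 *
 * A sketch of use on an ingress packet (the MAC field offsets are
 * standard Ethernet, not specific to this API):
 * @code
 * unsigned char* l2 = NETIO_PKT_L2_DATA_M(mda, &pkt);
 * unsigned char* dst_mac = l2;      // destination MAC, 6 bytes
 * unsigned char* src_mac = l2 + 6;  // source MAC, 6 bytes
 * @endcode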
828 + */ 829 + static __inline unsigned char* 830 + NETIO_PKT_L2_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 831 + { 832 + return (NETIO_PKT_CUSTOM_DATA_M(mda, pkt) + 833 + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt)); 834 + } 835 + 836 + 837 + /** Retrieve the length of the packet, starting with the L3 (generally, 838 + * the IP) header. 839 + * @ingroup ingress 840 + * 841 + * @param[in] mda Pointer to packet's standard metadata. 842 + * @param[in] pkt Packet on which to operate. 843 + * @return Length of the packet's L3 header and data, in bytes. 844 + */ 845 + static __inline netio_size_t 846 + NETIO_PKT_L3_LENGTH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 847 + { 848 + return (NETIO_PKT_L2_LENGTH_M(mda, pkt) - 849 + NETIO_PKT_L2_HEADER_LENGTH_M(mda,pkt)); 850 + } 851 + 852 + 853 + /** Return a pointer to the packet's L3 (generally, the IP) header. 854 + * @ingroup ingress 855 + * 856 + * Note that we guarantee word alignment of the L3 header. 857 + * 858 + * @param[in] mda Pointer to packet's standard metadata. 859 + * @param[in] pkt Packet on which to operate. 860 + * @return A pointer to the packet's L3 header. 861 + */ 862 + static __inline unsigned char* 863 + NETIO_PKT_L3_DATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 864 + { 865 + return (NETIO_PKT_L2_DATA_M(mda, pkt) + 866 + NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt)); 867 + } 868 + 869 + 870 + /** Return the ordinal of the packet. 871 + * @ingroup ingress 872 + * 873 + * Each packet is given an ordinal number when it is delivered by the IPP. 874 + * In the medium term, the ordinal is unique and monotonically increasing, 875 + * being incremented by 1 for each packet; the ordinal of the first packet 876 + * delivered after the IPP starts is zero. (Since the ordinal is of finite 877 + * size, given enough input packets, it will eventually wrap around to zero; 878 + * in the long term, therefore, ordinals are not unique.) The ordinals 879 + * handed out by different IPPs are not disjoint, so two packets from 880 + * different IPPs may have identical ordinals. Packets dropped by the 881 + * IPP or by the I/O shim are not assigned ordinals. 882 + * 883 + * @param[in] mda Pointer to packet's standard metadata. 884 + * @param[in] pkt Packet on which to operate. 885 + * @return The packet's per-IPP packet ordinal. 886 + */ 887 + static __inline unsigned int 888 + NETIO_PKT_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 889 + { 890 + return mda->__packet_ordinal; 891 + } 892 + 893 + 894 + /** Return the per-group ordinal of the packet. 895 + * @ingroup ingress 896 + * 897 + * Each packet is given a per-group ordinal number when it is 898 + * delivered by the IPP. By default, the group is the packet's VLAN, 899 + * although IPP can be recompiled to use different values. In 900 + * the medium term, the ordinal is unique and monotonically 901 + * increasing, being incremented by 1 for each packet; the ordinal of 902 + * the first packet distributed to a particular group is zero. 903 + * (Since the ordinal is of finite size, given enough input packets, 904 + * it will eventually wrap around to zero; in the long term, 905 + * therefore, ordinals are not unique.) The ordinals handed out by 906 + * different IPPs are not disjoint, so two packets from different IPPs 907 + * may have identical ordinals; similarly, packets distributed to 908 + * different groups may have identical ordinals. Packets dropped by 909 + * the IPP or by the I/O shim are not assigned ordinals. 
910 + * 911 + * @param[in] mda Pointer to packet's standard metadata. 912 + * @param[in] pkt Packet on which to operate. 913 + * @return The packet's per-IPP, per-group ordinal. 914 + */ 915 + static __inline unsigned int 916 + NETIO_PKT_GROUP_ORDINAL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 917 + { 918 + return mda->__group_ordinal; 919 + } 920 + 921 + 922 + /** Return the VLAN ID assigned to the packet. 923 + * @ingroup ingress 924 + * 925 + * This value is usually contained within the packet header. 926 + * 927 + * This value will be zero if the packet does not have a VLAN tag, or if 928 + * this value was not extracted from the packet. 929 + * 930 + * @param[in] mda Pointer to packet's standard metadata. 931 + * @param[in] pkt Packet on which to operate. 932 + * @return The packet's VLAN ID. 933 + */ 934 + static __inline unsigned short 935 + NETIO_PKT_VLAN_ID_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 936 + { 937 + int vl = (mda->__flags >> _NETIO_PKT_VLAN_SHIFT) & _NETIO_PKT_VLAN_RMASK; 938 + unsigned short* pkt_p; 939 + int index; 940 + unsigned short val; 941 + 942 + if (vl == _NETIO_PKT_VLAN_NONE) 943 + return 0; 944 + 945 + pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); 946 + index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; 947 + 948 + val = pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_VLAN_SHIFT) & 949 + _NETIO_PKT_INFO_VLAN_RMASK]; 950 + 951 + #ifdef __TILECC__ 952 + return (__insn_bytex(val) >> 16) & 0xFFF; 953 + #else 954 + return (__builtin_bswap32(val) >> 16) & 0xFFF; 955 + #endif 956 + } 957 + 958 + 959 + /** Return the ethertype of the packet. 960 + * @ingroup ingress 961 + * 962 + * This value is usually contained within the packet header. 963 + * 964 + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED_M() 965 + * returns true, and otherwise, may not be well defined. 966 + * 967 + * @param[in] mda Pointer to packet's standard metadata. 968 + * @param[in] pkt Packet on which to operate. 969 + * @return The packet's ethertype. 970 + */ 971 + static __inline unsigned short 972 + NETIO_PKT_ETHERTYPE_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 973 + { 974 + unsigned short* pkt_p = (unsigned short*) NETIO_PKT_L2_DATA_M(mda, pkt); 975 + int index = (mda->__flags >> _NETIO_PKT_TYPE_SHIFT) & _NETIO_PKT_TYPE_RMASK; 976 + 977 + unsigned short val = 978 + pkt_p[(_netio_pkt_info[index] >> _NETIO_PKT_INFO_ETYPE_SHIFT) & 979 + _NETIO_PKT_INFO_ETYPE_RMASK]; 980 + 981 + return __builtin_bswap32(val) >> 16; 982 + } 983 + 984 + 985 + /** Return the flow hash computed on the packet. 986 + * @ingroup ingress 987 + * 988 + * For TCP and UDP packets, this hash is calculated by hashing together 989 + * the "5-tuple" values, specifically the source IP address, destination 990 + * IP address, protocol type, source port and destination port. 991 + * The hash value is intended to be helpful for millions of distinct 992 + * flows. 993 + * 994 + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is 995 + * derived by hashing together the source and destination IP addresses. 996 + * 997 + * For MPLS-encapsulated packets, the flow hash is derived by hashing 998 + * the first MPLS label. 999 + * 1000 + * For all other packets the flow hash is computed from the source 1001 + * and destination Ethernet addresses. 1002 + * 1003 + * The hash is symmetric, meaning it produces the same value if the 1004 + * source and destination are swapped. 
The only exceptions are 1005 + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple 1006 + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 1007 + * (Encap Security Payload), which use only the destination address 1008 + * since the source address is not meaningful. 1009 + * 1010 + * @param[in] mda Pointer to packet's standard metadata. 1011 + * @param[in] pkt Packet on which to operate. 1012 + * @return The packet's 32-bit flow hash. 1013 + */ 1014 + static __inline unsigned int 1015 + NETIO_PKT_FLOW_HASH_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1016 + { 1017 + return mda->__flow_hash; 1018 + } 1019 + 1020 + 1021 + /** Return the first word of "user data" for the packet. 1022 + * 1023 + * The contents of the user data words depend on the IPP. 1024 + * 1025 + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first 1026 + * word of user data contains the least significant bits of the 64-bit 1027 + * arrival cycle count (see @c get_cycle_count_low()). 1028 + * 1029 + * See the <em>System Programmer's Guide</em> for details. 1030 + * 1031 + * @ingroup ingress 1032 + * 1033 + * @param[in] mda Pointer to packet's standard metadata. 1034 + * @param[in] pkt Packet on which to operate. 1035 + * @return The packet's first word of "user data". 1036 + */ 1037 + static __inline unsigned int 1038 + NETIO_PKT_USER_DATA_0_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1039 + { 1040 + return mda->__user_data_0; 1041 + } 1042 + 1043 + 1044 + /** Return the second word of "user data" for the packet. 1045 + * 1046 + * The contents of the user data words depend on the IPP. 1047 + * 1048 + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second 1049 + * word of user data contains the most significant bits of the 64-bit 1050 + * arrival cycle count (see @c get_cycle_count_high()). 1051 + * 1052 + * See the <em>System Programmer's Guide</em> for details. 1053 + * 1054 + * @ingroup ingress 1055 + * 1056 + * @param[in] mda Pointer to packet's standard metadata. 1057 + * @param[in] pkt Packet on which to operate. 1058 + * @return The packet's second word of "user data". 1059 + */ 1060 + static __inline unsigned int 1061 + NETIO_PKT_USER_DATA_1_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1062 + { 1063 + return mda->__user_data_1; 1064 + } 1065 + 1066 + 1067 + /** Determine whether the L4 (TCP/UDP) checksum was calculated. 1068 + * @ingroup ingress 1069 + * 1070 + * @param[in] mda Pointer to packet's standard metadata. 1071 + * @param[in] pkt Packet on which to operate. 1072 + * @return Nonzero if the L4 checksum was calculated. 1073 + */ 1074 + static __inline unsigned int 1075 + NETIO_PKT_L4_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1076 + { 1077 + return !(mda->__flags & _NETIO_PKT_NO_L4_CSUM_MASK); 1078 + } 1079 + 1080 + 1081 + /** Determine whether the L4 (TCP/UDP) checksum was calculated and found to 1082 + * be correct. 1083 + * @ingroup ingress 1084 + * 1085 + * @param[in] mda Pointer to packet's standard metadata. 1086 + * @param[in] pkt Packet on which to operate. 1087 + * @return Nonzero if the checksum was calculated and is correct. 1088 + */ 1089 + static __inline unsigned int 1090 + NETIO_PKT_L4_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1091 + { 1092 + return !(mda->__flags & 1093 + (_NETIO_PKT_BAD_L4_CSUM_MASK | _NETIO_PKT_NO_L4_CSUM_MASK)); 1094 + } 1095 + 1096 + 1097 + /** Determine whether the L3 (IP) checksum was calculated. 
1098 + * @ingroup ingress 1099 + * 1100 + * @param[in] mda Pointer to packet's standard metadata. 1101 + * @param[in] pkt Packet on which to operate. 1102 + * @return Nonzero if the L3 (IP) checksum was calculated. 1103 + */ 1104 + static __inline unsigned int 1105 + NETIO_PKT_L3_CSUM_CALCULATED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1106 + { 1107 + return !(mda->__flags & _NETIO_PKT_NO_L3_CSUM_MASK); 1108 + } 1109 + 1110 + 1111 + /** Determine whether the L3 (IP) checksum was calculated and found to be 1112 + * correct. 1113 + * @ingroup ingress 1114 + * 1115 + * @param[in] mda Pointer to packet's standard metadata. 1116 + * @param[in] pkt Packet on which to operate. 1117 + * @return Nonzero if the checksum was calculated and is correct. 1118 + */ 1119 + static __inline unsigned int 1120 + NETIO_PKT_L3_CSUM_CORRECT_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1121 + { 1122 + return !(mda->__flags & 1123 + (_NETIO_PKT_BAD_L3_CSUM_MASK | _NETIO_PKT_NO_L3_CSUM_MASK)); 1124 + } 1125 + 1126 + 1127 + /** Determine whether the ethertype was recognized and L3 packet data was 1128 + * processed. 1129 + * @ingroup ingress 1130 + * 1131 + * @param[in] mda Pointer to packet's standard metadata. 1132 + * @param[in] pkt Packet on which to operate. 1133 + * @return Nonzero if the ethertype was recognized and L3 packet data was 1134 + * processed. 1135 + */ 1136 + static __inline unsigned int 1137 + NETIO_PKT_ETHERTYPE_RECOGNIZED_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1138 + { 1139 + return !(mda->__flags & _NETIO_PKT_TYPE_UNRECOGNIZED_MASK); 1140 + } 1141 + 1142 + 1143 + /** Retrieve the status of a packet and any errors that may have occurred 1144 + * during ingress processing (length mismatches, CRC errors, etc.). 1145 + * @ingroup ingress 1146 + * 1147 + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() 1148 + * returns zero are always reported as underlength, as there is no a priori 1149 + * means to determine their length. Normally, applications should use 1150 + * @ref NETIO_PKT_BAD_M() instead of explicitly checking status with this 1151 + * function. 1152 + * 1153 + * @param[in] mda Pointer to packet's standard metadata. 1154 + * @param[in] pkt Packet on which to operate. 1155 + * @return The packet's status. 1156 + */ 1157 + static __inline netio_pkt_status_t 1158 + NETIO_PKT_STATUS_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1159 + { 1160 + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; 1161 + } 1162 + 1163 + 1164 + /** Report whether a packet is bad (i.e., was shorter than expected based on 1165 + * its headers, or had a bad CRC). 1166 + * @ingroup ingress 1167 + * 1168 + * Note that this function does not verify L3 or L4 checksums. 1169 + * 1170 + * @param[in] mda Pointer to packet's standard metadata. 1171 + * @param[in] pkt Packet on which to operate. 1172 + * @return Nonzero if the packet is bad and should be discarded. 1173 + */ 1174 + static __inline unsigned int 1175 + NETIO_PKT_BAD_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 1176 + { 1177 + return ((NETIO_PKT_STATUS_M(mda, pkt) & 1) && 1178 + (NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt) || 1179 + NETIO_PKT_STATUS_M(mda, pkt) == NETIO_PKT_STATUS_BAD)); 1180 + } 1181 + 1182 + 1183 + /** Return the length of the packet, starting with the L2 (Ethernet) header. 1184 + * @ingroup egress 1185 + * 1186 + * @param[in] mmd Pointer to packet's minimal metadata. 1187 + * @param[in] pkt Packet on which to operate. 1188 + * @return The length of the packet, in bytes. 
1189 + */ 1190 + static __inline netio_size_t 1191 + NETIO_PKT_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) 1192 + { 1193 + return mmd->l2_length; 1194 + } 1195 + 1196 + 1197 + /** Return the length of the L2 (Ethernet) header. 1198 + * @ingroup egress 1199 + * 1200 + * @param[in] mmd Pointer to packet's minimal metadata. 1201 + * @param[in] pkt Packet on which to operate. 1202 + * @return The length of the packet's L2 header, in bytes. 1203 + */ 1204 + static __inline netio_size_t 1205 + NETIO_PKT_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, 1206 + netio_pkt_t* pkt) 1207 + { 1208 + return mmd->l3_offset - mmd->l2_offset; 1209 + } 1210 + 1211 + 1212 + /** Return the length of the packet, starting with the L3 (IP) header. 1213 + * @ingroup egress 1214 + * 1215 + * @param[in] mmd Pointer to packet's minimal metadata. 1216 + * @param[in] pkt Packet on which to operate. 1217 + * @return Length of the packet's L3 header and data, in bytes. 1218 + */ 1219 + static __inline netio_size_t 1220 + NETIO_PKT_L3_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) 1221 + { 1222 + return (NETIO_PKT_L2_LENGTH_MM(mmd, pkt) - 1223 + NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt)); 1224 + } 1225 + 1226 + 1227 + /** Return a pointer to the packet's L3 (generally, the IP) header. 1228 + * @ingroup egress 1229 + * 1230 + * Note that we guarantee word alignment of the L3 header. 1231 + * 1232 + * @param[in] mmd Pointer to packet's minimal metadata. 1233 + * @param[in] pkt Packet on which to operate. 1234 + * @return A pointer to the packet's L3 header. 1235 + */ 1236 + static __inline unsigned char* 1237 + NETIO_PKT_L3_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) 1238 + { 1239 + return _NETIO_PKT_BASE(pkt) + mmd->l3_offset; 1240 + } 1241 + 1242 + 1243 + /** Return a pointer to the packet's L2 (Ethernet) header. 1244 + * @ingroup egress 1245 + * 1246 + * @param[in] mmd Pointer to packet's minimal metadata. 1247 + * @param[in] pkt Packet on which to operate. 1248 + * @return A pointer to start of the packet. 1249 + */ 1250 + static __inline unsigned char* 1251 + NETIO_PKT_L2_DATA_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt) 1252 + { 1253 + return _NETIO_PKT_BASE(pkt) + mmd->l2_offset; 1254 + } 1255 + 1256 + 1257 + /** Retrieve the status of a packet and any errors that may have occurred 1258 + * during ingress processing (length mismatches, CRC errors, etc.). 1259 + * @ingroup ingress 1260 + * 1261 + * Note that packets for which @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() 1262 + * returns zero are always reported as underlength, as there is no a priori 1263 + * means to determine their length. Normally, applications should use 1264 + * @ref NETIO_PKT_BAD() instead of explicitly checking status with this 1265 + * function. 1266 + * 1267 + * @param[in] pkt Packet on which to operate. 1268 + * @return The packet's status. 1269 + */ 1270 + static __inline netio_pkt_status_t 1271 + NETIO_PKT_STATUS(netio_pkt_t* pkt) 1272 + { 1273 + netio_assert(!pkt->__packet.bits.__minimal); 1274 + 1275 + return (netio_pkt_status_t) __NETIO_PKT_NOTIF_HEADER(pkt).bits.__status; 1276 + } 1277 + 1278 + 1279 + /** Report whether a packet is bad (i.e., was shorter than expected based on 1280 + * its headers, or had a bad CRC). 1281 + * @ingroup ingress 1282 + * 1283 + * Note that this function does not verify L3 or L4 checksums. 1284 + * 1285 + * @param[in] pkt Packet on which to operate. 1286 + * @return Nonzero if the packet is bad and should be discarded. 
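 *
 * A sketch of the usual ingress pattern; netio_free_buffer() is a
 * hypothetical stand-in for however the application returns buffers:
 * @code
 * if (NETIO_PKT_BAD(&pkt)) {
 *   netio_free_buffer(queue, &pkt);  // hypothetical helper, not this API
 *   return;
 * }
 * @endcode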
1287 + */ 1288 + static __inline unsigned int 1289 + NETIO_PKT_BAD(netio_pkt_t* pkt) 1290 + { 1291 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1292 + 1293 + return NETIO_PKT_BAD_M(mda, pkt); 1294 + } 1295 + 1296 + 1297 + /** Return the length of the packet's custom header. 1298 + * A custom header may or may not be present, depending upon the IPP; its 1299 + * contents and alignment are also IPP-dependent. Currently, none of the 1300 + * standard IPPs supplied by Tilera produce a custom header. If present, 1301 + * the custom header precedes the L2 header in the packet buffer. 1302 + * @ingroup pktfuncs 1303 + * 1304 + * @param[in] pkt Packet on which to operate. 1305 + * @return The length of the packet's custom header, in bytes. 1306 + */ 1307 + static __inline netio_size_t 1308 + NETIO_PKT_CUSTOM_HEADER_LENGTH(netio_pkt_t* pkt) 1309 + { 1310 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1311 + 1312 + return NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt); 1313 + } 1314 + 1315 + 1316 + /** Return the length of the packet, starting with the custom header. 1317 + * A custom header may or may not be present, depending upon the IPP; its 1318 + * contents and alignment are also IPP-dependent. Currently, none of the 1319 + * standard IPPs supplied by Tilera produce a custom header. If present, 1320 + * the custom header precedes the L2 header in the packet buffer. 1321 + * @ingroup pktfuncs 1322 + * 1323 + * @param[in] pkt Packet on which to operate. 1324 + * @return The length of the packet, in bytes. 1325 + */ 1326 + static __inline netio_size_t 1327 + NETIO_PKT_CUSTOM_LENGTH(netio_pkt_t* pkt) 1328 + { 1329 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1330 + 1331 + return NETIO_PKT_CUSTOM_LENGTH_M(mda, pkt); 1332 + } 1333 + 1334 + 1335 + /** Return a pointer to the packet's custom header. 1336 + * A custom header may or may not be present, depending upon the IPP; its 1337 + * contents and alignment are also IPP-dependent. Currently, none of the 1338 + * standard IPPs supplied by Tilera produce a custom header. If present, 1339 + * the custom header precedes the L2 header in the packet buffer. 1340 + * @ingroup pktfuncs 1341 + * 1342 + * @param[in] pkt Packet on which to operate. 1343 + * @return A pointer to start of the packet. 1344 + */ 1345 + static __inline unsigned char* 1346 + NETIO_PKT_CUSTOM_DATA(netio_pkt_t* pkt) 1347 + { 1348 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1349 + 1350 + return NETIO_PKT_CUSTOM_DATA_M(mda, pkt); 1351 + } 1352 + 1353 + 1354 + /** Return the length of the packet's L2 (Ethernet plus VLAN or SNAP) header. 1355 + * @ingroup pktfuncs 1356 + * 1357 + * @param[in] pkt Packet on which to operate. 1358 + * @return The length of the packet's L2 header, in bytes. 1359 + */ 1360 + static __inline netio_size_t 1361 + NETIO_PKT_L2_HEADER_LENGTH(netio_pkt_t* pkt) 1362 + { 1363 + if (NETIO_PKT_IS_MINIMAL(pkt)) 1364 + { 1365 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); 1366 + 1367 + return NETIO_PKT_L2_HEADER_LENGTH_MM(mmd, pkt); 1368 + } 1369 + else 1370 + { 1371 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1372 + 1373 + return NETIO_PKT_L2_HEADER_LENGTH_M(mda, pkt); 1374 + } 1375 + } 1376 + 1377 + 1378 + /** Return the length of the packet, starting with the L2 (Ethernet) header. 1379 + * @ingroup pktfuncs 1380 + * 1381 + * @param[in] pkt Packet on which to operate. 1382 + * @return The length of the packet, in bytes. 
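 *
 * When several attributes of the same ingress packet are needed, a
 * faster pattern (per the note at @ref NETIO_PKT_METADATA()) is to
 * fetch the metadata pointer once, as in this sketch:
 * @code
 * netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(&pkt);
 * netio_size_t l2 = NETIO_PKT_L2_LENGTH_M(mda, &pkt);
 * netio_size_t l3 = NETIO_PKT_L3_LENGTH_M(mda, &pkt);
 * @endcode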
1383 + */ 1384 + static __inline netio_size_t 1385 + NETIO_PKT_L2_LENGTH(netio_pkt_t* pkt) 1386 + { 1387 + if (NETIO_PKT_IS_MINIMAL(pkt)) 1388 + { 1389 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); 1390 + 1391 + return NETIO_PKT_L2_LENGTH_MM(mmd, pkt); 1392 + } 1393 + else 1394 + { 1395 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1396 + 1397 + return NETIO_PKT_L2_LENGTH_M(mda, pkt); 1398 + } 1399 + } 1400 + 1401 + 1402 + /** Return a pointer to the packet's L2 (Ethernet) header. 1403 + * @ingroup pktfuncs 1404 + * 1405 + * @param[in] pkt Packet on which to operate. 1406 + * @return A pointer to start of the packet. 1407 + */ 1408 + static __inline unsigned char* 1409 + NETIO_PKT_L2_DATA(netio_pkt_t* pkt) 1410 + { 1411 + if (NETIO_PKT_IS_MINIMAL(pkt)) 1412 + { 1413 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); 1414 + 1415 + return NETIO_PKT_L2_DATA_MM(mmd, pkt); 1416 + } 1417 + else 1418 + { 1419 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1420 + 1421 + return NETIO_PKT_L2_DATA_M(mda, pkt); 1422 + } 1423 + } 1424 + 1425 + 1426 + /** Retrieve the length of the packet, starting with the L3 (generally, the IP) 1427 + * header. 1428 + * @ingroup pktfuncs 1429 + * 1430 + * @param[in] pkt Packet on which to operate. 1431 + * @return Length of the packet's L3 header and data, in bytes. 1432 + */ 1433 + static __inline netio_size_t 1434 + NETIO_PKT_L3_LENGTH(netio_pkt_t* pkt) 1435 + { 1436 + if (NETIO_PKT_IS_MINIMAL(pkt)) 1437 + { 1438 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); 1439 + 1440 + return NETIO_PKT_L3_LENGTH_MM(mmd, pkt); 1441 + } 1442 + else 1443 + { 1444 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1445 + 1446 + return NETIO_PKT_L3_LENGTH_M(mda, pkt); 1447 + } 1448 + } 1449 + 1450 + 1451 + /** Return a pointer to the packet's L3 (generally, the IP) header. 1452 + * @ingroup pktfuncs 1453 + * 1454 + * Note that we guarantee word alignment of the L3 header. 1455 + * 1456 + * @param[in] pkt Packet on which to operate. 1457 + * @return A pointer to the packet's L3 header. 1458 + */ 1459 + static __inline unsigned char* 1460 + NETIO_PKT_L3_DATA(netio_pkt_t* pkt) 1461 + { 1462 + if (NETIO_PKT_IS_MINIMAL(pkt)) 1463 + { 1464 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); 1465 + 1466 + return NETIO_PKT_L3_DATA_MM(mmd, pkt); 1467 + } 1468 + else 1469 + { 1470 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1471 + 1472 + return NETIO_PKT_L3_DATA_M(mda, pkt); 1473 + } 1474 + } 1475 + 1476 + 1477 + /** Return the ordinal of the packet. 1478 + * @ingroup ingress 1479 + * 1480 + * Each packet is given an ordinal number when it is delivered by the IPP. 1481 + * In the medium term, the ordinal is unique and monotonically increasing, 1482 + * being incremented by 1 for each packet; the ordinal of the first packet 1483 + * delivered after the IPP starts is zero. (Since the ordinal is of finite 1484 + * size, given enough input packets, it will eventually wrap around to zero; 1485 + * in the long term, therefore, ordinals are not unique.) The ordinals 1486 + * handed out by different IPPs are not disjoint, so two packets from 1487 + * different IPPs may have identical ordinals. Packets dropped by the 1488 + * IPP or by the I/O shim are not assigned ordinals. 1489 + * 1490 + * 1491 + * @param[in] pkt Packet on which to operate. 1492 + * @return The packet's per-IPP packet ordinal. 
1493 + */ 1494 + static __inline unsigned int 1495 + NETIO_PKT_ORDINAL(netio_pkt_t* pkt) 1496 + { 1497 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1498 + 1499 + return NETIO_PKT_ORDINAL_M(mda, pkt); 1500 + } 1501 + 1502 + 1503 + /** Return the per-group ordinal of the packet. 1504 + * @ingroup ingress 1505 + * 1506 + * Each packet is given a per-group ordinal number when it is 1507 + * delivered by the IPP. By default, the group is the packet's VLAN, 1508 + * although IPP can be recompiled to use different values. In 1509 + * the medium term, the ordinal is unique and monotonically 1510 + * increasing, being incremented by 1 for each packet; the ordinal of 1511 + * the first packet distributed to a particular group is zero. 1512 + * (Since the ordinal is of finite size, given enough input packets, 1513 + * it will eventually wrap around to zero; in the long term, 1514 + * therefore, ordinals are not unique.) The ordinals handed out by 1515 + * different IPPs are not disjoint, so two packets from different IPPs 1516 + * may have identical ordinals; similarly, packets distributed to 1517 + * different groups may have identical ordinals. Packets dropped by 1518 + * the IPP or by the I/O shim are not assigned ordinals. 1519 + * 1520 + * @param[in] pkt Packet on which to operate. 1521 + * @return The packet's per-IPP, per-group ordinal. 1522 + */ 1523 + static __inline unsigned int 1524 + NETIO_PKT_GROUP_ORDINAL(netio_pkt_t* pkt) 1525 + { 1526 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1527 + 1528 + return NETIO_PKT_GROUP_ORDINAL_M(mda, pkt); 1529 + } 1530 + 1531 + 1532 + /** Return the VLAN ID assigned to the packet. 1533 + * @ingroup ingress 1534 + * 1535 + * This is usually also contained within the packet header. If the packet 1536 + * does not have a VLAN tag, the VLAN ID returned by this function is zero. 1537 + * 1538 + * @param[in] pkt Packet on which to operate. 1539 + * @return The packet's VLAN ID. 1540 + */ 1541 + static __inline unsigned short 1542 + NETIO_PKT_VLAN_ID(netio_pkt_t* pkt) 1543 + { 1544 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1545 + 1546 + return NETIO_PKT_VLAN_ID_M(mda, pkt); 1547 + } 1548 + 1549 + 1550 + /** Return the ethertype of the packet. 1551 + * @ingroup ingress 1552 + * 1553 + * This value is reliable if @ref NETIO_PKT_ETHERTYPE_RECOGNIZED() 1554 + * returns true, and otherwise, may not be well defined. 1555 + * 1556 + * @param[in] pkt Packet on which to operate. 1557 + * @return The packet's ethertype. 1558 + */ 1559 + static __inline unsigned short 1560 + NETIO_PKT_ETHERTYPE(netio_pkt_t* pkt) 1561 + { 1562 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1563 + 1564 + return NETIO_PKT_ETHERTYPE_M(mda, pkt); 1565 + } 1566 + 1567 + 1568 + /** Return the flow hash computed on the packet. 1569 + * @ingroup ingress 1570 + * 1571 + * For TCP and UDP packets, this hash is calculated by hashing together 1572 + * the "5-tuple" values, specifically the source IP address, destination 1573 + * IP address, protocol type, source port and destination port. 1574 + * The hash value is intended to be helpful for millions of distinct 1575 + * flows. 1576 + * 1577 + * For IPv4 or IPv6 packets which are neither TCP nor UDP, the flow hash is 1578 + * derived by hashing together the source and destination IP addresses. 1579 + * 1580 + * For MPLS-encapsulated packets, the flow hash is derived by hashing 1581 + * the first MPLS label. 
1582 + * 1583 + * For all other packets the flow hash is computed from the source 1584 + * and destination Ethernet addresses. 1585 + * 1586 + * The hash is symmetric, meaning it produces the same value if the 1587 + * source and destination are swapped. The only exceptions are 1588 + * tunneling protocols 0x04 (IP in IP Encapsulation), 0x29 (Simple 1589 + * Internet Protocol), 0x2F (General Routing Encapsulation) and 0x32 1590 + * (Encap Security Payload), which use only the destination address 1591 + * since the source address is not meaningful. 1592 + * 1593 + * @param[in] pkt Packet on which to operate. 1594 + * @return The packet's 32-bit flow hash. 1595 + */ 1596 + static __inline unsigned int 1597 + NETIO_PKT_FLOW_HASH(netio_pkt_t* pkt) 1598 + { 1599 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1600 + 1601 + return NETIO_PKT_FLOW_HASH_M(mda, pkt); 1602 + } 1603 + 1604 + 1605 + /** Return the first word of "user data" for the packet. 1606 + * 1607 + * The contents of the user data words depend on the IPP. 1608 + * 1609 + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the first 1610 + * word of user data contains the least significant bits of the 64-bit 1611 + * arrival cycle count (see @c get_cycle_count_low()). 1612 + * 1613 + * See the <em>System Programmer's Guide</em> for details. 1614 + * 1615 + * @ingroup ingress 1616 + * 1617 + * @param[in] pkt Packet on which to operate. 1618 + * @return The packet's first word of "user data". 1619 + */ 1620 + static __inline unsigned int 1621 + NETIO_PKT_USER_DATA_0(netio_pkt_t* pkt) 1622 + { 1623 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1624 + 1625 + return NETIO_PKT_USER_DATA_0_M(mda, pkt); 1626 + } 1627 + 1628 + 1629 + /** Return the second word of "user data" for the packet. 1630 + * 1631 + * The contents of the user data words depend on the IPP. 1632 + * 1633 + * When using the standard ipp1, ipp2, or ipp4 sub-drivers, the second 1634 + * word of user data contains the most significant bits of the 64-bit 1635 + * arrival cycle count (see @c get_cycle_count_high()). 1636 + * 1637 + * See the <em>System Programmer's Guide</em> for details. 1638 + * 1639 + * @ingroup ingress 1640 + * 1641 + * @param[in] pkt Packet on which to operate. 1642 + * @return The packet's second word of "user data". 1643 + */ 1644 + static __inline unsigned int 1645 + NETIO_PKT_USER_DATA_1(netio_pkt_t* pkt) 1646 + { 1647 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1648 + 1649 + return NETIO_PKT_USER_DATA_1_M(mda, pkt); 1650 + } 1651 + 1652 + 1653 + /** Determine whether the L4 (TCP/UDP) checksum was calculated. 1654 + * @ingroup ingress 1655 + * 1656 + * @param[in] pkt Packet on which to operate. 1657 + * @return Nonzero if the L4 checksum was calculated. 1658 + */ 1659 + static __inline unsigned int 1660 + NETIO_PKT_L4_CSUM_CALCULATED(netio_pkt_t* pkt) 1661 + { 1662 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1663 + 1664 + return NETIO_PKT_L4_CSUM_CALCULATED_M(mda, pkt); 1665 + } 1666 + 1667 + 1668 + /** Determine whether the L4 (TCP/UDP) checksum was calculated and found to 1669 + * be correct. 1670 + * @ingroup ingress 1671 + * 1672 + * @param[in] pkt Packet on which to operate. 1673 + * @return Nonzero if the checksum was calculated and is correct. 
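 *
 * A sketch of a receive path using this check (that the consumer is a
 * Linux skb is an assumption, not part of this API):
 * @code
 * if (NETIO_PKT_L4_CSUM_CORRECT(&pkt))
 *   skb->ip_summed = CHECKSUM_UNNECESSARY;  // trust the hardware result
 * @endcode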
1674 + */ 1675 + static __inline unsigned int 1676 + NETIO_PKT_L4_CSUM_CORRECT(netio_pkt_t* pkt) 1677 + { 1678 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1679 + 1680 + return NETIO_PKT_L4_CSUM_CORRECT_M(mda, pkt); 1681 + } 1682 + 1683 + 1684 + /** Determine whether the L3 (IP) checksum was calculated. 1685 + * @ingroup ingress 1686 + * 1687 + * @param[in] pkt Packet on which to operate. 1688 + * @return Nonzero if the L3 (IP) checksum was calculated. 1689 + */ 1690 + static __inline unsigned int 1691 + NETIO_PKT_L3_CSUM_CALCULATED(netio_pkt_t* pkt) 1692 + { 1693 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1694 + 1695 + return NETIO_PKT_L3_CSUM_CALCULATED_M(mda, pkt); 1696 + } 1697 + 1698 + 1699 + /** Determine whether the L3 (IP) checksum was calculated and found to be 1700 + * correct. 1701 + * @ingroup ingress 1702 + * 1703 + * @param[in] pkt Packet on which to operate. 1704 + * @return Nonzero if the checksum was calculated and is correct. 1705 + */ 1706 + static __inline unsigned int 1707 + NETIO_PKT_L3_CSUM_CORRECT(netio_pkt_t* pkt) 1708 + { 1709 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1710 + 1711 + return NETIO_PKT_L3_CSUM_CORRECT_M(mda, pkt); 1712 + } 1713 + 1714 + 1715 + /** Determine whether the Ethertype was recognized and L3 packet data was 1716 + * processed. 1717 + * @ingroup ingress 1718 + * 1719 + * @param[in] pkt Packet on which to operate. 1720 + * @return Nonzero if the Ethertype was recognized and L3 packet data was 1721 + * processed. 1722 + */ 1723 + static __inline unsigned int 1724 + NETIO_PKT_ETHERTYPE_RECOGNIZED(netio_pkt_t* pkt) 1725 + { 1726 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt); 1727 + 1728 + return NETIO_PKT_ETHERTYPE_RECOGNIZED_M(mda, pkt); 1729 + } 1730 + 1731 + 1732 + /** Set an egress packet's L2 length, using a metadata pointer to speed the 1733 + * computation. 1734 + * @ingroup egress 1735 + * 1736 + * @param[in,out] mmd Pointer to packet's minimal metadata. 1737 + * @param[in] pkt Packet on which to operate. 1738 + * @param[in] len Packet L2 length, in bytes. 1739 + */ 1740 + static __inline void 1741 + NETIO_PKT_SET_L2_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt, 1742 + int len) 1743 + { 1744 + mmd->l2_length = len; 1745 + } 1746 + 1747 + 1748 + /** Set an egress packet's L2 length. 1749 + * @ingroup egress 1750 + * 1751 + * @param[in,out] pkt Packet on which to operate. 1752 + * @param[in] len Packet L2 length, in bytes. 1753 + */ 1754 + static __inline void 1755 + NETIO_PKT_SET_L2_LENGTH(netio_pkt_t* pkt, int len) 1756 + { 1757 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt); 1758 + 1759 + NETIO_PKT_SET_L2_LENGTH_MM(mmd, pkt, len); 1760 + } 1761 + 1762 + 1763 + /** Set an egress packet's L2 header length, using a metadata pointer to 1764 + * speed the computation. 1765 + * @ingroup egress 1766 + * 1767 + * It is not normally necessary to call this routine; only the L2 length, 1768 + * not the header length, is needed to transmit a packet. It may be useful if 1769 + * the egress packet will later be processed by code which expects to use 1770 + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload. 1771 + * 1772 + * @param[in,out] mmd Pointer to packet's minimal metadata. 1773 + * @param[in] pkt Packet on which to operate. 1774 + * @param[in] len Packet L2 header length, in bytes. 
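 *
 * A sketch of preparing an egress Ethernet frame (frame_len is
 * hypothetical; assumes an already-populated egress packet with an
 * untagged 14-byte Ethernet header):
 * @code
 * netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(&pkt);
 * NETIO_PKT_SET_L2_LENGTH_MM(mmd, &pkt, frame_len);   // whole frame
 * NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, &pkt, 14);   // Ethernet header
 * @endcode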
1775 + */
1776 + static __inline void
1777 + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(netio_pkt_minimal_metadata_t* mmd,
1778 + netio_pkt_t* pkt, int len)
1779 + {
1780 + mmd->l3_offset = mmd->l2_offset + len;
1781 + }
1782 + 
1783 + 
1784 + /** Set an egress packet's L2 header length.
1785 + * @ingroup egress
1786 + *
1787 + * It is not normally necessary to call this routine; only the L2 length,
1788 + * not the header length, is needed to transmit a packet. It may be useful if
1789 + * the egress packet will later be processed by code which expects to use
1790 + * functions like @ref NETIO_PKT_L3_DATA() to get a pointer to the L3 payload.
1791 + *
1792 + * @param[in,out] pkt Packet on which to operate.
1793 + * @param[in] len Packet L2 header length, in bytes.
1794 + */
1795 + static __inline void
1796 + NETIO_PKT_SET_L2_HEADER_LENGTH(netio_pkt_t* pkt, int len)
1797 + {
1798 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1799 + 
1800 + NETIO_PKT_SET_L2_HEADER_LENGTH_MM(mmd, pkt, len);
1801 + }
1802 + 
1803 + 
1804 + /** Set up an egress packet for hardware checksum computation, using a
1805 + * metadata pointer to speed the operation.
1806 + * @ingroup egress
1807 + *
1808 + * NetIO provides the ability to automatically calculate a standard
1809 + * 16-bit Internet checksum on transmitted packets. The application
1810 + * may specify the point in the packet where the checksum starts, the
1811 + * number of bytes to be checksummed, and the two bytes in the packet
1812 + * which will be replaced with the completed checksum. (If the range
1813 + * of bytes to be checksummed includes the bytes to be replaced, the
1814 + * initial values of those bytes will be included in the checksum.)
1815 + *
1816 + * For some protocols, the packet checksum covers data which is not present
1817 + * in the packet, or is at least not contiguous to the main data payload.
1818 + * For instance, the TCP checksum includes a "pseudo-header" which includes
1819 + * the source and destination IP addresses of the packet. To accommodate
1820 + * this, the checksum engine may be "seeded" with an initial value, which
1821 + * the application would need to compute based on the specific protocol's
1822 + * requirements. Note that the seed is given in host byte order (little-
1823 + * endian), not network byte order (big-endian); code written to compute a
1824 + * pseudo-header checksum in network byte order will need to byte-swap it
1825 + * before use as the seed.
1826 + *
1827 + * Note that the checksum is computed as part of the transmission process,
1828 + * so it will not be present in the packet upon completion of this routine.
1829 + *
1830 + * @param[in,out] mmd Pointer to packet's minimal metadata.
1831 + * @param[in] pkt Packet on which to operate.
1832 + * @param[in] start Offset within L2 packet of the first byte to include in
1833 + * the checksum.
1834 + * @param[in] length Number of bytes to include in the checksum.
1835 + *
1836 + * @param[in] location Offset within L2 packet of the first of the two bytes
1837 + * to be replaced with the calculated checksum.
1838 + * @param[in] seed Initial value of the running checksum before any of the
1839 + * packet data is added.
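 *
 * A sketch with hypothetical values: offload the UDP checksum of an
 * IPv4 packet with a 14-byte L2 header and a 20-byte IP header (the
 * UDP checksum field sits 6 bytes into the UDP header; udp_len and
 * pseudo_hdr_seed are computed by the caller):
 * @code
 * int l4_off = 14 + 20;                 // offset of the UDP header
 * NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, &pkt, l4_off, udp_len,
 *                             l4_off + 6, pseudo_hdr_seed);
 * @endcode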
1840 + */
1841 + static __inline void
1842 + NETIO_PKT_DO_EGRESS_CSUM_MM(netio_pkt_minimal_metadata_t* mmd,
1843 + netio_pkt_t* pkt, int start, int length,
1844 + int location, uint16_t seed)
1845 + {
1846 + mmd->csum_start = start;
1847 + mmd->csum_length = length;
1848 + mmd->csum_location = location;
1849 + mmd->csum_seed = seed;
1850 + mmd->flags |= _NETIO_PKT_NEED_EDMA_CSUM_MASK;
1851 + }
1852 + 
1853 + 
1854 + /** Set up an egress packet for hardware checksum computation.
1855 + * @ingroup egress
1856 + *
1857 + * NetIO provides the ability to automatically calculate a standard
1858 + * 16-bit Internet checksum on transmitted packets. The application
1859 + * may specify the point in the packet where the checksum starts, the
1860 + * number of bytes to be checksummed, and the two bytes in the packet
1861 + * which will be replaced with the completed checksum. (If the range
1862 + * of bytes to be checksummed includes the bytes to be replaced, the
1863 + * initial values of those bytes will be included in the checksum.)
1864 + *
1865 + * For some protocols, the packet checksum covers data which is not present
1866 + * in the packet, or is at least not contiguous to the main data payload.
1867 + * For instance, the TCP checksum includes a "pseudo-header" which includes
1868 + * the source and destination IP addresses of the packet. To accommodate
1869 + * this, the checksum engine may be "seeded" with an initial value, which
1870 + * the application would need to compute based on the specific protocol's
1871 + * requirements. Note that the seed is given in host byte order (little-
1872 + * endian), not network byte order (big-endian); code written to compute a
1873 + * pseudo-header checksum in network byte order will need to byte-swap it
1874 + * before use as the seed.
1875 + *
1876 + * Note that the checksum is computed as part of the transmission process,
1877 + * so it will not be present in the packet upon completion of this routine.
1878 + *
1879 + * @param[in,out] pkt Packet on which to operate.
1880 + * @param[in] start Offset within L2 packet of the first byte to include in
1881 + * the checksum.
1882 + * @param[in] length Number of bytes to include in the checksum.
1883 + *
1884 + * @param[in] location Offset within L2 packet of the first of the two bytes
1885 + * to be replaced with the calculated checksum.
1886 + * @param[in] seed Initial value of the running checksum before any of the
1887 + * packet data is added.
1888 + */
1889 + static __inline void
1890 + NETIO_PKT_DO_EGRESS_CSUM(netio_pkt_t* pkt, int start, int length,
1891 + int location, uint16_t seed)
1892 + {
1893 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1894 + 
1895 + NETIO_PKT_DO_EGRESS_CSUM_MM(mmd, pkt, start, length, location, seed);
1896 + }
1897 + 
1898 + 
1899 + /** Return the number of bytes which could be prepended to a packet, using a
1900 + * metadata pointer to speed the operation.
1901 + * See @ref netio_populate_prepend_buffer() to get a full description of
1902 + * prepending.
1903 + *
1904 + * @param[in,out] mda Pointer to packet's standard metadata.
1905 + * @param[in] pkt Packet on which to operate.
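 *
 * A sketch: before prepending, say, an 8-byte encapsulation header,
 * a caller might first verify the available headroom:
 * @code
 * if (NETIO_PKT_PREPEND_AVAIL_M(mda, &pkt) >= 8) {
 *   // safe to prepend 8 bytes; see netio_populate_prepend_buffer()
 * }
 * @endcode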
1906 + */
1907 + static __inline int
1908 + NETIO_PKT_PREPEND_AVAIL_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
1909 + {
1910 + return (pkt->__packet.bits.__offset << 6) +
1911 + NETIO_PKT_CUSTOM_HEADER_LENGTH_M(mda, pkt);
1912 + }
1913 + 
1914 + 
1915 + /** Return the number of bytes which could be prepended to a packet, using a
1916 + * metadata pointer to speed the operation.
1917 + * See @ref netio_populate_prepend_buffer() to get a full description of
1918 + * prepending.
1919 + * @ingroup egress
1920 + *
1921 + * @param[in,out] mmd Pointer to packet's minimal metadata.
1922 + * @param[in] pkt Packet on which to operate.
1923 + */
1924 + static __inline int
1925 + NETIO_PKT_PREPEND_AVAIL_MM(netio_pkt_minimal_metadata_t* mmd, netio_pkt_t* pkt)
1926 + {
1927 + return (pkt->__packet.bits.__offset << 6) + mmd->l2_offset;
1928 + }
1929 + 
1930 + 
1931 + /** Return the number of bytes which could be prepended to a packet.
1932 + * See @ref netio_populate_prepend_buffer() to get a full description of
1933 + * prepending.
1934 + * @ingroup egress
1935 + *
1936 + * @param[in] pkt Packet on which to operate.
1937 + */
1938 + static __inline int
1939 + NETIO_PKT_PREPEND_AVAIL(netio_pkt_t* pkt)
1940 + {
1941 + if (NETIO_PKT_IS_MINIMAL(pkt))
1942 + {
1943 + netio_pkt_minimal_metadata_t* mmd = NETIO_PKT_MINIMAL_METADATA(pkt);
1944 + 
1945 + return NETIO_PKT_PREPEND_AVAIL_MM(mmd, pkt);
1946 + }
1947 + else
1948 + {
1949 + netio_pkt_metadata_t* mda = NETIO_PKT_METADATA(pkt);
1950 + 
1951 + return NETIO_PKT_PREPEND_AVAIL_M(mda, pkt);
1952 + }
1953 + }
1954 + 
1955 + 
1956 + /** Flush a packet's minimal metadata from the cache, using a metadata pointer
1957 + * to speed the operation.
1958 + * @ingroup egress
1959 + *
1960 + * @param[in] mmd Pointer to packet's minimal metadata.
1961 + * @param[in] pkt Packet on which to operate.
1962 + */
1963 + static __inline void
1964 + NETIO_PKT_FLUSH_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1965 + netio_pkt_t* pkt)
1966 + {
1967 + }
1968 + 
1969 + 
1970 + /** Invalidate a packet's minimal metadata from the cache, using a metadata
1971 + * pointer to speed the operation.
1972 + * @ingroup egress
1973 + *
1974 + * @param[in] mmd Pointer to packet's minimal metadata.
1975 + * @param[in] pkt Packet on which to operate.
1976 + */
1977 + static __inline void
1978 + NETIO_PKT_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1979 + netio_pkt_t* pkt)
1980 + {
1981 + }
1982 + 
1983 + 
1984 + /** Flush and then invalidate a packet's minimal metadata from the cache,
1985 + * using a metadata pointer to speed the operation.
1986 + * @ingroup egress
1987 + *
1988 + * @param[in] mmd Pointer to packet's minimal metadata.
1989 + * @param[in] pkt Packet on which to operate.
1990 + */
1991 + static __inline void
1992 + NETIO_PKT_FLUSH_INV_MINIMAL_METADATA_MM(netio_pkt_minimal_metadata_t* mmd,
1993 + netio_pkt_t* pkt)
1994 + {
1995 + }
1996 + 
1997 + 
1998 + /** Flush a packet's metadata from the cache, using a metadata pointer
1999 + * to speed the operation.
2000 + * @ingroup ingress
2001 + *
2002 + * @param[in] mda Pointer to packet's metadata.
2003 + * @param[in] pkt Packet on which to operate.
2004 + */
2005 + static __inline void
2006 + NETIO_PKT_FLUSH_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt)
2007 + {
2008 + }
2009 + 
2010 + 
2011 + /** Invalidate a packet's metadata from the cache, using a metadata
2012 + * pointer to speed the operation.
2013 + * @ingroup ingress
2014 + *
2015 + * @param[in] mda Pointer to packet's metadata.
2016 + * @param[in] pkt Packet on which to operate. 2017 + */ 2018 + static __inline void 2019 + NETIO_PKT_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 2020 + { 2021 + } 2022 + 2023 + 2024 + /** Flush and then invalidate a packet's metadata from the cache, 2025 + * using a metadata pointer to speed the operation. 2026 + * @ingroup ingress 2027 + * 2028 + * @param[in] mda Pointer to packet's metadata. 2029 + * @param[in] pkt Packet on which to operate. 2030 + */ 2031 + static __inline void 2032 + NETIO_PKT_FLUSH_INV_METADATA_M(netio_pkt_metadata_t* mda, netio_pkt_t* pkt) 2033 + { 2034 + } 2035 + 2036 + 2037 + /** Flush a packet's minimal metadata from the cache. 2038 + * @ingroup egress 2039 + * 2040 + * @param[in] pkt Packet on which to operate. 2041 + */ 2042 + static __inline void 2043 + NETIO_PKT_FLUSH_MINIMAL_METADATA(netio_pkt_t* pkt) 2044 + { 2045 + } 2046 + 2047 + 2048 + /** Invalidate a packet's minimal metadata from the cache. 2049 + * @ingroup egress 2050 + * 2051 + * @param[in] pkt Packet on which to operate. 2052 + */ 2053 + static __inline void 2054 + NETIO_PKT_INV_MINIMAL_METADATA(netio_pkt_t* pkt) 2055 + { 2056 + } 2057 + 2058 + 2059 + /** Flush and then invalidate a packet's minimal metadata from the cache. 2060 + * @ingroup egress 2061 + * 2062 + * @param[in] pkt Packet on which to operate. 2063 + */ 2064 + static __inline void 2065 + NETIO_PKT_FLUSH_INV_MINIMAL_METADATA(netio_pkt_t* pkt) 2066 + { 2067 + } 2068 + 2069 + 2070 + /** Flush a packet's metadata from the cache. 2071 + * @ingroup ingress 2072 + * 2073 + * @param[in] pkt Packet on which to operate. 2074 + */ 2075 + static __inline void 2076 + NETIO_PKT_FLUSH_METADATA(netio_pkt_t* pkt) 2077 + { 2078 + } 2079 + 2080 + 2081 + /** Invalidate a packet's metadata from the cache. 2082 + * @ingroup ingress 2083 + * 2084 + * @param[in] pkt Packet on which to operate. 2085 + */ 2086 + static __inline void 2087 + NETIO_PKT_INV_METADATA(netio_pkt_t* pkt) 2088 + { 2089 + } 2090 + 2091 + 2092 + /** Flush and then invalidate a packet's metadata from the cache. 2093 + * @ingroup ingress 2094 + * 2095 + * @param[in] pkt Packet on which to operate. 2096 + */ 2097 + static __inline void 2098 + NETIO_PKT_FLUSH_INV_METADATA(netio_pkt_t* pkt) 2099 + { 2100 + } 2101 + 2102 + /** Number of NUMA nodes we can distribute buffers to. 2103 + * @ingroup setup */ 2104 + #define NETIO_NUM_NODE_WEIGHTS 16 2105 + 2106 + /** 2107 + * @brief An object for specifying the characteristics of NetIO communication 2108 + * endpoint. 2109 + * 2110 + * @ingroup setup 2111 + * 2112 + * The @ref netio_input_register() function uses this structure to define 2113 + * how an application tile will communicate with an IPP. 2114 + * 2115 + * 2116 + * Future updates to NetIO may add new members to this structure, 2117 + * which can affect the success of the registration operation. Thus, 2118 + * if dynamically initializing the structure, applications are urged to 2119 + * zero it out first, for example: 2120 + * 2121 + * @code 2122 + * netio_input_config_t config; 2123 + * memset(&config, 0, sizeof (config)); 2124 + * config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE; 2125 + * config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS; 2126 + * config.queue_id = 0; 2127 + * . 2128 + * . 2129 + * . 2130 + * @endcode 2131 + * 2132 + * since that guarantees that any unused structure members, including 2133 + * members which did not exist when the application was first developed, 2134 + * will not have unexpected values. 
2135 + *
2136 + * If statically initializing the structure, we strongly recommend use of
2137 + * C99-style named initializers, for example:
2138 + *
2139 + * @code
2140 + * netio_input_config_t config = {
2141 + * .flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE,
2142 + * .num_receive_packets = NETIO_MAX_RECEIVE_PKTS,
2143 + * .queue_id = 0,
2144 + * };
2145 + * @endcode
2146 + *
2147 + * instead of the old-style structure initialization:
2148 + *
2149 + * @code
2150 + * // Bad example! Currently equivalent to the above, but don't do this.
2151 + * netio_input_config_t config = {
2152 + * NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE, NETIO_MAX_RECEIVE_PKTS, 0
2153 + * };
2154 + * @endcode
2155 + *
2156 + * since the C99 style requires no changes to the code if elements of the
2157 + * config structure are rearranged. (It also makes the initialization much
2158 + * easier to understand.)
2159 + *
2160 + * Except for items which address a particular tile's transmit or receive
2161 + * characteristics, such as the ::NETIO_RECV flag, applications are advised
2162 + * to specify the same set of configuration data on all registrations.
2163 + * This prevents differing results if multiple tiles happen to do their
2164 + * registration operations in a different order on different invocations of
2165 + * the application. This is particularly important for things like link
2166 + * management flags, and buffer size and homing specifications.
2167 + *
2168 + * Unless the ::NETIO_FIXED_BUFFER_VA flag is specified in flags, the NetIO
2169 + * buffer pool is automatically created and mapped into the application's
2170 + * virtual address space at an address chosen by the operating system,
2171 + * using the common memory (cmem) facility in the Tilera Multicore
2172 + * Components library. The cmem facility allows multiple processes to gain
2173 + * access to shared memory which is mapped into each process at an
2174 + * identical virtual address. In order for this to work, the processes
2175 + * must have a common ancestor, which must create the common memory using
2176 + * tmc_cmem_init().
2177 + *
2178 + * In programs using the iLib process creation API, or in programs which use
2179 + * only one process (which include programs using the pthreads library),
2180 + * tmc_cmem_init() is called automatically. All other applications
2181 + * must call it explicitly, before any child processes which might call
2182 + * netio_input_register() are created.
2183 + */
2184 + typedef struct
2185 + {
2186 + /** Registration characteristics.
2187 + 
2188 + This value determines several characteristics of the registration;
2189 + flags for different types of behavior are ORed together to make the
2190 + final flag value. Generally applications should specify exactly
2191 + one flag from each of the following categories:
2192 + 
2193 + - Whether the application will be receiving packets on this queue
2194 + (::NETIO_RECV or ::NETIO_NO_RECV).
2195 + 
2196 + - Whether the application will be transmitting packets on this queue,
2197 + and if so, whether it will request egress checksum calculation
2198 + (::NETIO_XMIT, ::NETIO_XMIT_CSUM, or ::NETIO_NO_XMIT). It is
2199 + legal to call netio_get_buffer() without one of the XMIT flags,
2200 + as long as ::NETIO_RECV is specified; in this case, the retrieved
2201 + buffers must be passed to another tile for transmission.
2202 + 2203 + - Whether the application expects any vendor-specific tags in 2204 + its packets' L2 headers (::NETIO_TAG_NONE, ::NETIO_TAG_BRCM, 2205 + or ::NETIO_TAG_MRVL). This must match the configuration of the 2206 + target IPP. 2207 + 2208 + To accommodate applications written to previous versions of the NetIO 2209 + interface, none of the flags above are currently required; if omitted, 2210 + NetIO behaves more or less as if ::NETIO_RECV | ::NETIO_XMIT_CSUM | 2211 + ::NETIO_TAG_NONE were used. However, explicit specification of 2212 + the relevant flags allows NetIO to do a better job of resource 2213 + allocation, allows earlier detection of certain configuration errors, 2214 + and may enable advanced features or higher performance in the future, 2215 + so their use is strongly recommended. 2216 + 2217 + Note that specifying ::NETIO_NO_RECV along with ::NETIO_NO_XMIT 2218 + is a special case, intended primarily for use by programs which 2219 + retrieve network statistics or do link management operations. 2220 + When these flags are both specified, the resulting queue may not 2221 + be used with NetIO routines other than netio_get(), netio_set(), 2222 + and netio_input_unregister(). See @ref link for more information 2223 + on link management. 2224 + 2225 + Other flags are optional; their use is described below. 2226 + */ 2227 + int flags; 2228 + 2229 + /** Interface name. This is a string which identifies the specific 2230 + Ethernet controller hardware to be used. The format of the string 2231 + is a device type and a device index, separated by a slash; so, 2232 + the first 10 Gigabit Ethernet controller is named "xgbe/0", while 2233 + the second 10/100/1000 Megabit Ethernet controller is named "gbe/1". 2234 + */ 2235 + const char* interface; 2236 + 2237 + /** Receive packet queue size. This specifies the maximum number 2238 + of ingress packets that can be received on this queue without 2239 + being retrieved by @ref netio_get_packet(). If the IPP's distribution 2240 + algorithm calls for a packet to be sent to this queue, and this 2241 + number of packets are already pending there, the new packet 2242 + will either be discarded, or sent to another tile registered 2243 + for the same queue_id (see @ref drops). This value must 2244 + be at least ::NETIO_MIN_RECEIVE_PKTS, can always be at least 2245 + ::NETIO_MAX_RECEIVE_PKTS, and may be larger than that on certain 2246 + interfaces. 2247 + */ 2248 + int num_receive_packets; 2249 + 2250 + /** The queue ID being requested. Legal values for this range from 0 2251 + to ::NETIO_MAX_QUEUE_ID, inclusive. ::NETIO_MAX_QUEUE_ID is always 2252 + greater than or equal to the number of tiles; this allows one queue 2253 + for each tile, plus at least one additional queue. Some applications 2254 + may wish to use the additional queue as a destination for unwanted 2255 + packets, since packets delivered to queues for which no tiles have 2256 + registered are discarded. 2257 + */ 2258 + unsigned int queue_id; 2259 + 2260 + /** Maximum number of small send buffers to be held in the local empty 2261 + buffer cache. This specifies the size of the area which holds 2262 + empty small egress buffers requested from the IPP but not yet 2263 + retrieved via @ref netio_get_buffer(). This value must be greater 2264 + than zero if the application will ever use @ref netio_get_buffer() 2265 + to allocate empty small egress buffers; it may be no larger than 2266 + ::NETIO_MAX_SEND_BUFFERS. See @ref epp for more details on empty 2267 + buffer caching. 
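
      For example, a tile which transmits small packets from this queue
      might use something like the following (the values are illustrative
      only, not recommendations from this header; the prealloc count must
      not exceed the total):

      @code
      config.num_send_buffers_small_total = NETIO_MAX_SEND_BUFFERS;
      config.num_send_buffers_small_prealloc = 4;
      @endcode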
2268 + */ 2269 + int num_send_buffers_small_total; 2270 + 2271 + /** Number of small send buffers to be preallocated at registration. 2272 + If this value is nonzero, the specified number of empty small egress 2273 + buffers will be requested from the IPP during the netio_input_register 2274 + operation; this may speed the execution of @ref netio_get_buffer(). 2275 + This may be no larger than @ref num_send_buffers_small_total. See @ref 2276 + epp for more details on empty buffer caching. 2277 + */ 2278 + int num_send_buffers_small_prealloc; 2279 + 2280 + /** Maximum number of large send buffers to be held in the local empty 2281 + buffer cache. This specifies the size of the area which holds empty 2282 + large egress buffers requested from the IPP but not yet retrieved via 2283 + @ref netio_get_buffer(). This value must be greater than zero if the 2284 + application will ever use @ref netio_get_buffer() to allocate empty 2285 + large egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. 2286 + See @ref epp for more details on empty buffer caching. 2287 + */ 2288 + int num_send_buffers_large_total; 2289 + 2290 + /** Number of large send buffers to be preallocated at registration. 2291 + If this value is nonzero, the specified number of empty large egress 2292 + buffers will be requested from the IPP during the netio_input_register 2293 + operation; this may speed the execution of @ref netio_get_buffer(). 2294 + This may be no larger than @ref num_send_buffers_large_total. See @ref 2295 + epp for more details on empty buffer caching. 2296 + */ 2297 + int num_send_buffers_large_prealloc; 2298 + 2299 + /** Maximum number of jumbo send buffers to be held in the local empty 2300 + buffer cache. This specifies the size of the area which holds empty 2301 + jumbo egress buffers requested from the IPP but not yet retrieved via 2302 + @ref netio_get_buffer(). This value must be greater than zero if the 2303 + application will ever use @ref netio_get_buffer() to allocate empty 2304 + jumbo egress buffers; it may be no larger than ::NETIO_MAX_SEND_BUFFERS. 2305 + See @ref epp for more details on empty buffer caching. 2306 + */ 2307 + int num_send_buffers_jumbo_total; 2308 + 2309 + /** Number of jumbo send buffers to be preallocated at registration. 2310 + If this value is nonzero, the specified number of empty jumbo egress 2311 + buffers will be requested from the IPP during the netio_input_register 2312 + operation; this may speed the execution of @ref netio_get_buffer(). 2313 + This may be no larger than @ref num_send_buffers_jumbo_total. See @ref 2314 + epp for more details on empty buffer caching. 2315 + */ 2316 + int num_send_buffers_jumbo_prealloc; 2317 + 2318 + /** Total packet buffer size. This determines the total size, in bytes, 2319 + of the NetIO buffer pool. Note that the maximum number of available 2320 + buffers of each size is determined during hypervisor configuration 2321 + (see the <em>System Programmer's Guide</em> for details); this just 2322 + influences how much host memory is allocated for those buffers. 2323 + 2324 + The buffer pool is allocated from common memory, which will be 2325 + automatically initialized if needed. If your buffer pool is larger 2326 + than 240 MB, you might need to explicitly call @c tmc_cmem_init(), 2327 + as described in the Application Libraries Reference Manual (UG227). 2328 + 2329 + Packet buffers are currently allocated in chunks of 16 MB; this 2330 + value will be rounded up to the next larger multiple of 16 MB. 
2331 + If this value is zero, a default of 32 MB will be used; this was
2332 + the value used by previous versions of NetIO. Note that taking this
2333 + default also affects the placement of buffers on Linux NUMA nodes.
2334 + See @ref buffer_node_weights for an explanation of buffer placement.
2335 + 
2336 + In order to successfully allocate packet buffers, Linux must have
2337 + available huge pages on the relevant Linux NUMA nodes. See the
2338 + <em>System Programmer's Guide</em> for information on configuring
2339 + huge page support in Linux.
2340 + */
2341 + uint64_t total_buffer_size;
2342 + 
2343 + /** Buffer placement weighting factors.
2344 + 
2345 + This array specifies the relative amount of buffering to place
2346 + on each of the available Linux NUMA nodes. This array is
2347 + indexed by the NUMA node, and the values in the array are
2348 + proportional to the amount of buffer space to allocate on that
2349 + node.
2350 + 
2351 + If memory striping is enabled in the Hypervisor, then there is
2352 + only one logical NUMA node (node 0). In that case, NetIO will by
2353 + default ignore the suggested buffer node weights, and buffers
2354 + will be striped across the physical memory controllers. See
2355 + UG209 System Programmer's Guide for a description of the
2356 + hypervisor option that controls memory striping.
2357 + 
2358 + If memory striping is disabled, then there are up to four NUMA
2359 + nodes, corresponding to the four DDRAM controllers in the TILE
2360 + processor architecture. See UG100 Tile Processor Architecture
2361 + Overview for a diagram showing the location of each of the DDRAM
2362 + controllers relative to the tile array.
2363 + 
2364 + For instance, if memory striping is disabled, the following
2365 + configuration structure:
2366 + 
2367 + @code
2368 + netio_input_config_t config = {
2369 + .
2370 + .
2371 + .
2372 + .total_buffer_size = 4 * 16 * 1024 * 1024,
2373 + .buffer_node_weights = { 1, 0, 1, 0 },
2374 + };
2375 + @endcode
2376 + 
2377 + would result in 32 MB of buffers being placed on controller 0, and
2378 + 32 MB on controller 2. (Since buffers are allocated in units of
2379 + 16 MB, some sets of weights will not be able to be matched exactly.)
2380 + 
2381 + For the weights to be effective, @ref total_buffer_size must be
2382 + nonzero. If @ref total_buffer_size is zero, causing the default
2383 + 32 MB of buffer space to be used, then any specified weights will
2384 + be ignored, and buffers will be positioned as they were in previous
2385 + versions of NetIO:
2386 + 
2387 + - For xgbe/0 and gbe/0, 16 MB of buffers will be placed on controller 1,
2388 + and the other 16 MB will be placed on controller 2.
2389 + 
2390 + - For xgbe/1 and gbe/1, 16 MB of buffers will be placed on controller 2,
2391 + and the other 16 MB will be placed on controller 3.
2392 + 
2393 + If @ref total_buffer_size is nonzero, but all weights are zero,
2394 + then all buffer space will be allocated on Linux NUMA node zero.
2395 + 
2396 + By default, the specified buffer placement is treated as a hint;
2397 + if sufficient free memory is not available on the specified
2398 + controllers, the buffers will be allocated elsewhere. However,
2399 + if the ::NETIO_STRICT_HOMING flag is specified in @ref flags, then a
2400 + failure to allocate buffer space exactly as requested will cause the
2401 + registration operation to fail with an error of ::NETIO_CANNOT_HOME.
2402 + 
2403 + Note that maximal network performance cannot be achieved with
2404 + only one memory controller.
2405 + */ 2406 + uint8_t buffer_node_weights[NETIO_NUM_NODE_WEIGHTS]; 2407 + 2408 + /** Fixed virtual address for packet buffers. Only valid when 2409 + ::NETIO_FIXED_BUFFER_VA is specified in @ref flags; see the 2410 + description of that flag for details. 2411 + */ 2412 + void* fixed_buffer_va; 2413 + 2414 + /** 2415 + Maximum number of outstanding send packet requests. This value is 2416 + only relevant when an EPP is in use; it determines the number of 2417 + slots in the EPP's outgoing packet queue which this tile is allowed 2418 + to consume, and thus the number of packets which may be sent before 2419 + the sending tile must wait for an acknowledgment from the EPP. 2420 + Modifying this value is generally only helpful when using @ref 2421 + netio_send_packet_vector(), where it can help improve performance by 2422 + allowing a single vector send operation to process more packets. 2423 + Typically it is not specified, and the default, which divides the 2424 + outgoing packet slots evenly between all tiles on the chip, is used. 2425 + 2426 + If a registration asks for more outgoing packet queue slots than are 2427 + available, ::NETIO_TOOMANY_XMIT will be returned. The total number 2428 + of packet queue slots which are available for all tiles for each EPP 2429 + is subject to change, but is currently ::NETIO_TOTAL_SENDS_OUTSTANDING. 2430 + 2431 + 2432 + This value is ignored if ::NETIO_XMIT is not specified in flags. 2433 + If you want to specify a large value here for a specific tile, you are 2434 + advised to specify NETIO_NO_XMIT on other, non-transmitting tiles so 2435 + that they do not consume a default number of packet slots. Any tile 2436 + transmitting is required to have at least ::NETIO_MIN_SENDS_OUTSTANDING 2437 + slots allocated to it; values less than that will be silently 2438 + increased by the NetIO library. 2439 + */ 2440 + int num_sends_outstanding; 2441 + } 2442 + netio_input_config_t; 2443 + 2444 + 2445 + /** Registration flags; used in the @ref netio_input_config_t structure. 2446 + * @addtogroup setup 2447 + */ 2448 + /** @{ */ 2449 + 2450 + /** Fail a registration request if we can't put packet buffers 2451 + on the specified memory controllers. */ 2452 + #define NETIO_STRICT_HOMING 0x00000002 2453 + 2454 + /** This application expects no tags on its L2 headers. */ 2455 + #define NETIO_TAG_NONE 0x00000004 2456 + 2457 + /** This application expects Marvell extended tags on its L2 headers. */ 2458 + #define NETIO_TAG_MRVL 0x00000008 2459 + 2460 + /** This application expects Broadcom tags on its L2 headers. */ 2461 + #define NETIO_TAG_BRCM 0x00000010 2462 + 2463 + /** This registration may call routines which receive packets. */ 2464 + #define NETIO_RECV 0x00000020 2465 + 2466 + /** This registration may not call routines which receive packets. */ 2467 + #define NETIO_NO_RECV 0x00000040 2468 + 2469 + /** This registration may call routines which transmit packets. */ 2470 + #define NETIO_XMIT 0x00000080 2471 + 2472 + /** This registration may call routines which transmit packets with 2473 + checksum acceleration. */ 2474 + #define NETIO_XMIT_CSUM 0x00000100 2475 + 2476 + /** This registration may not call routines which transmit packets. */ 2477 + #define NETIO_NO_XMIT 0x00000200 2478 + 2479 + /** This registration wants NetIO buffers mapped at an application-specified 2480 + virtual address. 
2481 + 2482 + NetIO buffers are by default created by the TMC common memory facility, 2483 + which must be configured by a common ancestor of all processes sharing 2484 + a network interface. When this flag is specified, NetIO buffers are 2485 + instead mapped at an address chosen by the application (and specified 2486 + in @ref netio_input_config_t::fixed_buffer_va). This allows multiple 2487 + unrelated but cooperating processes to share a NetIO interface. 2488 + All processes sharing the same interface must specify this flag, 2489 + and all must specify the same fixed virtual address. 2490 + 2491 + @ref netio_input_config_t::fixed_buffer_va must be a 2492 + multiple of 16 MB, and the packet buffers will occupy @ref 2493 + netio_input_config_t::total_buffer_size bytes of virtual address 2494 + space, beginning at that address. If any of those virtual addresses 2495 + are currently occupied by other memory objects, like application or 2496 + shared library code or data, @ref netio_input_register() will return 2497 + ::NETIO_FAULT. While it is impossible to provide a fixed_buffer_va 2498 + which will work for all applications, a good first guess might be to 2499 + use 0xb0000000 minus @ref netio_input_config_t::total_buffer_size. 2500 + If that fails, it might be helpful to consult the running application's 2501 + virtual address description file (/proc/<em>pid</em>/maps) to see 2502 + which regions of virtual address space are available. 2503 + */ 2504 + #define NETIO_FIXED_BUFFER_VA 0x00000400 2505 + 2506 + /** This registration call will not complete unless the network link 2507 + is up. The process will wait several seconds for this to happen (the 2508 + precise interval is link-dependent), but if the link does not come up, 2509 + ::NETIO_LINK_DOWN will be returned. This flag is the default if 2510 + ::NETIO_NOREQUIRE_LINK_UP is not specified. Note that this flag by 2511 + itself does not request that the link be brought up; that can be done 2512 + with the ::NETIO_AUTO_LINK_UPDN or ::NETIO_AUTO_LINK_UP flags (the 2513 + latter is the default if no NETIO_AUTO_LINK_xxx flags are specified), 2514 + or by explicitly setting the link's desired state via netio_set(). 2515 + If the link is not brought up by one of those methods, and this flag 2516 + is specified, the registration operation will return ::NETIO_LINK_DOWN. 2517 + This flag is ignored if it is specified along with ::NETIO_NO_XMIT and 2518 + ::NETIO_NO_RECV. See @ref link for more information on link 2519 + management. 2520 + */ 2521 + #define NETIO_REQUIRE_LINK_UP 0x00000800 2522 + 2523 + /** This registration call will complete even if the network link is not up. 2524 + Whenever the link is not up, packets will not be sent or received: 2525 + netio_get_packet() will return ::NETIO_NOPKT once all queued packets 2526 + have been drained, and netio_send_packet() and similar routines will 2527 + return NETIO_QUEUE_FULL once the outgoing packet queue in the EPP 2528 + or the I/O shim is full. See @ref link for more information on link 2529 + management. 2530 + */ 2531 + #define NETIO_NOREQUIRE_LINK_UP 0x00001000 2532 + 2533 + #ifndef __DOXYGEN__ 2534 + /* 2535 + * These are part of the implementation of the NETIO_AUTO_LINK_xxx flags, 2536 + * but should not be used directly by applications, and are thus not 2537 + * documented. 
2538 + */ 2539 + #define _NETIO_AUTO_UP 0x00002000 2540 + #define _NETIO_AUTO_DN 0x00004000 2541 + #define _NETIO_AUTO_PRESENT 0x00008000 2542 + #endif 2543 + 2544 + /** Set the desired state of the link to up, allowing any speeds which are 2545 + supported by the link hardware, as part of this registration operation. 2546 + Do not take down the link automatically. This is the default if 2547 + no other NETIO_AUTO_LINK_xxx flags are specified. This flag is ignored 2548 + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. 2549 + See @ref link for more information on link management. 2550 + */ 2551 + #define NETIO_AUTO_LINK_UP (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP) 2552 + 2553 + /** Set the desired state of the link to up, allowing any speeds which are 2554 + supported by the link hardware, as part of this registration operation. 2555 + Set the desired state of the link to down the next time no tiles are 2556 + registered for packet reception or transmission. This flag is ignored 2557 + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. 2558 + See @ref link for more information on link management. 2559 + */ 2560 + #define NETIO_AUTO_LINK_UPDN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_UP | \ 2561 + _NETIO_AUTO_DN) 2562 + 2563 + /** Set the desired state of the link to down the next time no tiles are 2564 + registered for packet reception or transmission. This flag is ignored 2565 + if it is specified along with ::NETIO_NO_XMIT and ::NETIO_NO_RECV. 2566 + See @ref link for more information on link management. 2567 + */ 2568 + #define NETIO_AUTO_LINK_DN (_NETIO_AUTO_PRESENT | _NETIO_AUTO_DN) 2569 + 2570 + /** Do not bring up the link automatically as part of this registration 2571 + operation. Do not take down the link automatically. This flag 2572 + is ignored if it is specified along with ::NETIO_NO_XMIT and 2573 + ::NETIO_NO_RECV. See @ref link for more information on link management. 2574 + */ 2575 + #define NETIO_AUTO_LINK_NONE _NETIO_AUTO_PRESENT 2576 + 2577 + 2578 + /** Minimum number of receive packets. */ 2579 + #define NETIO_MIN_RECEIVE_PKTS 16 2580 + 2581 + /** Lower bound on the maximum number of receive packets; may be higher 2582 + than this on some interfaces. */ 2583 + #define NETIO_MAX_RECEIVE_PKTS 128 2584 + 2585 + /** Maximum number of send buffers, per packet size. */ 2586 + #define NETIO_MAX_SEND_BUFFERS 16 2587 + 2588 + /** Number of EPP queue slots, and thus outstanding sends, per EPP. */ 2589 + #define NETIO_TOTAL_SENDS_OUTSTANDING 2015 2590 + 2591 + /** Minimum number of EPP queue slots, and thus outstanding sends, per 2592 + * transmitting tile. */ 2593 + #define NETIO_MIN_SENDS_OUTSTANDING 16 2594 + 2595 + 2596 + /**@}*/ 2597 + 2598 + #ifndef __DOXYGEN__ 2599 + 2600 + /** 2601 + * An object for providing Ethernet packets to a process. 2602 + */ 2603 + struct __netio_queue_impl_t; 2604 + 2605 + /** 2606 + * An object for managing the user end of a NetIO queue. 2607 + */ 2608 + struct __netio_queue_user_impl_t; 2609 + 2610 + #endif /* !__DOXYGEN__ */ 2611 + 2612 + 2613 + /** A netio_queue_t describes a NetIO communications endpoint. 2614 + * @ingroup setup 2615 + */ 2616 + typedef struct 2617 + { 2618 + #ifdef __DOXYGEN__ 2619 + uint8_t opaque[8]; /**< This is an opaque structure. */ 2620 + #else 2621 + struct __netio_queue_impl_t* __system_part; /**< The system part. */ 2622 + struct __netio_queue_user_impl_t* __user_part; /**< The user part. */ 2623 + #ifdef _NETIO_PTHREAD 2624 + _netio_percpu_mutex_t lock; /**< Queue lock. 
*/ 2625 + #endif 2626 + #endif 2627 + } 2628 + netio_queue_t; 2629 + 2630 + 2631 + /** 2632 + * @brief Packet send context. 2633 + * 2634 + * @ingroup egress 2635 + * 2636 + * Packet send context for use with netio_send_packet_prepare and _commit. 2637 + */ 2638 + typedef struct 2639 + { 2640 + #ifdef __DOXYGEN__ 2641 + uint8_t opaque[44]; /**< This is an opaque structure. */ 2642 + #else 2643 + uint8_t flags; /**< Defined below */ 2644 + uint8_t datalen; /**< Number of valid words pointed to by data. */ 2645 + uint32_t request[9]; /**< Request to be sent to the EPP or shim. Note 2646 + that this is smaller than the 11-word maximum 2647 + request size, since some constant values are 2648 + not saved in the context. */ 2649 + uint32_t *data; /**< Data to be sent to the EPP or shim via IDN. */ 2650 + #endif 2651 + } 2652 + netio_send_pkt_context_t; 2653 + 2654 + 2655 + #ifndef __DOXYGEN__ 2656 + #define SEND_PKT_CTX_USE_EPP 1 /**< We're sending to an EPP. */ 2657 + #define SEND_PKT_CTX_SEND_CSUM 2 /**< Request includes a checksum. */ 2658 + #endif 2659 + 2660 + /** 2661 + * @brief Packet vector entry. 2662 + * 2663 + * @ingroup egress 2664 + * 2665 + * This data structure is used with netio_send_packet_vector() to send multiple 2666 + * packets with one NetIO call. The structure should be initialized by 2667 + * calling netio_pkt_vector_set(), rather than by setting the fields 2668 + * directly. 2669 + * 2670 + * This structure is guaranteed to be a power of two in size, no 2671 + * bigger than one L2 cache line, and to be aligned modulo its size. 2672 + */ 2673 + typedef struct 2674 + #ifndef __DOXYGEN__ 2675 + __attribute__((aligned(8))) 2676 + #endif 2677 + { 2678 + /** Reserved for use by the user application. When initialized with 2679 + * the netio_set_pkt_vector_entry() function, this field is guaranteed 2680 + * to be visible to readers only after all other fields are already 2681 + * visible. This way it can be used as a valid flag or generation 2682 + * counter. */ 2683 + uint8_t user_data; 2684 + 2685 + /* Structure members below this point should not be accessed directly by 2686 + * applications, as they may change in the future. */ 2687 + 2688 + /** Low 8 bits of the packet address to send. The high bits are 2689 + * acquired from the 'handle' field. */ 2690 + uint8_t buffer_address_low; 2691 + 2692 + /** Number of bytes to transmit. */ 2693 + uint16_t size; 2694 + 2695 + /** The raw handle from a netio_pkt_t. If this is NETIO_PKT_HANDLE_NONE, 2696 + * this vector entry will be skipped and no packet will be transmitted. */ 2697 + netio_pkt_handle_t handle; 2698 + } 2699 + netio_pkt_vector_entry_t; 2700 + 2701 + 2702 + /** 2703 + * @brief Initialize fields in a packet vector entry. 2704 + * 2705 + * @ingroup egress 2706 + * 2707 + * @param[out] v Pointer to the vector entry to be initialized. 2708 + * @param[in] pkt Packet to be transmitted when the vector entry is passed to 2709 + * netio_send_packet_vector(). Note that the packet's attributes 2710 + * (e.g., its L2 offset and length) are captured at the time this 2711 + * routine is called; subsequent changes in those attributes will not 2712 + * be reflected in the packet which is actually transmitted. 2713 + * Changes in the packet's contents, however, will be so reflected. 2714 + * If this is NULL, no packet will be transmitted. 2715 + * @param[in] user_data User data to be set in the vector entry. 
2716 + * This function guarantees that the "user_data" field will become 2717 + * visible to a reader only after all other fields have become visible. 2718 + * This allows a structure in a ring buffer to be written and read 2719 + * by a polling reader without any locks or other synchronization. 2720 + */ 2721 + static __inline void 2722 + netio_pkt_vector_set(volatile netio_pkt_vector_entry_t* v, netio_pkt_t* pkt, 2723 + uint8_t user_data) 2724 + { 2725 + if (pkt) 2726 + { 2727 + if (NETIO_PKT_IS_MINIMAL(pkt)) 2728 + { 2729 + netio_pkt_minimal_metadata_t* mmd = 2730 + (netio_pkt_minimal_metadata_t*) &pkt->__metadata; 2731 + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_MM(mmd, pkt) & 0xFF; 2732 + v->size = NETIO_PKT_L2_LENGTH_MM(mmd, pkt); 2733 + } 2734 + else 2735 + { 2736 + netio_pkt_metadata_t* mda = &pkt->__metadata; 2737 + v->buffer_address_low = (uintptr_t) NETIO_PKT_L2_DATA_M(mda, pkt) & 0xFF; 2738 + v->size = NETIO_PKT_L2_LENGTH_M(mda, pkt); 2739 + } 2740 + v->handle.word = pkt->__packet.word; 2741 + } 2742 + else 2743 + { 2744 + v->handle.word = 0; /* Set handle to NETIO_PKT_HANDLE_NONE. */ 2745 + } 2746 + 2747 + __asm__("" : : : "memory"); 2748 + 2749 + v->user_data = user_data; 2750 + } 2751 + 2752 + 2753 + /** 2754 + * Flags and structures for @ref netio_get() and @ref netio_set(). 2755 + * @ingroup config 2756 + */ 2757 + 2758 + /** @{ */ 2759 + /** Parameter class; addr is a NETIO_PARAM_xxx value. */ 2760 + #define NETIO_PARAM 0 2761 + /** Interface MAC address. This address is only valid with @ref netio_get(). 2762 + * The value is a 6-byte MAC address. Depending upon the overall system 2763 + * design, a MAC address may or may not be available for each interface. */ 2764 + #define NETIO_PARAM_MAC 0 2765 + 2766 + /** Determine whether to suspend output on the receipt of pause frames. 2767 + * If the value is nonzero, the I/O shim will suspend output when a pause 2768 + * frame is received. If the value is zero, pause frames will be ignored. */ 2769 + #define NETIO_PARAM_PAUSE_IN 1 2770 + 2771 + /** Determine whether to send pause frames if the I/O shim packet FIFOs are 2772 + * nearly full. If the value is zero, pause frames are not sent. If 2773 + * the value is nonzero, it is the delay value which will be sent in any 2774 + * pause frames which are output, in units of 512 bit times. */ 2775 + #define NETIO_PARAM_PAUSE_OUT 2 2776 + 2777 + /** Jumbo frame support. The value is a 4-byte integer. If the value is 2778 + * nonzero, the MAC will accept frames of up to 10240 bytes. If the value 2779 + * is zero, the MAC will only accept frames of up to 1544 bytes. */ 2780 + #define NETIO_PARAM_JUMBO 3 2781 + 2782 + /** I/O shim's overflow statistics register. The value is two 16-bit integers. 2783 + * The first 16-bit value (or the low 16 bits, if the value is treated as a 2784 + * 32-bit number) is the count of packets which were completely dropped and 2785 + * not delivered by the shim. The second 16-bit value (or the high 16 bits, 2786 + * if the value is treated as a 32-bit number) is the count of packets 2787 + * which were truncated and thus only partially delivered by the shim. This 2788 + * register is automatically reset to zero after it has been read. 2789 + */ 2790 + #define NETIO_PARAM_OVERFLOW 4 2791 + 2792 + /** IPP statistics. This address is only valid with @ref netio_get(). The 2793 + * value is a netio_stat_t structure. 
Unlike the I/O shim statistics, the
2794 + * IPP statistics are not all reset to zero on read; see the description
2795 + * of the netio_stat_t for details. */
2796 + #define NETIO_PARAM_STAT 5
2797 + 
2798 + /** Possible link state. The value is a combination of "NETIO_LINK_xxx"
2799 + * flags. With @ref netio_get(), this will indicate which flags are
2800 + * actually supported by the hardware.
2801 + *
2802 + * For historical reasons, specifying this value to netio_set() will have
2803 + * the same behavior as using ::NETIO_PARAM_LINK_CONFIG, but this usage is
2804 + * discouraged.
2805 + */
2806 + #define NETIO_PARAM_LINK_POSSIBLE_STATE 6
2807 + 
2808 + /** Link configuration. The value is a combination of "NETIO_LINK_xxx" flags.
2809 + * With @ref netio_set(), this will attempt to immediately bring up the
2810 + * link using whichever of the requested flags are supported by the
2811 + * hardware, or take down the link if the flags are zero; if this is
2812 + * not possible, an error will be returned. Many programs will want
2813 + * to use ::NETIO_PARAM_LINK_DESIRED_STATE instead.
2814 + *
2815 + * For historical reasons, specifying this value to netio_get() will
2816 + * have the same behavior as using ::NETIO_PARAM_LINK_POSSIBLE_STATE,
2817 + * but this usage is discouraged.
2818 + */
2819 + #define NETIO_PARAM_LINK_CONFIG NETIO_PARAM_LINK_POSSIBLE_STATE
2820 + 
2821 + /** Current link state. This address is only valid with @ref netio_get().
2822 + * The value is zero or more of the "NETIO_LINK_xxx" flags, ORed together.
2823 + * If the link is down, the value ANDed with NETIO_LINK_SPEED will be
2824 + * zero; if the link is up, the value ANDed with NETIO_LINK_SPEED will
2825 + * result in exactly one of the NETIO_LINK_xxx values, indicating the
2826 + * current speed. */
2827 + #define NETIO_PARAM_LINK_CURRENT_STATE 7
2828 + 
2829 + /** Variant symbol for current state, retained for compatibility with
2830 + * pre-MDE-2.1 programs. */
2831 + #define NETIO_PARAM_LINK_STATUS NETIO_PARAM_LINK_CURRENT_STATE
2832 + 
2833 + /** Packet Coherence protocol. This address is only valid with @ref netio_get().
2834 + * The value is nonzero if the interface is configured for cache-coherent DMA.
2835 + */
2836 + #define NETIO_PARAM_COHERENT 8
2837 + 
2838 + /** Desired link state. The value is a combination of "NETIO_LINK_xxx"
2839 + * flags, which specify the desired state for the link. With @ref
2840 + * netio_set(), this will, in the background, attempt to bring up the link
2841 + * using whichever of the requested flags are reasonable, or take down the
2842 + * link if the flags are zero. The actual link up or down operation may
2843 + * happen after this call completes. If the link state changes in the
2844 + * future, the system will continue to try to get back to the desired link
2845 + * state; for instance, if the link is brought up successfully, and then
2846 + * the network cable is disconnected, the link will go down. However, the
2847 + * desired state of the link is still up, so if the cable is reconnected,
2848 + * the link will be brought up again.
2849 + *
2850 + * With @ref netio_get(), this will indicate the desired state for the
2851 + * link, as set with a previous netio_set() call, or implicitly by a
2852 + * netio_input_register() or netio_input_unregister() operation. This may
2853 + * not reflect the current state of the link; to get that, use
2854 + * ::NETIO_PARAM_LINK_CURRENT_STATE.
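 *
 * For example, to request in the background that the link come up at
 * any supported speed (a hypothetical sketch: the argument layout of
 * netio_set() is assumed here rather than shown in this header, and
 * should be checked against its own documentation):
 *
 * @code
 * uint64_t up = NETIO_LINK_ANYSPEED;  // desired state: up, any speed
 * netio_set(&queue, NETIO_PARAM, NETIO_PARAM_LINK_DESIRED_STATE, &up);
 * @endcode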
*/ 2855 + #define NETIO_PARAM_LINK_DESIRED_STATE 9 2856 + 2857 + /** NetIO statistics structure. Retrieved using the ::NETIO_PARAM_STAT 2858 + * address passed to @ref netio_get(). */ 2859 + typedef struct 2860 + { 2861 + /** Number of packets which have been received by the IPP and forwarded 2862 + * to a tile's receive queue for processing. This value wraps at its 2863 + * maximum, and is not cleared upon read. */ 2864 + uint32_t packets_received; 2865 + 2866 + /** Number of packets which have been dropped by the IPP, because they could 2867 + * not be received, or could not be forwarded to a tile. The former happens 2868 + * when the IPP does not have a free packet buffer of suitable size for an 2869 + * incoming frame. The latter happens when all potential destination tiles 2870 + * for a packet, as defined by the group, bucket, and queue configuration, 2871 + * have full receive queues. This value wraps at its maximum, and is not 2872 + * cleared upon read. */ 2873 + uint32_t packets_dropped; 2874 + 2875 + /* 2876 + * Note: the #defines after each of the following four one-byte values 2877 + * denote their location within the third word of the netio_stat_t. They 2878 + * are intended for use only by the IPP implementation and are thus omitted 2879 + * from the Doxygen output. 2880 + */ 2881 + 2882 + /** Number of packets dropped because no worker was able to accept a new 2883 + * packet. This value saturates at its maximum, and is cleared upon 2884 + * read. */ 2885 + uint8_t drops_no_worker; 2886 + #ifndef __DOXYGEN__ 2887 + #define NETIO_STAT_DROPS_NO_WORKER 0 2888 + #endif 2889 + 2890 + /** Number of packets dropped because no small buffers were available. 2891 + * This value saturates at its maximum, and is cleared upon read. */ 2892 + uint8_t drops_no_smallbuf; 2893 + #ifndef __DOXYGEN__ 2894 + #define NETIO_STAT_DROPS_NO_SMALLBUF 1 2895 + #endif 2896 + 2897 + /** Number of packets dropped because no large buffers were available. 2898 + * This value saturates at its maximum, and is cleared upon read. */ 2899 + uint8_t drops_no_largebuf; 2900 + #ifndef __DOXYGEN__ 2901 + #define NETIO_STAT_DROPS_NO_LARGEBUF 2 2902 + #endif 2903 + 2904 + /** Number of packets dropped because no jumbo buffers were available. 2905 + * This value saturates at its maximum, and is cleared upon read. */ 2906 + uint8_t drops_no_jumbobuf; 2907 + #ifndef __DOXYGEN__ 2908 + #define NETIO_STAT_DROPS_NO_JUMBOBUF 3 2909 + #endif 2910 + } 2911 + netio_stat_t; 2912 + 2913 + 2914 + /** Link can run, should run, or is running at 10 Mbps. */ 2915 + #define NETIO_LINK_10M 0x01 2916 + 2917 + /** Link can run, should run, or is running at 100 Mbps. */ 2918 + #define NETIO_LINK_100M 0x02 2919 + 2920 + /** Link can run, should run, or is running at 1 Gbps. */ 2921 + #define NETIO_LINK_1G 0x04 2922 + 2923 + /** Link can run, should run, or is running at 10 Gbps. */ 2924 + #define NETIO_LINK_10G 0x08 2925 + 2926 + /** Link should run at the highest speed supported by the link and by 2927 + * the device connected to the link. Only usable as a value for 2928 + * the link's desired state; never returned as a value for the current 2929 + * or possible states. */ 2930 + #define NETIO_LINK_ANYSPEED 0x10 2931 + 2932 + /** All legal link speeds. */ 2933 + #define NETIO_LINK_SPEED (NETIO_LINK_10M | \ 2934 + NETIO_LINK_100M | \ 2935 + NETIO_LINK_1G | \ 2936 + NETIO_LINK_10G | \ 2937 + NETIO_LINK_ANYSPEED) 2938 + 2939 + 2940 + /** MAC register class. Addr is a register offset within the MAC. 
2941 + * Registers within the XGbE and GbE MACs are documented in the Tile 2942 + * Processor I/O Device Guide (UG104). MAC registers start at address 2943 + * 0x4000, and do not include the MAC_INTERFACE registers. */ 2944 + #define NETIO_MAC 1 2945 + 2946 + /** MDIO register class (IEEE 802.3 clause 22 format). Addr is the "addr" 2947 + * member of a netio_mdio_addr_t structure. */ 2948 + #define NETIO_MDIO 2 2949 + 2950 + /** MDIO register class (IEEE 802.3 clause 45 format). Addr is the "addr" 2951 + * member of a netio_mdio_addr_t structure. */ 2952 + #define NETIO_MDIO_CLAUSE45 3 2953 + 2954 + /** NetIO MDIO address type. Retrieved or provided using the ::NETIO_MDIO 2955 + * address passed to @ref netio_get() or @ref netio_set(). */ 2956 + typedef union 2957 + { 2958 + struct 2959 + { 2960 + unsigned int reg:16; /**< MDIO register offset. For clause 22 access, 2961 + must be less than 32. */ 2962 + unsigned int phy:5; /**< Which MDIO PHY to access. */ 2963 + unsigned int dev:5; /**< Which MDIO device to access within that PHY. 2964 + Applicable for clause 45 access only; ignored 2965 + for clause 22 access. */ 2966 + } 2967 + bits; /**< Container for bitfields. */ 2968 + uint64_t addr; /**< Value to pass to @ref netio_get() or 2969 + * @ref netio_set(). */ 2970 + } 2971 + netio_mdio_addr_t; 2972 + 2973 + /** @} */ 2974 + 2975 + #endif /* __NETIO_INTF_H__ */
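Putting the registration pieces above together, a minimal userspace sketch
might look as follows. It follows the zero-fill advice from the
netio_input_config_t description, but the include path, the
netio_input_register() argument order, and the NETIO_NO_ERROR success code
are assumptions for illustration, not taken from this header:

	#include <string.h>
	#include <netio/netio.h>	/* hypothetical NetIO header path */

	static int my_register(netio_queue_t *queue)
	{
		netio_input_config_t config;

		/* Zero unused (and future) members first. */
		memset(&config, 0, sizeof(config));
		config.flags = NETIO_RECV | NETIO_XMIT_CSUM | NETIO_TAG_NONE;
		config.interface = "xgbe/0";
		config.num_receive_packets = NETIO_MAX_RECEIVE_PKTS;
		config.queue_id = 0;

		/* Hypothetical call; verify the real prototype and codes. */
		return netio_input_register(&config, queue) == NETIO_NO_ERROR ?
			0 : -1;
	}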
+6 -2
arch/tile/mm/init.c
··· 
988 988 /* Select whether to free (1) or mark unusable (0) the __init pages. */
989 989 static int __init set_initfree(char *str)
990 990 {
991 - strict_strtol(str, 0, &initfree);
992 - pr_info("initfree: %s free init pages\n", initfree ? "will" : "won't");
991 + long val;
992 + if (strict_strtol(str, 0, &val) == 0) {
993 + initfree = val;
994 + pr_info("initfree: %s free init pages\n",
995 + initfree ? "will" : "won't");
996 + }
993 997 return 1;
994 998 }
995 999 __setup("initfree=", set_initfree);
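For reference, the convention the success test above relies on (a minimal
sketch, not part of the patch): strict_strtol() returns 0 on success and a
negative errno on failure, so the parsed value may only be consumed when
the call returns zero:

	long val;

	/* strict_strtol() returns 0 on success, negative errno on failure. */
	if (strict_strtol("42", 0, &val) == 0)
		pr_info("parsed %ld\n", val);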
+12
drivers/net/Kconfig
··· 2945 2945 2946 2946 source "drivers/net/caif/Kconfig" 2947 2947 2948 + config TILE_NET 2949 + tristate "Tilera GBE/XGBE network driver support" 2950 + depends on TILE 2951 + default y 2952 + select CRC32 2953 + help 2954 + This is a standard Linux network device driver for the 2955 + on-chip Tilera Gigabit Ethernet and XAUI interfaces. 2956 + 2957 + To compile this driver as a module, choose M here: the module 2958 + will be called tile_net. 2959 + 2948 2960 config XEN_NETDEV_FRONTEND 2949 2961 tristate "Xen network device frontend driver" 2950 2962 depends on XEN
+1
drivers/net/Makefile
··· 301 301 302 302 obj-$(CONFIG_OCTEON_MGMT_ETHERNET) += octeon/ 303 303 obj-$(CONFIG_PCH_GBE) += pch_gbe/ 304 + obj-$(CONFIG_TILE_NET) += tile/
+10
drivers/net/tile/Makefile
··· 1 + # 2 + # Makefile for the TILE on-chip networking support. 3 + # 4 + 5 + obj-$(CONFIG_TILE_NET) += tile_net.o 6 + ifdef CONFIG_TILEGX 7 + tile_net-objs := tilegx.o mpipe.o iorpc_mpipe.o dma_queue.o 8 + else 9 + tile_net-objs := tilepro.o 10 + endif
+2406
drivers/net/tile/tilepro.c
··· 1 + /* 2 + * Copyright 2010 Tilera Corporation. All Rights Reserved. 3 + * 4 + * This program is free software; you can redistribute it and/or 5 + * modify it under the terms of the GNU General Public License 6 + * as published by the Free Software Foundation, version 2. 7 + * 8 + * This program is distributed in the hope that it will be useful, but 9 + * WITHOUT ANY WARRANTY; without even the implied warranty of 10 + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or 11 + * NON INFRINGEMENT. See the GNU General Public License for 12 + * more details. 13 + */ 14 + 15 + #include <linux/module.h> 16 + #include <linux/init.h> 17 + #include <linux/moduleparam.h> 18 + #include <linux/sched.h> 19 + #include <linux/kernel.h> /* printk() */ 20 + #include <linux/slab.h> /* kmalloc() */ 21 + #include <linux/errno.h> /* error codes */ 22 + #include <linux/types.h> /* size_t */ 23 + #include <linux/interrupt.h> 24 + #include <linux/in.h> 25 + #include <linux/netdevice.h> /* struct device, and other headers */ 26 + #include <linux/etherdevice.h> /* eth_type_trans */ 27 + #include <linux/skbuff.h> 28 + #include <linux/ioctl.h> 29 + #include <linux/cdev.h> 30 + #include <linux/hugetlb.h> 31 + #include <linux/in6.h> 32 + #include <linux/timer.h> 33 + #include <linux/io.h> 34 + #include <asm/checksum.h> 35 + #include <asm/homecache.h> 36 + 37 + #include <hv/drv_xgbe_intf.h> 38 + #include <hv/drv_xgbe_impl.h> 39 + #include <hv/hypervisor.h> 40 + #include <hv/netio_intf.h> 41 + 42 + /* For TSO */ 43 + #include <linux/ip.h> 44 + #include <linux/tcp.h> 45 + 46 + 47 + /* There is no singlethread_cpu, so schedule work on the current cpu. */ 48 + #define singlethread_cpu -1 49 + 50 + 51 + /* 52 + * First, "tile_net_init_module()" initializes all four "devices" which 53 + * can be used by linux. 54 + * 55 + * Then, "ifconfig DEVICE up" calls "tile_net_open()", which analyzes 56 + * the network cpus, then uses "tile_net_open_aux()" to initialize 57 + * LIPP/LEPP, and then uses "tile_net_open_inner()" to register all 58 + * the tiles, provide buffers to LIPP, allow ingress to start, and 59 + * turn on hypervisor interrupt handling (and NAPI) on all tiles. 60 + * 61 + * If registration fails due to the link being down, then "retry_work" 62 + * is used to keep calling "tile_net_open_inner()" until it succeeds. 63 + * 64 + * If "ifconfig DEVICE down" is called, it uses "tile_net_stop()" to 65 + * stop egress, drain the LIPP buffers, unregister all the tiles, stop 66 + * LIPP/LEPP, and wipe the LEPP queue. 67 + * 68 + * We start out with the ingress interrupt enabled on each CPU. When 69 + * this interrupt fires, we disable it, and call "napi_schedule()". 70 + * This will cause "tile_net_poll()" to be called, which will pull 71 + * packets from the netio queue, filtering them out, or passing them 72 + * to "netif_receive_skb()". If our budget is exhausted, we will 73 + * return, knowing we will be called again later. Otherwise, we 74 + * reenable the ingress interrupt, and call "napi_complete()". 75 + * 76 + * 77 + * NOTE: The use of "native_driver" ensures that EPP exists, and that 78 + * "epp_sendv" is legal, and that "LIPP" is being used. 79 + * 80 + * NOTE: Failing to free completions for an arbitrarily long time 81 + * (which is defined to be illegal) does in fact cause bizarre 82 + * problems. The "egress_timer" helps prevent this from happening. 83 + * 84 + * NOTE: The egress code can be interrupted by the interrupt handler. 85 + */ 86 + 87 + 88 + /* HACK: Allow use of "jumbo" packets. 
*/
89 + /* This should be 1500 if "jumbo" is not set in LIPP. */
90 + /* This should be at most 10226 (10240 - 14) if "jumbo" is set in LIPP. */
91 + /* ISSUE: This has not been thoroughly tested (except at 1500). */
92 + #define TILE_NET_MTU 1500
93 + 
94 + /* HACK: Define to support GSO. */
95 + /* ISSUE: This may actually hurt performance of the TCP blaster. */
96 + /* #define TILE_NET_GSO */
97 + 
98 + /* Define this to collapse "duplicate" acks. */
99 + /* #define IGNORE_DUP_ACKS */
100 + 
101 + /* HACK: Define this to verify incoming packets. */
102 + /* #define TILE_NET_VERIFY_INGRESS */
103 + 
104 + /* Use 3000 to enable the Linux Traffic Control (QoS) layer, else 0. */
105 + #define TILE_NET_TX_QUEUE_LEN 0
106 + 
107 + /* Define to dump packets (prints out the whole packet on tx and rx). */
108 + /* #define TILE_NET_DUMP_PACKETS */
109 + 
110 + /* Define to enable debug spew (all PDEBUG's are enabled). */
111 + /* #define TILE_NET_DEBUG */
112 + 
113 + 
114 + /* Define to activate paranoia checks. */
115 + /* #define TILE_NET_PARANOIA */
116 + 
117 + /* Default transmit lockup timeout period, in jiffies. */
118 + #define TILE_NET_TIMEOUT (5 * HZ)
119 + 
120 + /* Default retry interval for bringing up the NetIO interface, in jiffies. */
121 + #define TILE_NET_RETRY_INTERVAL (5 * HZ)
122 + 
123 + /* Number of ports (xgbe0, xgbe1, gbe0, gbe1). */
124 + #define TILE_NET_DEVS 4
125 + 
126 + 
127 + 
128 + /* Paranoia. */
129 + #if NET_IP_ALIGN != LIPP_PACKET_PADDING
130 + #error "NET_IP_ALIGN must match LIPP_PACKET_PADDING."
131 + #endif
132 + 
133 + 
134 + /* Debug print. */
135 + #ifdef TILE_NET_DEBUG
136 + #define PDEBUG(fmt, args...) net_printk(fmt, ## args)
137 + #else
138 + #define PDEBUG(fmt, args...)
139 + #endif
140 + 
141 + 
142 + MODULE_AUTHOR("Tilera");
143 + MODULE_LICENSE("GPL");
144 + 
145 + 
146 + #define IS_MULTICAST(mac_addr) \
147 + (((u8 *)(mac_addr))[0] & 0x01)
148 + 
149 + #define IS_BROADCAST(mac_addr) \
150 + (((u16 *)(mac_addr))[0] == 0xffff)
151 + 
152 + 
153 + /*
154 + * Queue of incoming packets for a specific cpu and device.
155 + *
156 + * Includes a pointer to the "system" data, and the actual "user" data.
157 + */
158 + struct tile_netio_queue {
159 + netio_queue_impl_t *__system_part;
160 + netio_queue_user_impl_t __user_part;
161 + 
162 + };
163 + 
164 + 
165 + /*
166 + * Statistics counters for a specific cpu and device.
167 + */
168 + struct tile_net_stats_t {
169 + u32 rx_packets;
170 + u32 rx_bytes;
171 + u32 tx_packets;
172 + u32 tx_bytes;
173 + };
174 + 
175 + 
176 + /*
177 + * Info for a specific cpu and device.
178 + *
179 + * ISSUE: There is a "dev" pointer in "napi" as well.
180 + */
181 + struct tile_net_cpu {
182 + /* The NAPI struct. */
183 + struct napi_struct napi;
184 + /* Packet queue. */
185 + struct tile_netio_queue queue;
186 + /* Statistics. */
187 + struct tile_net_stats_t stats;
188 + /* ISSUE: Is this needed? */
189 + bool napi_enabled;
190 + /* True if this tile has successfully registered with the IPP. */
191 + bool registered;
192 + /* True if the link was down last time we tried to register. */
193 + bool link_down;
194 + /* True if "egress_timer" is scheduled. */
195 + bool egress_timer_scheduled;
196 + /* Number of small sk_buffs which must still be provided. */
197 + unsigned int num_needed_small_buffers;
198 + /* Number of large sk_buffs which must still be provided. */
199 + unsigned int num_needed_large_buffers;
200 + /* A timer for handling egress completions.
*/
201 + struct timer_list egress_timer;
202 + };
203 + 
204 + 
205 + /*
206 + * Info for a specific device.
207 + */
208 + struct tile_net_priv {
209 + /* Our network device. */
210 + struct net_device *dev;
211 + /* The actual egress queue. */
212 + lepp_queue_t *epp_queue;
213 + /* Protects "epp_queue->cmd_tail" and "epp_queue->comp_tail". */
214 + spinlock_t cmd_lock;
215 + /* Protects "epp_queue->comp_head". */
216 + spinlock_t comp_lock;
217 + /* The hypervisor handle for this interface. */
218 + int hv_devhdl;
219 + /* The intr bit mask that IDs this device. */
220 + u32 intr_id;
221 + /* True iff "tile_net_open_aux()" has succeeded. */
222 + int partly_opened;
223 + /* True iff "tile_net_open_inner()" has succeeded. */
224 + int fully_opened;
225 + /* Effective network cpus. */
226 + struct cpumask network_cpus_map;
227 + /* Number of network cpus. */
228 + int network_cpus_count;
229 + /* Credits per network cpu. */
230 + int network_cpus_credits;
231 + /* Network stats. */
232 + struct net_device_stats stats;
233 + /* For NetIO bringup retries. */
234 + struct delayed_work retry_work;
235 + /* Quick access to per cpu data. */
236 + struct tile_net_cpu *cpu[NR_CPUS];
237 + };
238 + 
239 + 
240 + /*
241 + * The actual devices (xgbe0, xgbe1, gbe0, gbe1).
242 + */
243 + static struct net_device *tile_net_devs[TILE_NET_DEVS];
244 + 
245 + /*
246 + * The "tile_net_cpu" structures for each device.
247 + */
248 + static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe0);
249 + static DEFINE_PER_CPU(struct tile_net_cpu, hv_xgbe1);
250 + static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe0);
251 + static DEFINE_PER_CPU(struct tile_net_cpu, hv_gbe1);
252 + 
253 + 
254 + /*
255 + * True if "network_cpus" was specified.
256 + */
257 + static bool network_cpus_used;
258 + 
259 + /*
260 + * The actual cpus in "network_cpus".
261 + */
262 + static struct cpumask network_cpus_map;
263 + 
264 + 
265 + 
266 + #ifdef TILE_NET_DEBUG
267 + /*
268 + * printk with extra stuff.
269 + *
270 + * We print the CPU we're running on in brackets.
271 + */
272 + static void net_printk(char *fmt, ...)
273 + {
274 + int i;
275 + int len;
276 + va_list args;
277 + static char buf[256];
278 + 
279 + len = sprintf(buf, "tile_net[%2.2d]: ", smp_processor_id());
280 + va_start(args, fmt);
281 + i = vscnprintf(buf + len, sizeof(buf) - len - 1, fmt, args);
282 + va_end(args);
283 + buf[255] = '\0';
284 + pr_notice("%s", buf);
285 + }
286 + #endif
287 + 
288 + 
289 + #ifdef TILE_NET_DUMP_PACKETS
290 + /*
291 + * Dump a packet.
292 + */
293 + static void dump_packet(unsigned char *data, unsigned long length, char *s)
294 + {
295 + unsigned long i;
char buf[128];	/* Holds one formatted line of hex output. */
296 + static unsigned int count;
297 + 
298 + pr_info("dump_packet(data %p, length 0x%lx s %s count 0x%x)\n",
299 + data, length, s, count++);
300 + 
301 + pr_info("\n");
302 + 
303 + for (i = 0; i < length; i++) {
304 + if ((i & 0xf) == 0)
305 + sprintf(buf, "%8.8lx:", i);
306 + sprintf(buf + strlen(buf), " %2.2x", data[i]);
307 + if ((i & 0xf) == 0xf || i == length - 1)
308 + pr_info("%s\n", buf);
309 + }
310 + }
311 + #endif
312 + 
313 + 
314 + /*
315 + * Provide support for the __netio_fastio1() swint
316 + * (see <hv/drv_xgbe_intf.h> for how it is used).
317 + *
318 + * The fastio swint2 call may clobber all the caller-saved registers.
319 + * It rarely clobbers memory, but we allow for the possibility in
320 + * the signature just to be on the safe side.
321 + *
322 + * Also, gcc doesn't seem to allow an input operand to be
323 + * clobbered, so we fake it with dummy outputs.
314 + /* 315 + * Provide support for the __netio_fastio1() swint 316 + * (see <hv/drv_xgbe_intf.h> for how it is used). 317 + * 318 + * The fastio swint2 call may clobber all the caller-saved registers. 319 + * It rarely clobbers memory, but we allow for the possibility in 320 + * the signature just to be on the safe side. 321 + * 322 + * Also, gcc doesn't seem to allow an input operand to be 323 + * clobbered, so we fake it with dummy outputs. 324 + * 325 + * This function can't be static because of the way it is declared 326 + * in the netio header. 327 + */ 328 + inline int __netio_fastio1(u32 fastio_index, u32 arg0) 329 + { 330 + long result, clobber_r1, clobber_r10; 331 + asm volatile("swint2" 332 + : "=R00" (result), 333 + "=R01" (clobber_r1), "=R10" (clobber_r10) 334 + : "R10" (fastio_index), "R01" (arg0) 335 + : "memory", "r2", "r3", "r4", 336 + "r5", "r6", "r7", "r8", "r9", 337 + "r11", "r12", "r13", "r14", 338 + "r15", "r16", "r17", "r18", "r19", 339 + "r20", "r21", "r22", "r23", "r24", 340 + "r25", "r26", "r27", "r28", "r29"); 341 + return result; 342 + } 343 + 344 + 345 + /* 346 + * Provide a linux buffer to LIPP. 347 + */ 348 + static void tile_net_provide_linux_buffer(struct tile_net_cpu *info, 349 + void *va, bool small) 350 + { 351 + struct tile_netio_queue *queue = &info->queue; 352 + 353 + /* Convert "va" and "small" to "linux_buffer_t". */ 354 + unsigned int buffer = ((unsigned int)(__pa(va) >> 7) << 1) + small; 355 + 356 + __netio_fastio_free_buffer(queue->__user_part.__fastio_index, buffer); 357 + } 358 + 359 + 360 + /* 361 + * Provide a linux buffer for LIPP. 362 + */ 363 + static bool tile_net_provide_needed_buffer(struct tile_net_cpu *info, 364 + bool small) 365 + { 366 + /* ISSUE: What should we use here? */ 367 + unsigned int large_size = NET_IP_ALIGN + TILE_NET_MTU + 100; 368 + 369 + /* Round up to avoid "false sharing" with the last cache line. */ 370 + unsigned int buffer_size = 371 + (((small ? LIPP_SMALL_PACKET_SIZE : large_size) + 372 + CHIP_L2_LINE_SIZE() - 1) & -CHIP_L2_LINE_SIZE()); 373 + 374 + /* 375 + * ISSUE: Since CPAs are 38 bits, and we can only encode the 376 + * high 31 bits in a "linux_buffer_t", the low 7 bits must be 377 + * zero, and thus, we must align the actual "va" mod 128. 378 + */ 379 + const unsigned long align = 128; 380 + 381 + struct sk_buff *skb; 382 + void *va; 383 + 384 + struct sk_buff **skb_ptr; 385 + 386 + /* Note that "dev_alloc_skb()" adds NET_SKB_PAD more bytes, */ 387 + /* and also "reserves" that many bytes. */ 388 + /* ISSUE: Can we "share" the NET_SKB_PAD bytes with "skb_ptr"? */ 389 + int len = sizeof(*skb_ptr) + align + buffer_size; 390 + 391 + while (1) { 392 + 393 + /* Allocate (or fail). */ 394 + skb = dev_alloc_skb(len); 395 + if (skb == NULL) 396 + return false; 397 + 398 + /* Make room for a back-pointer to 'skb'. */ 399 + skb_reserve(skb, sizeof(*skb_ptr)); 400 + 401 + /* Make sure we are aligned. */ 402 + skb_reserve(skb, -(long)skb->data & (align - 1)); 403 + 404 + /* This address is given to IPP. */ 405 + va = skb->data; 406 + 407 + if (small) 408 + break; 409 + 410 + /* ISSUE: This has never been observed! */ 411 + /* Large buffers must not span a huge page. */ 412 + if (((((long)va & ~HPAGE_MASK) + 1535) & HPAGE_MASK) == 0) 413 + break; 414 + pr_err("Leaking unaligned linux buffer at %p.\n", va); 415 + } 416 + 417 + /* Skip two bytes to satisfy LIPP assumptions. */ 418 + /* Note that this aligns IP on a 16 byte boundary. */ 419 + /* ISSUE: Do this when the packet arrives? */ 420 + skb_reserve(skb, NET_IP_ALIGN); 421 + 422 + /* Save a back-pointer to 'skb'. */ 423 + skb_ptr = va - sizeof(*skb_ptr); 424 + *skb_ptr = skb; 425 + 426 + /* Invalidate the packet buffer. */ 427 + if (!hash_default) 428 + __inv_buffer(skb->data, buffer_size); 429 + 430 + /* Make sure "skb_ptr" has been flushed. */
431 + __insn_mf(); 432 + 433 + #ifdef TILE_NET_PARANOIA 434 + #if CHIP_HAS_CBOX_HOME_MAP() 435 + if (hash_default) { 436 + HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)va); 437 + if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) 438 + panic("Non-coherent ingress buffer!"); 439 + } 440 + #endif 441 + #endif 442 + 443 + /* Provide the new buffer. */ 444 + tile_net_provide_linux_buffer(info, va, small); 445 + 446 + return true; 447 + } 448 + 449 + 450 + /* 451 + * Provide linux buffers for LIPP. 452 + */ 453 + static void tile_net_provide_needed_buffers(struct tile_net_cpu *info) 454 + { 455 + while (info->num_needed_small_buffers != 0) { 456 + if (!tile_net_provide_needed_buffer(info, true)) 457 + goto oops; 458 + info->num_needed_small_buffers--; 459 + } 460 + 461 + while (info->num_needed_large_buffers != 0) { 462 + if (!tile_net_provide_needed_buffer(info, false)) 463 + goto oops; 464 + info->num_needed_large_buffers--; 465 + } 466 + 467 + return; 468 + 469 + oops: 470 + 471 + /* Add a description to the page allocation failure dump. */ 472 + pr_notice("Could not provide a linux buffer to LIPP.\n"); 473 + } 474 + 475 + 476 + /* 477 + * Grab some LEPP completions, and store them in "comps", of size 478 + * "comps_size", and return the number of completions which were 479 + * stored, so the caller can free them. 480 + * 481 + * If "pending" is not NULL, it will be set to true if there might 482 + * still be some pending completions caused by this tile, else false. 483 + */ 484 + static unsigned int tile_net_lepp_grab_comps(struct net_device *dev, 485 + struct sk_buff *comps[], 486 + unsigned int comps_size, 487 + bool *pending) 488 + { 489 + struct tile_net_priv *priv = netdev_priv(dev); 490 + 491 + lepp_queue_t *eq = priv->epp_queue; 492 + 493 + unsigned int n = 0; 494 + 495 + unsigned int comp_head; 496 + unsigned int comp_busy; 497 + unsigned int comp_tail; 498 + 499 + spin_lock(&priv->comp_lock); 500 + 501 + comp_head = eq->comp_head; 502 + comp_busy = eq->comp_busy; 503 + comp_tail = eq->comp_tail; 504 + 505 + while (comp_head != comp_busy && n < comps_size) { 506 + comps[n++] = eq->comps[comp_head]; 507 + LEPP_QINC(comp_head); 508 + } 509 + 510 + if (pending != NULL) 511 + *pending = (comp_head != comp_tail); 512 + 513 + eq->comp_head = comp_head; 514 + 515 + spin_unlock(&priv->comp_lock); 516 + 517 + return n; 518 + } 519 + 520 + 
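tile_net_lepp_grab_comps() above walks three ring indices: entries in [comp_head, comp_busy) are finished and may be freed, while [comp_busy, comp_tail) are still owned by the shim. A compilable model of that consumer, with an assumed power-of-two ring size standing in for the real LEPP queue geometry:

    #include <stdbool.h>

    #define QSIZE 64                           /* illustrative; not the LEPP value */
    #define QINC(i) ((i) = ((i) + 1) % QSIZE)  /* stands in for LEPP_QINC() */

    struct comp_ring {
            unsigned int head, busy, tail;
            void *comps[QSIZE];
    };

    /* Harvest up to "max" finished completions into "out"; report via
     * "pending" whether any entries, finished or not, remain queued. */
    unsigned int grab_comps(struct comp_ring *q, void *out[],
                            unsigned int max, bool *pending)
    {
            unsigned int n = 0;

            while (q->head != q->busy && n < max) {
                    out[n++] = q->comps[q->head];
                    QINC(q->head);
            }
            if (pending)
                    *pending = (q->head != q->tail);
            return n;
    }

In the driver the walk runs under priv->comp_lock, and the caller frees the harvested sk_buffs outside the lock.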
521 + /* 522 + * Make sure the egress timer is scheduled. 523 + * 524 + * Note that we use "schedule if not scheduled" logic instead of the more 525 + * obvious "reschedule" logic, because "reschedule" is fairly expensive. 526 + */ 527 + static void tile_net_schedule_egress_timer(struct tile_net_cpu *info) 528 + { 529 + if (!info->egress_timer_scheduled) { 530 + mod_timer_pinned(&info->egress_timer, jiffies + 1); 531 + info->egress_timer_scheduled = true; 532 + } 533 + } 534 + 535 + 536 + /* 537 + * The "function" for "info->egress_timer". 538 + * 539 + * This timer will reschedule itself as long as there are any pending 540 + * completions expected (on behalf of any tile). 541 + * 542 + * ISSUE: Realistically, will the timer ever stop scheduling itself? 543 + * 544 + * ISSUE: This timer is almost never actually needed, so just use a global 545 + * timer that can run on any tile. 546 + * 547 + * ISSUE: Maybe instead track number of expected completions, and free 548 + * only that many, resetting to zero if "pending" is ever false. 549 + */ 550 + static void tile_net_handle_egress_timer(unsigned long arg) 551 + { 552 + struct tile_net_cpu *info = (struct tile_net_cpu *)arg; 553 + struct net_device *dev = info->napi.dev; 554 + 555 + struct sk_buff *olds[32]; 556 + unsigned int wanted = 32; 557 + unsigned int i, nolds = 0; 558 + bool pending; 559 + 560 + /* The timer is no longer scheduled. */ 561 + info->egress_timer_scheduled = false; 562 + 563 + nolds = tile_net_lepp_grab_comps(dev, olds, wanted, &pending); 564 + 565 + for (i = 0; i < nolds; i++) 566 + kfree_skb(olds[i]); 567 + 568 + /* Reschedule timer if needed. */ 569 + if (pending) 570 + tile_net_schedule_egress_timer(info); 571 + } 572 + 573 + 574 + #ifdef IGNORE_DUP_ACKS 575 + 576 + /* 577 + * Help detect "duplicate" ACKs. These are sequential packets (for a 578 + * given flow) which are exactly 66 bytes long, sharing everything but 579 + * ID=2@0x12, Hsum=2@0x18, Ack=4@0x2a, WinSize=2@0x30, Csum=2@0x32, 580 + * Tstamps=10@0x38. The ID's are +1, the Hsum's are -1, the Ack's are 581 + * +N, and the Tstamps are usually identical. 582 + * 583 + * NOTE: Apparently truly duplicate acks (with identical "ack" values) 584 + * should not be collapsed, as they are used for some kind of flow control. 585 + */ 586 + static bool is_dup_ack(char *s1, char *s2, unsigned int len) 587 + { 588 + int i; 589 + 590 + unsigned long long ignorable = 0; 591 + 592 + /* Identification. */ 593 + ignorable |= (1ULL << 0x12); 594 + ignorable |= (1ULL << 0x13); 595 + 596 + /* Header checksum. */ 597 + ignorable |= (1ULL << 0x18); 598 + ignorable |= (1ULL << 0x19); 599 + 600 + /* ACK. */ 601 + ignorable |= (1ULL << 0x2a); 602 + ignorable |= (1ULL << 0x2b); 603 + ignorable |= (1ULL << 0x2c); 604 + ignorable |= (1ULL << 0x2d); 605 + 606 + /* WinSize. */ 607 + ignorable |= (1ULL << 0x30); 608 + ignorable |= (1ULL << 0x31); 609 + 610 + /* Checksum. */ 611 + ignorable |= (1ULL << 0x32); 612 + ignorable |= (1ULL << 0x33); 613 + 614 + for (i = 0; i < len; i++, ignorable >>= 1) { 615 + 616 + if ((ignorable & 1) || (s1[i] == s2[i])) 617 + continue; 618 + 619 + #ifdef TILE_NET_DEBUG 620 + /* HACK: Mention non-timestamp diffs. */ 621 + if (i < 0x38 && i != 0x2f && 622 + net_ratelimit()) 623 + pr_info("Diff at 0x%x\n", i); 624 + #endif 625 + 626 + return false; 627 + } 628 + 629 + #ifdef TILE_NET_NO_SUPPRESS_DUP_ACKS 630 + /* HACK: Do not suppress truly duplicate ACKs. */ 631 + /* ISSUE: Is this actually necessary or helpful? */ 632 + if (s1[0x2a] == s2[0x2a] && 633 + s1[0x2b] == s2[0x2b] && 634 + s1[0x2c] == s2[0x2c] && 635 + s1[0x2d] == s2[0x2d]) { 636 + return false; 637 + } 638 + #endif 639 + 640 + return true; 641 + } 642 + 643 + #endif 644 + 645 + 646 + 647 + /* 648 + * Like "tile_net_poll()", but just discard packets. 649 + */ 650 + static void tile_net_discard_packets(struct net_device *dev) 651 + { 652 + struct tile_net_priv *priv = netdev_priv(dev); 653 + int my_cpu = smp_processor_id(); 654 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 655 + struct tile_netio_queue *queue = &info->queue; 656 + netio_queue_impl_t *qsp = queue->__system_part; 657 + netio_queue_user_impl_t *qup = &queue->__user_part; 658 + 659 + while (qup->__packet_receive_read != 660 + qsp->__packet_receive_queue.__packet_write) { 661 + 662 + int index = qup->__packet_receive_read; 663 + 664 + int index2_aux = index + sizeof(netio_pkt_t); 665 + int index2 = 666 + ((index2_aux == 667 + qsp->__packet_receive_queue.__last_packet_plus_one) ? 
668 + 0 : index2_aux); 669 + 670 + netio_pkt_t *pkt = (netio_pkt_t *) 671 + ((unsigned long) &qsp[1] + index); 672 + 673 + /* Extract the "linux_buffer_t". */ 674 + unsigned int buffer = pkt->__packet.word; 675 + 676 + /* Convert "linux_buffer_t" to "va". */ 677 + void *va = __va((phys_addr_t)(buffer >> 1) << 7); 678 + 679 + /* Acquire the associated "skb". */ 680 + struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); 681 + struct sk_buff *skb = *skb_ptr; 682 + 683 + kfree_skb(skb); 684 + 685 + /* Consume this packet. */ 686 + qup->__packet_receive_read = index2; 687 + } 688 + } 689 + 690 + 691 + /* 692 + * Handle the next packet. Return true if "processed", false if "filtered". 693 + */ 694 + static bool tile_net_poll_aux(struct tile_net_cpu *info, int index) 695 + { 696 + struct net_device *dev = info->napi.dev; 697 + 698 + struct tile_netio_queue *queue = &info->queue; 699 + netio_queue_impl_t *qsp = queue->__system_part; 700 + netio_queue_user_impl_t *qup = &queue->__user_part; 701 + struct tile_net_stats_t *stats = &info->stats; 702 + 703 + int filter; 704 + 705 + int index2_aux = index + sizeof(netio_pkt_t); 706 + int index2 = 707 + ((index2_aux == 708 + qsp->__packet_receive_queue.__last_packet_plus_one) ? 709 + 0 : index2_aux); 710 + 711 + netio_pkt_t *pkt = (netio_pkt_t *)((unsigned long) &qsp[1] + index); 712 + 713 + netio_pkt_metadata_t *metadata = NETIO_PKT_METADATA(pkt); 714 + 715 + /* Extract the packet size. */ 716 + unsigned long len = 717 + (NETIO_PKT_CUSTOM_LENGTH(pkt) + 718 + NET_IP_ALIGN - NETIO_PACKET_PADDING); 719 + 720 + /* Extract the "linux_buffer_t". */ 721 + unsigned int buffer = pkt->__packet.word; 722 + 723 + /* Extract "small" (vs "large"). */ 724 + bool small = ((buffer & 1) != 0); 725 + 726 + /* Convert "linux_buffer_t" to "va". */ 727 + void *va = __va((phys_addr_t)(buffer >> 1) << 7); 728 + 729 + /* Extract the packet data pointer. */ 730 + /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ 731 + unsigned char *buf = va + NET_IP_ALIGN; 732 + 733 + #ifdef IGNORE_DUP_ACKS 734 + 735 + static int other; 736 + static int final; 737 + static int keep; 738 + static int skip; 739 + 740 + #endif 741 + 742 + /* Invalidate the packet buffer. */ 743 + if (!hash_default) 744 + __inv_buffer(buf, len); 745 + 746 + /* ISSUE: Is this needed? */ 747 + dev->last_rx = jiffies; 748 + 749 + #ifdef TILE_NET_DUMP_PACKETS 750 + dump_packet(buf, len, "rx"); 751 + #endif /* TILE_NET_DUMP_PACKETS */ 752 + 753 + #ifdef TILE_NET_VERIFY_INGRESS 754 + if (!NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt) && 755 + NETIO_PKT_L4_CSUM_CALCULATED_M(metadata, pkt)) { 756 + /* 757 + * FIXME: This complains about UDP packets 758 + * with a "zero" checksum (bug 6624). 
759 + */ 760 + #ifdef TILE_NET_PANIC_ON_BAD 761 + dump_packet(buf, len, "rx"); 762 + panic("Bad L4 checksum."); 763 + #else 764 + pr_warning("Bad L4 checksum on %d byte packet.\n", len); 765 + #endif 766 + } 767 + if (!NETIO_PKT_L3_CSUM_CORRECT_M(metadata, pkt) && 768 + NETIO_PKT_L3_CSUM_CALCULATED_M(metadata, pkt)) { 769 + dump_packet(buf, len, "rx"); 770 + panic("Bad L3 checksum."); 771 + } 772 + switch (NETIO_PKT_STATUS_M(metadata, pkt)) { 773 + case NETIO_PKT_STATUS_OVERSIZE: 774 + if (len >= 64) { 775 + dump_packet(buf, len, "rx"); 776 + panic("Unexpected OVERSIZE."); 777 + } 778 + break; 779 + case NETIO_PKT_STATUS_BAD: 780 + #ifdef TILE_NET_PANIC_ON_BAD 781 + dump_packet(buf, len, "rx"); 782 + panic("Unexpected BAD packet."); 783 + #else 784 + pr_warning("Unexpected BAD %d byte packet.\n", len); 785 + #endif 786 + } 787 + #endif 788 + 789 + filter = 0; 790 + 791 + if (!(dev->flags & IFF_UP)) { 792 + /* Filter packets received before we're up. */ 793 + filter = 1; 794 + } else if (!(dev->flags & IFF_PROMISC)) { 795 + /* 796 + * FIXME: Implement HW multicast filter. 797 + */ 798 + if (!IS_MULTICAST(buf) && !IS_BROADCAST(buf)) { 799 + /* Filter packets not for our address. */ 800 + const u8 *mine = dev->dev_addr; 801 + filter = compare_ether_addr(mine, buf); 802 + } 803 + } 804 + 805 + #ifdef IGNORE_DUP_ACKS 806 + 807 + if (len != 66) { 808 + /* FIXME: Must check "is_tcp_ack(buf, len)" somehow. */ 809 + 810 + other++; 811 + 812 + } else if (index2 == 813 + qsp->__packet_receive_queue.__packet_write) { 814 + 815 + final++; 816 + 817 + } else { 818 + 819 + netio_pkt_t *pkt2 = (netio_pkt_t *) 820 + ((unsigned long) &qsp[1] + index2); 821 + 822 + netio_pkt_metadata_t *metadata2 = 823 + NETIO_PKT_METADATA(pkt2); 824 + 825 + /* Extract the packet size. */ 826 + unsigned long len2 = 827 + (NETIO_PKT_CUSTOM_LENGTH(pkt2) + 828 + NET_IP_ALIGN - NETIO_PACKET_PADDING); 829 + 830 + if (len2 == 66 && 831 + NETIO_PKT_FLOW_HASH_M(metadata, pkt) == 832 + NETIO_PKT_FLOW_HASH_M(metadata2, pkt2)) { 833 + 834 + /* Extract the "linux_buffer_t". */ 835 + unsigned int buffer2 = pkt2->__packet.word; 836 + 837 + /* Convert "linux_buffer_t" to "va". */ 838 + void *va2 = 839 + __va((phys_addr_t)(buffer2 >> 1) << 7); 840 + 841 + /* Extract the packet data pointer. */ 842 + /* Compare to "NETIO_PKT_CUSTOM_DATA(pkt)". */ 843 + unsigned char *buf2 = va2 + NET_IP_ALIGN; 844 + 845 + /* Invalidate the packet buffer. */ 846 + if (!hash_default) 847 + __inv_buffer(buf2, len2); 848 + 849 + if (is_dup_ack(buf, buf2, len)) { 850 + skip++; 851 + filter = 1; 852 + } else { 853 + keep++; 854 + } 855 + } 856 + } 857 + 858 + if (net_ratelimit()) 859 + pr_info("Other %d Final %d Keep %d Skip %d.\n", 860 + other, final, keep, skip); 861 + 862 + #endif 863 + 864 + if (filter) { 865 + 866 + /* ISSUE: Update "drop" statistics? */ 867 + 868 + tile_net_provide_linux_buffer(info, va, small); 869 + 870 + } else { 871 + 872 + /* Acquire the associated "skb". */ 873 + struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); 874 + struct sk_buff *skb = *skb_ptr; 875 + 876 + /* Paranoia. */ 877 + if (skb->data != buf) 878 + panic("Corrupt linux buffer from LIPP! " 879 + "VA=%p, skb=%p, skb->data=%p\n", 880 + va, skb, skb->data); 881 + 882 + /* Encode the actual packet length. */ 883 + skb_put(skb, len); 884 + 885 + /* NOTE: This call also sets "skb->dev = dev". */ 886 + skb->protocol = eth_type_trans(skb, dev); 887 + 888 + /* ISSUE: Discard corrupt packets? */ 889 + /* ISSUE: Discard packets with bad checksums? 
*/ 890 + 891 + /* Avoid recomputing TCP/UDP checksums. */ 892 + if (NETIO_PKT_L4_CSUM_CORRECT_M(metadata, pkt)) 893 + skb->ip_summed = CHECKSUM_UNNECESSARY; 894 + 895 + netif_receive_skb(skb); 896 + 897 + stats->rx_packets++; 898 + stats->rx_bytes += len; 899 + 900 + if (small) 901 + info->num_needed_small_buffers++; 902 + else 903 + info->num_needed_large_buffers++; 904 + } 905 + 906 + /* Return four credits after every fourth packet. */ 907 + if (--qup->__receive_credit_remaining == 0) { 908 + u32 interval = qup->__receive_credit_interval; 909 + qup->__receive_credit_remaining = interval; 910 + __netio_fastio_return_credits(qup->__fastio_index, interval); 911 + } 912 + 913 + /* Consume this packet. */ 914 + qup->__packet_receive_read = index2; 915 + 916 + return !filter; 917 + } 918 + 919 + 920 + /* 921 + * Handle some packets for the given device on the current CPU. 922 + * 923 + * ISSUE: The "rotting packet" race condition occurs if a packet 924 + * arrives after the queue appears to be empty, and before the 925 + * hypervisor interrupt is re-enabled. 926 + */ 927 + static int tile_net_poll(struct napi_struct *napi, int budget) 928 + { 929 + struct net_device *dev = napi->dev; 930 + struct tile_net_priv *priv = netdev_priv(dev); 931 + int my_cpu = smp_processor_id(); 932 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 933 + struct tile_netio_queue *queue = &info->queue; 934 + netio_queue_impl_t *qsp = queue->__system_part; 935 + netio_queue_user_impl_t *qup = &queue->__user_part; 936 + 937 + unsigned int work = 0; 938 + 939 + while (1) { 940 + int index = qup->__packet_receive_read; 941 + if (index == qsp->__packet_receive_queue.__packet_write) 942 + break; 943 + 944 + if (tile_net_poll_aux(info, index)) { 945 + if (++work >= budget) 946 + goto done; 947 + } 948 + } 949 + 950 + napi_complete(&info->napi); 951 + 952 + /* Re-enable hypervisor interrupts. */ 953 + enable_percpu_irq(priv->intr_id); 954 + 955 + /* HACK: Avoid the "rotting packet" problem. */ 956 + if (qup->__packet_receive_read != 957 + qsp->__packet_receive_queue.__packet_write) 958 + napi_schedule(&info->napi); 959 + 960 + /* ISSUE: Handle completions? */ 961 + 962 + done: 963 + 964 + tile_net_provide_needed_buffers(info); 965 + 966 + return work; 967 + } 968 + 969 + 970 + /* 971 + * Handle an ingress interrupt for the given device on the current cpu. 972 + */ 973 + static irqreturn_t tile_net_handle_ingress_interrupt(int irq, void *dev_ptr) 974 + { 975 + struct net_device *dev = (struct net_device *)dev_ptr; 976 + struct tile_net_priv *priv = netdev_priv(dev); 977 + int my_cpu = smp_processor_id(); 978 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 979 + 980 + /* Disable hypervisor interrupt. */ 981 + disable_percpu_irq(priv->intr_id); 982 + 983 + napi_schedule(&info->napi); 984 + 985 + return IRQ_HANDLED; 986 + } 987 + 988 + 989 + /* 990 + * One time initialization per interface. 991 + */ 992 + static int tile_net_open_aux(struct net_device *dev) 993 + { 994 + struct tile_net_priv *priv = netdev_priv(dev); 995 + 996 + int ret; 997 + int dummy; 998 + unsigned int epp_lotar; 999 + 1000 + /* 1001 + * Find out where EPP memory should be homed. 1002 + */ 1003 + ret = hv_dev_pread(priv->hv_devhdl, 0, 1004 + (HV_VirtAddr)&epp_lotar, sizeof(epp_lotar), 1005 + NETIO_EPP_SHM_OFF); 1006 + if (ret < 0) { 1007 + pr_err("could not read epp_shm_queue lotar.\n"); 1008 + return -EIO; 1009 + } 1010 + 1011 + /* 1012 + * Home the page on the EPP. 
1013 + */ 1014 + { 1015 + int epp_home = hv_lotar_to_cpu(epp_lotar); 1016 + struct page *page = virt_to_page(priv->epp_queue); 1017 + homecache_change_page_home(page, 0, epp_home); 1018 + } 1019 + 1020 + /* 1021 + * Register the EPP shared memory queue. 1022 + */ 1023 + { 1024 + netio_ipp_address_t ea = { 1025 + .va = 0, 1026 + .pa = __pa(priv->epp_queue), 1027 + .pte = hv_pte(0), 1028 + .size = PAGE_SIZE, 1029 + }; 1030 + ea.pte = hv_pte_set_lotar(ea.pte, epp_lotar); 1031 + ea.pte = hv_pte_set_mode(ea.pte, HV_PTE_MODE_CACHE_TILE_L3); 1032 + ret = hv_dev_pwrite(priv->hv_devhdl, 0, 1033 + (HV_VirtAddr)&ea, 1034 + sizeof(ea), 1035 + NETIO_EPP_SHM_OFF); 1036 + if (ret < 0) 1037 + return -EIO; 1038 + } 1039 + 1040 + /* 1041 + * Start LIPP/LEPP. 1042 + */ 1043 + if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1044 + sizeof(dummy), NETIO_IPP_START_SHIM_OFF) < 0) { 1045 + pr_warning("Failed to start LIPP/LEPP.\n"); 1046 + return -EIO; 1047 + } 1048 + 1049 + return 0; 1050 + } 1051 + 1052 + 1053 + /* 1054 + * Register with hypervisor on each CPU. 1055 + * 1056 + * Strangely, this function does important things even if it "fails", 1057 + * which is especially common if the link is not up yet. Hopefully 1058 + * these things are all "harmless" if done twice! 1059 + */ 1060 + static void tile_net_register(void *dev_ptr) 1061 + { 1062 + struct net_device *dev = (struct net_device *)dev_ptr; 1063 + struct tile_net_priv *priv = netdev_priv(dev); 1064 + int my_cpu = smp_processor_id(); 1065 + struct tile_net_cpu *info; 1066 + 1067 + struct tile_netio_queue *queue; 1068 + 1069 + /* Only network cpus can receive packets. */ 1070 + int queue_id = 1071 + cpumask_test_cpu(my_cpu, &priv->network_cpus_map) ? 0 : 255; 1072 + 1073 + netio_input_config_t config = { 1074 + .flags = 0, 1075 + .num_receive_packets = priv->network_cpus_credits, 1076 + .queue_id = queue_id 1077 + }; 1078 + 1079 + int ret = 0; 1080 + netio_queue_impl_t *queuep; 1081 + 1082 + PDEBUG("tile_net_register(queue_id %d)\n", queue_id); 1083 + 1084 + if (!strcmp(dev->name, "xgbe0")) 1085 + info = &__get_cpu_var(hv_xgbe0); 1086 + else if (!strcmp(dev->name, "xgbe1")) 1087 + info = &__get_cpu_var(hv_xgbe1); 1088 + else if (!strcmp(dev->name, "gbe0")) 1089 + info = &__get_cpu_var(hv_gbe0); 1090 + else if (!strcmp(dev->name, "gbe1")) 1091 + info = &__get_cpu_var(hv_gbe1); 1092 + else 1093 + BUG(); 1094 + 1095 + /* Initialize the egress timer. */ 1096 + init_timer(&info->egress_timer); 1097 + info->egress_timer.data = (long)info; 1098 + info->egress_timer.function = tile_net_handle_egress_timer; 1099 + 1100 + priv->cpu[my_cpu] = info; 1101 + 1102 + /* 1103 + * Register ourselves with the IPP. 1104 + */ 1105 + ret = hv_dev_pwrite(priv->hv_devhdl, 0, 1106 + (HV_VirtAddr)&config, 1107 + sizeof(netio_input_config_t), 1108 + NETIO_IPP_INPUT_REGISTER_OFF); 1109 + PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", 1110 + ret); 1111 + if (ret < 0) { 1112 + printk(KERN_DEBUG "hv_dev_pwrite NETIO_IPP_INPUT_REGISTER_OFF" 1113 + " failure %d\n", ret); 1114 + info->link_down = (ret == NETIO_LINK_DOWN); 1115 + return; 1116 + } 1117 + 1118 + /* 1119 + * Get the pointer to our queue's system part. 
1120 + */ 1121 + 1122 + ret = hv_dev_pread(priv->hv_devhdl, 0, 1123 + (HV_VirtAddr)&queuep, 1124 + sizeof(netio_queue_impl_t *), 1125 + NETIO_IPP_INPUT_REGISTER_OFF); 1126 + PDEBUG("hv_dev_pread(NETIO_IPP_INPUT_REGISTER_OFF) returned %d\n", 1127 + ret); 1128 + PDEBUG("queuep %p\n", queuep); 1129 + if (ret <= 0) { 1130 + /* ISSUE: Shouldn't this be a fatal error? */ 1131 + pr_err("hv_dev_pread NETIO_IPP_INPUT_REGISTER_OFF failure\n"); 1132 + return; 1133 + } 1134 + 1135 + queue = &info->queue; 1136 + 1137 + queue->__system_part = queuep; 1138 + 1139 + memset(&queue->__user_part, 0, sizeof(netio_queue_user_impl_t)); 1140 + 1141 + /* This is traditionally "config.num_receive_packets / 2". */ 1142 + queue->__user_part.__receive_credit_interval = 4; 1143 + queue->__user_part.__receive_credit_remaining = 1144 + queue->__user_part.__receive_credit_interval; 1145 + 1146 + /* 1147 + * Get a fastio index from the hypervisor. 1148 + * ISSUE: Shouldn't this check the result? 1149 + */ 1150 + ret = hv_dev_pread(priv->hv_devhdl, 0, 1151 + (HV_VirtAddr)&queue->__user_part.__fastio_index, 1152 + sizeof(queue->__user_part.__fastio_index), 1153 + NETIO_IPP_GET_FASTIO_OFF); 1154 + PDEBUG("hv_dev_pread(NETIO_IPP_GET_FASTIO_OFF) returned %d\n", ret); 1155 + 1156 + netif_napi_add(dev, &info->napi, tile_net_poll, 64); 1157 + 1158 + /* Now we are registered. */ 1159 + info->registered = true; 1160 + } 1161 + 1162 + 1163 + /* 1164 + * Unregister with hypervisor on each CPU. 1165 + */ 1166 + static void tile_net_unregister(void *dev_ptr) 1167 + { 1168 + struct net_device *dev = (struct net_device *)dev_ptr; 1169 + struct tile_net_priv *priv = netdev_priv(dev); 1170 + int my_cpu = smp_processor_id(); 1171 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 1172 + 1173 + int ret = 0; 1174 + int dummy = 0; 1175 + 1176 + /* Do nothing if never registered. */ 1177 + if (info == NULL) 1178 + return; 1179 + 1180 + /* Do nothing if already unregistered. */ 1181 + if (!info->registered) 1182 + return; 1183 + 1184 + /* 1185 + * Unregister ourselves with LIPP. 1186 + */ 1187 + ret = hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1188 + sizeof(dummy), NETIO_IPP_INPUT_UNREGISTER_OFF); 1189 + PDEBUG("hv_dev_pwrite(NETIO_IPP_INPUT_UNREGISTER_OFF) returned %d\n", 1190 + ret); 1191 + if (ret < 0) { 1192 + /* FIXME: Just panic? */ 1193 + pr_err("hv_dev_pwrite NETIO_IPP_INPUT_UNREGISTER_OFF" 1194 + " failure %d\n", ret); 1195 + } 1196 + 1197 + /* 1198 + * Discard all packets still in our NetIO queue. Hopefully, 1199 + * once the unregister call is complete, there will be no 1200 + * packets still in flight on the IDN. 1201 + */ 1202 + tile_net_discard_packets(dev); 1203 + 1204 + /* Reset state. */ 1205 + info->num_needed_small_buffers = 0; 1206 + info->num_needed_large_buffers = 0; 1207 + 1208 + /* Cancel egress timer. */ 1209 + del_timer(&info->egress_timer); 1210 + info->egress_timer_scheduled = false; 1211 + 1212 + netif_napi_del(&info->napi); 1213 + 1214 + /* Now we are unregistered. */ 1215 + info->registered = false; 1216 + } 1217 + 1218 + 1219 + /* 1220 + * Helper function for "tile_net_stop()". 1221 + * 1222 + * Also used to handle registration failure in "tile_net_open_inner()", 1223 + * when "fully_opened" is known to be false, and the various extra 1224 + * steps in "tile_net_stop()" are not necessary. ISSUE: It might be 1225 + * simpler if we could just call "tile_net_stop()" anyway. 
1226 + */ 1227 + static void tile_net_stop_aux(struct net_device *dev) 1228 + { 1229 + struct tile_net_priv *priv = netdev_priv(dev); 1230 + 1231 + int dummy = 0; 1232 + 1233 + /* Unregister all tiles, so LIPP will stop delivering packets. */ 1234 + on_each_cpu(tile_net_unregister, (void *)dev, 1); 1235 + 1236 + /* Stop LIPP/LEPP. */ 1237 + if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1238 + sizeof(dummy), NETIO_IPP_STOP_SHIM_OFF) < 0) 1239 + panic("Failed to stop LIPP/LEPP!\n"); 1240 + 1241 + priv->partly_opened = 0; 1242 + } 1243 + 1244 + 1245 + /* 1246 + * Disable ingress interrupts for the given device on the current cpu. 1247 + */ 1248 + static void tile_net_disable_intr(void *dev_ptr) 1249 + { 1250 + struct net_device *dev = (struct net_device *)dev_ptr; 1251 + struct tile_net_priv *priv = netdev_priv(dev); 1252 + int my_cpu = smp_processor_id(); 1253 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 1254 + 1255 + /* Disable hypervisor interrupt. */ 1256 + disable_percpu_irq(priv->intr_id); 1257 + 1258 + /* Disable NAPI if needed. */ 1259 + if (info != NULL && info->napi_enabled) { 1260 + napi_disable(&info->napi); 1261 + info->napi_enabled = false; 1262 + } 1263 + } 1264 + 1265 + 1266 + /* 1267 + * Enable ingress interrupts for the given device on the current cpu. 1268 + */ 1269 + static void tile_net_enable_intr(void *dev_ptr) 1270 + { 1271 + struct net_device *dev = (struct net_device *)dev_ptr; 1272 + struct tile_net_priv *priv = netdev_priv(dev); 1273 + int my_cpu = smp_processor_id(); 1274 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 1275 + 1276 + /* Enable hypervisor interrupt. */ 1277 + enable_percpu_irq(priv->intr_id); 1278 + 1279 + /* Enable NAPI. */ 1280 + napi_enable(&info->napi); 1281 + info->napi_enabled = true; 1282 + } 1283 + 1284 + 1285 + /* 1286 + * tile_net_open_inner does most of the work of bringing up the interface. 1287 + * It's called from tile_net_open(), and also from tile_net_retry_open(). 1288 + * The return value is 0 if the interface was brought up, < 0 if 1289 + * tile_net_open() should return the return value as an error, and > 0 if 1290 + * tile_net_open() should return success and schedule a work item to 1291 + * periodically retry the bringup. 1292 + */ 1293 + static int tile_net_open_inner(struct net_device *dev) 1294 + { 1295 + struct tile_net_priv *priv = netdev_priv(dev); 1296 + int my_cpu = smp_processor_id(); 1297 + struct tile_net_cpu *info; 1298 + struct tile_netio_queue *queue; 1299 + unsigned int irq; 1300 + int i; 1301 + 1302 + /* 1303 + * First try to register just on the local CPU, and handle any 1304 + * semi-expected "link down" failure specially. Note that we 1305 + * do NOT call "tile_net_stop_aux()", unlike below. 1306 + */ 1307 + tile_net_register(dev); 1308 + info = priv->cpu[my_cpu]; 1309 + if (!info->registered) { 1310 + if (info->link_down) 1311 + return 1; 1312 + return -EAGAIN; 1313 + } 1314 + 1315 + /* 1316 + * Now register everywhere else. If any registration fails, 1317 + * even for "link down" (which might not be possible), we 1318 + * clean up using "tile_net_stop_aux()". 1319 + */ 1320 + smp_call_function(tile_net_register, (void *)dev, 1); 1321 + for_each_online_cpu(i) { 1322 + if (!priv->cpu[i]->registered) { 1323 + tile_net_stop_aux(dev); 1324 + return -EAGAIN; 1325 + } 1326 + } 1327 + 1328 + queue = &info->queue; 1329 + 1330 + /* 1331 + * Set the device intr bit mask. 1332 + * The tile_net_register above sets per tile __intr_id. 
1333 + */ 1334 + priv->intr_id = queue->__system_part->__intr_id; 1335 + BUG_ON(!priv->intr_id); 1336 + 1337 + /* 1338 + * Register the device interrupt handler. 1339 + * The __ffs() function returns the index into the interrupt handler 1340 + * table from the interrupt bit mask which should have one bit 1341 + * and one bit only set. 1342 + */ 1343 + irq = __ffs(priv->intr_id); 1344 + tile_irq_activate(irq, TILE_IRQ_PERCPU); 1345 + BUG_ON(request_irq(irq, tile_net_handle_ingress_interrupt, 1346 + 0, dev->name, (void *)dev) != 0); 1347 + 1348 + /* ISSUE: How could "priv->fully_opened" ever be "true" here? */ 1349 + 1350 + if (!priv->fully_opened) { 1351 + 1352 + int dummy = 0; 1353 + 1354 + /* Allocate initial buffers. */ 1355 + 1356 + int max_buffers = 1357 + priv->network_cpus_count * priv->network_cpus_credits; 1358 + 1359 + info->num_needed_small_buffers = 1360 + min(LIPP_SMALL_BUFFERS, max_buffers); 1361 + 1362 + info->num_needed_large_buffers = 1363 + min(LIPP_LARGE_BUFFERS, max_buffers); 1364 + 1365 + tile_net_provide_needed_buffers(info); 1366 + 1367 + if (info->num_needed_small_buffers != 0 || 1368 + info->num_needed_large_buffers != 0) 1369 + panic("Insufficient memory for buffer stack!"); 1370 + 1371 + /* Start LIPP/LEPP and activate "ingress" at the shim. */ 1372 + if (hv_dev_pwrite(priv->hv_devhdl, 0, (HV_VirtAddr)&dummy, 1373 + sizeof(dummy), NETIO_IPP_INPUT_INIT_OFF) < 0) 1374 + panic("Failed to activate the LIPP Shim!\n"); 1375 + 1376 + priv->fully_opened = 1; 1377 + } 1378 + 1379 + /* On each tile, enable the hypervisor to trigger interrupts. */ 1380 + /* ISSUE: Do this before starting LIPP/LEPP? */ 1381 + on_each_cpu(tile_net_enable_intr, (void *)dev, 1); 1382 + 1383 + /* Start our transmit queue. */ 1384 + netif_start_queue(dev); 1385 + 1386 + return 0; 1387 + } 1388 + 1389 + 1390 + /* 1391 + * Called periodically to retry bringing up the NetIO interface, 1392 + * if it doesn't come up cleanly during tile_net_open(). 1393 + */ 1394 + static void tile_net_open_retry(struct work_struct *w) 1395 + { 1396 + struct delayed_work *dw = 1397 + container_of(w, struct delayed_work, work); 1398 + 1399 + struct tile_net_priv *priv = 1400 + container_of(dw, struct tile_net_priv, retry_work); 1401 + 1402 + /* 1403 + * Try to bring the NetIO interface up. If it fails, reschedule 1404 + * ourselves to try again later; otherwise, tell Linux we now have 1405 + * a working link. ISSUE: What if the return value is negative? 1406 + */ 1407 + if (tile_net_open_inner(priv->dev)) 1408 + schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, 1409 + TILE_NET_RETRY_INTERVAL); 1410 + else 1411 + netif_carrier_on(priv->dev); 1412 + } 1413 + 1414 + 1415 + /* 1416 + * Called when a network interface is made active. 1417 + * 1418 + * Returns 0 on success, negative value on failure. 1419 + * 1420 + * The open entry point is called when a network interface is made 1421 + * active by the system (IFF_UP). At this point all resources needed 1422 + * for transmit and receive operations are allocated, the interrupt 1423 + * handler is registered with the OS, the watchdog timer is started, 1424 + * and the stack is notified that the interface is ready. 1425 + * 1426 + * If the actual link is not available yet, then we tell Linux that 1427 + * we have no carrier, and we keep checking until the link comes up. 
1428 + */ 1429 + static int tile_net_open(struct net_device *dev) 1430 + { 1431 + int ret = 0; 1432 + struct tile_net_priv *priv = netdev_priv(dev); 1433 + 1434 + /* 1435 + * We rely on priv->partly_opened to tell us if this is the 1436 + * first time this interface is being brought up. If it is 1437 + * set, the IPP was already initialized and should not be 1438 + * initialized again. 1439 + */ 1440 + if (!priv->partly_opened) { 1441 + 1442 + int count; 1443 + int credits; 1444 + 1445 + /* Initialize LIPP/LEPP, and start the Shim. */ 1446 + ret = tile_net_open_aux(dev); 1447 + if (ret < 0) { 1448 + pr_err("tile_net_open_aux failed: %d\n", ret); 1449 + return ret; 1450 + } 1451 + 1452 + /* Analyze the network cpus. */ 1453 + 1454 + if (network_cpus_used) 1455 + cpumask_copy(&priv->network_cpus_map, 1456 + &network_cpus_map); 1457 + else 1458 + cpumask_copy(&priv->network_cpus_map, cpu_online_mask); 1459 + 1460 + 1461 + count = cpumask_weight(&priv->network_cpus_map); 1462 + 1463 + /* Divide the buffers among the network cpus, with a minimum of 16 credits each (rounded down to even). */ 1464 + credits = max(16, (LIPP_LARGE_BUFFERS / count) & ~1); 1465 + 1466 + /* Apply "GBE" max limit. */ 1467 + /* ISSUE: Use higher limit for XGBE? */ 1468 + credits = min(NETIO_MAX_RECEIVE_PKTS, credits); 1469 + 1470 + priv->network_cpus_count = count; 1471 + priv->network_cpus_credits = credits; 1472 + 1473 + #ifdef TILE_NET_DEBUG 1474 + pr_info("Using %d network cpus, with %d credits each\n", 1475 + priv->network_cpus_count, priv->network_cpus_credits); 1476 + #endif 1477 + 1478 + priv->partly_opened = 1; 1479 + } 1480 + 1481 + /* 1482 + * Attempt to bring up the link. 1483 + */ 1484 + ret = tile_net_open_inner(dev); 1485 + if (ret <= 0) { 1486 + if (ret == 0) 1487 + netif_carrier_on(dev); 1488 + return ret; 1489 + } 1490 + 1491 + /* 1492 + * We were unable to bring up the NetIO interface, but we want to 1493 + * try again in a little bit. Tell Linux that we have no carrier 1494 + * so it doesn't try to use the interface before the link comes up, 1495 + * and remember to try again later. 1496 + */ 1497 + netif_carrier_off(dev); 1498 + schedule_delayed_work_on(singlethread_cpu, &priv->retry_work, 1499 + TILE_NET_RETRY_INTERVAL); 1500 + 1501 + return 0; 1502 + } 1503 + 1504 + 1505 + /* 1506 + * Disables a network interface. 1507 + * 1508 + * Returns 0; this is not allowed to fail. 1509 + * 1510 + * The close entry point is called when an interface is de-activated 1511 + * by the OS. The hardware is still under the driver's control, but 1512 + * needs to be disabled. A global MAC reset is issued to stop the 1513 + * hardware, and all transmit and receive resources are freed. 1514 + * 1515 + * ISSUE: Can this be called while "tile_net_poll()" is running? 1516 + */ 1517 + static int tile_net_stop(struct net_device *dev) 1518 + { 1519 + struct tile_net_priv *priv = netdev_priv(dev); 1520 + 1521 + bool pending = true; 1522 + 1523 + PDEBUG("tile_net_stop()\n"); 1524 + 1525 + /* ISSUE: Only needed if not yet fully open. */ 1526 + cancel_delayed_work_sync(&priv->retry_work); 1527 + 1528 + /* Can't transmit any more. */ 1529 + netif_stop_queue(dev); 1530 + 1531 + /* 1532 + * Disable hypervisor interrupts on each tile. 1533 + */ 1534 + on_each_cpu(tile_net_disable_intr, (void *)dev, 1); 1535 + 1536 + /* 1537 + * Unregister the interrupt handler. 1538 + * The __ffs() function returns the index into the interrupt handler 1539 + * table from the interrupt bit mask which should have one bit 1540 + * and one bit only set. 
1541 + */ 1542 + if (priv->intr_id) 1543 + free_irq(__ffs(priv->intr_id), dev); 1544 + 1545 + /* 1546 + * Drain all the LIPP buffers. 1547 + */ 1548 + 1549 + while (true) { 1550 + int buffer; 1551 + 1552 + /* NOTE: This should never fail. */ 1553 + if (hv_dev_pread(priv->hv_devhdl, 0, (HV_VirtAddr)&buffer, 1554 + sizeof(buffer), NETIO_IPP_DRAIN_OFF) < 0) 1555 + break; 1556 + 1557 + /* Stop when done. */ 1558 + if (buffer == 0) 1559 + break; 1560 + 1561 + { 1562 + /* Convert "linux_buffer_t" to "va". */ 1563 + void *va = __va((phys_addr_t)(buffer >> 1) << 7); 1564 + 1565 + /* Acquire the associated "skb". */ 1566 + struct sk_buff **skb_ptr = va - sizeof(*skb_ptr); 1567 + struct sk_buff *skb = *skb_ptr; 1568 + 1569 + kfree_skb(skb); 1570 + } 1571 + } 1572 + 1573 + /* Stop LIPP/LEPP. */ 1574 + tile_net_stop_aux(dev); 1575 + 1576 + 1577 + priv->fully_opened = 0; 1578 + 1579 + 1580 + /* 1581 + * XXX: ISSUE: It appears that, in practice anyway, by the 1582 + * time we get here, there are no pending completions. 1583 + */ 1584 + while (pending) { 1585 + 1586 + struct sk_buff *olds[32]; 1587 + unsigned int wanted = 32; 1588 + unsigned int i, nolds = 0; 1589 + 1590 + nolds = tile_net_lepp_grab_comps(dev, olds, 1591 + wanted, &pending); 1592 + 1593 + /* ISSUE: We have never actually seen this debug spew. */ 1594 + if (nolds != 0) 1595 + pr_info("During tile_net_stop(), grabbed %d comps.\n", 1596 + nolds); 1597 + 1598 + for (i = 0; i < nolds; i++) 1599 + kfree_skb(olds[i]); 1600 + } 1601 + 1602 + 1603 + /* Wipe the EPP queue. */ 1604 + memset(priv->epp_queue, 0, sizeof(lepp_queue_t)); 1605 + 1606 + /* Evict the EPP queue. */ 1607 + finv_buffer(priv->epp_queue, PAGE_SIZE); 1608 + 1609 + return 0; 1610 + } 1611 + 1612 + 1613 + /* 1614 + * Prepare the "frags" info for the resulting LEPP command. 1615 + * 1616 + * If needed, flush the memory used by the frags. 1617 + */ 1618 + static unsigned int tile_net_tx_frags(lepp_frag_t *frags, 1619 + struct sk_buff *skb, 1620 + void *b_data, unsigned int b_len) 1621 + { 1622 + unsigned int i, n = 0; 1623 + 1624 + struct skb_shared_info *sh = skb_shinfo(skb); 1625 + 1626 + phys_addr_t cpa; 1627 + 1628 + if (b_len != 0) { 1629 + 1630 + if (!hash_default) 1631 + finv_buffer_remote(b_data, b_len); 1632 + 1633 + cpa = __pa(b_data); 1634 + frags[n].cpa_lo = cpa; 1635 + frags[n].cpa_hi = cpa >> 32; 1636 + frags[n].length = b_len; 1637 + frags[n].hash_for_home = hash_default; 1638 + n++; 1639 + } 1640 + 1641 + for (i = 0; i < sh->nr_frags; i++) { 1642 + 1643 + skb_frag_t *f = &sh->frags[i]; 1644 + unsigned long pfn = page_to_pfn(f->page); 1645 + 1646 + /* FIXME: Compute "hash_for_home" properly. */ 1647 + /* ISSUE: The hypervisor checks CHIP_HAS_REV1_DMA_PACKETS(). */ 1648 + int hash_for_home = hash_default; 1649 + 1650 + /* FIXME: Hmmm. 
*/ 1651 + if (!hash_default) { 1652 + void *va = pfn_to_kaddr(pfn) + f->page_offset; 1653 + BUG_ON(PageHighMem(f->page)); 1654 + finv_buffer_remote(va, f->size); 1655 + } 1656 + 1657 + cpa = ((phys_addr_t)pfn << PAGE_SHIFT) + f->page_offset; 1658 + frags[n].cpa_lo = cpa; 1659 + frags[n].cpa_hi = cpa >> 32; 1660 + frags[n].length = f->size; 1661 + frags[n].hash_for_home = hash_for_home; 1662 + n++; 1663 + } 1664 + 1665 + return n; 1666 + } 1667 + 1668 + 1669 + /* 1670 + * This function takes "skb", consisting of a header template and a 1671 + * payload, and hands it to LEPP, to emit as one or more segments, 1672 + * each consisting of a possibly modified header, plus a piece of the 1673 + * payload, via a process known as "tcp segmentation offload". 1674 + * 1675 + * Usually, "data" will contain the header template, of size "sh_len", 1676 + * and "sh->frags" will contain "skb->data_len" bytes of payload, and 1677 + * there will be "sh->gso_segs" segments. 1678 + * 1679 + * Sometimes, if "sendfile()" requires copying, we will be called with 1680 + * "data" containing the header and payload, with "frags" being empty. 1681 + * 1682 + * In theory, "sh->nr_frags" could be 3, but in practice, it seems 1683 + * that this will never actually happen. 1684 + * 1685 + * See "emulate_large_send_offload()" for some reference code, which 1686 + * does not handle checksumming. 1687 + * 1688 + * ISSUE: How do we make sure that high memory DMA does not migrate? 1689 + */ 1690 + static int tile_net_tx_tso(struct sk_buff *skb, struct net_device *dev) 1691 + { 1692 + struct tile_net_priv *priv = netdev_priv(dev); 1693 + int my_cpu = smp_processor_id(); 1694 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 1695 + struct tile_net_stats_t *stats = &info->stats; 1696 + 1697 + struct skb_shared_info *sh = skb_shinfo(skb); 1698 + 1699 + unsigned char *data = skb->data; 1700 + 1701 + /* The ip header follows the ethernet header. */ 1702 + struct iphdr *ih = ip_hdr(skb); 1703 + unsigned int ih_len = ih->ihl * 4; 1704 + 1705 + /* Note that "nh == ih", by definition. */ 1706 + unsigned char *nh = skb_network_header(skb); 1707 + unsigned int eh_len = nh - data; 1708 + 1709 + /* The tcp header follows the ip header. */ 1710 + struct tcphdr *th = (struct tcphdr *)(nh + ih_len); 1711 + unsigned int th_len = th->doff * 4; 1712 + 1713 + /* The total number of header bytes. */ 1714 + /* NOTE: This may be less than skb_headlen(skb). */ 1715 + unsigned int sh_len = eh_len + ih_len + th_len; 1716 + 1717 + /* The number of payload bytes at "skb->data + sh_len". */ 1718 + /* This is non-zero for sendfile() without HIGHDMA. */ 1719 + unsigned int b_len = skb_headlen(skb) - sh_len; 1720 + 1721 + /* The total number of payload bytes. */ 1722 + unsigned int d_len = b_len + skb->data_len; 1723 + 1724 + /* The maximum payload size. */ 1725 + unsigned int p_len = sh->gso_size; 1726 + 1727 + /* The total number of segments. */ 1728 + unsigned int num_segs = sh->gso_segs; 1729 + 1730 + /* The temporary copy of the command. */ 1731 + u32 cmd_body[(LEPP_MAX_CMD_SIZE + 3) / 4]; 1732 + lepp_tso_cmd_t *cmd = (lepp_tso_cmd_t *)cmd_body; 1733 + 1734 + /* Analyze the "frags". */ 1735 + unsigned int num_frags = 1736 + tile_net_tx_frags(cmd->frags, skb, data + sh_len, b_len); 1737 + 1738 + /* The size of the command, including frags and header. */ 1739 + size_t cmd_size = LEPP_TSO_CMD_SIZE(num_frags, sh_len); 1740 + 1741 + /* The command header. 
*/ 1742 + lepp_tso_cmd_t cmd_init = { 1743 + .tso = true, 1744 + .header_size = sh_len, 1745 + .ip_offset = eh_len, 1746 + .tcp_offset = eh_len + ih_len, 1747 + .payload_size = p_len, 1748 + .num_frags = num_frags, 1749 + }; 1750 + 1751 + unsigned long irqflags; 1752 + 1753 + lepp_queue_t *eq = priv->epp_queue; 1754 + 1755 + struct sk_buff *olds[4]; 1756 + unsigned int wanted = 4; 1757 + unsigned int i, nolds = 0; 1758 + 1759 + unsigned int cmd_head, cmd_tail, cmd_next; 1760 + unsigned int comp_tail; 1761 + 1762 + unsigned int free_slots; 1763 + 1764 + 1765 + /* Paranoia. */ 1766 + BUG_ON(skb->protocol != htons(ETH_P_IP)); 1767 + BUG_ON(ih->protocol != IPPROTO_TCP); 1768 + BUG_ON(skb->ip_summed != CHECKSUM_PARTIAL); 1769 + BUG_ON(num_frags > LEPP_MAX_FRAGS); 1770 + /*--BUG_ON(num_segs != (d_len + (p_len - 1)) / p_len); */ 1771 + BUG_ON(num_segs <= 1); 1772 + 1773 + 1774 + /* Finish preparing the command. */ 1775 + 1776 + /* Copy the command header. */ 1777 + *cmd = cmd_init; 1778 + 1779 + /* Copy the "header". */ 1780 + memcpy(&cmd->frags[num_frags], data, sh_len); 1781 + 1782 + 1783 + /* Prefetch and wait, to minimize time spent holding the spinlock. */ 1784 + prefetch_L1(&eq->comp_tail); 1785 + prefetch_L1(&eq->cmd_tail); 1786 + mb(); 1787 + 1788 + 1789 + /* Enqueue the command. */ 1790 + 1791 + spin_lock_irqsave(&priv->cmd_lock, irqflags); 1792 + 1793 + /* 1794 + * Handle completions if needed to make room. 1795 + * HACK: Spin until there is sufficient room. 1796 + */ 1797 + free_slots = lepp_num_free_comp_slots(eq); 1798 + if (free_slots < 1) { 1799 + spin: 1800 + nolds += tile_net_lepp_grab_comps(dev, olds + nolds, 1801 + wanted - nolds, NULL); 1802 + if (lepp_num_free_comp_slots(eq) < 1) 1803 + goto spin; 1804 + } 1805 + 1806 + cmd_head = eq->cmd_head; 1807 + cmd_tail = eq->cmd_tail; 1808 + 1809 + /* NOTE: The "gotos" below are untested. */ 1810 + 1811 + /* Prepare to advance, detecting full queue. */ 1812 + cmd_next = cmd_tail + cmd_size; 1813 + if (cmd_tail < cmd_head && cmd_next >= cmd_head) 1814 + goto spin; 1815 + if (cmd_next > LEPP_CMD_LIMIT) { 1816 + cmd_next = 0; 1817 + if (cmd_next == cmd_head) 1818 + goto spin; 1819 + } 1820 + 1821 + /* Copy the command. */ 1822 + memcpy(&eq->cmds[cmd_tail], cmd, cmd_size); 1823 + 1824 + /* Advance. */ 1825 + cmd_tail = cmd_next; 1826 + 1827 + /* Record "skb" for eventual freeing. */ 1828 + comp_tail = eq->comp_tail; 1829 + eq->comps[comp_tail] = skb; 1830 + LEPP_QINC(comp_tail); 1831 + eq->comp_tail = comp_tail; 1832 + 1833 + /* Flush before allowing LEPP to handle the command. */ 1834 + __insn_mf(); 1835 + 1836 + eq->cmd_tail = cmd_tail; 1837 + 1838 + spin_unlock_irqrestore(&priv->cmd_lock, irqflags); 1839 + 1840 + if (nolds == 0) 1841 + nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); 1842 + 1843 + /* Handle completions. */ 1844 + for (i = 0; i < nolds; i++) 1845 + kfree_skb(olds[i]); 1846 + 1847 + /* Update stats. */ 1848 + stats->tx_packets += num_segs; 1849 + stats->tx_bytes += (num_segs * sh_len) + d_len; 1850 + 1851 + /* Make sure the egress timer is scheduled. */ 1852 + tile_net_schedule_egress_timer(info); 1853 + 1854 + return NETDEV_TX_OK; 1855 + } 1856 + 1857 + 1858 + /* 1859 + * Transmit a packet (called by the kernel via "hard_start_xmit" hook). 
1860 + */ 1861 + static int tile_net_tx(struct sk_buff *skb, struct net_device *dev) 1862 + { 1863 + struct tile_net_priv *priv = netdev_priv(dev); 1864 + int my_cpu = smp_processor_id(); 1865 + struct tile_net_cpu *info = priv->cpu[my_cpu]; 1866 + struct tile_net_stats_t *stats = &info->stats; 1867 + 1868 + unsigned long irqflags; 1869 + 1870 + struct skb_shared_info *sh = skb_shinfo(skb); 1871 + 1872 + unsigned int len = skb->len; 1873 + unsigned char *data = skb->data; 1874 + 1875 + unsigned int csum_start = skb->csum_start - skb_headroom(skb); 1876 + 1877 + lepp_frag_t frags[LEPP_MAX_FRAGS]; 1878 + 1879 + unsigned int num_frags; 1880 + 1881 + lepp_queue_t *eq = priv->epp_queue; 1882 + 1883 + struct sk_buff *olds[4]; 1884 + unsigned int wanted = 4; 1885 + unsigned int i, nolds = 0; 1886 + 1887 + unsigned int cmd_size = sizeof(lepp_cmd_t); 1888 + 1889 + unsigned int cmd_head, cmd_tail, cmd_next; 1890 + unsigned int comp_tail; 1891 + 1892 + lepp_cmd_t cmds[LEPP_MAX_FRAGS]; 1893 + 1894 + unsigned int free_slots; 1895 + 1896 + 1897 + /* 1898 + * This is paranoia, since we think that if the link doesn't come 1899 + * up, telling Linux we have no carrier will keep it from trying 1900 + * to transmit. If it does, though, we can't execute this routine, 1901 + * since data structures we depend on aren't set up yet. 1902 + */ 1903 + if (!info->registered) 1904 + return NETDEV_TX_BUSY; 1905 + 1906 + 1907 + /* Save the timestamp. */ 1908 + dev->trans_start = jiffies; 1909 + 1910 + 1911 + #ifdef TILE_NET_PARANOIA 1912 + #if CHIP_HAS_CBOX_HOME_MAP() 1913 + if (hash_default) { 1914 + HV_PTE pte = *virt_to_pte(current->mm, (unsigned long)data); 1915 + if (hv_pte_get_mode(pte) != HV_PTE_MODE_CACHE_HASH_L3) 1916 + panic("Non-coherent egress buffer!"); 1917 + } 1918 + #endif 1919 + #endif 1920 + 1921 + 1922 + #ifdef TILE_NET_DUMP_PACKETS 1923 + /* ISSUE: Does not dump the "frags". */ 1924 + dump_packet(data, skb_headlen(skb), "tx"); 1925 + #endif /* TILE_NET_DUMP_PACKETS */ 1926 + 1927 + 1928 + if (sh->gso_size != 0) 1929 + return tile_net_tx_tso(skb, dev); 1930 + 1931 + 1932 + /* Prepare the commands. */ 1933 + 1934 + num_frags = tile_net_tx_frags(frags, skb, data, skb_headlen(skb)); 1935 + 1936 + for (i = 0; i < num_frags; i++) { 1937 + 1938 + bool final = (i == num_frags - 1); 1939 + 1940 + lepp_cmd_t cmd = { 1941 + .cpa_lo = frags[i].cpa_lo, 1942 + .cpa_hi = frags[i].cpa_hi, 1943 + .length = frags[i].length, 1944 + .hash_for_home = frags[i].hash_for_home, 1945 + .send_completion = final, 1946 + .end_of_packet = final 1947 + }; 1948 + 1949 + if (i == 0 && skb->ip_summed == CHECKSUM_PARTIAL) { 1950 + cmd.compute_checksum = 1; 1951 + cmd.checksum_data.bits.start_byte = csum_start; 1952 + cmd.checksum_data.bits.count = len - csum_start; 1953 + cmd.checksum_data.bits.destination_byte = 1954 + csum_start + skb->csum_offset; 1955 + } 1956 + 1957 + cmds[i] = cmd; 1958 + } 1959 + 1960 + 1961 + /* Prefetch and wait, to minimize time spent holding the spinlock. */ 1962 + prefetch_L1(&eq->comp_tail); 1963 + prefetch_L1(&eq->cmd_tail); 1964 + mb(); 1965 + 1966 + 1967 + /* Enqueue the commands. */ 1968 + 1969 + spin_lock_irqsave(&priv->cmd_lock, irqflags); 1970 + 1971 + /* 1972 + * Handle completions if needed to make room. 1973 + * HACK: Spin until there is sufficient room. 
1974 + */ 1975 + free_slots = lepp_num_free_comp_slots(eq); 1976 + if (free_slots < 1) { 1977 + spin: 1978 + nolds += tile_net_lepp_grab_comps(dev, olds + nolds, 1979 + wanted - nolds, NULL); 1980 + if (lepp_num_free_comp_slots(eq) < 1) 1981 + goto spin; 1982 + } 1983 + 1984 + cmd_head = eq->cmd_head; 1985 + cmd_tail = eq->cmd_tail; 1986 + 1987 + /* NOTE: The "gotos" below are untested. */ 1988 + 1989 + /* Copy the commands, or fail. */ 1990 + for (i = 0; i < num_frags; i++) { 1991 + 1992 + /* Prepare to advance, detecting full queue. */ 1993 + cmd_next = cmd_tail + cmd_size; 1994 + if (cmd_tail < cmd_head && cmd_next >= cmd_head) 1995 + goto spin; 1996 + if (cmd_next > LEPP_CMD_LIMIT) { 1997 + cmd_next = 0; 1998 + if (cmd_next == cmd_head) 1999 + goto spin; 2000 + } 2001 + 2002 + /* Copy the command. */ 2003 + *(lepp_cmd_t *)&eq->cmds[cmd_tail] = cmds[i]; 2004 + 2005 + /* Advance. */ 2006 + cmd_tail = cmd_next; 2007 + } 2008 + 2009 + /* Record "skb" for eventual freeing. */ 2010 + comp_tail = eq->comp_tail; 2011 + eq->comps[comp_tail] = skb; 2012 + LEPP_QINC(comp_tail); 2013 + eq->comp_tail = comp_tail; 2014 + 2015 + /* Flush before allowing LEPP to handle the command. */ 2016 + __insn_mf(); 2017 + 2018 + eq->cmd_tail = cmd_tail; 2019 + 2020 + spin_unlock_irqrestore(&priv->cmd_lock, irqflags); 2021 + 2022 + if (nolds == 0) 2023 + nolds = tile_net_lepp_grab_comps(dev, olds, wanted, NULL); 2024 + 2025 + /* Handle completions. */ 2026 + for (i = 0; i < nolds; i++) 2027 + kfree_skb(olds[i]); 2028 + 2029 + /* HACK: Track "expanded" size for short packets (e.g. 42 < 60). */ 2030 + stats->tx_packets++; 2031 + stats->tx_bytes += ((len >= ETH_ZLEN) ? len : ETH_ZLEN); 2032 + 2033 + /* Make sure the egress timer is scheduled. */ 2034 + tile_net_schedule_egress_timer(info); 2035 + 2036 + return NETDEV_TX_OK; 2037 + } 2038 + 2039 + 2040 + /* 2041 + * Deal with a transmit timeout. 2042 + */ 2043 + static void tile_net_tx_timeout(struct net_device *dev) 2044 + { 2045 + PDEBUG("tile_net_tx_timeout()\n"); 2046 + PDEBUG("Transmit timeout at %ld, latency %ld\n", jiffies, 2047 + jiffies - dev->trans_start); 2048 + 2049 + /* XXX: ISSUE: This doesn't seem useful for us. */ 2050 + netif_wake_queue(dev); 2051 + } 2052 + 2053 + 2054 + /* 2055 + * Ioctl commands. 2056 + */ 2057 + static int tile_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) 2058 + { 2059 + return -EOPNOTSUPP; 2060 + } 2061 + 2062 + 2063 + /* 2064 + * Get System Network Statistics. 2065 + * 2066 + * Returns the address of the device statistics structure. 2067 + */ 2068 + static struct net_device_stats *tile_net_get_stats(struct net_device *dev) 2069 + { 2070 + struct tile_net_priv *priv = netdev_priv(dev); 2071 + u32 rx_packets = 0; 2072 + u32 tx_packets = 0; 2073 + u32 rx_bytes = 0; 2074 + u32 tx_bytes = 0; 2075 + int i; 2076 + 2077 + for_each_online_cpu(i) { 2078 + if (priv->cpu[i]) { 2079 + rx_packets += priv->cpu[i]->stats.rx_packets; 2080 + rx_bytes += priv->cpu[i]->stats.rx_bytes; 2081 + tx_packets += priv->cpu[i]->stats.tx_packets; 2082 + tx_bytes += priv->cpu[i]->stats.tx_bytes; 2083 + } 2084 + } 2085 + 2086 + priv->stats.rx_packets = rx_packets; 2087 + priv->stats.rx_bytes = rx_bytes; 2088 + priv->stats.tx_packets = tx_packets; 2089 + priv->stats.tx_bytes = tx_bytes; 2090 + 2091 + return &priv->stats; 2092 + } 2093 + 2094 + 2095 + /* 2096 + * Change the "mtu". 2097 + * 2098 + * The "change_mtu" method is usually not needed. 2099 + * If you need it, it must be like this. 
2100 + */ 2101 + static int tile_net_change_mtu(struct net_device *dev, int new_mtu) 2102 + { 2103 + PDEBUG("tile_net_change_mtu()\n"); 2104 + 2105 + /* Check ranges. */ 2106 + if ((new_mtu < 68) || (new_mtu > 1500)) 2107 + return -EINVAL; 2108 + 2109 + /* Accept the value. */ 2110 + dev->mtu = new_mtu; 2111 + 2112 + return 0; 2113 + } 2114 + 2115 + 2116 + /* 2117 + * Change the Ethernet Address of the NIC. 2118 + * 2119 + * The hypervisor driver does not support changing MAC address. However, 2120 + * the IPP does not do anything with the MAC address, so the address which 2121 + * gets used on outgoing packets, and which is accepted on incoming packets, 2122 + * is completely up to the NetIO program or kernel driver which is actually 2123 + * handling them. 2124 + * 2125 + * Returns 0 on success, negative on failure. 2126 + */ 2127 + static int tile_net_set_mac_address(struct net_device *dev, void *p) 2128 + { 2129 + struct sockaddr *addr = p; 2130 + 2131 + if (!is_valid_ether_addr(addr->sa_data)) 2132 + return -EINVAL; 2133 + 2134 + /* ISSUE: Note that "dev_addr" is now a pointer. */ 2135 + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); 2136 + 2137 + return 0; 2138 + } 2139 + 2140 + 2141 + /* 2142 + * Obtain the MAC address from the hypervisor. 2143 + * This must be done before opening the device. 2144 + */ 2145 + static int tile_net_get_mac(struct net_device *dev) 2146 + { 2147 + struct tile_net_priv *priv = netdev_priv(dev); 2148 + 2149 + char hv_dev_name[32]; 2150 + int len; 2151 + 2152 + __netio_getset_offset_t offset = { .word = NETIO_IPP_PARAM_OFF }; 2153 + 2154 + int ret; 2155 + 2156 + /* For example, "xgbe0". */ 2157 + strcpy(hv_dev_name, dev->name); 2158 + len = strlen(hv_dev_name); 2159 + 2160 + /* For example, "xgbe/0". */ 2161 + hv_dev_name[len] = hv_dev_name[len - 1]; 2162 + hv_dev_name[len - 1] = '/'; 2163 + len++; 2164 + 2165 + /* For example, "xgbe/0/native_hash". */ 2166 + strcpy(hv_dev_name + len, hash_default ? "/native_hash" : "/native"); 2167 + 2168 + /* Get the hypervisor handle for this device. */ 2169 + priv->hv_devhdl = hv_dev_open((HV_VirtAddr)hv_dev_name, 0); 2170 + PDEBUG("hv_dev_open(%s) returned %d %p\n", 2171 + hv_dev_name, priv->hv_devhdl, &priv->hv_devhdl); 2172 + if (priv->hv_devhdl < 0) { 2173 + if (priv->hv_devhdl == HV_ENODEV) 2174 + printk(KERN_DEBUG "Ignoring unconfigured device %s\n", 2175 + hv_dev_name); 2176 + else 2177 + printk(KERN_DEBUG "hv_dev_open(%s) returned %d\n", 2178 + hv_dev_name, priv->hv_devhdl); 2179 + return -1; 2180 + } 2181 + 2182 + /* 2183 + * Read the hardware address from the hypervisor. 2184 + * ISSUE: Note that "dev_addr" is now a pointer. 2185 + */ 2186 + offset.bits.class = NETIO_PARAM; 2187 + offset.bits.addr = NETIO_PARAM_MAC; 2188 + ret = hv_dev_pread(priv->hv_devhdl, 0, 2189 + (HV_VirtAddr)dev->dev_addr, dev->addr_len, 2190 + offset.word); 2191 + PDEBUG("hv_dev_pread(NETIO_PARAM_MAC) returned %d\n", ret); 2192 + if (ret <= 0) { 2193 + printk(KERN_DEBUG "hv_dev_pread(NETIO_PARAM_MAC) %s failed\n", 2194 + dev->name); 2195 + /* 2196 + * Since the device is configured by the hypervisor but we 2197 + * can't get its MAC address, we are most likely running 2198 + * the simulator, so let's generate a random MAC address. 
2199 + */ 2200 + random_ether_addr(dev->dev_addr); 2201 + } 2202 + 2203 + return 0; 2204 + } 2205 + 2206 + 2207 + static struct net_device_ops tile_net_ops = { 2208 + .ndo_open = tile_net_open, 2209 + .ndo_stop = tile_net_stop, 2210 + .ndo_start_xmit = tile_net_tx, 2211 + .ndo_do_ioctl = tile_net_ioctl, 2212 + .ndo_get_stats = tile_net_get_stats, 2213 + .ndo_change_mtu = tile_net_change_mtu, 2214 + .ndo_tx_timeout = tile_net_tx_timeout, 2215 + .ndo_set_mac_address = tile_net_set_mac_address 2216 + }; 2217 + 2218 + 2219 + /* 2220 + * The setup function. 2221 + * 2222 + * This uses ether_setup() to assign various fields in dev, including 2223 + * setting IFF_BROADCAST and IFF_MULTICAST, then sets some extra fields. 2224 + */ 2225 + static void tile_net_setup(struct net_device *dev) 2226 + { 2227 + PDEBUG("tile_net_setup()\n"); 2228 + 2229 + ether_setup(dev); 2230 + 2231 + dev->netdev_ops = &tile_net_ops; 2232 + 2233 + dev->watchdog_timeo = TILE_NET_TIMEOUT; 2234 + 2235 + /* We want lockless xmit. */ 2236 + dev->features |= NETIF_F_LLTX; 2237 + 2238 + /* We support hardware tx checksums. */ 2239 + dev->features |= NETIF_F_HW_CSUM; 2240 + 2241 + /* We support scatter/gather. */ 2242 + dev->features |= NETIF_F_SG; 2243 + 2244 + /* We support TSO. */ 2245 + dev->features |= NETIF_F_TSO; 2246 + 2247 + #ifdef TILE_NET_GSO 2248 + /* We support GSO. */ 2249 + dev->features |= NETIF_F_GSO; 2250 + #endif 2251 + 2252 + if (hash_default) 2253 + dev->features |= NETIF_F_HIGHDMA; 2254 + 2255 + /* ISSUE: We should support NETIF_F_UFO. */ 2256 + 2257 + dev->tx_queue_len = TILE_NET_TX_QUEUE_LEN; 2258 + 2259 + dev->mtu = TILE_NET_MTU; 2260 + } 2261 + 2262 + 2263 + /* 2264 + * Allocate the device structure, register the device, and obtain the 2265 + * MAC address from the hypervisor. 2266 + */ 2267 + static struct net_device *tile_net_dev_init(const char *name) 2268 + { 2269 + int ret; 2270 + struct net_device *dev; 2271 + struct tile_net_priv *priv; 2272 + struct page *page; 2273 + 2274 + /* 2275 + * Allocate the device structure. This allocates "priv", calls 2276 + * tile_net_setup(), and saves "name". Normally, "name" is a 2277 + * template, instantiated by register_netdev(), but not for us. 2278 + */ 2279 + dev = alloc_netdev(sizeof(*priv), name, tile_net_setup); 2280 + if (!dev) { 2281 + pr_err("alloc_netdev(%s) failed\n", name); 2282 + return NULL; 2283 + } 2284 + 2285 + priv = netdev_priv(dev); 2286 + 2287 + /* Initialize "priv". */ 2288 + 2289 + memset(priv, 0, sizeof(*priv)); 2290 + 2291 + /* Save "dev" for "tile_net_open_retry()". */ 2292 + priv->dev = dev; 2293 + 2294 + INIT_DELAYED_WORK(&priv->retry_work, tile_net_open_retry); 2295 + 2296 + spin_lock_init(&priv->cmd_lock); 2297 + spin_lock_init(&priv->comp_lock); 2298 + 2299 + /* Allocate "epp_queue". */ 2300 + BUG_ON(get_order(sizeof(lepp_queue_t)) != 0); 2301 + page = alloc_pages(GFP_KERNEL | __GFP_ZERO, 0); 2302 + if (!page) { 2303 + free_netdev(dev); 2304 + return NULL; 2305 + } 2306 + priv->epp_queue = page_address(page); 2307 + 2308 + /* Register the network device. */ 2309 + ret = register_netdev(dev); 2310 + if (ret) { 2311 + pr_err("register_netdev %s failed %d\n", dev->name, ret); 2312 + free_page((unsigned long)priv->epp_queue); 2313 + free_netdev(dev); 2314 + return NULL; 2315 + } 2316 + 2317 + /* Get the MAC address. 
*/ 2318 + ret = tile_net_get_mac(dev); 2319 + if (ret < 0) { 2320 + unregister_netdev(dev); 2321 + free_page((unsigned long)priv->epp_queue); 2322 + free_netdev(dev); 2323 + return NULL; 2324 + } 2325 + 2326 + return dev; 2327 + } 2328 + 2329 + 2330 + /* 2331 + * Module cleanup. 2332 + */ 2333 + static void tile_net_cleanup(void) 2334 + { 2335 + int i; 2336 + 2337 + for (i = 0; i < TILE_NET_DEVS; i++) { 2338 + if (tile_net_devs[i]) { 2339 + struct net_device *dev = tile_net_devs[i]; 2340 + struct tile_net_priv *priv = netdev_priv(dev); 2341 + unregister_netdev(dev); 2342 + finv_buffer(priv->epp_queue, PAGE_SIZE); 2343 + free_page((unsigned long)priv->epp_queue); 2344 + free_netdev(dev); 2345 + } 2346 + } 2347 + } 2348 + 2349 + 2350 + /* 2351 + * Module initialization. 2352 + */ 2353 + static int tile_net_init_module(void) 2354 + { 2355 + pr_info("Tilera IPP Net Driver\n"); 2356 + 2357 + tile_net_devs[0] = tile_net_dev_init("xgbe0"); 2358 + tile_net_devs[1] = tile_net_dev_init("xgbe1"); 2359 + tile_net_devs[2] = tile_net_dev_init("gbe0"); 2360 + tile_net_devs[3] = tile_net_dev_init("gbe1"); 2361 + 2362 + return 0; 2363 + } 2364 + 2365 + 2366 + #ifndef MODULE 2367 + /* 2368 + * The "network_cpus" boot argument specifies the cpus that are dedicated 2369 + * to handle ingress packets. 2370 + * 2371 + * The parameter should be in the form "network_cpus=m-n[,x-y]", where 2372 + * m, n, x, y are integer numbers that represent the cpus that can be 2373 + * neither a dedicated cpu nor a dataplane cpu. 2374 + */ 2375 + static int __init network_cpus_setup(char *str) 2376 + { 2377 + int rc = cpulist_parse_crop(str, &network_cpus_map); 2378 + if (rc != 0) { 2379 + pr_warning("network_cpus=%s: malformed cpu list\n", 2380 + str); 2381 + } else { 2382 + 2383 + /* Remove dedicated cpus. */ 2384 + cpumask_and(&network_cpus_map, &network_cpus_map, 2385 + cpu_possible_mask); 2386 + 2387 + 2388 + if (cpumask_empty(&network_cpus_map)) { 2389 + pr_warning("Ignoring network_cpus='%s'.\n", 2390 + str); 2391 + } else { 2392 + char buf[1024]; 2393 + cpulist_scnprintf(buf, sizeof(buf), &network_cpus_map); 2394 + pr_info("Linux network CPUs: %s\n", buf); 2395 + network_cpus_used = true; 2396 + } 2397 + } 2398 + 2399 + return 0; 2400 + } 2401 + __setup("network_cpus=", network_cpus_setup); 2402 + #endif 2403 + 2404 + 2405 + module_init(tile_net_init_module); 2406 + module_exit(tile_net_cleanup);
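tile_net_get_mac() above derives the hypervisor device path from the interface name by splitting off the trailing digit and appending a caching-mode suffix, so "xgbe0" becomes "xgbe/0/native_hash" (or ".../native" when hashed caching is off). An equivalent, slightly restructured userspace sketch of that transformation:

    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>

    /* 32 bytes matches the driver's hv_dev_name buffer. */
    static void hv_name(char out[32], const char *ifname, bool hash)
    {
            size_t len = strlen(ifname);

            /* "xgbe0" -> "xgbe/0": insert '/' before the trailing digit. */
            memcpy(out, ifname, len - 1);
            out[len - 1] = '/';
            out[len] = ifname[len - 1];

            /* Append the caching-mode suffix. */
            strcpy(out + len + 1, hash ? "/native_hash" : "/native");
    }

    int main(void)
    {
            char buf[32];

            hv_name(buf, "xgbe0", true);
            puts(buf);                      /* xgbe/0/native_hash */
            hv_name(buf, "gbe1", false);
            puts(buf);                      /* gbe/1/native */
            return 0;
    }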