Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

gve: Add transmit and receive support

Add support for passing traffic.

Signed-off-by: Catherine Sullivan <csully@google.com>
Signed-off-by: Sagi Shahar <sagis@google.com>
Signed-off-by: Jon Olson <jonolson@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>

Authored by Catherine Sullivan and committed by David S. Miller
f5cedc84 893ce44d

+2221 -5
+30
Documentation/networking/device_drivers/google/gve.rst
··· 42 42 - See description below 43 43 - Interrupts 44 44 - See supported interrupts below 45 + - Transmit and Receive Queues 46 + - See description below 45 47 46 48 Registers 47 49 --------- ··· 82 80 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 83 81 The notification block interrupts are used to tell the driver to poll 84 82 the queues associated with that interrupt. 83 + 84 + The handler for these irqs schedule the napi for that block to run 85 + and poll the queues. 86 + 87 + Traffic Queues 88 + -------------- 89 + gVNIC's queues are composed of a descriptor ring and a buffer and are 90 + assigned to a notification block. 91 + 92 + The descriptor rings are power-of-two-sized ring buffers consisting of 93 + fixed-size descriptors. They advance their head pointer using a __be32 94 + doorbell located in Bar2. The tail pointers are advanced by consuming 95 + descriptors in-order and updating a __be32 counter. Both the doorbell 96 + and the counter overflow to zero. 97 + 98 + Each queue's buffers must be registered in advance with the device as a 99 + queue page list, and packet data can only be put in those pages. 100 + 101 + Transmit 102 + ~~~~~~~~ 103 + gve maps the buffers for transmit rings into a FIFO and copies the packets 104 + into the FIFO before sending them to the NIC. 105 + 106 + Receive 107 + ~~~~~~~ 108 + The buffers for receive rings are put into a data ring that is the same 109 + length as the descriptor ring and the head and tail pointers advance over 110 + the rings together.
+1 -1
drivers/net/ethernet/google/gve/Makefile
··· 1 1 # Makefile for the Google virtual Ethernet (gve) driver 2 2 3 3 obj-$(CONFIG_GVE) += gve.o 4 - gve-objs := gve_main.o gve_adminq.o 4 + gve-objs := gve_main.o gve_tx.o gve_rx.o gve_adminq.o
+259 -1
drivers/net/ethernet/google/gve/gve.h
··· 10 10 #include <linux/dma-mapping.h> 11 11 #include <linux/netdevice.h> 12 12 #include <linux/pci.h> 13 + #include <linux/u64_stats_sync.h> 14 + #include "gve_desc.h" 13 15 14 16 #ifndef PCI_VENDOR_ID_GOOGLE 15 17 #define PCI_VENDOR_ID_GOOGLE 0x1ae0 ··· 22 20 #define GVE_REGISTER_BAR 0 23 21 #define GVE_DOORBELL_BAR 2 24 22 25 - /* 1 for management */ 23 + /* Driver can alloc up to 2 segments for the header and 2 for the payload. */ 24 + #define GVE_TX_MAX_IOVEC 4 25 + /* 1 for management, 1 for rx, 1 for tx */ 26 26 #define GVE_MIN_MSIX 3 27 27 28 + /* Each slot in the desc ring has a 1:1 mapping to a slot in the data ring */ 29 + struct gve_rx_desc_queue { 30 + struct gve_rx_desc *desc_ring; /* the descriptor ring */ 31 + dma_addr_t bus; /* the bus for the desc_ring */ 32 + u32 cnt; /* free-running total number of completed packets */ 33 + u32 fill_cnt; /* free-running total number of descriptors posted */ 34 + u32 mask; /* masks the cnt to the size of the ring */ 35 + u8 seqno; /* the next expected seqno for this desc*/ 36 + }; 37 + 38 + /* The page info for a single slot in the RX data queue */ 39 + struct gve_rx_slot_page_info { 40 + struct page *page; 41 + void *page_address; 42 + u32 page_offset; /* offset to write to in page */ 43 + }; 44 + 45 + /* A list of pages registered with the device during setup and used by a queue 46 + * as buffers 47 + */ 48 + struct gve_queue_page_list { 49 + u32 id; /* unique id */ 50 + u32 num_entries; 51 + struct page **pages; /* list of num_entries pages */ 52 + dma_addr_t *page_buses; /* the dma addrs of the pages */ 53 + }; 54 + 55 + /* Each slot in the data ring has a 1:1 mapping to a slot in the desc ring */ 56 + struct gve_rx_data_queue { 57 + struct gve_rx_data_slot *data_ring; /* read by NIC */ 58 + dma_addr_t data_bus; /* dma mapping of the slots */ 59 + struct gve_rx_slot_page_info *page_info; /* page info of the buffers */ 60 + struct gve_queue_page_list *qpl; /* qpl assigned to this queue */ 61 + u32 mask; /* 
masks the cnt to the size of the ring */ 62 + u32 cnt; /* free-running total number of completed packets */ 63 + }; 64 + 65 + struct gve_priv; 66 + 67 + /* An RX ring that contains a power-of-two sized desc and data ring. */ 68 + struct gve_rx_ring { 69 + struct gve_priv *gve; 70 + struct gve_rx_desc_queue desc; 71 + struct gve_rx_data_queue data; 72 + u64 rbytes; /* free-running bytes received */ 73 + u64 rpackets; /* free-running packets received */ 74 + u32 q_num; /* queue index */ 75 + u32 ntfy_id; /* notification block index */ 76 + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ 77 + dma_addr_t q_resources_bus; /* dma address for the queue resources */ 78 + struct u64_stats_sync statss; /* sync stats for 32bit archs */ 79 + }; 80 + 81 + /* A TX desc ring entry */ 82 + union gve_tx_desc { 83 + struct gve_tx_pkt_desc pkt; /* first desc for a packet */ 84 + struct gve_tx_seg_desc seg; /* subsequent descs for a packet */ 85 + }; 86 + 87 + /* Tracks the memory in the fifo occupied by a segment of a packet */ 88 + struct gve_tx_iovec { 89 + u32 iov_offset; /* offset into this segment */ 90 + u32 iov_len; /* length */ 91 + u32 iov_padding; /* padding associated with this segment */ 92 + }; 93 + 94 + /* Tracks the memory in the fifo occupied by the skb. 
Mapped 1:1 to a desc 95 + * ring entry but only used for a pkt_desc not a seg_desc 96 + */ 97 + struct gve_tx_buffer_state { 98 + struct sk_buff *skb; /* skb for this pkt */ 99 + struct gve_tx_iovec iov[GVE_TX_MAX_IOVEC]; /* segments of this pkt */ 100 + }; 101 + 102 + /* A TX buffer - each queue has one */ 103 + struct gve_tx_fifo { 104 + void *base; /* address of base of FIFO */ 105 + u32 size; /* total size */ 106 + atomic_t available; /* how much space is still available */ 107 + u32 head; /* offset to write at */ 108 + struct gve_queue_page_list *qpl; /* QPL mapped into this FIFO */ 109 + }; 110 + 111 + /* A TX ring that contains a power-of-two sized desc ring and a FIFO buffer */ 112 + struct gve_tx_ring { 113 + /* Cacheline 0 -- Accessed & dirtied during transmit */ 114 + struct gve_tx_fifo tx_fifo; 115 + u32 req; /* driver tracked head pointer */ 116 + u32 done; /* driver tracked tail pointer */ 117 + 118 + /* Cacheline 1 -- Accessed & dirtied during gve_clean_tx_done */ 119 + __be32 last_nic_done ____cacheline_aligned; /* NIC tail pointer */ 120 + u64 pkt_done; /* free-running - total packets completed */ 121 + u64 bytes_done; /* free-running - total bytes completed */ 122 + 123 + /* Cacheline 2 -- Read-mostly fields */ 124 + union gve_tx_desc *desc ____cacheline_aligned; 125 + struct gve_tx_buffer_state *info; /* Maps 1:1 to a desc */ 126 + struct netdev_queue *netdev_txq; 127 + struct gve_queue_resources *q_resources; /* head and tail pointer idx */ 128 + u32 mask; /* masks req and done down to queue size */ 129 + 130 + /* Slow-path fields */ 131 + u32 q_num ____cacheline_aligned; /* queue idx */ 132 + u32 stop_queue; /* count of queue stops */ 133 + u32 wake_queue; /* count of queue wakes */ 134 + u32 ntfy_id; /* notification block index */ 135 + dma_addr_t bus; /* dma address of the descr ring */ 136 + dma_addr_t q_resources_bus; /* dma address of the queue resources */ 137 + struct u64_stats_sync statss; /* sync stats for 32bit archs */ 138 + } 
____cacheline_aligned; 139 + 140 + /* Wraps the info for one irq including the napi struct and the queues 141 + * associated with that irq. 142 + */ 28 143 struct gve_notify_block { 29 144 __be32 irq_db_index; /* idx into Bar2 - set by device, must be 1st */ 30 145 char name[IFNAMSIZ + 16]; /* name registered with the kernel */ 31 146 struct napi_struct napi; /* kernel napi struct for this block */ 32 147 struct gve_priv *priv; 148 + struct gve_tx_ring *tx; /* tx rings on this block */ 149 + struct gve_rx_ring *rx; /* rx rings on this block */ 33 150 } ____cacheline_aligned; 151 + 152 + /* Tracks allowed and current queue settings */ 153 + struct gve_queue_config { 154 + u16 max_queues; 155 + u16 num_queues; /* current */ 156 + }; 157 + 158 + /* Tracks the available and used qpl IDs */ 159 + struct gve_qpl_config { 160 + u32 qpl_map_size; /* map memory size */ 161 + unsigned long *qpl_id_map; /* bitmap of used qpl ids */ 162 + }; 34 163 35 164 struct gve_priv { 36 165 struct net_device *dev; 166 + struct gve_tx_ring *tx; /* array of tx_cfg.num_queues */ 167 + struct gve_rx_ring *rx; /* array of rx_cfg.num_queues */ 168 + struct gve_queue_page_list *qpls; /* array of num qpls */ 37 169 struct gve_notify_block *ntfy_blocks; /* array of num_ntfy_blks */ 38 170 dma_addr_t ntfy_block_bus; 39 171 struct msix_entry *msix_vectors; /* array of num_ntfy_blks + 1 */ ··· 177 41 dma_addr_t counter_array_bus; 178 42 179 43 u16 num_event_counters; 44 + u16 tx_desc_cnt; /* num desc per ring */ 45 + u16 rx_desc_cnt; /* num desc per ring */ 46 + u16 tx_pages_per_qpl; /* tx buffer length */ 47 + u16 rx_pages_per_qpl; /* rx buffer length */ 48 + u64 max_registered_pages; 49 + u64 num_registered_pages; /* num pages registered with NIC */ 50 + u32 rx_copybreak; /* copy packets smaller than this */ 51 + u16 default_num_queues; /* default num queues to set up */ 180 52 53 + struct gve_queue_config tx_cfg; 54 + struct gve_queue_config rx_cfg; 55 + struct gve_qpl_config qpl_cfg; /* map used 
QPL ids */ 181 56 u32 num_ntfy_blks; /* spilt between TX and RX so must be even */ 182 57 183 58 struct gve_registers __iomem *reg_bar0; /* see gve_register.h */ 184 59 __be32 __iomem *db_bar2; /* "array" of doorbells */ 185 60 u32 msg_enable; /* level for netif* netdev print macros */ 186 61 struct pci_dev *pdev; 62 + 63 + /* metrics */ 64 + u32 tx_timeo_cnt; 187 65 188 66 /* Admin queue - see gve_adminq.h*/ 189 67 union gve_adminq_command *adminq; ··· 282 132 { 283 133 return &priv->db_bar2[be32_to_cpu(block->irq_db_index)]; 284 134 } 135 + 136 + /* Returns the index into ntfy_blocks of the given tx ring's block 137 + */ 138 + static inline u32 gve_tx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) 139 + { 140 + return queue_idx; 141 + } 142 + 143 + /* Returns the index into ntfy_blocks of the given rx ring's block 144 + */ 145 + static inline u32 gve_rx_idx_to_ntfy(struct gve_priv *priv, u32 queue_idx) 146 + { 147 + return (priv->num_ntfy_blks / 2) + queue_idx; 148 + } 149 + 150 + /* Returns the number of tx queue page lists 151 + */ 152 + static inline u32 gve_num_tx_qpls(struct gve_priv *priv) 153 + { 154 + return priv->tx_cfg.num_queues; 155 + } 156 + 157 + /* Returns the number of rx queue page lists 158 + */ 159 + static inline u32 gve_num_rx_qpls(struct gve_priv *priv) 160 + { 161 + return priv->rx_cfg.num_queues; 162 + } 163 + 164 + /* Returns a pointer to the next available tx qpl in the list of qpls 165 + */ 166 + static inline 167 + struct gve_queue_page_list *gve_assign_tx_qpl(struct gve_priv *priv) 168 + { 169 + int id = find_first_zero_bit(priv->qpl_cfg.qpl_id_map, 170 + priv->qpl_cfg.qpl_map_size); 171 + 172 + /* we are out of tx qpls */ 173 + if (id >= gve_num_tx_qpls(priv)) 174 + return NULL; 175 + 176 + set_bit(id, priv->qpl_cfg.qpl_id_map); 177 + return &priv->qpls[id]; 178 + } 179 + 180 + /* Returns a pointer to the next available rx qpl in the list of qpls 181 + */ 182 + static inline 183 + struct gve_queue_page_list 
*gve_assign_rx_qpl(struct gve_priv *priv) 184 + { 185 + int id = find_next_zero_bit(priv->qpl_cfg.qpl_id_map, 186 + priv->qpl_cfg.qpl_map_size, 187 + gve_num_tx_qpls(priv)); 188 + 189 + /* we are out of rx qpls */ 190 + if (id == priv->qpl_cfg.qpl_map_size) 191 + return NULL; 192 + 193 + set_bit(id, priv->qpl_cfg.qpl_id_map); 194 + return &priv->qpls[id]; 195 + } 196 + 197 + /* Unassigns the qpl with the given id 198 + */ 199 + static inline void gve_unassign_qpl(struct gve_priv *priv, int id) 200 + { 201 + clear_bit(id, priv->qpl_cfg.qpl_id_map); 202 + } 203 + 204 + /* Returns the correct dma direction for tx and rx qpls 205 + */ 206 + static inline enum dma_data_direction gve_qpl_dma_dir(struct gve_priv *priv, 207 + int id) 208 + { 209 + if (id < gve_num_tx_qpls(priv)) 210 + return DMA_TO_DEVICE; 211 + else 212 + return DMA_FROM_DEVICE; 213 + } 214 + 215 + /* Returns true if the max mtu allows page recycling */ 216 + static inline bool gve_can_recycle_pages(struct net_device *dev) 217 + { 218 + /* We can't recycle the pages if we can't fit a packet into half a 219 + * page. 
220 + */ 221 + return dev->max_mtu <= PAGE_SIZE / 2; 222 + } 223 + 224 + /* buffers */ 225 + int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, 226 + enum dma_data_direction); 227 + void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 228 + enum dma_data_direction); 229 + /* tx handling */ 230 + netdev_tx_t gve_tx(struct sk_buff *skb, struct net_device *dev); 231 + bool gve_tx_poll(struct gve_notify_block *block, int budget); 232 + int gve_tx_alloc_rings(struct gve_priv *priv); 233 + void gve_tx_free_rings(struct gve_priv *priv); 234 + __be32 gve_tx_load_event_counter(struct gve_priv *priv, 235 + struct gve_tx_ring *tx); 236 + /* rx handling */ 237 + void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx); 238 + bool gve_rx_poll(struct gve_notify_block *block, int budget); 239 + int gve_rx_alloc_rings(struct gve_priv *priv); 240 + void gve_rx_free_rings(struct gve_priv *priv); 241 + bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget, 242 + netdev_features_t feat); 285 243 #endif /* _GVE_H_ */
+138
drivers/net/ethernet/google/gve/gve_adminq.c
··· 190 190 return gve_adminq_execute_cmd(priv, &cmd); 191 191 } 192 192 193 + int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_index) 194 + { 195 + struct gve_tx_ring *tx = &priv->tx[queue_index]; 196 + union gve_adminq_command cmd; 197 + 198 + memset(&cmd, 0, sizeof(cmd)); 199 + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_TX_QUEUE); 200 + cmd.create_tx_queue = (struct gve_adminq_create_tx_queue) { 201 + .queue_id = cpu_to_be32(queue_index), 202 + .reserved = 0, 203 + .queue_resources_addr = cpu_to_be64(tx->q_resources_bus), 204 + .tx_ring_addr = cpu_to_be64(tx->bus), 205 + .queue_page_list_id = cpu_to_be32(tx->tx_fifo.qpl->id), 206 + .ntfy_id = cpu_to_be32(tx->ntfy_id), 207 + }; 208 + 209 + return gve_adminq_execute_cmd(priv, &cmd); 210 + } 211 + 212 + int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_index) 213 + { 214 + struct gve_rx_ring *rx = &priv->rx[queue_index]; 215 + union gve_adminq_command cmd; 216 + 217 + memset(&cmd, 0, sizeof(cmd)); 218 + cmd.opcode = cpu_to_be32(GVE_ADMINQ_CREATE_RX_QUEUE); 219 + cmd.create_rx_queue = (struct gve_adminq_create_rx_queue) { 220 + .queue_id = cpu_to_be32(queue_index), 221 + .index = cpu_to_be32(queue_index), 222 + .reserved = 0, 223 + .ntfy_id = cpu_to_be32(rx->ntfy_id), 224 + .queue_resources_addr = cpu_to_be64(rx->q_resources_bus), 225 + .rx_desc_ring_addr = cpu_to_be64(rx->desc.bus), 226 + .rx_data_ring_addr = cpu_to_be64(rx->data.data_bus), 227 + .queue_page_list_id = cpu_to_be32(rx->data.qpl->id), 228 + }; 229 + 230 + return gve_adminq_execute_cmd(priv, &cmd); 231 + } 232 + 233 + int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_index) 234 + { 235 + union gve_adminq_command cmd; 236 + 237 + memset(&cmd, 0, sizeof(cmd)); 238 + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_TX_QUEUE); 239 + cmd.destroy_tx_queue = (struct gve_adminq_destroy_tx_queue) { 240 + .queue_id = cpu_to_be32(queue_index), 241 + }; 242 + 243 + return gve_adminq_execute_cmd(priv, &cmd); 244 + } 245 + 246 
+ int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_index) 247 + { 248 + union gve_adminq_command cmd; 249 + 250 + memset(&cmd, 0, sizeof(cmd)); 251 + cmd.opcode = cpu_to_be32(GVE_ADMINQ_DESTROY_RX_QUEUE); 252 + cmd.destroy_rx_queue = (struct gve_adminq_destroy_rx_queue) { 253 + .queue_id = cpu_to_be32(queue_index), 254 + }; 255 + 256 + return gve_adminq_execute_cmd(priv, &cmd); 257 + } 258 + 193 259 int gve_adminq_describe_device(struct gve_priv *priv) 194 260 { 195 261 struct gve_device_descriptor *descriptor; ··· 281 215 if (err) 282 216 goto free_device_descriptor; 283 217 218 + priv->tx_desc_cnt = be16_to_cpu(descriptor->tx_queue_entries); 219 + if (priv->tx_desc_cnt * sizeof(priv->tx->desc[0]) < PAGE_SIZE) { 220 + netif_err(priv, drv, priv->dev, "Tx desc count %d too low\n", 221 + priv->tx_desc_cnt); 222 + err = -EINVAL; 223 + goto free_device_descriptor; 224 + } 225 + priv->rx_desc_cnt = be16_to_cpu(descriptor->rx_queue_entries); 226 + if (priv->rx_desc_cnt * sizeof(priv->rx->desc.desc_ring[0]) 227 + < PAGE_SIZE || 228 + priv->rx_desc_cnt * sizeof(priv->rx->data.data_ring[0]) 229 + < PAGE_SIZE) { 230 + netif_err(priv, drv, priv->dev, "Rx desc count %d too low\n", 231 + priv->rx_desc_cnt); 232 + err = -EINVAL; 233 + goto free_device_descriptor; 234 + } 235 + priv->max_registered_pages = 236 + be64_to_cpu(descriptor->max_registered_pages); 284 237 mtu = be16_to_cpu(descriptor->mtu); 285 238 if (mtu < ETH_MIN_MTU) { 286 239 netif_err(priv, drv, priv->dev, "MTU %d below minimum MTU\n", ··· 312 227 ether_addr_copy(priv->dev->dev_addr, descriptor->mac); 313 228 mac = descriptor->mac; 314 229 netif_info(priv, drv, priv->dev, "MAC addr: %pM\n", mac); 230 + priv->tx_pages_per_qpl = be16_to_cpu(descriptor->tx_pages_per_qpl); 231 + priv->rx_pages_per_qpl = be16_to_cpu(descriptor->rx_pages_per_qpl); 232 + if (priv->rx_pages_per_qpl < priv->rx_desc_cnt) { 233 + netif_err(priv, drv, priv->dev, "rx_pages_per_qpl cannot be smaller than rx_desc_cnt, setting 
rx_desc_cnt down to %d.\n", 234 + priv->rx_pages_per_qpl); 235 + priv->rx_desc_cnt = priv->rx_pages_per_qpl; 236 + } 237 + priv->default_num_queues = be16_to_cpu(descriptor->default_num_queues); 315 238 316 239 free_device_descriptor: 317 240 dma_free_coherent(&priv->pdev->dev, sizeof(*descriptor), descriptor, 318 241 descriptor_bus); 319 242 return err; 243 + } 244 + 245 + int gve_adminq_register_page_list(struct gve_priv *priv, 246 + struct gve_queue_page_list *qpl) 247 + { 248 + struct device *hdev = &priv->pdev->dev; 249 + u32 num_entries = qpl->num_entries; 250 + u32 size = num_entries * sizeof(qpl->page_buses[0]); 251 + union gve_adminq_command cmd; 252 + dma_addr_t page_list_bus; 253 + __be64 *page_list; 254 + int err; 255 + int i; 256 + 257 + memset(&cmd, 0, sizeof(cmd)); 258 + page_list = dma_alloc_coherent(hdev, size, &page_list_bus, GFP_KERNEL); 259 + if (!page_list) 260 + return -ENOMEM; 261 + 262 + for (i = 0; i < num_entries; i++) 263 + page_list[i] = cpu_to_be64(qpl->page_buses[i]); 264 + 265 + cmd.opcode = cpu_to_be32(GVE_ADMINQ_REGISTER_PAGE_LIST); 266 + cmd.reg_page_list = (struct gve_adminq_register_page_list) { 267 + .page_list_id = cpu_to_be32(qpl->id), 268 + .num_pages = cpu_to_be32(num_entries), 269 + .page_address_list_addr = cpu_to_be64(page_list_bus), 270 + }; 271 + 272 + err = gve_adminq_execute_cmd(priv, &cmd); 273 + dma_free_coherent(hdev, size, page_list, page_list_bus); 274 + return err; 275 + } 276 + 277 + int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id) 278 + { 279 + union gve_adminq_command cmd; 280 + 281 + memset(&cmd, 0, sizeof(cmd)); 282 + cmd.opcode = cpu_to_be32(GVE_ADMINQ_UNREGISTER_PAGE_LIST); 283 + cmd.unreg_page_list = (struct gve_adminq_unregister_page_list) { 284 + .page_list_id = cpu_to_be32(page_list_id), 285 + }; 286 + 287 + return gve_adminq_execute_cmd(priv, &cmd); 320 288 } 321 289 322 290 int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu)
+83
drivers/net/ethernet/google/gve/gve_adminq.h
··· 13 13 enum gve_adminq_opcodes { 14 14 GVE_ADMINQ_DESCRIBE_DEVICE = 0x1, 15 15 GVE_ADMINQ_CONFIGURE_DEVICE_RESOURCES = 0x2, 16 + GVE_ADMINQ_REGISTER_PAGE_LIST = 0x3, 17 + GVE_ADMINQ_UNREGISTER_PAGE_LIST = 0x4, 18 + GVE_ADMINQ_CREATE_TX_QUEUE = 0x5, 19 + GVE_ADMINQ_CREATE_RX_QUEUE = 0x6, 20 + GVE_ADMINQ_DESTROY_TX_QUEUE = 0x7, 21 + GVE_ADMINQ_DESTROY_RX_QUEUE = 0x8, 16 22 GVE_ADMINQ_DECONFIGURE_DEVICE_RESOURCES = 0x9, 17 23 GVE_ADMINQ_SET_DRIVER_PARAMETER = 0xB, 18 24 }; ··· 95 89 96 90 static_assert(sizeof(struct gve_adminq_configure_device_resources) == 32); 97 91 92 + struct gve_adminq_register_page_list { 93 + __be32 page_list_id; 94 + __be32 num_pages; 95 + __be64 page_address_list_addr; 96 + }; 97 + 98 + static_assert(sizeof(struct gve_adminq_register_page_list) == 16); 99 + 100 + struct gve_adminq_unregister_page_list { 101 + __be32 page_list_id; 102 + }; 103 + 104 + static_assert(sizeof(struct gve_adminq_unregister_page_list) == 4); 105 + 106 + struct gve_adminq_create_tx_queue { 107 + __be32 queue_id; 108 + __be32 reserved; 109 + __be64 queue_resources_addr; 110 + __be64 tx_ring_addr; 111 + __be32 queue_page_list_id; 112 + __be32 ntfy_id; 113 + }; 114 + 115 + static_assert(sizeof(struct gve_adminq_create_tx_queue) == 32); 116 + 117 + struct gve_adminq_create_rx_queue { 118 + __be32 queue_id; 119 + __be32 index; 120 + __be32 reserved; 121 + __be32 ntfy_id; 122 + __be64 queue_resources_addr; 123 + __be64 rx_desc_ring_addr; 124 + __be64 rx_data_ring_addr; 125 + __be32 queue_page_list_id; 126 + u8 padding[4]; 127 + }; 128 + 129 + static_assert(sizeof(struct gve_adminq_create_rx_queue) == 48); 130 + 131 + /* Queue resources that are shared with the device */ 132 + struct gve_queue_resources { 133 + union { 134 + struct { 135 + __be32 db_index; /* Device -> Guest */ 136 + __be32 counter_index; /* Device -> Guest */ 137 + }; 138 + u8 reserved[64]; 139 + }; 140 + }; 141 + 142 + static_assert(sizeof(struct gve_queue_resources) == 64); 143 + 144 + struct 
gve_adminq_destroy_tx_queue { 145 + __be32 queue_id; 146 + }; 147 + 148 + static_assert(sizeof(struct gve_adminq_destroy_tx_queue) == 4); 149 + 150 + struct gve_adminq_destroy_rx_queue { 151 + __be32 queue_id; 152 + }; 153 + 154 + static_assert(sizeof(struct gve_adminq_destroy_rx_queue) == 4); 155 + 98 156 /* GVE Set Driver Parameter Types */ 99 157 enum gve_set_driver_param_types { 100 158 GVE_SET_PARAM_MTU = 0x1, ··· 179 109 union { 180 110 struct gve_adminq_configure_device_resources 181 111 configure_device_resources; 112 + struct gve_adminq_create_tx_queue create_tx_queue; 113 + struct gve_adminq_create_rx_queue create_rx_queue; 114 + struct gve_adminq_destroy_tx_queue destroy_tx_queue; 115 + struct gve_adminq_destroy_rx_queue destroy_rx_queue; 182 116 struct gve_adminq_describe_device describe_device; 117 + struct gve_adminq_register_page_list reg_page_list; 118 + struct gve_adminq_unregister_page_list unreg_page_list; 183 119 struct gve_adminq_set_driver_parameter set_driver_param; 184 120 }; 185 121 }; ··· 206 130 dma_addr_t db_array_bus_addr, 207 131 u32 num_ntfy_blks); 208 132 int gve_adminq_deconfigure_device_resources(struct gve_priv *priv); 133 + int gve_adminq_create_tx_queue(struct gve_priv *priv, u32 queue_id); 134 + int gve_adminq_destroy_tx_queue(struct gve_priv *priv, u32 queue_id); 135 + int gve_adminq_create_rx_queue(struct gve_priv *priv, u32 queue_id); 136 + int gve_adminq_destroy_rx_queue(struct gve_priv *priv, u32 queue_id); 137 + int gve_adminq_register_page_list(struct gve_priv *priv, 138 + struct gve_queue_page_list *qpl); 139 + int gve_adminq_unregister_page_list(struct gve_priv *priv, u32 page_list_id); 209 140 int gve_adminq_set_mtu(struct gve_priv *priv, u64 mtu); 210 141 #endif /* _GVE_ADMINQ_H */
+113
drivers/net/ethernet/google/gve/gve_desc.h
··· 1 + /* SPDX-License-Identifier: (GPL-2.0 OR MIT) 2 + * Google virtual Ethernet (gve) driver 3 + * 4 + * Copyright (C) 2015-2019 Google, Inc. 5 + */ 6 + 7 + /* GVE Transmit Descriptor formats */ 8 + 9 + #ifndef _GVE_DESC_H_ 10 + #define _GVE_DESC_H_ 11 + 12 + #include <linux/build_bug.h> 13 + 14 + /* A note on seg_addrs 15 + * 16 + * Base addresses encoded in seg_addr are not assumed to be physical 17 + * addresses. The ring format assumes these come from some linear address 18 + * space. This could be physical memory, kernel virtual memory, user virtual 19 + * memory. gVNIC uses lists of registered pages. Each queue is assumed 20 + * to be associated with a single such linear address space to ensure a 21 + * consistent meaning for seg_addrs posted to its rings. 22 + */ 23 + 24 + struct gve_tx_pkt_desc { 25 + u8 type_flags; /* desc type is lower 4 bits, flags upper */ 26 + u8 l4_csum_offset; /* relative offset of L4 csum word */ 27 + u8 l4_hdr_offset; /* Offset of start of L4 headers in packet */ 28 + u8 desc_cnt; /* Total descriptors for this packet */ 29 + __be16 len; /* Total length of this packet (in bytes) */ 30 + __be16 seg_len; /* Length of this descriptor's segment */ 31 + __be64 seg_addr; /* Base address (see note) of this segment */ 32 + } __packed; 33 + 34 + struct gve_tx_seg_desc { 35 + u8 type_flags; /* type is lower 4 bits, flags upper */ 36 + u8 l3_offset; /* TSO: 2 byte units to start of IPH */ 37 + __be16 reserved; 38 + __be16 mss; /* TSO MSS */ 39 + __be16 seg_len; 40 + __be64 seg_addr; 41 + } __packed; 42 + 43 + /* GVE Transmit Descriptor Types */ 44 + #define GVE_TXD_STD (0x0 << 4) /* Std with Host Address */ 45 + #define GVE_TXD_TSO (0x1 << 4) /* TSO with Host Address */ 46 + #define GVE_TXD_SEG (0x2 << 4) /* Seg with Host Address */ 47 + 48 + /* GVE Transmit Descriptor Flags for Std Pkts */ 49 + #define GVE_TXF_L4CSUM BIT(0) /* Need csum offload */ 50 + #define GVE_TXF_TSTAMP BIT(2) /* Timestamp required */ 51 + 52 + /* GVE Transmit 
Descriptor Flags for TSO Segs */ 53 + #define GVE_TXSF_IPV6 BIT(1) /* IPv6 TSO */ 54 + 55 + /* GVE Receive Packet Descriptor */ 56 + /* The start of an ethernet packet comes 2 bytes into the rx buffer. 57 + * gVNIC adds this padding so that both the DMA and the L3/4 protocol header 58 + * access is aligned. 59 + */ 60 + #define GVE_RX_PAD 2 61 + 62 + struct gve_rx_desc { 63 + u8 padding[48]; 64 + __be32 rss_hash; /* Receive-side scaling hash (Toeplitz for gVNIC) */ 65 + __be16 mss; 66 + __be16 reserved; /* Reserved to zero */ 67 + u8 hdr_len; /* Header length (L2-L4) including padding */ 68 + u8 hdr_off; /* 64-byte-scaled offset into RX_DATA entry */ 69 + __sum16 csum; /* 1's-complement partial checksum of L3+ bytes */ 70 + __be16 len; /* Length of the received packet */ 71 + __be16 flags_seq; /* Flags [15:3] and sequence number [2:0] (1-7) */ 72 + } __packed; 73 + static_assert(sizeof(struct gve_rx_desc) == 64); 74 + 75 + /* As with the Tx ring format, the qpl_offset entries below are offsets into an 76 + * ordered list of registered pages. 
77 + */ 78 + struct gve_rx_data_slot { 79 + /* byte offset into the rx registered segment of this slot */ 80 + __be64 qpl_offset; 81 + }; 82 + 83 + /* GVE Receive Packet Descriptor Seq No */ 84 + #define GVE_SEQNO(x) (be16_to_cpu(x) & 0x7) 85 + 86 + /* GVE Receive Packet Descriptor Flags */ 87 + #define GVE_RXFLG(x) cpu_to_be16(1 << (3 + (x))) 88 + #define GVE_RXF_FRAG GVE_RXFLG(3) /* IP Fragment */ 89 + #define GVE_RXF_IPV4 GVE_RXFLG(4) /* IPv4 */ 90 + #define GVE_RXF_IPV6 GVE_RXFLG(5) /* IPv6 */ 91 + #define GVE_RXF_TCP GVE_RXFLG(6) /* TCP Packet */ 92 + #define GVE_RXF_UDP GVE_RXFLG(7) /* UDP Packet */ 93 + #define GVE_RXF_ERR GVE_RXFLG(8) /* Packet Error Detected */ 94 + 95 + /* GVE IRQ */ 96 + #define GVE_IRQ_ACK BIT(31) 97 + #define GVE_IRQ_MASK BIT(30) 98 + #define GVE_IRQ_EVENT BIT(29) 99 + 100 + static inline bool gve_needs_rss(__be16 flag) 101 + { 102 + if (flag & GVE_RXF_FRAG) 103 + return false; 104 + if (flag & (GVE_RXF_IPV4 | GVE_RXF_IPV6)) 105 + return true; 106 + return false; 107 + } 108 + 109 + static inline u8 gve_next_seqno(u8 seq) 110 + { 111 + return (seq + 1) == 8 ? 1 : seq + 1; 112 + } 113 + #endif /* _GVE_DESC_H_ */
+570 -3
drivers/net/ethernet/google/gve/gve_main.c
··· 16 16 #include "gve_adminq.h" 17 17 #include "gve_register.h" 18 18 19 + #define GVE_DEFAULT_RX_COPYBREAK (256) 20 + 19 21 #define DEFAULT_MSG_LEVEL (NETIF_MSG_DRV | NETIF_MSG_LINK) 20 22 #define GVE_VERSION "1.0.0" 21 23 #define GVE_VERSION_PREFIX "GVE-" 22 24 23 25 static const char gve_version_str[] = GVE_VERSION; 24 26 static const char gve_version_prefix[] = GVE_VERSION_PREFIX; 27 + 28 + static void gve_get_stats(struct net_device *dev, struct rtnl_link_stats64 *s) 29 + { 30 + struct gve_priv *priv = netdev_priv(dev); 31 + unsigned int start; 32 + int ring; 33 + 34 + if (priv->rx) { 35 + for (ring = 0; ring < priv->rx_cfg.num_queues; ring++) { 36 + do { 37 + u64_stats_fetch_begin(&priv->rx[ring].statss); 38 + s->rx_packets += priv->rx[ring].rpackets; 39 + s->rx_bytes += priv->rx[ring].rbytes; 40 + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 41 + start)); 42 + } 43 + } 44 + if (priv->tx) { 45 + for (ring = 0; ring < priv->tx_cfg.num_queues; ring++) { 46 + do { 47 + u64_stats_fetch_begin(&priv->tx[ring].statss); 48 + s->tx_packets += priv->tx[ring].pkt_done; 49 + s->tx_bytes += priv->tx[ring].bytes_done; 50 + } while (u64_stats_fetch_retry(&priv->rx[ring].statss, 51 + start)); 52 + } 53 + } 54 + } 25 55 26 56 static int gve_alloc_counter_array(struct gve_priv *priv) 27 57 { ··· 82 52 83 53 static irqreturn_t gve_intr(int irq, void *arg) 84 54 { 55 + struct gve_notify_block *block = arg; 56 + struct gve_priv *priv = block->priv; 57 + 58 + iowrite32be(GVE_IRQ_MASK, gve_irq_doorbell(priv, block)); 59 + napi_schedule_irqoff(&block->napi); 85 60 return IRQ_HANDLED; 61 + } 62 + 63 + static int gve_napi_poll(struct napi_struct *napi, int budget) 64 + { 65 + struct gve_notify_block *block; 66 + __be32 __iomem *irq_doorbell; 67 + bool reschedule = false; 68 + struct gve_priv *priv; 69 + 70 + block = container_of(napi, struct gve_notify_block, napi); 71 + priv = block->priv; 72 + 73 + if (block->tx) 74 + reschedule |= gve_tx_poll(block, budget); 75 + if 
(block->rx) 76 + reschedule |= gve_rx_poll(block, budget); 77 + 78 + if (reschedule) 79 + return budget; 80 + 81 + napi_complete(napi); 82 + irq_doorbell = gve_irq_doorbell(priv, block); 83 + iowrite32be(GVE_IRQ_ACK | GVE_IRQ_EVENT, irq_doorbell); 84 + 85 + /* Double check we have no extra work. 86 + * Ensure unmask synchronizes with checking for work. 87 + */ 88 + dma_rmb(); 89 + if (block->tx) 90 + reschedule |= gve_tx_poll(block, -1); 91 + if (block->rx) 92 + reschedule |= gve_rx_poll(block, -1); 93 + if (reschedule && napi_reschedule(napi)) 94 + iowrite32be(GVE_IRQ_MASK, irq_doorbell); 95 + 96 + return 0; 86 97 } 87 98 88 99 static int gve_alloc_notify_blocks(struct gve_priv *priv) ··· 150 79 goto abort_with_msix_vectors; 151 80 } 152 81 if (vecs_enabled != num_vecs_requested) { 153 - priv->num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 82 + int new_num_ntfy_blks = (vecs_enabled - 1) & ~0x1; 83 + int vecs_per_type = new_num_ntfy_blks / 2; 84 + int vecs_left = new_num_ntfy_blks % 2; 85 + 86 + priv->num_ntfy_blks = new_num_ntfy_blks; 87 + priv->tx_cfg.max_queues = min_t(int, priv->tx_cfg.max_queues, 88 + vecs_per_type); 89 + priv->rx_cfg.max_queues = min_t(int, priv->rx_cfg.max_queues, 90 + vecs_per_type + vecs_left); 154 91 dev_err(&priv->pdev->dev, 155 - "Only received %d msix. 
Lowering number of notification blocks to %d\n", 156 - vecs_enabled, priv->num_ntfy_blks); 92 + "Could not enable desired msix, only enabled %d, adjusting tx max queues to %d, and rx max queues to %d\n", 93 + vecs_enabled, priv->tx_cfg.max_queues, 94 + priv->rx_cfg.max_queues); 95 + if (priv->tx_cfg.num_queues > priv->tx_cfg.max_queues) 96 + priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 97 + if (priv->rx_cfg.num_queues > priv->rx_cfg.max_queues) 98 + priv->rx_cfg.num_queues = priv->rx_cfg.max_queues; 157 99 } 158 100 /* Half the notification blocks go to TX and half to RX */ 159 101 active_cpus = min_t(int, priv->num_ntfy_blks / 2, num_online_cpus()); ··· 303 219 gve_clear_device_resources_ok(priv); 304 220 } 305 221 222 + static void gve_add_napi(struct gve_priv *priv, int ntfy_idx) 223 + { 224 + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 225 + 226 + netif_napi_add(priv->dev, &block->napi, gve_napi_poll, 227 + NAPI_POLL_WEIGHT); 228 + } 229 + 230 + static void gve_remove_napi(struct gve_priv *priv, int ntfy_idx) 231 + { 232 + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 233 + 234 + netif_napi_del(&block->napi); 235 + } 236 + 237 + static int gve_register_qpls(struct gve_priv *priv) 238 + { 239 + int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 240 + int err; 241 + int i; 242 + 243 + for (i = 0; i < num_qpls; i++) { 244 + err = gve_adminq_register_page_list(priv, &priv->qpls[i]); 245 + if (err) { 246 + netif_err(priv, drv, priv->dev, 247 + "failed to register queue page list %d\n", 248 + priv->qpls[i].id); 249 + return err; 250 + } 251 + } 252 + return 0; 253 + } 254 + 255 + static int gve_unregister_qpls(struct gve_priv *priv) 256 + { 257 + int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 258 + int err; 259 + int i; 260 + 261 + for (i = 0; i < num_qpls; i++) { 262 + err = gve_adminq_unregister_page_list(priv, priv->qpls[i].id); 263 + if (err) { 264 + netif_err(priv, drv, priv->dev, 265 + "Failed 
to unregister queue page list %d\n", 266 + priv->qpls[i].id); 267 + return err; 268 + } 269 + } 270 + return 0; 271 + } 272 + 273 + static int gve_create_rings(struct gve_priv *priv) 274 + { 275 + int err; 276 + int i; 277 + 278 + for (i = 0; i < priv->tx_cfg.num_queues; i++) { 279 + err = gve_adminq_create_tx_queue(priv, i); 280 + if (err) { 281 + netif_err(priv, drv, priv->dev, "failed to create tx queue %d\n", 282 + i); 283 + return err; 284 + } 285 + netif_dbg(priv, drv, priv->dev, "created tx queue %d\n", i); 286 + } 287 + for (i = 0; i < priv->rx_cfg.num_queues; i++) { 288 + err = gve_adminq_create_rx_queue(priv, i); 289 + if (err) { 290 + netif_err(priv, drv, priv->dev, "failed to create rx queue %d\n", 291 + i); 292 + return err; 293 + } 294 + /* Rx data ring has been prefilled with packet buffers at 295 + * queue allocation time. 296 + * Write the doorbell to provide descriptor slots and packet 297 + * buffers to the NIC. 298 + */ 299 + gve_rx_write_doorbell(priv, &priv->rx[i]); 300 + netif_dbg(priv, drv, priv->dev, "created rx queue %d\n", i); 301 + } 302 + 303 + return 0; 304 + } 305 + 306 + static int gve_alloc_rings(struct gve_priv *priv) 307 + { 308 + int ntfy_idx; 309 + int err; 310 + int i; 311 + 312 + /* Setup tx rings */ 313 + priv->tx = kvzalloc(priv->tx_cfg.num_queues * sizeof(*priv->tx), 314 + GFP_KERNEL); 315 + if (!priv->tx) 316 + return -ENOMEM; 317 + err = gve_tx_alloc_rings(priv); 318 + if (err) 319 + goto free_tx; 320 + /* Setup rx rings */ 321 + priv->rx = kvzalloc(priv->rx_cfg.num_queues * sizeof(*priv->rx), 322 + GFP_KERNEL); 323 + if (!priv->rx) { 324 + err = -ENOMEM; 325 + goto free_tx_queue; 326 + } 327 + err = gve_rx_alloc_rings(priv); 328 + if (err) 329 + goto free_rx; 330 + /* Add tx napi & init sync stats*/ 331 + for (i = 0; i < priv->tx_cfg.num_queues; i++) { 332 + u64_stats_init(&priv->tx[i].statss); 333 + ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 334 + gve_add_napi(priv, ntfy_idx); 335 + } 336 + /* Add rx napi & init sync 
stats*/ 337 + for (i = 0; i < priv->rx_cfg.num_queues; i++) { 338 + u64_stats_init(&priv->rx[i].statss); 339 + ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 340 + gve_add_napi(priv, ntfy_idx); 341 + } 342 + 343 + return 0; 344 + 345 + free_rx: 346 + kfree(priv->rx); 347 + priv->rx = NULL; 348 + free_tx_queue: 349 + gve_tx_free_rings(priv); 350 + free_tx: 351 + kfree(priv->tx); 352 + priv->tx = NULL; 353 + return err; 354 + } 355 + 356 + static int gve_destroy_rings(struct gve_priv *priv) 357 + { 358 + int err; 359 + int i; 360 + 361 + for (i = 0; i < priv->tx_cfg.num_queues; i++) { 362 + err = gve_adminq_destroy_tx_queue(priv, i); 363 + if (err) { 364 + netif_err(priv, drv, priv->dev, 365 + "failed to destroy tx queue %d\n", 366 + i); 367 + return err; 368 + } 369 + netif_dbg(priv, drv, priv->dev, "destroyed tx queue %d\n", i); 370 + } 371 + for (i = 0; i < priv->rx_cfg.num_queues; i++) { 372 + err = gve_adminq_destroy_rx_queue(priv, i); 373 + if (err) { 374 + netif_err(priv, drv, priv->dev, 375 + "failed to destroy rx queue %d\n", 376 + i); 377 + return err; 378 + } 379 + netif_dbg(priv, drv, priv->dev, "destroyed rx queue %d\n", i); 380 + } 381 + return 0; 382 + } 383 + 384 + static void gve_free_rings(struct gve_priv *priv) 385 + { 386 + int ntfy_idx; 387 + int i; 388 + 389 + if (priv->tx) { 390 + for (i = 0; i < priv->tx_cfg.num_queues; i++) { 391 + ntfy_idx = gve_tx_idx_to_ntfy(priv, i); 392 + gve_remove_napi(priv, ntfy_idx); 393 + } 394 + gve_tx_free_rings(priv); 395 + kfree(priv->tx); 396 + priv->tx = NULL; 397 + } 398 + if (priv->rx) { 399 + for (i = 0; i < priv->rx_cfg.num_queues; i++) { 400 + ntfy_idx = gve_rx_idx_to_ntfy(priv, i); 401 + gve_remove_napi(priv, ntfy_idx); 402 + } 403 + gve_rx_free_rings(priv); 404 + kfree(priv->rx); 405 + priv->rx = NULL; 406 + } 407 + } 408 + 409 + int gve_alloc_page(struct device *dev, struct page **page, dma_addr_t *dma, 410 + enum dma_data_direction dir) 411 + { 412 + *page = alloc_page(GFP_KERNEL); 413 + if (!page) 414 + 
return -ENOMEM; 415 + *dma = dma_map_page(dev, *page, 0, PAGE_SIZE, dir); 416 + if (dma_mapping_error(dev, *dma)) { 417 + put_page(*page); 418 + return -ENOMEM; 419 + } 420 + return 0; 421 + } 422 + 423 + static int gve_alloc_queue_page_list(struct gve_priv *priv, u32 id, 424 + int pages) 425 + { 426 + struct gve_queue_page_list *qpl = &priv->qpls[id]; 427 + int err; 428 + int i; 429 + 430 + if (pages + priv->num_registered_pages > priv->max_registered_pages) { 431 + netif_err(priv, drv, priv->dev, 432 + "Reached max number of registered pages %llu > %llu\n", 433 + pages + priv->num_registered_pages, 434 + priv->max_registered_pages); 435 + return -EINVAL; 436 + } 437 + 438 + qpl->id = id; 439 + qpl->num_entries = pages; 440 + qpl->pages = kvzalloc(pages * sizeof(*qpl->pages), GFP_KERNEL); 441 + /* caller handles clean up */ 442 + if (!qpl->pages) 443 + return -ENOMEM; 444 + qpl->page_buses = kvzalloc(pages * sizeof(*qpl->page_buses), 445 + GFP_KERNEL); 446 + /* caller handles clean up */ 447 + if (!qpl->page_buses) 448 + return -ENOMEM; 449 + 450 + for (i = 0; i < pages; i++) { 451 + err = gve_alloc_page(&priv->pdev->dev, &qpl->pages[i], 452 + &qpl->page_buses[i], 453 + gve_qpl_dma_dir(priv, id)); 454 + /* caller handles clean up */ 455 + if (err) 456 + return -ENOMEM; 457 + } 458 + priv->num_registered_pages += pages; 459 + 460 + return 0; 461 + } 462 + 463 + void gve_free_page(struct device *dev, struct page *page, dma_addr_t dma, 464 + enum dma_data_direction dir) 465 + { 466 + if (!dma_mapping_error(dev, dma)) 467 + dma_unmap_page(dev, dma, PAGE_SIZE, dir); 468 + if (page) 469 + put_page(page); 470 + } 471 + 472 + static void gve_free_queue_page_list(struct gve_priv *priv, 473 + int id) 474 + { 475 + struct gve_queue_page_list *qpl = &priv->qpls[id]; 476 + int i; 477 + 478 + if (!qpl->pages) 479 + return; 480 + if (!qpl->page_buses) 481 + goto free_pages; 482 + 483 + for (i = 0; i < qpl->num_entries; i++) 484 + gve_free_page(&priv->pdev->dev, qpl->pages[i], 
485 + qpl->page_buses[i], gve_qpl_dma_dir(priv, id)); 486 + 487 + kfree(qpl->page_buses); 488 + free_pages: 489 + kfree(qpl->pages); 490 + priv->num_registered_pages -= qpl->num_entries; 491 + } 492 + 493 + static int gve_alloc_qpls(struct gve_priv *priv) 494 + { 495 + int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 496 + int i, j; 497 + int err; 498 + 499 + priv->qpls = kvzalloc(num_qpls * sizeof(*priv->qpls), GFP_KERNEL); 500 + if (!priv->qpls) 501 + return -ENOMEM; 502 + 503 + for (i = 0; i < gve_num_tx_qpls(priv); i++) { 504 + err = gve_alloc_queue_page_list(priv, i, 505 + priv->tx_pages_per_qpl); 506 + if (err) 507 + goto free_qpls; 508 + } 509 + for (; i < num_qpls; i++) { 510 + err = gve_alloc_queue_page_list(priv, i, 511 + priv->rx_pages_per_qpl); 512 + if (err) 513 + goto free_qpls; 514 + } 515 + 516 + priv->qpl_cfg.qpl_map_size = BITS_TO_LONGS(num_qpls) * 517 + sizeof(unsigned long) * BITS_PER_BYTE; 518 + priv->qpl_cfg.qpl_id_map = kvzalloc(BITS_TO_LONGS(num_qpls) * 519 + sizeof(unsigned long), GFP_KERNEL); 520 + if (!priv->qpl_cfg.qpl_id_map) 521 + goto free_qpls; 522 + 523 + return 0; 524 + 525 + free_qpls: 526 + for (j = 0; j <= i; j++) 527 + gve_free_queue_page_list(priv, j); 528 + kfree(priv->qpls); 529 + return err; 530 + } 531 + 532 + static void gve_free_qpls(struct gve_priv *priv) 533 + { 534 + int num_qpls = gve_num_tx_qpls(priv) + gve_num_rx_qpls(priv); 535 + int i; 536 + 537 + kfree(priv->qpl_cfg.qpl_id_map); 538 + 539 + for (i = 0; i < num_qpls; i++) 540 + gve_free_queue_page_list(priv, i); 541 + 542 + kfree(priv->qpls); 543 + } 544 + 545 + static void gve_turndown(struct gve_priv *priv); 546 + static void gve_turnup(struct gve_priv *priv); 547 + 548 + static int gve_open(struct net_device *dev) 549 + { 550 + struct gve_priv *priv = netdev_priv(dev); 551 + int err; 552 + 553 + err = gve_alloc_qpls(priv); 554 + if (err) 555 + return err; 556 + err = gve_alloc_rings(priv); 557 + if (err) 558 + goto free_qpls; 559 + 560 + err = 
netif_set_real_num_tx_queues(dev, priv->tx_cfg.num_queues); 561 + if (err) 562 + goto free_rings; 563 + err = netif_set_real_num_rx_queues(dev, priv->rx_cfg.num_queues); 564 + if (err) 565 + goto free_rings; 566 + 567 + err = gve_register_qpls(priv); 568 + if (err) 569 + return err; 570 + err = gve_create_rings(priv); 571 + if (err) 572 + return err; 573 + gve_set_device_rings_ok(priv); 574 + 575 + gve_turnup(priv); 576 + netif_carrier_on(dev); 577 + return 0; 578 + 579 + free_rings: 580 + gve_free_rings(priv); 581 + free_qpls: 582 + gve_free_qpls(priv); 583 + return err; 584 + } 585 + 586 + static int gve_close(struct net_device *dev) 587 + { 588 + struct gve_priv *priv = netdev_priv(dev); 589 + int err; 590 + 591 + netif_carrier_off(dev); 592 + if (gve_get_device_rings_ok(priv)) { 593 + gve_turndown(priv); 594 + err = gve_destroy_rings(priv); 595 + if (err) 596 + return err; 597 + err = gve_unregister_qpls(priv); 598 + if (err) 599 + return err; 600 + gve_clear_device_rings_ok(priv); 601 + } 602 + 603 + gve_free_rings(priv); 604 + gve_free_qpls(priv); 605 + return 0; 606 + } 607 + 608 + static void gve_turndown(struct gve_priv *priv) 609 + { 610 + int idx; 611 + 612 + if (netif_carrier_ok(priv->dev)) 613 + netif_carrier_off(priv->dev); 614 + 615 + if (!gve_get_napi_enabled(priv)) 616 + return; 617 + 618 + /* Disable napi to prevent more work from coming in */ 619 + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 620 + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 621 + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 622 + 623 + napi_disable(&block->napi); 624 + } 625 + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 626 + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 627 + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 628 + 629 + napi_disable(&block->napi); 630 + } 631 + 632 + /* Stop tx queues */ 633 + netif_tx_disable(priv->dev); 634 + 635 + gve_clear_napi_enabled(priv); 636 + } 637 + 638 + static void 
gve_turnup(struct gve_priv *priv) 639 + { 640 + int idx; 641 + 642 + /* Start the tx queues */ 643 + netif_tx_start_all_queues(priv->dev); 644 + 645 + /* Enable napi and unmask interrupts for all queues */ 646 + for (idx = 0; idx < priv->tx_cfg.num_queues; idx++) { 647 + int ntfy_idx = gve_tx_idx_to_ntfy(priv, idx); 648 + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 649 + 650 + napi_enable(&block->napi); 651 + iowrite32be(0, gve_irq_doorbell(priv, block)); 652 + } 653 + for (idx = 0; idx < priv->rx_cfg.num_queues; idx++) { 654 + int ntfy_idx = gve_rx_idx_to_ntfy(priv, idx); 655 + struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx]; 656 + 657 + napi_enable(&block->napi); 658 + iowrite32be(0, gve_irq_doorbell(priv, block)); 659 + } 660 + 661 + gve_set_napi_enabled(priv); 662 + } 663 + 664 + static void gve_tx_timeout(struct net_device *dev) 665 + { 666 + struct gve_priv *priv = netdev_priv(dev); 667 + 668 + priv->tx_timeo_cnt++; 669 + } 670 + 671 + static const struct net_device_ops gve_netdev_ops = { 672 + .ndo_start_xmit = gve_tx, 673 + .ndo_open = gve_open, 674 + .ndo_stop = gve_close, 675 + .ndo_get_stats64 = gve_get_stats, 676 + .ndo_tx_timeout = gve_tx_timeout, 677 + }; 678 + 306 679 static int gve_init_priv(struct gve_priv *priv, bool skip_describe_device) 307 680 { 308 681 int num_ntfy; ··· 805 264 goto err; 806 265 } 807 266 267 + priv->num_registered_pages = 0; 268 + priv->rx_copybreak = GVE_DEFAULT_RX_COPYBREAK; 808 269 /* gvnic has one Notification Block per MSI-x vector, except for the 809 270 * management vector 810 271 */ 811 272 priv->num_ntfy_blks = (num_ntfy - 1) & ~0x1; 812 273 priv->mgmt_msix_idx = priv->num_ntfy_blks; 274 + 275 + priv->tx_cfg.max_queues = 276 + min_t(int, priv->tx_cfg.max_queues, priv->num_ntfy_blks / 2); 277 + priv->rx_cfg.max_queues = 278 + min_t(int, priv->rx_cfg.max_queues, priv->num_ntfy_blks / 2); 279 + 280 + priv->tx_cfg.num_queues = priv->tx_cfg.max_queues; 281 + priv->rx_cfg.num_queues = 
priv->rx_cfg.max_queues; 282 + if (priv->default_num_queues > 0) { 283 + priv->tx_cfg.num_queues = min_t(int, priv->default_num_queues, 284 + priv->tx_cfg.num_queues); 285 + priv->rx_cfg.num_queues = min_t(int, priv->default_num_queues, 286 + priv->rx_cfg.num_queues); 287 + } 288 + 289 + netif_info(priv, drv, priv->dev, "TX queues %d, RX queues %d\n", 290 + priv->tx_cfg.num_queues, priv->rx_cfg.num_queues); 291 + netif_info(priv, drv, priv->dev, "Max TX queues %d, Max RX queues %d\n", 292 + priv->tx_cfg.max_queues, priv->rx_cfg.max_queues); 813 293 814 294 setup_device: 815 295 err = gve_setup_device_resources(priv); ··· 898 336 899 337 reg_bar = pci_iomap(pdev, GVE_REGISTER_BAR, 0); 900 338 if (!reg_bar) { 339 + dev_err(&pdev->dev, "Failed to map pci bar!\n"); 901 340 err = -ENOMEM; 902 341 goto abort_with_pci_region; 903 342 } ··· 922 359 } 923 360 SET_NETDEV_DEV(dev, &pdev->dev); 924 361 pci_set_drvdata(pdev, dev); 362 + dev->netdev_ops = &gve_netdev_ops; 925 363 /* advertise features */ 926 364 dev->hw_features = NETIF_F_HIGHDMA; 927 365 dev->hw_features |= NETIF_F_SG; ··· 933 369 dev->hw_features |= NETIF_F_RXCSUM; 934 370 dev->hw_features |= NETIF_F_RXHASH; 935 371 dev->features = dev->hw_features; 372 + dev->watchdog_timeo = 5 * HZ; 936 373 dev->min_mtu = ETH_MIN_MTU; 937 374 netif_carrier_off(dev); 938 375 ··· 944 379 priv->reg_bar0 = reg_bar; 945 380 priv->db_bar2 = db_bar; 946 381 priv->state_flags = 0x0; 382 + priv->tx_cfg.max_queues = max_tx_queues; 383 + priv->rx_cfg.max_queues = max_rx_queues; 947 384 948 385 err = gve_init_priv(priv, false); 949 386 if (err)
+443
drivers/net/ethernet/google/gve/gve_rx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/etherdevice.h>

/* Detach rx ring @queue_idx from its notification block. */
static void gve_rx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_rx_idx_to_ntfy(priv, queue_idx)];

	block->rx = NULL;
}

/* Free all host-side resources of rx ring @idx: desc ring, queue
 * resources, page_info array, data ring, and return its QPL to the pool.
 */
static void gve_rx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *dev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_rx_remove_from_block(priv, idx);

	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	dma_free_coherent(dev, bytes, rx->desc.desc_ring, rx->desc.bus);
	rx->desc.desc_ring = NULL;

	dma_free_coherent(dev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;

	gve_unassign_qpl(priv, rx->data.qpl->id);
	rx->data.qpl = NULL;
	kfree(rx->data.page_info);

	/* data ring length is mask + 1 (power of two) */
	slots = rx->data.mask + 1;
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(dev, bytes, rx->data.data_ring,
			  rx->data.data_bus);
	rx->data.data_ring = NULL;
	netif_dbg(priv, drv, priv->dev, "freed rx ring %d\n", idx);
}

/* Point one rx buffer slot at @page: record host-side info and write the
 * big-endian QPL offset the device will DMA into.
 */
static void gve_setup_rx_buffer(struct gve_rx_slot_page_info *page_info,
				struct gve_rx_data_slot *slot,
				dma_addr_t addr, struct page *page)
{
	page_info->page = page;
	page_info->page_offset = 0;
	page_info->page_address = page_address(page);
	slot->qpl_offset = cpu_to_be64(addr);
}

/* Populate every data-ring slot of @rx with a page from its QPL.
 * Returns the number of slots filled, or -ENOMEM.
 */
static int gve_prefill_rx_pages(struct gve_rx_ring *rx)
{
	struct gve_priv *priv = rx->gve;
	u32 slots;
	int i;

	/* Allocate one page per Rx queue slot. Each page is split into two
	 * packet buffers, when possible we "page flip" between the two.
	 */
	slots = rx->data.mask + 1;

	rx->data.page_info = kvzalloc(slots *
				      sizeof(*rx->data.page_info), GFP_KERNEL);
	if (!rx->data.page_info)
		return -ENOMEM;

	/* NOTE(review): assumes a free rx QPL is always available here -
	 * QPLs are sized to the queue config at alloc time; confirm.
	 */
	rx->data.qpl = gve_assign_rx_qpl(priv);

	for (i = 0; i < slots; i++) {
		struct page *page = rx->data.qpl->pages[i];
		dma_addr_t addr = i * PAGE_SIZE;

		gve_setup_rx_buffer(&rx->data.page_info[i],
				    &rx->data.data_ring[i], addr, page);
	}

	return slots;
}

/* Attach rx ring @queue_idx to its notification block and remember the
 * block index on the ring (used to find its NAPI context).
 */
static void gve_rx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	u32 ntfy_idx = gve_rx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_rx_ring *rx = &priv->rx[queue_idx];

	block->rx = rx;
	rx->ntfy_id = ntfy_idx;
}

/* Allocate all host-side resources of rx ring @idx: data ring (prefilled
 * with pages), queue resources, and descriptor ring.
 */
static int gve_rx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_rx_ring *rx = &priv->rx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots, npages;
	int filled_pages;
	size_t bytes;
	int err;

	netif_dbg(priv, drv, priv->dev, "allocating rx ring\n");
	/* Make sure everything is zeroed to start with */
	memset(rx, 0, sizeof(*rx));

	rx->gve = priv;
	rx->q_num = idx;

	/* data ring has one slot per QPL page (power of two) */
	slots = priv->rx_pages_per_qpl;
	rx->data.mask = slots - 1;

	/* alloc rx data ring */
	bytes = sizeof(*rx->data.data_ring) * slots;
	rx->data.data_ring = dma_alloc_coherent(hdev, bytes,
						&rx->data.data_bus,
						GFP_KERNEL);
	if (!rx->data.data_ring)
		return -ENOMEM;
	filled_pages = gve_prefill_rx_pages(rx);
	if (filled_pages < 0) {
		err = -ENOMEM;
		goto abort_with_slots;
	}
	rx->desc.fill_cnt = filled_pages;
	/* Ensure data ring slots (packet buffers) are visible. */
	dma_wmb();

	/* Alloc gve_queue_resources */
	rx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*rx->q_resources),
				   &rx->q_resources_bus,
				   GFP_KERNEL);
	if (!rx->q_resources) {
		err = -ENOMEM;
		goto abort_filled;
	}
	netif_dbg(priv, drv, priv->dev, "rx[%d]->data.data_bus=%lx\n", idx,
		  (unsigned long)rx->data.data_bus);

	/* alloc rx desc ring - must be a whole number of pages */
	bytes = sizeof(struct gve_rx_desc) * priv->rx_desc_cnt;
	npages = bytes / PAGE_SIZE;
	if (npages * PAGE_SIZE != bytes) {
		err = -EIO;
		goto abort_with_q_resources;
	}

	rx->desc.desc_ring = dma_alloc_coherent(hdev, bytes, &rx->desc.bus,
						GFP_KERNEL);
	if (!rx->desc.desc_ring) {
		err = -ENOMEM;
		goto abort_with_q_resources;
	}
	/* NOTE(review): desc mask reuses the data-ring slot count - relies
	 * on rx_desc_cnt == rx_pages_per_qpl (desc and data rings are the
	 * same length per the design doc); confirm.
	 */
	rx->desc.mask = slots - 1;
	rx->desc.cnt = 0;
	/* device sequence numbers start at 1 */
	rx->desc.seqno = 1;
	gve_rx_add_to_block(priv, idx);

	return 0;

abort_with_q_resources:
	dma_free_coherent(hdev, sizeof(*rx->q_resources),
			  rx->q_resources, rx->q_resources_bus);
	rx->q_resources = NULL;
abort_filled:
	kfree(rx->data.page_info);
abort_with_slots:
	bytes = sizeof(*rx->data.data_ring) * slots;
	dma_free_coherent(hdev, bytes, rx->data.data_ring, rx->data.data_bus);
	rx->data.data_ring = NULL;

	return err;
}

/* Allocate every configured rx ring; on failure, free the rings that
 * were successfully allocated and return the error.
 */
int gve_rx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++) {
		err = gve_rx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc rx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_rx_free_ring(priv, j);
	}
	return err;
}

/* Free every configured rx ring. */
void gve_rx_free_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->rx_cfg.num_queues; i++)
		gve_rx_free_ring(priv, i);
}

/* Ring the rx doorbell: publish fill_cnt (slots made available to the
 * device) at this queue's doorbell index in BAR2.
 */
void gve_rx_write_doorbell(struct gve_priv *priv, struct gve_rx_ring *rx)
{
	u32 db_idx = be32_to_cpu(rx->q_resources->db_index);

	iowrite32be(rx->desc.fill_cnt, &priv->db_bar2[db_idx]);
}

/* Map descriptor flag bits to the skb hash type for skb_set_hash(). */
static enum pkt_hash_types gve_rss_type(__be16 pkt_flags)
{
	if (likely(pkt_flags & (GVE_RXF_TCP | GVE_RXF_UDP)))
		return PKT_HASH_TYPE_L4;
	if (pkt_flags & (GVE_RXF_IPV4 | GVE_RXF_IPV6))
		return PKT_HASH_TYPE_L3;
	return PKT_HASH_TYPE_L2;
}

/* Build an skb by copying @len bytes out of the registered buffer
 * (skipping the device-inserted pad), leaving the buffer reusable.
 * Returns NULL if skb allocation fails.
 */
static struct sk_buff *gve_rx_copy(struct net_device *dev,
				   struct napi_struct *napi,
				   struct gve_rx_slot_page_info *page_info,
				   u16 len)
{
	struct sk_buff *skb = napi_alloc_skb(napi, len);
	void *va = page_info->page_address + GVE_RX_PAD +
		   page_info->page_offset;

	if (unlikely(!skb))
		return NULL;

	__skb_put(skb, len);

	skb_copy_to_linear_data(skb, va, len);

	skb->protocol = eth_type_trans(skb, dev);
	return skb;
}

/* Build a zero-copy skb that references the buffer page as a fragment.
 * Returns NULL if napi_get_frags() fails.
 */
static struct sk_buff *gve_rx_add_frags(struct net_device *dev,
					struct napi_struct *napi,
					struct gve_rx_slot_page_info *page_info,
					u16 len)
{
	struct sk_buff *skb = napi_get_frags(napi);

	if (unlikely(!skb))
		return NULL;

	skb_add_rx_frag(skb, 0, page_info->page,
			page_info->page_offset +
			GVE_RX_PAD, len, PAGE_SIZE / 2);

	return skb;
}

/* "Flip" to the other half of the page for the next fill: toggle both the
 * host-side page_offset and the big-endian QPL offset seen by the device.
 */
static void gve_rx_flip_buff(struct gve_rx_slot_page_info *page_info,
			     struct gve_rx_data_slot *data_ring)
{
	u64 addr = be64_to_cpu(data_ring->qpl_offset);

	page_info->page_offset ^= PAGE_SIZE / 2;
	addr ^= PAGE_SIZE / 2;
	data_ring->qpl_offset = cpu_to_be64(addr);
}

/* Process one rx descriptor: build an skb (zero-copy when the page half
 * can be recycled, otherwise copy), fill in csum/hash, and hand it to
 * GRO. Returns false only on an unrecoverable state (bad page refcount);
 * returns true otherwise, including when the packet is dropped.
 */
static bool gve_rx(struct gve_rx_ring *rx, struct gve_rx_desc *rx_desc,
		   netdev_features_t feat)
{
	struct gve_rx_slot_page_info *page_info;
	struct gve_priv *priv = rx->gve;
	struct napi_struct *napi = &priv->ntfy_blocks[rx->ntfy_id].napi;
	struct net_device *dev = priv->dev;
	struct sk_buff *skb;
	int pagecount;
	u16 len;
	u32 idx;

	/* drop this packet */
	if (unlikely(rx_desc->flags_seq & GVE_RXF_ERR))
		return true;

	len = be16_to_cpu(rx_desc->len) - GVE_RX_PAD;
	idx = rx->data.cnt & rx->data.mask;
	page_info = &rx->data.page_info[idx];

	/* gvnic can only receive into registered segments. If the buffer
	 * can't be recycled, our only choice is to copy the data out of
	 * it so that we can return it to the device.
	 */

#if PAGE_SIZE == 4096
	if (len <= priv->rx_copybreak) {
		/* Just copy small packets */
		skb = gve_rx_copy(dev, napi, page_info, len);
		goto have_skb;
	}
	if (unlikely(!gve_can_recycle_pages(dev))) {
		skb = gve_rx_copy(dev, napi, page_info, len);
		goto have_skb;
	}
	pagecount = page_count(page_info->page);
	if (pagecount == 1) {
		/* No part of this page is used by any SKBs; we attach
		 * the page fragment to a new SKB and pass it up the
		 * stack.
		 */
		skb = gve_rx_add_frags(dev, napi, page_info, len);
		if (!skb)
			return true;
		/* Make sure the kernel stack can't release the page */
		get_page(page_info->page);
		/* "flip" to other packet buffer on this page */
		gve_rx_flip_buff(page_info, &rx->data.data_ring[idx]);
	} else if (pagecount >= 2) {
		/* We have previously passed the other half of this
		 * page up the stack, but it has not yet been freed.
		 */
		skb = gve_rx_copy(dev, napi, page_info, len);
	} else {
		WARN(pagecount < 1, "Pagecount should never be < 1");
		return false;
	}
#else
	/* page-flipping needs 4K pages; always copy otherwise */
	skb = gve_rx_copy(dev, napi, page_info, len);
#endif

have_skb:
	if (!skb)
		return true;

	rx->data.cnt++;

	if (likely(feat & NETIF_F_RXCSUM)) {
		/* NIC passes up the partial sum */
		if (rx_desc->csum)
			skb->ip_summed = CHECKSUM_COMPLETE;
		else
			skb->ip_summed = CHECKSUM_NONE;
		skb->csum = csum_unfold(rx_desc->csum);
	}

	/* parse flags & pass relevant info up */
	if (likely(feat & NETIF_F_RXHASH) &&
	    gve_needs_rss(rx_desc->flags_seq))
		skb_set_hash(skb, be32_to_cpu(rx_desc->rss_hash),
			     gve_rss_type(rx_desc->flags_seq));

	if (skb_is_nonlinear(skb))
		napi_gro_frags(napi);
	else
		napi_gro_receive(napi, skb);
	return true;
}

/* Return true if the next descriptor's sequence number shows the device
 * has produced more work for this ring.
 */
static bool gve_rx_work_pending(struct gve_rx_ring *rx)
{
	struct gve_rx_desc *desc;
	__be16 flags_seq;
	u32 next_idx;

	next_idx = rx->desc.cnt & rx->desc.mask;
	desc = rx->desc.desc_ring + next_idx;

	flags_seq = desc->flags_seq;
	/* Make sure we have synchronized the seq no with the device */
	smp_rmb();

	return (GVE_SEQNO(flags_seq) == rx->desc.seqno);
}

/* Consume up to @budget completed rx descriptors, update stats, restock
 * the device via the doorbell, and report whether more work remains.
 */
bool gve_clean_rx_done(struct gve_rx_ring *rx, int budget,
		       netdev_features_t feat)
{
	struct gve_priv *priv = rx->gve;
	struct gve_rx_desc *desc;
	u32 cnt = rx->desc.cnt;
	u32 idx = cnt & rx->desc.mask;
	u32 work_done = 0;
	u64 bytes = 0;

	desc = rx->desc.desc_ring + idx;
	while ((GVE_SEQNO(desc->flags_seq) == rx->desc.seqno) &&
	       work_done < budget) {
		netif_info(priv, rx_status, priv->dev,
			   "[%d] idx=%d desc=%p desc->flags_seq=0x%x\n",
			   rx->q_num, idx, desc, desc->flags_seq);
		netif_info(priv, rx_status, priv->dev,
			   "[%d] seqno=%d rx->desc.seqno=%d\n",
			   rx->q_num, GVE_SEQNO(desc->flags_seq),
			   rx->desc.seqno);
		bytes += be16_to_cpu(desc->len) - GVE_RX_PAD;
		if (!gve_rx(rx, desc, feat))
			return false;
		cnt++;
		idx = cnt & rx->desc.mask;
		desc = rx->desc.desc_ring + idx;
		rx->desc.seqno = gve_next_seqno(rx->desc.seqno);
		work_done++;
	}

	if (!work_done)
		return false;

	u64_stats_update_begin(&rx->statss);
	rx->rpackets += work_done;
	rx->rbytes += bytes;
	u64_stats_update_end(&rx->statss);
	rx->desc.cnt = cnt;
	rx->desc.fill_cnt += work_done;

	/* restock desc ring slots */
	dma_wmb(); /* Ensure descs are visible before ringing doorbell */
	gve_rx_write_doorbell(priv, rx);
	return gve_rx_work_pending(rx);
}

/* NAPI poll entry point for rx: clean completions within @budget, or
 * just peek for pending work. Returns true if the block should repoll.
 */
bool gve_rx_poll(struct gve_notify_block *block, int budget)
{
	struct gve_rx_ring *rx = block->rx;
	netdev_features_t feat;
	bool repoll = false;

	feat = block->napi.dev->features;

	/* If budget is 0, do all the work */
	if (budget == 0)
		budget = INT_MAX;

	if (budget > 0)
		repoll |= gve_clean_rx_done(rx, budget, feat);
	else
		repoll |= gve_rx_work_pending(rx);
	return repoll;
}
+584
drivers/net/ethernet/google/gve/gve_tx.c
// SPDX-License-Identifier: (GPL-2.0 OR MIT)
/* Google virtual Ethernet (gve) driver
 *
 * Copyright (C) 2015-2019 Google, Inc.
 */

#include "gve.h"
#include "gve_adminq.h"
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/vmalloc.h>
#include <linux/skbuff.h>

/* Write @val to this tx queue's big-endian doorbell in BAR2. */
static inline void gve_tx_put_doorbell(struct gve_priv *priv,
				       struct gve_queue_resources *q_resources,
				       u32 val)
{
	iowrite32be(val, &priv->db_bar2[be32_to_cpu(q_resources->db_index)]);
}

/* gvnic can only transmit from a Registered Segment.
 * We copy skb payloads into the registered segment before writing Tx
 * descriptors and ringing the Tx doorbell.
 *
 * gve_tx_fifo_* manages the Registered Segment as a FIFO - clients must
 * free allocations in the order they were allocated.
 */

/* Map the FIFO's QPL pages into one contiguous kernel VA range and mark
 * the whole FIFO as available.
 */
static int gve_tx_fifo_init(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	fifo->base = vmap(fifo->qpl->pages, fifo->qpl->num_entries, VM_MAP,
			  PAGE_KERNEL);
	if (unlikely(!fifo->base)) {
		netif_err(priv, drv, priv->dev, "Failed to vmap fifo, qpl_id = %d\n",
			  fifo->qpl->id);
		return -ENOMEM;
	}

	fifo->size = fifo->qpl->num_entries * PAGE_SIZE;
	atomic_set(&fifo->available, fifo->size);
	fifo->head = 0;
	return 0;
}

/* Unmap the FIFO; warns if any allocations are still outstanding. */
static void gve_tx_fifo_release(struct gve_priv *priv, struct gve_tx_fifo *fifo)
{
	WARN(atomic_read(&fifo->available) != fifo->size,
	     "Releasing non-empty fifo");

	vunmap(fifo->base);
}

/* Return the tail padding needed (0 if none) so that a @bytes-sized
 * allocation does not straddle the end of the FIFO.
 */
static int gve_tx_fifo_pad_alloc_one_frag(struct gve_tx_fifo *fifo,
					  size_t bytes)
{
	return (fifo->head + bytes < fifo->size) ? 0 : fifo->size - fifo->head;
}

/* Return true if strictly more than @bytes of FIFO space is available. */
static bool gve_tx_fifo_can_alloc(struct gve_tx_fifo *fifo, size_t bytes)
{
	return (atomic_read(&fifo->available) <= bytes) ? false : true;
}

/* gve_tx_alloc_fifo - Allocate fragment(s) from Tx FIFO
 * @fifo: FIFO to allocate from
 * @bytes: Allocation size
 * @iov: Scatter-gather elements to fill with allocation fragment base/len
 *
 * Returns number of valid elements in iov[] or negative on error.
 *
 * Allocations from a given FIFO must be externally synchronized but concurrent
 * allocation and frees are allowed.
 */
static int gve_tx_alloc_fifo(struct gve_tx_fifo *fifo, size_t bytes,
			     struct gve_tx_iovec iov[2])
{
	size_t overflow, padding;
	u32 aligned_head;
	int nfrags = 0;

	if (!bytes)
		return 0;

	/* This check happens before we know how much padding is needed to
	 * align to a cacheline boundary for the payload, but that is fine,
	 * because the FIFO head always start aligned, and the FIFO's boundaries
	 * are aligned, so if there is space for the data, there is space for
	 * the padding to the next alignment.
	 */
	WARN(!gve_tx_fifo_can_alloc(fifo, bytes),
	     "Reached %s when there's not enough space in the fifo", __func__);

	nfrags++;

	iov[0].iov_offset = fifo->head;
	iov[0].iov_len = bytes;
	fifo->head += bytes;

	if (fifo->head > fifo->size) {
		/* If the allocation did not fit in the tail fragment of the
		 * FIFO, also use the head fragment.
		 */
		nfrags++;
		overflow = fifo->head - fifo->size;
		iov[0].iov_len -= overflow;
		iov[1].iov_offset = 0;	/* Start of fifo*/
		iov[1].iov_len = overflow;

		fifo->head = overflow;
	}

	/* Re-align to a cacheline boundary */
	aligned_head = L1_CACHE_ALIGN(fifo->head);
	padding = aligned_head - fifo->head;
	/* padding is charged to the last fragment so frees stay in order */
	iov[nfrags - 1].iov_padding = padding;
	atomic_sub(bytes + padding, &fifo->available);
	fifo->head = aligned_head;

	if (fifo->head == fifo->size)
		fifo->head = 0;

	return nfrags;
}

/* gve_tx_free_fifo - Return space to Tx FIFO
 * @fifo: FIFO to return fragments to
 * @bytes: Bytes to free
 */
static void gve_tx_free_fifo(struct gve_tx_fifo *fifo, size_t bytes)
{
	atomic_add(bytes, &fifo->available);
}

/* Detach tx ring @queue_idx from its notification block. */
static void gve_tx_remove_from_block(struct gve_priv *priv, int queue_idx)
{
	struct gve_notify_block *block =
			&priv->ntfy_blocks[gve_tx_idx_to_ntfy(priv, queue_idx)];

	block->tx = NULL;
}

static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx,
			     u32 to_do, bool try_to_wake);

/* Free all host-side resources of tx ring @idx. Drains all completions
 * first (try_to_wake = false: the queue is going away).
 */
static void gve_tx_free_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	size_t bytes;
	u32 slots;

	gve_tx_remove_from_block(priv, idx);
	slots = tx->mask + 1;
	gve_clean_tx_done(priv, tx, tx->req, false);
	netdev_tx_reset_queue(tx->netdev_txq);

	dma_free_coherent(hdev, sizeof(*tx->q_resources),
			  tx->q_resources, tx->q_resources_bus);
	tx->q_resources = NULL;

	gve_tx_fifo_release(priv, &tx->tx_fifo);
	gve_unassign_qpl(priv, tx->tx_fifo.qpl->id);
	tx->tx_fifo.qpl = NULL;

	bytes = sizeof(*tx->desc) * slots;
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;

	vfree(tx->info);
	tx->info = NULL;

	netif_dbg(priv, drv, priv->dev, "freed tx queue %d\n", idx);
}

/* Attach tx ring @queue_idx to its notification block and remember the
 * block index on the ring.
 */
static void gve_tx_add_to_block(struct gve_priv *priv, int queue_idx)
{
	int ntfy_idx = gve_tx_idx_to_ntfy(priv, queue_idx);
	struct gve_notify_block *block = &priv->ntfy_blocks[ntfy_idx];
	struct gve_tx_ring *tx = &priv->tx[queue_idx];

	block->tx = tx;
	tx->ntfy_id = ntfy_idx;
}

/* Allocate all host-side resources of tx ring @idx: per-slot metadata,
 * descriptor ring, FIFO mapping, and queue resources.
 * Returns 0 or -ENOMEM (all failure modes here are allocation failures).
 */
static int gve_tx_alloc_ring(struct gve_priv *priv, int idx)
{
	struct gve_tx_ring *tx = &priv->tx[idx];
	struct device *hdev = &priv->pdev->dev;
	u32 slots = priv->tx_desc_cnt;
	size_t bytes;

	/* Make sure everything is zeroed to start */
	memset(tx, 0, sizeof(*tx));
	tx->q_num = idx;

	tx->mask = slots - 1;

	/* alloc metadata */
	tx->info = vzalloc(sizeof(*tx->info) * slots);
	if (!tx->info)
		return -ENOMEM;

	/* alloc tx queue */
	bytes = sizeof(*tx->desc) * slots;
	tx->desc = dma_alloc_coherent(hdev, bytes, &tx->bus, GFP_KERNEL);
	if (!tx->desc)
		goto abort_with_info;

	/* NOTE(review): assumes a free tx QPL is always available here -
	 * QPLs are sized to the queue config at alloc time; confirm.
	 */
	tx->tx_fifo.qpl = gve_assign_tx_qpl(priv);

	/* map Tx FIFO */
	if (gve_tx_fifo_init(priv, &tx->tx_fifo))
		goto abort_with_desc;

	tx->q_resources =
		dma_alloc_coherent(hdev,
				   sizeof(*tx->q_resources),
				   &tx->q_resources_bus,
				   GFP_KERNEL);
	if (!tx->q_resources)
		goto abort_with_fifo;

	netif_dbg(priv, drv, priv->dev, "tx[%d]->bus=%lx\n", idx,
		  (unsigned long)tx->bus);
	tx->netdev_txq = netdev_get_tx_queue(priv->dev, idx);
	gve_tx_add_to_block(priv, idx);

	return 0;

abort_with_fifo:
	gve_tx_fifo_release(priv, &tx->tx_fifo);
abort_with_desc:
	dma_free_coherent(hdev, bytes, tx->desc, tx->bus);
	tx->desc = NULL;
abort_with_info:
	vfree(tx->info);
	tx->info = NULL;
	return -ENOMEM;
}

/* Allocate every configured tx ring; on failure, free the rings that
 * were successfully allocated and return the error.
 */
int gve_tx_alloc_rings(struct gve_priv *priv)
{
	int err = 0;
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++) {
		err = gve_tx_alloc_ring(priv, i);
		if (err) {
			netif_err(priv, drv, priv->dev,
				  "Failed to alloc tx ring=%d: err=%d\n",
				  i, err);
			break;
		}
	}
	/* Unallocate if there was an error */
	if (err) {
		int j;

		for (j = 0; j < i; j++)
			gve_tx_free_ring(priv, j);
	}
	return err;
}

/* Free every configured tx ring. */
void gve_tx_free_rings(struct gve_priv *priv)
{
	int i;

	for (i = 0; i < priv->tx_cfg.num_queues; i++)
		gve_tx_free_ring(priv, i);
}

/* gve_tx_avail - Calculates the number of slots available in the ring
 * @tx: tx ring to check
 *
 * Returns the number of slots available
 *
 * The capacity of the queue is mask + 1. We don't need to reserve an entry.
 **/
static inline u32 gve_tx_avail(struct gve_tx_ring *tx)
{
	return tx->mask + 1 - (tx->req - tx->done);
}

/* Worst-case FIFO bytes needed for @skb: header bytes (full headers for
 * GSO, whole linear data otherwise) plus cacheline-alignment padding and
 * any end-of-FIFO wrap padding, plus the payload.
 */
static inline int gve_skb_fifo_bytes_required(struct gve_tx_ring *tx,
					      struct sk_buff *skb)
{
	int pad_bytes, align_hdr_pad;
	int bytes;
	int hlen;

	hlen = skb_is_gso(skb) ? skb_checksum_start_offset(skb) +
				 tcp_hdrlen(skb) : skb_headlen(skb);

	pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo,
						   hlen);
	/* We need to take into account the header alignment padding. */
	align_hdr_pad = L1_CACHE_ALIGN(hlen) - hlen;
	bytes = align_hdr_pad + pad_bytes + skb->len;

	return bytes;
}

/* The most descriptors we could need are 3 - 1 for the headers, 1 for
 * the beginning of the payload at the end of the FIFO, and 1 if the
 * payload wraps to the beginning of the FIFO.
 */
#define MAX_TX_DESC_NEEDED	3

/* Check if sufficient resources (descriptor ring space, FIFO space) are
 * available to transmit the given number of bytes.
 */
static inline bool gve_can_tx(struct gve_tx_ring *tx, int bytes_required)
{
	return (gve_tx_avail(tx) >= MAX_TX_DESC_NEEDED &&
		gve_tx_fifo_can_alloc(&tx->tx_fifo, bytes_required));
}

/* Stops the queue if the skb cannot be transmitted. */
/* NOTE(review): this function continues beyond the end of this chunk;
 * the remainder is not visible here.
 */
static int gve_maybe_stop_tx(struct gve_tx_ring *tx, struct sk_buff *skb)
{
	int bytes_required;

	bytes_required = gve_skb_fifo_bytes_required(tx, skb);
	if (likely(gve_can_tx(tx, bytes_required)))
		return 0;

	/* No space, so stop the queue */
	tx->stop_queue++;
	netif_tx_stop_queue(tx->netdev_txq);
	smp_mb();	/* sync with restarting queue in gve_clean_tx_done() */

	/* Now check for resources again, in case gve_clean_tx_done() freed
	 * resources after we checked and we stopped the queue after
	 * gve_clean_tx_done() checked.
	 *
	 * gve_maybe_stop_tx()			gve_clean_tx_done()
	 *   nsegs/can_alloc test failed
	 *					  gve_tx_free_fifo()
	 *					  if (tx queue stopped)
	 *					    netif_tx_queue_wake()
	 *   netif_tx_stop_queue()
	 *   Need to check again for space here!
344 + */ 345 + if (likely(!gve_can_tx(tx, bytes_required))) 346 + return -EBUSY; 347 + 348 + netif_tx_start_queue(tx->netdev_txq); 349 + tx->wake_queue++; 350 + return 0; 351 + } 352 + 353 + static void gve_tx_fill_pkt_desc(union gve_tx_desc *pkt_desc, 354 + struct sk_buff *skb, bool is_gso, 355 + int l4_hdr_offset, u32 desc_cnt, 356 + u16 hlen, u64 addr) 357 + { 358 + /* l4_hdr_offset and csum_offset are in units of 16-bit words */ 359 + if (is_gso) { 360 + pkt_desc->pkt.type_flags = GVE_TXD_TSO | GVE_TXF_L4CSUM; 361 + pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; 362 + pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; 363 + } else if (likely(skb->ip_summed == CHECKSUM_PARTIAL)) { 364 + pkt_desc->pkt.type_flags = GVE_TXD_STD | GVE_TXF_L4CSUM; 365 + pkt_desc->pkt.l4_csum_offset = skb->csum_offset >> 1; 366 + pkt_desc->pkt.l4_hdr_offset = l4_hdr_offset >> 1; 367 + } else { 368 + pkt_desc->pkt.type_flags = GVE_TXD_STD; 369 + pkt_desc->pkt.l4_csum_offset = 0; 370 + pkt_desc->pkt.l4_hdr_offset = 0; 371 + } 372 + pkt_desc->pkt.desc_cnt = desc_cnt; 373 + pkt_desc->pkt.len = cpu_to_be16(skb->len); 374 + pkt_desc->pkt.seg_len = cpu_to_be16(hlen); 375 + pkt_desc->pkt.seg_addr = cpu_to_be64(addr); 376 + } 377 + 378 + static void gve_tx_fill_seg_desc(union gve_tx_desc *seg_desc, 379 + struct sk_buff *skb, bool is_gso, 380 + u16 len, u64 addr) 381 + { 382 + seg_desc->seg.type_flags = GVE_TXD_SEG; 383 + if (is_gso) { 384 + if (skb_is_gso_v6(skb)) 385 + seg_desc->seg.type_flags |= GVE_TXSF_IPV6; 386 + seg_desc->seg.l3_offset = skb_network_offset(skb) >> 1; 387 + seg_desc->seg.mss = cpu_to_be16(skb_shinfo(skb)->gso_size); 388 + } 389 + seg_desc->seg.seg_len = cpu_to_be16(len); 390 + seg_desc->seg.seg_addr = cpu_to_be64(addr); 391 + } 392 + 393 + static int gve_tx_add_skb(struct gve_tx_ring *tx, struct sk_buff *skb) 394 + { 395 + int pad_bytes, hlen, hdr_nfrags, payload_nfrags, l4_hdr_offset; 396 + union gve_tx_desc *pkt_desc, *seg_desc; 397 + struct gve_tx_buffer_state 
*info; 398 + bool is_gso = skb_is_gso(skb); 399 + u32 idx = tx->req & tx->mask; 400 + int payload_iov = 2; 401 + int copy_offset; 402 + u32 next_idx; 403 + int i; 404 + 405 + info = &tx->info[idx]; 406 + pkt_desc = &tx->desc[idx]; 407 + 408 + l4_hdr_offset = skb_checksum_start_offset(skb); 409 + /* If the skb is gso, then we want the tcp header in the first segment 410 + * otherwise we want the linear portion of the skb (which will contain 411 + * the checksum because skb->csum_start and skb->csum_offset are given 412 + * relative to skb->head) in the first segment. 413 + */ 414 + hlen = is_gso ? l4_hdr_offset + tcp_hdrlen(skb) : 415 + skb_headlen(skb); 416 + 417 + info->skb = skb; 418 + /* We don't want to split the header, so if necessary, pad to the end 419 + * of the fifo and then put the header at the beginning of the fifo. 420 + */ 421 + pad_bytes = gve_tx_fifo_pad_alloc_one_frag(&tx->tx_fifo, hlen); 422 + hdr_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, hlen + pad_bytes, 423 + &info->iov[0]); 424 + WARN(!hdr_nfrags, "hdr_nfrags should never be 0!"); 425 + payload_nfrags = gve_tx_alloc_fifo(&tx->tx_fifo, skb->len - hlen, 426 + &info->iov[payload_iov]); 427 + 428 + gve_tx_fill_pkt_desc(pkt_desc, skb, is_gso, l4_hdr_offset, 429 + 1 + payload_nfrags, hlen, 430 + info->iov[hdr_nfrags - 1].iov_offset); 431 + 432 + skb_copy_bits(skb, 0, 433 + tx->tx_fifo.base + info->iov[hdr_nfrags - 1].iov_offset, 434 + hlen); 435 + copy_offset = hlen; 436 + 437 + for (i = payload_iov; i < payload_nfrags + payload_iov; i++) { 438 + next_idx = (tx->req + 1 + i - payload_iov) & tx->mask; 439 + seg_desc = &tx->desc[next_idx]; 440 + 441 + gve_tx_fill_seg_desc(seg_desc, skb, is_gso, 442 + info->iov[i].iov_len, 443 + info->iov[i].iov_offset); 444 + 445 + skb_copy_bits(skb, copy_offset, 446 + tx->tx_fifo.base + info->iov[i].iov_offset, 447 + info->iov[i].iov_len); 448 + copy_offset += info->iov[i].iov_len; 449 + } 450 + 451 + return 1 + payload_nfrags; 452 + } 453 + 454 + netdev_tx_t 
gve_tx(struct sk_buff *skb, struct net_device *dev) 455 + { 456 + struct gve_priv *priv = netdev_priv(dev); 457 + struct gve_tx_ring *tx; 458 + int nsegs; 459 + 460 + WARN(skb_get_queue_mapping(skb) > priv->tx_cfg.num_queues, 461 + "skb queue index out of range"); 462 + tx = &priv->tx[skb_get_queue_mapping(skb)]; 463 + if (unlikely(gve_maybe_stop_tx(tx, skb))) { 464 + /* We need to ring the txq doorbell -- we have stopped the Tx 465 + * queue for want of resources, but prior calls to gve_tx() 466 + * may have added descriptors without ringing the doorbell. 467 + */ 468 + 469 + /* Ensure tx descs from a prior gve_tx are visible before 470 + * ringing doorbell. 471 + */ 472 + dma_wmb(); 473 + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); 474 + return NETDEV_TX_BUSY; 475 + } 476 + nsegs = gve_tx_add_skb(tx, skb); 477 + 478 + netdev_tx_sent_queue(tx->netdev_txq, skb->len); 479 + skb_tx_timestamp(skb); 480 + 481 + /* give packets to NIC */ 482 + tx->req += nsegs; 483 + 484 + if (!netif_xmit_stopped(tx->netdev_txq) && netdev_xmit_more()) 485 + return NETDEV_TX_OK; 486 + 487 + /* Ensure tx descs are visible before ringing doorbell */ 488 + dma_wmb(); 489 + gve_tx_put_doorbell(priv, tx->q_resources, tx->req); 490 + return NETDEV_TX_OK; 491 + } 492 + 493 + #define GVE_TX_START_THRESH PAGE_SIZE 494 + 495 + static int gve_clean_tx_done(struct gve_priv *priv, struct gve_tx_ring *tx, 496 + u32 to_do, bool try_to_wake) 497 + { 498 + struct gve_tx_buffer_state *info; 499 + u64 pkts = 0, bytes = 0; 500 + size_t space_freed = 0; 501 + struct sk_buff *skb; 502 + int i, j; 503 + u32 idx; 504 + 505 + for (j = 0; j < to_do; j++) { 506 + idx = tx->done & tx->mask; 507 + netif_info(priv, tx_done, priv->dev, 508 + "[%d] %s: idx=%d (req=%u done=%u)\n", 509 + tx->q_num, __func__, idx, tx->req, tx->done); 510 + info = &tx->info[idx]; 511 + skb = info->skb; 512 + 513 + /* Mark as free */ 514 + if (skb) { 515 + info->skb = NULL; 516 + bytes += skb->len; 517 + pkts++; 518 + 
dev_consume_skb_any(skb); 519 + /* FIFO free */ 520 + for (i = 0; i < ARRAY_SIZE(info->iov); i++) { 521 + space_freed += info->iov[i].iov_len + 522 + info->iov[i].iov_padding; 523 + info->iov[i].iov_len = 0; 524 + info->iov[i].iov_padding = 0; 525 + } 526 + } 527 + tx->done++; 528 + } 529 + 530 + gve_tx_free_fifo(&tx->tx_fifo, space_freed); 531 + u64_stats_update_begin(&tx->statss); 532 + tx->bytes_done += bytes; 533 + tx->pkt_done += pkts; 534 + u64_stats_update_end(&tx->statss); 535 + netdev_tx_completed_queue(tx->netdev_txq, pkts, bytes); 536 + 537 + /* start the queue if we've stopped it */ 538 + #ifndef CONFIG_BQL 539 + /* Make sure that the doorbells are synced */ 540 + smp_mb(); 541 + #endif 542 + if (try_to_wake && netif_tx_queue_stopped(tx->netdev_txq) && 543 + likely(gve_can_tx(tx, GVE_TX_START_THRESH))) { 544 + tx->wake_queue++; 545 + netif_tx_wake_queue(tx->netdev_txq); 546 + } 547 + 548 + return pkts; 549 + } 550 + 551 + __be32 gve_tx_load_event_counter(struct gve_priv *priv, 552 + struct gve_tx_ring *tx) 553 + { 554 + u32 counter_index = be32_to_cpu((tx->q_resources->counter_index)); 555 + 556 + return READ_ONCE(priv->counter_array[counter_index]); 557 + } 558 + 559 + bool gve_tx_poll(struct gve_notify_block *block, int budget) 560 + { 561 + struct gve_priv *priv = block->priv; 562 + struct gve_tx_ring *tx = block->tx; 563 + bool repoll = false; 564 + u32 nic_done; 565 + u32 to_do; 566 + 567 + /* If budget is 0, do all the work */ 568 + if (budget == 0) 569 + budget = INT_MAX; 570 + 571 + /* Find out how much work there is to be done */ 572 + tx->last_nic_done = gve_tx_load_event_counter(priv, tx); 573 + nic_done = be32_to_cpu(tx->last_nic_done); 574 + if (budget > 0) { 575 + /* Do as much work as we have that the budget will 576 + * allow 577 + */ 578 + to_do = min_t(u32, (nic_done - tx->done), budget); 579 + gve_clean_tx_done(priv, tx, to_do, true); 580 + } 581 + /* If we still have work we want to repoll */ 582 + repoll |= (nic_done != tx->done); 
583 + return repoll; 584 + }