Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1/* SPDX-License-Identifier: GPL-2.0-only */
2/* Copyright (C) 2024 Intel Corporation */
3
4#ifndef __LIBETH_RX_H
5#define __LIBETH_RX_H
6
7#include <linux/if_vlan.h>
8
9#include <net/page_pool/helpers.h>
10#include <net/xdp.h>
11
/* Rx buffer management */

/* Space reserved in front of each frame (for headers pushed later by skbs) */
#define LIBETH_SKB_HEADROOM	(NET_SKB_PAD + NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM	LIBETH_SKB_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN	(ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER	0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE	SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom, aligned.
 * @hr: headroom to reserve in front of the HW-writeable area
 */
#define LIBETH_RX_PAGE_LEN(hr)						\
	ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),		\
		   LIBETH_RX_BUF_STRIDE)
29
/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @page: page holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, API switches between one-page-per-frame and shared
 * page model (to conserve memory on bigger-page platforms). In case of the
 * former, @offset is always 0 and @truesize is always ``PAGE_SIZE``.
 */
struct libeth_fqe {
	struct page		*page;
	u32			offset;
	u32			truesize;
} __aligned_largest;
45
/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure, accessed on each buffer allocation
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/overhead
 * @count: number of descriptors/buffers the queue has
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
	struct_group_tagged(libeth_fq_fp, fp,
		struct page_pool	*pp;
		struct libeth_fqe	*fqes;

		u32			truesize;
		u32			count;
	);

	/* Cold fields: only touched on queue setup/teardown */
	u32			buf_len;
	int			nid;
};
69
70int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
71void libeth_rx_fq_destroy(struct libeth_fq *fq);
72
73/**
74 * libeth_rx_alloc - allocate a new Rx buffer
75 * @fq: fill queue to allocate for
76 * @i: index of the buffer within the queue
77 *
78 * Return: DMA address to be passed to HW for Rx on successful allocation,
79 * ```DMA_MAPPING_ERROR``` otherwise.
80 */
81static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
82{
83 struct libeth_fqe *buf = &fq->fqes[i];
84
85 buf->truesize = fq->truesize;
86 buf->page = page_pool_dev_alloc(fq->pp, &buf->offset, &buf->truesize);
87 if (unlikely(!buf->page))
88 return DMA_MAPPING_ERROR;
89
90 return page_pool_get_dma_addr(buf->page) + buf->offset +
91 fq->pp->p.offset;
92}
93
94void libeth_rx_recycle_slow(struct page *page);
95
96/**
97 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
98 * @fqe: buffer to process
99 * @len: frame length from the descriptor
100 *
101 * Process the buffer after it's written by HW. The regular path is to
102 * synchronize DMA for CPU, but in case of no data it will be immediately
103 * recycled back to its PP.
104 *
105 * Return: true when there's data to process, false otherwise.
106 */
107static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
108 u32 len)
109{
110 struct page *page = fqe->page;
111
112 /* Very rare, but possible case. The most common reason:
113 * the last fragment contained FCS only, which was then
114 * stripped by the HW.
115 */
116 if (unlikely(!len)) {
117 libeth_rx_recycle_slow(page);
118 return false;
119 }
120
121 page_pool_dma_sync_for_cpu(page->pp, page, fqe->offset, len);
122
123 return true;
124}
125
/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

/* Outermost (L2/L3) header type */
enum {
	LIBETH_RX_PT_OUTER_L2			= 0U,
	LIBETH_RX_PT_OUTER_IPV4,
	LIBETH_RX_PT_OUTER_IPV6,
};

/* Whether the outer IP header is fragmented */
enum {
	LIBETH_RX_PT_NOT_FRAG			= 0U,
	LIBETH_RX_PT_FRAG,
};

/* IP-in-IP / GRE-NAT tunnel type (incl. inner MAC/VLAN presence) */
enum {
	LIBETH_RX_PT_TUNNEL_IP_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_IP_IP,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
	LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

/* IP version of the tunnel-end (inner) header */
enum {
	LIBETH_RX_PT_TUNNEL_END_NONE		= 0U,
	LIBETH_RX_PT_TUNNEL_END_IPV4,
	LIBETH_RX_PT_TUNNEL_END_IPV6,
};

/* Innermost (L4) protocol type */
enum {
	LIBETH_RX_PT_INNER_NONE			= 0U,
	LIBETH_RX_PT_INNER_UDP,
	LIBETH_RX_PT_INNER_TCP,
	LIBETH_RX_PT_INNER_SCTP,
	LIBETH_RX_PT_INNER_ICMP,
	LIBETH_RX_PT_INNER_TIMESYNC,
};

/* Payload hash level, maps 1:1 onto &enum pkt_hash_types */
#define LIBETH_RX_PT_PAYLOAD_NONE		PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2			PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3			PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4			PKT_HASH_TYPE_L4
168
/**
 * struct libeth_rx_pt - decoded packet type parameters for O(1) Rx lookup
 * @outer_ip: outermost header type, one of LIBETH_RX_PT_OUTER_*
 * @outer_frag: outer IP fragmentation, LIBETH_RX_PT_{NOT_,}FRAG
 * @tunnel_type: tunnel type, one of LIBETH_RX_PT_TUNNEL_IP_*
 * @tunnel_end_prot: tunnel-end IP version, one of LIBETH_RX_PT_TUNNEL_END_*
 * @tunnel_end_frag: tunnel-end fragmentation, LIBETH_RX_PT_{NOT_,}FRAG
 * @inner_prot: innermost (L4) protocol, one of LIBETH_RX_PT_INNER_*
 * @payload_layer: payload hash level, maps onto &enum pkt_hash_types
 * @pad: explicit padding up to the next 32-bit boundary, must be zero
 * @hash_type: XDP RSS hash type, filled by libeth_rx_pt_gen_hash_type()
 */
struct libeth_rx_pt {
	u32					outer_ip:2;
	u32					outer_frag:1;
	u32					tunnel_type:3;
	u32					tunnel_end_prot:2;
	u32					tunnel_end_frag:1;
	u32					inner_prot:3;
	enum pkt_hash_types			payload_layer:2;

	u32					pad:2;
	enum xdp_rss_hash_type			hash_type:16;
};
181
182void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);
183
184/**
185 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
186 * @pt: packet type params
187 *
188 * Wrapper to compile out the IPv6 code from the drivers when not supported
189 * by the kernel.
190 *
191 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
192 */
193static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
194{
195#if !IS_ENABLED(CONFIG_IPV6)
196 switch (pt.outer_ip) {
197 case LIBETH_RX_PT_OUTER_IPV4:
198 return LIBETH_RX_PT_OUTER_IPV4;
199 default:
200 return LIBETH_RX_PT_OUTER_L2;
201 }
202#else
203 return pt.outer_ip;
204#endif
205}
206
207/* libeth_has_*() can be used to quickly check whether the HW metadata is
208 * available to avoid further expensive processing such as descriptor reads.
209 * They already check for the corresponding netdev feature to be enabled,
210 * thus can be used as drop-in replacements.
211 */
212
213static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
214 struct libeth_rx_pt pt)
215{
216 /* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
217 * it is enough to check only for the L4 type.
218 */
219 return likely(pt.inner_prot > LIBETH_RX_PT_INNER_NONE &&
220 (dev->features & NETIF_F_RXCSUM));
221}
222
223static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
224 struct libeth_rx_pt pt)
225{
226 return likely(pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE &&
227 (dev->features & NETIF_F_RXHASH));
228}
229
/**
 * libeth_rx_pt_set_hash - fill in skb hash value basing on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 *
 * The hash level is taken from @pt.payload_layer, which is declared as
 * &enum pkt_hash_types and thus passed to skb_set_hash() directly.
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
					 struct libeth_rx_pt pt)
{
	skb_set_hash(skb, hash, pt.payload_layer);
}
241
242#endif /* __LIBETH_RX_H */