include/linux/filter.h at v4.18-rc3 · tjh.dev/kernel

tjh.dev / kernel
Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
kernel / include / linux / filter.h
at v4.18-rc3 30 kB view raw
   1/* SPDX-License-Identifier: GPL-2.0 */
   2/*
   3 * Linux Socket Filter Data Structures
   4 */
   5#ifndef __LINUX_FILTER_H__
   6#define __LINUX_FILTER_H__
   7
   8#include <stdarg.h>
   9
  10#include <linux/atomic.h>
  11#include <linux/refcount.h>
  12#include <linux/compat.h>
  13#include <linux/skbuff.h>
  14#include <linux/linkage.h>
  15#include <linux/printk.h>
  16#include <linux/workqueue.h>
  17#include <linux/sched.h>
  18#include <linux/capability.h>
  19#include <linux/cryptohash.h>
  20#include <linux/set_memory.h>
  21#include <linux/kallsyms.h>
  22#include <linux/if_vlan.h>
  23
  24#include <net/sch_generic.h>
  25
  26#include <uapi/linux/filter.h>
  27#include <uapi/linux/bpf.h>
  28
  29struct sk_buff;
  30struct sock;
  31struct seccomp_data;
  32struct bpf_prog_aux;
  33struct xdp_rxq_info;
  34struct xdp_buff;
  35
  36/* ArgX, context and stack frame pointer register positions. Note,
  37 * Arg1, Arg2, Arg3, etc are used as argument mappings of function
  38 * calls in BPF_CALL instruction.
  39 */
  40#define BPF_REG_ARG1	BPF_REG_1
  41#define BPF_REG_ARG2	BPF_REG_2
  42#define BPF_REG_ARG3	BPF_REG_3
  43#define BPF_REG_ARG4	BPF_REG_4
  44#define BPF_REG_ARG5	BPF_REG_5
  45#define BPF_REG_CTX	BPF_REG_6
  46#define BPF_REG_FP	BPF_REG_10
  47
  48/* Additional register mappings for converted user programs. */
  49#define BPF_REG_A	BPF_REG_0
  50#define BPF_REG_X	BPF_REG_7
  51#define BPF_REG_TMP	BPF_REG_2	/* scratch reg */
  52#define BPF_REG_D	BPF_REG_8	/* data, callee-saved */
  53#define BPF_REG_H	BPF_REG_9	/* hlen, callee-saved */
  54
  55/* Kernel hidden auxiliary/helper register for hardening step.
  56 * Only used by eBPF JITs. It's nothing more than a temporary
  57 * register that JITs use internally, only that here it's part
  58 * of eBPF instructions that have been rewritten for blinding
  59 * constants. See JIT pre-step in bpf_jit_blind_constants().
  60 */
  61#define BPF_REG_AX		MAX_BPF_REG
  62#define MAX_BPF_JIT_REG		(MAX_BPF_REG + 1)
  63
  64/* unused opcode to mark special call to bpf_tail_call() helper */
  65#define BPF_TAIL_CALL	0xf0
  66
  67/* unused opcode to mark call to interpreter with arguments */
  68#define BPF_CALL_ARGS	0xe0
  69
  70/* As per nm, we expose JITed images as text (code) section for
  71 * kallsyms. That way, tools like perf can find it to match
  72 * addresses.
  73 */
  74#define BPF_SYM_ELF_TYPE	't'
  75
  76/* BPF program can access up to 512 bytes of stack space. */
  77#define MAX_BPF_STACK	512
  78
  79/* Helper macros for filter block array initializers. */
  80
  81/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
  82
  83#define BPF_ALU64_REG(OP, DST, SRC)				\
  84	((struct bpf_insn) {					\
  85		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_X,	\
  86		.dst_reg = DST,					\
  87		.src_reg = SRC,					\
  88		.off   = 0,					\
  89		.imm   = 0 })
  90
  91#define BPF_ALU32_REG(OP, DST, SRC)				\
  92	((struct bpf_insn) {					\
  93		.code  = BPF_ALU | BPF_OP(OP) | BPF_X,		\
  94		.dst_reg = DST,					\
  95		.src_reg = SRC,					\
  96		.off   = 0,					\
  97		.imm   = 0 })
  98
  99/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
 100
 101#define BPF_ALU64_IMM(OP, DST, IMM)				\
 102	((struct bpf_insn) {					\
 103		.code  = BPF_ALU64 | BPF_OP(OP) | BPF_K,	\
 104		.dst_reg = DST,					\
 105		.src_reg = 0,					\
 106		.off   = 0,					\
 107		.imm   = IMM })
 108
 109#define BPF_ALU32_IMM(OP, DST, IMM)				\
 110	((struct bpf_insn) {					\
 111		.code  = BPF_ALU | BPF_OP(OP) | BPF_K,		\
 112		.dst_reg = DST,					\
 113		.src_reg = 0,					\
 114		.off   = 0,					\
 115		.imm   = IMM })
 116
 117/* Endianess conversion, cpu_to_{l,b}e(), {l,b}e_to_cpu() */
 118
 119#define BPF_ENDIAN(TYPE, DST, LEN)				\
 120	((struct bpf_insn) {					\
 121		.code  = BPF_ALU | BPF_END | BPF_SRC(TYPE),	\
 122		.dst_reg = DST,					\
 123		.src_reg = 0,					\
 124		.off   = 0,					\
 125		.imm   = LEN })
 126
 127/* Short form of mov, dst_reg = src_reg */
 128
 129#define BPF_MOV64_REG(DST, SRC)					\
 130	((struct bpf_insn) {					\
 131		.code  = BPF_ALU64 | BPF_MOV | BPF_X,		\
 132		.dst_reg = DST,					\
 133		.src_reg = SRC,					\
 134		.off   = 0,					\
 135		.imm   = 0 })
 136
 137#define BPF_MOV32_REG(DST, SRC)					\
 138	((struct bpf_insn) {					\
 139		.code  = BPF_ALU | BPF_MOV | BPF_X,		\
 140		.dst_reg = DST,					\
 141		.src_reg = SRC,					\
 142		.off   = 0,					\
 143		.imm   = 0 })
 144
 145/* Short form of mov, dst_reg = imm32 */
 146
 147#define BPF_MOV64_IMM(DST, IMM)					\
 148	((struct bpf_insn) {					\
 149		.code  = BPF_ALU64 | BPF_MOV | BPF_K,		\
 150		.dst_reg = DST,					\
 151		.src_reg = 0,					\
 152		.off   = 0,					\
 153		.imm   = IMM })
 154
 155#define BPF_MOV32_IMM(DST, IMM)					\
 156	((struct bpf_insn) {					\
 157		.code  = BPF_ALU | BPF_MOV | BPF_K,		\
 158		.dst_reg = DST,					\
 159		.src_reg = 0,					\
 160		.off   = 0,					\
 161		.imm   = IMM })
 162
 163/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */
 164#define BPF_LD_IMM64(DST, IMM)					\
 165	BPF_LD_IMM64_RAW(DST, 0, IMM)
 166
 167#define BPF_LD_IMM64_RAW(DST, SRC, IMM)				\
 168	((struct bpf_insn) {					\
 169		.code  = BPF_LD | BPF_DW | BPF_IMM,		\
 170		.dst_reg = DST,					\
 171		.src_reg = SRC,					\
 172		.off   = 0,					\
 173		.imm   = (__u32) (IMM) }),			\
 174	((struct bpf_insn) {					\
 175		.code  = 0, /* zero is reserved opcode */	\
 176		.dst_reg = 0,					\
 177		.src_reg = 0,					\
 178		.off   = 0,					\
 179		.imm   = ((__u64) (IMM)) >> 32 })
 180
 181/* pseudo BPF_LD_IMM64 insn used to refer to process-local map_fd */
 182#define BPF_LD_MAP_FD(DST, MAP_FD)				\
 183	BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
 184
 185/* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */
 186
 187#define BPF_MOV64_RAW(TYPE, DST, SRC, IMM)			\
 188	((struct bpf_insn) {					\
 189		.code  = BPF_ALU64 | BPF_MOV | BPF_SRC(TYPE),	\
 190		.dst_reg = DST,					\
 191		.src_reg = SRC,					\
 192		.off   = 0,					\
 193		.imm   = IMM })
 194
 195#define BPF_MOV32_RAW(TYPE, DST, SRC, IMM)			\
 196	((struct bpf_insn) {					\
 197		.code  = BPF_ALU | BPF_MOV | BPF_SRC(TYPE),	\
 198		.dst_reg = DST,					\
 199		.src_reg = SRC,					\
 200		.off   = 0,					\
 201		.imm   = IMM })
 202
 203/* Direct packet access, R0 = *(uint *) (skb->data + imm32) */
 204
 205#define BPF_LD_ABS(SIZE, IMM)					\
 206	((struct bpf_insn) {					\
 207		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_ABS,	\
 208		.dst_reg = 0,					\
 209		.src_reg = 0,					\
 210		.off   = 0,					\
 211		.imm   = IMM })
 212
 213/* Indirect packet access, R0 = *(uint *) (skb->data + src_reg + imm32) */
 214
 215#define BPF_LD_IND(SIZE, SRC, IMM)				\
 216	((struct bpf_insn) {					\
 217		.code  = BPF_LD | BPF_SIZE(SIZE) | BPF_IND,	\
 218		.dst_reg = 0,					\
 219		.src_reg = SRC,					\
 220		.off   = 0,					\
 221		.imm   = IMM })
 222
 223/* Memory load, dst_reg = *(uint *) (src_reg + off16) */
 224
 225#define BPF_LDX_MEM(SIZE, DST, SRC, OFF)			\
 226	((struct bpf_insn) {					\
 227		.code  = BPF_LDX | BPF_SIZE(SIZE) | BPF_MEM,	\
 228		.dst_reg = DST,					\
 229		.src_reg = SRC,					\
 230		.off   = OFF,					\
 231		.imm   = 0 })
 232
 233/* Memory store, *(uint *) (dst_reg + off16) = src_reg */
 234
 235#define BPF_STX_MEM(SIZE, DST, SRC, OFF)			\
 236	((struct bpf_insn) {					\
 237		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_MEM,	\
 238		.dst_reg = DST,					\
 239		.src_reg = SRC,					\
 240		.off   = OFF,					\
 241		.imm   = 0 })
 242
 243/* Atomic memory add, *(uint *)(dst_reg + off16) += src_reg */
 244
 245#define BPF_STX_XADD(SIZE, DST, SRC, OFF)			\
 246	((struct bpf_insn) {					\
 247		.code  = BPF_STX | BPF_SIZE(SIZE) | BPF_XADD,	\
 248		.dst_reg = DST,					\
 249		.src_reg = SRC,					\
 250		.off   = OFF,					\
 251		.imm   = 0 })
 252
 253/* Memory store, *(uint *) (dst_reg + off16) = imm32 */
 254
 255#define BPF_ST_MEM(SIZE, DST, OFF, IMM)				\
 256	((struct bpf_insn) {					\
 257		.code  = BPF_ST | BPF_SIZE(SIZE) | BPF_MEM,	\
 258		.dst_reg = DST,					\
 259		.src_reg = 0,					\
 260		.off   = OFF,					\
 261		.imm   = IMM })
 262
 263/* Conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
 264
 265#define BPF_JMP_REG(OP, DST, SRC, OFF)				\
 266	((struct bpf_insn) {					\
 267		.code  = BPF_JMP | BPF_OP(OP) | BPF_X,		\
 268		.dst_reg = DST,					\
 269		.src_reg = SRC,					\
 270		.off   = OFF,					\
 271		.imm   = 0 })
 272
 273/* Conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
 274
 275#define BPF_JMP_IMM(OP, DST, IMM, OFF)				\
 276	((struct bpf_insn) {					\
 277		.code  = BPF_JMP | BPF_OP(OP) | BPF_K,		\
 278		.dst_reg = DST,					\
 279		.src_reg = 0,					\
 280		.off   = OFF,					\
 281		.imm   = IMM })
 282
 283/* Unconditional jumps, goto pc + off16 */
 284
 285#define BPF_JMP_A(OFF)						\
 286	((struct bpf_insn) {					\
 287		.code  = BPF_JMP | BPF_JA,			\
 288		.dst_reg = 0,					\
 289		.src_reg = 0,					\
 290		.off   = OFF,					\
 291		.imm   = 0 })
 292
 293/* Relative call */
 294
 295#define BPF_CALL_REL(TGT)					\
 296	((struct bpf_insn) {					\
 297		.code  = BPF_JMP | BPF_CALL,			\
 298		.dst_reg = 0,					\
 299		.src_reg = BPF_PSEUDO_CALL,			\
 300		.off   = 0,					\
 301		.imm   = TGT })
 302
 303/* Function call */
 304
 305#define BPF_CAST_CALL(x)					\
 306		((u64 (*)(u64, u64, u64, u64, u64))(x))
 307
 308#define BPF_EMIT_CALL(FUNC)					\
 309	((struct bpf_insn) {					\
 310		.code  = BPF_JMP | BPF_CALL,			\
 311		.dst_reg = 0,					\
 312		.src_reg = 0,					\
 313		.off   = 0,					\
 314		.imm   = ((FUNC) - __bpf_call_base) })
 315
 316/* Raw code statement block */
 317
 318#define BPF_RAW_INSN(CODE, DST, SRC, OFF, IMM)			\
 319	((struct bpf_insn) {					\
 320		.code  = CODE,					\
 321		.dst_reg = DST,					\
 322		.src_reg = SRC,					\
 323		.off   = OFF,					\
 324		.imm   = IMM })
 325
 326/* Program exit */
 327
 328#define BPF_EXIT_INSN()						\
 329	((struct bpf_insn) {					\
 330		.code  = BPF_JMP | BPF_EXIT,			\
 331		.dst_reg = 0,					\
 332		.src_reg = 0,					\
 333		.off   = 0,					\
 334		.imm   = 0 })
 335
 336/* Internal classic blocks for direct assignment */
 337
 338#define __BPF_STMT(CODE, K)					\
 339	((struct sock_filter) BPF_STMT(CODE, K))
 340
 341#define __BPF_JUMP(CODE, K, JT, JF)				\
 342	((struct sock_filter) BPF_JUMP(CODE, K, JT, JF))
 343
 344#define bytes_to_bpf_size(bytes)				\
 345({								\
 346	int bpf_size = -EINVAL;					\
 347								\
 348	if (bytes == sizeof(u8))				\
 349		bpf_size = BPF_B;				\
 350	else if (bytes == sizeof(u16))				\
 351		bpf_size = BPF_H;				\
 352	else if (bytes == sizeof(u32))				\
 353		bpf_size = BPF_W;				\
 354	else if (bytes == sizeof(u64))				\
 355		bpf_size = BPF_DW;				\
 356								\
 357	bpf_size;						\
 358})
 359
 360#define bpf_size_to_bytes(bpf_size)				\
 361({								\
 362	int bytes = -EINVAL;					\
 363								\
 364	if (bpf_size == BPF_B)					\
 365		bytes = sizeof(u8);				\
 366	else if (bpf_size == BPF_H)				\
 367		bytes = sizeof(u16);				\
 368	else if (bpf_size == BPF_W)				\
 369		bytes = sizeof(u32);				\
 370	else if (bpf_size == BPF_DW)				\
 371		bytes = sizeof(u64);				\
 372								\
 373	bytes;							\
 374})
 375
/* BPF size modifier for sizeof(type), via bytes_to_bpf_size(). The
 * build fails (BUILD_BUG_ON) when the width has no BPF size modifier.
 */
#define BPF_SIZEOF(type)					\
	({							\
		const int __size = bytes_to_bpf_size(sizeof(type)); \
		BUILD_BUG_ON(__size < 0);			\
		__size;						\
	})

/* Same as BPF_SIZEOF(), but for the width of member 'field' of 'type'
 * as reported by FIELD_SIZEOF().
 */
#define BPF_FIELD_SIZEOF(type, field)				\
	({							\
		const int __size = bytes_to_bpf_size(FIELD_SIZEOF(type, field)); \
		BUILD_BUG_ON(__size < 0);			\
		__size;						\
	})

/* Access width in bytes encoded in the BPF_SIZE() bits of insn->code,
 * via bpf_size_to_bytes(). An unknown size triggers WARN_ON() at run
 * time and the negative value is still the result of the expression.
 */
#define BPF_LDST_BYTES(insn)					\
	({							\
		const int __size = bpf_size_to_bytes(BPF_SIZE((insn)->code)); \
		WARN_ON(__size < 0);				\
		__size;						\
	})
 396
/* __BPF_MAP_n(m, v, t1, a1, ..., tn, an) applies m(t, a) to the first n
 * (type, name) pairs of the argument list and joins the results with
 * commas; with n == 0 it expands to v instead.
 */
#define __BPF_MAP_0(m, v, ...) v
#define __BPF_MAP_1(m, v, t, a, ...) m(t, a)
#define __BPF_MAP_2(m, v, t, a, ...) m(t, a), __BPF_MAP_1(m, v, __VA_ARGS__)
#define __BPF_MAP_3(m, v, t, a, ...) m(t, a), __BPF_MAP_2(m, v, __VA_ARGS__)
#define __BPF_MAP_4(m, v, t, a, ...) m(t, a), __BPF_MAP_3(m, v, __VA_ARGS__)
#define __BPF_MAP_5(m, v, t, a, ...) m(t, a), __BPF_MAP_4(m, v, __VA_ARGS__)

/* __BPF_REG_n maps the n given pairs and then appends __BPF_PAD(5 - n),
 * so the resulting list always has five entries.
 */
#define __BPF_REG_0(...) __BPF_PAD(5)
#define __BPF_REG_1(...) __BPF_MAP(1, __VA_ARGS__), __BPF_PAD(4)
#define __BPF_REG_2(...) __BPF_MAP(2, __VA_ARGS__), __BPF_PAD(3)
#define __BPF_REG_3(...) __BPF_MAP(3, __VA_ARGS__), __BPF_PAD(2)
#define __BPF_REG_4(...) __BPF_MAP(4, __VA_ARGS__), __BPF_PAD(1)
#define __BPF_REG_5(...) __BPF_MAP(5, __VA_ARGS__)

/* Dispatch on the argument count n by token pasting. */
#define __BPF_MAP(n, ...) __BPF_MAP_##n(__VA_ARGS__)
#define __BPF_REG(n, ...) __BPF_REG_##n(__VA_ARGS__)

/* Cast expression a to type t. The inner cast goes through unsigned
 * long when t has the size of unsigned long, and through t itself
 * otherwise.
 */
#define __BPF_CAST(t, a)						       \
	(__force t)							       \
	(__force							       \
	 typeof(__builtin_choose_expr(sizeof(t) == sizeof(unsigned long),      \
				      (unsigned long)0, (t)0))) a
/* Placeholders for the zero-argument case: 'void' or nothing at all. */
#define __BPF_V void
#define __BPF_N

/* Mappers: declare a parameter with its real type, or as a raw u64. */
#define __BPF_DECL_ARGS(t, a) t   a
#define __BPF_DECL_REGS(t, a) u64 a

/* n unused u64 parameters named __ur_1 .. __ur_n. */
#define __BPF_PAD(n)							       \
	__BPF_MAP(n, __BPF_DECL_ARGS, __BPF_N, u64, __ur_1, u64, __ur_2,       \
		  u64, __ur_3, u64, __ur_4, u64, __ur_5)

/* Define a function 'name' taking x typed arguments, given as
 * (type, name) pairs. The expansion
 *  - forward-declares the always-inline, typed ____name(),
 *  - declares and defines u64 name() with five u64 parameters, which
 *    casts the first x of them with __BPF_CAST() and calls ____name(),
 *  - ends with the header of ____name(), so the caller's { ... } block
 *    that follows the macro becomes its body.
 */
#define BPF_CALL_x(x, name, ...)					       \
	static __always_inline						       \
	u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__));   \
	u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__));	       \
	u64 name(__BPF_REG(x, __BPF_DECL_REGS, __BPF_N, __VA_ARGS__))	       \
	{								       \
		return ____##name(__BPF_MAP(x,__BPF_CAST,__BPF_N,__VA_ARGS__));\
	}								       \
	static __always_inline						       \
	u64 ____##name(__BPF_MAP(x, __BPF_DECL_ARGS, __BPF_V, __VA_ARGS__))

/* Fixed-arity front ends for BPF_CALL_x(). */
#define BPF_CALL_0(name, ...)	BPF_CALL_x(0, name, __VA_ARGS__)
#define BPF_CALL_1(name, ...)	BPF_CALL_x(1, name, __VA_ARGS__)
#define BPF_CALL_2(name, ...)	BPF_CALL_x(2, name, __VA_ARGS__)
#define BPF_CALL_3(name, ...)	BPF_CALL_x(3, name, __VA_ARGS__)
#define BPF_CALL_4(name, ...)	BPF_CALL_x(4, name, __VA_ARGS__)
#define BPF_CALL_5(name, ...)	BPF_CALL_x(5, name, __VA_ARGS__)
 446
/* Case-range expression ('first ... last') covering every byte offset
 * of MEMBER inside TYPE.
 */
#define bpf_ctx_range(TYPE, MEMBER)						\
	offsetof(TYPE, MEMBER) ... offsetofend(TYPE, MEMBER) - 1
/* Same, but spanning from the start of MEMBER1 to the end of MEMBER2. */
#define bpf_ctx_range_till(TYPE, MEMBER1, MEMBER2)				\
	offsetof(TYPE, MEMBER1) ... offsetofend(TYPE, MEMBER2) - 1

/* Evaluates to offsetof(TYPE, MEMBER) and stores SIZE through PTR_SIZE.
 * The build fails if SIZE is not the actual size of MEMBER.
 */
#define bpf_target_off(TYPE, MEMBER, SIZE, PTR_SIZE)				\
	({									\
		BUILD_BUG_ON(FIELD_SIZEOF(TYPE, MEMBER) != (SIZE));		\
		*(PTR_SIZE) = (SIZE);						\
		offsetof(TYPE, MEMBER);						\
	})
 458
 459#ifdef CONFIG_COMPAT
 460/* A struct sock_filter is architecture independent. */
 461struct compat_sock_fprog {
 462	u16		len;
 463	compat_uptr_t	filter;	/* struct sock_filter * */
 464};
 465#endif
 466
/* Classic BPF program as a length plus an instruction array pointer. */
struct sock_fprog_kern {
	u16			len;		/* number of entries in filter[], see bpf_classic_proglen() */
	struct sock_filter	*filter;	/* classic BPF instructions */
};
 471
/* Header in front of a JIT image. bpf_jit_binary_hdr() locates it by
 * masking a program's bpf_func address with PAGE_MASK.
 */
struct bpf_binary_header {
	u16 pages;	/* page count passed to set_memory_ro()/set_memory_rw() */
	u16 locked:1;	/* set by bpf_jit_binary_lock_ro(), cleared on unlock */

	/* Some arches need word alignment for their instructions */
	u8 image[] __aligned(4);
};
 479
 480struct bpf_prog {
 481	u16			pages;		/* Number of allocated pages */
 482	u16			jited:1,	/* Is our filter JIT'ed? */
 483				jit_requested:1,/* archs need to JIT the prog */
 484				locked:1,	/* Program image locked? */
 485				gpl_compatible:1, /* Is filter GPL compatible? */
 486				cb_access:1,	/* Is control block accessed? */
 487				dst_needed:1,	/* Do we need dst entry? */
 488				blinded:1,	/* Was blinded */
 489				is_func:1,	/* program is a bpf function */
 490				kprobe_override:1, /* Do we override a kprobe? */
 491				has_callchain_buf:1; /* callchain buffer allocated? */
 492	enum bpf_prog_type	type;		/* Type of BPF program */
 493	enum bpf_attach_type	expected_attach_type; /* For some prog types */
 494	u32			len;		/* Number of filter blocks */
 495	u32			jited_len;	/* Size of jited insns in bytes */
 496	u8			tag[BPF_TAG_SIZE];
 497	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
 498	struct sock_fprog_kern	*orig_prog;	/* Original BPF program */
 499	unsigned int		(*bpf_func)(const void *ctx,
 500					    const struct bpf_insn *insn);
 501	/* Instructions for interpreter */
 502	union {
 503		struct sock_filter	insns[0];
 504		struct bpf_insn		insnsi[0];
 505	};
 506};
 507
/* Reference-counted holder of a BPF program.
 * NOTE(review): presumably the object attached to a socket by
 * sk_attach_filter()/sk_attach_bpf() - confirm against net/core/filter.c.
 */
struct sk_filter {
	refcount_t	refcnt;	/* reference count */
	struct rcu_head	rcu;	/* presumably for RCU-deferred release - TODO confirm */
	struct bpf_prog	*prog;	/* the program held by this filter */
};
 513
 514#define BPF_PROG_RUN(filter, ctx)  (*(filter)->bpf_func)(ctx, (filter)->insnsi)
 515
 516#define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
 517
/* Layout that bpf_compute_data_pointers() overlays on skb->cb[]: the
 * qdisc control block comes first, followed by the two packet bounds.
 */
struct bpf_skb_data_end {
	struct qdisc_skb_cb qdisc_cb;
	void *data_meta;	/* skb->data - skb_metadata_len(skb) */
	void *data_end;		/* skb->data + skb_headlen(skb) */
};
 523
 524struct sk_msg_buff {
 525	void *data;
 526	void *data_end;
 527	__u32 apply_bytes;
 528	__u32 cork_bytes;
 529	int sg_copybreak;
 530	int sg_start;
 531	int sg_curr;
 532	int sg_end;
 533	struct scatterlist sg_data[MAX_SKB_FRAGS];
 534	bool sg_copy[MAX_SKB_FRAGS];
 535	__u32 flags;
 536	struct sock *sk_redir;
 537	struct sock *sk;
 538	struct sk_buff *skb;
 539	struct list_head list;
 540};
 541
 542/* Compute the linear packet data range [data, data_end) which
 543 * will be accessed by various program types (cls_bpf, act_bpf,
 544 * lwt, ...). Subsystems allowing direct data access must (!)
 545 * ensure that cb[] area can be written to when BPF program is
 546 * invoked (otherwise cb[] save/restore is necessary).
 547 */
 548static inline void bpf_compute_data_pointers(struct sk_buff *skb)
 549{
 550	struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb;
 551
 552	BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb));
 553	cb->data_meta = skb->data - skb_metadata_len(skb);
 554	cb->data_end  = skb->data + skb_headlen(skb);
 555}
 556
 557static inline u8 *bpf_skb_cb(struct sk_buff *skb)
 558{
 559	/* eBPF programs may read/write skb->cb[] area to transfer meta
 560	 * data between tail calls. Since this also needs to work with
 561	 * tc, that scratch memory is mapped to qdisc_skb_cb's data area.
 562	 *
 563	 * In some socket filter cases, the cb unfortunately needs to be
 564	 * saved/restored so that protocol specific skb->cb[] data won't
 565	 * be lost. In any case, due to unpriviledged eBPF programs
 566	 * attached to sockets, we need to clear the bpf_skb_cb() area
 567	 * to not leak previous contents to user space.
 568	 */
 569	BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != BPF_SKB_CB_LEN);
 570	BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) !=
 571		     FIELD_SIZEOF(struct qdisc_skb_cb, data));
 572
 573	return qdisc_skb_cb(skb)->data;
 574}
 575
 576static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog,
 577				       struct sk_buff *skb)
 578{
 579	u8 *cb_data = bpf_skb_cb(skb);
 580	u8 cb_saved[BPF_SKB_CB_LEN];
 581	u32 res;
 582
 583	if (unlikely(prog->cb_access)) {
 584		memcpy(cb_saved, cb_data, sizeof(cb_saved));
 585		memset(cb_data, 0, sizeof(cb_saved));
 586	}
 587
 588	res = BPF_PROG_RUN(prog, skb);
 589
 590	if (unlikely(prog->cb_access))
 591		memcpy(cb_data, cb_saved, sizeof(cb_saved));
 592
 593	return res;
 594}
 595
 596static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 597					struct sk_buff *skb)
 598{
 599	u8 *cb_data = bpf_skb_cb(skb);
 600
 601	if (unlikely(prog->cb_access))
 602		memset(cb_data, 0, BPF_SKB_CB_LEN);
 603
 604	return BPF_PROG_RUN(prog, skb);
 605}
 606
/* Run @prog with @xdp as its context and return the program's result. */
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
					    struct xdp_buff *xdp)
{
	/* Caller needs to hold rcu_read_lock() (!), otherwise program
	 * can be released while still running, or map elements could be
	 * freed early while still having concurrent users. XDP fastpath
	 * already takes rcu_read_lock() when fetching the program, so
	 * it's not necessary here anymore.
	 */
	return BPF_PROG_RUN(prog, xdp);
}
 618
 619static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
 620{
 621	return prog->len * sizeof(struct bpf_insn);
 622}
 623
 624static inline u32 bpf_prog_tag_scratch_size(const struct bpf_prog *prog)
 625{
 626	return round_up(bpf_prog_insn_size(prog) +
 627			sizeof(__be64) + 1, SHA_MESSAGE_BYTES);
 628}
 629
 630static inline unsigned int bpf_prog_size(unsigned int proglen)
 631{
 632	return max(sizeof(struct bpf_prog),
 633		   offsetof(struct bpf_prog, insns[proglen]));
 634}
 635
 636static inline bool bpf_prog_was_classic(const struct bpf_prog *prog)
 637{
 638	/* When classic BPF programs have been loaded and the arch
 639	 * does not have a classic BPF JIT (anymore), they have been
 640	 * converted via bpf_migrate_filter() to eBPF and thus always
 641	 * have an unspec program type.
 642	 */
 643	return prog->type == BPF_PROG_TYPE_UNSPEC;
 644}
 645
 646static inline u32 bpf_ctx_off_adjust_machine(u32 size)
 647{
 648	const u32 size_machine = sizeof(unsigned long);
 649
 650	if (size > size_machine && size % size_machine == 0)
 651		size = size_machine;
 652
 653	return size;
 654}
 655
 656static inline bool bpf_ctx_narrow_align_ok(u32 off, u32 size_access,
 657					   u32 size_default)
 658{
 659	size_default = bpf_ctx_off_adjust_machine(size_default);
 660	size_access  = bpf_ctx_off_adjust_machine(size_access);
 661
 662#ifdef __LITTLE_ENDIAN
 663	return (off & (size_default - 1)) == 0;
 664#else
 665	return (off & (size_default - 1)) + size_access == size_default;
 666#endif
 667}
 668
 669static inline bool
 670bpf_ctx_narrow_access_ok(u32 off, u32 size, u32 size_default)
 671{
 672	return bpf_ctx_narrow_align_ok(off, size, size_default) &&
 673	       size <= size_default && (size & (size - 1)) == 0;
 674}
 675
 676#define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0]))
 677
 678static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 679{
 680#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 681	fp->locked = 1;
 682	if (set_memory_ro((unsigned long)fp, fp->pages))
 683		fp->locked = 0;
 684#endif
 685}
 686
 687static inline void bpf_prog_unlock_ro(struct bpf_prog *fp)
 688{
 689#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 690	if (fp->locked) {
 691		WARN_ON_ONCE(set_memory_rw((unsigned long)fp, fp->pages));
 692		/* In case set_memory_rw() fails, we want to be the first
 693		 * to crash here instead of some random place later on.
 694		 */
 695		fp->locked = 0;
 696	}
 697#endif
 698}
 699
 700static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 701{
 702#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 703	hdr->locked = 1;
 704	if (set_memory_ro((unsigned long)hdr, hdr->pages))
 705		hdr->locked = 0;
 706#endif
 707}
 708
 709static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr)
 710{
 711#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 712	if (hdr->locked) {
 713		WARN_ON_ONCE(set_memory_rw((unsigned long)hdr, hdr->pages));
 714		/* In case set_memory_rw() fails, we want to be the first
 715		 * to crash here instead of some random place later on.
 716		 */
 717		hdr->locked = 0;
 718	}
 719#endif
 720}
 721
 722static inline struct bpf_binary_header *
 723bpf_jit_binary_hdr(const struct bpf_prog *fp)
 724{
 725	unsigned long real_start = (unsigned long)fp->bpf_func;
 726	unsigned long addr = real_start & PAGE_MASK;
 727
 728	return (void *)addr;
 729}
 730
 731#ifdef CONFIG_ARCH_HAS_SET_MEMORY
 732static inline int bpf_prog_check_pages_ro_single(const struct bpf_prog *fp)
 733{
 734	if (!fp->locked)
 735		return -ENOLCK;
 736	if (fp->jited) {
 737		const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp);
 738
 739		if (!hdr->locked)
 740			return -ENOLCK;
 741	}
 742
 743	return 0;
 744}
 745#endif
 746
 747int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
/* Shorthand for sk_filter_trim_cap() with a cap of 1; returns whatever
 * that call returns.
 */
static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
{
	return sk_filter_trim_cap(sk, skb, 1);
}
 752
 753struct bpf_prog *bpf_prog_select_runtime(struct bpf_prog *fp, int *err);
 754void bpf_prog_free(struct bpf_prog *fp);
 755
 756bool bpf_opcode_in_insntable(u8 code);
 757
 758struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags);
 759struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 760				  gfp_t gfp_extra_flags);
 761void __bpf_prog_free(struct bpf_prog *fp);
 762
/* Make @fp writable again via bpf_prog_unlock_ro(), then release it
 * with __bpf_prog_free().
 */
static inline void bpf_prog_unlock_free(struct bpf_prog *fp)
{
	bpf_prog_unlock_ro(fp);
	__bpf_prog_free(fp);
}
 768
 769typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter,
 770				       unsigned int flen);
 771
 772int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 773int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
 774			      bpf_aux_classic_check_t trans, bool save_orig);
 775void bpf_prog_destroy(struct bpf_prog *fp);
 776
 777int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 778int sk_attach_bpf(u32 ufd, struct sock *sk);
 779int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 780int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk);
 781int sk_detach_filter(struct sock *sk);
 782int sk_get_filter(struct sock *sk, struct sock_filter __user *filter,
 783		  unsigned int len);
 784
 785bool sk_filter_charge(struct sock *sk, struct sk_filter *fp);
 786void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp);
 787
 788u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 789#define __bpf_call_base_args \
 790	((u64 (*)(u64, u64, u64, u64, u64, const struct bpf_insn *)) \
 791	 __bpf_call_base)
 792
 793struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog);
 794void bpf_jit_compile(struct bpf_prog *prog);
 795bool bpf_helper_changes_pkt_data(void *func);
 796
 797static inline bool bpf_dump_raw_ok(void)
 798{
 799	/* Reconstruction of call-sites is dependent on kallsyms,
 800	 * thus make dump the same restriction.
 801	 */
 802	return kallsyms_show_value() == 1;
 803}
 804
 805struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off,
 806				       const struct bpf_insn *patch, u32 len);
 807
 808static inline int __xdp_generic_ok_fwd_dev(struct sk_buff *skb,
 809					   struct net_device *fwd)
 810{
 811	unsigned int len;
 812
 813	if (unlikely(!(fwd->flags & IFF_UP)))
 814		return -ENETDOWN;
 815
 816	len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN;
 817	if (skb->len > len)
 818		return -EMSGSIZE;
 819
 820	return 0;
 821}
 822
 823/* The pair of xdp_do_redirect and xdp_do_flush_map MUST be called in the
 824 * same cpu context. Further for best results no more than a single map
 825 * for the do_redirect/do_flush pair should be used. This limitation is
 826 * because we only track one map and force a flush when the map changes.
 827 * This does not appear to be a real limitation for existing software.
 828 */
 829int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
 830			    struct xdp_buff *xdp, struct bpf_prog *prog);
 831int xdp_do_redirect(struct net_device *dev,
 832		    struct xdp_buff *xdp,
 833		    struct bpf_prog *prog);
 834void xdp_do_flush_map(void);
 835
 836void bpf_warn_invalid_xdp_action(u32 act);
 837
 838struct sock *do_sk_redirect_map(struct sk_buff *skb);
 839struct sock *do_msg_redirect_map(struct sk_msg_buff *md);
 840
 841#ifdef CONFIG_BPF_JIT
 842extern int bpf_jit_enable;
 843extern int bpf_jit_harden;
 844extern int bpf_jit_kallsyms;
 845
 846typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
 847
 848struct bpf_binary_header *
 849bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
 850		     unsigned int alignment,
 851		     bpf_jit_fill_hole_t bpf_fill_ill_insns);
 852void bpf_jit_binary_free(struct bpf_binary_header *hdr);
 853
 854void bpf_jit_free(struct bpf_prog *fp);
 855
 856struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp);
 857void bpf_jit_prog_release_other(struct bpf_prog *fp, struct bpf_prog *fp_other);
 858
 859static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen,
 860				u32 pass, void *image)
 861{
 862	pr_err("flen=%u proglen=%u pass=%u image=%pK from=%s pid=%d\n", flen,
 863	       proglen, pass, image, current->comm, task_pid_nr(current));
 864
 865	if (image)
 866		print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET,
 867			       16, 1, image, proglen, false);
 868}
 869
 870static inline bool bpf_jit_is_ebpf(void)
 871{
 872# ifdef CONFIG_HAVE_EBPF_JIT
 873	return true;
 874# else
 875	return false;
 876# endif
 877}
 878
 879static inline bool ebpf_jit_enabled(void)
 880{
 881	return bpf_jit_enable && bpf_jit_is_ebpf();
 882}
 883
 884static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
 885{
 886	return fp->jited && bpf_jit_is_ebpf();
 887}
 888
 889static inline bool bpf_jit_blinding_enabled(struct bpf_prog *prog)
 890{
 891	/* These are the prerequisites, should someone ever have the
 892	 * idea to call blinding outside of them, we make sure to
 893	 * bail out.
 894	 */
 895	if (!bpf_jit_is_ebpf())
 896		return false;
 897	if (!prog->jit_requested)
 898		return false;
 899	if (!bpf_jit_harden)
 900		return false;
 901	if (bpf_jit_harden == 1 && capable(CAP_SYS_ADMIN))
 902		return false;
 903
 904	return true;
 905}
 906
 907static inline bool bpf_jit_kallsyms_enabled(void)
 908{
 909	/* There are a couple of corner cases where kallsyms should
 910	 * not be enabled f.e. on hardening.
 911	 */
 912	if (bpf_jit_harden)
 913		return false;
 914	if (!bpf_jit_kallsyms)
 915		return false;
 916	if (bpf_jit_kallsyms == 1)
 917		return true;
 918
 919	return false;
 920}
 921
 922const char *__bpf_address_lookup(unsigned long addr, unsigned long *size,
 923				 unsigned long *off, char *sym);
 924bool is_bpf_text_address(unsigned long addr);
 925int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 926		    char *sym);
 927
 928static inline const char *
 929bpf_address_lookup(unsigned long addr, unsigned long *size,
 930		   unsigned long *off, char **modname, char *sym)
 931{
 932	const char *ret = __bpf_address_lookup(addr, size, off, sym);
 933
 934	if (ret && modname)
 935		*modname = NULL;
 936	return ret;
 937}
 938
 939void bpf_prog_kallsyms_add(struct bpf_prog *fp);
 940void bpf_prog_kallsyms_del(struct bpf_prog *fp);
 941
 942#else /* CONFIG_BPF_JIT */
 943
/* Stubs used when CONFIG_BPF_JIT is not set: queries report that no
 * JIT and no JIT symbols exist, and the kallsyms hooks do nothing.
 */

/* Without a JIT, the eBPF JIT is never enabled. */
static inline bool ebpf_jit_enabled(void)
{
	return false;
}

/* Without a JIT, no program is ever eBPF-JITed. */
static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp)
{
	return false;
}

/* Without a JIT there is no image to release; just unlock and free @fp. */
static inline void bpf_jit_free(struct bpf_prog *fp)
{
	bpf_prog_unlock_free(fp);
}

/* Without a JIT, kallsyms exposure is always off. */
static inline bool bpf_jit_kallsyms_enabled(void)
{
	return false;
}

/* Without a JIT, no address resolves to a BPF symbol. */
static inline const char *
__bpf_address_lookup(unsigned long addr, unsigned long *size,
		     unsigned long *off, char *sym)
{
	return NULL;
}

/* Without a JIT, no address is BPF text. */
static inline bool is_bpf_text_address(unsigned long addr)
{
	return false;
}

/* Without a JIT there are no BPF symbols; every index yields -ERANGE. */
static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value,
				  char *type, char *sym)
{
	return -ERANGE;
}

/* Without a JIT, lookup always misses; @modname is left untouched. */
static inline const char *
bpf_address_lookup(unsigned long addr, unsigned long *size,
		   unsigned long *off, char **modname, char *sym)
{
	return NULL;
}

/* No-op without a JIT. */
static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp)
{
}

/* No-op without a JIT. */
static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp)
{
}
 996#endif /* CONFIG_BPF_JIT */
 997
 998void bpf_prog_kallsyms_del_subprogs(struct bpf_prog *fp);
 999void bpf_prog_kallsyms_del_all(struct bpf_prog *fp);
1000
1001#define BPF_ANC		BIT(15)
1002
1003static inline bool bpf_needs_clear_a(const struct sock_filter *first)
1004{
1005	switch (first->code) {
1006	case BPF_RET | BPF_K:
1007	case BPF_LD | BPF_W | BPF_LEN:
1008		return false;
1009
1010	case BPF_LD | BPF_W | BPF_ABS:
1011	case BPF_LD | BPF_H | BPF_ABS:
1012	case BPF_LD | BPF_B | BPF_ABS:
1013		if (first->k == SKF_AD_OFF + SKF_AD_ALU_XOR_X)
1014			return true;
1015		return false;
1016
1017	default:
1018		return true;
1019	}
1020}
1021
/* Map a classic instruction to the code used for ancillary loads.
 *
 * For an absolute load (BPF_LD | {W,H,B} | BPF_ABS) whose k equals
 * SKF_AD_OFF plus one of the ancillary offsets listed below, returns
 * BPF_ANC | SKF_AD_<name>. For every other instruction, including
 * absolute loads with any other k, returns ftest->code unchanged.
 * Triggers BUG_ON() if ftest->code already has the BPF_ANC bit set.
 *
 * Note that the BPF_ANCILLARY() helper macro defined inside the
 * function body is not #undef'ed and stays defined afterwards.
 */
static inline u16 bpf_anc_helper(const struct sock_filter *ftest)
{
	BUG_ON(ftest->code & BPF_ANC);

	switch (ftest->code) {
	case BPF_LD | BPF_W | BPF_ABS:
	case BPF_LD | BPF_H | BPF_ABS:
	case BPF_LD | BPF_B | BPF_ABS:
/* Expands to one 'case' of the inner switch that returns the tagged code. */
#define BPF_ANCILLARY(CODE)	case SKF_AD_OFF + SKF_AD_##CODE:	\
				return BPF_ANC | SKF_AD_##CODE
		switch (ftest->k) {
		BPF_ANCILLARY(PROTOCOL);
		BPF_ANCILLARY(PKTTYPE);
		BPF_ANCILLARY(IFINDEX);
		BPF_ANCILLARY(NLATTR);
		BPF_ANCILLARY(NLATTR_NEST);
		BPF_ANCILLARY(MARK);
		BPF_ANCILLARY(QUEUE);
		BPF_ANCILLARY(HATYPE);
		BPF_ANCILLARY(RXHASH);
		BPF_ANCILLARY(CPU);
		BPF_ANCILLARY(ALU_XOR_X);
		BPF_ANCILLARY(VLAN_TAG);
		BPF_ANCILLARY(VLAN_TAG_PRESENT);
		BPF_ANCILLARY(PAY_OFFSET);
		BPF_ANCILLARY(RANDOM);
		BPF_ANCILLARY(VLAN_TPID);
		}
		/* Fallthrough. */
	default:
		return ftest->code;
	}
}
1055
1056void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb,
1057					   int k, unsigned int size);
1058
/* Resolve offset @k to a pointer to @size bytes of @skb. Non-negative
 * offsets go through skb_header_pointer() with @buffer; negative
 * offsets are handed to bpf_internal_load_pointer_neg_helper(), which
 * does not take @buffer.
 */
static inline void *bpf_load_pointer(const struct sk_buff *skb, int k,
				     unsigned int size, void *buffer)
{
	if (k < 0)
		return bpf_internal_load_pointer_neg_helper(skb, k, size);

	return skb_header_pointer(skb, k, size, buffer);
}
1067
/* Report SKF_AD_MAX.
 * NOTE(review): presumably lets callers discover which ancillary
 * extensions exist - confirm against the users of this helper.
 */
static inline int bpf_tell_extensions(void)
{
	return SKF_AD_MAX;
}
1072
1073struct bpf_sock_addr_kern {
1074	struct sock *sk;
1075	struct sockaddr *uaddr;
1076	/* Temporary "register" to make indirect stores to nested structures
1077	 * defined above. We need three registers to make such a store, but
1078	 * only two (src and dst) are available at convert_ctx_access time
1079	 */
1080	u64 tmp_reg;
1081	void *t_ctx;	/* Attach type specific context. */
1082};
1083
1084struct bpf_sock_ops_kern {
1085	struct	sock *sk;
1086	u32	op;
1087	union {
1088		u32 args[4];
1089		u32 reply;
1090		u32 replylong[4];
1091	};
1092	u32	is_fullsock;
1093	u64	temp;			/* temp and everything after is not
1094					 * initialized to 0 before calling
1095					 * the BPF program. New fields that
1096					 * should be initialized to 0 should
1097					 * be inserted before temp.
1098					 * temp is scratch storage used by
1099					 * sock_ops_convert_ctx_access
1100					 * as temporary storage of a register.
1101					 */
1102};
1103
1104#endif /* __LINUX_FILTER_H__ */