Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

selftests/bpf: track tcp payload offset as scalar in xdp_synproxy

This change prepares syncookie_{tc,xdp} for an update in the callback
verification logic. To allow bpf_loop() verification to converge when
multiple callback iterations are considered:
- track offset inside TCP payload explicitly, not as a part of the
pointer;
- make sure that offset does not exceed MAX_PACKET_OFF enforced by
verifier;
- make sure that offset is tracked as unbound scalar between
iterations, otherwise verifier won't be able to infer that bpf_loop
callback reaches identical states.

Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
Link: https://lore.kernel.org/r/20231121020701.26440-2-eddyz87@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>

authored by

Eduard Zingerman and committed by
Alexei Starovoitov
977bc146 fcb905d8

+52 -32
+52 -32
tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
··· 53 53 #define DEFAULT_TTL 64 54 54 #define MAX_ALLOWED_PORTS 8 55 55 56 + #define MAX_PACKET_OFF 0xffff 57 + 56 58 #define swap(a, b) \ 57 59 do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) 58 60 ··· 185 183 } 186 184 187 185 struct tcpopt_context { 188 - __u8 *ptr; 189 - __u8 *end; 186 + void *data; 190 187 void *data_end; 191 188 __be32 *tsecr; 192 189 __u8 wscale; 193 190 bool option_timestamp; 194 191 bool option_sack; 192 + __u32 off; 195 193 }; 194 + 195 + static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz) 196 + { 197 + __u64 off = ctx->off; 198 + __u8 *data; 199 + 200 + /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */ 201 + if (off > MAX_PACKET_OFF - sz) 202 + return NULL; 203 + 204 + data = ctx->data + off; 205 + barrier_var(data); 206 + if (data + sz >= ctx->data_end) 207 + return NULL; 208 + 209 + ctx->off += sz; 210 + return data; 211 + } 196 212 197 213 static int tscookie_tcpopt_parse(struct tcpopt_context *ctx) 198 214 { 199 - __u8 opcode, opsize; 215 + __u8 *opcode, *opsize, *wscale, *tsecr; 216 + __u32 off = ctx->off; 200 217 201 - if (ctx->ptr >= ctx->end) 202 - return 1; 203 - if (ctx->ptr >= ctx->data_end) 218 + opcode = next(ctx, 1); 219 + if (!opcode) 204 220 return 1; 205 221 206 - opcode = ctx->ptr[0]; 207 - 208 - if (opcode == TCPOPT_EOL) 222 + if (*opcode == TCPOPT_EOL) 209 223 return 1; 210 - if (opcode == TCPOPT_NOP) { 211 - ++ctx->ptr; 224 + if (*opcode == TCPOPT_NOP) 212 225 return 0; 213 - } 214 226 215 - if (ctx->ptr + 1 >= ctx->end) 216 - return 1; 217 - if (ctx->ptr + 1 >= ctx->data_end) 218 - return 1; 219 - opsize = ctx->ptr[1]; 220 - if (opsize < 2) 227 + opsize = next(ctx, 1); 228 + if (!opsize || *opsize < 2) 221 229 return 1; 222 230 223 - if (ctx->ptr + opsize > ctx->end) 224 - return 1; 225 - 226 - switch (opcode) { 231 + switch (*opcode) { 227 232 case TCPOPT_WINDOW: 228 - if (opsize == TCPOLEN_WINDOW && ctx->ptr + TCPOLEN_WINDOW <= ctx->data_end) 229 - 
ctx->wscale = ctx->ptr[2] < TCP_MAX_WSCALE ? ctx->ptr[2] : TCP_MAX_WSCALE; 233 + wscale = next(ctx, 1); 234 + if (!wscale) 235 + return 1; 236 + if (*opsize == TCPOLEN_WINDOW) 237 + ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE; 230 238 break; 231 239 case TCPOPT_TIMESTAMP: 232 - if (opsize == TCPOLEN_TIMESTAMP && ctx->ptr + TCPOLEN_TIMESTAMP <= ctx->data_end) { 240 + tsecr = next(ctx, 4); 241 + if (!tsecr) 242 + return 1; 243 + if (*opsize == TCPOLEN_TIMESTAMP) { 233 244 ctx->option_timestamp = true; 234 245 /* Client's tsval becomes our tsecr. */ 235 - *ctx->tsecr = get_unaligned((__be32 *)(ctx->ptr + 2)); 246 + *ctx->tsecr = get_unaligned((__be32 *)tsecr); 236 247 } 237 248 break; 238 249 case TCPOPT_SACK_PERM: 239 - if (opsize == TCPOLEN_SACK_PERM) 250 + if (*opsize == TCPOLEN_SACK_PERM) 240 251 ctx->option_sack = true; 241 252 break; 242 253 } 243 254 244 - ctx->ptr += opsize; 255 + ctx->off = off + *opsize; 245 256 246 257 return 0; 247 258 } ··· 271 256 272 257 static __always_inline bool tscookie_init(struct tcphdr *tcp_header, 273 258 __u16 tcp_len, __be32 *tsval, 274 - __be32 *tsecr, void *data_end) 259 + __be32 *tsecr, void *data, void *data_end) 275 260 { 276 261 struct tcpopt_context loop_ctx = { 277 - .ptr = (__u8 *)(tcp_header + 1), 278 - .end = (__u8 *)tcp_header + tcp_len, 262 + .data = data, 279 263 .data_end = data_end, 280 264 .tsecr = tsecr, 281 265 .wscale = TS_OPT_WSCALE_MASK, 282 266 .option_timestamp = false, 283 267 .option_sack = false, 268 + /* Note: currently verifier would track .off as unbound scalar. 269 + * In case if verifier would at some point get smarter and 270 + * compute bounded value for this var, beware that it might 271 + * hinder bpf_loop() convergence validation. 
272 + */ 273 + .off = (__u8 *)(tcp_header + 1) - (__u8 *)data, 284 274 }; 285 275 u32 cookie; 286 276 ··· 655 635 cookie = (__u32)value; 656 636 657 637 if (tscookie_init((void *)hdr->tcp, hdr->tcp_len, 658 - &tsopt_buf[0], &tsopt_buf[1], data_end)) 638 + &tsopt_buf[0], &tsopt_buf[1], data, data_end)) 659 639 tsopt = tsopt_buf; 660 640 661 641 /* Check that there is enough space for a SYNACK. It also covers