Linux kernel mirror (for testing): git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git

Merge git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next

Daniel Borkmann says:

====================
pull-request: bpf-next 2019-11-20

The following pull-request contains BPF updates for your *net-next* tree.

We've added 81 non-merge commits during the last 17 day(s) which contain
a total of 120 files changed, 4958 insertions(+), 1081 deletions(-).

There are 3 trivial conflicts; resolve them by always taking the chunk from
196e8ca74886c433:

<<<<<<< HEAD
=======
void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
>>>>>>> 196e8ca74886c433dcfc64a809707074b936aaf5

<<<<<<< HEAD
void *bpf_map_area_alloc(u64 size, int numa_node)
=======
static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
>>>>>>> 196e8ca74886c433dcfc64a809707074b936aaf5

<<<<<<< HEAD
if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
=======
/* kmalloc()'ed memory can't be mmap()'ed */
if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
>>>>>>> 196e8ca74886c433dcfc64a809707074b936aaf5

The main changes are:

1) Addition of the BPF trampoline, which acts as a bridge between kernel functions,
BPF programs and other BPF programs, along with two new use cases: i) fentry/fexit
BPF programs for tracing with practically zero overhead to call into BPF (as
opposed to k[ret]probes) and ii) attachment of the former to networking-related
programs to see input/output of networking programs (covering the xdpdump use case),
from Alexei Starovoitov.

2) BPF array map mmap support and use in libbpf for global data maps; also a big
batch of libbpf improvements, among others, support for reading bitfields in a
relocatable manner (via libbpf's CO-RE helper API), from Andrii Nakryiko.

3) Extend s390x JIT with usage of relative long jumps and loads in order to lift
the current 64/512k size limits on JITed BPF programs there, from Ilya Leoshkevich.

4) Add BPF audit support and emit messages upon successful prog load and unload in
order to have a timeline of events, from Daniel Borkmann and Jiri Olsa.

5) Extension to libbpf and xdpsock sample programs to demo the shared umem mode
(XDP_SHARED_UMEM) as well as RX-only and TX-only sockets, from Magnus Karlsson.

6) Several follow-up bug fixes for libbpf's auto-pinning code and a new API
call named bpf_get_link_xdp_info() for retrieving the full set of prog
IDs attached to XDP, from Toke Høiland-Jørgensen.

7) Add BTF support for array of int, array of struct and multidimensional arrays
and enable it for skb->cb[] access in kfree_skb test, from Martin KaFai Lau.

8) Fix AF_XDP by using the correct number of channels from ethtool, from Luigi Rizzo.

9) Two fixes for BPF selftests: get rid of a hang in test_tc_tunnel and avoid
running xdping standalone, from Jiri Benc.

10) Various BPF selftest fixes when run with latest LLVM trunk, from Yonghong Song.

11) Fix a memory leak in BPF fentry test run data, from Colin Ian King.

12) Various smaller misc cleanups and improvements mostly all over BPF selftests and
samples, from Daniel T. Lee, Andre Guedes, Anders Roxell, Mao Wenan, Yue Haibing.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>

+4952 -1078

Documentation/networking/af_xdp.rst (+23 -5)

···
     {
         rr = (rr + 1) & (MAX_SOCKS - 1);
 
-        return bpf_redirect_map(&xsks_map, rr, 0);
+        return bpf_redirect_map(&xsks_map, rr, XDP_DROP);
     }
 
 Note, that since there is only a single set of FILL and COMPLETION
···
 to make sure that multiple processes or threads do not use these rings
 concurrently. There are no synchronization primitives in the
 libbpf code that protects multiple users at this point in time.
+
+Libbpf uses this mode if you create more than one socket tied to the
+same umem. However, note that you need to supply the
+XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD libbpf_flag with the
+xsk_socket__create calls and load your own XDP program as there is no
+built in one in libbpf that will route the traffic for you.
 
 XDP_USE_NEED_WAKEUP bind flag
 -----------------------------
···
 both, you will be able to both receive and send traffic from your
 application, but if you only want to do one of them, you can save
 resources by only setting up one of them. Both the FILL ring and the
-COMPLETION ring are mandatory if you have a UMEM tied to your socket,
-which is the normal case. But if the XDP_SHARED_UMEM flag is used, any
-socket after the first one does not have a UMEM and should in that
-case not have any FILL or COMPLETION rings created.
+COMPLETION ring are mandatory as you need to have a UMEM tied to your
+socket. But if the XDP_SHARED_UMEM flag is used, any socket after the
+first one does not have a UMEM and should in that case not have any
+FILL or COMPLETION rings created as the ones from the shared umem will
+be used. Note, that the rings are single-producer single-consumer, so
+do not try to access them from multiple processes at the same
+time. See the XDP_SHARED_UMEM section.
+
+In libbpf, you can create Rx-only and Tx-only sockets by supplying
+NULL to the rx and tx arguments, respectively, to the
+xsk_socket__create function.
+
+If you create a Tx-only socket, we recommend that you do not put any
+packets on the fill ring. If you do this, drivers might think you are
+going to receive something when you in fact will not, and this can
+negatively impact performance.
 
 XDP_UMEM_REG setsockopt
 -----------------------
Documentation/networking/filter.txt (+4 -4)

···
     callq  foo
     mov    %rax,%r13
     mov    %rbx,%rdi
-    mov    $0x2,%esi
-    mov    $0x3,%edx
-    mov    $0x4,%ecx
-    mov    $0x5,%r8d
+    mov    $0x6,%esi
+    mov    $0x7,%edx
+    mov    $0x8,%ecx
+    mov    $0x9,%r8d
     callq  bar
     add    %r13,%rax
     mov    -0x228(%rbp),%rbx
arch/s390/net/bpf_jit_comp.c (+369 -133)

···
 #include <linux/filter.h>
 #include <linux/init.h>
 #include <linux/bpf.h>
+#include <linux/mm.h>
+#include <linux/kernel.h>
 #include <asm/cacheflush.h>
 #include <asm/dis.h>
 #include <asm/facility.h>
···
 	int size;		/* Size of program and literal pool */
 	int size_prg;		/* Size of program */
 	int prg;		/* Current position in program */
-	int lit_start;		/* Start of literal pool */
-	int lit;		/* Current position in literal pool */
+	int lit32_start;	/* Start of 32-bit literal pool */
+	int lit32;		/* Current position in 32-bit literal pool */
+	int lit64_start;	/* Start of 64-bit literal pool */
+	int lit64;		/* Current position in 64-bit literal pool */
 	int base_ip;		/* Base address for literal pool */
-	int ret0_ip;		/* Address of return 0 */
 	int exit_ip;		/* Address of exit */
 	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
 	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
···
 	int labels[1];		/* Labels for local jumps */
 };
 
-#define BPF_SIZE_MAX	0xffff	/* Max size for program (16 bit branches) */
-
-#define SEEN_MEM	(1 << 0)	/* use mem[] for temporary storage */
-#define SEEN_RET0	(1 << 1)	/* ret0_ip points to a valid return 0 */
-#define SEEN_LITERAL	(1 << 2)	/* code uses literals */
-#define SEEN_FUNC	(1 << 3)	/* calls C functions */
-#define SEEN_TAIL_CALL	(1 << 4)	/* code uses tail calls */
-#define SEEN_REG_AX	(1 << 5)	/* code uses constant blinding */
+#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
+#define SEEN_LITERAL	BIT(1)		/* code uses literals */
+#define SEEN_FUNC	BIT(2)		/* calls C functions */
+#define SEEN_TAIL_CALL	BIT(3)		/* code uses tail calls */
 #define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)
 
 /*
···
 #define _EMIT2(op)						\
 ({								\
 	if (jit->prg_buf)					\
-		*(u16 *) (jit->prg_buf + jit->prg) = op;	\
+		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
 	jit->prg += 2;						\
 })
 
 #define EMIT2(op, b1, b2)					\
 ({								\
-	_EMIT2(op | reg(b1, b2));				\
+	_EMIT2((op) | reg(b1, b2));				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
···
 #define _EMIT4(op)						\
 ({								\
 	if (jit->prg_buf)					\
-		*(u32 *) (jit->prg_buf + jit->prg) = op;	\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
 	jit->prg += 4;						\
 })
 
 #define EMIT4(op, b1, b2)					\
 ({								\
-	_EMIT4(op | reg(b1, b2));				\
+	_EMIT4((op) | reg(b1, b2));				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
 #define EMIT4_RRF(op, b1, b2, b3)				\
 ({								\
-	_EMIT4(op | reg_high(b3) << 8 | reg(b1, b2));		\
+	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 	REG_SET_SEEN(b3);					\
···
 #define _EMIT4_DISP(op, disp)					\
 ({								\
 	unsigned int __disp = (disp) & 0xfff;			\
-	_EMIT4(op | __disp);					\
+	_EMIT4((op) | __disp);					\
 })
 
 #define EMIT4_DISP(op, b1, b2, disp)				\
 ({								\
-	_EMIT4_DISP(op | reg_high(b1) << 16 |			\
-		    reg_high(b2) << 8, disp);			\
+	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
+		    reg_high(b2) << 8, (disp));			\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
···
 #define EMIT4_IMM(op, b1, imm)					\
 ({								\
 	unsigned int __imm = (imm) & 0xffff;			\
-	_EMIT4(op | reg_high(b1) << 16 | __imm);		\
+	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
 	REG_SET_SEEN(b1);					\
 })
 
 #define EMIT4_PCREL(op, pcrel)					\
 ({								\
 	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
-	_EMIT4(op | __pcrel);					\
+	_EMIT4((op) | __pcrel);					\
+})
+
+#define EMIT4_PCREL_RIC(op, mask, target)			\
+({								\
+	int __rel = ((target) - jit->prg) / 2;			\
+	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
 })
 
 #define _EMIT6(op1, op2)					\
 ({								\
 	if (jit->prg_buf) {					\
-		*(u32 *) (jit->prg_buf + jit->prg) = op1;	\
-		*(u16 *) (jit->prg_buf + jit->prg + 4) = op2;	\
+		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
+		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
 	}							\
 	jit->prg += 6;						\
 })
···
 #define _EMIT6_DISP(op1, op2, disp)				\
 ({								\
 	unsigned int __disp = (disp) & 0xfff;			\
-	_EMIT6(op1 | __disp, op2);				\
+	_EMIT6((op1) | __disp, op2);				\
 })
 
 #define _EMIT6_DISP_LH(op1, op2, disp)				\
 ({								\
-	u32 _disp = (u32) disp;					\
+	u32 _disp = (u32) (disp);				\
 	unsigned int __disp_h = _disp & 0xff000;		\
 	unsigned int __disp_l = _disp & 0x00fff;		\
-	_EMIT6(op1 | __disp_l, op2 | __disp_h >> 4);		\
+	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
 })
 
 #define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
 ({								\
-	_EMIT6_DISP_LH(op1 | reg(b1, b2) << 16 |		\
+	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
 		       reg_high(b3) << 8, op2, disp);		\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
···
 #define EMIT6_PCREL_LABEL(op1, op2, b1, b2, label, mask)	\
 ({								\
 	int rel = (jit->labels[label] - jit->prg) >> 1;		\
-	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff),	\
-	       op2 | mask << 12);				\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
+	       (op2) | (mask) << 12);				\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
···
 #define EMIT6_PCREL_IMM_LABEL(op1, op2, b1, imm, label, mask)	\
 ({								\
 	int rel = (jit->labels[label] - jit->prg) >> 1;		\
-	_EMIT6(op1 | (reg_high(b1) | mask) << 16 |		\
-		(rel & 0xffff), op2 | (imm & 0xff) << 8);	\
+	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
+		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
 	REG_SET_SEEN(b1);					\
-	BUILD_BUG_ON(((unsigned long) imm) > 0xff);		\
+	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
 })
 
 #define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
 ({								\
 	/* Branch instruction needs 6 bytes */			\
-	int rel = (addrs[i + off + 1] - (addrs[i + 1] - 6)) / 2;\
-	_EMIT6(op1 | reg(b1, b2) << 16 | (rel & 0xffff), op2 | mask);	\
+	int rel = (addrs[(i) + (off) + 1] - (addrs[(i) + 1] - 6)) / 2;\
+	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
 	REG_SET_SEEN(b1);					\
 	REG_SET_SEEN(b2);					\
 })
 
 #define EMIT6_PCREL_RILB(op, b, target)				\
 ({								\
-	int rel = (target - jit->prg) / 2;			\
-	_EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff);	\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
 	REG_SET_SEEN(b);					\
 })
 
 #define EMIT6_PCREL_RIL(op, target)				\
 ({								\
-	int rel = (target - jit->prg) / 2;			\
-	_EMIT6(op | rel >> 16, rel & 0xffff);			\
+	unsigned int rel = (int)((target) - jit->prg) / 2;	\
+	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
+})
+
+#define EMIT6_PCREL_RILC(op, mask, target)			\
+({								\
+	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
 })
 
 #define _EMIT6_IMM(op, imm)					\
 ({								\
 	unsigned int __imm = (imm);				\
-	_EMIT6(op | (__imm >> 16), __imm & 0xffff);		\
+	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
 })
 
 #define EMIT6_IMM(op, b1, imm)					\
 ({								\
-	_EMIT6_IMM(op | reg_high(b1) << 16, imm);		\
+	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
 	REG_SET_SEEN(b1);					\
+})
+
+#define _EMIT_CONST_U32(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit32;					\
+	if (jit->prg_buf)					\
+		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
+	jit->lit32 += 4;					\
+	ret;							\
 })
 
 #define EMIT_CONST_U32(val)					\
 ({								\
-	unsigned int ret;					\
-	ret = jit->lit - jit->base_ip;				\
 	jit->seen |= SEEN_LITERAL;				\
+	_EMIT_CONST_U32(val) - jit->base_ip;			\
+})
+
+#define _EMIT_CONST_U64(val)					\
+({								\
+	unsigned int ret;					\
+	ret = jit->lit64;					\
 	if (jit->prg_buf)					\
-		*(u32 *) (jit->prg_buf + jit->lit) = (u32) val;	\
-	jit->lit += 4;						\
+		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
+	jit->lit64 += 8;					\
 	ret;							\
 })
 
 #define EMIT_CONST_U64(val)					\
 ({								\
-	unsigned int ret;					\
-	ret = jit->lit - jit->base_ip;				\
 	jit->seen |= SEEN_LITERAL;				\
-	if (jit->prg_buf)					\
-		*(u64 *) (jit->prg_buf + jit->lit) = (u64) val;	\
-	jit->lit += 8;						\
-	ret;							\
+	_EMIT_CONST_U64(val) - jit->base_ip;			\
 })
 
 #define EMIT_ZERO(b1)						\
···
 		REG_SET_SEEN(b1);				\
 	}							\
 })
+
+/*
+ * Return whether this is the first pass. The first pass is special, since we
+ * don't know any sizes yet, and thus must be conservative.
+ */
+static bool is_first_pass(struct bpf_jit *jit)
+{
+	return jit->size == 0;
+}
+
+/*
+ * Return whether this is the code generation pass. The code generation pass is
+ * special, since we should change as little as possible.
+ */
+static bool is_codegen_pass(struct bpf_jit *jit)
+{
+	return jit->prg_buf;
+}
+
+/*
+ * Return whether "rel" can be encoded as a short PC-relative offset
+ */
+static bool is_valid_rel(int rel)
+{
+	return rel >= -65536 && rel <= 65534;
+}
+
+/*
+ * Return whether "off" can be reached using a short PC-relative offset
+ */
+static bool can_use_rel(struct bpf_jit *jit, int off)
+{
+	return is_valid_rel(off - jit->prg);
+}
+
+/*
+ * Return whether given displacement can be encoded using
+ * Long-Displacement Facility
+ */
+static bool is_valid_ldisp(int disp)
+{
+	return disp >= -524288 && disp <= 524287;
+}
+
+/*
+ * Return whether the next 32-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit32 - jit->base_ip);
+}
+
+/*
+ * Return whether the next 64-bit literal pool entry can be referenced using
+ * Long-Displacement Facility
+ */
+static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
+{
+	return is_valid_ldisp(jit->lit64 - jit->base_ip);
+}
 
 /*
  * Fill whole space with illegal instructions
···
  */
 static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
 {
-
+	const int last = 15, save_restore_size = 6;
 	int re = 6, rs;
+
+	if (is_first_pass(jit)) {
+		/*
+		 * We don't know yet which registers are used. Reserve space
+		 * conservatively.
+		 */
+		jit->prg += (last - re + 1) * save_restore_size;
+		return;
+	}
 
 	do {
 		rs = get_start(jit, re);
···
 		else
 			restore_regs(jit, rs, re, stack_depth);
 		re++;
-	} while (re <= 15);
+	} while (re <= last);
 }
 
 /*
···
 	/* Save registers */
 	save_restore_regs(jit, REGS_SAVE, stack_depth);
 	/* Setup literal pool */
-	if (jit->seen & SEEN_LITERAL) {
-		/* basr %r13,0 */
-		EMIT2(0x0d00, REG_L, REG_0);
-		jit->base_ip = jit->prg;
+	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
+		if (!is_first_pass(jit) &&
+		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
+			/* basr %l,0 */
+			EMIT2(0x0d00, REG_L, REG_0);
+			jit->base_ip = jit->prg;
+		} else {
+			/* larl %l,lit32_start */
+			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
+			jit->base_ip = jit->lit32_start;
+		}
 	}
 	/* Setup stack and backchain */
-	if (jit->seen & SEEN_STACK) {
-		if (jit->seen & SEEN_FUNC)
+	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
 			/* lgr %w1,%r15 (backchain) */
 			EMIT4(0xb9040000, REG_W1, REG_15);
 		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
 		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
 		/* aghi %r15,-STK_OFF */
 		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
-		if (jit->seen & SEEN_FUNC)
+		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
 			/* stg %w1,152(%r15) (backchain) */
 			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
 				      REG_15, 152);
···
  */
 static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
 {
-	/* Return 0 */
-	if (jit->seen & SEEN_RET0) {
-		jit->ret0_ip = jit->prg;
-		/* lghi %b0,0 */
-		EMIT4_IMM(0xa7090000, BPF_REG_0, 0);
-	}
 	jit->exit_ip = jit->prg;
 	/* Load exit code: lgr %r2,%b0 */
 	EMIT4(0xb9040000, REG_2, BPF_REG_0);
···
 		_EMIT2(0x07fe);
 
 	if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable &&
-	    (jit->seen & SEEN_FUNC)) {
+	    (is_first_pass(jit) || (jit->seen & SEEN_FUNC))) {
 		jit->r1_thunk_ip = jit->prg;
 		/* Generate __s390_indirect_jump_r1 thunk */
 		if (test_facility(35)) {
···
 			 int i, bool extra_pass)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
-	int jmp_off, last, insn_count = 1;
 	u32 dst_reg = insn->dst_reg;
 	u32 src_reg = insn->src_reg;
+	int last, insn_count = 1;
 	u32 *addrs = jit->addrs;
 	s32 imm = insn->imm;
 	s16 off = insn->off;
 	unsigned int mask;
 
-	if (dst_reg == BPF_REG_AX || src_reg == BPF_REG_AX)
-		jit->seen |= SEEN_REG_AX;
 	switch (insn->code) {
 	/*
 	 * BPF_MOV
···
 		u64 imm64;
 
 		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
-		/* lg %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm64));
+		/* lgrl %dst,imm */
+		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
 		insn_count = 2;
 		break;
 	}
···
 		EMIT4_IMM(0xa7080000, REG_W0, 0);
 		/* lr %w1,%dst */
 		EMIT2(0x1800, REG_W1, dst_reg);
-		/* dl %w0,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U32(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
+			/* dl %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U32(imm));
+		} else {
+			/* lgfrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
+					 _EMIT_CONST_U32(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlr %w0,%dst */
+			EMIT4(0xb9970000, REG_W0, dst_reg);
+		}
 		/* llgfr %dst,%rc */
 		EMIT4(0xb9160000, dst_reg, rc_reg);
 		if (insn_is_zext(&insn[1]))
···
 		EMIT4_IMM(0xa7090000, REG_W0, 0);
 		/* lgr %w1,%dst */
 		EMIT4(0xb9040000, REG_W1, dst_reg);
-		/* dlg %w0,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* dlg %w0,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %dst,imm */
+			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* dlgr %w0,%dst */
+			EMIT4(0xb9870000, REG_W0, dst_reg);
+		}
 		/* lgr %dst,%rc */
 		EMIT4(0xb9040000, dst_reg, rc_reg);
 		break;
···
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
-		/* ng %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0080, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* ng %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0080,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ngr %dst,%w0 */
+			EMIT4(0xb9800000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_OR
···
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
-		/* og %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0081, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* og %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0081,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* ogr %dst,%w0 */
+			EMIT4(0xb9810000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_XOR
···
 		EMIT_ZERO(dst_reg);
 		break;
 	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
-		/* xg %dst,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0082, dst_reg, REG_0, REG_L,
-			      EMIT_CONST_U64(imm));
+		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
+			/* xg %dst,<d(imm)>(%l) */
+			EMIT6_DISP_LH(0xe3000000, 0x0082,
+				      dst_reg, REG_0, REG_L,
+				      EMIT_CONST_U64(imm));
+		} else {
+			/* lgrl %w0,imm */
+			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
+					 _EMIT_CONST_U64(imm));
+			jit->seen |= SEEN_LITERAL;
+			/* xgr %dst,%w0 */
+			EMIT4(0xb9820000, dst_reg, REG_W0);
+		}
 		break;
 	/*
 	 * BPF_LSH
···
 
 		REG_SET_SEEN(BPF_REG_5);
 		jit->seen |= SEEN_FUNC;
-		/* lg %w1,<d(imm)>(%l) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L,
-			      EMIT_CONST_U64(func));
+		/* lgrl %w1,func */
+		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
 		if (__is_defined(CC_USING_EXPOLINE) && !nospec_disable) {
 			/* brasl %r14,__s390_indirect_jump_r1 */
 			EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
···
 		/* llgf %w1,map.max_entries(%b2) */
 		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
 			      offsetof(struct bpf_array, map.max_entries));
-		/* clrj %b3,%w1,0xa,label0: if (u32)%b3 >= (u32)%w1 goto out */
-		EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
-				  REG_W1, 0, 0xa);
+		/* if ((u32)%b3 >= (u32)%w1) goto out; */
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+			/* clrj %b3,%w1,0xa,label0 */
+			EMIT6_PCREL_LABEL(0xec000000, 0x0077, BPF_REG_3,
+					  REG_W1, 0, 0xa);
+		} else {
+			/* clr %b3,%w1 */
+			EMIT2(0x1500, BPF_REG_3, REG_W1);
+			/* brcl 0xa,label0 */
+			EMIT6_PCREL_RILC(0xc0040000, 0xa, jit->labels[0]);
+		}
 
 		/*
 		 * if (tail_call_cnt++ > MAX_TAIL_CALL_CNT)
···
 		EMIT4_IMM(0xa7080000, REG_W0, 1);
 		/* laal %w1,%w0,off(%r15) */
 		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
-		/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
-		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
-				      MAX_TAIL_CALL_CNT, 0, 0x2);
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+			/* clij %w1,MAX_TAIL_CALL_CNT,0x2,label0 */
+			EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007f, REG_W1,
+					      MAX_TAIL_CALL_CNT, 0, 0x2);
+		} else {
+			/* clfi %w1,MAX_TAIL_CALL_CNT */
+			EMIT6_IMM(0xc20f0000, REG_W1, MAX_TAIL_CALL_CNT);
+			/* brcl 0x2,label0 */
+			EMIT6_PCREL_RILC(0xc0040000, 0x2, jit->labels[0]);
+		}
 
 		/*
 		 * prog = array->ptrs[index];
···
 		EMIT4(0xb9160000, REG_1, BPF_REG_3);
 		/* sllg %r1,%r1,3: %r1 *= 8 */
 		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
-		/* lg %r1,prog(%b2,%r1) */
-		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, BPF_REG_2,
+		/* ltg %r1,prog(%b2,%r1) */
+		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
 			      REG_1, offsetof(struct bpf_array, ptrs));
-		/* clgij %r1,0,0x8,label0 */
-		EMIT6_PCREL_IMM_LABEL(0xec000000, 0x007d, REG_1, 0, 0, 0x8);
+		if (!is_first_pass(jit) && can_use_rel(jit, jit->labels[0])) {
+			/* brc 0x8,label0 */
+			EMIT4_PCREL_RIC(0xa7040000, 0x8, jit->labels[0]);
+		} else {
+			/* brcl 0x8,label0 */
+			EMIT6_PCREL_RILC(0xc0040000, 0x8, jit->labels[0]);
+		}
 
 		/*
 		 * Restore registers before calling function
···
 		break;
 	case BPF_JMP | BPF_EXIT: /* return b0 */
 		last = (i == fp->len - 1) ? 1 : 0;
-		if (last && !(jit->seen & SEEN_RET0))
+		if (last)
 			break;
 		/* j <exit> */
 		EMIT4_PCREL(0xa7f40000, jit->exit_ip - jit->prg);
···
 		goto branch_oc;
 branch_ks:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* crj or cgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
-			    dst_reg, REG_W1, i, off, mask);
+		/* cfi or cgfi %dst,imm */
+		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
+			  dst_reg, imm);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_ku:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* lgfi %w1,imm (load sign extend imm) */
-		EMIT6_IMM(0xc0010000, REG_W1, imm);
-		/* clrj or clgrj %dst,%w1,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
-			    dst_reg, REG_W1, i, off, mask);
+		/* clfi or clgfi %dst,imm */
+		EMIT6_IMM(is_jmp32 ? 0xc20f0000 : 0xc20e0000,
+			  dst_reg, imm);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_xs:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* crj or cgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
-			    dst_reg, src_reg, i, off, mask);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* crj or cgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* cr or cgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1900, dst_reg, src_reg);
+			else
+				EMIT4(0xb9200000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_xu:
 		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
-		/* clrj or clgrj %dst,%src,mask,off */
-		EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
-			    dst_reg, src_reg, i, off, mask);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* clrj or clgrj %dst,%src,mask,off */
+			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
+				    dst_reg, src_reg, i, off, mask);
+		} else {
+			/* clr or clgr %dst,%src */
+			if (is_jmp32)
+				EMIT2(0x1500, dst_reg, src_reg);
+			else
+				EMIT4(0xb9210000, dst_reg, src_reg);
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 branch_oc:
-		/* brc mask,jmp_off (branch instruction needs 4 bytes) */
-		jmp_off = addrs[i + off + 1] - (addrs[i + 1] - 4);
-		EMIT4_PCREL(0xa7040000 | mask << 8, jmp_off);
+		if (!is_first_pass(jit) &&
+		    can_use_rel(jit, addrs[i + off + 1])) {
+			/* brc mask,off */
+			EMIT4_PCREL_RIC(0xa7040000,
+					mask >> 12, addrs[i + off + 1]);
+		} else {
+			/* brcl mask,off */
+			EMIT6_PCREL_RILC(0xc0040000,
+					 mask >> 12, addrs[i + off + 1]);
+		}
 		break;
 	}
 	default: /* too complex, give up */
···
 }
 
 /*
+ * Return whether new i-th instruction address does not violate any invariant
+ */
+static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
+{
+	/* On the first pass anything goes */
+	if (is_first_pass(jit))
+		return true;
+
+	/* The codegen pass must not change anything */
+	if (is_codegen_pass(jit))
+		return jit->addrs[i] == jit->prg;
+
+	/* Passes in between must not increase code size */
+	return jit->addrs[i] >= jit->prg;
+}
+
+/*
+ * Update the address of i-th instruction
+ */
+static int bpf_set_addr(struct bpf_jit *jit, int i)
+{
+	if (!bpf_is_new_addr_sane(jit, i))
+		return -1;
+	jit->addrs[i] = jit->prg;
+	return 0;
+}
+
+/*
  * Compile eBPF program into s390x code
  */
 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
 			bool extra_pass)
 {
-	int i, insn_count;
+	int i, insn_count, lit32_size, lit64_size;
 
-	jit->lit = jit->lit_start;
+	jit->lit32 = jit->lit32_start;
+	jit->lit64 = jit->lit64_start;
 	jit->prg = 0;
 
 	bpf_jit_prologue(jit, fp->aux->stack_depth);
+	if (bpf_set_addr(jit, 0) < 0)
+		return -1;
 	for (i = 0; i < fp->len; i += insn_count) {
 		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
 		if (insn_count < 0)
 			return -1;
 		/* Next instruction address */
-		jit->addrs[i + insn_count] = jit->prg;
+		if (bpf_set_addr(jit, i + insn_count) < 0)
+			return -1;
 	}
 	bpf_jit_epilogue(jit, fp->aux->stack_depth);
 
-	jit->lit_start = jit->prg;
-	jit->size = jit->lit;
+	lit32_size = jit->lit32 - jit->lit32_start;
+	lit64_size = jit->lit64 - jit->lit64_start;
+	jit->lit32_start = jit->prg;
+	if (lit32_size)
+		jit->lit32_start = ALIGN(jit->lit32_start, 4);
+	jit->lit64_start = jit->lit32_start + lit32_size;
+	if (lit64_size)
+		jit->lit64_start = ALIGN(jit->lit64_start, 8);
+	jit->size = jit->lit64_start + lit64_size;
 	jit->size_prg = jit->prg;
 	return 0;
 }
···
 	}
 
 	memset(&jit, 0, sizeof(jit));
-	jit.addrs = kcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
+	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
 	if (jit.addrs == NULL) {
 		fp = orig_fp;
 		goto out;
···
 	/*
 	 * Final pass: Allocate and generate program
 	 */
-	if (jit.size >= BPF_SIZE_MAX) {
-		fp = orig_fp;
-		goto free_addrs;
-	}
-
-	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 2, jit_fill_hole);
+	header = bpf_jit_binary_alloc(jit.size, &jit.prg_buf, 8, jit_fill_hole);
 	if (!header) {
 		fp = orig_fp;
 		goto free_addrs;
···
 	if (!fp->is_func || extra_pass) {
 		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
 free_addrs:
-		kfree(jit.addrs);
+		kvfree(jit.addrs);
 		kfree(jit_data);
 		fp->aux->jit_data = NULL;
 	}
+18 -6
arch/x86/include/asm/text-patching.h
···
 #define POKE_MAX_OPCODE_SIZE	5

 struct text_poke_loc {
-	void *detour;
 	void *addr;
-	size_t len;
-	const char opcode[POKE_MAX_OPCODE_SIZE];
+	int len;
+	s32 rel32;
+	u8 opcode;
+	const u8 text[POKE_MAX_OPCODE_SIZE];
 };

 extern void text_poke_early(void *addr, const void *opcode, size_t len);
···
 extern void *text_poke(void *addr, const void *opcode, size_t len);
 extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len);
 extern int poke_int3_handler(struct pt_regs *regs);
-extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler);
+extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate);
 extern void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries);
+extern void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+			       const void *opcode, size_t len, const void *emulate);
 extern int after_bootmem;
 extern __ro_after_init struct mm_struct *poking_mm;
 extern __ro_after_init unsigned long poking_addr;
···
 	regs->ip = ip;
 }

-#define INT3_INSN_SIZE 1
-#define CALL_INSN_SIZE 5
+#define INT3_INSN_SIZE		1
+#define INT3_INSN_OPCODE	0xCC
+
+#define CALL_INSN_SIZE		5
+#define CALL_INSN_OPCODE	0xE8
+
+#define JMP32_INSN_SIZE		5
+#define JMP32_INSN_OPCODE	0xE9
+
+#define JMP8_INSN_SIZE		2
+#define JMP8_INSN_OPCODE	0xEB

 static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
 {
+101 -31
arch/x86/kernel/alternative.c
···
 int poke_int3_handler(struct pt_regs *regs)
 {
 	struct text_poke_loc *tp;
-	unsigned char int3 = 0xcc;
 	void *ip;

 	/*
 	 * Having observed our INT3 instruction, we now must observe
 	 * bp_patching.nr_entries.
 	 *
-	 * 	nr_entries != 0		INT3
-	 * 	WMB			RMB
-	 * 	write INT3		if (nr_entries)
+	 *	nr_entries != 0		INT3
+	 *	WMB			RMB
+	 *	write INT3		if (nr_entries)
 	 *
 	 * Idem for other elements in bp_patching.
 	 */
···
 		return 0;

 	/*
-	 * Discount the sizeof(int3). See text_poke_bp_batch().
+	 * Discount the INT3. See text_poke_bp_batch().
 	 */
-	ip = (void *) regs->ip - sizeof(int3);
+	ip = (void *) regs->ip - INT3_INSN_SIZE;

 	/*
 	 * Skip the binary search if there is a single member in the vector.
···
 			return 0;
 	}

-	/* set up the specified breakpoint detour */
-	regs->ip = (unsigned long) tp->detour;
+	ip += tp->len;
+
+	switch (tp->opcode) {
+	case INT3_INSN_OPCODE:
+		/*
+		 * Someone poked an explicit INT3, they'll want to handle it,
+		 * do not consume.
+		 */
+		return 0;
+
+	case CALL_INSN_OPCODE:
+		int3_emulate_call(regs, (long)ip + tp->rel32);
+		break;
+
+	case JMP32_INSN_OPCODE:
+	case JMP8_INSN_OPCODE:
+		int3_emulate_jmp(regs, (long)ip + tp->rel32);
+		break;
+
+	default:
+		BUG();
+	}

 	return 1;
 }
···
  * synchronization using int3 breakpoint.
  *
  * The way it is done:
- *	- For each entry in the vector:
+ * - For each entry in the vector:
  *	- add a int3 trap to the address that will be patched
  *	- sync cores
  *	- For each entry in the vector:
···
  */
 void text_poke_bp_batch(struct text_poke_loc *tp, unsigned int nr_entries)
 {
-	int patched_all_but_first = 0;
-	unsigned char int3 = 0xcc;
+	unsigned char int3 = INT3_INSN_OPCODE;
 	unsigned int i;
+	int do_sync;

 	lockdep_assert_held(&text_mutex);
···
 	/*
 	 * Second step: update all but the first byte of the patched range.
 	 */
-	for (i = 0; i < nr_entries; i++) {
+	for (do_sync = 0, i = 0; i < nr_entries; i++) {
 		if (tp[i].len - sizeof(int3) > 0) {
 			text_poke((char *)tp[i].addr + sizeof(int3),
-				  (const char *)tp[i].opcode + sizeof(int3),
+				  (const char *)tp[i].text + sizeof(int3),
 				  tp[i].len - sizeof(int3));
-			patched_all_but_first++;
+			do_sync++;
 		}
 	}

-	if (patched_all_but_first) {
+	if (do_sync) {
 		/*
 		 * According to Intel, this core syncing is very likely
 		 * not necessary and we'd be safe even without it. But
···
 	/*
 	 * Third step: replace the first byte (int3) by the first byte of
 	 * replacing opcode.
 	 */
-	for (i = 0; i < nr_entries; i++)
-		text_poke(tp[i].addr, tp[i].opcode, sizeof(int3));
+	for (do_sync = 0, i = 0; i < nr_entries; i++) {
+		if (tp[i].text[0] == INT3_INSN_OPCODE)
+			continue;

-	on_each_cpu(do_sync_core, NULL, 1);
+		text_poke(tp[i].addr, tp[i].text, sizeof(int3));
+		do_sync++;
+	}
+
+	if (do_sync)
+		on_each_cpu(do_sync_core, NULL, 1);
+
 	/*
 	 * sync_core() implies an smp_mb() and orders this store against
 	 * the writing of the new instruction.
 	 */
 	bp_patching.vec = NULL;
 	bp_patching.nr_entries = 0;
+}
+
+void text_poke_loc_init(struct text_poke_loc *tp, void *addr,
+			const void *opcode, size_t len, const void *emulate)
+{
+	struct insn insn;
+
+	if (!opcode)
+		opcode = (void *)tp->text;
+	else
+		memcpy((void *)tp->text, opcode, len);
+
+	if (!emulate)
+		emulate = opcode;
+
+	kernel_insn_init(&insn, emulate, MAX_INSN_SIZE);
+	insn_get_length(&insn);
+
+	BUG_ON(!insn_complete(&insn));
+	BUG_ON(len != insn.length);
+
+	tp->addr = addr;
+	tp->len = len;
+	tp->opcode = insn.opcode.bytes[0];
+
+	switch (tp->opcode) {
+	case INT3_INSN_OPCODE:
+		break;
+
+	case CALL_INSN_OPCODE:
+	case JMP32_INSN_OPCODE:
+	case JMP8_INSN_OPCODE:
+		tp->rel32 = insn.immediate.value;
+		break;
+
+	default: /* assume NOP */
+		switch (len) {
+		case 2: /* NOP2 -- emulate as JMP8+0 */
+			BUG_ON(memcmp(emulate, ideal_nops[len], len));
+			tp->opcode = JMP8_INSN_OPCODE;
+			tp->rel32 = 0;
+			break;
+
+		case 5: /* NOP5 -- emulate as JMP32+0 */
+			BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], len));
+			tp->opcode = JMP32_INSN_OPCODE;
+			tp->rel32 = 0;
+			break;
+
+		default: /* unknown instruction */
+			BUG();
+		}
+		break;
+	}
 }

 /**
···
  * dynamically allocated memory. This function should be used when it is
  * not possible to allocate memory.
  */
-void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler)
+void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate)
 {
-	struct text_poke_loc tp = {
-		.detour = handler,
-		.addr = addr,
-		.len = len,
-	};
+	struct text_poke_loc tp;

-	if (len > POKE_MAX_OPCODE_SIZE) {
-		WARN_ONCE(1, "len is larger than %d\n", POKE_MAX_OPCODE_SIZE);
-		return;
-	}
-
-	memcpy((void *)tp.opcode, opcode, len);
-
+	text_poke_loc_init(&tp, addr, opcode, len, emulate);
 	text_poke_bp_batch(&tp, 1);
 }
+3 -6
arch/x86/kernel/jump_label.c
···
 		return;
 	}

-	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE,
-		     (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE);
+	text_poke_bp((void *)jump_entry_code(entry), &code, JUMP_LABEL_NOP_SIZE, NULL);
 }

 void arch_jump_label_transform(struct jump_entry *entry,
···
 	}

 	__jump_label_set_jump_code(entry, type,
-				   (union jump_code_union *) &tp->opcode, 0);
+				   (union jump_code_union *)&tp->text, 0);

-	tp->addr = entry_code;
-	tp->detour = entry_code + JUMP_LABEL_NOP_SIZE;
-	tp->len = JUMP_LABEL_NOP_SIZE;
+	text_poke_loc_init(tp, entry_code, NULL, JUMP_LABEL_NOP_SIZE, NULL);

 	tp_vec_nr++;
+8 -3
arch/x86/kernel/kprobes/opt.c
···
 	insn_buff[0] = RELATIVEJUMP_OPCODE;
 	*(s32 *)(&insn_buff[1]) = rel;

-	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-		     op->optinsn.insn);
+	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL);

 	list_del_init(&op->list);
 }
···
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
 {
 	u8 insn_buff[RELATIVEJUMP_SIZE];
+	u8 emulate_buff[RELATIVEJUMP_SIZE];

 	/* Set int3 to first byte for kprobes */
 	insn_buff[0] = BREAKPOINT_INSTRUCTION;
 	memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE);
+
+	emulate_buff[0] = RELATIVEJUMP_OPCODE;
+	*(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn -
+			((long)op->kp.addr + RELATIVEJUMP_SIZE));
+
 	text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE,
-		     op->optinsn.insn);
+		     emulate_buff);
 }

 /*
+366 -58
arch/x86/net/bpf_jit_comp.c
···
 #include <linux/filter.h>
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
+#include <linux/memory.h>
 #include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
+#include <asm/text-patching.h>

 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
···
 /* Pick a register outside of BPF range for JIT internal work */
 #define AUX_REG (MAX_BPF_JIT_REG + 1)
+#define X86_REG_R9 (MAX_BPF_JIT_REG + 2)

 /*
  * The following table maps BPF registers to x86-64 registers.
···
  * register in load/store instructions, it always needs an
  * extra byte of encoding and is callee saved.
  *
- * Also x86-64 register R9 is unused. x86-64 register R10 is
- * used for blinding (if enabled).
+ * x86-64 register R9 is not used by BPF programs, but can be used by BPF
+ * trampoline. x86-64 register R10 is used for blinding (if enabled).
  */
 static const int reg2hex[] = {
 	[BPF_REG_0] = 0,  /* RAX */
···
 	[BPF_REG_FP] = 5, /* RBP readonly */
 	[BPF_REG_AX] = 2, /* R10 temp register */
 	[AUX_REG] = 3,    /* R11 temp register */
+	[X86_REG_R9] = 1, /* R9 register, 6th function argument */
 };

 static const int reg2pt_regs[] = {
···
 			     BIT(BPF_REG_7) |
 			     BIT(BPF_REG_8) |
 			     BIT(BPF_REG_9) |
+			     BIT(X86_REG_R9) |
 			     BIT(BPF_REG_AX));
 }
···
 /* Maximum number of bytes emitted while JITing one eBPF insn */
 #define BPF_MAX_INSN_SIZE	128
 #define BPF_INSN_SAFETY		64
+/* number of bytes emit_call() needs to generate call instruction */
+#define X86_CALL_SIZE		5

-#define PROLOGUE_SIZE 20
+#define PROLOGUE_SIZE		25

 /*
  * Emit x86-64 prologue code for BPF program and check its size.
···
 static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
 {
 	u8 *prog = *pprog;
-	int cnt = 0;
+	int cnt = X86_CALL_SIZE;

+	/* BPF trampoline can be made to work without these nops,
+	 * but let's waste 5 bytes for now and optimize later
+	 */
+	memcpy(prog, ideal_nops[NOP_ATOMIC5], cnt);
+	prog += cnt;
 	EMIT1(0x55);             /* push rbp */
 	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
 	/* sub rsp, rounded_stack_depth */
···
 	*pprog = prog;
 }

+/* LDX: dst_reg = *(u8*)(src_reg + off) */
+static void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	switch (size) {
+	case BPF_B:
+		/* Emit 'movzx rax, byte ptr [rax + off]' */
+		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
+		break;
+	case BPF_H:
+		/* Emit 'movzx rax, word ptr [rax + off]' */
+		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
+		break;
+	case BPF_W:
+		/* Emit 'mov eax, dword ptr [rax+0x14]' */
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
+		else
+			EMIT1(0x8B);
+		break;
+	case BPF_DW:
+		/* Emit 'mov rax, qword ptr [rax+0x14]' */
+		EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
+		break;
+	}
+	/*
+	 * If insn->off == 0 we can save one extra byte, but
+	 * special case of x86 R13 which always needs an offset
+	 * is not worth the hassle
+	 */
+	if (is_imm8(off))
+		EMIT2(add_2reg(0x40, src_reg, dst_reg), off);
+	else
+		EMIT1_off32(add_2reg(0x80, src_reg, dst_reg), off);
+	*pprog = prog;
+}
+
+/* STX: *(u8*)(dst_reg + off) = src_reg */
+static void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	switch (size) {
+	case BPF_B:
+		/* Emit 'mov byte ptr [rax + off], al' */
+		if (is_ereg(dst_reg) || is_ereg(src_reg) ||
+		    /* We have to add extra byte for x86 SIL, DIL regs */
+		    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
+			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
+		else
+			EMIT1(0x88);
+		break;
+	case BPF_H:
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
+		else
+			EMIT2(0x66, 0x89);
+		break;
+	case BPF_W:
+		if (is_ereg(dst_reg) || is_ereg(src_reg))
+			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
+		else
+			EMIT1(0x89);
+		break;
+	case BPF_DW:
+		EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
+		break;
+	}
+	if (is_imm8(off))
+		EMIT2(add_2reg(0x40, dst_reg, src_reg), off);
+	else
+		EMIT1_off32(add_2reg(0x80, dst_reg, src_reg), off);
+	*pprog = prog;
+}
+
+static int emit_call(u8 **pprog, void *func, void *ip)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+	s64 offset;
+
+	offset = func - (ip + X86_CALL_SIZE);
+	if (!is_simm32(offset)) {
+		pr_err("Target call %p is out of range\n", func);
+		return -EINVAL;
+	}
+	EMIT1_off32(0xE8, offset);
+	*pprog = prog;
+	return 0;
+}
+
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+		       void *old_addr, void *new_addr)
+{
+	u8 old_insn[X86_CALL_SIZE] = {};
+	u8 new_insn[X86_CALL_SIZE] = {};
+	u8 *prog;
+	int ret;
+
+	if (!is_kernel_text((long)ip) &&
+	    !is_bpf_text_address((long)ip))
+		/* BPF trampoline in modules is not supported */
+		return -EINVAL;
+
+	if (old_addr) {
+		prog = old_insn;
+		ret = emit_call(&prog, old_addr, (void *)ip);
+		if (ret)
+			return ret;
+	}
+	if (new_addr) {
+		prog = new_insn;
+		ret = emit_call(&prog, new_addr, (void *)ip);
+		if (ret)
+			return ret;
+	}
+	ret = -EBUSY;
+	mutex_lock(&text_mutex);
+	switch (t) {
+	case BPF_MOD_NOP_TO_CALL:
+		if (memcmp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE))
+			goto out;
+		text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
+		break;
+	case BPF_MOD_CALL_TO_CALL:
+		if (memcmp(ip, old_insn, X86_CALL_SIZE))
+			goto out;
+		text_poke_bp(ip, new_insn, X86_CALL_SIZE, NULL);
+		break;
+	case BPF_MOD_CALL_TO_NOP:
+		if (memcmp(ip, old_insn, X86_CALL_SIZE))
+			goto out;
+		text_poke_bp(ip, ideal_nops[NOP_ATOMIC5], X86_CALL_SIZE, NULL);
+		break;
+	}
+	ret = 0;
+out:
+	mutex_unlock(&text_mutex);
+	return ret;
+}

 static bool ex_handler_bpf(const struct exception_table_entry *x,
 			   struct pt_regs *regs, int trapnr,
···
 		/* STX: *(u8*)(dst_reg + off) = src_reg */
 	case BPF_STX | BPF_MEM | BPF_B:
-		/* Emit 'mov byte ptr [rax + off], al' */
-		if (is_ereg(dst_reg) || is_ereg(src_reg) ||
-		    /* We have to add extra byte for x86 SIL, DIL regs */
-		    src_reg == BPF_REG_1 || src_reg == BPF_REG_2)
-			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88);
-		else
-			EMIT1(0x88);
-		goto stx;
 	case BPF_STX | BPF_MEM | BPF_H:
-		if (is_ereg(dst_reg) || is_ereg(src_reg))
-			EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89);
-		else
-			EMIT2(0x66, 0x89);
-		goto stx;
 	case BPF_STX | BPF_MEM | BPF_W:
-		if (is_ereg(dst_reg) || is_ereg(src_reg))
-			EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x89);
-		else
-			EMIT1(0x89);
-		goto stx;
 	case BPF_STX | BPF_MEM | BPF_DW:
-		EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89);
-stx:		if (is_imm8(insn->off))
-			EMIT2(add_2reg(0x40, dst_reg, src_reg), insn->off);
-		else
-			EMIT1_off32(add_2reg(0x80, dst_reg, src_reg),
-				    insn->off);
+		emit_stx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 		break;

 		/* LDX: dst_reg = *(u8*)(src_reg + off) */
 	case BPF_LDX | BPF_MEM | BPF_B:
 	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
-		/* Emit 'movzx rax, byte ptr [rax + off]' */
-		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6);
-		goto ldx;
 	case BPF_LDX | BPF_MEM | BPF_H:
 	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
-		/* Emit 'movzx rax, word ptr [rax + off]' */
-		EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7);
-		goto ldx;
 	case BPF_LDX | BPF_MEM | BPF_W:
 	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
-		/* Emit 'mov eax, dword ptr [rax+0x14]' */
-		if (is_ereg(dst_reg) || is_ereg(src_reg))
-			EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B);
-		else
-			EMIT1(0x8B);
-		goto ldx;
 	case BPF_LDX | BPF_MEM | BPF_DW:
 	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
-		/* Emit 'mov rax, qword ptr [rax+0x14]' */
-		EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B);
-ldx:		/*
-		 * If insn->off == 0 we can save one extra byte, but
-		 * special case of x86 R13 which always needs an offset
-		 * is not worth the hassle
-		 */
-		if (is_imm8(insn->off))
-			EMIT2(add_2reg(0x40, src_reg, dst_reg), insn->off);
-		else
-			EMIT1_off32(add_2reg(0x80, src_reg, dst_reg),
-				    insn->off);
+		emit_ldx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn->off);
 		if (BPF_MODE(insn->code) == BPF_PROBE_MEM) {
 			struct exception_table_entry *ex;
 			u8 *_insn = image + proglen;
···
 		/* call */
 	case BPF_JMP | BPF_CALL:
 		func = (u8 *) __bpf_call_base + imm32;
-		jmp_offset = func - (image + addrs[i]);
-		if (!imm32 || !is_simm32(jmp_offset)) {
-			pr_err("unsupported BPF func %d addr %p image %p\n",
-			       imm32, func, image);
+		if (!imm32 || emit_call(&prog, func, image + addrs[i - 1]))
 			return -EINVAL;
-		}
-		EMIT1_off32(0xE8, jmp_offset);
 		break;

 	case BPF_JMP | BPF_TAIL_CALL:
···
 		return -EFAULT;
 	}
 	return proglen;
 }

+static void save_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+		      int stack_size)
+{
+	int i;
+
+	/* Store function arguments to stack.
+	 * For a function that accepts two pointers the sequence will be:
+	 * mov QWORD PTR [rbp-0x10],rdi
+	 * mov QWORD PTR [rbp-0x8],rsi
+	 */
+	for (i = 0; i < min(nr_args, 6); i++)
+		emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]),
+			 BPF_REG_FP,
+			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+			 -(stack_size - i * 8));
+}
+
+static void restore_regs(struct btf_func_model *m, u8 **prog, int nr_args,
+			 int stack_size)
+{
+	int i;
+
+	/* Restore function arguments from stack.
+	 * For a function that accepts two pointers the sequence will be:
+	 * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10]
+	 * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8]
+	 */
+	for (i = 0; i < min(nr_args, 6); i++)
+		emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]),
+			 i == 5 ? X86_REG_R9 : BPF_REG_1 + i,
+			 BPF_REG_FP,
+			 -(stack_size - i * 8));
+}
+
+static int invoke_bpf(struct btf_func_model *m, u8 **pprog,
+		      struct bpf_prog **progs, int prog_cnt, int stack_size)
+{
+	u8 *prog = *pprog;
+	int cnt = 0, i;
+
+	for (i = 0; i < prog_cnt; i++) {
+		if (emit_call(&prog, __bpf_prog_enter, prog))
+			return -EINVAL;
+		/* remember prog start time returned by __bpf_prog_enter */
+		emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0);
+
+		/* arg1: lea rdi, [rbp - stack_size] */
+		EMIT4(0x48, 0x8D, 0x7D, -stack_size);
+		/* arg2: progs[i]->insnsi for interpreter */
+		if (!progs[i]->jited)
+			emit_mov_imm64(&prog, BPF_REG_2,
+				       (long) progs[i]->insnsi >> 32,
+				       (u32) (long) progs[i]->insnsi);
+		/* call JITed bpf program or interpreter */
+		if (emit_call(&prog, progs[i]->bpf_func, prog))
+			return -EINVAL;
+
+		/* arg1: mov rdi, progs[i] */
+		emit_mov_imm64(&prog, BPF_REG_1, (long) progs[i] >> 32,
+			       (u32) (long) progs[i]);
+		/* arg2: mov rsi, rbx <- start time in nsec */
+		emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
+		if (emit_call(&prog, __bpf_prog_exit, prog))
+			return -EINVAL;
+	}
+	*pprog = prog;
+	return 0;
+}
+
+/* Example:
+ * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev);
+ * its 'struct btf_func_model' will be nr_args=2
+ * The assembly code when eth_type_trans is executing after trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 16                     // space for skb and dev
+ * push rbx                        // temp regs to pass start time
+ * mov qword ptr [rbp - 16], rdi   // save skb pointer to stack
+ * mov qword ptr [rbp - 8], rsi    // save dev pointer to stack
+ * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
+ * mov rbx, rax                    // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 16]             // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
+ * mov rsi, rbx                    // prog start time
+ * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 16]   // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 8]    // restore dev pointer from stack
+ * pop rbx
+ * leave
+ * ret
+ *
+ * eth_type_trans has 5 byte nop at the beginning. These 5 bytes will be
+ * replaced with 'call generated_bpf_trampoline'. When it returns
+ * eth_type_trans will continue executing with original skb and dev pointers.
+ *
+ * The assembly code when eth_type_trans is called from trampoline:
+ *
+ * push rbp
+ * mov rbp, rsp
+ * sub rsp, 24                     // space for skb, dev, return value
+ * push rbx                        // temp regs to pass start time
+ * mov qword ptr [rbp - 24], rdi   // save skb pointer to stack
+ * mov qword ptr [rbp - 16], rsi   // save dev pointer to stack
+ * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
+ * mov rbx, rax                    // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
+ * call addr_of_jited_FENTRY_prog  // bpf prog can access skb and dev
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
+ * mov rsi, rbx                    // prog start time
+ * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
+ * mov rdi, qword ptr [rbp - 24]   // restore skb pointer from stack
+ * mov rsi, qword ptr [rbp - 16]   // restore dev pointer from stack
+ * call eth_type_trans+5           // execute body of eth_type_trans
+ * mov qword ptr [rbp - 8], rax    // save return value
+ * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
+ * mov rbx, rax                    // remember start time if bpf stats are enabled
+ * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
+ * call addr_of_jited_FEXIT_prog   // bpf prog can access skb, dev, return value
+ * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
+ * mov rsi, rbx                    // prog start time
+ * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
+ * mov rax, qword ptr [rbp - 8]    // restore eth_type_trans's return value
+ * pop rbx
+ * leave
+ * add rsp, 8                      // skip eth_type_trans's frame
+ * ret                             // return to its caller
+ */
+int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+				struct bpf_prog **fentry_progs, int fentry_cnt,
+				struct bpf_prog **fexit_progs, int fexit_cnt,
+				void *orig_call)
+{
+	int cnt = 0, nr_args = m->nr_args;
+	int stack_size = nr_args * 8;
+	u8 *prog;
+
+	/* x86-64 supports up to 6 arguments. 7+ can be added in the future */
+	if (nr_args > 6)
+		return -ENOTSUPP;
+
+	if ((flags & BPF_TRAMP_F_RESTORE_REGS) &&
+	    (flags & BPF_TRAMP_F_SKIP_FRAME))
+		return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG)
+		stack_size += 8; /* room for return value of orig_call */
+
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* skip patched call instruction and point orig_call to actual
+		 * body of the kernel function.
+		 */
+		orig_call += X86_CALL_SIZE;
+
+	prog = image;
+
+	EMIT1(0x55);		 /* push rbp */
+	EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */
+	EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */
+	EMIT1(0x53);		 /* push rbx */
+
+	save_regs(m, &prog, nr_args, stack_size);
+
+	if (fentry_cnt)
+		if (invoke_bpf(m, &prog, fentry_progs, fentry_cnt, stack_size))
+			return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG) {
+		if (fentry_cnt)
+			restore_regs(m, &prog, nr_args, stack_size);
+
+		/* call original function */
+		if (emit_call(&prog, orig_call, prog))
+			return -EINVAL;
+		/* remember return value in a stack for bpf prog to access */
+		emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+	}
+
+	if (fexit_cnt)
+		if (invoke_bpf(m, &prog, fexit_progs, fexit_cnt, stack_size))
+			return -EINVAL;
+
+	if (flags & BPF_TRAMP_F_RESTORE_REGS)
+		restore_regs(m, &prog, nr_args, stack_size);
+
+	if (flags & BPF_TRAMP_F_CALL_ORIG)
+		/* restore original return value back into RAX */
+		emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+
+	EMIT1(0x5B); /* pop rbx */
+	EMIT1(0xC9); /* leave */
+	if (flags & BPF_TRAMP_F_SKIP_FRAME)
+		/* skip our return address and return to parent */
+		EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
+	EMIT1(0xC3); /* ret */
+	/* One half of the page has active running trampoline.
+	 * Another half is an area for next trampoline.
+	 * Make sure the trampoline generation logic doesn't overflow.
+	 */
+	if (WARN_ON_ONCE(prog - (u8 *)image > PAGE_SIZE / 2 - BPF_INSN_SAFETY))
+		return -EFAULT;
+	return 0;
 }

 struct x64_jit_data {
+2 -7
drivers/net/ethernet/broadcom/bnxt/bnxt.c
···
 	bnxt_init_rxbd_pages(ring, type);

 	if (BNXT_RX_PAGE_MODE(bp) && bp->xdp_prog) {
-		rxr->xdp_prog = bpf_prog_add(bp->xdp_prog, 1);
-		if (IS_ERR(rxr->xdp_prog)) {
-			int rc = PTR_ERR(rxr->xdp_prog);
-
-			rxr->xdp_prog = NULL;
-			return rc;
-		}
+		bpf_prog_add(bp->xdp_prog, 1);
+		rxr->xdp_prog = bp->xdp_prog;
 	}
 	prod = rxr->rx_prod;
 	for (i = 0; i < bp->rx_ring_size; i++) {
+2 -7
drivers/net/ethernet/cavium/thunder/nicvf_main.c
···
 	if (nic->xdp_prog) {
 		/* Attach BPF program */
-		nic->xdp_prog = bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
-		if (!IS_ERR(nic->xdp_prog)) {
-			bpf_attached = true;
-		} else {
-			ret = PTR_ERR(nic->xdp_prog);
-			nic->xdp_prog = NULL;
-		}
+		bpf_prog_add(nic->xdp_prog, nic->rx_queues - 1);
+		bpf_attached = true;
 	}

 	/* Calculate Tx queues needed for XDP and network stack */
+2 -5
drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
···
 	if (prog && !xdp_mtu_valid(priv, dev->mtu))
 		return -EINVAL;

-	if (prog) {
-		prog = bpf_prog_add(prog, priv->num_channels);
-		if (IS_ERR(prog))
-			return PTR_ERR(prog);
-	}
+	if (prog)
+		bpf_prog_add(prog, priv->num_channels);

 	up = netif_running(dev);
 	need_update = (!!priv->xdp_prog != !!prog);
+6 -18
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
···
 					lockdep_is_held(&priv->mdev->state_lock));

 	if (xdp_prog && carry_xdp_prog) {
-		xdp_prog = bpf_prog_add(xdp_prog, tmp->rx_ring_num);
-		if (IS_ERR(xdp_prog)) {
-			mlx4_en_free_resources(tmp);
-			return PTR_ERR(xdp_prog);
-		}
+		bpf_prog_add(xdp_prog, tmp->rx_ring_num);
 		for (i = 0; i < tmp->rx_ring_num; i++)
 			rcu_assign_pointer(tmp->rx_ring[i]->xdp_prog,
 					   xdp_prog);
···
 	 * program for a new one.
 	 */
 	if (priv->tx_ring_num[TX_XDP] == xdp_ring_num) {
-		if (prog) {
-			prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
-			if (IS_ERR(prog))
-				return PTR_ERR(prog);
-		}
+		if (prog)
+			bpf_prog_add(prog, priv->rx_ring_num - 1);
+
 		mutex_lock(&mdev->state_lock);
 		for (i = 0; i < priv->rx_ring_num; i++) {
 			old_prog = rcu_dereference_protected(
···
 	if (!tmp)
 		return -ENOMEM;

-	if (prog) {
-		prog = bpf_prog_add(prog, priv->rx_ring_num - 1);
-		if (IS_ERR(prog)) {
-			err = PTR_ERR(prog);
-			goto out;
-		}
-	}
+	if (prog)
+		bpf_prog_add(prog, priv->rx_ring_num - 1);

 	mutex_lock(&mdev->state_lock);
 	memcpy(&new_prof, priv->prof, sizeof(struct mlx4_en_port_profile));
···

 unlock_out:
 	mutex_unlock(&mdev->state_lock);
-out:
 	kfree(tmp);
 	return err;
 }
+5 -13
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
···
 	rq->stats = &c->priv->channel_stats[c->ix].rq;
 	INIT_WORK(&rq->recover_work, mlx5e_rq_err_cqe_work);

-	rq->xdp_prog = params->xdp_prog ? bpf_prog_inc(params->xdp_prog) : NULL;
-	if (IS_ERR(rq->xdp_prog)) {
-		err = PTR_ERR(rq->xdp_prog);
-		rq->xdp_prog = NULL;
-		goto err_rq_wq_destroy;
-	}
+	if (params->xdp_prog)
+		bpf_prog_inc(params->xdp_prog);
+	rq->xdp_prog = params->xdp_prog;

 	rq_xdp_ix = rq->ix;
 	if (xsk)
···
 	/* no need for full reset when exchanging programs */
 	reset = (!priv->channels.params.xdp_prog || !prog);

-	if (was_opened && !reset) {
+	if (was_opened && !reset)
 		/* num_channels is invariant here, so we can take the
 		 * batched reference right upfront.
 		 */
-		prog = bpf_prog_add(prog, priv->channels.num);
-		if (IS_ERR(prog)) {
-			err = PTR_ERR(prog);
-			goto unlock;
-		}
-	}
+		bpf_prog_add(prog, priv->channels.num);

 	if (was_opened && reset) {
 		struct mlx5e_channels new_channels = {};
+1 -3
drivers/net/ethernet/netronome/nfp/bpf/offload.c
···
  	/* Grab a single ref to the map for our record. The prog destroy ndo
  	 * happens after free_used_maps().
  	 */
- 	map = bpf_map_inc(map, false);
- 	if (IS_ERR(map))
- 		return PTR_ERR(map);
+ 	bpf_map_inc(map);

  	record = kmalloc(sizeof(*record), GFP_KERNEL);
  	if (!record) {
+2 -6
drivers/net/ethernet/qlogic/qede/qede_main.c
···
  		if (rc)
  			goto out;

- 		fp->rxq->xdp_prog = bpf_prog_add(edev->xdp_prog, 1);
- 		if (IS_ERR(fp->rxq->xdp_prog)) {
- 			rc = PTR_ERR(fp->rxq->xdp_prog);
- 			fp->rxq->xdp_prog = NULL;
- 			goto out;
- 		}
+ 		bpf_prog_add(edev->xdp_prog, 1);
+ 		fp->rxq->xdp_prog = edev->xdp_prog;
  	}

  	if (fp->type & QEDE_FASTPATH_TX) {
+2 -5
drivers/net/virtio_net.c
···
  	if (!prog && !old_prog)
  		return 0;

- 	if (prog) {
- 		prog = bpf_prog_add(prog, vi->max_queue_pairs - 1);
- 		if (IS_ERR(prog))
- 			return PTR_ERR(prog);
- 	}
+ 	if (prog)
+ 		bpf_prog_add(prog, vi->max_queue_pairs - 1);

  	/* Make sure NAPI is not using any XDP TX queues for RX. */
  	if (netif_running(dev)) {
+3
include/linux/audit.h
···
  extern void audit_log_link_denied(const char *operation);
  extern void audit_log_lost(const char *message);

+ extern void audit_log_task(struct audit_buffer *ab);
  extern int audit_log_task_context(struct audit_buffer *ab);
  extern void audit_log_task_info(struct audit_buffer *ab);
···
  static inline void audit_log_key(struct audit_buffer *ab, char *key)
  { }
  static inline void audit_log_link_denied(const char *string)
+ { }
+ static inline void audit_log_task(struct audit_buffer *ab)
  { }
  static inline int audit_log_task_context(struct audit_buffer *ab)
  {
+153 -19
include/linux/bpf.h
···
  #include <linux/err.h>
  #include <linux/rbtree_latch.h>
  #include <linux/numa.h>
+ #include <linux/mm_types.h>
  #include <linux/wait.h>
  #include <linux/u64_stats_sync.h>
+ #include <linux/refcount.h>
+ #include <linux/mutex.h>

  struct bpf_verifier_env;
  struct bpf_verifier_log;
···
  			     u64 *imm, u32 off);
  	int (*map_direct_value_meta)(const struct bpf_map *map,
  				     u64 imm, u32 *off);
+ 	int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
  };

  struct bpf_map_memory {
···
  	u32 btf_value_type_id;
  	struct btf *btf;
  	struct bpf_map_memory memory;
+ 	char name[BPF_OBJ_NAME_LEN];
  	bool unpriv_array;
- 	bool frozen; /* write-once */
- 	/* 48 bytes hole */
+ 	bool frozen; /* write-once; write-protected by freeze_mutex */
+ 	/* 22 bytes hole */

  	/* The 3rd and 4th cacheline with misc members to avoid false sharing
  	 * particularly with refcounting.
  	 */
- 	atomic_t refcnt ____cacheline_aligned;
- 	atomic_t usercnt;
+ 	atomic64_t refcnt ____cacheline_aligned;
+ 	atomic64_t usercnt;
  	struct work_struct work;
- 	char name[BPF_OBJ_NAME_LEN];
+ 	struct mutex freeze_mutex;
+ 	u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */
  };

  static inline bool map_value_has_spin_lock(const struct bpf_map *map)
···
  		};
  		enum bpf_arg_type arg_type[5];
  	};
- 	u32 *btf_id; /* BTF ids of arguments */
+ 	int *btf_id; /* BTF ids of arguments */
  };

  /* bpf_context is intentionally undefined structure. Pointer to bpf_context is
···
  	struct u64_stats_sync syncp;
  } __aligned(2 * sizeof(u64));

+ struct btf_func_model {
+ 	u8 ret_size;
+ 	u8 nr_args;
+ 	u8 arg_size[MAX_BPF_FUNC_ARGS];
+ };
+
+ /* Restore arguments before returning from trampoline to let original function
+  * continue executing. This flag is used for fentry progs when there are no
+  * fexit progs.
+  */
+ #define BPF_TRAMP_F_RESTORE_REGS	BIT(0)
+ /* Call original function after fentry progs, but before fexit progs.
+  * Makes sense for fentry/fexit, normal calls and indirect calls.
+  */
+ #define BPF_TRAMP_F_CALL_ORIG		BIT(1)
+ /* Skip current frame and return to parent. Makes sense for fentry/fexit
+  * programs only. Should not be used with normal calls and indirect calls.
+  */
+ #define BPF_TRAMP_F_SKIP_FRAME		BIT(2)
+
+ /* Different use cases for BPF trampoline:
+  * 1. replace nop at the function entry (kprobe equivalent)
+  *    flags = BPF_TRAMP_F_RESTORE_REGS
+  *    fentry = a set of programs to run before returning from trampoline
+  *
+  * 2. replace nop at the function entry (kprobe + kretprobe equivalent)
+  *    flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME
+  *    orig_call = fentry_ip + MCOUNT_INSN_SIZE
+  *    fentry = a set of program to run before calling original function
+  *    fexit = a set of program to run after original function
+  *
+  * 3. replace direct call instruction anywhere in the function body
+  *    or assign a function pointer for indirect call (like tcp_congestion_ops->cong_avoid)
+  *    With flags = 0
+  *      fentry = a set of programs to run before returning from trampoline
+  *    With flags = BPF_TRAMP_F_CALL_ORIG
+  *      orig_call = original callback addr or direct function addr
+  *      fentry = a set of program to run before calling original function
+  *      fexit = a set of program to run after original function
+  */
+ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags,
+ 				struct bpf_prog **fentry_progs, int fentry_cnt,
+ 				struct bpf_prog **fexit_progs, int fexit_cnt,
+ 				void *orig_call);
+ /* these two functions are called from generated trampoline */
+ u64 notrace __bpf_prog_enter(void);
+ void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
+
+ enum bpf_tramp_prog_type {
+ 	BPF_TRAMP_FENTRY,
+ 	BPF_TRAMP_FEXIT,
+ 	BPF_TRAMP_MAX
+ };
+
+ struct bpf_trampoline {
+ 	/* hlist for trampoline_table */
+ 	struct hlist_node hlist;
+ 	/* serializes access to fields of this trampoline */
+ 	struct mutex mutex;
+ 	refcount_t refcnt;
+ 	u64 key;
+ 	struct {
+ 		struct btf_func_model model;
+ 		void *addr;
+ 	} func;
+ 	/* list of BPF programs using this trampoline */
+ 	struct hlist_head progs_hlist[BPF_TRAMP_MAX];
+ 	/* Number of attached programs. A counter per kind. */
+ 	int progs_cnt[BPF_TRAMP_MAX];
+ 	/* Executable image of trampoline */
+ 	void *image;
+ 	u64 selector;
+ };
+ #ifdef CONFIG_BPF_JIT
+ struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
+ int bpf_trampoline_link_prog(struct bpf_prog *prog);
+ int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
+ void bpf_trampoline_put(struct bpf_trampoline *tr);
+ #else
+ static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
+ {
+ 	return NULL;
+ }
+ static inline int bpf_trampoline_link_prog(struct bpf_prog *prog)
+ {
+ 	return -ENOTSUPP;
+ }
+ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
+ {
+ 	return -ENOTSUPP;
+ }
+ static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+ #endif
+
+ struct bpf_func_info_aux {
+ 	bool unreliable;
+ };
+
  struct bpf_prog_aux {
- 	atomic_t refcnt;
+ 	atomic64_t refcnt;
  	u32 used_map_cnt;
  	u32 max_ctx_offset;
  	u32 max_pkt_offset;
···
  	u32 func_cnt; /* used by non-func prog as the number of func progs */
  	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
  	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+ 	struct bpf_prog *linked_prog;
  	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
  	bool offload_requested;
  	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
+ 	bool func_proto_unreliable;
+ 	enum bpf_tramp_prog_type trampoline_prog_type;
+ 	struct bpf_trampoline *trampoline;
+ 	struct hlist_node tramp_hlist;
  	/* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
  	const struct btf_type *attach_func_proto;
  	/* function name for valid attach_btf_id */
···
  	struct bpf_prog_offload *offload;
  	struct btf *btf;
  	struct bpf_func_info *func_info;
+ 	struct bpf_func_info_aux *func_info_aux;
  	/* bpf_line_info loaded from userspace. linfo->insn_off
  	 * has the xlated insn offset.
  	 * Both the main and sub prog share the same linfo.
···
  extern const struct file_operations bpf_map_fops;
  extern const struct file_operations bpf_prog_fops;

- #define BPF_PROG_TYPE(_id, _name) \
+ #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
  	extern const struct bpf_prog_ops _name ## _prog_ops; \
  	extern const struct bpf_verifier_ops _name ## _verifier_ops;
  #define BPF_MAP_TYPE(_id, _ops) \
···
  struct bpf_prog *bpf_prog_get(u32 ufd);
  struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type,
  				       bool attach_drv);
- struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i);
+ void bpf_prog_add(struct bpf_prog *prog, int i);
  void bpf_prog_sub(struct bpf_prog *prog, int i);
- struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog);
+ void bpf_prog_inc(struct bpf_prog *prog);
  struct bpf_prog * __must_check bpf_prog_inc_not_zero(struct bpf_prog *prog);
  void bpf_prog_put(struct bpf_prog *prog);
  int __bpf_prog_charge(struct user_struct *user, u32 pages);
···
  struct bpf_map *bpf_map_get_with_uref(u32 ufd);
  struct bpf_map *__bpf_map_get(struct fd f);
- struct bpf_map * __must_check bpf_map_inc(struct bpf_map *map, bool uref);
- struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map,
- 						   bool uref);
+ void bpf_map_inc(struct bpf_map *map);
+ void bpf_map_inc_with_uref(struct bpf_map *map);
+ struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
  void bpf_map_put_with_uref(struct bpf_map *map);
  void bpf_map_put(struct bpf_map *map);
  int bpf_map_charge_memlock(struct bpf_map *map, u32 pages);
···
  void bpf_map_charge_move(struct bpf_map_memory *dst,
  			 struct bpf_map_memory *src);
  void *bpf_map_area_alloc(u64 size, int numa_node);
+ void *bpf_map_area_mmapable_alloc(u64 size, int numa_node);
  void bpf_map_area_free(void *base);
  void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
···
  		      const struct btf_type *t, int off, int size,
  		      enum bpf_access_type atype,
  		      u32 *next_btf_id);
- u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *, int);
+ int btf_resolve_helper_id(struct bpf_verifier_log *log,
+ 			  const struct bpf_func_proto *fn, int);
+
+ int btf_distill_func_proto(struct bpf_verifier_log *log,
+ 			   struct btf *btf,
+ 			   const struct btf_type *func_proto,
+ 			   const char *func_name,
+ 			   struct btf_func_model *m);
+
+ int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);

  #else /* !CONFIG_BPF_SYSCALL */
  static inline struct bpf_prog *bpf_prog_get(u32 ufd)
···
  	return ERR_PTR(-EOPNOTSUPP);
  }

- static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog,
- 							  int i)
+ static inline void bpf_prog_add(struct bpf_prog *prog, int i)
  {
- 	return ERR_PTR(-EOPNOTSUPP);
  }

  static inline void bpf_prog_sub(struct bpf_prog *prog, int i)
···
  {
  }

- static inline struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog)
+ static inline void bpf_prog_inc(struct bpf_prog *prog)
  {
- 	return ERR_PTR(-EOPNOTSUPP);
  }

  static inline struct bpf_prog *__must_check
···
  #endif

  #ifdef CONFIG_INET
+ struct sk_reuseport_kern {
+ 	struct sk_buff *skb;
+ 	struct sock *sk;
+ 	struct sock *selected_sk;
+ 	void *data_end;
+ 	u32 hash;
+ 	u32 reuseport_id;
+ 	bool bind_inany;
+ };
  bool bpf_tcp_sock_is_valid_access(int off, int size, enum bpf_access_type type,
  				  struct bpf_insn_access_aux *info);
···
  	return 0;
  }
  #endif /* CONFIG_INET */
+
+ enum bpf_text_poke_type {
+ 	BPF_MOD_NOP_TO_CALL,
+ 	BPF_MOD_CALL_TO_CALL,
+ 	BPF_MOD_CALL_TO_NOP,
+ };
+ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
+ 		       void *addr1, void *addr2);

  #endif /* _LINUX_BPF_H */
+52 -26
include/linux/bpf_types.h
···
  /* internal file - do not include directly */

  #ifdef CONFIG_NET
- BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter)
- BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act)
- BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act)
- BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp,
+ 	      struct xdp_md, struct xdp_buff)
  #ifdef CONFIG_CGROUP_BPF
- BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb)
- BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock)
- BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock,
+ 	      struct bpf_sock, struct sock)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, cg_sock_addr,
+ 	      struct bpf_sock_addr, struct bpf_sock_addr_kern)
  #endif
- BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in)
- BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out)
- BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit)
- BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local)
- BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops)
- BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb)
- BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg)
- BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_in,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_out,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_SEG6LOCAL, lwt_seg6local,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops,
+ 	      struct bpf_sock_ops, struct bpf_sock_ops_kern)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb,
+ 	      struct __sk_buff, struct sk_buff)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_MSG, sk_msg,
+ 	      struct sk_msg_md, struct sk_msg)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_FLOW_DISSECTOR, flow_dissector,
+ 	      struct __sk_buff, struct bpf_flow_dissector)
  #endif
  #ifdef CONFIG_BPF_EVENTS
- BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
- BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
- BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
- BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
- BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
- BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe,
+ 	      bpf_user_pt_regs_t, struct pt_regs)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint,
+ 	      __u64, u64)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event,
+ 	      struct bpf_perf_event_data, struct bpf_perf_event_data_kern)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint,
+ 	      struct bpf_raw_tracepoint_args, u64)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable,
+ 	      struct bpf_raw_tracepoint_args, u64)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_TRACING, tracing,
+ 	      void *, void *)
  #endif
  #ifdef CONFIG_CGROUP_BPF
- BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
- BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl)
- BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev,
+ 	      struct bpf_cgroup_dev_ctx, struct bpf_cgroup_dev_ctx)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SYSCTL, cg_sysctl,
+ 	      struct bpf_sysctl, struct bpf_sysctl_kern)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCKOPT, cg_sockopt,
+ 	      struct bpf_sockopt, struct bpf_sockopt_kern)
  #endif
  #ifdef CONFIG_BPF_LIRC_MODE2
- BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_LIRC_MODE2, lirc_mode2,
+ 	      __u32, u32)
  #endif
  #ifdef CONFIG_INET
- BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport)
+ BPF_PROG_TYPE(BPF_PROG_TYPE_SK_REUSEPORT, sk_reuseport,
+ 	      struct sk_reuseport_md, struct sk_reuseport_kern)
  #endif

  BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops)
+1
include/linux/bpf_verifier.h
···
  #define BPF_MAX_SUBPROGS 256

  struct bpf_subprog_info {
+ 	/* 'start' has to be the first field otherwise find_subprog() won't work */
  	u32 start; /* insn idx of function entry point */
  	u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
  	u16 stack_depth; /* max. stack depth used by this function */
+1
include/linux/btf.h
···
  const struct btf_type *btf_type_by_id(const struct btf *btf, u32 type_id);
  const char *btf_name_by_offset(const struct btf *btf, u32 offset);
  struct btf *btf_parse_vmlinux(void);
+ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog);
  #else
  static inline const struct btf_type *btf_type_by_id(const struct btf *btf,
  						    u32 type_id)
+4 -2
include/linux/filter.h
···
  	struct sock_filter *filter;
  };

+ /* Some arches need doubleword alignment for their instructions and/or data */
+ #define BPF_IMAGE_ALIGNMENT 8
+
  struct bpf_binary_header {
  	u32 pages;
- 	/* Some arches need word alignment for their instructions */
- 	u8 image[] __aligned(4);
+ 	u8 image[] __aligned(BPF_IMAGE_ALIGNMENT);
  };

  struct bpf_prog {
+1
include/linux/vmalloc.h
···
  extern void *vmalloc_user(unsigned long size);
  extern void *vmalloc_node(unsigned long size, int node);
  extern void *vzalloc_node(unsigned long size, int node);
+ extern void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags);
  extern void *vmalloc_exec(unsigned long size);
  extern void *vmalloc_32(unsigned long size);
  extern void *vmalloc_32_user(unsigned long size);
+1
include/uapi/linux/audit.h
···
  #define AUDIT_FANOTIFY		1331	/* Fanotify access decision */
  #define AUDIT_TIME_INJOFFSET	1332	/* Timekeeping offset injected */
  #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
+ #define AUDIT_BPF		1334	/* BPF subsystem */

  #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
  #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
+6
include/uapi/linux/bpf.h
···
  	BPF_CGROUP_GETSOCKOPT,
  	BPF_CGROUP_SETSOCKOPT,
  	BPF_TRACE_RAW_TP,
+ 	BPF_TRACE_FENTRY,
+ 	BPF_TRACE_FEXIT,
  	__MAX_BPF_ATTACH_TYPE
  };
···
  /* Clone map from listener for newly accepted socket */
  #define BPF_F_CLONE		(1U << 9)

+ /* Enable memory-mapping BPF map */
+ #define BPF_F_MMAPABLE		(1U << 10)
+
  /* flags for BPF_PROG_QUERY */
  #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
···
  	__aligned_u64	line_info;	/* line info */
  	__u32		line_info_cnt;	/* number of bpf_line_info records */
  	__u32		attach_btf_id;	/* in-kernel BTF type id to attach to */
+ 	__u32		attach_prog_fd; /* 0 to attach to vmlinux */
  };

  	struct { /* anonymous struct used by BPF_OBJ_* commands */
+1 -1
kernel/auditsc.c
···
  	audit_log_ntp_val(ad, "adjust", AUDIT_NTP_ADJUST);
  }

- static void audit_log_task(struct audit_buffer *ab)
+ void audit_log_task(struct audit_buffer *ab)
  {
  	kuid_t auid, uid;
  	kgid_t gid;
+1
kernel/bpf/Makefile
···
  obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
  obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
  obj-$(CONFIG_BPF_SYSCALL) += disasm.o
+ obj-$(CONFIG_BPF_JIT) += trampoline.o
  obj-$(CONFIG_BPF_SYSCALL) += btf.o
  ifeq ($(CONFIG_NET),y)
  obj-$(CONFIG_BPF_SYSCALL) += devmap.o
+52 -6
kernel/bpf/arraymap.c
···
  #include "map_in_map.h"

  #define ARRAY_CREATE_FLAG_MASK \
- 	(BPF_F_NUMA_NODE | BPF_F_ACCESS_MASK)
+ 	(BPF_F_NUMA_NODE | BPF_F_MMAPABLE | BPF_F_ACCESS_MASK)

  static void bpf_array_free_percpu(struct bpf_array *array)
  {
···
  	    attr->map_flags & ~ARRAY_CREATE_FLAG_MASK ||
  	    !bpf_map_flags_access_ok(attr->map_flags) ||
  	    (percpu && numa_node != NUMA_NO_NODE))
  		return -EINVAL;
+
+ 	if (attr->map_type != BPF_MAP_TYPE_ARRAY &&
+ 	    attr->map_flags & BPF_F_MMAPABLE)
+ 		return -EINVAL;

  	if (attr->value_size > KMALLOC_MAX_SIZE)
···
  	}

  	array_size = sizeof(*array);
- 	if (percpu)
+ 	if (percpu) {
  		array_size += (u64) max_entries * sizeof(void *);
- 	else
- 		array_size += (u64) max_entries * elem_size;
+ 	} else {
+ 		/* rely on vmalloc() to return page-aligned memory and
+ 		 * ensure array->value is exactly page-aligned
+ 		 */
+ 		if (attr->map_flags & BPF_F_MMAPABLE) {
+ 			array_size = PAGE_ALIGN(array_size);
+ 			array_size += PAGE_ALIGN((u64) max_entries * elem_size);
+ 		} else {
+ 			array_size += (u64) max_entries * elem_size;
+ 		}
+ 	}

  	/* make sure there is no u32 overflow later in round_up() */
  	cost = array_size;
···
  		return ERR_PTR(ret);

  	/* allocate all map elements and zero-initialize them */
- 	array = bpf_map_area_alloc(array_size, numa_node);
+ 	if (attr->map_flags & BPF_F_MMAPABLE) {
+ 		void *data;
+
+ 		/* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */
+ 		data = bpf_map_area_mmapable_alloc(array_size, numa_node);
+ 		if (!data) {
+ 			bpf_map_charge_finish(&mem);
+ 			return ERR_PTR(-ENOMEM);
+ 		}
+ 		array = data + PAGE_ALIGN(sizeof(struct bpf_array))
+ 			- offsetof(struct bpf_array, value);
+ 	} else {
+ 		array = bpf_map_area_alloc(array_size, numa_node);
+ 	}
  	if (!array) {
  		bpf_map_charge_finish(&mem);
  		return ERR_PTR(-ENOMEM);
···
  	return -EINVAL;
  }

+ static void *array_map_vmalloc_addr(struct bpf_array *array)
+ {
+ 	return (void *)round_down((unsigned long)array, PAGE_SIZE);
+ }
+
  /* Called when map->refcnt goes to zero, either from workqueue or from syscall */
  static void array_map_free(struct bpf_map *map)
  {
···
  	if (array->map.map_type == BPF_MAP_TYPE_PERCPU_ARRAY)
  		bpf_array_free_percpu(array);

- 	bpf_map_area_free(array);
+ 	if (array->map.map_flags & BPF_F_MMAPABLE)
+ 		bpf_map_area_free(array_map_vmalloc_addr(array));
+ 	else
+ 		bpf_map_area_free(array);
  }

  static void array_map_seq_show_elem(struct bpf_map *map, void *key,
···
  	return 0;
  }

+ static int array_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+ {
+ 	struct bpf_array *array = container_of(map, struct bpf_array, map);
+ 	pgoff_t pgoff = PAGE_ALIGN(sizeof(*array)) >> PAGE_SHIFT;
+
+ 	if (!(map->map_flags & BPF_F_MMAPABLE))
+ 		return -EINVAL;
+
+ 	return remap_vmalloc_range(vma, array_map_vmalloc_addr(array), pgoff);
+ }
+
  const struct bpf_map_ops array_map_ops = {
  	.map_alloc_check = array_map_alloc_check,
  	.map_alloc = array_map_alloc,
···
  	.map_gen_lookup = array_map_gen_lookup,
  	.map_direct_value_addr = array_map_direct_value_addr,
  	.map_direct_value_meta = array_map_direct_value_meta,
+ 	.map_mmap = array_map_mmap,
  	.map_seq_show_elem = array_map_seq_show_elem,
  	.map_check_btf = array_map_check_btf,
  };
+518 -44
kernel/bpf/btf.c
···
  /* Copyright (c) 2018 Facebook */

  #include <uapi/linux/btf.h>
+ #include <uapi/linux/bpf.h>
+ #include <uapi/linux/bpf_perf_event.h>
  #include <uapi/linux/types.h>
  #include <linux/seq_file.h>
  #include <linux/compiler.h>
···
  #include <linux/sort.h>
  #include <linux/bpf_verifier.h>
  #include <linux/btf.h>
+ #include <linux/skmsg.h>
+ #include <linux/perf_event.h>
+ #include <net/sock.h>

  /* BTF (BPF Type Format) is the meta data format which describes
   * the data types of BPF program/map. Hence, it basically focus
···
  static const struct resolve_vertex *env_stack_peak(struct btf_verifier_env *env)
  {
  	return env->top_stack ? &env->stack[env->top_stack - 1] : NULL;
  }

+ /* Resolve the size of a passed-in "type"
+  *
+  * type: is an array (e.g. u32 array[x][y])
+  * return type: type "u32[x][y]", i.e. BTF_KIND_ARRAY,
+  * *type_size: (x * y * sizeof(u32)). Hence, *type_size always
+  *             corresponds to the return type.
+  * *elem_type: u32
+  * *total_nelems: (x * y). Hence, individual elem size is
+  *                (*type_size / *total_nelems)
+  *
+  * type: is not an array (e.g. const struct X)
+  * return type: type "struct X"
+  * *type_size: sizeof(struct X)
+  * *elem_type: same as return type ("struct X")
+  * *total_nelems: 1
+  */
+ static const struct btf_type *
+ btf_resolve_size(const struct btf *btf, const struct btf_type *type,
+ 		 u32 *type_size, const struct btf_type **elem_type,
+ 		 u32 *total_nelems)
+ {
+ 	const struct btf_type *array_type = NULL;
+ 	const struct btf_array *array;
+ 	u32 i, size, nelems = 1;
+
+ 	for (i = 0; i < MAX_RESOLVE_DEPTH; i++) {
+ 		switch (BTF_INFO_KIND(type->info)) {
+ 		/* type->size can be used */
+ 		case BTF_KIND_INT:
+ 		case BTF_KIND_STRUCT:
+ 		case BTF_KIND_UNION:
+ 		case BTF_KIND_ENUM:
+ 			size = type->size;
+ 			goto resolved;
+
+ 		case BTF_KIND_PTR:
+ 			size = sizeof(void *);
+ 			goto resolved;
+
+ 		/* Modifiers */
+ 		case BTF_KIND_TYPEDEF:
+ 		case BTF_KIND_VOLATILE:
+ 		case BTF_KIND_CONST:
+ 		case BTF_KIND_RESTRICT:
+ 			type = btf_type_by_id(btf, type->type);
+ 			break;
+
+ 		case BTF_KIND_ARRAY:
+ 			if (!array_type)
+ 				array_type = type;
+ 			array = btf_type_array(type);
+ 			if (nelems && array->nelems > U32_MAX / nelems)
+ 				return ERR_PTR(-EINVAL);
+ 			nelems *= array->nelems;
+ 			type = btf_type_by_id(btf, array->type);
+ 			break;
+
+ 		/* type without size */
+ 		default:
+ 			return ERR_PTR(-EINVAL);
+ 		}
+ 	}
+
+ 	return ERR_PTR(-EINVAL);
+
+ resolved:
+ 	if (nelems && size > U32_MAX / nelems)
+ 		return ERR_PTR(-EINVAL);
+
+ 	*type_size = nelems * size;
+ 	*total_nelems = nelems;
+ 	*elem_type = type;
+
+ 	return array_type ? : type;
+ }

  /* The input param "type_id" must point to a needs_resolve type */
···
  extern char __weak _binary__btf_vmlinux_bin_start[];
  extern char __weak _binary__btf_vmlinux_bin_end[];
+ extern struct btf *btf_vmlinux;
+
+ #define BPF_MAP_TYPE(_id, _ops)
+ static union {
+ 	struct bpf_ctx_convert {
+ #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ 	prog_ctx_type _id##_prog; \
+ 	kern_ctx_type _id##_kern;
+ #include <linux/bpf_types.h>
+ #undef BPF_PROG_TYPE
+ 	} *__t;
+ 	/* 't' is written once under lock. Read many times. */
+ 	const struct btf_type *t;
+ } bpf_ctx_convert;
+ enum {
+ #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ 	__ctx_convert##_id,
+ #include <linux/bpf_types.h>
+ #undef BPF_PROG_TYPE
+ };
+ static u8 bpf_ctx_convert_map[] = {
+ #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
+ 	[_id] = __ctx_convert##_id,
+ #include <linux/bpf_types.h>
+ #undef BPF_PROG_TYPE
+ };
+ #undef BPF_MAP_TYPE
+
+ static const struct btf_member *
+ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
+ 		      const struct btf_type *t, enum bpf_prog_type prog_type)
+ {
+ 	const struct btf_type *conv_struct;
+ 	const struct btf_type *ctx_struct;
+ 	const struct btf_member *ctx_type;
+ 	const char *tname, *ctx_tname;
+
+ 	conv_struct = bpf_ctx_convert.t;
+ 	if (!conv_struct) {
+ 		bpf_log(log, "btf_vmlinux is malformed\n");
+ 		return NULL;
+ 	}
+ 	t = btf_type_by_id(btf, t->type);
+ 	while (btf_type_is_modifier(t))
+ 		t = btf_type_by_id(btf, t->type);
+ 	if (!btf_type_is_struct(t)) {
+ 		/* Only pointer to struct is supported for now.
+ 		 * That means that BPF_PROG_TYPE_TRACEPOINT with BTF
+ 		 * is not supported yet.
+ 		 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
+ 		 */
+ 		bpf_log(log, "BPF program ctx type is not a struct\n");
+ 		return NULL;
+ 	}
+ 	tname = btf_name_by_offset(btf, t->name_off);
+ 	if (!tname) {
+ 		bpf_log(log, "BPF program ctx struct doesn't have a name\n");
+ 		return NULL;
+ 	}
+ 	/* prog_type is valid bpf program type. No need for bounds check. */
+ 	ctx_type = btf_type_member(conv_struct) + bpf_ctx_convert_map[prog_type] * 2;
+ 	/* ctx_struct is a pointer to prog_ctx_type in vmlinux.
+ 	 * Like 'struct __sk_buff'
+ 	 */
+ 	ctx_struct = btf_type_by_id(btf_vmlinux, ctx_type->type);
+ 	if (!ctx_struct)
+ 		/* should not happen */
+ 		return NULL;
+ 	ctx_tname = btf_name_by_offset(btf_vmlinux, ctx_struct->name_off);
+ 	if (!ctx_tname) {
+ 		/* should not happen */
+ 		bpf_log(log, "Please fix kernel include/linux/bpf_types.h\n");
+ 		return NULL;
+ 	}
+ 	/* only compare that prog's ctx type name is the same as
+ 	 * kernel expects. No need to compare field by field.
+ 	 * It's ok for bpf prog to do:
+ 	 * struct __sk_buff {};
+ 	 * int socket_filter_bpf_prog(struct __sk_buff *skb)
+ 	 * { // no fields of skb are ever used }
+ 	 */
+ 	if (strcmp(ctx_tname, tname))
+ 		return NULL;
+ 	return ctx_type;
+ }
+
+ static int btf_translate_to_vmlinux(struct bpf_verifier_log *log,
+ 				    struct btf *btf,
+ 				    const struct btf_type *t,
+ 				    enum bpf_prog_type prog_type)
+ {
+ 	const struct btf_member *prog_ctx_type, *kern_ctx_type;
+
+ 	prog_ctx_type = btf_get_prog_ctx_type(log, btf, t, prog_type);
+ 	if (!prog_ctx_type)
+ 		return -ENOENT;
+ 	kern_ctx_type = prog_ctx_type + 1;
+ 	return kern_ctx_type->type;
+ }

  struct btf *btf_parse_vmlinux(void)
  {
  	struct btf_verifier_env *env = NULL;
  	struct bpf_verifier_log *log;
  	struct btf *btf = NULL;
- 	int err;
+ 	int err, i;

  	env = kzalloc(sizeof(*env), GFP_KERNEL | __GFP_NOWARN);
  	if (!env)
···
  	if (err)
  		goto errout;

+ 	/* find struct bpf_ctx_convert for type checking later */
+ 	for (i = 1; i <= btf->nr_types; i++) {
+ 		const struct btf_type *t;
+ 		const char *tname;
+
+ 		t = btf_type_by_id(btf, i);
+ 		if (!__btf_type_is_struct(t))
+ 			continue;
+ 		tname = __btf_name_by_offset(btf, t->name_off);
+ 		if (!strcmp(tname, "bpf_ctx_convert")) {
+ 			/* btf_parse_vmlinux() runs under bpf_verifier_lock */
+ 			bpf_ctx_convert.t = t;
+ 			break;
+ 		}
+ 	}
+ 	if (i > btf->nr_types) {
+ 		err = -ENOENT;
+ 		goto errout;
+ 	}
+
  	btf_verifier_env_free(env);
  	refcount_set(&btf->refcnt, 1);
  	return btf;
···
  	return ERR_PTR(err);
  }

- extern struct btf *btf_vmlinux;
+ struct btf *bpf_prog_get_target_btf(const struct bpf_prog *prog)
+ {
+ 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+
+ 	if (tgt_prog) {
+ 		return tgt_prog->aux->btf;
+ 	} else {
+ 		return btf_vmlinux;
+ 	}
+ }

  bool btf_ctx_access(int off, int size, enum bpf_access_type type,
  		    const struct bpf_prog *prog,
  		    struct bpf_insn_access_aux *info)
  {
  	const struct btf_type *t = prog->aux->attach_func_proto;
+ 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
+ 	struct btf *btf = bpf_prog_get_target_btf(prog);
  	const char *tname = prog->aux->attach_func_name;
  	struct bpf_verifier_log *log = info->log;
  	const struct btf_param *args;
  	u32 nr_args, arg;
+ 	int ret;

  	if (off % 8) {
  		bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
···
  	}
  	arg = off / 8;
  	args = (const struct btf_param *)(t + 1);
- 	nr_args = btf_type_vlen(t);
+ 	/* if (t == NULL) Fall back to default BPF prog with 5 u64 arguments */
+ 	nr_args = t ? btf_type_vlen(t) : 5;
  	if (prog->aux->attach_btf_trace) {
  		/* skip first 'void *__data' argument in btf_trace_##name typedef */
  		args++;
  		nr_args--;
  	}
- 	if (arg >= nr_args) {
- 		bpf_log(log, "func '%s' doesn't have %d-th argument\n",
- 			tname, arg);
- 		return false;
- 	}

- 	t = btf_type_by_id(btf_vmlinux, args[arg].type);
+ 	if (prog->expected_attach_type == BPF_TRACE_FEXIT &&
+ 	    arg == nr_args) {
+ 		if (!t)
+ 			/* Default prog with 5 args. 6th arg is retval. */
+ 			return true;
+ 		/* function return type */
+ 		t = btf_type_by_id(btf, t->type);
+ 	} else if (arg >= nr_args) {
+ 		bpf_log(log, "func '%s' doesn't have %d-th argument\n",
+ 			tname, arg + 1);
+ 		return false;
+ 	} else {
+ 		if (!t)
+ 			/* Default prog with 5 args */
+ 			return true;
+ 		t = btf_type_by_id(btf, args[arg].type);
+ 	}
  	/* skip modifiers */
  	while (btf_type_is_modifier(t))
- 		t = btf_type_by_id(btf_vmlinux, t->type);
+ 		t = btf_type_by_id(btf, t->type);
  	if (btf_type_is_int(t))
  		/* accessing a scalar */
  		return true;
···
  		bpf_log(log,
  			"func '%s' arg%d '%s' has type %s. Only pointer access is allowed\n",
  			tname, arg,
- 			__btf_name_by_offset(btf_vmlinux, t->name_off),
+ 			__btf_name_by_offset(btf, t->name_off),
  			btf_kind_str[BTF_INFO_KIND(t->info)]);
  		return false;
  	}
···
  	info->reg_type = PTR_TO_BTF_ID;
  	info->btf_id = t->type;

- 	t = btf_type_by_id(btf_vmlinux, t->type);
+ 	if (tgt_prog) {
+ 		ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type);
+ 		if (ret > 0) {
+ 			info->btf_id = ret;
+ 			return true;
+ 		} else {
+ 			return false;
+ 		}
+ 	}
+ 	t = btf_type_by_id(btf, t->type);
  	/* skip modifiers */
  	while (btf_type_is_modifier(t))
- 		t = btf_type_by_id(btf_vmlinux, t->type);
+ 		t = btf_type_by_id(btf, t->type);
  	if (!btf_type_is_struct(t)) {
  		bpf_log(log,
  			"func '%s' arg%d type %s is not a struct\n",
···
  	}
  	bpf_log(log, "func '%s' arg%d has btf_id %d type %s '%s'\n",
  		tname, arg, info->btf_id, btf_kind_str[BTF_INFO_KIND(t->info)],
- 		__btf_name_by_offset(btf_vmlinux, t->name_off));
+ 		__btf_name_by_offset(btf, t->name_off));
  	return true;
  }
···
  			 enum bpf_access_type atype,
u32 *next_btf_id) 3729 3496 { 3497 + u32 i, moff, mtrue_end, msize = 0, total_nelems = 0; 3498 + const struct btf_type *mtype, *elem_type = NULL; 3730 3499 const struct btf_member *member; 3731 - const struct btf_type *mtype; 3732 3500 const char *tname, *mname; 3733 - int i, moff = 0, msize; 3734 3501 3735 3502 again: 3736 3503 tname = __btf_name_by_offset(btf_vmlinux, t->name_off); ··· 3740 3507 } 3741 3508 3742 3509 for_each_member(i, t, member) { 3743 - /* offset of the field in bits */ 3744 - moff = btf_member_bit_offset(t, member); 3745 - 3746 3510 if (btf_member_bitfield_size(t, member)) 3747 3511 /* bitfields are not supported yet */ 3748 3512 continue; 3749 3513 3750 - if (off + size <= moff / 8) 3514 + /* offset of the field in bytes */ 3515 + moff = btf_member_bit_offset(t, member) / 8; 3516 + if (off + size <= moff) 3751 3517 /* won't find anything, field is already too far */ 3752 3518 break; 3519 + /* In case of "off" is pointing to holes of a struct */ 3520 + if (off < moff) 3521 + continue; 3753 3522 3754 3523 /* type of the field */ 3755 3524 mtype = btf_type_by_id(btf_vmlinux, member->type); 3756 3525 mname = __btf_name_by_offset(btf_vmlinux, member->name_off); 3757 3526 3758 - /* skip modifiers */ 3759 - while (btf_type_is_modifier(mtype)) 3760 - mtype = btf_type_by_id(btf_vmlinux, mtype->type); 3761 - 3762 - if (btf_type_is_array(mtype)) 3763 - /* array deref is not supported yet */ 3764 - continue; 3765 - 3766 - if (!btf_type_has_size(mtype) && !btf_type_is_ptr(mtype)) { 3527 + mtype = btf_resolve_size(btf_vmlinux, mtype, &msize, 3528 + &elem_type, &total_nelems); 3529 + if (IS_ERR(mtype)) { 3767 3530 bpf_log(log, "field %s doesn't have size\n", mname); 3768 3531 return -EFAULT; 3769 3532 } 3770 - if (btf_type_is_ptr(mtype)) 3771 - msize = 8; 3772 - else 3773 - msize = mtype->size; 3774 - if (off >= moff / 8 + msize) 3533 + 3534 + mtrue_end = moff + msize; 3535 + if (off >= mtrue_end) 3775 3536 /* no overlap with member, keep iterating */ 3776 
3537 continue; 3538 + 3539 + if (btf_type_is_array(mtype)) { 3540 + u32 elem_idx; 3541 + 3542 + /* btf_resolve_size() above helps to 3543 + * linearize a multi-dimensional array. 3544 + * 3545 + * The logic here is treating an array 3546 + * in a struct as the following way: 3547 + * 3548 + * struct outer { 3549 + * struct inner array[2][2]; 3550 + * }; 3551 + * 3552 + * looks like: 3553 + * 3554 + * struct outer { 3555 + * struct inner array_elem0; 3556 + * struct inner array_elem1; 3557 + * struct inner array_elem2; 3558 + * struct inner array_elem3; 3559 + * }; 3560 + * 3561 + * When accessing outer->array[1][0], it moves 3562 + * moff to "array_elem2", set mtype to 3563 + * "struct inner", and msize also becomes 3564 + * sizeof(struct inner). Then most of the 3565 + * remaining logic will fall through without 3566 + * caring the current member is an array or 3567 + * not. 3568 + * 3569 + * Unlike mtype/msize/moff, mtrue_end does not 3570 + * change. The naming difference ("_true") tells 3571 + * that it is not always corresponding to 3572 + * the current mtype/msize/moff. 3573 + * It is the true end of the current 3574 + * member (i.e. array in this case). That 3575 + * will allow an int array to be accessed like 3576 + * a scratch space, 3577 + * i.e. allow access beyond the size of 3578 + * the array's element as long as it is 3579 + * within the mtrue_end boundary. 
3580 + */ 3581 + 3582 + /* skip empty array */ 3583 + if (moff == mtrue_end) 3584 + continue; 3585 + 3586 + msize /= total_nelems; 3587 + elem_idx = (off - moff) / msize; 3588 + moff += elem_idx * msize; 3589 + mtype = elem_type; 3590 + } 3591 + 3777 3592 /* the 'off' we're looking for is either equal to start 3778 3593 * of this field or inside of this struct 3779 3594 */ ··· 3830 3549 t = mtype; 3831 3550 3832 3551 /* adjust offset we're looking for */ 3833 - off -= moff / 8; 3552 + off -= moff; 3834 3553 goto again; 3835 - } 3836 - if (msize != size) { 3837 - /* field access size doesn't match */ 3838 - bpf_log(log, 3839 - "cannot access %d bytes in struct %s field %s that has size %d\n", 3840 - size, tname, mname, msize); 3841 - return -EACCES; 3842 3554 } 3843 3555 3844 3556 if (btf_type_is_ptr(mtype)) { 3845 3557 const struct btf_type *stype; 3558 + 3559 + if (msize != size || off != moff) { 3560 + bpf_log(log, 3561 + "cannot access ptr member %s with moff %u in struct %s with off %u size %u\n", 3562 + mname, moff, tname, off, size); 3563 + return -EACCES; 3564 + } 3846 3565 3847 3566 stype = btf_type_by_id(btf_vmlinux, mtype->type); 3848 3567 /* skip modifiers */ ··· 3853 3572 return PTR_TO_BTF_ID; 3854 3573 } 3855 3574 } 3856 - /* all other fields are treated as scalars */ 3575 + 3576 + /* Allow more flexible access within an int as long as 3577 + * it is within mtrue_end. 3578 + * Since mtrue_end could be the end of an array, 3579 + * that also allows using an array of int as a scratch 3580 + * space. e.g. skb->cb[]. 
3581 + */ 3582 + if (off + size > mtrue_end) { 3583 + bpf_log(log, 3584 + "access beyond the end of member %s (mend:%u) in struct %s with off %u size %u\n", 3585 + mname, mtrue_end, tname, off, size); 3586 + return -EACCES; 3587 + } 3588 + 3857 3589 return SCALAR_VALUE; 3858 3590 } 3859 3591 bpf_log(log, "struct %s doesn't have field at offset %d\n", tname, off); 3860 3592 return -EINVAL; 3861 3593 } 3862 3594 3863 - u32 btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, int arg) 3595 + static int __btf_resolve_helper_id(struct bpf_verifier_log *log, void *fn, 3596 + int arg) 3864 3597 { 3865 3598 char fnname[KSYM_SYMBOL_LEN + 4] = "btf_"; 3866 3599 const struct btf_param *args; ··· 3940 3645 bpf_log(log, "helper %s arg%d has btf_id %d struct %s\n", fnname + 4, 3941 3646 arg, btf_id, __btf_name_by_offset(btf_vmlinux, t->name_off)); 3942 3647 return btf_id; 3648 + } 3649 + 3650 + int btf_resolve_helper_id(struct bpf_verifier_log *log, 3651 + const struct bpf_func_proto *fn, int arg) 3652 + { 3653 + int *btf_id = &fn->btf_id[arg]; 3654 + int ret; 3655 + 3656 + if (fn->arg_type[arg] != ARG_PTR_TO_BTF_ID) 3657 + return -EINVAL; 3658 + 3659 + ret = READ_ONCE(*btf_id); 3660 + if (ret) 3661 + return ret; 3662 + /* ok to race the search. The result is the same */ 3663 + ret = __btf_resolve_helper_id(log, fn->func, arg); 3664 + if (!ret) { 3665 + /* Function argument cannot be type 'void' */ 3666 + bpf_log(log, "BTF resolution bug\n"); 3667 + return -EFAULT; 3668 + } 3669 + WRITE_ONCE(*btf_id, ret); 3670 + return ret; 3671 + } 3672 + 3673 + static int __get_type_size(struct btf *btf, u32 btf_id, 3674 + const struct btf_type **bad_type) 3675 + { 3676 + const struct btf_type *t; 3677 + 3678 + if (!btf_id) 3679 + /* void */ 3680 + return 0; 3681 + t = btf_type_by_id(btf, btf_id); 3682 + while (t && btf_type_is_modifier(t)) 3683 + t = btf_type_by_id(btf, t->type); 3684 + if (!t) 3685 + return -EINVAL; 3686 + if (btf_type_is_ptr(t)) 3687 + /* kernel size of pointer. 
Not BPF's size of pointer*/ 3688 + return sizeof(void *); 3689 + if (btf_type_is_int(t) || btf_type_is_enum(t)) 3690 + return t->size; 3691 + *bad_type = t; 3692 + return -EINVAL; 3693 + } 3694 + 3695 + int btf_distill_func_proto(struct bpf_verifier_log *log, 3696 + struct btf *btf, 3697 + const struct btf_type *func, 3698 + const char *tname, 3699 + struct btf_func_model *m) 3700 + { 3701 + const struct btf_param *args; 3702 + const struct btf_type *t; 3703 + u32 i, nargs; 3704 + int ret; 3705 + 3706 + if (!func) { 3707 + /* BTF function prototype doesn't match the verifier types. 3708 + * Fall back to 5 u64 args. 3709 + */ 3710 + for (i = 0; i < 5; i++) 3711 + m->arg_size[i] = 8; 3712 + m->ret_size = 8; 3713 + m->nr_args = 5; 3714 + return 0; 3715 + } 3716 + args = (const struct btf_param *)(func + 1); 3717 + nargs = btf_type_vlen(func); 3718 + if (nargs >= MAX_BPF_FUNC_ARGS) { 3719 + bpf_log(log, 3720 + "The function %s has %d arguments. Too many.\n", 3721 + tname, nargs); 3722 + return -EINVAL; 3723 + } 3724 + ret = __get_type_size(btf, func->type, &t); 3725 + if (ret < 0) { 3726 + bpf_log(log, 3727 + "The function %s return type %s is unsupported.\n", 3728 + tname, btf_kind_str[BTF_INFO_KIND(t->info)]); 3729 + return -EINVAL; 3730 + } 3731 + m->ret_size = ret; 3732 + 3733 + for (i = 0; i < nargs; i++) { 3734 + ret = __get_type_size(btf, args[i].type, &t); 3735 + if (ret < 0) { 3736 + bpf_log(log, 3737 + "The function %s arg%d type %s is unsupported.\n", 3738 + tname, i, btf_kind_str[BTF_INFO_KIND(t->info)]); 3739 + return -EINVAL; 3740 + } 3741 + m->arg_size[i] = ret; 3742 + } 3743 + m->nr_args = nargs; 3744 + return 0; 3745 + } 3746 + 3747 + int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog) 3748 + { 3749 + struct bpf_verifier_state *st = env->cur_state; 3750 + struct bpf_func_state *func = st->frame[st->curframe]; 3751 + struct bpf_reg_state *reg = func->regs; 3752 + struct bpf_verifier_log *log = &env->log; 3753 + struct bpf_prog *prog 
= env->prog; 3754 + struct btf *btf = prog->aux->btf; 3755 + const struct btf_param *args; 3756 + const struct btf_type *t; 3757 + u32 i, nargs, btf_id; 3758 + const char *tname; 3759 + 3760 + if (!prog->aux->func_info) 3761 + return 0; 3762 + 3763 + btf_id = prog->aux->func_info[subprog].type_id; 3764 + if (!btf_id) 3765 + return 0; 3766 + 3767 + if (prog->aux->func_info_aux[subprog].unreliable) 3768 + return 0; 3769 + 3770 + t = btf_type_by_id(btf, btf_id); 3771 + if (!t || !btf_type_is_func(t)) { 3772 + bpf_log(log, "BTF of subprog %d doesn't point to KIND_FUNC\n", 3773 + subprog); 3774 + return -EINVAL; 3775 + } 3776 + tname = btf_name_by_offset(btf, t->name_off); 3777 + 3778 + t = btf_type_by_id(btf, t->type); 3779 + if (!t || !btf_type_is_func_proto(t)) { 3780 + bpf_log(log, "Invalid type of func %s\n", tname); 3781 + return -EINVAL; 3782 + } 3783 + args = (const struct btf_param *)(t + 1); 3784 + nargs = btf_type_vlen(t); 3785 + if (nargs > 5) { 3786 + bpf_log(log, "Function %s has %d > 5 args\n", tname, nargs); 3787 + goto out; 3788 + } 3789 + /* check that BTF function arguments match actual types that the 3790 + * verifier sees. 3791 + */ 3792 + for (i = 0; i < nargs; i++) { 3793 + t = btf_type_by_id(btf, args[i].type); 3794 + while (btf_type_is_modifier(t)) 3795 + t = btf_type_by_id(btf, t->type); 3796 + if (btf_type_is_int(t) || btf_type_is_enum(t)) { 3797 + if (reg[i + 1].type == SCALAR_VALUE) 3798 + continue; 3799 + bpf_log(log, "R%d is not a scalar\n", i + 1); 3800 + goto out; 3801 + } 3802 + if (btf_type_is_ptr(t)) { 3803 + if (reg[i + 1].type == SCALAR_VALUE) { 3804 + bpf_log(log, "R%d is not a pointer\n", i + 1); 3805 + goto out; 3806 + } 3807 + /* If program is passing PTR_TO_CTX into subprogram 3808 + * check that BTF type matches. 
3809 + */ 3810 + if (reg[i + 1].type == PTR_TO_CTX && 3811 + !btf_get_prog_ctx_type(log, btf, t, prog->type)) 3812 + goto out; 3813 + /* All other pointers are ok */ 3814 + continue; 3815 + } 3816 + bpf_log(log, "Unrecognized argument type %s\n", 3817 + btf_kind_str[BTF_INFO_KIND(t->info)]); 3818 + goto out; 3819 + } 3820 + return 0; 3821 + out: 3822 + /* LLVM optimizations can remove arguments from static functions. */ 3823 + bpf_log(log, 3824 + "Type info disagrees with actual arguments due to compiler optimizations\n"); 3825 + prog->aux->func_info_aux[subprog].unreliable = true; 3826 + return 0; 3943 3827 } 3944 3828 3945 3829 void btf_type_seq_show(const struct btf *btf, u32 type_id, void *obj,
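The btf.c hunks above add `btf_distill_func_proto()`, which records a traced function's argument and return sizes. When no BTF prototype is available (`func == NULL`), it falls back to modeling the function as five u64 arguments plus a u64 return value. A minimal userspace sketch of just that fallback branch, with struct and names simplified from the kernel's `struct btf_func_model`:

```c
#include <assert.h>

#define SKETCH_MAX_ARGS 5

/* Simplified stand-in for the kernel's struct btf_func_model */
struct func_model {
	unsigned char arg_size[SKETCH_MAX_ARGS];
	unsigned char ret_size;
	unsigned char nr_args;
};

/* Mirrors the "!func" branch of btf_distill_func_proto(): with no BTF
 * prototype to consult, assume five 8-byte args and an 8-byte retval. */
static int distill_default(struct func_model *m)
{
	int i;

	for (i = 0; i < SKETCH_MAX_ARGS; i++)
		m->arg_size[i] = 8;
	m->ret_size = 8;
	m->nr_args = SKETCH_MAX_ARGS;
	return 0;
}
```

This matches how `btf_ctx_access()` above treats the no-BTF case ("Default prog with 5 args"), where the sixth slot holds the return value for BPF_TRACE_FEXIT programs.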
+14 -1
kernel/bpf/core.c
··· 31 31 #include <linux/rcupdate.h> 32 32 #include <linux/perf_event.h> 33 33 #include <linux/extable.h> 34 + #include <linux/log2.h> 34 35 #include <asm/unaligned.h> 35 36 36 37 /* Registers */ ··· 816 815 struct bpf_binary_header *hdr; 817 816 u32 size, hole, start, pages; 818 817 818 + WARN_ON_ONCE(!is_power_of_2(alignment) || 819 + alignment > BPF_IMAGE_ALIGNMENT); 820 + 819 821 /* Most of BPF filters are really small, but if some of them 820 822 * fill a page, allow at least 128 extra bytes to insert a 821 823 * random section of illegal instructions. ··· 1573 1569 #undef LDST 1574 1570 #define LDX_PROBE(SIZEOP, SIZE) \ 1575 1571 LDX_PROBE_MEM_##SIZEOP: \ 1576 - bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) SRC); \ 1572 + bpf_probe_read_kernel(&DST, SIZE, (const void *)(long) (SRC + insn->off)); \ 1577 1573 CONT; 1578 1574 LDX_PROBE(B, 1) 1579 1575 LDX_PROBE(H, 2) ··· 2015 2011 if (aux->prog->has_callchain_buf) 2016 2012 put_callchain_buffers(); 2017 2013 #endif 2014 + bpf_trampoline_put(aux->trampoline); 2018 2015 for (i = 0; i < aux->func_cnt; i++) 2019 2016 bpf_jit_free(aux->func[i]); 2020 2017 if (aux->func_cnt) { ··· 2031 2026 { 2032 2027 struct bpf_prog_aux *aux = fp->aux; 2033 2028 2029 + if (aux->linked_prog) 2030 + bpf_prog_put(aux->linked_prog); 2034 2031 INIT_WORK(&aux->work, bpf_prog_free_deferred); 2035 2032 schedule_work(&aux->work); 2036 2033 } ··· 2145 2138 int len) 2146 2139 { 2147 2140 return -EFAULT; 2141 + } 2142 + 2143 + int __weak bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, 2144 + void *addr1, void *addr2) 2145 + { 2146 + return -ENOTSUPP; 2148 2147 } 2149 2148 2150 2149 DEFINE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
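The core.c hunk adds a `WARN_ON_ONCE` guard that the requested JIT image alignment is a power of two and within `BPF_IMAGE_ALIGNMENT`. The `is_power_of_2()` helper comes from the newly included `<linux/log2.h>`; its logic in a self-contained userspace form:

```c
#include <assert.h>

/* Userspace equivalent of the kernel's is_power_of_2(): a power of two
 * has exactly one bit set, so n & (n - 1) clears it to zero. Zero is
 * explicitly excluded. */
static int sketch_is_power_of_2(unsigned long n)
{
	return n != 0 && (n & (n - 1)) == 0;
}
```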
+4 -3
kernel/bpf/inode.c
··· 31 31 { 32 32 switch (type) { 33 33 case BPF_TYPE_PROG: 34 - raw = bpf_prog_inc(raw); 34 + bpf_prog_inc(raw); 35 35 break; 36 36 case BPF_TYPE_MAP: 37 - raw = bpf_map_inc(raw, true); 37 + bpf_map_inc_with_uref(raw); 38 38 break; 39 39 default: 40 40 WARN_ON_ONCE(1); ··· 534 534 if (!bpf_prog_get_ok(prog, &type, false)) 535 535 return ERR_PTR(-EINVAL); 536 536 537 - return bpf_prog_inc(prog); 537 + bpf_prog_inc(prog); 538 + return prog; 538 539 } 539 540 540 541 struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type)
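The inode.c callers above drop their return-value checks because `bpf_prog_inc()` and `bpf_map_inc()` now return void: with the 64-bit reference counters introduced elsewhere in this series, the old `BPF_MAX_REFCNT` overflow check disappears, and only the "increment unless already zero" path can still fail. A single-threaded sketch of that remaining failure mode (the real code uses `atomic64_fetch_add_unless()`; names and the error value here are illustrative):

```c
#include <assert.h>
#include <stdint.h>

/* Single-threaded stand-in for atomic64_fetch_add_unless(): add 'a'
 * unless the counter equals 'u', and return the old value. */
static int64_t fetch_add_unless(int64_t *v, int64_t a, int64_t u)
{
	int64_t old = *v;

	if (old != u)
		*v += a;
	return old;
}

/* Mirrors the inc_not_zero pattern: plain inc can no longer overflow,
 * so only "refcount already dropped to zero" can fail. Returns 0 on
 * success, -2 standing in for the kernel's ERR_PTR(-ENOENT). */
static int inc_not_zero(int64_t *refcnt)
{
	return fetch_add_unless(refcnt, 1, 0) ? 0 : -2;
}
```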
+1 -1
kernel/bpf/map_in_map.c
··· 98 98 return inner_map; 99 99 100 100 if (bpf_map_meta_equal(map->inner_map_meta, inner_map)) 101 - inner_map = bpf_map_inc(inner_map, false); 101 + bpf_map_inc(inner_map); 102 102 else 103 103 inner_map = ERR_PTR(-EINVAL); 104 104
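In the map_in_map.c hunk, a candidate inner map only gets its reference taken after `bpf_map_meta_equal()` confirms it is compatible with the template recorded at map-in-map creation time. A hypothetical sketch of such a geometry gate (the kernel's actual check compares more fields, including map type and flags; this reduced version is for illustration only):

```c
#include <assert.h>

/* Illustrative subset of the metadata an inner map must match */
struct map_meta {
	unsigned int key_size;
	unsigned int value_size;
	unsigned int max_entries;
};

/* Accept an inner map only if its geometry matches the template;
 * mismatched maps are rejected before any refcount is taken. */
static int meta_equal(const struct map_meta *tmpl, const struct map_meta *cand)
{
	return tmpl->key_size == cand->key_size &&
	       tmpl->value_size == cand->value_size &&
	       tmpl->max_entries == cand->max_entries;
}
```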
+222 -60
kernel/bpf/syscall.c
··· 23 23 #include <linux/timekeeping.h> 24 24 #include <linux/ctype.h> 25 25 #include <linux/nospec.h> 26 + #include <linux/audit.h> 26 27 #include <uapi/linux/btf.h> 27 28 28 29 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PROG_ARRAY || \ ··· 44 43 int sysctl_unprivileged_bpf_disabled __read_mostly; 45 44 46 45 static const struct bpf_map_ops * const bpf_map_types[] = { 47 - #define BPF_PROG_TYPE(_id, _ops) 46 + #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) 48 47 #define BPF_MAP_TYPE(_id, _ops) \ 49 48 [_id] = &_ops, 50 49 #include <linux/bpf_types.h> ··· 128 127 return map; 129 128 } 130 129 131 - void *bpf_map_area_alloc(u64 size, int numa_node) 130 + static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable) 132 131 { 133 132 /* We really just want to fail instead of triggering OOM killer 134 133 * under memory pressure, therefore we set __GFP_NORETRY to kmalloc, ··· 146 145 if (size >= SIZE_MAX) 147 146 return NULL; 148 147 149 - if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 148 + /* kmalloc()'ed memory can't be mmap()'ed */ 149 + if (!mmapable && size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) { 150 150 area = kmalloc_node(size, GFP_USER | __GFP_NORETRY | flags, 151 151 numa_node); 152 152 if (area != NULL) 153 153 return area; 154 154 } 155 - 155 + if (mmapable) { 156 + BUG_ON(!PAGE_ALIGNED(size)); 157 + return vmalloc_user_node_flags(size, numa_node, GFP_KERNEL | 158 + __GFP_RETRY_MAYFAIL | flags); 159 + } 156 160 return __vmalloc_node_flags_caller(size, numa_node, 157 161 GFP_KERNEL | __GFP_RETRY_MAYFAIL | 158 162 flags, __builtin_return_address(0)); 163 + } 164 + 165 + void *bpf_map_area_alloc(u64 size, int numa_node) 166 + { 167 + return __bpf_map_area_alloc(size, numa_node, false); 168 + } 169 + 170 + void *bpf_map_area_mmapable_alloc(u64 size, int numa_node) 171 + { 172 + return __bpf_map_area_alloc(size, numa_node, true); 159 173 } 160 174 161 175 void bpf_map_area_free(void *area) ··· 330 314 331 
315 static void bpf_map_put_uref(struct bpf_map *map) 332 316 { 333 - if (atomic_dec_and_test(&map->usercnt)) { 317 + if (atomic64_dec_and_test(&map->usercnt)) { 334 318 if (map->ops->map_release_uref) 335 319 map->ops->map_release_uref(map); 336 320 } ··· 341 325 */ 342 326 static void __bpf_map_put(struct bpf_map *map, bool do_idr_lock) 343 327 { 344 - if (atomic_dec_and_test(&map->refcnt)) { 328 + if (atomic64_dec_and_test(&map->refcnt)) { 345 329 /* bpf_map_free_id() must be called first */ 346 330 bpf_map_free_id(map, do_idr_lock); 347 331 btf_put(map->btf); ··· 444 428 return -EINVAL; 445 429 } 446 430 431 + /* called for any extra memory-mapped regions (except initial) */ 432 + static void bpf_map_mmap_open(struct vm_area_struct *vma) 433 + { 434 + struct bpf_map *map = vma->vm_file->private_data; 435 + 436 + bpf_map_inc_with_uref(map); 437 + 438 + if (vma->vm_flags & VM_WRITE) { 439 + mutex_lock(&map->freeze_mutex); 440 + map->writecnt++; 441 + mutex_unlock(&map->freeze_mutex); 442 + } 443 + } 444 + 445 + /* called for all unmapped memory region (including initial) */ 446 + static void bpf_map_mmap_close(struct vm_area_struct *vma) 447 + { 448 + struct bpf_map *map = vma->vm_file->private_data; 449 + 450 + if (vma->vm_flags & VM_WRITE) { 451 + mutex_lock(&map->freeze_mutex); 452 + map->writecnt--; 453 + mutex_unlock(&map->freeze_mutex); 454 + } 455 + 456 + bpf_map_put_with_uref(map); 457 + } 458 + 459 + static const struct vm_operations_struct bpf_map_default_vmops = { 460 + .open = bpf_map_mmap_open, 461 + .close = bpf_map_mmap_close, 462 + }; 463 + 464 + static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) 465 + { 466 + struct bpf_map *map = filp->private_data; 467 + int err; 468 + 469 + if (!map->ops->map_mmap || map_value_has_spin_lock(map)) 470 + return -ENOTSUPP; 471 + 472 + if (!(vma->vm_flags & VM_SHARED)) 473 + return -EINVAL; 474 + 475 + mutex_lock(&map->freeze_mutex); 476 + 477 + if ((vma->vm_flags & VM_WRITE) && map->frozen) { 
478 + err = -EPERM; 479 + goto out; 480 + } 481 + 482 + /* set default open/close callbacks */ 483 + vma->vm_ops = &bpf_map_default_vmops; 484 + vma->vm_private_data = map; 485 + 486 + err = map->ops->map_mmap(map, vma); 487 + if (err) 488 + goto out; 489 + 490 + bpf_map_inc_with_uref(map); 491 + 492 + if (vma->vm_flags & VM_WRITE) 493 + map->writecnt++; 494 + out: 495 + mutex_unlock(&map->freeze_mutex); 496 + return err; 497 + } 498 + 447 499 const struct file_operations bpf_map_fops = { 448 500 #ifdef CONFIG_PROC_FS 449 501 .show_fdinfo = bpf_map_show_fdinfo, ··· 519 435 .release = bpf_map_release, 520 436 .read = bpf_dummy_read, 521 437 .write = bpf_dummy_write, 438 + .mmap = bpf_map_mmap, 522 439 }; 523 440 524 441 int bpf_map_new_fd(struct bpf_map *map, int flags) ··· 663 578 if (err) 664 579 goto free_map; 665 580 666 - atomic_set(&map->refcnt, 1); 667 - atomic_set(&map->usercnt, 1); 581 + atomic64_set(&map->refcnt, 1); 582 + atomic64_set(&map->usercnt, 1); 583 + mutex_init(&map->freeze_mutex); 668 584 669 585 if (attr->btf_key_type_id || attr->btf_value_type_id) { 670 586 struct btf *btf; ··· 742 656 return f.file->private_data; 743 657 } 744 658 745 - /* prog's and map's refcnt limit */ 746 - #define BPF_MAX_REFCNT 32768 747 - 748 - struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref) 659 + void bpf_map_inc(struct bpf_map *map) 749 660 { 750 - if (atomic_inc_return(&map->refcnt) > BPF_MAX_REFCNT) { 751 - atomic_dec(&map->refcnt); 752 - return ERR_PTR(-EBUSY); 753 - } 754 - if (uref) 755 - atomic_inc(&map->usercnt); 756 - return map; 661 + atomic64_inc(&map->refcnt); 757 662 } 758 663 EXPORT_SYMBOL_GPL(bpf_map_inc); 664 + 665 + void bpf_map_inc_with_uref(struct bpf_map *map) 666 + { 667 + atomic64_inc(&map->refcnt); 668 + atomic64_inc(&map->usercnt); 669 + } 670 + EXPORT_SYMBOL_GPL(bpf_map_inc_with_uref); 759 671 760 672 struct bpf_map *bpf_map_get_with_uref(u32 ufd) 761 673 { ··· 764 680 if (IS_ERR(map)) 765 681 return map; 766 682 767 - map = 
bpf_map_inc(map, true); 683 + bpf_map_inc_with_uref(map); 768 684 fdput(f); 769 685 770 686 return map; 771 687 } 772 688 773 689 /* map_idr_lock should have been held */ 774 - static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, 775 - bool uref) 690 + static struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 776 691 { 777 692 int refold; 778 693 779 - refold = atomic_fetch_add_unless(&map->refcnt, 1, 0); 780 - 781 - if (refold >= BPF_MAX_REFCNT) { 782 - __bpf_map_put(map, false); 783 - return ERR_PTR(-EBUSY); 784 - } 785 - 694 + refold = atomic64_fetch_add_unless(&map->refcnt, 1, 0); 786 695 if (!refold) 787 696 return ERR_PTR(-ENOENT); 788 - 789 697 if (uref) 790 - atomic_inc(&map->usercnt); 698 + atomic64_inc(&map->usercnt); 791 699 792 700 return map; 793 701 } 794 702 795 - struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map, bool uref) 703 + struct bpf_map *bpf_map_inc_not_zero(struct bpf_map *map) 796 704 { 797 705 spin_lock_bh(&map_idr_lock); 798 - map = __bpf_map_inc_not_zero(map, uref); 706 + map = __bpf_map_inc_not_zero(map, false); 799 707 spin_unlock_bh(&map_idr_lock); 800 708 801 709 return map; ··· 1252 1176 map = __bpf_map_get(f); 1253 1177 if (IS_ERR(map)) 1254 1178 return PTR_ERR(map); 1179 + 1180 + mutex_lock(&map->freeze_mutex); 1181 + 1182 + if (map->writecnt) { 1183 + err = -EBUSY; 1184 + goto err_put; 1185 + } 1255 1186 if (READ_ONCE(map->frozen)) { 1256 1187 err = -EBUSY; 1257 1188 goto err_put; ··· 1270 1187 1271 1188 WRITE_ONCE(map->frozen, true); 1272 1189 err_put: 1190 + mutex_unlock(&map->freeze_mutex); 1273 1191 fdput(f); 1274 1192 return err; 1275 1193 } 1276 1194 1277 1195 static const struct bpf_prog_ops * const bpf_prog_types[] = { 1278 - #define BPF_PROG_TYPE(_id, _name) \ 1196 + #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 1279 1197 [_id] = & _name ## _prog_ops, 1280 1198 #define BPF_MAP_TYPE(_id, _ops) 1281 1199 #include <linux/bpf_types.h> ··· 1320 1236 
bpf_map_put(aux->used_maps[i]); 1321 1237 1322 1238 kfree(aux->used_maps); 1239 + } 1240 + 1241 + enum bpf_event { 1242 + BPF_EVENT_LOAD, 1243 + BPF_EVENT_UNLOAD, 1244 + }; 1245 + 1246 + static const char * const bpf_event_audit_str[] = { 1247 + [BPF_EVENT_LOAD] = "LOAD", 1248 + [BPF_EVENT_UNLOAD] = "UNLOAD", 1249 + }; 1250 + 1251 + static void bpf_audit_prog(const struct bpf_prog *prog, enum bpf_event event) 1252 + { 1253 + bool has_task_context = event == BPF_EVENT_LOAD; 1254 + struct audit_buffer *ab; 1255 + 1256 + if (audit_enabled == AUDIT_OFF) 1257 + return; 1258 + ab = audit_log_start(audit_context(), GFP_ATOMIC, AUDIT_BPF); 1259 + if (unlikely(!ab)) 1260 + return; 1261 + if (has_task_context) 1262 + audit_log_task(ab); 1263 + audit_log_format(ab, "%sprog-id=%u event=%s", 1264 + has_task_context ? " " : "", 1265 + prog->aux->id, bpf_event_audit_str[event]); 1266 + audit_log_end(ab); 1323 1267 } 1324 1268 1325 1269 int __bpf_prog_charge(struct user_struct *user, u32 pages) ··· 1443 1331 struct bpf_prog_aux *aux = container_of(rcu, struct bpf_prog_aux, rcu); 1444 1332 1445 1333 kvfree(aux->func_info); 1334 + kfree(aux->func_info_aux); 1446 1335 free_used_maps(aux); 1447 1336 bpf_prog_uncharge_memlock(aux->prog); 1448 1337 security_bpf_prog_free(aux); ··· 1464 1351 1465 1352 static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) 1466 1353 { 1467 - if (atomic_dec_and_test(&prog->aux->refcnt)) { 1354 + if (atomic64_dec_and_test(&prog->aux->refcnt)) { 1468 1355 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); 1356 + bpf_audit_prog(prog, BPF_EVENT_UNLOAD); 1469 1357 /* bpf_prog_free_id() must be called first */ 1470 1358 bpf_prog_free_id(prog, do_idr_lock); 1471 1359 __bpf_prog_put_noref(prog, true); ··· 1571 1457 return f.file->private_data; 1572 1458 } 1573 1459 1574 - struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) 1460 + void bpf_prog_add(struct bpf_prog *prog, int i) 1575 1461 { 1576 - if (atomic_add_return(i, 
&prog->aux->refcnt) > BPF_MAX_REFCNT) { 1577 - atomic_sub(i, &prog->aux->refcnt); 1578 - return ERR_PTR(-EBUSY); 1579 - } 1580 - return prog; 1462 + atomic64_add(i, &prog->aux->refcnt); 1581 1463 } 1582 1464 EXPORT_SYMBOL_GPL(bpf_prog_add); 1583 1465 ··· 1584 1474 * path holds a reference to the program, thus atomic_sub() can 1585 1475 * be safely used in such cases! 1586 1476 */ 1587 - WARN_ON(atomic_sub_return(i, &prog->aux->refcnt) == 0); 1477 + WARN_ON(atomic64_sub_return(i, &prog->aux->refcnt) == 0); 1588 1478 } 1589 1479 EXPORT_SYMBOL_GPL(bpf_prog_sub); 1590 1480 1591 - struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) 1481 + void bpf_prog_inc(struct bpf_prog *prog) 1592 1482 { 1593 - return bpf_prog_add(prog, 1); 1483 + atomic64_inc(&prog->aux->refcnt); 1594 1484 } 1595 1485 EXPORT_SYMBOL_GPL(bpf_prog_inc); 1596 1486 ··· 1599 1489 { 1600 1490 int refold; 1601 1491 1602 - refold = atomic_fetch_add_unless(&prog->aux->refcnt, 1, 0); 1603 - 1604 - if (refold >= BPF_MAX_REFCNT) { 1605 - __bpf_prog_put(prog, false); 1606 - return ERR_PTR(-EBUSY); 1607 - } 1492 + refold = atomic64_fetch_add_unless(&prog->aux->refcnt, 1, 0); 1608 1493 1609 1494 if (!refold) 1610 1495 return ERR_PTR(-ENOENT); ··· 1637 1532 goto out; 1638 1533 } 1639 1534 1640 - prog = bpf_prog_inc(prog); 1535 + bpf_prog_inc(prog); 1641 1536 out: 1642 1537 fdput(f); 1643 1538 return prog; ··· 1684 1579 static int 1685 1580 bpf_prog_load_check_attach(enum bpf_prog_type prog_type, 1686 1581 enum bpf_attach_type expected_attach_type, 1687 - u32 btf_id) 1582 + u32 btf_id, u32 prog_fd) 1688 1583 { 1689 1584 switch (prog_type) { 1690 1585 case BPF_PROG_TYPE_TRACING: ··· 1692 1587 return -EINVAL; 1693 1588 break; 1694 1589 default: 1695 - if (btf_id) 1590 + if (btf_id || prog_fd) 1696 1591 return -EINVAL; 1697 1592 break; 1698 1593 } ··· 1743 1638 } 1744 1639 1745 1640 /* last field in 'union bpf_attr' used by this command */ 1746 - #define BPF_PROG_LOAD_LAST_FIELD attach_btf_id 1641 + #define 
BPF_PROG_LOAD_LAST_FIELD attach_prog_fd 1747 1642 1748 1643 static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) 1749 1644 { ··· 1786 1681 1787 1682 bpf_prog_load_fixup_attach_type(attr); 1788 1683 if (bpf_prog_load_check_attach(type, attr->expected_attach_type, 1789 - attr->attach_btf_id)) 1684 + attr->attach_btf_id, 1685 + attr->attach_prog_fd)) 1790 1686 return -EINVAL; 1791 1687 1792 1688 /* plain bpf_prog allocation */ ··· 1797 1691 1798 1692 prog->expected_attach_type = attr->expected_attach_type; 1799 1693 prog->aux->attach_btf_id = attr->attach_btf_id; 1694 + if (attr->attach_prog_fd) { 1695 + struct bpf_prog *tgt_prog; 1696 + 1697 + tgt_prog = bpf_prog_get(attr->attach_prog_fd); 1698 + if (IS_ERR(tgt_prog)) { 1699 + err = PTR_ERR(tgt_prog); 1700 + goto free_prog_nouncharge; 1701 + } 1702 + prog->aux->linked_prog = tgt_prog; 1703 + } 1800 1704 1801 1705 prog->aux->offload_requested = !!attr->prog_ifindex; 1802 1706 ··· 1828 1712 prog->orig_prog = NULL; 1829 1713 prog->jited = 0; 1830 1714 1831 - atomic_set(&prog->aux->refcnt, 1); 1715 + atomic64_set(&prog->aux->refcnt, 1); 1832 1716 prog->gpl_compatible = is_gpl ? 
1 : 0; 1833 1717 1834 1718 if (bpf_prog_is_dev_bound(prog->aux)) { ··· 1876 1760 */ 1877 1761 bpf_prog_kallsyms_add(prog); 1878 1762 perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); 1763 + bpf_audit_prog(prog, BPF_EVENT_LOAD); 1879 1764 1880 1765 err = bpf_prog_new_fd(prog); 1881 1766 if (err < 0) ··· 1917 1800 1918 1801 return bpf_obj_get_user(u64_to_user_ptr(attr->pathname), 1919 1802 attr->file_flags); 1803 + } 1804 + 1805 + static int bpf_tracing_prog_release(struct inode *inode, struct file *filp) 1806 + { 1807 + struct bpf_prog *prog = filp->private_data; 1808 + 1809 + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); 1810 + bpf_prog_put(prog); 1811 + return 0; 1812 + } 1813 + 1814 + static const struct file_operations bpf_tracing_prog_fops = { 1815 + .release = bpf_tracing_prog_release, 1816 + .read = bpf_dummy_read, 1817 + .write = bpf_dummy_write, 1818 + }; 1819 + 1820 + static int bpf_tracing_prog_attach(struct bpf_prog *prog) 1821 + { 1822 + int tr_fd, err; 1823 + 1824 + if (prog->expected_attach_type != BPF_TRACE_FENTRY && 1825 + prog->expected_attach_type != BPF_TRACE_FEXIT) { 1826 + err = -EINVAL; 1827 + goto out_put_prog; 1828 + } 1829 + 1830 + err = bpf_trampoline_link_prog(prog); 1831 + if (err) 1832 + goto out_put_prog; 1833 + 1834 + tr_fd = anon_inode_getfd("bpf-tracing-prog", &bpf_tracing_prog_fops, 1835 + prog, O_CLOEXEC); 1836 + if (tr_fd < 0) { 1837 + WARN_ON_ONCE(bpf_trampoline_unlink_prog(prog)); 1838 + err = tr_fd; 1839 + goto out_put_prog; 1840 + } 1841 + return tr_fd; 1842 + 1843 + out_put_prog: 1844 + bpf_prog_put(prog); 1845 + return err; 1920 1846 } 1921 1847 1922 1848 struct bpf_raw_tracepoint { ··· 2013 1853 2014 1854 if (prog->type == BPF_PROG_TYPE_TRACING) { 2015 1855 if (attr->raw_tracepoint.name) { 2016 - /* raw_tp name should not be specified in raw_tp 2017 - * programs that were verified via in-kernel BTF info 1856 + /* The attach point for this category of programs 1857 + * should be specified via btf_id during program 
load. 2018 1858 */ 2019 1859 err = -EINVAL; 2020 1860 goto out_put_prog; 2021 1861 } 2022 - /* raw_tp name is taken from type name instead */ 2023 - tp_name = prog->aux->attach_func_name; 1862 + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) 1863 + tp_name = prog->aux->attach_func_name; 1864 + else 1865 + return bpf_tracing_prog_attach(prog); 2024 1866 } else { 2025 1867 if (strncpy_from_user(buf, 2026 1868 u64_to_user_ptr(attr->raw_tracepoint.name),
+253
kernel/bpf/trampoline.c
··· 1 + // SPDX-License-Identifier: GPL-2.0-only 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <linux/hash.h> 4 + #include <linux/bpf.h> 5 + #include <linux/filter.h> 6 + 7 + /* btf_vmlinux has ~22k attachable functions. 1k htab is enough. */ 8 + #define TRAMPOLINE_HASH_BITS 10 9 + #define TRAMPOLINE_TABLE_SIZE (1 << TRAMPOLINE_HASH_BITS) 10 + 11 + static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE]; 12 + 13 + /* serializes access to trampoline_table */ 14 + static DEFINE_MUTEX(trampoline_mutex); 15 + 16 + struct bpf_trampoline *bpf_trampoline_lookup(u64 key) 17 + { 18 + struct bpf_trampoline *tr; 19 + struct hlist_head *head; 20 + void *image; 21 + int i; 22 + 23 + mutex_lock(&trampoline_mutex); 24 + head = &trampoline_table[hash_64(key, TRAMPOLINE_HASH_BITS)]; 25 + hlist_for_each_entry(tr, head, hlist) { 26 + if (tr->key == key) { 27 + refcount_inc(&tr->refcnt); 28 + goto out; 29 + } 30 + } 31 + tr = kzalloc(sizeof(*tr), GFP_KERNEL); 32 + if (!tr) 33 + goto out; 34 + 35 + /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */ 36 + image = bpf_jit_alloc_exec(PAGE_SIZE); 37 + if (!image) { 38 + kfree(tr); 39 + tr = NULL; 40 + goto out; 41 + } 42 + 43 + tr->key = key; 44 + INIT_HLIST_NODE(&tr->hlist); 45 + hlist_add_head(&tr->hlist, head); 46 + refcount_set(&tr->refcnt, 1); 47 + mutex_init(&tr->mutex); 48 + for (i = 0; i < BPF_TRAMP_MAX; i++) 49 + INIT_HLIST_HEAD(&tr->progs_hlist[i]); 50 + 51 + set_vm_flush_reset_perms(image); 52 + /* Keep image as writeable. The alternative is to keep flipping ro/rw 53 + * everytime new program is attached or detached. 54 + */ 55 + set_memory_x((long)image, 1); 56 + tr->image = image; 57 + out: 58 + mutex_unlock(&trampoline_mutex); 59 + return tr; 60 + } 61 + 62 + /* Each call __bpf_prog_enter + call bpf_func + call __bpf_prog_exit is ~50 63 + * bytes on x86. 
Pick a number to fit into PAGE_SIZE / 2 64 + */ 65 + #define BPF_MAX_TRAMP_PROGS 40 66 + 67 + static int bpf_trampoline_update(struct bpf_trampoline *tr) 68 + { 69 + void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2; 70 + void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2; 71 + struct bpf_prog *progs_to_run[BPF_MAX_TRAMP_PROGS]; 72 + int fentry_cnt = tr->progs_cnt[BPF_TRAMP_FENTRY]; 73 + int fexit_cnt = tr->progs_cnt[BPF_TRAMP_FEXIT]; 74 + struct bpf_prog **progs, **fentry, **fexit; 75 + u32 flags = BPF_TRAMP_F_RESTORE_REGS; 76 + struct bpf_prog_aux *aux; 77 + int err; 78 + 79 + if (fentry_cnt + fexit_cnt == 0) { 80 + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_NOP, 81 + old_image, NULL); 82 + tr->selector = 0; 83 + goto out; 84 + } 85 + 86 + /* populate fentry progs */ 87 + fentry = progs = progs_to_run; 88 + hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FENTRY], tramp_hlist) 89 + *progs++ = aux->prog; 90 + 91 + /* populate fexit progs */ 92 + fexit = progs; 93 + hlist_for_each_entry(aux, &tr->progs_hlist[BPF_TRAMP_FEXIT], tramp_hlist) 94 + *progs++ = aux->prog; 95 + 96 + if (fexit_cnt) 97 + flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME; 98 + 99 + err = arch_prepare_bpf_trampoline(new_image, &tr->func.model, flags, 100 + fentry, fentry_cnt, 101 + fexit, fexit_cnt, 102 + tr->func.addr); 103 + if (err) 104 + goto out; 105 + 106 + if (tr->selector) 107 + /* progs already running at this address */ 108 + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_CALL_TO_CALL, 109 + old_image, new_image); 110 + else 111 + /* first time registering */ 112 + err = bpf_arch_text_poke(tr->func.addr, BPF_MOD_NOP_TO_CALL, 113 + NULL, new_image); 114 + if (err) 115 + goto out; 116 + tr->selector++; 117 + out: 118 + return err; 119 + } 120 + 121 + static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(enum bpf_attach_type t) 122 + { 123 + switch (t) { 124 + case BPF_TRACE_FENTRY: 125 + return BPF_TRAMP_FENTRY; 126 + default: 
127 + return BPF_TRAMP_FEXIT; 128 + } 129 + } 130 + 131 + int bpf_trampoline_link_prog(struct bpf_prog *prog) 132 + { 133 + enum bpf_tramp_prog_type kind; 134 + struct bpf_trampoline *tr; 135 + int err = 0; 136 + 137 + tr = prog->aux->trampoline; 138 + kind = bpf_attach_type_to_tramp(prog->expected_attach_type); 139 + mutex_lock(&tr->mutex); 140 + if (tr->progs_cnt[BPF_TRAMP_FENTRY] + tr->progs_cnt[BPF_TRAMP_FEXIT] 141 + >= BPF_MAX_TRAMP_PROGS) { 142 + err = -E2BIG; 143 + goto out; 144 + } 145 + if (!hlist_unhashed(&prog->aux->tramp_hlist)) { 146 + /* prog already linked */ 147 + err = -EBUSY; 148 + goto out; 149 + } 150 + hlist_add_head(&prog->aux->tramp_hlist, &tr->progs_hlist[kind]); 151 + tr->progs_cnt[kind]++; 152 + err = bpf_trampoline_update(prog->aux->trampoline); 153 + if (err) { 154 + hlist_del(&prog->aux->tramp_hlist); 155 + tr->progs_cnt[kind]--; 156 + } 157 + out: 158 + mutex_unlock(&tr->mutex); 159 + return err; 160 + } 161 + 162 + /* bpf_trampoline_unlink_prog() should never fail. 
*/ 163 + int bpf_trampoline_unlink_prog(struct bpf_prog *prog) 164 + { 165 + enum bpf_tramp_prog_type kind; 166 + struct bpf_trampoline *tr; 167 + int err; 168 + 169 + tr = prog->aux->trampoline; 170 + kind = bpf_attach_type_to_tramp(prog->expected_attach_type); 171 + mutex_lock(&tr->mutex); 172 + hlist_del(&prog->aux->tramp_hlist); 173 + tr->progs_cnt[kind]--; 174 + err = bpf_trampoline_update(prog->aux->trampoline); 175 + mutex_unlock(&tr->mutex); 176 + return err; 177 + } 178 + 179 + void bpf_trampoline_put(struct bpf_trampoline *tr) 180 + { 181 + if (!tr) 182 + return; 183 + mutex_lock(&trampoline_mutex); 184 + if (!refcount_dec_and_test(&tr->refcnt)) 185 + goto out; 186 + WARN_ON_ONCE(mutex_is_locked(&tr->mutex)); 187 + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FENTRY]))) 188 + goto out; 189 + if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT]))) 190 + goto out; 191 + bpf_jit_free_exec(tr->image); 192 + hlist_del(&tr->hlist); 193 + kfree(tr); 194 + out: 195 + mutex_unlock(&trampoline_mutex); 196 + } 197 + 198 + /* The logic is similar to BPF_PROG_RUN, but with explicit rcu and preempt that 199 + * are needed for trampoline. The macro is split into 200 + * call _bpf_prog_enter 201 + * call prog->bpf_func 202 + * call __bpf_prog_exit 203 + */ 204 + u64 notrace __bpf_prog_enter(void) 205 + { 206 + u64 start = 0; 207 + 208 + rcu_read_lock(); 209 + preempt_disable(); 210 + if (static_branch_unlikely(&bpf_stats_enabled_key)) 211 + start = sched_clock(); 212 + return start; 213 + } 214 + 215 + void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start) 216 + { 217 + struct bpf_prog_stats *stats; 218 + 219 + if (static_branch_unlikely(&bpf_stats_enabled_key) && 220 + /* static_key could be enabled in __bpf_prog_enter 221 + * and disabled in __bpf_prog_exit. 222 + * And vice versa. 223 + * Hence check that 'start' is not zero. 
224 + */ 225 + start) { 226 + stats = this_cpu_ptr(prog->aux->stats); 227 + u64_stats_update_begin(&stats->syncp); 228 + stats->cnt++; 229 + stats->nsecs += sched_clock() - start; 230 + u64_stats_update_end(&stats->syncp); 231 + } 232 + preempt_enable(); 233 + rcu_read_unlock(); 234 + } 235 + 236 + int __weak 237 + arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags, 238 + struct bpf_prog **fentry_progs, int fentry_cnt, 239 + struct bpf_prog **fexit_progs, int fexit_cnt, 240 + void *orig_call) 241 + { 242 + return -ENOTSUPP; 243 + } 244 + 245 + static int __init init_trampolines(void) 246 + { 247 + int i; 248 + 249 + for (i = 0; i < TRAMPOLINE_TABLE_SIZE; i++) 250 + INIT_HLIST_HEAD(&trampoline_table[i]); 251 + return 0; 252 + } 253 + late_initcall(init_trampolines);
+119 -18
kernel/bpf/verifier.c
··· 23 23 #include "disasm.h" 24 24 25 25 static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { 26 - #define BPF_PROG_TYPE(_id, _name) \ 26 + #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ 27 27 [_id] = & _name ## _verifier_ops, 28 28 #define BPF_MAP_TYPE(_id, _ops) 29 29 #include <linux/bpf_types.h> ··· 3970 3970 /* only increment it after check_reg_arg() finished */ 3971 3971 state->curframe++; 3972 3972 3973 + if (btf_check_func_arg_match(env, subprog)) 3974 + return -EINVAL; 3975 + 3973 3976 /* and go analyze first insn of the callee */ 3974 3977 *insn_idx = target_insn; 3975 3978 ··· 4150 4147 meta.func_id = func_id; 4151 4148 /* check args */ 4152 4149 for (i = 0; i < 5; i++) { 4153 - if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID) { 4154 - if (!fn->btf_id[i]) 4155 - fn->btf_id[i] = btf_resolve_helper_id(&env->log, fn->func, i); 4156 - meta.btf_id = fn->btf_id[i]; 4157 - } 4150 + err = btf_resolve_helper_id(&env->log, fn, i); 4151 + if (err > 0) 4152 + meta.btf_id = err; 4158 4153 err = check_func_arg(env, BPF_REG_1 + i, fn->arg_type[i], &meta); 4159 4154 if (err) 4160 4155 return err; ··· 6567 6566 u32 i, nfuncs, urec_size, min_size; 6568 6567 u32 krec_size = sizeof(struct bpf_func_info); 6569 6568 struct bpf_func_info *krecord; 6569 + struct bpf_func_info_aux *info_aux = NULL; 6570 6570 const struct btf_type *type; 6571 6571 struct bpf_prog *prog; 6572 6572 const struct btf *btf; ··· 6601 6599 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN); 6602 6600 if (!krecord) 6603 6601 return -ENOMEM; 6602 + info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN); 6603 + if (!info_aux) 6604 + goto err_free; 6604 6605 6605 6606 for (i = 0; i < nfuncs; i++) { 6606 6607 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size); ··· 6655 6650 ret = -EINVAL; 6656 6651 goto err_free; 6657 6652 } 6658 - 6659 6653 prev_offset = krecord[i].insn_off; 6660 6654 urecord += urec_size; 6661 6655 } 6662 6656 6663 6657 
prog->aux->func_info = krecord; 6664 6658 prog->aux->func_info_cnt = nfuncs; 6659 + prog->aux->func_info_aux = info_aux; 6665 6660 return 0; 6666 6661 6667 6662 err_free: 6668 6663 kvfree(krecord); 6664 + kfree(info_aux); 6669 6665 return ret; 6670 6666 } 6671 6667 6672 6668 static void adjust_btf_func(struct bpf_verifier_env *env) 6673 6669 { 6670 + struct bpf_prog_aux *aux = env->prog->aux; 6674 6671 int i; 6675 6672 6676 - if (!env->prog->aux->func_info) 6673 + if (!aux->func_info) 6677 6674 return; 6678 6675 6679 6676 for (i = 0; i < env->subprog_cnt; i++) 6680 - env->prog->aux->func_info[i].insn_off = env->subprog_info[i].start; 6677 + aux->func_info[i].insn_off = env->subprog_info[i].start; 6681 6678 } 6682 6679 6683 6680 #define MIN_BPF_LINEINFO_SIZE (offsetof(struct bpf_line_info, line_col) + \ ··· 7660 7653 0 /* frameno */, 7661 7654 0 /* subprogno, zero == main subprog */); 7662 7655 7656 + if (btf_check_func_arg_match(env, 0)) 7657 + return -EINVAL; 7658 + 7663 7659 for (;;) { 7664 7660 struct bpf_insn *insn; 7665 7661 u8 class; ··· 8179 8169 * will be used by the valid program until it's unloaded 8180 8170 * and all maps are released in free_used_maps() 8181 8171 */ 8182 - map = bpf_map_inc(map, false); 8183 - if (IS_ERR(map)) { 8184 - fdput(f); 8185 - return PTR_ERR(map); 8186 - } 8172 + bpf_map_inc(map); 8187 8173 8188 8174 aux->map_index = env->used_map_cnt; 8189 8175 env->used_maps[env->used_map_cnt++] = map; ··· 9386 9380 static int check_attach_btf_id(struct bpf_verifier_env *env) 9387 9381 { 9388 9382 struct bpf_prog *prog = env->prog; 9383 + struct bpf_prog *tgt_prog = prog->aux->linked_prog; 9389 9384 u32 btf_id = prog->aux->attach_btf_id; 9390 9385 const char prefix[] = "btf_trace_"; 9386 + int ret = 0, subprog = -1, i; 9387 + struct bpf_trampoline *tr; 9391 9388 const struct btf_type *t; 9389 + bool conservative = true; 9392 9390 const char *tname; 9391 + struct btf *btf; 9392 + long addr; 9393 + u64 key; 9393 9394 9394 9395 if (prog->type != 
BPF_PROG_TYPE_TRACING) 9395 9396 return 0; ··· 9405 9392 verbose(env, "Tracing programs must provide btf_id\n"); 9406 9393 return -EINVAL; 9407 9394 } 9408 - t = btf_type_by_id(btf_vmlinux, btf_id); 9395 + btf = bpf_prog_get_target_btf(prog); 9396 + if (!btf) { 9397 + verbose(env, 9398 + "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); 9399 + return -EINVAL; 9400 + } 9401 + t = btf_type_by_id(btf, btf_id); 9409 9402 if (!t) { 9410 9403 verbose(env, "attach_btf_id %u is invalid\n", btf_id); 9411 9404 return -EINVAL; 9412 9405 } 9413 - tname = btf_name_by_offset(btf_vmlinux, t->name_off); 9406 + tname = btf_name_by_offset(btf, t->name_off); 9414 9407 if (!tname) { 9415 9408 verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); 9416 9409 return -EINVAL; 9417 9410 } 9411 + if (tgt_prog) { 9412 + struct bpf_prog_aux *aux = tgt_prog->aux; 9413 + 9414 + for (i = 0; i < aux->func_info_cnt; i++) 9415 + if (aux->func_info[i].type_id == btf_id) { 9416 + subprog = i; 9417 + break; 9418 + } 9419 + if (subprog == -1) { 9420 + verbose(env, "Subprog %s doesn't exist\n", tname); 9421 + return -EINVAL; 9422 + } 9423 + conservative = aux->func_info_aux[subprog].unreliable; 9424 + key = ((u64)aux->id) << 32 | btf_id; 9425 + } else { 9426 + key = btf_id; 9427 + } 9418 9428 9419 9429 switch (prog->expected_attach_type) { 9420 9430 case BPF_TRACE_RAW_TP: 9431 + if (tgt_prog) { 9432 + verbose(env, 9433 + "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); 9434 + return -EINVAL; 9435 + } 9421 9436 if (!btf_type_is_typedef(t)) { 9422 9437 verbose(env, "attach_btf_id %u is not a typedef\n", 9423 9438 btf_id); ··· 9457 9416 return -EINVAL; 9458 9417 } 9459 9418 tname += sizeof(prefix) - 1; 9460 - t = btf_type_by_id(btf_vmlinux, t->type); 9419 + t = btf_type_by_id(btf, t->type); 9461 9420 if (!btf_type_is_ptr(t)) 9462 9421 /* should never happen in valid vmlinux build */ 9463 9422 return -EINVAL; 9464 - t = btf_type_by_id(btf_vmlinux, 
t->type); 9423 + t = btf_type_by_id(btf, t->type); 9465 9424 if (!btf_type_is_func_proto(t)) 9466 9425 /* should never happen in valid vmlinux build */ 9467 9426 return -EINVAL; ··· 9473 9432 prog->aux->attach_func_proto = t; 9474 9433 prog->aux->attach_btf_trace = true; 9475 9434 return 0; 9435 + case BPF_TRACE_FENTRY: 9436 + case BPF_TRACE_FEXIT: 9437 + if (!btf_type_is_func(t)) { 9438 + verbose(env, "attach_btf_id %u is not a function\n", 9439 + btf_id); 9440 + return -EINVAL; 9441 + } 9442 + t = btf_type_by_id(btf, t->type); 9443 + if (!btf_type_is_func_proto(t)) 9444 + return -EINVAL; 9445 + tr = bpf_trampoline_lookup(key); 9446 + if (!tr) 9447 + return -ENOMEM; 9448 + prog->aux->attach_func_name = tname; 9449 + /* t is either vmlinux type or another program's type */ 9450 + prog->aux->attach_func_proto = t; 9451 + mutex_lock(&tr->mutex); 9452 + if (tr->func.addr) { 9453 + prog->aux->trampoline = tr; 9454 + goto out; 9455 + } 9456 + if (tgt_prog && conservative) { 9457 + prog->aux->attach_func_proto = NULL; 9458 + t = NULL; 9459 + } 9460 + ret = btf_distill_func_proto(&env->log, btf, t, 9461 + tname, &tr->func.model); 9462 + if (ret < 0) 9463 + goto out; 9464 + if (tgt_prog) { 9465 + if (!tgt_prog->jited) { 9466 + /* for now */ 9467 + verbose(env, "Can trace only JITed BPF progs\n"); 9468 + ret = -EINVAL; 9469 + goto out; 9470 + } 9471 + if (tgt_prog->type == BPF_PROG_TYPE_TRACING) { 9472 + /* prevent cycles */ 9473 + verbose(env, "Cannot recursively attach\n"); 9474 + ret = -EINVAL; 9475 + goto out; 9476 + } 9477 + addr = (long) tgt_prog->aux->func[subprog]->bpf_func; 9478 + } else { 9479 + addr = kallsyms_lookup_name(tname); 9480 + if (!addr) { 9481 + verbose(env, 9482 + "The address of function %s cannot be found\n", 9483 + tname); 9484 + ret = -ENOENT; 9485 + goto out; 9486 + } 9487 + } 9488 + tr->func.addr = (void *)addr; 9489 + prog->aux->trampoline = tr; 9490 + out: 9491 + mutex_unlock(&tr->mutex); 9492 + if (ret) 9493 + bpf_trampoline_put(tr); 9494 + 
return ret; 9476 9495 default: 9477 9496 return -EINVAL; 9478 9497 }
+2 -4
kernel/bpf/xskmap.c
···
 
 int xsk_map_inc(struct xsk_map *map)
 {
-	struct bpf_map *m = &map->map;
-
-	m = bpf_map_inc(m, false);
-	return PTR_ERR_OR_ZERO(m);
+	bpf_map_inc(&map->map);
+	return 0;
 }
 
 void xsk_map_put(struct xsk_map *map)
+2 -5
kernel/events/core.c
···
 		context = parent_event->overflow_handler_context;
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_EVENT_TRACING)
 	if (overflow_handler == bpf_overflow_handler) {
-		struct bpf_prog *prog = bpf_prog_inc(parent_event->prog);
+		struct bpf_prog *prog = parent_event->prog;
 
-		if (IS_ERR(prog)) {
-			err = PTR_ERR(prog);
-			goto err_ns;
-		}
+		bpf_prog_inc(prog);
 		event->prog = prog;
 		event->orig_overflow_handler =
 			parent_event->orig_overflow_handler;
+20
mm/vmalloc.c
···
 EXPORT_SYMBOL(vzalloc_node);
 
 /**
+ * vmalloc_user_node_flags - allocate memory for userspace on a specific node
+ * @size: allocation size
+ * @node: numa node
+ * @flags: flags for the page level allocator
+ *
+ * The resulting memory area is zeroed so it can be mapped to userspace
+ * without leaking data.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+void *vmalloc_user_node_flags(unsigned long size, int node, gfp_t flags)
+{
+	return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
+				    flags | __GFP_ZERO, PAGE_KERNEL,
+				    VM_USERMAP, node,
+				    __builtin_return_address(0));
+}
+EXPORT_SYMBOL(vmalloc_user_node_flags);
+
+/**
  * vmalloc_exec - allocate virtually contiguous, executable memory
  * @size: allocation size
  *
+43
net/bpf/test_run.c
··· 105 105 return err; 106 106 } 107 107 108 + /* Integer types of various sizes and pointer combinations cover variety of 109 + * architecture dependent calling conventions. 7+ can be supported in the 110 + * future. 111 + */ 112 + int noinline bpf_fentry_test1(int a) 113 + { 114 + return a + 1; 115 + } 116 + 117 + int noinline bpf_fentry_test2(int a, u64 b) 118 + { 119 + return a + b; 120 + } 121 + 122 + int noinline bpf_fentry_test3(char a, int b, u64 c) 123 + { 124 + return a + b + c; 125 + } 126 + 127 + int noinline bpf_fentry_test4(void *a, char b, int c, u64 d) 128 + { 129 + return (long)a + b + c + d; 130 + } 131 + 132 + int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e) 133 + { 134 + return a + (long)b + c + d + e; 135 + } 136 + 137 + int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f) 138 + { 139 + return a + (long)b + c + d + (long)e + f; 140 + } 141 + 108 142 static void *bpf_test_init(const union bpf_attr *kattr, u32 size, 109 143 u32 headroom, u32 tailroom) 110 144 { ··· 153 119 return ERR_PTR(-ENOMEM); 154 120 155 121 if (copy_from_user(data + headroom, data_in, size)) { 122 + kfree(data); 123 + return ERR_PTR(-EFAULT); 124 + } 125 + if (bpf_fentry_test1(1) != 2 || 126 + bpf_fentry_test2(2, 3) != 5 || 127 + bpf_fentry_test3(4, 5, 6) != 15 || 128 + bpf_fentry_test4((void *)7, 8, 9, 10) != 34 || 129 + bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 || 130 + bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111) { 156 131 kfree(data); 157 132 return ERR_PTR(-EFAULT); 158 133 }
+1 -1
net/core/bpf_sk_storage.c
···
 		 * Try to grab map refcnt to make sure that it's still
 		 * alive and prevent concurrent removal.
 		 */
-		map = bpf_map_inc_not_zero(&smap->map, false);
+		map = bpf_map_inc_not_zero(&smap->map);
 		if (IS_ERR(map))
 			continue;
 
+1 -11
net/core/filter.c
···
 	.arg5_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
-static u32 bpf_skb_output_btf_ids[5];
+static int bpf_skb_output_btf_ids[5];
 const struct bpf_func_proto bpf_skb_output_proto = {
 	.func		= bpf_skb_event_output,
 	.gpl_only	= true,
···
 }
 
 #ifdef CONFIG_INET
-struct sk_reuseport_kern {
-	struct sk_buff *skb;
-	struct sock *sk;
-	struct sock *selected_sk;
-	void *data_end;
-	u32 hash;
-	u32 reuseport_id;
-	bool bind_inany;
-};
-
 static void bpf_init_reuseport_kern(struct sk_reuseport_kern *reuse_kern,
 				    struct sock_reuseport *reuse,
 				    struct sock *sk, struct sk_buff *skb,
+1
samples/bpf/Makefile
···
 always += ibumad_kern.o
 always += hbm_out_kern.o
 always += hbm_edt_kern.o
+always += xdpsock_kern.o
 
 ifeq ($(ARCH), arm)
 # Strip all except -D__LINUX_ARM_ARCH__ option needed to handle linux
+1 -1
samples/bpf/hbm.c
···
 	}
 
 	if (ret) {
-		printf("ERROR: load_bpf_file failed for: %s\n", prog);
+		printf("ERROR: bpf_prog_load_xattr failed for: %s\n", prog);
 		printf(" Output from verifier:\n%s\n------\n", bpf_log_buf);
 		ret = -1;
 	} else {
+6 -6
samples/bpf/sockex1_kern.c
···
 #include "bpf_helpers.h"
 #include "bpf_legacy.h"
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(long),
-	.max_entries = 256,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, u32);
+	__type(value, long);
+	__uint(max_entries, 256);
+} my_map SEC(".maps");
 
 SEC("socket1")
 int bpf_prog1(struct __sk_buff *skb)
+6 -6
samples/bpf/sockex2_kern.c
···
 	long bytes;
 };
 
-struct bpf_map_def SEC("maps") hash_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(__be32),
-	.value_size = sizeof(struct pair),
-	.max_entries = 1024,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, __be32);
+	__type(value, struct pair);
+	__uint(max_entries, 1024);
+} hash_map SEC(".maps");
 
 SEC("socket2")
 int bpf_prog2(struct __sk_buff *skb)
+6 -6
samples/bpf/xdp1_kern.c
···
 #include <linux/ipv6.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") rxcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(long),
-	.max_entries = 256,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, long);
+	__uint(max_entries, 256);
+} rxcnt SEC(".maps");
 
 static int parse_ipv4(void *data, u64 nh_off, void *data_end)
 {
+1 -1
samples/bpf/xdp1_user.c
···
 	map_fd = bpf_map__fd(map);
 
 	if (!prog_fd) {
-		printf("load_bpf_file: %s\n", strerror(errno));
+		printf("bpf_prog_load_xattr: %s\n", strerror(errno));
 		return 1;
 	}
 
+6 -6
samples/bpf/xdp2_kern.c
···
 #include <linux/ipv6.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") rxcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(long),
-	.max_entries = 256,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, long);
+	__uint(max_entries, 256);
+} rxcnt SEC(".maps");
 
 static void swap_src_dst_mac(void *data)
 {
+6 -6
samples/bpf/xdp_adjust_tail_kern.c
···
 /* volatile to prevent compiler optimizations */
 static volatile __u32 max_pcktsz = MAX_PCKT_SIZE;
 
-struct bpf_map_def SEC("maps") icmpcnt = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(__u64),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u64);
+	__uint(max_entries, 1);
+} icmpcnt SEC(".maps");
 
 static __always_inline void count_icmp(void)
 {
+6 -7
samples/bpf/xdp_fwd_kern.c
···
 
 #define IPV6_FLOWINFO_MASK cpu_to_be32(0x0FFFFFFF)
 
-/* For TX-traffic redirect requires net_device ifindex to be in this devmap */
-struct bpf_map_def SEC("maps") xdp_tx_ports = {
-	.type = BPF_MAP_TYPE_DEVMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 64,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, 64);
+} xdp_tx_ports SEC(".maps");
 
 /* from include/net/ip.h */
 static __always_inline int ip_decrease_ttl(struct iphdr *iph)
+54 -54
samples/bpf/xdp_redirect_cpu_kern.c
··· 18 18 #define MAX_CPUS 64 /* WARNING - sync with _user.c */ 19 19 20 20 /* Special map type that can XDP_REDIRECT frames to another CPU */ 21 - struct bpf_map_def SEC("maps") cpu_map = { 22 - .type = BPF_MAP_TYPE_CPUMAP, 23 - .key_size = sizeof(u32), 24 - .value_size = sizeof(u32), 25 - .max_entries = MAX_CPUS, 26 - }; 21 + struct { 22 + __uint(type, BPF_MAP_TYPE_CPUMAP); 23 + __uint(key_size, sizeof(u32)); 24 + __uint(value_size, sizeof(u32)); 25 + __uint(max_entries, MAX_CPUS); 26 + } cpu_map SEC(".maps"); 27 27 28 28 /* Common stats data record to keep userspace more simple */ 29 29 struct datarec { ··· 35 35 /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success 36 36 * feedback. Redirect TX errors can be caught via a tracepoint. 37 37 */ 38 - struct bpf_map_def SEC("maps") rx_cnt = { 39 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 40 - .key_size = sizeof(u32), 41 - .value_size = sizeof(struct datarec), 42 - .max_entries = 1, 43 - }; 38 + struct { 39 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 40 + __type(key, u32); 41 + __type(value, struct datarec); 42 + __uint(max_entries, 1); 43 + } rx_cnt SEC(".maps"); 44 44 45 45 /* Used by trace point */ 46 - struct bpf_map_def SEC("maps") redirect_err_cnt = { 47 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 48 - .key_size = sizeof(u32), 49 - .value_size = sizeof(struct datarec), 50 - .max_entries = 2, 46 + struct { 47 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 48 + __type(key, u32); 49 + __type(value, struct datarec); 50 + __uint(max_entries, 2); 51 51 /* TODO: have entries for all possible errno's */ 52 - }; 52 + } redirect_err_cnt SEC(".maps"); 53 53 54 54 /* Used by trace point */ 55 - struct bpf_map_def SEC("maps") cpumap_enqueue_cnt = { 56 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 57 - .key_size = sizeof(u32), 58 - .value_size = sizeof(struct datarec), 59 - .max_entries = MAX_CPUS, 60 - }; 55 + struct { 56 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 57 + __type(key, u32); 58 + __type(value, struct datarec); 59 + 
__uint(max_entries, MAX_CPUS); 60 + } cpumap_enqueue_cnt SEC(".maps"); 61 61 62 62 /* Used by trace point */ 63 - struct bpf_map_def SEC("maps") cpumap_kthread_cnt = { 64 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 65 - .key_size = sizeof(u32), 66 - .value_size = sizeof(struct datarec), 67 - .max_entries = 1, 68 - }; 63 + struct { 64 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 65 + __type(key, u32); 66 + __type(value, struct datarec); 67 + __uint(max_entries, 1); 68 + } cpumap_kthread_cnt SEC(".maps"); 69 69 70 70 /* Set of maps controlling available CPU, and for iterating through 71 71 * selectable redirect CPUs. 72 72 */ 73 - struct bpf_map_def SEC("maps") cpus_available = { 74 - .type = BPF_MAP_TYPE_ARRAY, 75 - .key_size = sizeof(u32), 76 - .value_size = sizeof(u32), 77 - .max_entries = MAX_CPUS, 78 - }; 79 - struct bpf_map_def SEC("maps") cpus_count = { 80 - .type = BPF_MAP_TYPE_ARRAY, 81 - .key_size = sizeof(u32), 82 - .value_size = sizeof(u32), 83 - .max_entries = 1, 84 - }; 85 - struct bpf_map_def SEC("maps") cpus_iterator = { 86 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 87 - .key_size = sizeof(u32), 88 - .value_size = sizeof(u32), 89 - .max_entries = 1, 90 - }; 73 + struct { 74 + __uint(type, BPF_MAP_TYPE_ARRAY); 75 + __type(key, u32); 76 + __type(value, u32); 77 + __uint(max_entries, MAX_CPUS); 78 + } cpus_available SEC(".maps"); 79 + struct { 80 + __uint(type, BPF_MAP_TYPE_ARRAY); 81 + __type(key, u32); 82 + __type(value, u32); 83 + __uint(max_entries, 1); 84 + } cpus_count SEC(".maps"); 85 + struct { 86 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 87 + __type(key, u32); 88 + __type(value, u32); 89 + __uint(max_entries, 1); 90 + } cpus_iterator SEC(".maps"); 91 91 92 92 /* Used by trace point */ 93 - struct bpf_map_def SEC("maps") exception_cnt = { 94 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 95 - .key_size = sizeof(u32), 96 - .value_size = sizeof(struct datarec), 97 - .max_entries = 1, 98 - }; 93 + struct { 94 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 95 + __type(key, 
u32); 96 + __type(value, struct datarec); 97 + __uint(max_entries, 1); 98 + } exception_cnt SEC(".maps"); 99 99 100 100 /* Helper parse functions */ 101 101
+12 -12
samples/bpf/xdp_redirect_kern.c
···
 #include <linux/ipv6.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") tx_port = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 1);
+} tx_port SEC(".maps");
 
 /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
  * feedback. Redirect TX errors can be caught via a tracepoint.
  */
-struct bpf_map_def SEC("maps") rxcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(long),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, long);
+	__uint(max_entries, 1);
+} rxcnt SEC(".maps");
 
 static void swap_src_dst_mac(void *data)
 {
+12 -12
samples/bpf/xdp_redirect_map_kern.c
···
 #include <linux/ipv6.h>
 #include "bpf_helpers.h"
 
-struct bpf_map_def SEC("maps") tx_port = {
-	.type = BPF_MAP_TYPE_DEVMAP,
-	.key_size = sizeof(int),
-	.value_size = sizeof(int),
-	.max_entries = 100,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(max_entries, 100);
+} tx_port SEC(".maps");
 
 /* Count RX packets, as XDP bpf_prog doesn't get direct TX-success
  * feedback. Redirect TX errors can be caught via a tracepoint.
  */
-struct bpf_map_def SEC("maps") rxcnt = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(long),
-	.max_entries = 1,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__type(key, u32);
+	__type(value, long);
+	__uint(max_entries, 1);
+} rxcnt SEC(".maps");
 
 static void swap_src_dst_mac(void *data)
 {
+31 -31
samples/bpf/xdp_router_ipv4_kern.c
··· 42 42 }; 43 43 44 44 /* Map for trie implementation*/ 45 - struct bpf_map_def SEC("maps") lpm_map = { 46 - .type = BPF_MAP_TYPE_LPM_TRIE, 47 - .key_size = 8, 48 - .value_size = sizeof(struct trie_value), 49 - .max_entries = 50, 50 - .map_flags = BPF_F_NO_PREALLOC, 51 - }; 45 + struct { 46 + __uint(type, BPF_MAP_TYPE_LPM_TRIE); 47 + __uint(key_size, 8); 48 + __uint(value_size, sizeof(struct trie_value)); 49 + __uint(max_entries, 50); 50 + __uint(map_flags, BPF_F_NO_PREALLOC); 51 + } lpm_map SEC(".maps"); 52 52 53 53 /* Map for counter*/ 54 - struct bpf_map_def SEC("maps") rxcnt = { 55 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 56 - .key_size = sizeof(u32), 57 - .value_size = sizeof(u64), 58 - .max_entries = 256, 59 - }; 54 + struct { 55 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 56 + __type(key, u32); 57 + __type(value, u64); 58 + __uint(max_entries, 256); 59 + } rxcnt SEC(".maps"); 60 60 61 61 /* Map for ARP table*/ 62 - struct bpf_map_def SEC("maps") arp_table = { 63 - .type = BPF_MAP_TYPE_HASH, 64 - .key_size = sizeof(__be32), 65 - .value_size = sizeof(__be64), 66 - .max_entries = 50, 67 - }; 62 + struct { 63 + __uint(type, BPF_MAP_TYPE_HASH); 64 + __type(key, __be32); 65 + __type(value, __be64); 66 + __uint(max_entries, 50); 67 + } arp_table SEC(".maps"); 68 68 69 69 /* Map to keep the exact match entries in the route table*/ 70 - struct bpf_map_def SEC("maps") exact_match = { 71 - .type = BPF_MAP_TYPE_HASH, 72 - .key_size = sizeof(__be32), 73 - .value_size = sizeof(struct direct_map), 74 - .max_entries = 50, 75 - }; 70 + struct { 71 + __uint(type, BPF_MAP_TYPE_HASH); 72 + __type(key, __be32); 73 + __type(value, struct direct_map); 74 + __uint(max_entries, 50); 75 + } exact_match SEC(".maps"); 76 76 77 - struct bpf_map_def SEC("maps") tx_port = { 78 - .type = BPF_MAP_TYPE_DEVMAP, 79 - .key_size = sizeof(int), 80 - .value_size = sizeof(int), 81 - .max_entries = 100, 82 - }; 77 + struct { 78 + __uint(type, BPF_MAP_TYPE_DEVMAP); 79 + __uint(key_size, sizeof(int)); 80 
+ __uint(value_size, sizeof(int)); 81 + __uint(max_entries, 100); 82 + } tx_port SEC(".maps"); 83 83 84 84 /* Function to set source and destination mac of the packet */ 85 85 static inline void set_src_dst_mac(void *data, void *src, void *dst)
+19 -18
samples/bpf/xdp_rxq_info_kern.c
··· 23 23 READ_MEM = 0x1U, 24 24 SWAP_MAC = 0x2U, 25 25 }; 26 - struct bpf_map_def SEC("maps") config_map = { 27 - .type = BPF_MAP_TYPE_ARRAY, 28 - .key_size = sizeof(int), 29 - .value_size = sizeof(struct config), 30 - .max_entries = 1, 31 - }; 26 + 27 + struct { 28 + __uint(type, BPF_MAP_TYPE_ARRAY); 29 + __type(key, int); 30 + __type(value, struct config); 31 + __uint(max_entries, 1); 32 + } config_map SEC(".maps"); 32 33 33 34 /* Common stats data record (shared with userspace) */ 34 35 struct datarec { ··· 37 36 __u64 issue; 38 37 }; 39 38 40 - struct bpf_map_def SEC("maps") stats_global_map = { 41 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 42 - .key_size = sizeof(u32), 43 - .value_size = sizeof(struct datarec), 44 - .max_entries = 1, 45 - }; 39 + struct { 40 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 41 + __type(key, u32); 42 + __type(value, struct datarec); 43 + __uint(max_entries, 1); 44 + } stats_global_map SEC(".maps"); 46 45 47 46 #define MAX_RXQs 64 48 47 49 48 /* Stats per rx_queue_index (per CPU) */ 50 - struct bpf_map_def SEC("maps") rx_queue_index_map = { 51 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 52 - .key_size = sizeof(u32), 53 - .value_size = sizeof(struct datarec), 54 - .max_entries = MAX_RXQs + 1, 55 - }; 49 + struct { 50 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 51 + __type(key, u32); 52 + __type(value, struct datarec); 53 + __uint(max_entries, MAX_RXQs + 1); 54 + } rx_queue_index_map SEC(".maps"); 56 55 57 56 static __always_inline 58 57 void swap_src_dst_mac(void *data)
+3 -3
samples/bpf/xdp_rxq_info_user.c
··· 51 51 {"sec", required_argument, NULL, 's' }, 52 52 {"no-separators", no_argument, NULL, 'z' }, 53 53 {"action", required_argument, NULL, 'a' }, 54 - {"readmem", no_argument, NULL, 'r' }, 55 - {"swapmac", no_argument, NULL, 'm' }, 54 + {"readmem", no_argument, NULL, 'r' }, 55 + {"swapmac", no_argument, NULL, 'm' }, 56 56 {"force", no_argument, NULL, 'F' }, 57 57 {0, 0, NULL, 0 } 58 58 }; ··· 499 499 map_fd = bpf_map__fd(map); 500 500 501 501 if (!prog_fd) { 502 - fprintf(stderr, "ERR: load_bpf_file: %s\n", strerror(errno)); 502 + fprintf(stderr, "ERR: bpf_prog_load_xattr: %s\n", strerror(errno)); 503 503 return EXIT_FAIL; 504 504 } 505 505
+1 -1
samples/bpf/xdp_sample_pkts_user.c
··· 150 150 return 1; 151 151 152 152 if (!prog_fd) { 153 - printf("load_bpf_file: %s\n", strerror(errno)); 153 + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); 154 154 return 1; 155 155 } 156 156
+12 -12
samples/bpf/xdp_tx_iptunnel_kern.c
··· 19 19 #include "bpf_helpers.h" 20 20 #include "xdp_tx_iptunnel_common.h" 21 21 22 - struct bpf_map_def SEC("maps") rxcnt = { 23 - .type = BPF_MAP_TYPE_PERCPU_ARRAY, 24 - .key_size = sizeof(__u32), 25 - .value_size = sizeof(__u64), 26 - .max_entries = 256, 27 - }; 22 + struct { 23 + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); 24 + __type(key, __u32); 25 + __type(value, __u64); 26 + __uint(max_entries, 256); 27 + } rxcnt SEC(".maps"); 28 28 29 - struct bpf_map_def SEC("maps") vip2tnl = { 30 - .type = BPF_MAP_TYPE_HASH, 31 - .key_size = sizeof(struct vip), 32 - .value_size = sizeof(struct iptnl_info), 33 - .max_entries = MAX_IPTNL_ENTRIES, 34 - }; 29 + struct { 30 + __uint(type, BPF_MAP_TYPE_HASH); 31 + __type(key, struct vip); 32 + __type(value, struct iptnl_info); 33 + __uint(max_entries, MAX_IPTNL_ENTRIES); 34 + } vip2tnl SEC(".maps"); 35 35 36 36 static __always_inline void count_tx(u32 protocol) 37 37 {
+1 -1
samples/bpf/xdp_tx_iptunnel_user.c
··· 268 268 return 1; 269 269 270 270 if (!prog_fd) { 271 - printf("load_bpf_file: %s\n", strerror(errno)); 271 + printf("bpf_prog_load_xattr: %s\n", strerror(errno)); 272 272 return 1; 273 273 } 274 274
+11
samples/bpf/xdpsock.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 2 + * 3 + * Copyright(c) 2019 Intel Corporation. 4 + */ 5 + 6 + #ifndef XDPSOCK_H_ 7 + #define XDPSOCK_H_ 8 + 9 + #define MAX_SOCKS 4 10 + 11 + #endif /* XDPSOCK_H_ */
+24
samples/bpf/xdpsock_kern.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <linux/bpf.h> 3 + #include "bpf_helpers.h" 4 + #include "xdpsock.h" 5 + 6 + /* This XDP program is only needed for the XDP_SHARED_UMEM mode. 7 + * If you do not use this mode, libbpf can supply an XDP program for you. 8 + */ 9 + 10 + struct { 11 + __uint(type, BPF_MAP_TYPE_XSKMAP); 12 + __uint(max_entries, MAX_SOCKS); 13 + __uint(key_size, sizeof(int)); 14 + __uint(value_size, sizeof(int)); 15 + } xsks_map SEC(".maps"); 16 + 17 + static unsigned int rr; 18 + 19 + SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) 20 + { 21 + rr = (rr + 1) & (MAX_SOCKS - 1); 22 + 23 + return bpf_redirect_map(&xsks_map, rr, XDP_DROP); 24 + }
+119 -44
samples/bpf/xdpsock_user.c
··· 29 29 30 30 #include "libbpf.h" 31 31 #include "xsk.h" 32 + #include "xdpsock.h" 32 33 #include <bpf/bpf.h> 33 34 34 35 #ifndef SOL_XDP ··· 48 47 #define BATCH_SIZE 64 49 48 50 49 #define DEBUG_HEXDUMP 0 51 - #define MAX_SOCKS 8 52 50 53 51 typedef __u64 u64; 54 52 typedef __u32 u32; ··· 75 75 static int opt_xsk_frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; 76 76 static int opt_timeout = 1000; 77 77 static bool opt_need_wakeup = true; 78 - static __u32 prog_id; 78 + static u32 opt_num_xsks = 1; 79 + static u32 prog_id; 79 80 80 81 struct xsk_umem_info { 81 82 struct xsk_ring_prod fq; ··· 180 179 181 180 static void remove_xdp_program(void) 182 181 { 183 - __u32 curr_prog_id = 0; 182 + u32 curr_prog_id = 0; 184 183 185 184 if (bpf_get_link_xdp_id(opt_ifindex, &curr_prog_id, opt_xdp_flags)) { 186 185 printf("bpf_get_link_xdp_id failed\n"); ··· 197 196 static void int_exit(int sig) 198 197 { 199 198 struct xsk_umem *umem = xsks[0]->umem->umem; 200 - 201 - (void)sig; 199 + int i; 202 200 203 201 dump_stats(); 204 - xsk_socket__delete(xsks[0]->xsk); 202 + for (i = 0; i < num_socks; i++) 203 + xsk_socket__delete(xsks[i]->xsk); 205 204 (void)xsk_umem__delete(umem); 206 205 remove_xdp_program(); 207 206 ··· 291 290 .frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM, 292 291 .flags = opt_umem_flags 293 292 }; 294 - 295 293 int ret; 296 294 297 295 umem = calloc(1, sizeof(*umem)); ··· 299 299 300 300 ret = xsk_umem__create(&umem->umem, buffer, size, &umem->fq, &umem->cq, 301 301 &cfg); 302 - 303 302 if (ret) 304 303 exit_with_error(-ret); 305 304 ··· 306 307 return umem; 307 308 } 308 309 309 - static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem) 310 + static void xsk_populate_fill_ring(struct xsk_umem_info *umem) 311 + { 312 + int ret, i; 313 + u32 idx; 314 + 315 + ret = xsk_ring_prod__reserve(&umem->fq, 316 + XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx); 317 + if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) 318 + exit_with_error(-ret); 319 + for (i = 0; 
i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) 320 + *xsk_ring_prod__fill_addr(&umem->fq, idx++) = 321 + i * opt_xsk_frame_size; 322 + xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS); 323 + } 324 + 325 + static struct xsk_socket_info *xsk_configure_socket(struct xsk_umem_info *umem, 326 + bool rx, bool tx) 310 327 { 311 328 struct xsk_socket_config cfg; 312 329 struct xsk_socket_info *xsk; 330 + struct xsk_ring_cons *rxr; 331 + struct xsk_ring_prod *txr; 313 332 int ret; 314 - u32 idx; 315 - int i; 316 333 317 334 xsk = calloc(1, sizeof(*xsk)); 318 335 if (!xsk) ··· 337 322 xsk->umem = umem; 338 323 cfg.rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; 339 324 cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; 340 - cfg.libbpf_flags = 0; 325 + if (opt_num_xsks > 1) 326 + cfg.libbpf_flags = XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD; 327 + else 328 + cfg.libbpf_flags = 0; 341 329 cfg.xdp_flags = opt_xdp_flags; 342 330 cfg.bind_flags = opt_xdp_bind_flags; 331 + 332 + rxr = rx ? &xsk->rx : NULL; 333 + txr = tx ? 
&xsk->tx : NULL; 343 334 ret = xsk_socket__create(&xsk->xsk, opt_if, opt_queue, umem->umem, 344 - &xsk->rx, &xsk->tx, &cfg); 335 + rxr, txr, &cfg); 345 336 if (ret) 346 337 exit_with_error(-ret); 347 338 348 339 ret = bpf_get_link_xdp_id(opt_ifindex, &prog_id, opt_xdp_flags); 349 340 if (ret) 350 341 exit_with_error(-ret); 351 - 352 - ret = xsk_ring_prod__reserve(&xsk->umem->fq, 353 - XSK_RING_PROD__DEFAULT_NUM_DESCS, 354 - &idx); 355 - if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS) 356 - exit_with_error(-ret); 357 - for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++) 358 - *xsk_ring_prod__fill_addr(&xsk->umem->fq, idx++) = 359 - i * opt_xsk_frame_size; 360 - xsk_ring_prod__submit(&xsk->umem->fq, 361 - XSK_RING_PROD__DEFAULT_NUM_DESCS); 362 342 363 343 return xsk; 364 344 } ··· 373 363 {"frame-size", required_argument, 0, 'f'}, 374 364 {"no-need-wakeup", no_argument, 0, 'm'}, 375 365 {"unaligned", no_argument, 0, 'u'}, 366 + {"shared-umem", no_argument, 0, 'M'}, 367 + {"force", no_argument, 0, 'F'}, 376 368 {0, 0, 0, 0} 377 369 }; 378 370 ··· 394 382 " -n, --interval=n Specify statistics update interval (default 1 sec).\n" 395 383 " -z, --zero-copy Force zero-copy mode.\n" 396 384 " -c, --copy Force copy mode.\n" 397 - " -f, --frame-size=n Set the frame size (must be a power of two, default is %d).\n" 398 385 " -m, --no-need-wakeup Turn off use of driver need wakeup flag.\n" 399 386 " -f, --frame-size=n Set the frame size (must be a power of two in aligned mode, default is %d).\n" 400 387 " -u, --unaligned Enable unaligned chunk placement\n" 388 + " -M, --shared-umem Enable XDP_SHARED_UMEM\n" 389 + " -F, --force Force loading the XDP prog\n" 401 390 "\n"; 402 391 fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); 403 392 exit(EXIT_FAILURE); ··· 411 398 opterr = 0; 412 399 413 400 for (;;) { 414 - c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:mu", 401 + c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM", 415 402 long_options, &option_index); 416 403 if (c 
== -1) 417 404 break; ··· 461 448 break; 462 449 case 'f': 463 450 opt_xsk_frame_size = atoi(optarg); 451 + break; 464 452 case 'm': 465 453 opt_need_wakeup = false; 466 454 opt_xdp_bind_flags &= ~XDP_USE_NEED_WAKEUP; 467 455 break; 468 - 456 + case 'M': 457 + opt_num_xsks = MAX_SOCKS; 458 + break; 469 459 default: 470 460 usage(basename(argv[0])); 471 461 } ··· 602 586 603 587 static void rx_drop_all(void) 604 588 { 605 - struct pollfd fds[MAX_SOCKS + 1]; 589 + struct pollfd fds[MAX_SOCKS] = {}; 606 590 int i, ret; 607 - 608 - memset(fds, 0, sizeof(fds)); 609 591 610 592 for (i = 0; i < num_socks; i++) { 611 593 fds[i].fd = xsk_socket__fd(xsks[i]->xsk); ··· 647 633 648 634 static void tx_only_all(void) 649 635 { 650 - struct pollfd fds[MAX_SOCKS]; 636 + struct pollfd fds[MAX_SOCKS] = {}; 651 637 u32 frame_nb[MAX_SOCKS] = {}; 652 638 int i, ret; 653 639 654 - memset(fds, 0, sizeof(fds)); 655 640 for (i = 0; i < num_socks; i++) { 656 641 fds[0].fd = xsk_socket__fd(xsks[i]->xsk); 657 642 fds[0].events = POLLOUT; ··· 719 706 720 707 static void l2fwd_all(void) 721 708 { 722 - struct pollfd fds[MAX_SOCKS]; 709 + struct pollfd fds[MAX_SOCKS] = {}; 723 710 int i, ret; 724 - 725 - memset(fds, 0, sizeof(fds)); 726 711 727 712 for (i = 0; i < num_socks; i++) { 728 713 fds[i].fd = xsk_socket__fd(xsks[i]->xsk); ··· 739 728 } 740 729 } 741 730 731 + static void load_xdp_program(char **argv, struct bpf_object **obj) 732 + { 733 + struct bpf_prog_load_attr prog_load_attr = { 734 + .prog_type = BPF_PROG_TYPE_XDP, 735 + }; 736 + char xdp_filename[256]; 737 + int prog_fd; 738 + 739 + snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv[0]); 740 + prog_load_attr.file = xdp_filename; 741 + 742 + if (bpf_prog_load_xattr(&prog_load_attr, obj, &prog_fd)) 743 + exit(EXIT_FAILURE); 744 + if (prog_fd < 0) { 745 + fprintf(stderr, "ERROR: no program found: %s\n", 746 + strerror(prog_fd)); 747 + exit(EXIT_FAILURE); 748 + } 749 + 750 + if (bpf_set_link_xdp_fd(opt_ifindex, prog_fd, 
opt_xdp_flags) < 0) { 751 + fprintf(stderr, "ERROR: link set xdp fd failed\n"); 752 + exit(EXIT_FAILURE); 753 + } 754 + } 755 + 756 + static void enter_xsks_into_map(struct bpf_object *obj) 757 + { 758 + struct bpf_map *map; 759 + int i, xsks_map; 760 + 761 + map = bpf_object__find_map_by_name(obj, "xsks_map"); 762 + xsks_map = bpf_map__fd(map); 763 + if (xsks_map < 0) { 764 + fprintf(stderr, "ERROR: no xsks map found: %s\n", 765 + strerror(xsks_map)); 766 + exit(EXIT_FAILURE); 767 + } 768 + 769 + for (i = 0; i < num_socks; i++) { 770 + int fd = xsk_socket__fd(xsks[i]->xsk); 771 + int key, ret; 772 + 773 + key = i; 774 + ret = bpf_map_update_elem(xsks_map, &key, &fd, 0); 775 + if (ret) { 776 + fprintf(stderr, "ERROR: bpf_map_update_elem %d\n", i); 777 + exit(EXIT_FAILURE); 778 + } 779 + } 780 + } 781 + 742 782 int main(int argc, char **argv) 743 783 { 744 784 struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; 785 + bool rx = false, tx = false; 745 786 struct xsk_umem_info *umem; 787 + struct bpf_object *obj; 746 788 pthread_t pt; 789 + int i, ret; 747 790 void *bufs; 748 - int ret; 749 791 750 792 parse_command_line(argc, argv); 751 793 ··· 808 744 exit(EXIT_FAILURE); 809 745 } 810 746 747 + if (opt_num_xsks > 1) 748 + load_xdp_program(argv, &obj); 749 + 811 750 /* Reserve memory for the umem. Use hugepages if unaligned chunk mode */ 812 751 bufs = mmap(NULL, NUM_FRAMES * opt_xsk_frame_size, 813 752 PROT_READ | PROT_WRITE, ··· 819 752 printf("ERROR: mmap failed\n"); 820 753 exit(EXIT_FAILURE); 821 754 } 822 - /* Create sockets... */ 755 + 756 + /* Create sockets... 
*/ 823 757 umem = xsk_configure_umem(bufs, NUM_FRAMES * opt_xsk_frame_size); 824 - xsks[num_socks++] = xsk_configure_socket(umem); 825 - 826 - if (opt_bench == BENCH_TXONLY) { 827 - int i; 828 - 829 - for (i = 0; i < NUM_FRAMES; i++) 830 - (void)gen_eth_frame(umem, i * opt_xsk_frame_size); 758 + if (opt_bench == BENCH_RXDROP || opt_bench == BENCH_L2FWD) { 759 + rx = true; 760 + xsk_populate_fill_ring(umem); 831 761 } 762 + if (opt_bench == BENCH_L2FWD || opt_bench == BENCH_TXONLY) 763 + tx = true; 764 + for (i = 0; i < opt_num_xsks; i++) 765 + xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); 766 + 767 + if (opt_bench == BENCH_TXONLY) 768 + for (i = 0; i < NUM_FRAMES; i++) 769 + gen_eth_frame(umem, i * opt_xsk_frame_size); 770 + 771 + if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) 772 + enter_xsks_into_map(obj); 832 773 833 774 signal(SIGINT, int_exit); 834 775 signal(SIGTERM, int_exit);
+12 -2
tools/bpf/bpf_exp.y
··· 545 545 } 546 546 } 547 547 548 + static uint8_t bpf_encode_jt_jf_offset(int off, int i) 549 + { 550 + int delta = off - i - 1; 551 + 552 + if (delta < 0 || delta > 255) 553 + fprintf(stderr, "warning: insn #%d jumps to insn #%d, " 554 + "which is out of range\n", i, off); 555 + return (uint8_t) delta; 556 + } 557 + 548 558 static void bpf_reduce_jt_jumps(void) 549 559 { 550 560 int i; ··· 562 552 for (i = 0; i < curr_instr; i++) { 563 553 if (labels_jt[i]) { 564 554 int off = bpf_find_insns_offset(labels_jt[i]); 565 - out[i].jt = (uint8_t) (off - i -1); 555 + out[i].jt = bpf_encode_jt_jf_offset(off, i); 566 556 } 567 557 } 568 558 } ··· 574 564 for (i = 0; i < curr_instr; i++) { 575 565 if (labels_jf[i]) { 576 566 int off = bpf_find_insns_offset(labels_jf[i]); 577 - out[i].jf = (uint8_t) (off - i - 1); 567 + out[i].jf = bpf_encode_jt_jf_offset(off, i); 578 568 } 579 569 } 580 570 }
+6
tools/include/uapi/linux/bpf.h
··· 201 201 BPF_CGROUP_GETSOCKOPT, 202 202 BPF_CGROUP_SETSOCKOPT, 203 203 BPF_TRACE_RAW_TP, 204 + BPF_TRACE_FENTRY, 205 + BPF_TRACE_FEXIT, 204 206 __MAX_BPF_ATTACH_TYPE 205 207 }; 206 208 ··· 348 346 /* Clone map from listener for newly accepted socket */ 349 347 #define BPF_F_CLONE (1U << 9) 350 348 349 + /* Enable memory-mapping BPF map */ 350 + #define BPF_F_MMAPABLE (1U << 10) 351 + 351 352 /* flags for BPF_PROG_QUERY */ 352 353 #define BPF_F_QUERY_EFFECTIVE (1U << 0) 353 354 ··· 428 423 __aligned_u64 line_info; /* line info */ 429 424 __u32 line_info_cnt; /* number of bpf_line_info records */ 430 425 __u32 attach_btf_id; /* in-kernel BTF type id to attach to */ 426 + __u32 attach_prog_fd; /* 0 to attach to vmlinux */ 431 427 }; 432 428 433 429 struct { /* anonymous struct used by BPF_OBJ_* commands */
+6 -4
tools/lib/bpf/bpf.c
··· 189 189 alloc_zero_tailing_info(const void *orecord, __u32 cnt, 190 190 __u32 actual_rec_size, __u32 expected_rec_size) 191 191 { 192 - __u64 info_len = actual_rec_size * cnt; 192 + __u64 info_len = (__u64)actual_rec_size * cnt; 193 193 void *info, *nrecord; 194 194 int i; 195 195 ··· 228 228 memset(&attr, 0, sizeof(attr)); 229 229 attr.prog_type = load_attr->prog_type; 230 230 attr.expected_attach_type = load_attr->expected_attach_type; 231 - if (attr.prog_type == BPF_PROG_TYPE_TRACING) 231 + if (attr.prog_type == BPF_PROG_TYPE_TRACING) { 232 232 attr.attach_btf_id = load_attr->attach_btf_id; 233 - else 233 + attr.attach_prog_fd = load_attr->attach_prog_fd; 234 + } else { 234 235 attr.prog_ifindex = load_attr->prog_ifindex; 236 + attr.kern_version = load_attr->kern_version; 237 + } 235 238 attr.insn_cnt = (__u32)load_attr->insns_cnt; 236 239 attr.insns = ptr_to_u64(load_attr->insns); 237 240 attr.license = ptr_to_u64(load_attr->license); ··· 248 245 attr.log_size = 0; 249 246 } 250 247 251 - attr.kern_version = load_attr->kern_version; 252 248 attr.prog_btf_fd = load_attr->prog_btf_fd; 253 249 attr.func_info_rec_size = load_attr->func_info_rec_size; 254 250 attr.func_info_cnt = load_attr->func_info_cnt;
+4 -1
tools/lib/bpf/bpf.h
··· 77 77 const struct bpf_insn *insns; 78 78 size_t insns_cnt; 79 79 const char *license; 80 - __u32 kern_version; 80 + union { 81 + __u32 kern_version; 82 + __u32 attach_prog_fd; 83 + }; 81 84 union { 82 85 __u32 prog_ifindex; 83 86 __u32 attach_btf_id;
+74
tools/lib/bpf/bpf_core_read.h
··· 12 12 */ 13 13 enum bpf_field_info_kind { 14 14 BPF_FIELD_BYTE_OFFSET = 0, /* field byte offset */ 15 + BPF_FIELD_BYTE_SIZE = 1, 15 16 BPF_FIELD_EXISTS = 2, /* field existence in target kernel */ 17 + BPF_FIELD_SIGNED = 3, 18 + BPF_FIELD_LSHIFT_U64 = 4, 19 + BPF_FIELD_RSHIFT_U64 = 5, 16 20 }; 21 + 22 + #define __CORE_RELO(src, field, info) \ 23 + __builtin_preserve_field_info((src)->field, BPF_FIELD_##info) 24 + 25 + #if __BYTE_ORDER == __LITTLE_ENDIAN 26 + #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ 27 + bpf_probe_read((void *)dst, \ 28 + __CORE_RELO(src, fld, BYTE_SIZE), \ 29 + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) 30 + #else 31 + /* semantics of LSHIFT_64 assume loading values into low-ordered bytes, so 32 + * for big-endian we need to adjust the destination pointer accordingly, based 33 + * on field byte size 34 + */ 35 + #define __CORE_BITFIELD_PROBE_READ(dst, src, fld) \ 36 + bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)), \ 37 + __CORE_RELO(src, fld, BYTE_SIZE), \ 38 + (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) 39 + #endif 40 + 41 + /* 42 + * Extract bitfield, identified by s->field, and return its value as u64. 43 + * All this is done in a relocatable manner, so bitfield changes such as 44 + * signedness, bit size, or offset are handled automatically. 45 + * This version of the macro uses bpf_probe_read() to read the underlying 46 + * integer storage. The macro functions as an expression and its value is 47 + * bpf_probe_read()'s return value: 0 on success, <0 on error.
48 + */ 49 + #define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({ \ 50 + unsigned long long val = 0; \ 51 + \ 52 + __CORE_BITFIELD_PROBE_READ(&val, s, field); \ 53 + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ 54 + if (__CORE_RELO(s, field, SIGNED)) \ 55 + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ 56 + else \ 57 + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ 58 + val; \ 59 + }) 60 + 61 + /* 62 + * Extract bitfield, identified by s->field, and return its value as u64. 63 + * This version of the macro uses direct memory reads and should be used from 64 + * BPF program types that support such functionality (e.g., typed raw 65 + * tracepoints). 66 + */ 67 + #define BPF_CORE_READ_BITFIELD(s, field) ({ \ 68 + const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ 69 + unsigned long long val; \ 70 + \ 71 + switch (__CORE_RELO(s, field, BYTE_SIZE)) { \ 72 + case 1: val = *(const unsigned char *)p; break; \ 73 + case 2: val = *(const unsigned short *)p; break; \ 74 + case 4: val = *(const unsigned int *)p; break; \ 75 + case 8: val = *(const unsigned long long *)p; break; \ 76 + } \ 77 + val <<= __CORE_RELO(s, field, LSHIFT_U64); \ 78 + if (__CORE_RELO(s, field, SIGNED)) \ 79 + val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64); \ 80 + else \ 81 + val = val >> __CORE_RELO(s, field, RSHIFT_U64); \ 82 + val; \ 83 + }) 17 84 18 85 /* 19 86 * Convenience macro to check that field actually exists in target kernel's. ··· 90 23 */ 91 24 #define bpf_core_field_exists(field) \ 92 25 __builtin_preserve_field_info(field, BPF_FIELD_EXISTS) 26 + 27 + /* 28 + * Convenience macro to get byte size of a field. Works for integers, 29 + * struct/unions, pointers, arrays, and enums. 30 + */ 31 + #define bpf_core_field_size(field) \ 32 + __builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) 93 33 94 34 /* 95 35 * bpf_core_read() abstracts away bpf_probe_read() call and captures offset
+13
tools/lib/bpf/bpf_helpers.h
··· 44 44 LIBBPF_PIN_BY_NAME, 45 45 }; 46 46 47 + /* The following types should be used by BPF_PROG_TYPE_TRACING program to 48 + * access kernel function arguments. BPF trampoline and raw tracepoints 49 + * typecast arguments to 'unsigned long long'. 50 + */ 51 + typedef int __attribute__((aligned(8))) ks32; 52 + typedef char __attribute__((aligned(8))) ks8; 53 + typedef short __attribute__((aligned(8))) ks16; 54 + typedef long long __attribute__((aligned(8))) ks64; 55 + typedef unsigned int __attribute__((aligned(8))) ku32; 56 + typedef unsigned char __attribute__((aligned(8))) ku8; 57 + typedef unsigned short __attribute__((aligned(8))) ku16; 58 + typedef unsigned long long __attribute__((aligned(8))) ku64; 59 + 47 60 #endif
+7 -7
tools/lib/bpf/bpf_prog_linfo.c
··· 101 101 { 102 102 struct bpf_prog_linfo *prog_linfo; 103 103 __u32 nr_linfo, nr_jited_func; 104 + __u64 data_sz; 104 105 105 106 nr_linfo = info->nr_line_info; 106 107 ··· 123 122 /* Copy xlated line_info */ 124 123 prog_linfo->nr_linfo = nr_linfo; 125 124 prog_linfo->rec_size = info->line_info_rec_size; 126 - prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); 125 + data_sz = (__u64)nr_linfo * prog_linfo->rec_size; 126 + prog_linfo->raw_linfo = malloc(data_sz); 127 127 if (!prog_linfo->raw_linfo) 128 128 goto err_free; 129 - memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, 130 - nr_linfo * prog_linfo->rec_size); 129 + memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz); 131 130 132 131 nr_jited_func = info->nr_jited_ksyms; 133 132 if (!nr_jited_func || ··· 143 142 /* Copy jited_line_info */ 144 143 prog_linfo->nr_jited_func = nr_jited_func; 145 144 prog_linfo->jited_rec_size = info->jited_line_info_rec_size; 146 - prog_linfo->raw_jited_linfo = malloc(nr_linfo * 147 - prog_linfo->jited_rec_size); 145 + data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size; 146 + prog_linfo->raw_jited_linfo = malloc(data_sz); 148 147 if (!prog_linfo->raw_jited_linfo) 149 148 goto err_free; 150 149 memcpy(prog_linfo->raw_jited_linfo, 151 - (void *)(long)info->jited_line_info, 152 - nr_linfo * prog_linfo->jited_rec_size); 150 + (void *)(long)info->jited_line_info, data_sz); 153 151 154 152 /* Number of jited_line_info per jited func */ 155 153 prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func *
+23 -2
tools/lib/bpf/btf.c
··· 269 269 t = btf__type_by_id(btf, type_id); 270 270 } 271 271 272 + done: 272 273 if (size < 0) 273 274 return -EINVAL; 274 - 275 - done: 276 275 if (nelems && size > UINT32_MAX / nelems) 277 276 return -E2BIG; 278 277 ··· 309 310 const struct btf_type *t = btf->types[i]; 310 311 const char *name = btf__name_by_offset(btf, t->name_off); 311 312 313 + if (name && !strcmp(type_name, name)) 314 + return i; 315 + } 316 + 317 + return -ENOENT; 318 + } 319 + 320 + __s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, 321 + __u32 kind) 322 + { 323 + __u32 i; 324 + 325 + if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) 326 + return 0; 327 + 328 + for (i = 1; i <= btf->nr_types; i++) { 329 + const struct btf_type *t = btf->types[i]; 330 + const char *name; 331 + 332 + if (btf_kind(t) != kind) 333 + continue; 334 + name = btf__name_by_offset(btf, t->name_off); 312 335 if (name && !strcmp(type_name, name)) 313 336 return i; 314 337 }
+2
tools/lib/bpf/btf.h
··· 72 72 LIBBPF_API int btf__load(struct btf *btf); 73 73 LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, 74 74 const char *type_name); 75 + LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, 76 + const char *type_name, __u32 kind); 75 77 LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); 76 78 LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, 77 79 __u32 id);
+366 -116
tools/lib/bpf/libbpf.c
··· 142 142 __u32 btf_func:1; 143 143 /* BTF_KIND_VAR and BTF_KIND_DATASEC support */ 144 144 __u32 btf_datasec:1; 145 + /* BPF_F_MMAPABLE is supported for arrays */ 146 + __u32 array_mmap:1; 145 147 }; 146 148 147 149 /* ··· 191 189 192 190 enum bpf_attach_type expected_attach_type; 193 191 __u32 attach_btf_id; 192 + __u32 attach_prog_fd; 194 193 void *func_info; 195 194 __u32 func_info_rec_size; 196 195 __u32 func_info_cnt; ··· 232 229 enum libbpf_map_type libbpf_type; 233 230 char *pin_path; 234 231 bool pinned; 232 + bool reused; 235 233 }; 236 234 237 235 struct bpf_secdata { ··· 859 855 pr_warn("failed to alloc map name\n"); 860 856 return -ENOMEM; 861 857 } 862 - pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n", 863 - map_name, map->sec_idx, map->sec_offset); 864 858 865 859 def = &map->def; 866 860 def->type = BPF_MAP_TYPE_ARRAY; ··· 866 864 def->value_size = data->d_size; 867 865 def->max_entries = 1; 868 866 def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; 867 + if (obj->caps.array_mmap) 868 + def->map_flags |= BPF_F_MMAPABLE; 869 + 870 + pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", 871 + map_name, map->sec_idx, map->sec_offset, def->map_flags); 872 + 869 873 if (data_buff) { 870 874 *data_buff = malloc(data->d_size); 871 875 if (!*data_buff) { ··· 964 956 pr_debug("maps in %s: %d maps in %zd bytes\n", 965 957 obj->path, nr_maps, data->d_size); 966 958 967 - map_def_sz = data->d_size / nr_maps; 968 - if (!data->d_size || (data->d_size % nr_maps) != 0) { 959 + if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { 969 960 pr_warn("unable to determine map definition size " 970 961 "section %s, %d maps in %zd bytes\n", 971 962 obj->path, nr_maps, data->d_size); 972 963 return -EINVAL; 973 964 } 965 + map_def_sz = data->d_size / nr_maps; 974 966 975 967 /* Fill obj->maps using data in "maps" section. 
*/ 976 968 for (i = 0; i < nr_syms; i++) { ··· 1870 1862 pr_warn("incorrect bpf_call opcode\n"); 1871 1863 return -LIBBPF_ERRNO__RELOC; 1872 1864 } 1865 + if (sym.st_value % 8) { 1866 + pr_warn("bad call relo offset: %lu\n", sym.st_value); 1867 + return -LIBBPF_ERRNO__RELOC; 1868 + } 1873 1869 prog->reloc_desc[i].type = RELO_CALL; 1874 1870 prog->reloc_desc[i].insn_idx = insn_idx; 1875 - prog->reloc_desc[i].text_off = sym.st_value; 1871 + prog->reloc_desc[i].text_off = sym.st_value / 8; 1876 1872 obj->has_pseudo_calls = true; 1877 1873 continue; 1878 1874 } ··· 2007 1995 map->def.map_flags = info.map_flags; 2008 1996 map->btf_key_type_id = info.btf_key_type_id; 2009 1997 map->btf_value_type_id = info.btf_value_type_id; 1998 + map->reused = true; 2010 1999 2011 2000 return 0; 2012 2001 ··· 2171 2158 return 0; 2172 2159 } 2173 2160 2161 + static int bpf_object__probe_array_mmap(struct bpf_object *obj) 2162 + { 2163 + struct bpf_create_map_attr attr = { 2164 + .map_type = BPF_MAP_TYPE_ARRAY, 2165 + .map_flags = BPF_F_MMAPABLE, 2166 + .key_size = sizeof(int), 2167 + .value_size = sizeof(int), 2168 + .max_entries = 1, 2169 + }; 2170 + int fd; 2171 + 2172 + fd = bpf_create_map_xattr(&attr); 2173 + if (fd >= 0) { 2174 + obj->caps.array_mmap = 1; 2175 + close(fd); 2176 + return 1; 2177 + } 2178 + 2179 + return 0; 2180 + } 2181 + 2174 2182 static int 2175 2183 bpf_object__probe_caps(struct bpf_object *obj) 2176 2184 { ··· 2200 2166 bpf_object__probe_global_data, 2201 2167 bpf_object__probe_btf_func, 2202 2168 bpf_object__probe_btf_datasec, 2169 + bpf_object__probe_array_mmap, 2203 2170 }; 2204 2171 int i, ret; 2205 2172 ··· 2505 2470 int raw_spec[BPF_CORE_SPEC_MAX_LEN]; 2506 2471 /* raw spec length */ 2507 2472 int raw_len; 2508 - /* field byte offset represented by spec */ 2509 - __u32 offset; 2473 + /* field bit offset represented by spec */ 2474 + __u32 bit_offset; 2510 2475 }; 2511 2476 2512 2477 static bool str_is_empty(const char *s) ··· 2517 2482 /* 2518 2483 * Turn 
```diff
  * bpf_field_reloc into a low- and high-level spec representation,
  * validating correctness along the way, as well as calculating resulting
- * field offset (in bytes), specified by accessor string. Low-level spec
- * captures every single level of nestedness, including traversing anonymous
+ * field bit offset, specified by accessor string. Low-level spec captures
+ * every single level of nestedness, including traversing anonymous
  * struct/union members. High-level one only captures semantically meaningful
  * "turning points": named fields and array indicies.
  * E.g., for this case:
···
 	sz = btf__resolve_size(btf, id);
 	if (sz < 0)
 		return sz;
-	spec->offset = access_idx * sz;
+	spec->bit_offset = access_idx * sz * 8;
 
 	for (i = 1; i < spec->raw_len; i++) {
 		t = skip_mods_and_typedefs(btf, id, &id);
···
 
 		if (btf_is_composite(t)) {
 			const struct btf_member *m;
-			__u32 offset;
+			__u32 bit_offset;
 
 			if (access_idx >= btf_vlen(t))
 				return -EINVAL;
-			if (btf_member_bitfield_size(t, access_idx))
-				return -EINVAL;
 
-			offset = btf_member_bit_offset(t, access_idx);
-			if (offset % 8)
-				return -EINVAL;
-			spec->offset += offset / 8;
+			bit_offset = btf_member_bit_offset(t, access_idx);
+			spec->bit_offset += bit_offset;
 
 			m = btf_members(t) + access_idx;
 			if (m->name_off) {
···
 			sz = btf__resolve_size(btf, id);
 			if (sz < 0)
 				return sz;
-			spec->offset += access_idx * sz;
+			spec->bit_offset += access_idx * sz * 8;
 		} else {
 			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n",
 				type_id, spec_str, i, id, btf_kind(t));
···
 }
 
 /* Check two types for compatibility, skipping const/volatile/restrict and
- * typedefs, to ensure we are relocating offset to the compatible entities:
+ * typedefs, to ensure we are relocating compatible entities:
  * - any two STRUCTs/UNIONs are compatible and can be mixed;
- * - any two FWDs are compatible;
+ * - any two FWDs are compatible, if their names match (modulo flavor suffix);
  * - any two PTRs are always compatible;
+ * - for ENUMs, names should be the same (ignoring flavor suffix) or at
+ *   least one of enums should be anonymous;
  * - for ENUMs, check sizes, names are ignored;
- * - for INT, size and bitness should match, signedness is ignored;
+ * - for INT, size and signedness are ignored;
  * - for ARRAY, dimensionality is ignored, element types are checked for
  *   compatibility recursively;
  * - everything else shouldn't be ever a target of relocation.
···
 		return 0;
 
 	switch (btf_kind(local_type)) {
-	case BTF_KIND_FWD:
 	case BTF_KIND_PTR:
 		return 1;
-	case BTF_KIND_ENUM:
-		return local_type->size == targ_type->size;
+	case BTF_KIND_FWD:
+	case BTF_KIND_ENUM: {
+		const char *local_name, *targ_name;
+		size_t local_len, targ_len;
+
+		local_name = btf__name_by_offset(local_btf,
+						 local_type->name_off);
+		targ_name = btf__name_by_offset(targ_btf, targ_type->name_off);
+		local_len = bpf_core_essential_name_len(local_name);
+		targ_len = bpf_core_essential_name_len(targ_name);
+		/* one of them is anonymous or both w/ same flavor-less names */
+		return local_len == 0 || targ_len == 0 ||
+		       (local_len == targ_len &&
+			strncmp(local_name, targ_name, local_len) == 0);
+	}
 	case BTF_KIND_INT:
+		/* just reject deprecated bitfield-like integers; all other
+		 * integers are by default compatible between each other
+		 */
 		return btf_int_offset(local_type) == 0 &&
-		       btf_int_offset(targ_type) == 0 &&
-		       local_type->size == targ_type->size &&
-		       btf_int_bits(local_type) == btf_int_bits(targ_type);
+		       btf_int_offset(targ_type) == 0;
 	case BTF_KIND_ARRAY:
 		local_id = btf_array(local_type)->type;
 		targ_id = btf_array(targ_type)->type;
···
  * Given single high-level named field accessor in local type, find
  * corresponding high-level accessor for a target type. Along the way,
  * maintain low-level spec for target as well. Also keep updating target
- * offset.
+ * bit offset.
  *
  * Searching is performed through recursive exhaustive enumeration of all
  * fields of a struct/union. If there are any anonymous (embedded)
···
 	n = btf_vlen(targ_type);
 	m = btf_members(targ_type);
 	for (i = 0; i < n; i++, m++) {
-		__u32 offset;
+		__u32 bit_offset;
 
-		/* bitfield relocations not supported */
-		if (btf_member_bitfield_size(targ_type, i))
-			continue;
-		offset = btf_member_bit_offset(targ_type, i);
-		if (offset % 8)
-			continue;
+		bit_offset = btf_member_bit_offset(targ_type, i);
 
 		/* too deep struct/union/array nesting */
 		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)
 			return -E2BIG;
 
 		/* speculate this member will be the good one */
-		spec->offset += offset / 8;
+		spec->bit_offset += bit_offset;
 		spec->raw_spec[spec->raw_len++] = i;
 
 		targ_name = btf__name_by_offset(targ_btf, m->name_off);
···
 			return found;
 		}
 		/* member turned out not to be what we looked for */
-		spec->offset -= offset / 8;
+		spec->bit_offset -= bit_offset;
 		spec->raw_len--;
 	}
···
 
 /*
  * Try to match local spec to a target type and, if successful, produce full
- * target spec (high-level, low-level + offset).
+ * target spec (high-level, low-level + bit offset).
  */
 static int bpf_core_spec_match(struct bpf_core_spec *local_spec,
 			       const struct btf *targ_btf, __u32 targ_id,
···
 			sz = btf__resolve_size(targ_btf, targ_id);
 			if (sz < 0)
 				return sz;
-			targ_spec->offset += local_acc->idx * sz;
+			targ_spec->bit_offset += local_acc->idx * sz * 8;
 		}
 	}
 
 	return 1;
+}
+
+static int bpf_core_calc_field_relo(const struct bpf_program *prog,
+				    const struct bpf_field_reloc *relo,
+				    const struct bpf_core_spec *spec,
+				    __u32 *val, bool *validate)
+{
+	const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1];
+	const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id);
+	__u32 byte_off, byte_sz, bit_off, bit_sz;
+	const struct btf_member *m;
+	const struct btf_type *mt;
+	bool bitfield;
+	__s64 sz;
+
+	/* a[n] accessor needs special handling */
+	if (!acc->name) {
+		if (relo->kind == BPF_FIELD_BYTE_OFFSET) {
+			*val = spec->bit_offset / 8;
+		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) {
+			sz = btf__resolve_size(spec->btf, acc->type_id);
+			if (sz < 0)
+				return -EINVAL;
+			*val = sz;
+		} else {
+			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n",
+				bpf_program__title(prog, false),
+				relo->kind, relo->insn_off / 8);
+			return -EINVAL;
+		}
+		if (validate)
+			*validate = true;
+		return 0;
+	}
+
+	m = btf_members(t) + acc->idx;
+	mt = skip_mods_and_typedefs(spec->btf, m->type, NULL);
+	bit_off = spec->bit_offset;
+	bit_sz = btf_member_bitfield_size(t, acc->idx);
+
+	bitfield = bit_sz > 0;
+	if (bitfield) {
+		byte_sz = mt->size;
+		byte_off = bit_off / 8 / byte_sz * byte_sz;
+		/* figure out smallest int size necessary for bitfield load */
+		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) {
+			if (byte_sz >= 8) {
+				/* bitfield can't be read with 64-bit read */
+				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n",
+					bpf_program__title(prog, false),
+					relo->kind, relo->insn_off / 8);
+				return -E2BIG;
+			}
+			byte_sz *= 2;
+			byte_off = bit_off / 8 / byte_sz * byte_sz;
+		}
+	} else {
+		sz = btf__resolve_size(spec->btf, m->type);
+		if (sz < 0)
+			return -EINVAL;
+		byte_sz = sz;
+		byte_off = spec->bit_offset / 8;
+		bit_sz = byte_sz * 8;
+	}
+
+	/* for bitfields, all the relocatable aspects are ambiguous and we
+	 * might disagree with compiler, so turn off validation of expected
+	 * value, except for signedness
+	 */
+	if (validate)
+		*validate = !bitfield;
+
+	switch (relo->kind) {
+	case BPF_FIELD_BYTE_OFFSET:
+		*val = byte_off;
+		break;
+	case BPF_FIELD_BYTE_SIZE:
+		*val = byte_sz;
+		break;
+	case BPF_FIELD_SIGNED:
+		/* enums will be assumed unsigned */
+		*val = btf_is_enum(mt) ||
+		       (btf_int_encoding(mt) & BTF_INT_SIGNED);
+		if (validate)
+			*validate = true; /* signedness is never ambiguous */
+		break;
+	case BPF_FIELD_LSHIFT_U64:
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+		*val = 64 - (bit_off + bit_sz - byte_off * 8);
+#else
+		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);
+#endif
+		break;
+	case BPF_FIELD_RSHIFT_U64:
+		*val = 64 - bit_sz;
+		if (validate)
+			*validate = true; /* right shift is never ambiguous */
+		break;
+	case BPF_FIELD_EXISTS:
+	default:
+		pr_warn("prog '%s': unknown relo %d at insn #%d\n",
+			bpf_program__title(prog, false),
+			relo->kind, relo->insn_off / 8);
+		return -EINVAL;
+	}
+
+	return 0;
 }
 
 /*
···
 			      const struct bpf_core_spec *local_spec,
 			      const struct bpf_core_spec *targ_spec)
 {
+	bool failed = false, validate = true;
 	__u32 orig_val, new_val;
 	struct bpf_insn *insn;
-	int insn_idx;
+	int insn_idx, err;
 	__u8 class;
 
 	if (relo->insn_off % sizeof(struct bpf_insn))
 		return -EINVAL;
 	insn_idx = relo->insn_off / sizeof(struct bpf_insn);
 
-	switch (relo->kind) {
-	case BPF_FIELD_BYTE_OFFSET:
-		orig_val = local_spec->offset;
-		if (targ_spec) {
-			new_val = targ_spec->offset;
-		} else {
-			pr_warn("prog '%s': patching insn #%d w/ failed reloc, imm %d -> %d\n",
-				bpf_program__title(prog, false), insn_idx,
-				orig_val, -1);
-			new_val = (__u32)-1;
-		}
-		break;
-	case BPF_FIELD_EXISTS:
+	if (relo->kind == BPF_FIELD_EXISTS) {
 		orig_val = 1; /* can't generate EXISTS relo w/o local field */
 		new_val = targ_spec ? 1 : 0;
-		break;
-	default:
-		pr_warn("prog '%s': unknown relo %d at insn #%d'\n",
-			bpf_program__title(prog, false),
-			relo->kind, insn_idx);
-		return -EINVAL;
+	} else if (!targ_spec) {
+		failed = true;
+		new_val = (__u32)-1;
+	} else {
+		err = bpf_core_calc_field_relo(prog, relo, local_spec,
+					       &orig_val, &validate);
+		if (err)
+			return err;
+		err = bpf_core_calc_field_relo(prog, relo, targ_spec,
+					       &new_val, NULL);
+		if (err)
+			return err;
 	}
 
 	insn = &prog->insns[insn_idx];
···
 	if (class == BPF_ALU || class == BPF_ALU64) {
 		if (BPF_SRC(insn->code) != BPF_K)
 			return -EINVAL;
-		if (insn->imm != orig_val)
+		if (!failed && validate && insn->imm != orig_val) {
+			pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n",
+				bpf_program__title(prog, false), insn_idx,
+				insn->imm, orig_val, new_val);
 			return -EINVAL;
+		}
+		orig_val = insn->imm;
 		insn->imm = new_val;
-		pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n",
-			 bpf_program__title(prog, false),
-			 insn_idx, orig_val, new_val);
+		pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n",
+			 bpf_program__title(prog, false), insn_idx,
+			 failed ? " w/ failed reloc" : "", orig_val, new_val);
 	} else {
 		pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n",
 			bpf_program__title(prog, false),
···
 		libbpf_print(level, "%d%s", spec->raw_spec[i],
 			     i == spec->raw_len - 1 ? " => " : ":");
 
-	libbpf_print(level, "%u @ &x", spec->offset);
+	libbpf_print(level, "%u.%u @ &x",
+		     spec->bit_offset / 8, spec->bit_offset % 8);
 
 	for (i = 0; i < spec->len; i++) {
 		if (spec->spec[i].name)
···
 		return -EINVAL;
 	}
 
-	pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx);
+	pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx,
+		 relo->kind);
 	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);
 	libbpf_print(LIBBPF_DEBUG, "\n");
 
···
 
 		if (j == 0) {
 			targ_spec = cand_spec;
-		} else if (cand_spec.offset != targ_spec.offset) {
+		} else if (cand_spec.bit_offset != targ_spec.bit_offset) {
 			/* if there are many candidates, they should all
-			 * resolve to the same offset
+			 * resolve to the same bit offset
 			 */
 			pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n",
-				prog_name, relo_idx, cand_spec.offset,
-				targ_spec.offset);
+				prog_name, relo_idx, cand_spec.bit_offset,
+				targ_spec.bit_offset);
 			return -EINVAL;
 		}
 
···
 		pr_warn("oom in prog realloc\n");
 		return -ENOMEM;
 	}
+	prog->insns = new_insn;
 
 	if (obj->btf_ext) {
 		err = bpf_program_reloc_btf_ext(prog, obj,
···
 
 	memcpy(new_insn + prog->insns_cnt, text->insns,
 	       text->insns_cnt * sizeof(*insn));
-	prog->insns = new_insn;
 	prog->main_prog_cnt = prog->insns_cnt;
 	prog->insns_cnt = new_cnt;
 	pr_debug("added %zd insn from %s to prog %s\n",
···
 		 prog->section_name);
 	}
 	insn = &prog->insns[relo->insn_idx];
-	insn->imm += prog->main_prog_cnt - relo->insn_idx;
+	insn->imm += relo->text_off + prog->main_prog_cnt - relo->insn_idx;
 	return 0;
 }
 
···
 	load_attr.insns = insns;
 	load_attr.insns_cnt = insns_cnt;
 	load_attr.license = license;
-	load_attr.kern_version = kern_version;
-	load_attr.prog_ifindex = prog->prog_ifindex;
+	if (prog->type == BPF_PROG_TYPE_TRACING) {
+		load_attr.attach_prog_fd = prog->attach_prog_fd;
+		load_attr.attach_btf_id = prog->attach_btf_id;
+	} else {
+		load_attr.kern_version = kern_version;
+		load_attr.prog_ifindex = prog->prog_ifindex;
+	}
 	/* if .BTF.ext was loaded, kernel supports associated BTF for prog */
 	if (prog->obj->btf_ext)
 		btf_fd = bpf_object__btf_fd(prog->obj);
···
 	load_attr.line_info_cnt = prog->line_info_cnt;
 	load_attr.log_level = prog->log_level;
 	load_attr.prog_flags = prog->prog_flags;
-	load_attr.attach_btf_id = prog->attach_btf_id;
 
 retry_load:
 	log_buf = malloc(log_buf_size);
···
 		free(log_buf);
 		goto retry_load;
 	}
-	ret = -LIBBPF_ERRNO__LOAD;
+	ret = -errno;
 	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
 	pr_warn("load bpf program failed: %s\n", cp);
···
 		pr_warn("Program too large (%zu insns), at most %d insns\n",
 			load_attr.insns_cnt, BPF_MAXINSNS);
 		ret = -LIBBPF_ERRNO__PROG2BIG;
-	} else {
+	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
 		/* Wrong program type? */
-		if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {
-			int fd;
+		int fd;
 
-			load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
-			load_attr.expected_attach_type = 0;
-			fd = bpf_load_program_xattr(&load_attr, NULL, 0);
-			if (fd >= 0) {
-				close(fd);
-				ret = -LIBBPF_ERRNO__PROGTYPE;
-				goto out;
-			}
+		load_attr.prog_type = BPF_PROG_TYPE_KPROBE;
+		load_attr.expected_attach_type = 0;
+		fd = bpf_load_program_xattr(&load_attr, NULL, 0);
+		if (fd >= 0) {
+			close(fd);
+			ret = -LIBBPF_ERRNO__PROGTYPE;
+			goto out;
 		}
-
-		if (log_buf)
-			ret = -LIBBPF_ERRNO__KVER;
 	}
 
 out:
···
 	return 0;
 }
 
-static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id);
-
+static int libbpf_find_attach_btf_id(const char *name,
+				     enum bpf_attach_type attach_type,
+				     __u32 attach_prog_fd);
 static struct bpf_object *
 __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,
 		   struct bpf_object_open_opts *opts)
···
 	const char *obj_name;
 	char tmp_name[64];
 	bool relaxed_maps;
+	__u32 attach_prog_fd;
 	int err;
 
 	if (elf_version(EV_CURRENT) == EV_NONE) {
···
 	obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false);
 	relaxed_maps = OPTS_GET(opts, relaxed_maps, false);
 	pin_root_path = OPTS_GET(opts, pin_root_path, NULL);
+	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0);
 
 	CHECK_ERR(bpf_object__elf_init(obj), err, out);
 	CHECK_ERR(bpf_object__check_endianness(obj), err, out);
···
 	bpf_object__for_each_program(prog, obj) {
 		enum bpf_prog_type prog_type;
 		enum bpf_attach_type attach_type;
-		__u32 btf_id;
 
 		err = libbpf_prog_type_by_name(prog->section_name, &prog_type,
 					       &attach_type);
···
 		bpf_program__set_type(prog, prog_type);
 		bpf_program__set_expected_attach_type(prog, attach_type);
 		if (prog_type == BPF_PROG_TYPE_TRACING) {
-			err = libbpf_attach_btf_id_by_name(prog->section_name, &btf_id);
-			if (err)
+			err = libbpf_find_attach_btf_id(prog->section_name,
+							attach_type,
+							attach_prog_fd);
+			if (err <= 0)
 				goto out;
-			prog->attach_btf_id = btf_id;
+			prog->attach_btf_id = err;
+			prog->attach_prog_fd = attach_prog_fd;
 		}
 	}
 
···
 int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 {
 	struct bpf_object *obj;
-	int err;
+	int err, i;
 
 	if (!attr)
 		return -EINVAL;
···
 
 	return 0;
 out:
+	/* unpin any maps that were auto-pinned during load */
+	for (i = 0; i < obj->nr_maps; i++)
+		if (obj->maps[i].pinned && !obj->maps[i].reused)
+			bpf_map__unpin(&obj->maps[i], NULL);
+
 	bpf_object__unload(obj);
 	pr_warn("failed to load object '%s'\n", obj->path);
 	return err;
···
 	return bpf_program__nth_fd(prog, 0);
 }
 
+size_t bpf_program__size(const struct bpf_program *prog)
+{
+	return prog->insns_cnt * sizeof(struct bpf_insn);
+}
+
 int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,
 			  bpf_program_prep_t prep)
 {
···
 	BPF_PROG_SEC("raw_tp/",			BPF_PROG_TYPE_RAW_TRACEPOINT),
 	BPF_PROG_BTF("tp_btf/",			BPF_PROG_TYPE_TRACING,
 						BPF_TRACE_RAW_TP),
+	BPF_PROG_BTF("fentry/",			BPF_PROG_TYPE_TRACING,
+						BPF_TRACE_FENTRY),
+	BPF_PROG_BTF("fexit/",			BPF_PROG_TYPE_TRACING,
+						BPF_TRACE_FEXIT),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
···
 }
 
 #define BTF_PREFIX "btf_trace_"
-static int libbpf_attach_btf_id_by_name(const char *name, __u32 *btf_id)
+int libbpf_find_vmlinux_btf_id(const char *name,
+			       enum bpf_attach_type attach_type)
 {
 	struct btf *btf = bpf_core_find_kernel_btf();
-	char raw_tp_btf_name[128] = BTF_PREFIX;
-	char *dst = raw_tp_btf_name + sizeof(BTF_PREFIX) - 1;
-	int ret, i, err = -EINVAL;
+	char raw_tp_btf[128] = BTF_PREFIX;
+	char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1;
+	const char *btf_name;
+	int err = -EINVAL;
+	__u32 kind;
 
 	if (IS_ERR(btf)) {
 		pr_warn("vmlinux BTF is not found\n");
 		return -EINVAL;
 	}
 
-	if (!name)
+	if (attach_type == BPF_TRACE_RAW_TP) {
+		/* prepend "btf_trace_" prefix per kernel convention */
+		strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX));
+		btf_name = raw_tp_btf;
+		kind = BTF_KIND_TYPEDEF;
+	} else {
+		btf_name = name;
+		kind = BTF_KIND_FUNC;
+	}
+	err = btf__find_by_name_kind(btf, btf_name, kind);
+	btf__free(btf);
+	return err;
+}
+
+static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+	struct bpf_prog_info_linear *info_linear;
+	struct bpf_prog_info *info;
+	struct btf *btf = NULL;
+	int err = -EINVAL;
+
+	info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0);
+	if (IS_ERR_OR_NULL(info_linear)) {
+		pr_warn("failed get_prog_info_linear for FD %d\n",
+			attach_prog_fd);
+		return -EINVAL;
+	}
+	info = &info_linear->info;
+	if (!info->btf_id) {
+		pr_warn("The target program doesn't have BTF\n");
 		goto out;
+	}
+	if (btf__get_from_id(info->btf_id, &btf)) {
+		pr_warn("Failed to get BTF of the program\n");
+		goto out;
+	}
+	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+	btf__free(btf);
+	if (err <= 0) {
+		pr_warn("%s is not found in prog's BTF\n", name);
+		goto out;
+	}
+out:
+	free(info_linear);
+	return err;
+}
+
+static int libbpf_find_attach_btf_id(const char *name,
+				     enum bpf_attach_type attach_type,
+				     __u32 attach_prog_fd)
+{
+	int i, err;
+
+	if (!name)
+		return -EINVAL;
 
 	for (i = 0; i < ARRAY_SIZE(section_names); i++) {
 		if (!section_names[i].is_attach_btf)
 			continue;
 		if (strncmp(name, section_names[i].sec, section_names[i].len))
 			continue;
-		/* prepend "btf_trace_" prefix per kernel convention */
-		strncat(dst, name + section_names[i].len,
-			sizeof(raw_tp_btf_name) - sizeof(BTF_PREFIX));
-		ret = btf__find_by_name(btf, raw_tp_btf_name);
-		if (ret <= 0) {
-			pr_warn("%s is not found in vmlinux BTF\n", dst);
-			goto out;
-		}
-		*btf_id = ret;
-		err = 0;
-		goto out;
+		if (attach_prog_fd)
+			err = libbpf_find_prog_btf_id(name + section_names[i].len,
+						      attach_prog_fd);
+		else
+			err = libbpf_find_vmlinux_btf_id(name + section_names[i].len,
+							 attach_type);
+		if (err <= 0)
+			pr_warn("%s is not found in vmlinux BTF\n", name);
+		return err;
 	}
 	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name);
-	err = -ESRCH;
-out:
-	btf__free(btf);
-	return err;
+	return -ESRCH;
 }
 
 int libbpf_attach_type_by_name(const char *name,
···
 		free(link);
 		pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n",
 			bpf_program__title(prog, false), tp_name,
+			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(pfd);
+	}
+	link->fd = pfd;
+	return (struct bpf_link *)link;
+}
+
+struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link_fd *link;
+	int prog_fd, pfd;
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("program '%s': can't attach before loaded\n",
+			bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = malloc(sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->link.destroy = &bpf_link__destroy_fd;
+
+	pfd = bpf_raw_tracepoint_open(NULL, prog_fd);
+	if (pfd < 0) {
+		pfd = -errno;
+		free(link);
+		pr_warn("program '%s': failed to attach to trace: %s\n",
+			bpf_program__title(prog, false),
 			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));
 		return ERR_PTR(pfd);
 	}
```
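The bitfield arithmetic in `bpf_core_calc_field_relo()` above can be checked in isolation. The sketch below is not libbpf code; the field layout (a 5-bit field at bit offset 11, backed by a 4-byte int) is hypothetical, and the load helper assumes a little-endian host. Only the formulas mirror the diff.

```c
#include <stdint.h>
#include <string.h>

/* smallest power-of-two load window that covers [bit_off, bit_off + bit_sz),
 * mirroring the byte_sz-growing loop in the diff */
static uint32_t field_byte_sz(uint32_t bit_off, uint32_t bit_sz, uint32_t sz)
{
	while (bit_off + bit_sz - bit_off / 8 / sz * sz * 8 > sz * 8)
		sz *= 2; /* grow 1 -> 2 -> 4 -> 8 until the field fits */
	return sz;
}

/* round the byte offset down to a byte_sz-aligned boundary */
static uint32_t field_byte_off(uint32_t bit_off, uint32_t byte_sz)
{
	return bit_off / 8 / byte_sz * byte_sz;
}

/* little-endian BPF_FIELD_LSHIFT_U64 / BPF_FIELD_RSHIFT_U64 values */
static uint32_t field_lshift(uint32_t bit_off, uint32_t bit_sz,
			     uint32_t byte_off)
{
	return 64 - (bit_off + bit_sz - byte_off * 8);
}

static uint32_t field_rshift(uint32_t bit_sz)
{
	return 64 - bit_sz;
}

/* what the relocated BPF code does: load byte_sz bytes, shift up, shift down */
static uint64_t extract(const unsigned char *mem, uint32_t byte_off,
			uint32_t byte_sz, uint32_t lsh, uint32_t rsh)
{
	uint64_t v = 0;

	memcpy(&v, mem + byte_off, byte_sz); /* assumes little-endian host */
	v <<= lsh;
	v >>= rsh;
	return v;
}
```

For the hypothetical field this yields a 4-byte load at offset 0, a left shift of 48 and a right shift of 59, recovering the stored 5-bit value.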
tools/lib/bpf/libbpf.h (+19 -1)

```diff
···
 	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
 	 */
 	const char *pin_root_path;
+	__u32 attach_prog_fd;
 };
-#define bpf_object_open_opts__last_field pin_root_path
+#define bpf_object_open_opts__last_field attach_prog_fd
 
 LIBBPF_API struct bpf_object *bpf_object__open(const char *path);
 LIBBPF_API struct bpf_object *
···
 				      enum bpf_attach_type *expected_attach_type);
 LIBBPF_API int libbpf_attach_type_by_name(const char *name,
 					  enum bpf_attach_type *attach_type);
+LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name,
+					  enum bpf_attach_type attach_type);
 
 /* Accessors of bpf_program */
 struct bpf_program;
···
 
 LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,
 					  bool needs_copy);
+
+/* returns program size in bytes */
+LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog);
 
 LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,
 				 __u32 kern_version);
···
 bpf_program__attach_raw_tracepoint(struct bpf_program *prog,
 				   const char *tp_name);
 
+LIBBPF_API struct bpf_link *
+bpf_program__attach_trace(struct bpf_program *prog);
 struct bpf_insn;
 
 /*
···
 LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,
 			     struct bpf_object **pobj, int *prog_fd);
 
+struct xdp_link_info {
+	__u32 prog_id;
+	__u32 drv_prog_id;
+	__u32 hw_prog_id;
+	__u32 skb_prog_id;
+	__u8 attach_mode;
+};
+
 LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
+LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+				     size_t info_size, __u32 flags);
 
 struct perf_buffer;
 
```
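The `bpf_object_open_opts__last_field` bump to `attach_prog_fd` follows libbpf's opts-struct versioning scheme: callers record the struct size they were compiled against in a leading `sz` field, and the library only reads fields that fall inside that size. Below is a minimal standalone sketch of that idea; the struct and macro names are illustrative, not libbpf's actual internals.

```c
#include <stddef.h>

/* illustrative opts struct; sz must be the first field so the library
 * can tell how much of the struct the caller actually knows about */
struct open_opts {
	size_t sz;
	const char *pin_root_path;
	unsigned int attach_prog_fd; /* new field appended at the end */
};

/* does the caller's struct (of claimed size opts->sz) contain 'field'? */
#define OPTS_HAS(opts, field) \
	((opts)->sz >= offsetof(struct open_opts, field) + \
		       sizeof((opts)->field))

/* read a field, falling back to a default for older callers */
#define OPTS_GET(opts, field, fallback) \
	(OPTS_HAS(opts, field) ? (opts)->field : (fallback))

unsigned int opts_attach_prog_fd(const struct open_opts *opts)
{
	return OPTS_GET(opts, attach_prog_fd, 0);
}
```

A binary built against the older header reports a smaller `sz`, so `OPTS_GET` yields the fallback instead of reading past the end of the caller's struct; this is why appending fields (and bumping `__last_field`) is a backward-compatible ABI change.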
tools/lib/bpf/libbpf.map (+5)

```diff
···
 
 LIBBPF_0.0.6 {
 	global:
+		bpf_get_link_xdp_info;
 		bpf_map__get_pin_path;
 		bpf_map__is_pinned;
 		bpf_map__set_pin_path;
 		bpf_object__open_file;
 		bpf_object__open_mem;
+		bpf_program__attach_trace;
 		bpf_program__get_expected_attach_type;
 		bpf_program__get_type;
 		bpf_program__is_tracing;
 		bpf_program__set_tracing;
+		bpf_program__size;
+		btf__find_by_name_kind;
+		libbpf_find_vmlinux_btf_id;
 } LIBBPF_0.0.5;
```
tools/lib/bpf/libbpf_internal.h (+4)

```diff
···
  */
 enum bpf_field_info_kind {
 	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */
+	BPF_FIELD_BYTE_SIZE = 1,
 	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */
+	BPF_FIELD_SIGNED = 3,
+	BPF_FIELD_LSHIFT_U64 = 4,
+	BPF_FIELD_RSHIFT_U64 = 5,
 };
 
 /* The minimum bpf_field_reloc checked by the loader
```
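The new relocation kinds compose: a relocatable bitfield read loads BYTE_SIZE bytes at BYTE_OFFSET, left-shifts the 64-bit result by LSHIFT_U64, then right-shifts by RSHIFT_U64, using an arithmetic shift when SIGNED is set so the sign bit propagates. A hedged sketch of the signed case follows; the shift values are hand-computed for a hypothetical 3-bit signed field at bit offset 2 in a 1-byte storage unit, and the arithmetic right shift of a negative value relies on common compiler behavior (implementation-defined in ISO C).

```c
#include <stdint.h>

/* Extract a signed bitfield using the four relocation outputs.
 * byte_off/byte_sz/lshift/rshift would normally come from CO-RE
 * relocations; here they are supplied by the caller.
 */
int64_t load_signed(const unsigned char *mem, uint32_t byte_off,
		    uint32_t byte_sz, uint32_t lshift, uint32_t rshift)
{
	uint64_t v = 0;
	uint32_t i;

	/* assemble a little-endian partial load byte by byte */
	for (i = 0; i < byte_sz; i++)
		v |= (uint64_t)mem[byte_off + i] << (8 * i);
	v <<= lshift;		  /* field now occupies the top bits */
	return (int64_t)v >> rshift; /* arithmetic shift: BPF_FIELD_SIGNED */
}
```

For the 3-bit field holding the pattern `0b110` (that is, -2), the shifts are `lshift = 64 - (2 + 3) = 59` and `rshift = 64 - 3 = 61`, and the arithmetic shift sign-extends the result.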
tools/lib/bpf/netlink.c (+58 -29)

```diff
···
 
 #include "bpf.h"
 #include "libbpf.h"
+#include "libbpf_internal.h"
 #include "nlattr.h"
 
 #ifndef SOL_NETLINK
···
 struct xdp_id_md {
 	int ifindex;
 	__u32 flags;
-	__u32 id;
+	struct xdp_link_info info;
 };
 
 int libbpf_netlink_open(__u32 *nl_pid)
···
 
 	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,
 		       &one, sizeof(one)) < 0) {
-		fprintf(stderr, "Netlink error reporting not supported\n");
+		pr_warn("Netlink error reporting not supported\n");
 	}
 
 	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) {
···
 	return dump_link_nlmsg(cookie, ifi, tb);
 }
 
-static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags)
-{
-	if (mode != XDP_ATTACHED_MULTI)
-		return IFLA_XDP_PROG_ID;
-	if (flags & XDP_FLAGS_DRV_MODE)
-		return IFLA_XDP_DRV_PROG_ID;
-	if (flags & XDP_FLAGS_HW_MODE)
-		return IFLA_XDP_HW_PROG_ID;
-	if (flags & XDP_FLAGS_SKB_MODE)
-		return IFLA_XDP_SKB_PROG_ID;
-
-	return IFLA_XDP_UNSPEC;
-}
-
-static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb)
+static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)
 {
 	struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];
 	struct xdp_id_md *xdp_id = cookie;
 	struct ifinfomsg *ifinfo = msg;
-	unsigned char mode, xdp_attr;
 	int ret;
 
 	if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index)
···
 	if (!xdp_tb[IFLA_XDP_ATTACHED])
 		return 0;
 
-	mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]);
-	if (mode == XDP_ATTACHED_NONE)
+	xdp_id->info.attach_mode = libbpf_nla_getattr_u8(
+		xdp_tb[IFLA_XDP_ATTACHED]);
+
+	if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)
 		return 0;
 
-	xdp_attr = get_xdp_id_attr(mode, xdp_id->flags);
-	if (!xdp_attr || !xdp_tb[xdp_attr])
-		return 0;
+	if (xdp_tb[IFLA_XDP_PROG_ID])
+		xdp_id->info.prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_PROG_ID]);
 
-	xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]);
+	if (xdp_tb[IFLA_XDP_SKB_PROG_ID])
+		xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_SKB_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_DRV_PROG_ID])
+		xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_DRV_PROG_ID]);
+
+	if (xdp_tb[IFLA_XDP_HW_PROG_ID])
+		xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32(
+			xdp_tb[IFLA_XDP_HW_PROG_ID]);
 
 	return 0;
 }
 
-int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
+			  size_t info_size, __u32 flags)
 {
 	struct xdp_id_md xdp_id = {};
 	int sock, ret;
 	__u32 nl_pid;
 	__u32 mask;
 
-	if (flags & ~XDP_FLAGS_MASK)
+	if (flags & ~XDP_FLAGS_MASK || !info_size)
 		return -EINVAL;
 
 	/* Check whether the single {HW,DRV,SKB} mode is set */
···
 	xdp_id.ifindex = ifindex;
 	xdp_id.flags = flags;
 
-	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id);
-	if (!ret)
-		*prog_id = xdp_id.id;
+	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id);
+	if (!ret) {
+		size_t sz = min(info_size, sizeof(xdp_id.info));
+
+		memcpy(info, &xdp_id.info, sz);
+		memset((void *) info + sz, 0, info_size - sz);
+	}
 
 	close(sock);
+	return ret;
+}
+
+static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags)
+{
+	if (info->attach_mode != XDP_ATTACHED_MULTI)
+		return info->prog_id;
+	if (flags & XDP_FLAGS_DRV_MODE)
+		return info->drv_prog_id;
+	if (flags & XDP_FLAGS_HW_MODE)
+		return info->hw_prog_id;
+	if (flags & XDP_FLAGS_SKB_MODE)
+		return info->skb_prog_id;
+
+	return 0;
+}
+
+int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)
+{
+	struct xdp_link_info info;
+	int ret;
+
+	ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags);
+	if (!ret)
+		*prog_id = get_xdp_id(&info, flags);
+
 	return ret;
 }
 
```
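After this change, `bpf_get_link_xdp_id()` is a thin wrapper: it fetches the full `xdp_link_info` and picks one id by attach mode. The selection logic is simple enough to restate as a standalone sketch; the struct and the constant values below are stand-ins for the real ones from `<linux/if_link.h>`, chosen only to exercise the branches.

```c
#include <stdint.h>

/* stand-in constants; real values come from <linux/if_link.h> */
enum { ATTACHED_SINGLE = 1, ATTACHED_MULTI = 4 };
enum { FLAG_SKB = 1 << 1, FLAG_DRV = 1 << 2, FLAG_HW = 1 << 3 };

struct xdp_info_sketch {
	uint32_t prog_id, drv_prog_id, hw_prog_id, skb_prog_id;
	uint8_t attach_mode;
};

/* mirrors get_xdp_id() in the diff: a single-attached device reports one
 * id; with multiple programs attached, the caller's mode flag selects one */
uint32_t pick_xdp_id(const struct xdp_info_sketch *info, uint32_t flags)
{
	if (info->attach_mode != ATTACHED_MULTI)
		return info->prog_id;
	if (flags & FLAG_DRV)
		return info->drv_prog_id;
	if (flags & FLAG_HW)
		return info->hw_prog_id;
	if (flags & FLAG_SKB)
		return info->skb_prog_id;
	return 0;
}
```

Exposing the full struct first and deriving the single id from it keeps the old API intact while letting new callers see all attached programs at once.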
tools/lib/bpf/nlattr.c (+5 -5)

```diff
···
 
 #include <errno.h>
 #include "nlattr.h"
+#include "libbpf_internal.h"
 #include <linux/rtnetlink.h>
 #include <string.h>
 #include <stdio.h>
···
 	}
 
 	if (tb[type])
-		fprintf(stderr, "Attribute of type %#x found multiple times in message, "
-			"previous attribute is being ignored.\n", type);
+		pr_warn("Attribute of type %#x found multiple times in message, "
+			"previous attribute is being ignored.\n", type);
 
 	tb[type] = nla;
 }
···
 
 	if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,
 			     extack_policy) != 0) {
-		fprintf(stderr,
-			"Failed to parse extended error attributes\n");
+		pr_warn("Failed to parse extended error attributes\n");
 		return 0;
 	}
 
 	if (tb[NLMSGERR_ATTR_MSG])
 		errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]);
 
-	fprintf(stderr, "Kernel error message: %s\n", errmsg);
+	pr_warn("Kernel error message: %s\n", errmsg);
 
 	return 0;
 }
```
tools/lib/bpf/xsk.c (+30 -15)

```diff
···
 		goto out;
 	}
 
-	if (err || channels.max_combined == 0)
+	if (err) {
 		/* If the device says it has no channels, then all traffic
 		 * is sent to a single stream, so max queues = 1.
 		 */
 		ret = 1;
-	else
-		ret = channels.max_combined;
+	} else {
+		/* Take the max of rx, tx, combined. Drivers return
+		 * the number of channels in different ways.
+		 */
+		ret = max(channels.max_rx, channels.max_tx);
+		ret = max(ret, (int)channels.max_combined);
+	}
 
 out:
 	close(fd);
···
 		}
 	} else {
 		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id);
+		if (xsk->prog_fd < 0)
+			return -errno;
 		err = xsk_lookup_bpf_maps(xsk);
 		if (err) {
 			close(xsk->prog_fd);
···
 		}
 	}
 
-	err = xsk_set_bpf_maps(xsk);
+	if (xsk->rx)
+		err = xsk_set_bpf_maps(xsk);
 	if (err) {
 		xsk_delete_bpf_maps(xsk);
 		close(xsk->prog_fd);
···
 	struct xsk_socket *xsk;
 	int err;
 
-	if (!umem || !xsk_ptr || !rx || !tx)
+	if (!umem || !xsk_ptr || !(rx || tx))
 		return -EFAULT;
-
-	if (umem->refcount) {
-		pr_warn("Error: shared umems not supported by libbpf.\n");
-		return -EBUSY;
-	}
 
 	xsk = calloc(1, sizeof(*xsk));
 	if (!xsk)
 		return -ENOMEM;
+
+	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+	if (err)
+		goto out_xsk_alloc;
+
+	if (umem->refcount &&
+	    !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) {
+		pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n");
+		err = -EBUSY;
+		goto out_xsk_alloc;
+	}
 
 	if (umem->refcount++ > 0) {
 		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);
···
 	}
 	memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);
 	xsk->ifname[IFNAMSIZ - 1] = '\0';
-
-	err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
-	if (err)
-		goto out_socket;
 
 	if (rx) {
 		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
···
 	sxdp.sxdp_family = PF_XDP;
 	sxdp.sxdp_ifindex = xsk->ifindex;
 	sxdp.sxdp_queue_id = xsk->queue_id;
-	sxdp.sxdp_flags = xsk->config.bind_flags;
+	if (umem->refcount > 1) {
+		sxdp.sxdp_flags = XDP_SHARED_UMEM;
+		sxdp.sxdp_shared_umem_fd = umem->fd;
+	} else {
+		sxdp.sxdp_flags = xsk->config.bind_flags;
+	}
 
 	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
 	if (err) {
```
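The queue-count fix in the xsk.c hunk above boils down to taking the maximum across the three channel counts ethtool can report, since drivers expose them inconsistently. A standalone restatement with hypothetical counts:

```c
/* mirrors the fixed xsk max-queues arithmetic: drivers report channels
 * as rx/tx pairs, as combined channels, or not at all (error -> 1 queue) */
int max_queues(int err, int max_rx, int max_tx, int max_combined)
{
	int ret;

	if (err)
		return 1; /* no channel info: single stream */
	ret = max_rx > max_tx ? max_rx : max_tx;
	return ret > max_combined ? ret : max_combined;
}
```

This matters for AF_XDP because the XSKMAP must be sized to cover every queue a packet could arrive on, whichever reporting style the driver uses.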
+11 -3
tools/testing/selftests/bpf/Makefile
··· 30 30 test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ 31 31 test_cgroup_storage test_select_reuseport \ 32 32 test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ 33 - test_cgroup_attach xdping test_progs-no_alu32 33 + test_cgroup_attach test_progs-no_alu32 34 34 35 35 # Also test bpf-gcc, if present 36 36 ifneq ($(BPF_GCC),) ··· 38 38 endif 39 39 40 40 TEST_GEN_FILES = 41 - TEST_FILES = 41 + TEST_FILES = test_lwt_ip_encap.o \ 42 + test_tc_edt.o 42 43 43 44 # Order correspond to 'make run_tests' order 44 45 TEST_PROGS := test_kmod.sh \ ··· 71 70 # Compile but not part of 'make run_tests' 72 71 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ 73 72 flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \ 74 - test_lirc_mode2_user 73 + test_lirc_mode2_user xdping 75 74 76 75 TEST_CUSTOM_PROGS = urandom_read 77 76 ··· 162 161 ($(CLANG) $3 -O2 -target bpf -emit-llvm \ 163 162 -c $1 -o - || echo "BPF obj compilation failed") | \ 164 163 $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 164 + endef 165 + # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 166 + define CLANG_NOALU32_BPF_BUILD_RULE 167 + ($(CLANG) $3 -O2 -target bpf -emit-llvm \ 168 + -c $1 -o - || echo "BPF obj compilation failed") | \ 169 + $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 165 170 endef 166 171 # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC 167 172 define CLANG_NATIVE_BPF_BUILD_RULE ··· 281 274 $(eval $(call DEFINE_TEST_RUNNER,test_progs)) 282 275 283 276 # Define test_progs-no_alu32 test runner. 277 + TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE 284 278 TRUNNER_BPF_LDFLAGS := 285 279 $(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32)) 286 280
+143 -31
tools/testing/selftests/bpf/prog_tests/core_reloc.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <test_progs.h> 3 3 #include "progs/core_reloc_types.h" 4 + #include <sys/mman.h> 4 5 5 6 #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) 6 7 ··· 175 174 .fails = true, \ 176 175 } 177 176 178 - #define EXISTENCE_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \ 179 - .a = 42, \ 180 - } 181 - 182 177 #define EXISTENCE_CASE_COMMON(name) \ 183 178 .case_name = #name, \ 184 179 .bpf_obj_file = "test_core_reloc_existence.o", \ 185 180 .btf_src_file = "btf__core_reloc_" #name ".o", \ 186 - .relaxed_core_relocs = true \ 181 + .relaxed_core_relocs = true 187 182 188 183 #define EXISTENCE_ERR_CASE(name) { \ 189 184 EXISTENCE_CASE_COMMON(name), \ 185 + .fails = true, \ 186 + } 187 + 188 + #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \ 189 + .case_name = test_name_prefix#name, \ 190 + .bpf_obj_file = objfile, \ 191 + .btf_src_file = "btf__core_reloc_" #name ".o" 192 + 193 + #define BITFIELDS_CASE(name, ...) 
{ \ 194 + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ 195 + "direct:", name), \ 196 + .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \ 197 + .input_len = sizeof(struct core_reloc_##name), \ 198 + .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ 199 + __VA_ARGS__, \ 200 + .output_len = sizeof(struct core_reloc_bitfields_output), \ 201 + }, { \ 202 + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ 203 + "probed:", name), \ 204 + .input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \ 205 + .input_len = sizeof(struct core_reloc_##name), \ 206 + .output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \ 207 + __VA_ARGS__, \ 208 + .output_len = sizeof(struct core_reloc_bitfields_output), \ 209 + .direct_raw_tp = true, \ 210 + } 211 + 212 + 213 + #define BITFIELDS_ERR_CASE(name) { \ 214 + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \ 215 + "probed:", name), \ 216 + .fails = true, \ 217 + }, { \ 218 + BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \ 219 + "direct:", name), \ 220 + .direct_raw_tp = true, \ 221 + .fails = true, \ 222 + } 223 + 224 + #define SIZE_CASE_COMMON(name) \ 225 + .case_name = #name, \ 226 + .bpf_obj_file = "test_core_reloc_size.o", \ 227 + .btf_src_file = "btf__core_reloc_" #name ".o", \ 228 + .relaxed_core_relocs = true 229 + 230 + #define SIZE_OUTPUT_DATA(type) \ 231 + STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \ 232 + .int_sz = sizeof(((type *)0)->int_field), \ 233 + .struct_sz = sizeof(((type *)0)->struct_field), \ 234 + .union_sz = sizeof(((type *)0)->union_field), \ 235 + .arr_sz = sizeof(((type *)0)->arr_field), \ 236 + .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \ 237 + .ptr_sz = sizeof(((type *)0)->ptr_field), \ 238 + .enum_sz = sizeof(((type *)0)->enum_field), \ 239 + } 240 + 241 + #define SIZE_CASE(name) { \ 242 + SIZE_CASE_COMMON(name), \ 243 + .input_len = 0, \ 244 + .output = SIZE_OUTPUT_DATA(struct core_reloc_##name), \ 245 + 
.output_len = sizeof(struct core_reloc_size_output), \ 246 + } 247 + 248 + #define SIZE_ERR_CASE(name) { \ 249 + SIZE_CASE_COMMON(name), \ 190 250 .fails = true, \ 191 251 } 192 252 ··· 261 199 int output_len; 262 200 bool fails; 263 201 bool relaxed_core_relocs; 202 + bool direct_raw_tp; 264 203 }; 265 204 266 205 static struct core_reloc_test_case test_cases[] = { ··· 338 275 INTS_CASE(ints___bool), 339 276 INTS_CASE(ints___reverse_sign), 340 277 341 - INTS_ERR_CASE(ints___err_bitfield), 342 - INTS_ERR_CASE(ints___err_wrong_sz_8), 343 - INTS_ERR_CASE(ints___err_wrong_sz_16), 344 - INTS_ERR_CASE(ints___err_wrong_sz_32), 345 - INTS_ERR_CASE(ints___err_wrong_sz_64), 346 - 347 278 /* validate edge cases of capturing relocations */ 348 279 { 349 280 .case_name = "misc", ··· 409 352 EXISTENCE_ERR_CASE(existence__err_arr_kind), 410 353 EXISTENCE_ERR_CASE(existence__err_arr_value_type), 411 354 EXISTENCE_ERR_CASE(existence__err_struct_type), 355 + 356 + /* bitfield relocation checks */ 357 + BITFIELDS_CASE(bitfields, { 358 + .ub1 = 1, 359 + .ub2 = 2, 360 + .ub7 = 96, 361 + .sb4 = -7, 362 + .sb20 = -0x76543, 363 + .u32 = 0x80000000, 364 + .s32 = -0x76543210, 365 + }), 366 + BITFIELDS_CASE(bitfields___bit_sz_change, { 367 + .ub1 = 6, 368 + .ub2 = 0xABCDE, 369 + .ub7 = 1, 370 + .sb4 = -1, 371 + .sb20 = -0x17654321, 372 + .u32 = 0xBEEF, 373 + .s32 = -0x3FEDCBA987654321, 374 + }), 375 + BITFIELDS_CASE(bitfields___bitfield_vs_int, { 376 + .ub1 = 0xFEDCBA9876543210, 377 + .ub2 = 0xA6, 378 + .ub7 = -0x7EDCBA987654321, 379 + .sb4 = -0x6123456789ABCDE, 380 + .sb20 = 0xD00D, 381 + .u32 = -0x76543, 382 + .s32 = 0x0ADEADBEEFBADB0B, 383 + }), 384 + BITFIELDS_CASE(bitfields___just_big_enough, { 385 + .ub1 = 0xF, 386 + .ub2 = 0x0812345678FEDCBA, 387 + }), 388 + BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield), 389 + 390 + /* size relocation checks */ 391 + SIZE_CASE(size), 392 + SIZE_CASE(size___diff_sz), 412 393 }; 413 394 414 395 struct data { ··· 454 359 char out[256]; 455 360 }; 
456 361 362 + static size_t roundup_page(size_t sz) 363 + { 364 + long page_size = sysconf(_SC_PAGE_SIZE); 365 + return (sz + page_size - 1) / page_size * page_size; 366 + } 367 + 457 368 void test_core_reloc(void) 458 369 { 459 - const char *probe_name = "raw_tracepoint/sys_enter"; 370 + const size_t mmap_sz = roundup_page(sizeof(struct data)); 460 371 struct bpf_object_load_attr load_attr = {}; 461 372 struct core_reloc_test_case *test_case; 373 + const char *tp_name, *probe_name; 462 374 int err, duration = 0, i, equal; 463 375 struct bpf_link *link = NULL; 464 376 struct bpf_map *data_map; 465 377 struct bpf_program *prog; 466 378 struct bpf_object *obj; 467 - const int zero = 0; 468 - struct data data; 379 + struct data *data; 380 + void *mmap_data = NULL; 469 381 470 382 for (i = 0; i < ARRAY_SIZE(test_cases); i++) { 471 383 test_case = &test_cases[i]; ··· 484 382 ); 485 383 486 384 obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); 487 - if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", 488 - "failed to open '%s': %ld\n", 385 + if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", 489 386 test_case->bpf_obj_file, PTR_ERR(obj))) 490 387 continue; 388 + 389 + /* for typed raw tracepoints, NULL should be specified */ 390 + if (test_case->direct_raw_tp) { 391 + probe_name = "tp_btf/sys_enter"; 392 + tp_name = NULL; 393 + } else { 394 + probe_name = "raw_tracepoint/sys_enter"; 395 + tp_name = "sys_enter"; 396 + } 491 397 492 398 prog = bpf_object__find_program_by_title(obj, probe_name); 493 399 if (CHECK(!prog, "find_probe", ··· 517 407 goto cleanup; 518 408 } 519 409 520 - link = bpf_program__attach_raw_tracepoint(prog, "sys_enter"); 410 + link = bpf_program__attach_raw_tracepoint(prog, tp_name); 521 411 if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", 522 412 PTR_ERR(link))) 523 413 goto cleanup; ··· 526 416 if (CHECK(!data_map, "find_data_map", "data map not found\n")) 527 417 goto cleanup; 528 418 529 - memset(&data, 0, sizeof(data)); 530 - 
memcpy(data.in, test_case->input, test_case->input_len); 531 - 532 - err = bpf_map_update_elem(bpf_map__fd(data_map), 533 - &zero, &data, 0); 534 - if (CHECK(err, "update_data_map", 535 - "failed to update .data map: %d\n", err)) 419 + mmap_data = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, 420 + MAP_SHARED, bpf_map__fd(data_map), 0); 421 + if (CHECK(mmap_data == MAP_FAILED, "mmap", 422 + ".bss mmap failed: %d", errno)) { 423 + mmap_data = NULL; 536 424 goto cleanup; 425 + } 426 + data = mmap_data; 427 + 428 + memset(mmap_data, 0, sizeof(*data)); 429 + memcpy(data->in, test_case->input, test_case->input_len); 537 430 538 431 /* trigger test run */ 539 432 usleep(1); 540 433 541 - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &data); 542 - if (CHECK(err, "get_result", 543 - "failed to get output data: %d\n", err)) 544 - goto cleanup; 545 - 546 - equal = memcmp(data.out, test_case->output, 434 + equal = memcmp(data->out, test_case->output, 547 435 test_case->output_len) == 0; 548 436 if (CHECK(!equal, "check_result", 549 437 "input/output data don't match\n")) { ··· 553 445 } 554 446 for (j = 0; j < test_case->output_len; j++) { 555 447 printf("output byte #%d: EXP 0x%02hhx GOT 0x%02hhx\n", 556 - j, test_case->output[j], data.out[j]); 448 + j, test_case->output[j], data->out[j]); 557 449 } 558 450 goto cleanup; 559 451 } 560 452 561 453 cleanup: 454 + if (mmap_data) { 455 + CHECK_FAIL(munmap(mmap_data, mmap_sz)); 456 + mmap_data = NULL; 457 + } 562 458 if (!IS_ERR_OR_NULL(link)) { 563 459 bpf_link__destroy(link); 564 460 link = NULL;
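The core_reloc.c changes switch the test from `bpf_map_update_elem()`/`bpf_map_lookup_elem()` round-trips to a direct `mmap()` of the `.bss` map, so the mapping length must be page-aligned; that is what the new `roundup_page()` helper provides. The arithmetic, parameterized by page size so it can be checked without `sysconf()` (the `roundup_to` name is mine):

```c
#include <stddef.h>

/* Round sz up to the next multiple of page_size, the same computation
 * the selftest's roundup_page() performs with sysconf(_SC_PAGE_SIZE). */
static size_t roundup_to(size_t sz, size_t page_size)
{
	return (sz + page_size - 1) / page_size * page_size;
}
```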
+90
tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <test_progs.h> 4 + 5 + void test_fentry_fexit(void) 6 + { 7 + struct bpf_prog_load_attr attr_fentry = { 8 + .file = "./fentry_test.o", 9 + }; 10 + struct bpf_prog_load_attr attr_fexit = { 11 + .file = "./fexit_test.o", 12 + }; 13 + 14 + struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj; 15 + struct bpf_map *data_map_fentry, *data_map_fexit; 16 + char fentry_name[] = "fentry/bpf_fentry_testX"; 17 + char fexit_name[] = "fexit/bpf_fentry_testX"; 18 + int err, pkt_fd, kfree_skb_fd, i; 19 + struct bpf_link *link[12] = {}; 20 + struct bpf_program *prog[12]; 21 + __u32 duration, retval; 22 + const int zero = 0; 23 + u64 result[12]; 24 + 25 + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, 26 + &pkt_obj, &pkt_fd); 27 + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) 28 + return; 29 + err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd); 30 + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) 31 + goto close_prog; 32 + err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd); 33 + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) 34 + goto close_prog; 35 + 36 + for (i = 0; i < 6; i++) { 37 + fentry_name[sizeof(fentry_name) - 2] = '1' + i; 38 + prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name); 39 + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name)) 40 + goto close_prog; 41 + link[i] = bpf_program__attach_trace(prog[i]); 42 + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) 43 + goto close_prog; 44 + } 45 + data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss"); 46 + if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n")) 47 + goto close_prog; 48 + 49 + for (i = 6; i < 12; i++) { 50 + fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6; 51 + prog[i] = 
bpf_object__find_program_by_title(obj_fexit, fexit_name); 52 + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name)) 53 + goto close_prog; 54 + link[i] = bpf_program__attach_trace(prog[i]); 55 + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) 56 + goto close_prog; 57 + } 58 + data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss"); 59 + if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n")) 60 + goto close_prog; 61 + 62 + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 63 + NULL, NULL, &retval, &duration); 64 + CHECK(err || retval, "ipv6", 65 + "err %d errno %d retval %d duration %d\n", 66 + err, errno, retval, duration); 67 + 68 + err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result); 69 + if (CHECK(err, "get_result", 70 + "failed to get output data: %d\n", err)) 71 + goto close_prog; 72 + 73 + err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit), &zero, result + 6); 74 + if (CHECK(err, "get_result", 75 + "failed to get output data: %d\n", err)) 76 + goto close_prog; 77 + 78 + for (i = 0; i < 12; i++) 79 + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", 80 + i % 6 + 1, result[i])) 81 + goto close_prog; 82 + 83 + close_prog: 84 + for (i = 0; i < 12; i++) 85 + if (!IS_ERR_OR_NULL(link[i])) 86 + bpf_link__destroy(link[i]); 87 + bpf_object__close(obj_fentry); 88 + bpf_object__close(obj_fexit); 89 + bpf_object__close(pkt_obj); 90 + }
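The fentry/fexit tests above (and the fentry_test/fexit_test ones below) select programs by overwriting the trailing `X` of a template section name such as `"fentry/bpf_fentry_testX"` in place. Since `sizeof` on a `char[]` counts the NUL terminator, index `sizeof(name) - 2` is the last visible character. A small sketch of the trick (the `patch_test_index` helper is mine):

```c
#include <stddef.h>

/* Overwrite the trailing placeholder of a NUL-terminated template with
 * the 1-based test index; buf_sz must be sizeof() of the array, so
 * buf_sz - 2 addresses the last character before the terminator. */
static void patch_test_index(char *name, size_t buf_sz, int i)
{
	name[buf_sz - 2] = '1' + i;
}
```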
+64
tools/testing/selftests/bpf/prog_tests/fentry_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <test_progs.h> 4 + 5 + void test_fentry_test(void) 6 + { 7 + struct bpf_prog_load_attr attr = { 8 + .file = "./fentry_test.o", 9 + }; 10 + 11 + char prog_name[] = "fentry/bpf_fentry_testX"; 12 + struct bpf_object *obj = NULL, *pkt_obj; 13 + int err, pkt_fd, kfree_skb_fd, i; 14 + struct bpf_link *link[6] = {}; 15 + struct bpf_program *prog[6]; 16 + __u32 duration, retval; 17 + struct bpf_map *data_map; 18 + const int zero = 0; 19 + u64 result[6]; 20 + 21 + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, 22 + &pkt_obj, &pkt_fd); 23 + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) 24 + return; 25 + err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); 26 + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) 27 + goto close_prog; 28 + 29 + for (i = 0; i < 6; i++) { 30 + prog_name[sizeof(prog_name) - 2] = '1' + i; 31 + prog[i] = bpf_object__find_program_by_title(obj, prog_name); 32 + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) 33 + goto close_prog; 34 + link[i] = bpf_program__attach_trace(prog[i]); 35 + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) 36 + goto close_prog; 37 + } 38 + data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss"); 39 + if (CHECK(!data_map, "find_data_map", "data map not found\n")) 40 + goto close_prog; 41 + 42 + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 43 + NULL, NULL, &retval, &duration); 44 + CHECK(err || retval, "ipv6", 45 + "err %d errno %d retval %d duration %d\n", 46 + err, errno, retval, duration); 47 + 48 + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); 49 + if (CHECK(err, "get_result", 50 + "failed to get output data: %d\n", err)) 51 + goto close_prog; 52 + 53 + for (i = 0; i < 6; i++) 54 + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", 55 + i + 1, result[i])) 56 + goto 
close_prog; 57 + 58 + close_prog: 59 + for (i = 0; i < 6; i++) 60 + if (!IS_ERR_OR_NULL(link[i])) 61 + bpf_link__destroy(link[i]); 62 + bpf_object__close(obj); 63 + bpf_object__close(pkt_obj); 64 + }
+76
tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <test_progs.h> 4 + 5 + #define PROG_CNT 3 6 + 7 + void test_fexit_bpf2bpf(void) 8 + { 9 + const char *prog_name[PROG_CNT] = { 10 + "fexit/test_pkt_access", 11 + "fexit/test_pkt_access_subprog1", 12 + "fexit/test_pkt_access_subprog2", 13 + }; 14 + struct bpf_object *obj = NULL, *pkt_obj; 15 + int err, pkt_fd, i; 16 + struct bpf_link *link[PROG_CNT] = {}; 17 + struct bpf_program *prog[PROG_CNT]; 18 + __u32 duration, retval; 19 + struct bpf_map *data_map; 20 + const int zero = 0; 21 + u64 result[PROG_CNT]; 22 + 23 + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_UNSPEC, 24 + &pkt_obj, &pkt_fd); 25 + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) 26 + return; 27 + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, 28 + .attach_prog_fd = pkt_fd, 29 + ); 30 + 31 + obj = bpf_object__open_file("./fexit_bpf2bpf.o", &opts); 32 + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", 33 + "failed to open fexit_bpf2bpf: %ld\n", 34 + PTR_ERR(obj))) 35 + goto close_prog; 36 + 37 + err = bpf_object__load(obj); 38 + if (CHECK(err, "obj_load", "err %d\n", err)) 39 + goto close_prog; 40 + 41 + for (i = 0; i < PROG_CNT; i++) { 42 + prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]); 43 + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i])) 44 + goto close_prog; 45 + link[i] = bpf_program__attach_trace(prog[i]); 46 + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) 47 + goto close_prog; 48 + } 49 + data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); 50 + if (CHECK(!data_map, "find_data_map", "data map not found\n")) 51 + goto close_prog; 52 + 53 + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 54 + NULL, NULL, &retval, &duration); 55 + CHECK(err || retval, "ipv6", 56 + "err %d errno %d retval %d duration %d\n", 57 + err, errno, retval, duration); 58 + 59 + err = bpf_map_lookup_elem(bpf_map__fd(data_map), 
&zero, &result); 60 + if (CHECK(err, "get_result", 61 + "failed to get output data: %d\n", err)) 62 + goto close_prog; 63 + 64 + for (i = 0; i < PROG_CNT; i++) 65 + if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n", 66 + result[i])) 67 + goto close_prog; 68 + 69 + close_prog: 70 + for (i = 0; i < PROG_CNT; i++) 71 + if (!IS_ERR_OR_NULL(link[i])) 72 + bpf_link__destroy(link[i]); 73 + if (!IS_ERR_OR_NULL(obj)) 74 + bpf_object__close(obj); 75 + bpf_object__close(pkt_obj); 76 + }
+76
tools/testing/selftests/bpf/prog_tests/fexit_stress.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <test_progs.h> 4 + 5 + /* x86-64 fits 55 JITed and 43 interpreted progs into half page */ 6 + #define CNT 40 7 + 8 + void test_fexit_stress(void) 9 + { 10 + char test_skb[128] = {}; 11 + int fexit_fd[CNT] = {}; 12 + int link_fd[CNT] = {}; 13 + __u32 duration = 0; 14 + char error[4096]; 15 + __u32 prog_ret; 16 + int err, i, filter_fd; 17 + 18 + const struct bpf_insn trace_program[] = { 19 + BPF_MOV64_IMM(BPF_REG_0, 0), 20 + BPF_EXIT_INSN(), 21 + }; 22 + 23 + struct bpf_load_program_attr load_attr = { 24 + .prog_type = BPF_PROG_TYPE_TRACING, 25 + .license = "GPL", 26 + .insns = trace_program, 27 + .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn), 28 + .expected_attach_type = BPF_TRACE_FEXIT, 29 + }; 30 + 31 + const struct bpf_insn skb_program[] = { 32 + BPF_MOV64_IMM(BPF_REG_0, 0), 33 + BPF_EXIT_INSN(), 34 + }; 35 + 36 + struct bpf_load_program_attr skb_load_attr = { 37 + .prog_type = BPF_PROG_TYPE_SOCKET_FILTER, 38 + .license = "GPL", 39 + .insns = skb_program, 40 + .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn), 41 + }; 42 + 43 + err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1", 44 + load_attr.expected_attach_type); 45 + if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err)) 46 + goto out; 47 + load_attr.attach_btf_id = err; 48 + 49 + for (i = 0; i < CNT; i++) { 50 + fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error)); 51 + if (CHECK(fexit_fd[i] < 0, "fexit loaded", 52 + "failed: %d errno %d\n", fexit_fd[i], errno)) 53 + goto out; 54 + link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]); 55 + if (CHECK(link_fd[i] < 0, "fexit attach failed", 56 + "prog %d failed: %d err %d\n", i, link_fd[i], errno)) 57 + goto out; 58 + } 59 + 60 + filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error)); 61 + if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n", 62 + filter_fd, errno)) 63 + goto 
out; 64 + 65 + err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 66 + 0, &prog_ret, 0); 67 + close(filter_fd); 68 + CHECK_FAIL(err); 69 + out: 70 + for (i = 0; i < CNT; i++) { 71 + if (link_fd[i]) 72 + close(link_fd[i]); 73 + if (fexit_fd[i]) 74 + close(fexit_fd[i]); 75 + } 76 + }
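The stress test hand-assembles its trace program from two raw instructions, `BPF_MOV64_IMM(BPF_REG_0, 0)` followed by `BPF_EXIT_INSN()`, i.e. "return 0". A sketch of what those macros encode to, with a local mirror of `struct bpf_insn` (the struct and helper names here are mine; the opcode values are the eBPF ISA's `BPF_ALU64|BPF_MOV|BPF_K` and `BPF_JMP|BPF_EXIT`):

```c
#include <stdint.h>

/* Minimal mirror of struct bpf_insn from linux/bpf.h. */
struct insn {
	uint8_t code;		/* opcode */
	uint8_t dst_reg:4;	/* destination register */
	uint8_t src_reg:4;	/* source register */
	int16_t off;		/* signed offset */
	int32_t imm;		/* signed immediate */
};

#define MOV64_IMM_CODE 0xb7	/* BPF_ALU64 | BPF_MOV | BPF_K */
#define EXIT_CODE      0x95	/* BPF_JMP | BPF_EXIT */

/* Emit the two-instruction "return 0" program the stress test loads. */
static void emit_ret0(struct insn prog[2])
{
	prog[0] = (struct insn){ .code = MOV64_IMM_CODE, .dst_reg = 0, .imm = 0 };
	prog[1] = (struct insn){ .code = EXIT_CODE };
}
```

Loading CNT copies of this program as `BPF_TRACE_FEXIT` against `bpf_fentry_test1` is what exercises the trampoline's capacity limit noted in the comment ("x86-64 fits 55 JITed ... progs into half page").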
+64
tools/testing/selftests/bpf/prog_tests/fexit_test.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + /* Copyright (c) 2019 Facebook */ 3 + #include <test_progs.h> 4 + 5 + void test_fexit_test(void) 6 + { 7 + struct bpf_prog_load_attr attr = { 8 + .file = "./fexit_test.o", 9 + }; 10 + 11 + char prog_name[] = "fexit/bpf_fentry_testX"; 12 + struct bpf_object *obj = NULL, *pkt_obj; 13 + int err, pkt_fd, kfree_skb_fd, i; 14 + struct bpf_link *link[6] = {}; 15 + struct bpf_program *prog[6]; 16 + __u32 duration, retval; 17 + struct bpf_map *data_map; 18 + const int zero = 0; 19 + u64 result[6]; 20 + 21 + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, 22 + &pkt_obj, &pkt_fd); 23 + if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) 24 + return; 25 + err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); 26 + if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) 27 + goto close_prog; 28 + 29 + for (i = 0; i < 6; i++) { 30 + prog_name[sizeof(prog_name) - 2] = '1' + i; 31 + prog[i] = bpf_object__find_program_by_title(obj, prog_name); 32 + if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) 33 + goto close_prog; 34 + link[i] = bpf_program__attach_trace(prog[i]); 35 + if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) 36 + goto close_prog; 37 + } 38 + data_map = bpf_object__find_map_by_name(obj, "fexit_te.bss"); 39 + if (CHECK(!data_map, "find_data_map", "data map not found\n")) 40 + goto close_prog; 41 + 42 + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 43 + NULL, NULL, &retval, &duration); 44 + CHECK(err || retval, "ipv6", 45 + "err %d errno %d retval %d duration %d\n", 46 + err, errno, retval, duration); 47 + 48 + err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); 49 + if (CHECK(err, "get_result", 50 + "failed to get output data: %d\n", err)) 51 + goto close_prog; 52 + 53 + for (i = 0; i < 6; i++) 54 + if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", 55 + i + 1, result[i])) 56 + goto 
close_prog; 57 + 58 + close_prog: 59 + for (i = 0; i < 6; i++) 60 + if (!IS_ERR_OR_NULL(link[i])) 61 + bpf_link__destroy(link[i]); 62 + bpf_object__close(obj); 63 + bpf_object__close(pkt_obj); 64 + }
+79 -14
tools/testing/selftests/bpf/prog_tests/kfree_skb.c
··· 1 1 // SPDX-License-Identifier: GPL-2.0 2 2 #include <test_progs.h> 3 3 4 + struct meta { 5 + int ifindex; 6 + __u32 cb32_0; 7 + __u8 cb8_0; 8 + }; 9 + 10 + static union { 11 + __u32 cb32[5]; 12 + __u8 cb8[20]; 13 + } cb = { 14 + .cb32[0] = 0x81828384, 15 + }; 16 + 4 17 static void on_sample(void *ctx, int cpu, void *data, __u32 size) 5 18 { 6 - int ifindex = *(int *)data, duration = 0; 7 - struct ipv6_packet *pkt_v6 = data + 4; 19 + struct meta *meta = (struct meta *)data; 20 + struct ipv6_packet *pkt_v6 = data + sizeof(*meta); 21 + int duration = 0; 8 22 9 - if (ifindex != 1) 23 + if (CHECK(size != 72 + sizeof(*meta), "check_size", "size %u != %zu\n", 24 + size, 72 + sizeof(*meta))) 25 + return; 26 + if (CHECK(meta->ifindex != 1, "check_meta_ifindex", 27 + "meta->ifindex = %d\n", meta->ifindex)) 10 28 /* spurious kfree_skb not on loopback device */ 11 29 return; 12 - if (CHECK(size != 76, "check_size", "size %u != 76\n", size)) 30 + if (CHECK(meta->cb8_0 != cb.cb8[0], "check_cb8_0", "cb8_0 %x != %x\n", 31 + meta->cb8_0, cb.cb8[0])) 32 + return; 33 + if (CHECK(meta->cb32_0 != cb.cb32[0], "check_cb32_0", 34 + "cb32_0 %x != %x\n", 35 + meta->cb32_0, cb.cb32[0])) 13 36 return; 14 37 if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth", 15 38 "h_proto %x\n", pkt_v6->eth.h_proto)) ··· 49 26 50 27 void test_kfree_skb(void) 51 28 { 29 + struct __sk_buff skb = {}; 30 + struct bpf_prog_test_run_attr tattr = { 31 + .data_in = &pkt_v6, 32 + .data_size_in = sizeof(pkt_v6), 33 + .ctx_in = &skb, 34 + .ctx_size_in = sizeof(skb), 35 + }; 52 36 struct bpf_prog_load_attr attr = { 53 37 .file = "./kfree_skb.o", 54 38 }; 55 39 40 + struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL; 41 + struct bpf_map *perf_buf_map, *global_data; 42 + struct bpf_program *prog, *fentry, *fexit; 56 43 struct bpf_object *obj, *obj2 = NULL; 57 44 struct perf_buffer_opts pb_opts = {}; 58 45 struct perf_buffer *pb = NULL; 59 - struct bpf_link *link = NULL; 60 - struct bpf_map 
*perf_buf_map; 61 - struct bpf_program *prog; 62 - __u32 duration, retval; 63 - int err, pkt_fd, kfree_skb_fd; 46 + int err, kfree_skb_fd; 64 47 bool passed = false; 48 + __u32 duration = 0; 49 + const int zero = 0; 50 + bool test_ok[2]; 65 51 66 - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, &obj, &pkt_fd); 52 + err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, 53 + &obj, &tattr.prog_fd); 67 54 if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) 68 55 return; 69 56 ··· 84 51 prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb"); 85 52 if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n")) 86 53 goto close_prog; 54 + fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans"); 55 + if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n")) 56 + goto close_prog; 57 + fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans"); 58 + if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n")) 59 + goto close_prog; 60 + 61 + global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss"); 62 + if (CHECK(!global_data, "find global data", "not found\n")) 63 + goto close_prog; 64 + 87 65 link = bpf_program__attach_raw_tracepoint(prog, NULL); 88 66 if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) 67 + goto close_prog; 68 + link_fentry = bpf_program__attach_trace(fentry); 69 + if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n", 70 + PTR_ERR(link_fentry))) 71 + goto close_prog; 72 + link_fexit = bpf_program__attach_trace(fexit); 73 + if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n", 74 + PTR_ERR(link_fexit))) 89 75 goto close_prog; 90 76 91 77 perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map"); ··· 118 66 if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) 119 67 goto close_prog; 120 68 121 - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), 122 - NULL, NULL, &retval, 
&duration); 123 - CHECK(err || retval, "ipv6", 69 + memcpy(skb.cb, &cb, sizeof(cb)); 70 + err = bpf_prog_test_run_xattr(&tattr); 71 + duration = tattr.duration; 72 + CHECK(err || tattr.retval, "ipv6", 124 73 "err %d errno %d retval %d duration %d\n", 125 - err, errno, retval, duration); 74 + err, errno, tattr.retval, duration); 126 75 127 76 /* read perf buffer */ 128 77 err = perf_buffer__poll(pb, 100); 129 78 if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) 130 79 goto close_prog; 80 + 131 81 /* make sure kfree_skb program was triggered 132 82 * and it sent expected skb into ring buffer 133 83 */ 134 84 CHECK_FAIL(!passed); 85 + 86 + err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok); 87 + if (CHECK(err, "get_result", 88 + "failed to get output data: %d\n", err)) 89 + goto close_prog; 90 + 91 + CHECK_FAIL(!test_ok[0] || !test_ok[1]); 135 92 close_prog: 136 93 perf_buffer__free(pb); 137 94 if (!IS_ERR_OR_NULL(link)) 138 95 bpf_link__destroy(link); 96 + if (!IS_ERR_OR_NULL(link_fentry)) 97 + bpf_link__destroy(link_fentry); 98 + if (!IS_ERR_OR_NULL(link_fexit)) 99 + bpf_link__destroy(link_fexit); 139 100 bpf_object__close(obj); 140 101 bpf_object__close(obj2); 141 102 }
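The kfree_skb test seeds `skb.cb` through a union that views the same 20 bytes as both `__u32 cb32[5]` and `__u8 cb8[20]`, then checks that the BPF side sees matching values through either view. Which byte `cb8[0]` corresponds to depends on host endianness (0x84 on little-endian for `cb32[0] = 0x81828384`). A self-contained sketch of that aliasing (`cb_view`/`first_byte_of` are my names):

```c
#include <stdint.h>

/* Two views of the same skb control-block storage, as in the test. */
union cb_view {
	uint32_t cb32[5];
	uint8_t cb8[20];
};

/* Return the first byte of the 8-bit view after storing v through the
 * 32-bit view; the result is endianness-dependent. */
static uint8_t first_byte_of(uint32_t v)
{
	union cb_view cb = { .cb32[0] = v };
	return cb.cb8[0];
}
```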
+220
tools/testing/selftests/bpf/prog_tests/mmap.c
··· 1 + // SPDX-License-Identifier: GPL-2.0 2 + #include <test_progs.h> 3 + #include <sys/mman.h> 4 + 5 + struct map_data { 6 + __u64 val[512 * 4]; 7 + }; 8 + 9 + struct bss_data { 10 + __u64 in_val; 11 + __u64 out_val; 12 + }; 13 + 14 + static size_t roundup_page(size_t sz) 15 + { 16 + long page_size = sysconf(_SC_PAGE_SIZE); 17 + return (sz + page_size - 1) / page_size * page_size; 18 + } 19 + 20 + void test_mmap(void) 21 + { 22 + const char *file = "test_mmap.o"; 23 + const char *probe_name = "raw_tracepoint/sys_enter"; 24 + const char *tp_name = "sys_enter"; 25 + const size_t bss_sz = roundup_page(sizeof(struct bss_data)); 26 + const size_t map_sz = roundup_page(sizeof(struct map_data)); 27 + const int zero = 0, one = 1, two = 2, far = 1500; 28 + const long page_size = sysconf(_SC_PAGE_SIZE); 29 + int err, duration = 0, i, data_map_fd; 30 + struct bpf_program *prog; 31 + struct bpf_object *obj; 32 + struct bpf_link *link = NULL; 33 + struct bpf_map *data_map, *bss_map; 34 + void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; 35 + volatile struct bss_data *bss_data; 36 + volatile struct map_data *map_data; 37 + __u64 val = 0; 38 + 39 + obj = bpf_object__open_file("test_mmap.o", NULL); 40 + if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", 41 + file, PTR_ERR(obj))) 42 + return; 43 + prog = bpf_object__find_program_by_title(obj, probe_name); 44 + if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name)) 45 + goto cleanup; 46 + err = bpf_object__load(obj); 47 + if (CHECK(err, "obj_load", "failed to load prog '%s': %d\n", 48 + probe_name, err)) 49 + goto cleanup; 50 + 51 + bss_map = bpf_object__find_map_by_name(obj, "test_mma.bss"); 52 + if (CHECK(!bss_map, "find_bss_map", ".bss map not found\n")) 53 + goto cleanup; 54 + data_map = bpf_object__find_map_by_name(obj, "data_map"); 55 + if (CHECK(!data_map, "find_data_map", "data_map map not found\n")) 56 + goto cleanup; 57 + data_map_fd = bpf_map__fd(data_map); 58 + 59 + bss_mmaped = 
+ mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+         bpf_map__fd(bss_map), 0);
+ if (CHECK(bss_mmaped == MAP_FAILED, "bss_mmap",
+           ".bss mmap failed: %d\n", errno)) {
+         bss_mmaped = NULL;
+         goto cleanup;
+ }
+ /* map as R/W first */
+ map_mmaped = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+                   data_map_fd, 0);
+ if (CHECK(map_mmaped == MAP_FAILED, "data_mmap",
+           "data_map mmap failed: %d\n", errno)) {
+         map_mmaped = NULL;
+         goto cleanup;
+ }
+
+ bss_data = bss_mmaped;
+ map_data = map_mmaped;
+
+ CHECK_FAIL(bss_data->in_val);
+ CHECK_FAIL(bss_data->out_val);
+ CHECK_FAIL(map_data->val[0]);
+ CHECK_FAIL(map_data->val[1]);
+ CHECK_FAIL(map_data->val[2]);
+ CHECK_FAIL(map_data->val[far]);
+
+ link = bpf_program__attach_raw_tracepoint(prog, tp_name);
+ if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+         goto cleanup;
+
+ bss_data->in_val = 123;
+ val = 111;
+ CHECK_FAIL(bpf_map_update_elem(data_map_fd, &zero, &val, 0));
+
+ usleep(1);
+
+ CHECK_FAIL(bss_data->in_val != 123);
+ CHECK_FAIL(bss_data->out_val != 123);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 123);
+ CHECK_FAIL(map_data->val[far] != 3 * 123);
+
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &zero, &val));
+ CHECK_FAIL(val != 111);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &one, &val));
+ CHECK_FAIL(val != 222);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &two, &val));
+ CHECK_FAIL(val != 123);
+ CHECK_FAIL(bpf_map_lookup_elem(data_map_fd, &far, &val));
+ CHECK_FAIL(val != 3 * 123);
+
+ /* data_map freeze should fail due to R/W mmap() */
+ err = bpf_map_freeze(data_map_fd);
+ if (CHECK(!err || errno != EBUSY, "no_freeze",
+           "data_map freeze succeeded: err=%d, errno=%d\n", err, errno))
+         goto cleanup;
+
+ /* unmap R/W mapping */
+ err = munmap(map_mmaped, map_sz);
+ map_mmaped = NULL;
+ if (CHECK(err, "data_map_munmap", "data_map munmap failed: %d\n", errno))
+         goto cleanup;
+
+ /* re-map as R/O now */
+ map_mmaped = mmap(NULL, map_sz, PROT_READ, MAP_SHARED, data_map_fd, 0);
+ if (CHECK(map_mmaped == MAP_FAILED, "data_mmap",
+           "data_map R/O mmap failed: %d\n", errno)) {
+         map_mmaped = NULL;
+         goto cleanup;
+ }
+ map_data = map_mmaped;
+
+ /* map/unmap in a loop to test ref counting */
+ for (i = 0; i < 10; i++) {
+         int flags = i % 2 ? PROT_READ : PROT_WRITE;
+         void *p;
+
+         p = mmap(NULL, map_sz, flags, MAP_SHARED, data_map_fd, 0);
+         if (CHECK_FAIL(p == MAP_FAILED))
+                 goto cleanup;
+         err = munmap(p, map_sz);
+         if (CHECK_FAIL(err))
+                 goto cleanup;
+ }
+
+ /* data_map freeze should now succeed due to no R/W mapping */
+ err = bpf_map_freeze(data_map_fd);
+ if (CHECK(err, "freeze", "data_map freeze failed: err=%d, errno=%d\n",
+           err, errno))
+         goto cleanup;
+
+ /* mapping as R/W now should fail */
+ tmp1 = mmap(NULL, map_sz, PROT_READ | PROT_WRITE, MAP_SHARED,
+             data_map_fd, 0);
+ if (CHECK(tmp1 != MAP_FAILED, "data_mmap", "mmap succeeded\n")) {
+         munmap(tmp1, map_sz);
+         goto cleanup;
+ }
+
+ bss_data->in_val = 321;
+ usleep(1);
+ CHECK_FAIL(bss_data->in_val != 321);
+ CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 321);
+ CHECK_FAIL(map_data->val[far] != 3 * 321);
+
+ /* check some more advanced mmap() manipulations */
+
+ /* map all but last page: pages 1-3 mapped */
+ tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+             data_map_fd, 0);
+ if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+         goto cleanup;
+
+ /* unmap second page: pages 1, 3 mapped */
+ err = munmap(tmp1 + page_size, page_size);
+ if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
+         munmap(tmp1, map_sz);
+         goto cleanup;
+ }
+
+ /* map page 2 back */
+ tmp2 = mmap(tmp1 + page_size, page_size, PROT_READ,
+             MAP_SHARED | MAP_FIXED, data_map_fd, 0);
+ if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
+         munmap(tmp1, page_size);
+         munmap(tmp1 + 2*page_size, page_size);
+         goto cleanup;
+ }
+ CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
+       "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
+
+ /* re-map all 4 pages */
+ tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
+             data_map_fd, 0);
+ if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
+         munmap(tmp1, 3 * page_size); /* unmap page 1 */
+         goto cleanup;
+ }
+ CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
+
+ map_data = tmp2;
+ CHECK_FAIL(bss_data->in_val != 321);
+ CHECK_FAIL(bss_data->out_val != 321);
+ CHECK_FAIL(map_data->val[0] != 111);
+ CHECK_FAIL(map_data->val[1] != 222);
+ CHECK_FAIL(map_data->val[2] != 321);
+ CHECK_FAIL(map_data->val[far] != 3 * 321);
+
+ munmap(tmp2, 4 * page_size);
+ cleanup:
+ if (bss_mmaped)
+         CHECK_FAIL(munmap(bss_mmaped, bss_sz));
+ if (map_mmaped)
+         CHECK_FAIL(munmap(map_mmaped, map_sz));
+ if (!IS_ERR_OR_NULL(link))
+         bpf_link__destroy(link);
+ bpf_object__close(obj);
+ }
+17 -3
tools/testing/selftests/bpf/prog_tests/pinning.c
···
          goto out;
  }
 
- /* swap pin paths of the two maps */
+ /* set pin paths so that nopinmap2 will attempt to reuse the map at
+  * pinpath (which will fail), but not before pinmap has already been
+  * reused
+  */
  bpf_object__for_each_map(map, obj) {
          if (!strcmp(bpf_map__name(map), "nopinmap"))
+                 err = bpf_map__set_pin_path(map, nopinpath2);
+         else if (!strcmp(bpf_map__name(map), "nopinmap2"))
                  err = bpf_map__set_pin_path(map, pinpath);
-         else if (!strcmp(bpf_map__name(map), "pinmap"))
-                 err = bpf_map__set_pin_path(map, NULL);
          else
                  continue;
···
  /* should fail because of map parameter mismatch */
  err = bpf_object__load(obj);
  if (CHECK(err != -EINVAL, "param mismatch load", "err %d errno %d\n", err, errno))
+         goto out;
+
+ /* nopinmap2 should have been pinned and cleaned up again */
+ err = stat(nopinpath2, &statbuf);
+ if (CHECK(!err || errno != ENOENT, "stat nopinpath2",
+           "err %d errno %d\n", err, errno))
+         goto out;
+
+ /* pinmap should still be there */
+ err = stat(pinpath, &statbuf);
+ if (CHECK(err, "stat pinpath", "err %d errno %d\n", err, errno))
          goto out;
 
  bpf_object__close(obj);
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_arrays___err_wrong_val_type x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type1.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_arrays___err_wrong_val_type1 x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_wrong_val_type2.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_arrays___err_wrong_val_type2 x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_bitfields x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bit_sz_change.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_bitfields___bit_sz_change x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___bitfield_vs_int.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_bitfields___bitfield_vs_int x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___err_too_big_bitfield.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_bitfields___err_too_big_bitfield x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_bitfields___just_big_enough.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_bitfields___just_big_enough x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_bitfield.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_ints___err_bitfield x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_16.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_ints___err_wrong_sz_16 x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_32.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_ints___err_wrong_sz_32 x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_64.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_ints___err_wrong_sz_64 x) {}
-3
tools/testing/selftests/bpf/progs/btf__core_reloc_ints___err_wrong_sz_8.c
···
- #include "core_reloc_types.h"
-
- void f(struct core_reloc_ints___err_wrong_sz_8 x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_size.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_size x) {}
+3
tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_sz.c
···
+ #include "core_reloc_types.h"
+
+ void f(struct core_reloc_size___diff_sz x) {}
+104 -69
tools/testing/selftests/bpf/progs/core_reloc_types.h
···
          struct core_reloc_arrays_substruct d[1][2];
  };
 
- struct core_reloc_arrays___err_wrong_val_type1 {
-         char a[5]; /* char instead of int */
-         char b[2][3][4];
-         struct core_reloc_arrays_substruct c[3];
-         struct core_reloc_arrays_substruct d[1][2];
- };
-
- struct core_reloc_arrays___err_wrong_val_type2 {
+ struct core_reloc_arrays___err_wrong_val_type {
          int a[5];
          char b[2][3][4];
          int c[3]; /* value is not a struct */
···
          int64_t s64_field;
  };
 
- struct core_reloc_ints___err_bitfield {
-         uint8_t u8_field;
-         int8_t s8_field;
-         uint16_t u16_field;
-         int16_t s16_field;
-         uint32_t u32_field: 32; /* bitfields are not supported */
-         int32_t s32_field;
-         uint64_t u64_field;
-         int64_t s64_field;
- };
-
- struct core_reloc_ints___err_wrong_sz_8 {
-         uint16_t u8_field; /* not 8-bit anymore */
-         int16_t s8_field; /* not 8-bit anymore */
-
-         uint16_t u16_field;
-         int16_t s16_field;
-         uint32_t u32_field;
-         int32_t s32_field;
-         uint64_t u64_field;
-         int64_t s64_field;
- };
-
- struct core_reloc_ints___err_wrong_sz_16 {
-         uint8_t u8_field;
-         int8_t s8_field;
-
-         uint32_t u16_field; /* not 16-bit anymore */
-         int32_t s16_field; /* not 16-bit anymore */
-
-         uint32_t u32_field;
-         int32_t s32_field;
-         uint64_t u64_field;
-         int64_t s64_field;
- };
-
- struct core_reloc_ints___err_wrong_sz_32 {
-         uint8_t u8_field;
-         int8_t s8_field;
-         uint16_t u16_field;
-         int16_t s16_field;
-
-         uint64_t u32_field; /* not 32-bit anymore */
-         int64_t s32_field; /* not 32-bit anymore */
-
-         uint64_t u64_field;
-         int64_t s64_field;
- };
-
- struct core_reloc_ints___err_wrong_sz_64 {
-         uint8_t u8_field;
-         int8_t s8_field;
-         uint16_t u16_field;
-         int16_t s16_field;
-         uint32_t u32_field;
-         int32_t s32_field;
-
-         uint32_t u64_field; /* not 64-bit anymore */
-         int32_t s64_field; /* not 64-bit anymore */
- };
-
  /*
   * MISC
   */
···
  struct core_reloc_existence___err_wrong_struct_type {
          int s;
+ };
+
+ /*
+  * BITFIELDS
+  */
+ /* bitfield read results, all as plain integers */
+ struct core_reloc_bitfields_output {
+         int64_t ub1;
+         int64_t ub2;
+         int64_t ub7;
+         int64_t sb4;
+         int64_t sb20;
+         int64_t u32;
+         int64_t s32;
+ };
+
+ struct core_reloc_bitfields {
+         /* unsigned bitfields */
+         uint8_t ub1: 1;
+         uint8_t ub2: 2;
+         uint32_t ub7: 7;
+         /* signed bitfields */
+         int8_t sb4: 4;
+         int32_t sb20: 20;
+         /* non-bitfields */
+         uint32_t u32;
+         int32_t s32;
+ };
+
+ /* different bit sizes (both up and down) */
+ struct core_reloc_bitfields___bit_sz_change {
+         /* unsigned bitfields */
+         uint16_t ub1: 3;  /* 1 -> 3 */
+         uint32_t ub2: 20; /* 2 -> 20 */
+         uint8_t ub7: 1;   /* 7 -> 1 */
+         /* signed bitfields */
+         int8_t sb4: 1;    /* 4 -> 1 */
+         int32_t sb20: 30; /* 20 -> 30 */
+         /* non-bitfields */
+         uint16_t u32;     /* 32 -> 16 */
+         int64_t s32;      /* 32 -> 64 */
+ };
+
+ /* turn bitfield into non-bitfield and vice versa */
+ struct core_reloc_bitfields___bitfield_vs_int {
+         uint64_t ub1;     /* 3 -> 64 non-bitfield */
+         uint8_t ub2;      /* 20 -> 8 non-bitfield */
+         int64_t ub7;      /* 7 -> 64 non-bitfield signed */
+         int64_t sb4;      /* 4 -> 64 non-bitfield signed */
+         uint64_t sb20;    /* 20 -> 16 non-bitfield unsigned */
+         int32_t u32: 20;  /* 32 non-bitfield -> 20 bitfield */
+         uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */
+ };
+
+ struct core_reloc_bitfields___just_big_enough {
+         uint64_t ub1: 4;
+         uint64_t ub2: 60; /* packed tightly */
+         uint32_t ub7;
+         uint32_t sb4;
+         uint32_t sb20;
+         uint32_t u32;
+         uint32_t s32;
+ } __attribute__((packed)) ;
+
+ struct core_reloc_bitfields___err_too_big_bitfield {
+         uint64_t ub1: 4;
+         uint64_t ub2: 61; /* packed tightly */
+         uint32_t ub7;
+         uint32_t sb4;
+         uint32_t sb20;
+         uint32_t u32;
+         uint32_t s32;
+ } __attribute__((packed)) ;
+
+ /*
+  * SIZE
+  */
+ struct core_reloc_size_output {
+         int int_sz;
+         int struct_sz;
+         int union_sz;
+         int arr_sz;
+         int arr_elem_sz;
+         int ptr_sz;
+         int enum_sz;
+ };
+
+ struct core_reloc_size {
+         int int_field;
+         struct { int x; } struct_field;
+         union { int x; } union_field;
+         int arr_field[4];
+         void *ptr_field;
+         enum { VALUE = 123 } enum_field;
+ };
+
+ struct core_reloc_size___diff_sz {
+         uint64_t int_field;
+         struct { int x; int y; int z; } struct_field;
+         union { int x; char bla[123]; } union_field;
+         char arr_field[10];
+         void *ptr_field;
+         enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
  };
+90
tools/testing/selftests/bpf/progs/fentry_test.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright (c) 2019 Facebook */
+ #include <linux/bpf.h>
+ #include "bpf_helpers.h"
+
+ char _license[] SEC("license") = "GPL";
+
+ struct test1 {
+         ks32 a;
+ };
+ static volatile __u64 test1_result;
+ SEC("fentry/bpf_fentry_test1")
+ int test1(struct test1 *ctx)
+ {
+         test1_result = ctx->a == 1;
+         return 0;
+ }
+
+ struct test2 {
+         ks32 a;
+         ku64 b;
+ };
+ static volatile __u64 test2_result;
+ SEC("fentry/bpf_fentry_test2")
+ int test2(struct test2 *ctx)
+ {
+         test2_result = ctx->a == 2 && ctx->b == 3;
+         return 0;
+ }
+
+ struct test3 {
+         ks8 a;
+         ks32 b;
+         ku64 c;
+ };
+ static volatile __u64 test3_result;
+ SEC("fentry/bpf_fentry_test3")
+ int test3(struct test3 *ctx)
+ {
+         test3_result = ctx->a == 4 && ctx->b == 5 && ctx->c == 6;
+         return 0;
+ }
+
+ struct test4 {
+         void *a;
+         ks8 b;
+         ks32 c;
+         ku64 d;
+ };
+ static volatile __u64 test4_result;
+ SEC("fentry/bpf_fentry_test4")
+ int test4(struct test4 *ctx)
+ {
+         test4_result = ctx->a == (void *)7 && ctx->b == 8 && ctx->c == 9 &&
+                        ctx->d == 10;
+         return 0;
+ }
+
+ struct test5 {
+         ku64 a;
+         void *b;
+         ks16 c;
+         ks32 d;
+         ku64 e;
+ };
+ static volatile __u64 test5_result;
+ SEC("fentry/bpf_fentry_test5")
+ int test5(struct test5 *ctx)
+ {
+         test5_result = ctx->a == 11 && ctx->b == (void *)12 && ctx->c == 13 &&
+                        ctx->d == 14 && ctx->e == 15;
+         return 0;
+ }
+
+ struct test6 {
+         ku64 a;
+         void *b;
+         ks16 c;
+         ks32 d;
+         void *e;
+         ks64 f;
+ };
+ static volatile __u64 test6_result;
+ SEC("fentry/bpf_fentry_test6")
+ int test6(struct test6 *ctx)
+ {
+         test6_result = ctx->a == 16 && ctx->b == (void *)17 && ctx->c == 18 &&
+                        ctx->d == 19 && ctx->e == (void *)20 && ctx->f == 21;
+         return 0;
+ }
+91
tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright (c) 2019 Facebook */
+ #include <linux/bpf.h>
+ #include "bpf_helpers.h"
+
+ struct sk_buff {
+         unsigned int len;
+ };
+
+ struct args {
+         struct sk_buff *skb;
+         ks32 ret;
+ };
+ static volatile __u64 test_result;
+ SEC("fexit/test_pkt_access")
+ int test_main(struct args *ctx)
+ {
+         struct sk_buff *skb = ctx->skb;
+         int len;
+
+         __builtin_preserve_access_index(({
+                 len = skb->len;
+         }));
+         if (len != 74 || ctx->ret != 0)
+                 return 0;
+         test_result = 1;
+         return 0;
+ }
+
+ struct args_subprog1 {
+         struct sk_buff *skb;
+         ks32 ret;
+ };
+ static volatile __u64 test_result_subprog1;
+ SEC("fexit/test_pkt_access_subprog1")
+ int test_subprog1(struct args_subprog1 *ctx)
+ {
+         struct sk_buff *skb = ctx->skb;
+         int len;
+
+         __builtin_preserve_access_index(({
+                 len = skb->len;
+         }));
+         if (len != 74 || ctx->ret != 148)
+                 return 0;
+         test_result_subprog1 = 1;
+         return 0;
+ }
+
+ /* Though test_pkt_access_subprog2() is defined in C as:
+  * static __attribute__ ((noinline))
+  * int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
+  * {
+  *         return skb->len * val;
+  * }
+  * llvm optimizations remove 'int val' argument and generate BPF assembly:
+  *   r0 = *(u32 *)(r1 + 0)
+  *   w0 <<= 1
+  *   exit
+  * In such case the verifier falls back to conservative and
+  * tracing program can access arguments and return value as u64
+  * instead of accurate types.
+  */
+ struct args_subprog2 {
+         ku64 args[5];
+         ku64 ret;
+ };
+ static volatile __u64 test_result_subprog2;
+ SEC("fexit/test_pkt_access_subprog2")
+ int test_subprog2(struct args_subprog2 *ctx)
+ {
+         struct sk_buff *skb = (void *)ctx->args[0];
+         __u64 ret;
+         int len;
+
+         bpf_probe_read_kernel(&len, sizeof(len),
+                               __builtin_preserve_access_index(&skb->len));
+
+         ret = ctx->ret;
+         /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
+          * which randomizes upper 32 bits after BPF_ALU32 insns.
+          * Hence after 'w0 <<= 1' upper bits of $rax are random.
+          * That is expected and correct. Trim them.
+          */
+         ret = (__u32) ret;
+         if (len != 74 || ret != 148)
+                 return 0;
+         test_result_subprog2 = 1;
+         return 0;
+ }
+ char _license[] SEC("license") = "GPL";
+98
tools/testing/selftests/bpf/progs/fexit_test.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ /* Copyright (c) 2019 Facebook */
+ #include <linux/bpf.h>
+ #include "bpf_helpers.h"
+
+ char _license[] SEC("license") = "GPL";
+
+ struct test1 {
+         ks32 a;
+         ks32 ret;
+ };
+ static volatile __u64 test1_result;
+ SEC("fexit/bpf_fentry_test1")
+ int test1(struct test1 *ctx)
+ {
+         test1_result = ctx->a == 1 && ctx->ret == 2;
+         return 0;
+ }
+
+ struct test2 {
+         ks32 a;
+         ku64 b;
+         ks32 ret;
+ };
+ static volatile __u64 test2_result;
+ SEC("fexit/bpf_fentry_test2")
+ int test2(struct test2 *ctx)
+ {
+         test2_result = ctx->a == 2 && ctx->b == 3 && ctx->ret == 5;
+         return 0;
+ }
+
+ struct test3 {
+         ks8 a;
+         ks32 b;
+         ku64 c;
+         ks32 ret;
+ };
+ static volatile __u64 test3_result;
+ SEC("fexit/bpf_fentry_test3")
+ int test3(struct test3 *ctx)
+ {
+         test3_result = ctx->a == 4 && ctx->b == 5 && ctx->c == 6 &&
+                        ctx->ret == 15;
+         return 0;
+ }
+
+ struct test4 {
+         void *a;
+         ks8 b;
+         ks32 c;
+         ku64 d;
+         ks32 ret;
+ };
+ static volatile __u64 test4_result;
+ SEC("fexit/bpf_fentry_test4")
+ int test4(struct test4 *ctx)
+ {
+         test4_result = ctx->a == (void *)7 && ctx->b == 8 && ctx->c == 9 &&
+                        ctx->d == 10 && ctx->ret == 34;
+         return 0;
+ }
+
+ struct test5 {
+         ku64 a;
+         void *b;
+         ks16 c;
+         ks32 d;
+         ku64 e;
+         ks32 ret;
+ };
+ static volatile __u64 test5_result;
+ SEC("fexit/bpf_fentry_test5")
+ int test5(struct test5 *ctx)
+ {
+         test5_result = ctx->a == 11 && ctx->b == (void *)12 && ctx->c == 13 &&
+                        ctx->d == 14 && ctx->e == 15 && ctx->ret == 65;
+         return 0;
+ }
+
+ struct test6 {
+         ku64 a;
+         void *b;
+         ks16 c;
+         ks32 d;
+         void *e;
+         ks64 f;
+         ks32 ret;
+ };
+ static volatile __u64 test6_result;
+ SEC("fexit/bpf_fentry_test6")
+ int test6(struct test6 *ctx)
+ {
+         test6_result = ctx->a == 16 && ctx->b == (void *)17 && ctx->c == 18 &&
+                        ctx->d == 19 && ctx->e == (void *)20 && ctx->f == 21 &&
+                        ctx->ret == 111;
+         return 0;
+ }
+72 -5
tools/testing/selftests/bpf/progs/kfree_skb.c
···
  // SPDX-License-Identifier: GPL-2.0
  // Copyright (c) 2019 Facebook
  #include <linux/bpf.h>
+ #include <stdbool.h>
  #include "bpf_helpers.h"
  #include "bpf_endian.h"
···
          refcount_t users;
          unsigned char *data;
          char __pkt_type_offset[0];
+         char cb[48];
  };
 
  /* copy arguments from
···
          void *location;
  };
 
+ struct meta {
+         int ifindex;
+         __u32 cb32_0;
+         __u8 cb8_0;
+ };
+
  SEC("tp_btf/kfree_skb")
  int trace_kfree_skb(struct trace_kfree_skb *ctx)
  {
          struct sk_buff *skb = ctx->skb;
          struct net_device *dev;
-         int ifindex;
          struct callback_head *ptr;
          void *func;
          int users;
          unsigned char *data;
          unsigned short pkt_data;
+         struct meta meta = {};
          char pkt_type;
+         __u32 *cb32;
+         __u8 *cb8;
 
          __builtin_preserve_access_index(({
                  users = skb->users.refs.counter;
                  data = skb->data;
                  dev = skb->dev;
-                 ifindex = dev->ifindex;
                  ptr = dev->ifalias->rcuhead.next;
                  func = ptr->func;
+                 cb8 = (__u8 *)&skb->cb;
+                 cb32 = (__u32 *)&skb->cb;
          }));
+
+         meta.ifindex = _(dev->ifindex);
+         meta.cb8_0 = cb8[8];
+         meta.cb32_0 = cb32[2];
 
          bpf_probe_read_kernel(&pkt_type, sizeof(pkt_type), _(&skb->__pkt_type_offset));
          pkt_type &= 7;
···
                     _(skb->len), users, pkt_type);
          bpf_printk("skb->queue_mapping %d\n", _(skb->queue_mapping));
          bpf_printk("dev->ifindex %d data %llx pkt_data %x\n",
-                    ifindex, data, pkt_data);
+                    meta.ifindex, data, pkt_data);
+         bpf_printk("cb8_0:%x cb32_0:%x\n", meta.cb8_0, meta.cb32_0);
 
-         if (users != 1 || pkt_data != bpf_htons(0x86dd) || ifindex != 1)
+         if (users != 1 || pkt_data != bpf_htons(0x86dd) || meta.ifindex != 1)
                  /* raw tp ignores return value */
                  return 0;
 
          /* send first 72 byte of the packet to user space */
          bpf_skb_output(skb, &perf_buf_map, (72ull << 32) | BPF_F_CURRENT_CPU,
-                        &ifindex, sizeof(ifindex));
+                        &meta, sizeof(meta));
+         return 0;
+ }
+
+ static volatile struct {
+         bool fentry_test_ok;
+         bool fexit_test_ok;
+ } result;
+
+ struct eth_type_trans_args {
+         struct sk_buff *skb;
+         struct net_device *dev;
+         unsigned short protocol; /* return value available to fexit progs */
+ };
+
+ SEC("fentry/eth_type_trans")
+ int fentry_eth_type_trans(struct eth_type_trans_args *ctx)
+ {
+         struct sk_buff *skb = ctx->skb;
+         struct net_device *dev = ctx->dev;
+         int len, ifindex;
+
+         __builtin_preserve_access_index(({
+                 len = skb->len;
+                 ifindex = dev->ifindex;
+         }));
+
+         /* fentry sees full packet including L2 header */
+         if (len != 74 || ifindex != 1)
+                 return 0;
+         result.fentry_test_ok = true;
+         return 0;
+ }
+
+ SEC("fexit/eth_type_trans")
+ int fexit_eth_type_trans(struct eth_type_trans_args *ctx)
+ {
+         struct sk_buff *skb = ctx->skb;
+         struct net_device *dev = ctx->dev;
+         int len, ifindex;
+
+         __builtin_preserve_access_index(({
+                 len = skb->len;
+                 ifindex = dev->ifindex;
+         }));
+
+         /* fexit sees packet without L2 header that eth_type_trans should have
+          * consumed.
+          */
+         if (len != 60 || ctx->protocol != bpf_htons(0x86dd) || ifindex != 1)
+                 return 0;
+         result.fexit_test_ok = true;
          return 0;
  }
+2 -2
tools/testing/selftests/bpf/progs/test_btf_haskv.c
···
  };
 
  __attribute__((noinline))
- static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+ int test_long_fname_2(struct dummy_tracepoint_args *arg)
  {
          struct ipv_counts *counts;
          int key = 0;
···
  }
 
  __attribute__((noinline))
- static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+ int test_long_fname_1(struct dummy_tracepoint_args *arg)
  {
          return test_long_fname_2(arg);
  }
+2 -2
tools/testing/selftests/bpf/progs/test_btf_newkv.c
···
  };
 
  __attribute__((noinline))
- static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+ int test_long_fname_2(struct dummy_tracepoint_args *arg)
  {
          struct ipv_counts *counts;
          int key = 0;
···
  }
 
  __attribute__((noinline))
- static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+ int test_long_fname_1(struct dummy_tracepoint_args *arg)
  {
          return test_long_fname_2(arg);
  }
+2 -2
tools/testing/selftests/bpf/progs/test_btf_nokv.c
···
  };
 
  __attribute__((noinline))
- static int test_long_fname_2(struct dummy_tracepoint_args *arg)
+ int test_long_fname_2(struct dummy_tracepoint_args *arg)
  {
          struct ipv_counts *counts;
          int key = 0;
···
  }
 
  __attribute__((noinline))
- static int test_long_fname_1(struct dummy_tracepoint_args *arg)
+ int test_long_fname_1(struct dummy_tracepoint_args *arg)
  {
          return test_long_fname_2(arg);
  }
+63
tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_direct.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (c) 2019 Facebook
+
+ #include <linux/bpf.h>
+ #include <stdint.h>
+ #include "bpf_helpers.h"
+ #include "bpf_core_read.h"
+
+ char _license[] SEC("license") = "GPL";
+
+ static volatile struct data {
+         char in[256];
+         char out[256];
+ } data;
+
+ struct core_reloc_bitfields {
+         /* unsigned bitfields */
+         uint8_t ub1: 1;
+         uint8_t ub2: 2;
+         uint32_t ub7: 7;
+         /* signed bitfields */
+         int8_t sb4: 4;
+         int32_t sb20: 20;
+         /* non-bitfields */
+         uint32_t u32;
+         int32_t s32;
+ };
+
+ /* bitfield read results, all as plain integers */
+ struct core_reloc_bitfields_output {
+         int64_t ub1;
+         int64_t ub2;
+         int64_t ub7;
+         int64_t sb4;
+         int64_t sb20;
+         int64_t u32;
+         int64_t s32;
+ };
+
+ struct pt_regs;
+
+ struct trace_sys_enter {
+         struct pt_regs *regs;
+         long id;
+ };
+
+ SEC("tp_btf/sys_enter")
+ int test_core_bitfields_direct(void *ctx)
+ {
+         struct core_reloc_bitfields *in = (void *)&data.in;
+         struct core_reloc_bitfields_output *out = (void *)&data.out;
+
+         out->ub1 = BPF_CORE_READ_BITFIELD(in, ub1);
+         out->ub2 = BPF_CORE_READ_BITFIELD(in, ub2);
+         out->ub7 = BPF_CORE_READ_BITFIELD(in, ub7);
+         out->sb4 = BPF_CORE_READ_BITFIELD(in, sb4);
+         out->sb20 = BPF_CORE_READ_BITFIELD(in, sb20);
+         out->u32 = BPF_CORE_READ_BITFIELD(in, u32);
+         out->s32 = BPF_CORE_READ_BITFIELD(in, s32);
+
+         return 0;
+ }
+
+57
tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (c) 2019 Facebook
+
+ #include <linux/bpf.h>
+ #include <stdint.h>
+ #include "bpf_helpers.h"
+ #include "bpf_core_read.h"
+
+ char _license[] SEC("license") = "GPL";
+
+ static volatile struct data {
+         char in[256];
+         char out[256];
+ } data;
+
+ struct core_reloc_bitfields {
+         /* unsigned bitfields */
+         uint8_t ub1: 1;
+         uint8_t ub2: 2;
+         uint32_t ub7: 7;
+         /* signed bitfields */
+         int8_t sb4: 4;
+         int32_t sb20: 20;
+         /* non-bitfields */
+         uint32_t u32;
+         int32_t s32;
+ };
+
+ /* bitfield read results, all as plain integers */
+ struct core_reloc_bitfields_output {
+         int64_t ub1;
+         int64_t ub2;
+         int64_t ub7;
+         int64_t sb4;
+         int64_t sb20;
+         int64_t u32;
+         int64_t s32;
+ };
+
+ SEC("raw_tracepoint/sys_enter")
+ int test_core_bitfields(void *ctx)
+ {
+         struct core_reloc_bitfields *in = (void *)&data.in;
+         struct core_reloc_bitfields_output *out = (void *)&data.out;
+         uint64_t res;
+
+         out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1);
+         out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2);
+         out->ub7 = BPF_CORE_READ_BITFIELD_PROBED(in, ub7);
+         out->sb4 = BPF_CORE_READ_BITFIELD_PROBED(in, sb4);
+         out->sb20 = BPF_CORE_READ_BITFIELD_PROBED(in, sb20);
+         out->u32 = BPF_CORE_READ_BITFIELD_PROBED(in, u32);
+         out->s32 = BPF_CORE_READ_BITFIELD_PROBED(in, s32);
+
+         return 0;
+ }
+
+51
tools/testing/selftests/bpf/progs/test_core_reloc_size.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (c) 2019 Facebook
+
+ #include <linux/bpf.h>
+ #include <stdint.h>
+ #include "bpf_helpers.h"
+ #include "bpf_core_read.h"
+
+ char _license[] SEC("license") = "GPL";
+
+ static volatile struct data {
+         char in[256];
+         char out[256];
+ } data;
+
+ struct core_reloc_size_output {
+         int int_sz;
+         int struct_sz;
+         int union_sz;
+         int arr_sz;
+         int arr_elem_sz;
+         int ptr_sz;
+         int enum_sz;
+ };
+
+ struct core_reloc_size {
+         int int_field;
+         struct { int x; } struct_field;
+         union { int x; } union_field;
+         int arr_field[4];
+         void *ptr_field;
+         enum { VALUE = 123 } enum_field;
+ };
+
+ SEC("raw_tracepoint/sys_enter")
+ int test_core_size(void *ctx)
+ {
+         struct core_reloc_size *in = (void *)&data.in;
+         struct core_reloc_size_output *out = (void *)&data.out;
+
+         out->int_sz = bpf_core_field_size(in->int_field);
+         out->struct_sz = bpf_core_field_size(in->struct_field);
+         out->union_sz = bpf_core_field_size(in->union_field);
+         out->arr_sz = bpf_core_field_size(in->arr_field);
+         out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
+         out->ptr_sz = bpf_core_field_size(in->ptr_field);
+         out->enum_sz = bpf_core_field_size(in->enum_field);
+
+         return 0;
+ }
+
+45
tools/testing/selftests/bpf/progs/test_mmap.c
···
+ // SPDX-License-Identifier: GPL-2.0
+ // Copyright (c) 2019 Facebook
+
+ #include <linux/bpf.h>
+ #include <stdint.h>
+ #include "bpf_helpers.h"
+
+ char _license[] SEC("license") = "GPL";
+
+ struct {
+         __uint(type, BPF_MAP_TYPE_ARRAY);
+         __uint(max_entries, 512 * 4); /* at least 4 pages of data */
+         __uint(map_flags, BPF_F_MMAPABLE);
+         __type(key, __u32);
+         __type(value, __u64);
+ } data_map SEC(".maps");
+
+ static volatile __u64 in_val;
+ static volatile __u64 out_val;
+
+ SEC("raw_tracepoint/sys_enter")
+ int test_mmap(void *ctx)
+ {
+         int zero = 0, one = 1, two = 2, far = 1500;
+         __u64 val, *p;
+
+         out_val = in_val;
+
+         /* data_map[2] = in_val; */
+         bpf_map_update_elem(&data_map, &two, (const void *)&in_val, 0);
+
+         /* data_map[1] = data_map[0] * 2; */
+         p = bpf_map_lookup_elem(&data_map, &zero);
+         if (p) {
+                 val = (*p) * 2;
+                 bpf_map_update_elem(&data_map, &one, &val, 0);
+         }
+
+         /* data_map[far] = in_val * 3; */
+         val = in_val * 3;
+         bpf_map_update_elem(&data_map, &far, &val, 0);
+
+         return 0;
+ }
+
+1 -1
tools/testing/selftests/bpf/progs/test_pinning.c
···
  } nopinmap SEC(".maps");
 
  struct {
-         __uint(type, BPF_MAP_TYPE_ARRAY);
+         __uint(type, BPF_MAP_TYPE_HASH);
          __uint(max_entries, 1);
          __type(key, __u32);
          __type(value, __u64);
+36 -2
tools/testing/selftests/bpf/progs/test_pkt_access.c
···
  #define barrier() __asm__ __volatile__("": : :"memory")
  int _version SEC("version") = 1;
 
- SEC("test1")
- int process(struct __sk_buff *skb)
+ /* llvm will optimize both subprograms into exactly the same BPF assembly
+  *
+  * Disassembly of section .text:
+  *
+  * 0000000000000000 test_pkt_access_subprog1:
+  * ; return skb->len * 2;
+  *    0: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0)
+  *    1: 64 00 00 00 01 00 00 00 w0 <<= 1
+  *    2: 95 00 00 00 00 00 00 00 exit
+  *
+  * 0000000000000018 test_pkt_access_subprog2:
+  * ; return skb->len * val;
+  *    3: 61 10 00 00 00 00 00 00 r0 = *(u32 *)(r1 + 0)
+  *    4: 64 00 00 00 01 00 00 00 w0 <<= 1
+  *    5: 95 00 00 00 00 00 00 00 exit
+  *
+  * Which makes it an interesting test for BTF-enabled verifier.
+  */
+ static __attribute__ ((noinline))
+ int test_pkt_access_subprog1(volatile struct __sk_buff *skb)
+ {
+         return skb->len * 2;
+ }
+
+ static __attribute__ ((noinline))
+ int test_pkt_access_subprog2(int val, volatile struct __sk_buff *skb)
+ {
+         return skb->len * val;
+ }
+
+ SEC("classifier/test_pkt_access")
+ int test_pkt_access(struct __sk_buff *skb)
  {
          void *data_end = (void *)(long)skb->data_end;
          void *data = (void *)(long)skb->data;
···
          tcp = (struct tcphdr *)((void *)(ip6h) + ihl_len);
  }
 
+ if (test_pkt_access_subprog1(skb) != skb->len * 2)
+         return TC_ACT_SHOT;
+ if (test_pkt_access_subprog2(2, skb) != skb->len * 2)
+         return TC_ACT_SHOT;
  if (tcp) {
          if (((void *)(tcp) + 20) > data_end || proto != 6)
                  return TC_ACT_SHOT;
+3 -1
tools/testing/selftests/bpf/progs/test_seg6_loop.c
···
  *pad_off = 0;
 
  // we can only go as far as ~10 TLVs due to the BPF max stack size
+ // workaround: define induction variable "i" as "long" instead
+ // of "int" to prevent alu32 sub-register spilling.
  #pragma clang loop unroll(disable)
- for (int i = 0; i < 100; i++) {
+ for (long i = 0; i < 100; i++) {
          struct sr6_tlv_t tlv;
 
          if (cur_off == *tlv_off)
+4 -1
tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
···
  unsigned long tcp_mem[TCP_MEM_LOOPS] = {};
  char value[MAX_VALUE_STR_LEN];
  unsigned char i, off = 0;
- int ret;
+ /* a workaround to prevent compiler from generating
+  * codes verifier cannot handle yet.
+  */
+ volatile int ret;
 
  if (ctx->write)
          return 0;
+5
tools/testing/selftests/bpf/test_tc_tunnel.sh
···
  if [[ -f "${infile}" ]]; then
          rm "${infile}"
  fi
+
+ if [[ -n $server_pid ]]; then
+         kill $server_pid 2> /dev/null
+ fi
  }
 
  server_listen() {
···
 
  verify_data() {
          wait "${server_pid}"
+         server_pid=
          # sha1sum returns two fields [sha1] [filepath]
          # convert to bash array and access first elem
          insum=($(sha1sum ${infile}))
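The shell change above follows a common pattern: record the background server's pid, clear the variable once the process has been reaped by `wait`, and have cleanup kill only when the variable is still set, so a trap never signals an already-reaped pid. A hedged standalone sketch of the same pattern (the `sleep` stands in for the test's server):

```shell
#!/bin/bash
server_pid=

cleanup() {
	# only kill while we still own a live, un-reaped child
	if [[ -n $server_pid ]]; then
		kill $server_pid 2> /dev/null
	fi
}

sleep 30 &                     # stand-in for the background server
server_pid=$!
cleanup                        # kills the still-running "server"
wait $server_pid 2> /dev/null  # reap it
server_pid=                    # disarm: cleanup is now a no-op
cleanup
echo done
```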