jcs's openbsd hax
/* $OpenBSD: pf.c,v 1.1236 2026/02/05 03:26:00 dlg Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/percpu.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/toeplitz.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif /* NPFSYNC > 0 */

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;
static struct cpumem	*pf_status_fcounters;

struct mutex		 pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

int			 pf_hdr_limit = 20;	/* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char
pf_tcp_secret[16]; 113int pf_tcp_secret_init; 114int pf_tcp_iss_off; 115 116enum pf_test_status { 117 PF_TEST_FAIL = -1, 118 PF_TEST_OK, 119 PF_TEST_QUICK 120}; 121 122struct pf_test_ctx { 123 struct pf_pdesc *pd; 124 struct pf_rule_actions act; 125 u_int8_t icmpcode; 126 u_int8_t icmptype; 127 int icmp_dir; 128 int state_icmp; 129 int tag; 130 int limiter_drop; 131 u_short reason; 132 struct pf_rule_item *ri; 133 struct pf_src_node *sns[PF_SN_MAX]; 134 struct pf_rule_slist rules; 135 struct pf_rule *nr; 136 struct pf_rule **rm; 137 struct pf_rule *a; 138 struct pf_rule **am; 139 struct pf_ruleset **rsm; 140 struct pf_ruleset *arsm; 141 struct pf_ruleset *aruleset; 142 struct tcphdr *th; 143 struct pf_statelim *statelim; 144 struct pf_sourcelim *sourcelim; 145 struct pf_source *source; 146}; 147 148struct pool pf_src_tree_pl, pf_rule_pl, pf_queue_pl; 149struct pool pf_state_pl, pf_state_key_pl, pf_state_item_pl; 150struct pool pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl; 151struct pool pf_statelim_pl, pf_sourcelim_pl, pf_source_pl; 152struct pool pf_state_link_pl; 153 154void pf_add_threshold(struct pf_threshold *); 155int pf_check_threshold(struct pf_threshold *); 156int pf_check_tcp_cksum(struct mbuf *, int, int, 157 sa_family_t); 158__inline void pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t, 159 u_int8_t); 160void pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *, 161 const struct pf_addr *, sa_family_t, u_int8_t); 162int pf_modulate_sack(struct pf_pdesc *, 163 struct pf_state_peer *); 164int pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *, 165 u_int16_t *, u_int16_t *); 166int pf_change_icmp_af(struct mbuf *, int, 167 struct pf_pdesc *, struct pf_pdesc *, 168 struct pf_addr *, struct pf_addr *, sa_family_t, 169 sa_family_t); 170int pf_translate_a(struct pf_pdesc *, struct pf_addr *, 171 struct pf_addr *); 172void pf_translate_icmp(struct pf_pdesc *, struct pf_addr *, 173 u_int16_t *, struct pf_addr *, struct pf_addr *, 174 u_int16_t); 175int pf_translate_icmp_af(struct pf_pdesc*, int, void *); 176void pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int, 177 sa_family_t, struct pf_rule *, u_int); 178void pf_detach_state(struct pf_state *); 179struct pf_state_key *pf_state_key_attach(struct pf_state_key *, 180 struct pf_state *, int); 181void pf_state_key_detach(struct pf_state *, int); 182u_int32_t pf_tcp_iss(struct pf_pdesc *); 183void pf_rule_to_actions(struct pf_rule *, 184 struct pf_rule_actions *); 185int pf_test_rule(struct pf_pdesc *, struct pf_rule **, 186 struct pf_state **, struct pf_rule **, 187 struct pf_ruleset **, u_short *); 188static __inline int pf_create_state(struct pf_pdesc *, struct pf_rule *, 189 struct pf_rule *, struct pf_rule *, 190 struct pf_state_key **, struct pf_state_key **, 191 int *, struct pf_state **, int, 192 struct pf_rule_slist *, struct pf_rule_actions *, 193 struct pf_src_node **, struct pf_test_ctx *); 194static __inline int pf_state_key_addr_setup(struct pf_pdesc *, void *, 195 int, struct pf_addr *, int, struct pf_addr *, 196 int, int); 197int pf_state_key_setup(struct pf_pdesc *, struct 198 pf_state_key **, struct pf_state_key **, int); 199int pf_tcp_track_full(struct pf_pdesc *, 200 struct pf_state **, u_short *, int *, int); 201int pf_tcp_track_sloppy(struct pf_pdesc *, 202 struct pf_state **, u_short *); 203static __inline int pf_synproxy_ack(struct pf_rule *, struct pf_pdesc *, 204 struct pf_state **, struct pf_rule_actions *); 205static __inline int pf_synproxy(struct pf_pdesc *, struct pf_state **, 206 u_short *); 207int 
pf_test_state(struct pf_pdesc *, struct pf_state **, 208 u_short *); 209int pf_icmp_state_lookup(struct pf_pdesc *, 210 struct pf_state_key_cmp *, struct pf_state **, 211 u_int16_t, u_int16_t, int, int *, int, int); 212int pf_test_state_icmp(struct pf_pdesc *, 213 struct pf_state **, u_short *); 214u_int16_t pf_calc_mss(struct pf_addr *, sa_family_t, int, 215 uint16_t, uint16_t); 216static __inline int pf_set_rt_ifp(struct pf_state *, struct pf_addr *, 217 sa_family_t, struct pf_src_node **); 218struct pf_divert *pf_get_divert(struct mbuf *); 219int pf_walk_option(struct pf_pdesc *, struct ip *, 220 int, int, u_short *); 221int pf_walk_header(struct pf_pdesc *, struct ip *, 222 u_short *); 223int pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *, 224 int, int, u_short *); 225int pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *, 226 u_short *); 227void pf_print_state_parts(struct pf_state *, 228 struct pf_state_key *, struct pf_state_key *); 229int pf_addr_wrap_neq(struct pf_addr_wrap *, 230 struct pf_addr_wrap *); 231int pf_compare_state_keys(struct pf_state_key *, 232 struct pf_state_key *, struct pfi_kif *, u_int); 233u_int16_t pf_pkt_hash(sa_family_t, uint8_t, 234 const struct pf_addr *, const struct pf_addr *, 235 uint16_t, uint16_t); 236int pf_find_state(struct pf_pdesc *, 237 struct pf_state_key_cmp *, struct pf_state **); 238int pf_src_connlimit(struct pf_state **); 239int pf_match_rcvif(struct mbuf *, struct pf_rule *); 240enum pf_test_status pf_match_rule(struct pf_test_ctx *, 241 struct pf_ruleset *); 242void pf_counters_inc(int, struct pf_pdesc *, 243 struct pf_state *, struct pf_rule *, 244 struct pf_rule *); 245 246int pf_state_insert(struct pfi_kif *, 247 struct pf_state_key **, struct pf_state_key **, 248 struct pf_state *); 249 250int pf_state_key_isvalid(struct pf_state_key *); 251struct pf_state_key *pf_state_key_ref(struct pf_state_key *); 252void pf_state_key_unref(struct pf_state_key *); 253void pf_state_key_link_reverse(struct pf_state_key *, 254 struct pf_state_key *); 255void pf_state_key_unlink_reverse(struct pf_state_key *); 256void pf_state_key_link_inpcb(struct pf_state_key *, 257 struct inpcb *); 258void pf_state_key_unlink_inpcb(struct pf_state_key *); 259void pf_pktenqueue_delayed(void *); 260int32_t pf_state_expires(const struct pf_state *, uint8_t); 261 262#if NPFLOG > 0 263void pf_log_matches(struct pf_pdesc *, struct pf_rule *, 264 struct pf_rule *, struct pf_ruleset *, 265 struct pf_rule_slist *); 266#endif /* NPFLOG > 0 */ 267 268extern struct pool pfr_ktable_pl; 269extern struct pool pfr_kentry_pl; 270 271struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = { 272 { &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT }, 273 { &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT }, 274 { &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT }, 275 { &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT }, 276 { &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT }, 277 { &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS }, 278 { &pf_anchor_pl, PF_ANCHOR_HIWAT, PF_ANCHOR_HIWAT } 279}; 280 281#define BOUND_IFACE(r, k) \ 282 ((r)->rule_flag & PFRULE_IFBOUND) ? 
(k) : pfi_all 283 284#define STATE_INC_COUNTERS(s) \ 285 do { \ 286 struct pf_rule_item *mrm; \ 287 s->rule.ptr->states_cur++; \ 288 s->rule.ptr->states_tot++; \ 289 if (s->anchor.ptr != NULL) { \ 290 s->anchor.ptr->states_cur++; \ 291 s->anchor.ptr->states_tot++; \ 292 } \ 293 SLIST_FOREACH(mrm, &s->match_rules, entry) \ 294 mrm->r->states_cur++; \ 295 } while (0) 296 297static __inline int pf_src_compare(struct pf_src_node *, struct pf_src_node *); 298static inline int pf_state_compare_key(const struct pf_state_key *, 299 const struct pf_state_key *); 300static inline int pf_state_compare_id(const struct pf_state *, 301 const struct pf_state *); 302#ifdef INET6 303static __inline void pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t); 304static __inline void pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t); 305#endif /* INET6 */ 306static __inline void pf_set_protostate(struct pf_state *, int, u_int8_t); 307 308static inline int 309pf_statelim_id_cmp(const struct pf_statelim *a, const struct pf_statelim *b) 310{ 311 if (a->pfstlim_id > b->pfstlim_id) 312 return (1); 313 if (a->pfstlim_id < b->pfstlim_id) 314 return (-1); 315 316 return (0); 317} 318 319RBT_GENERATE(pf_statelim_id_tree, pf_statelim, pfstlim_id_tree, 320 pf_statelim_id_cmp); 321 322static inline int 323pf_statelim_nm_cmp(const struct pf_statelim *a, const struct pf_statelim *b) 324{ 325 return (strncmp(a->pfstlim_nm, b->pfstlim_nm, sizeof(a->pfstlim_nm))); 326} 327 328RBT_GENERATE(pf_statelim_nm_tree, pf_statelim, pfstlim_nm_tree, 329 pf_statelim_nm_cmp); 330 331struct pf_statelim_id_tree pf_statelim_id_tree_active = 332 RBT_INITIALIZER(pf_statelim_id_tree_active); 333struct pf_statelim_list pf_statelim_list_active = 334 TAILQ_HEAD_INITIALIZER(pf_statelim_list_active); 335 336struct pf_statelim_id_tree pf_statelim_id_tree_inactive = 337 RBT_INITIALIZER(pf_statelim_id_tree_inactive); 338struct pf_statelim_nm_tree pf_statelim_nm_tree_inactive = 339 RBT_INITIALIZER(pf_statelim_nm_tree_inactive); 340struct pf_statelim_list pf_statelim_list_inactive = 341 TAILQ_HEAD_INITIALIZER(pf_statelim_list_inactive); 342 343static inline int 344pf_sourcelim_id_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b) 345{ 346 if (a->pfsrlim_id > b->pfsrlim_id) 347 return (1); 348 if (a->pfsrlim_id < b->pfsrlim_id) 349 return (-1); 350 351 return (0); 352} 353 354RBT_GENERATE(pf_sourcelim_id_tree, pf_sourcelim, pfsrlim_id_tree, 355 pf_sourcelim_id_cmp); 356 357static inline int 358pf_sourcelim_nm_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b) 359{ 360 return (strncmp(a->pfsrlim_nm, b->pfsrlim_nm, sizeof(a->pfsrlim_nm))); 361} 362 363RBT_GENERATE(pf_sourcelim_nm_tree, pf_sourcelim, pfsrlim_nm_tree, 364 pf_sourcelim_nm_cmp); 365 366static inline int 367pf_source_cmp(const struct pf_source *a, const struct pf_source *b) 368{ 369 if (a->pfsr_af > b->pfsr_af) 370 return (1); 371 if (a->pfsr_af < b->pfsr_af) 372 return (-1); 373 if (a->pfsr_rdomain > b->pfsr_rdomain) 374 return (1); 375 if (a->pfsr_rdomain < b->pfsr_rdomain) 376 return (-1); 377 378 return (pf_addr_compare(&a->pfsr_addr, &b->pfsr_addr, a->pfsr_af)); 379} 380 381RBT_GENERATE(pf_source_tree, pf_source, pfsr_tree, pf_source_cmp); 382 383static inline int 384pf_source_ioc_cmp(const struct pf_source *a, const struct pf_source *b) 385{ 386 size_t i; 387 388 if (a->pfsr_af > b->pfsr_af) 389 return (1); 390 if (a->pfsr_af < b->pfsr_af) 391 return (-1); 392 if (a->pfsr_rdomain > b->pfsr_rdomain) 393 return (1); 394 if (a->pfsr_rdomain < b->pfsr_rdomain) 395 return 
(-1); 396 397 for (i = 0; i < nitems(a->pfsr_addr.addr32); i++) { 398 uint32_t wa = ntohl(a->pfsr_addr.addr32[i]); 399 uint32_t wb = ntohl(b->pfsr_addr.addr32[i]); 400 401 if (wa > wb) 402 return (1); 403 if (wa < wb) 404 return (-1); 405 } 406 407 return (0); 408} 409 410RBT_GENERATE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, pf_source_ioc_cmp); 411 412struct pf_sourcelim_id_tree pf_sourcelim_id_tree_active = 413 RBT_INITIALIZER(pf_sourcelim_id_tree_active); 414struct pf_sourcelim_list pf_sourcelim_list_active = 415 TAILQ_HEAD_INITIALIZER(pf_sourcelim_list_active); 416 417struct pf_sourcelim_id_tree pf_sourcelim_id_tree_inactive = 418 RBT_INITIALIZER(pf_sourcelim_id_tree_inactive); 419struct pf_sourcelim_nm_tree pf_sourcelim_nm_tree_inactive = 420 RBT_INITIALIZER(pf_sourcelim_nm_tree_inactive); 421struct pf_sourcelim_list pf_sourcelim_list_inactive = 422 TAILQ_HEAD_INITIALIZER(pf_sourcelim_list_inactive); 423 424static inline struct pf_statelim * 425pf_statelim_find(uint32_t id) 426{ 427 struct pf_statelim key; 428 429 /* only the id is used in cmp, so don't have to zero all the things */ 430 key.pfstlim_id = id; 431 432 return (RBT_FIND(pf_statelim_id_tree, 433 &pf_statelim_id_tree_active, &key)); 434} 435 436static inline struct pf_sourcelim * 437pf_sourcelim_find(uint32_t id) 438{ 439 struct pf_sourcelim key; 440 441 /* only the id is used in cmp, so don't have to zero all the things */ 442 key.pfsrlim_id = id; 443 444 return (RBT_FIND(pf_sourcelim_id_tree, 445 &pf_sourcelim_id_tree_active, &key)); 446} 447 448struct pf_source_list pf_source_gc = TAILQ_HEAD_INITIALIZER(pf_source_gc); 449 450static void 451pf_source_purge(void) 452{ 453 struct pf_source *sr, *nsr; 454 time_t now = getuptime(); 455 456 TAILQ_FOREACH_SAFE(sr, &pf_source_gc, pfsr_empty_gc, nsr) { 457 struct pf_sourcelim *srlim = sr->pfsr_parent; 458 459 if (now <= sr->pfsr_empty_ts + srlim->pfsrlim_rate.seconds + 1) 460 continue; 461 462 TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc); 463 464 RBT_REMOVE(pf_source_tree, &srlim->pfsrlim_sources, sr); 465 RBT_REMOVE(pf_source_ioc_tree, &srlim->pfsrlim_ioc_sources, sr); 466 srlim->pfsrlim_nsources--; 467 468 pool_put(&pf_source_pl, sr); 469 } 470} 471 472static void 473pf_source_pfr_addr(struct pfr_addr *p, const struct pf_source *sr) 474{ 475 struct pf_sourcelim *srlim = sr->pfsr_parent; 476 477 memset(p, 0, sizeof(*p)); 478 479 p->pfra_af = sr->pfsr_af; 480 switch (sr->pfsr_af) { 481 case AF_INET: 482 p->pfra_net = srlim->pfsrlim_ipv4_prefix; 483 p->pfra_ip4addr = sr->pfsr_addr.v4; 484 break; 485#ifdef INET6 486 case AF_INET6: 487 p->pfra_net = srlim->pfsrlim_ipv6_prefix; 488 p->pfra_ip6addr = sr->pfsr_addr.v6; 489 break; 490#endif /* INET6 */ 491 } 492} 493 494static void 495pf_source_used(struct pf_source *sr) 496{ 497 struct pf_sourcelim *srlim = sr->pfsr_parent; 498 struct pfr_ktable *t; 499 unsigned int used; 500 501 used = sr->pfsr_inuse++; 502 sr->pfsr_rate_ts += srlim->pfsrlim_rate_token; 503 504 if (used == 0) 505 TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc); 506 else if ((t = srlim->pfsrlim_overload.table) != NULL && 507 used >= srlim->pfsrlim_overload.hwm && !sr->pfsr_intable) { 508 struct pfr_addr p; 509 510 pf_source_pfr_addr(&p, sr); 511 512 pfr_insert_kentry(t, &p, gettime()); 513 sr->pfsr_intable = 1; 514 } 515} 516 517static void 518pf_source_rele(struct pf_source *sr) 519{ 520 struct pf_sourcelim *srlim = sr->pfsr_parent; 521 struct pfr_ktable *t; 522 unsigned int used; 523 524 used = --sr->pfsr_inuse; 525 526 t = srlim->pfsrlim_overload.table; 527 if 
(t != NULL && sr->pfsr_intable && 528 used < srlim->pfsrlim_overload.lwm) { 529 struct pfr_addr p; 530 531 pf_source_pfr_addr(&p, sr); 532 533 pfr_remove_kentry(t, &p); 534 sr->pfsr_intable = 0; 535 } 536 537 if (used == 0) { 538 TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc); 539 sr->pfsr_empty_ts = getuptime() + srlim->pfsrlim_rate.seconds; 540 } 541} 542 543static inline void 544pf_source_key(struct pf_sourcelim *srlim, struct pf_source *key, 545 sa_family_t af, unsigned int rdomain, const struct pf_addr *addr) 546{ 547 size_t i; 548 549 /* only af+addr is used for lookup. */ 550 key->pfsr_af = af; 551 key->pfsr_rdomain = rdomain; 552 switch (af) { 553 case AF_INET: 554 key->pfsr_addr.addr32[0] = 555 srlim->pfsrlim_ipv4_mask.v4.s_addr & 556 addr->v4.s_addr; 557 558 for (i = 1; i < nitems(key->pfsr_addr.addr32); i++) 559 key->pfsr_addr.addr32[i] = htonl(0); 560 break; 561#ifdef INET6 562 case AF_INET6: 563 for (i = 0; i < nitems(key->pfsr_addr.addr32); i++) { 564 key->pfsr_addr.addr32[i] = 565 srlim->pfsrlim_ipv6_mask.addr32[i] & 566 addr->addr32[i]; 567 } 568 break; 569#endif 570 default: 571 unhandled_af(af); 572 /* NOTREACHED */ 573 } 574} 575 576static inline struct pf_source * 577pf_source_find(struct pf_sourcelim *srlim, const struct pf_source *key) 578{ 579 return (RBT_FIND(pf_source_tree, &srlim->pfsrlim_sources, key)); 580} 581 582struct pf_src_tree tree_src_tracking; 583 584struct pf_state_tree_id tree_id; 585struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list); 586 587RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare); 588RBT_GENERATE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key); 589RBT_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id); 590 591int 592pf_addr_compare(const struct pf_addr *a, const struct pf_addr *b, 593 sa_family_t af) 594{ 595 switch (af) { 596 case AF_INET: 597 if (a->addr32[0] > b->addr32[0]) 598 return (1); 599 if (a->addr32[0] < b->addr32[0]) 600 return (-1); 601 break; 602#ifdef INET6 603 case AF_INET6: 604 if (a->addr32[3] > b->addr32[3]) 605 return (1); 606 if (a->addr32[3] < b->addr32[3]) 607 return (-1); 608 if (a->addr32[2] > b->addr32[2]) 609 return (1); 610 if (a->addr32[2] < b->addr32[2]) 611 return (-1); 612 if (a->addr32[1] > b->addr32[1]) 613 return (1); 614 if (a->addr32[1] < b->addr32[1]) 615 return (-1); 616 if (a->addr32[0] > b->addr32[0]) 617 return (1); 618 if (a->addr32[0] < b->addr32[0]) 619 return (-1); 620 break; 621#endif /* INET6 */ 622 } 623 return (0); 624} 625 626static __inline int 627pf_src_compare(struct pf_src_node *a, struct pf_src_node *b) 628{ 629 int diff; 630 631 if (a->rule.ptr > b->rule.ptr) 632 return (1); 633 if (a->rule.ptr < b->rule.ptr) 634 return (-1); 635 if ((diff = a->type - b->type) != 0) 636 return (diff); 637 if ((diff = a->af - b->af) != 0) 638 return (diff); 639 if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0) 640 return (diff); 641 return (0); 642} 643 644static __inline void 645pf_set_protostate(struct pf_state *st, int which, u_int8_t newstate) 646{ 647 if (which == PF_PEER_DST || which == PF_PEER_BOTH) 648 st->dst.state = newstate; 649 if (which == PF_PEER_DST) 650 return; 651 652 if (st->src.state == newstate) 653 return; 654 if (st->creatorid == pf_status.hostid && 655 st->key[PF_SK_STACK]->proto == IPPROTO_TCP && 656 !(TCPS_HAVEESTABLISHED(st->src.state) || 657 st->src.state == TCPS_CLOSED) && 658 (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED)) 659 atomic_dec_int(&pf_status.states_halfopen); 660 661 
st->src.state = newstate; 662} 663 664void 665pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af) 666{ 667 switch (af) { 668 case AF_INET: 669 dst->addr32[0] = src->addr32[0]; 670 break; 671#ifdef INET6 672 case AF_INET6: 673 dst->addr32[0] = src->addr32[0]; 674 dst->addr32[1] = src->addr32[1]; 675 dst->addr32[2] = src->addr32[2]; 676 dst->addr32[3] = src->addr32[3]; 677 break; 678#endif /* INET6 */ 679 default: 680 unhandled_af(af); 681 } 682} 683 684void 685pf_init_threshold(struct pf_threshold *threshold, 686 u_int32_t limit, u_int32_t seconds) 687{ 688 threshold->limit = limit * PF_THRESHOLD_MULT; 689 threshold->seconds = seconds; 690 threshold->count = 0; 691 threshold->last = getuptime(); 692} 693 694void 695pf_add_threshold(struct pf_threshold *threshold) 696{ 697 u_int32_t t = getuptime(), diff = t - threshold->last; 698 699 if (diff >= threshold->seconds) 700 threshold->count = 0; 701 else 702 threshold->count -= threshold->count * diff / 703 threshold->seconds; 704 threshold->count += PF_THRESHOLD_MULT; 705 threshold->last = t; 706} 707 708int 709pf_check_threshold(struct pf_threshold *threshold) 710{ 711 return (threshold->count > threshold->limit); 712} 713 714void 715pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st) 716{ 717 /* 718 * we can always put states on the end of the list. 719 * 720 * things reading the list should take a read lock, then 721 * the mutex, get the head and tail pointers, release the 722 * mutex, and then they can iterate between the head and tail. 723 */ 724 725 pf_state_ref(st); /* get a ref for the list */ 726 727 mtx_enter(&pfs->pfs_mtx); 728 TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list); 729 mtx_leave(&pfs->pfs_mtx); 730} 731 732void 733pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st) 734{ 735 /* states can only be removed when the write lock is held */ 736 rw_assert_wrlock(&pfs->pfs_rwl); 737 738 mtx_enter(&pfs->pfs_mtx); 739 TAILQ_REMOVE(&pfs->pfs_list, st, entry_list); 740 mtx_leave(&pfs->pfs_mtx); 741 742 pf_state_unref(st); /* list no longer references the state */ 743} 744 745void 746pf_update_state_timeout(struct pf_state *st, int to) 747{ 748 mtx_enter(&st->mtx); 749 if (st->timeout != PFTM_UNLINKED) 750 st->timeout = to; 751 mtx_leave(&st->mtx); 752} 753 754int 755pf_src_connlimit(struct pf_state **stp) 756{ 757 int bad = 0; 758 struct pf_src_node *sn; 759 760 if ((sn = pf_get_src_node((*stp), PF_SN_NONE)) == NULL) 761 return (0); 762 763 sn->conn++; 764 (*stp)->src.tcp_est = 1; 765 pf_add_threshold(&sn->conn_rate); 766 767 if ((*stp)->rule.ptr->max_src_conn && 768 (*stp)->rule.ptr->max_src_conn < sn->conn) { 769 pf_status.lcounters[LCNT_SRCCONN]++; 770 bad++; 771 } 772 773 if ((*stp)->rule.ptr->max_src_conn_rate.limit && 774 pf_check_threshold(&sn->conn_rate)) { 775 pf_status.lcounters[LCNT_SRCCONNRATE]++; 776 bad++; 777 } 778 779 if (!bad) 780 return (0); 781 782 if ((*stp)->rule.ptr->overload_tbl) { 783 struct pfr_addr p; 784 u_int32_t killed = 0; 785 786 pf_status.lcounters[LCNT_OVERLOAD_TABLE]++; 787 if (pf_status.debug >= LOG_NOTICE) { 788 log(LOG_NOTICE, 789 "pf: pf_src_connlimit: blocking address "); 790 pf_print_host(&sn->addr, 0, 791 (*stp)->key[PF_SK_WIRE]->af); 792 } 793 794 memset(&p, 0, sizeof(p)); 795 p.pfra_af = (*stp)->key[PF_SK_WIRE]->af; 796 switch ((*stp)->key[PF_SK_WIRE]->af) { 797 case AF_INET: 798 p.pfra_net = 32; 799 p.pfra_ip4addr = sn->addr.v4; 800 break; 801#ifdef INET6 802 case AF_INET6: 803 p.pfra_net = 128; 804 p.pfra_ip6addr = sn->addr.v6; 805 break; 
806#endif /* INET6 */ 807 } 808 809 pfr_insert_kentry((*stp)->rule.ptr->overload_tbl, 810 &p, gettime()); 811 812 /* kill existing states if that's required. */ 813 if ((*stp)->rule.ptr->flush) { 814 struct pf_state_key *sk; 815 struct pf_state *st; 816 817 pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++; 818 RBT_FOREACH(st, pf_state_tree_id, &tree_id) { 819 sk = st->key[PF_SK_WIRE]; 820 /* 821 * Kill states from this source. (Only those 822 * from the same rule if PF_FLUSH_GLOBAL is not 823 * set) 824 */ 825 if (sk->af == 826 (*stp)->key[PF_SK_WIRE]->af && 827 (((*stp)->direction == PF_OUT && 828 PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) || 829 ((*stp)->direction == PF_IN && 830 PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) && 831 ((*stp)->rule.ptr->flush & 832 PF_FLUSH_GLOBAL || 833 (*stp)->rule.ptr == st->rule.ptr)) { 834 pf_update_state_timeout(st, PFTM_PURGE); 835 pf_set_protostate(st, PF_PEER_BOTH, 836 TCPS_CLOSED); 837 killed++; 838 } 839 } 840 if (pf_status.debug >= LOG_NOTICE) 841 addlog(", %u states killed", killed); 842 } 843 if (pf_status.debug >= LOG_NOTICE) 844 addlog("\n"); 845 } 846 847 /* kill this state */ 848 pf_update_state_timeout(*stp, PFTM_PURGE); 849 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_CLOSED); 850 return (1); 851} 852 853int 854pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule, 855 enum pf_sn_types type, sa_family_t af, struct pf_addr *src, 856 struct pf_addr *raddr, struct pfi_kif *kif) 857{ 858 struct pf_src_node k; 859 860 if (*sn == NULL) { 861 k.af = af; 862 k.type = type; 863 pf_addrcpy(&k.addr, src, af); 864 k.rule.ptr = rule; 865 pf_status.scounters[SCNT_SRC_NODE_SEARCH]++; 866 *sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k); 867 } 868 if (*sn == NULL) { 869 if (!rule->max_src_nodes || 870 rule->src_nodes < rule->max_src_nodes) 871 (*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO); 872 else 873 pf_status.lcounters[LCNT_SRCNODES]++; 874 if ((*sn) == NULL) 875 return (-1); 876 877 pf_init_threshold(&(*sn)->conn_rate, 878 rule->max_src_conn_rate.limit, 879 rule->max_src_conn_rate.seconds); 880 881 (*sn)->type = type; 882 (*sn)->af = af; 883 (*sn)->rule.ptr = rule; 884 pf_addrcpy(&(*sn)->addr, src, af); 885 if (raddr) 886 pf_addrcpy(&(*sn)->raddr, raddr, af); 887 if (RB_INSERT(pf_src_tree, 888 &tree_src_tracking, *sn) != NULL) { 889 if (pf_status.debug >= LOG_NOTICE) { 890 log(LOG_NOTICE, 891 "pf: src_tree insert failed: "); 892 pf_print_host(&(*sn)->addr, 0, af); 893 addlog("\n"); 894 } 895 pool_put(&pf_src_tree_pl, *sn); 896 return (-1); 897 } 898 (*sn)->creation = getuptime(); 899 (*sn)->rule.ptr->src_nodes++; 900 if (kif != NULL) { 901 (*sn)->kif = kif; 902 pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE); 903 } 904 pf_status.scounters[SCNT_SRC_NODE_INSERT]++; 905 pf_status.src_nodes++; 906 } else { 907 if (rule->max_src_states && 908 (*sn)->states >= rule->max_src_states) { 909 pf_status.lcounters[LCNT_SRCSTATES]++; 910 return (-1); 911 } 912 } 913 return (0); 914} 915 916void 917pf_remove_src_node(struct pf_src_node *sn) 918{ 919 if (sn->states > 0 || sn->expire > getuptime()) 920 return; 921 922 sn->rule.ptr->src_nodes--; 923 if (sn->rule.ptr->states_cur == 0 && 924 sn->rule.ptr->src_nodes == 0) 925 pf_rm_rule(NULL, sn->rule.ptr); 926 RB_REMOVE(pf_src_tree, &tree_src_tracking, sn); 927 pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++; 928 pf_status.src_nodes--; 929 pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE); 930 pool_put(&pf_src_tree_pl, sn); 931} 932 933struct pf_src_node * 934pf_get_src_node(struct pf_state *st, enum pf_sn_types type) 935{ 936 
struct pf_sn_item *sni; 937 938 SLIST_FOREACH(sni, &st->src_nodes, next) 939 if (sni->sn->type == type) 940 return (sni->sn); 941 return (NULL); 942} 943 944void 945pf_state_rm_src_node(struct pf_state *st, struct pf_src_node *sn) 946{ 947 struct pf_sn_item *sni, *snin, *snip = NULL; 948 949 for (sni = SLIST_FIRST(&st->src_nodes); sni; sni = snin) { 950 snin = SLIST_NEXT(sni, next); 951 if (sni->sn == sn) { 952 if (snip) 953 SLIST_REMOVE_AFTER(snip, next); 954 else 955 SLIST_REMOVE_HEAD(&st->src_nodes, next); 956 pool_put(&pf_sn_item_pl, sni); 957 sni = NULL; 958 sn->states--; 959 } 960 if (sni != NULL) 961 snip = sni; 962 } 963} 964 965/* state table stuff */ 966 967static inline int 968pf_state_compare_key(const struct pf_state_key *a, 969 const struct pf_state_key *b) 970{ 971 int diff; 972 973 if ((diff = a->hash - b->hash) != 0) 974 return (diff); 975 if ((diff = a->proto - b->proto) != 0) 976 return (diff); 977 if ((diff = a->af - b->af) != 0) 978 return (diff); 979 if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0) 980 return (diff); 981 if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0) 982 return (diff); 983 if ((diff = a->port[0] - b->port[0]) != 0) 984 return (diff); 985 if ((diff = a->port[1] - b->port[1]) != 0) 986 return (diff); 987 if ((diff = a->rdomain - b->rdomain) != 0) 988 return (diff); 989 return (0); 990} 991 992static inline int 993pf_state_compare_id(const struct pf_state *a, const struct pf_state *b) 994{ 995 if (a->id > b->id) 996 return (1); 997 if (a->id < b->id) 998 return (-1); 999 if (a->creatorid > b->creatorid) 1000 return (1); 1001 if (a->creatorid < b->creatorid) 1002 return (-1); 1003 1004 return (0); 1005} 1006 1007/* 1008 * on failure, pf_state_key_attach() releases the pf_state_key 1009 * reference and returns NULL. 1010 */ 1011struct pf_state_key * 1012pf_state_key_attach(struct pf_state_key *sk, struct pf_state *st, int idx) 1013{ 1014 struct pf_state_item *si; 1015 struct pf_state_key *cur; 1016 struct pf_state *oldst = NULL; 1017 1018 PF_ASSERT_LOCKED(); 1019 1020 KASSERT(st->key[idx] == NULL); 1021 sk->sk_removed = 0; 1022 cur = RBT_INSERT(pf_state_tree, &pf_statetbl, sk); 1023 if (cur != NULL) { 1024 sk->sk_removed = 1; 1025 /* key exists. check for same kif, if none, add to key */ 1026 TAILQ_FOREACH(si, &cur->sk_states, si_entry) { 1027 struct pf_state *sist = si->si_st; 1028 if (sist->kif == st->kif && 1029 ((sist->key[PF_SK_WIRE]->af == sk->af && 1030 sist->direction == st->direction) || 1031 (sist->key[PF_SK_WIRE]->af != 1032 sist->key[PF_SK_STACK]->af && 1033 sk->af == sist->key[PF_SK_STACK]->af && 1034 sist->direction != st->direction))) { 1035 int reuse = 0; 1036 1037 if (sk->proto == IPPROTO_TCP && 1038 sist->src.state >= TCPS_FIN_WAIT_2 && 1039 sist->dst.state >= TCPS_FIN_WAIT_2) 1040 reuse = 1; 1041 if (pf_status.debug >= LOG_NOTICE) { 1042 log(LOG_NOTICE, 1043 "pf: %s key attach %s on %s: ", 1044 (idx == PF_SK_WIRE) ? 1045 "wire" : "stack", 1046 reuse ? "reuse" : "failed", 1047 st->kif->pfik_name); 1048 pf_print_state_parts(st, 1049 (idx == PF_SK_WIRE) ? sk : NULL, 1050 (idx == PF_SK_STACK) ? sk : NULL); 1051 addlog(", existing: "); 1052 pf_print_state_parts(sist, 1053 (idx == PF_SK_WIRE) ? sk : NULL, 1054 (idx == PF_SK_STACK) ? sk : NULL); 1055 addlog("\n"); 1056 } 1057 if (reuse) { 1058 pf_set_protostate(sist, PF_PEER_BOTH, 1059 TCPS_CLOSED); 1060 /* remove late or sks can go away */ 1061 oldst = sist; 1062 } else { 1063 pf_state_key_unref(sk); 1064 return (NULL); /* collision! 
*/ 1065 } 1066 } 1067 } 1068 1069 /* reuse the existing state key */ 1070 pf_state_key_unref(sk); 1071 sk = cur; 1072 } 1073 1074 if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) { 1075 if (TAILQ_EMPTY(&sk->sk_states)) { 1076 KASSERT(cur == NULL); 1077 RBT_REMOVE(pf_state_tree, &pf_statetbl, sk); 1078 sk->sk_removed = 1; 1079 pf_state_key_unref(sk); 1080 } 1081 1082 return (NULL); 1083 } 1084 1085 st->key[idx] = pf_state_key_ref(sk); /* give a ref to state */ 1086 si->si_st = pf_state_ref(st); 1087 1088 /* list is sorted, if-bound states before floating */ 1089 if (st->kif == pfi_all) 1090 TAILQ_INSERT_TAIL(&sk->sk_states, si, si_entry); 1091 else 1092 TAILQ_INSERT_HEAD(&sk->sk_states, si, si_entry); 1093 1094 if (oldst) 1095 pf_remove_state(oldst); 1096 1097 /* caller owns the pf_state ref, which owns a pf_state_key ref now */ 1098 return (sk); 1099} 1100 1101void 1102pf_detach_state(struct pf_state *st) 1103{ 1104 KASSERT(st->key[PF_SK_WIRE] != NULL); 1105 pf_state_key_detach(st, PF_SK_WIRE); 1106 1107 KASSERT(st->key[PF_SK_STACK] != NULL); 1108 if (st->key[PF_SK_STACK] != st->key[PF_SK_WIRE]) 1109 pf_state_key_detach(st, PF_SK_STACK); 1110} 1111 1112void 1113pf_state_key_detach(struct pf_state *st, int idx) 1114{ 1115 struct pf_state_item *si; 1116 struct pf_state_key *sk; 1117 1118 PF_ASSERT_LOCKED(); 1119 1120 sk = st->key[idx]; 1121 if (sk == NULL) 1122 return; 1123 1124 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1125 if (si->si_st == st) 1126 break; 1127 } 1128 if (si == NULL) 1129 return; 1130 1131 TAILQ_REMOVE(&sk->sk_states, si, si_entry); 1132 pool_put(&pf_state_item_pl, si); 1133 1134 if (TAILQ_EMPTY(&sk->sk_states)) { 1135 RBT_REMOVE(pf_state_tree, &pf_statetbl, sk); 1136 sk->sk_removed = 1; 1137 pf_state_key_unlink_reverse(sk); 1138 pf_state_key_unlink_inpcb(sk); 1139 pf_state_key_unref(sk); 1140 } 1141 1142 pf_state_unref(st); 1143} 1144 1145struct pf_state_key * 1146pf_alloc_state_key(int pool_flags) 1147{ 1148 struct pf_state_key *sk; 1149 1150 if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL) 1151 return (NULL); 1152 1153 PF_REF_INIT(sk->sk_refcnt); 1154 TAILQ_INIT(&sk->sk_states); 1155 sk->sk_removed = 1; 1156 1157 return (sk); 1158} 1159 1160static __inline int 1161pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx, 1162 struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi) 1163{ 1164 struct pf_state_key_cmp *key = arg; 1165#ifdef INET6 1166 struct pf_addr *target; 1167 1168 if (af == AF_INET || pd->proto != IPPROTO_ICMPV6) 1169 goto copy; 1170 1171 switch (pd->hdr.icmp6.icmp6_type) { 1172 case ND_NEIGHBOR_SOLICIT: 1173 if (multi) 1174 return (-1); 1175 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target; 1176 daddr = target; 1177 break; 1178 case ND_NEIGHBOR_ADVERT: 1179 if (multi) 1180 return (-1); 1181 target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target; 1182 saddr = target; 1183 if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) { 1184 key->addr[didx].addr32[0] = 0; 1185 key->addr[didx].addr32[1] = 0; 1186 key->addr[didx].addr32[2] = 0; 1187 key->addr[didx].addr32[3] = 0; 1188 daddr = NULL; /* overwritten */ 1189 } 1190 break; 1191 default: 1192 if (multi) { 1193 key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL; 1194 key->addr[sidx].addr32[1] = 0; 1195 key->addr[sidx].addr32[2] = 0; 1196 key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE; 1197 saddr = NULL; /* overwritten */ 1198 } 1199 } 1200 copy: 1201#endif /* INET6 */ 1202 if (saddr) 1203 pf_addrcpy(&key->addr[sidx], saddr, af); 1204 if (daddr) 1205 
pf_addrcpy(&key->addr[didx], daddr, af); 1206 1207 return (0); 1208} 1209 1210int 1211pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw, 1212 struct pf_state_key **sks, int rtableid) 1213{ 1214 /* if returning error we MUST pool_put state keys ourselves */ 1215 struct pf_state_key *sk1, *sk2; 1216 u_int wrdom = pd->rdomain; 1217 int afto = pd->af != pd->naf; 1218 1219 if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) 1220 return (ENOMEM); 1221 1222 pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst, 1223 pd->af, 0); 1224 sk1->port[pd->sidx] = pd->osport; 1225 sk1->port[pd->didx] = pd->odport; 1226 sk1->proto = pd->proto; 1227 sk1->af = pd->af; 1228 sk1->rdomain = pd->rdomain; 1229 sk1->hash = pf_pkt_hash(sk1->af, sk1->proto, 1230 &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]); 1231 if (rtableid >= 0) 1232 wrdom = rtable_l2(rtableid); 1233 1234 if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) || 1235 PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) || 1236 pd->nsport != pd->osport || pd->ndport != pd->odport || 1237 wrdom != pd->rdomain || afto) { /* NAT/NAT64 */ 1238 if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) { 1239 pf_state_key_unref(sk1); 1240 return (ENOMEM); 1241 } 1242 pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx, 1243 &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr, 1244 pd->naf, 0); 1245 sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport; 1246 sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport; 1247 if (afto) { 1248 switch (pd->proto) { 1249 case IPPROTO_ICMP: 1250 sk2->proto = IPPROTO_ICMPV6; 1251 break; 1252 case IPPROTO_ICMPV6: 1253 sk2->proto = IPPROTO_ICMP; 1254 break; 1255 default: 1256 sk2->proto = pd->proto; 1257 } 1258 } else 1259 sk2->proto = pd->proto; 1260 sk2->af = pd->naf; 1261 sk2->rdomain = wrdom; 1262 sk2->hash = pf_pkt_hash(sk2->af, sk2->proto, 1263 &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]); 1264 } else 1265 sk2 = pf_state_key_ref(sk1); 1266 1267 if (pd->dir == PF_IN) { 1268 *skw = sk1; 1269 *sks = sk2; 1270 } else { 1271 *sks = sk1; 1272 *skw = sk2; 1273 } 1274 1275 if (pf_status.debug >= LOG_DEBUG) { 1276 log(LOG_DEBUG, "pf: key setup: "); 1277 pf_print_state_parts(NULL, *skw, *sks); 1278 addlog("\n"); 1279 } 1280 1281 return (0); 1282} 1283 1284/* 1285 * pf_state_insert() does the following: 1286 * - links the pf_state up with pf_state_key(s). 1287 * - inserts the pf_state_keys into pf_state_tree. 1288 * - inserts the pf_state into the into pf_state_tree_id. 1289 * - tells pfsync about the state. 1290 * 1291 * pf_state_insert() owns the references to the pf_state_key structs 1292 * it is given. on failure to insert, these references are released. 1293 * on success, the caller owns a pf_state reference that allows it 1294 * to access the state keys. 
1295 */ 1296 1297int 1298pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp, 1299 struct pf_state_key **sksp, struct pf_state *st) 1300{ 1301 struct pf_state_key *skw = *skwp; 1302 struct pf_state_key *sks = *sksp; 1303 int same = (skw == sks); 1304 1305 PF_ASSERT_LOCKED(); 1306 1307 st->kif = kif; 1308 PF_STATE_ENTER_WRITE(); 1309 1310 skw = pf_state_key_attach(skw, st, PF_SK_WIRE); 1311 if (skw == NULL) { 1312 pf_state_key_unref(sks); 1313 PF_STATE_EXIT_WRITE(); 1314 return (-1); 1315 } 1316 1317 if (same) { 1318 /* pf_state_key_attach might have swapped skw */ 1319 if (skw != sks) { 1320 pf_state_key_unref(sks); 1321 sks = pf_state_key_ref(skw); 1322 } 1323 st->key[PF_SK_STACK] = sks; 1324 } else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) { 1325 pf_state_key_detach(st, PF_SK_WIRE); 1326 PF_STATE_EXIT_WRITE(); 1327 return (-1); 1328 } 1329 1330 if (st->id == 0 && st->creatorid == 0) { 1331 st->id = htobe64(pf_status.stateid++); 1332 st->creatorid = pf_status.hostid; 1333 } 1334 if (RBT_INSERT(pf_state_tree_id, &tree_id, st) != NULL) { 1335 if (pf_status.debug >= LOG_NOTICE) { 1336 log(LOG_NOTICE, "pf: state insert failed: " 1337 "id: %016llx creatorid: %08x", 1338 betoh64(st->id), ntohl(st->creatorid)); 1339 addlog("\n"); 1340 } 1341 pf_detach_state(st); 1342 PF_STATE_EXIT_WRITE(); 1343 return (-1); 1344 } 1345 pf_state_list_insert(&pf_state_list, st); 1346 counters_inc(pf_status_fcounters, FCNT_STATE_INSERT); 1347 pf_status.states++; 1348 pfi_kif_ref(kif, PFI_KIF_REF_STATE); 1349 PF_STATE_EXIT_WRITE(); 1350 1351#if NPFSYNC > 0 1352 pfsync_insert_state(st); 1353#endif /* NPFSYNC > 0 */ 1354 1355 *skwp = skw; 1356 *sksp = sks; 1357 1358 return (0); 1359} 1360 1361struct pf_state * 1362pf_find_state_byid(struct pf_state_cmp *key) 1363{ 1364 counters_inc(pf_status_fcounters, FCNT_STATE_SEARCH); 1365 1366 return (RBT_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key)); 1367} 1368 1369int 1370pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b, 1371 struct pfi_kif *kif, u_int dir) 1372{ 1373 /* a (from hdr) and b (new) must be exact opposites of each other */ 1374 if (a->af == b->af && a->proto == b->proto && 1375 PF_AEQ(&a->addr[0], &b->addr[1], a->af) && 1376 PF_AEQ(&a->addr[1], &b->addr[0], a->af) && 1377 a->port[0] == b->port[1] && 1378 a->port[1] == b->port[0] && a->rdomain == b->rdomain) 1379 return (0); 1380 else { 1381 /* mismatch. must not happen. */ 1382 if (pf_status.debug >= LOG_ERR) { 1383 log(LOG_ERR, 1384 "pf: state key linking mismatch! dir=%s, " 1385 "if=%s, stored af=%u, a0: ", 1386 dir == PF_OUT ? "OUT" : "IN", 1387 kif->pfik_name, a->af); 1388 pf_print_host(&a->addr[0], a->port[0], a->af); 1389 addlog(", a1: "); 1390 pf_print_host(&a->addr[1], a->port[1], a->af); 1391 addlog(", proto=%u", a->proto); 1392 addlog(", found af=%u, a0: ", b->af); 1393 pf_print_host(&b->addr[0], b->port[0], b->af); 1394 addlog(", a1: "); 1395 pf_print_host(&b->addr[1], b->port[1], b->af); 1396 addlog(", proto=%u", b->proto); 1397 addlog("\n"); 1398 } 1399 return (-1); 1400 } 1401} 1402 1403int 1404pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 1405 struct pf_state **stp) 1406{ 1407 struct pf_state_key *sk, *pkt_sk; 1408 struct pf_state_item *si; 1409 struct pf_state *st = NULL; 1410 int didx; 1411 1412 counters_inc(pf_status_fcounters, FCNT_STATE_SEARCH); 1413 if (pf_status.debug >= LOG_DEBUG) { 1414 log(LOG_DEBUG, "pf: key search, %s on %s: ", 1415 pd->dir == PF_OUT ? 
"out" : "in", pd->kif->pfik_name); 1416 pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL); 1417 addlog("\n"); 1418 } 1419 1420 pkt_sk = NULL; 1421 sk = NULL; 1422 if (pd->dir == PF_OUT) { 1423 /* first if block deals with outbound forwarded packet */ 1424 pkt_sk = pd->m->m_pkthdr.pf.statekey; 1425 1426 if (!pf_state_key_isvalid(pkt_sk)) { 1427 pf_mbuf_unlink_state_key(pd->m); 1428 pkt_sk = NULL; 1429 } 1430 1431 if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse)) 1432 sk = pkt_sk->sk_reverse; 1433 1434 if (pkt_sk == NULL) { 1435 struct inpcb *inp = pd->m->m_pkthdr.pf.inp; 1436 1437 /* here we deal with local outbound packet */ 1438 if (inp != NULL) { 1439 struct pf_state_key *inp_sk; 1440 1441 mtx_enter(&pf_inp_mtx); 1442 inp_sk = inp->inp_pf_sk; 1443 if (pf_state_key_isvalid(inp_sk)) { 1444 sk = inp_sk; 1445 mtx_leave(&pf_inp_mtx); 1446 } else if (inp_sk != NULL) { 1447 KASSERT(inp_sk->sk_inp == inp); 1448 inp_sk->sk_inp = NULL; 1449 inp->inp_pf_sk = NULL; 1450 mtx_leave(&pf_inp_mtx); 1451 1452 pf_state_key_unref(inp_sk); 1453 in_pcbunref(inp); 1454 } else 1455 mtx_leave(&pf_inp_mtx); 1456 } 1457 } 1458 } 1459 1460 if (sk == NULL) { 1461 if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl, 1462 (struct pf_state_key *)key)) == NULL) 1463 return (PF_DROP); 1464 if (pd->dir == PF_OUT && pkt_sk && 1465 pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0) 1466 pf_state_key_link_reverse(sk, pkt_sk); 1467 else if (pd->dir == PF_OUT) 1468 pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp); 1469 } 1470 1471 /* remove firewall data from outbound packet */ 1472 if (pd->dir == PF_OUT) 1473 pf_pkt_addr_changed(pd->m); 1474 1475 didx = (pd->dir == PF_IN) ? PF_SK_WIRE : PF_SK_STACK; 1476 1477 /* list is sorted, if-bound states before floating ones */ 1478 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1479 struct pf_state *sist = si->si_st; 1480 if (sist->timeout == PFTM_PURGE) 1481 continue; 1482 if (sist->kif != pfi_all && sist->kif != pd->kif) 1483 continue; 1484 1485 /* af-to needs to handled specially */ 1486 if (sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af) { 1487 if (sk != sist->key[didx]) 1488 continue; 1489 1490 /* af-to case */ 1491 } else { 1492 /* 1493 * af-to creates state for incoming (PF_IN) 1494 * connections, and then forces forwarding without 1495 * creating an outgoing state. this means the one 1496 * state covers both sides of the stack, so should 1497 * only match when pd dir is PF_IN. 1498 */ 1499 if (pd->dir != PF_IN) 1500 continue; 1501 1502 /* one of the st keys has to be sk */ 1503 } 1504 1505 st = sist; 1506 break; 1507 } 1508 1509 if (st == NULL) 1510 return (PF_DROP); 1511 if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED)) 1512 return (PF_DROP); 1513 1514 if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) { 1515 pf_add_threshold(&st->rule.ptr->pktrate); 1516 if (pf_check_threshold(&st->rule.ptr->pktrate)) 1517 return (PF_DROP); 1518 } 1519 1520 *stp = st; 1521 1522 return (PF_MATCH); 1523} 1524 1525struct pf_state * 1526pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more) 1527{ 1528 struct pf_state_key *sk; 1529 struct pf_state_item *si, *ret = NULL; 1530 1531 counters_inc(pf_status_fcounters, FCNT_STATE_SEARCH); 1532 1533 sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key); 1534 1535 if (sk != NULL) { 1536 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 1537 struct pf_state *sist = si->si_st; 1538 if (dir == PF_INOUT || 1539 (sk == (dir == PF_IN ? 
sist->key[PF_SK_WIRE] : 1540 sist->key[PF_SK_STACK]))) { 1541 if (more == NULL) 1542 return (sist); 1543 1544 if (ret) 1545 (*more)++; 1546 else 1547 ret = si; 1548 } 1549 } 1550 } 1551 return (ret ? ret->si_st : NULL); 1552} 1553 1554void 1555pf_state_peer_hton(const struct pf_state_peer *s, struct pfsync_state_peer *d) 1556{ 1557 d->seqlo = htonl(s->seqlo); 1558 d->seqhi = htonl(s->seqhi); 1559 d->seqdiff = htonl(s->seqdiff); 1560 d->max_win = htons(s->max_win); 1561 d->mss = htons(s->mss); 1562 d->state = s->state; 1563 d->wscale = s->wscale; 1564 if (s->scrub) { 1565 d->scrub.pfss_flags = 1566 htons(s->scrub->pfss_flags & PFSS_TIMESTAMP); 1567 d->scrub.pfss_ttl = (s)->scrub->pfss_ttl; 1568 d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod); 1569 d->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID; 1570 } 1571} 1572 1573void 1574pf_state_peer_ntoh(const struct pfsync_state_peer *s, struct pf_state_peer *d) 1575{ 1576 d->seqlo = ntohl(s->seqlo); 1577 d->seqhi = ntohl(s->seqhi); 1578 d->seqdiff = ntohl(s->seqdiff); 1579 d->max_win = ntohs(s->max_win); 1580 d->mss = ntohs(s->mss); 1581 d->state = s->state; 1582 d->wscale = s->wscale; 1583 if (s->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID && 1584 d->scrub != NULL) { 1585 d->scrub->pfss_flags = 1586 ntohs(s->scrub.pfss_flags) & PFSS_TIMESTAMP; 1587 d->scrub->pfss_ttl = s->scrub.pfss_ttl; 1588 d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod); 1589 } 1590} 1591 1592void 1593pf_state_export(struct pfsync_state *sp, struct pf_state *st) 1594{ 1595 int32_t expire; 1596 1597 memset(sp, 0, sizeof(struct pfsync_state)); 1598 1599 /* copy from state key */ 1600 sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0]; 1601 sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1]; 1602 sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0]; 1603 sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1]; 1604 sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain); 1605 sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af; 1606 sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0]; 1607 sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1]; 1608 sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0]; 1609 sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1]; 1610 sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain); 1611 sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af; 1612 sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]); 1613 sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]); 1614 sp->proto = st->key[PF_SK_WIRE]->proto; 1615 sp->af = st->key[PF_SK_WIRE]->af; 1616 1617 /* copy from state */ 1618 strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname)); 1619 sp->rt = st->rt; 1620 sp->rt_addr = st->rt_addr; 1621 sp->creation = htonl(getuptime() - st->creation); 1622 expire = pf_state_expires(st, st->timeout); 1623 if (expire <= getuptime()) 1624 sp->expire = htonl(0); 1625 else 1626 sp->expire = htonl(expire - getuptime()); 1627 1628 sp->direction = st->direction; 1629#if NPFLOG > 0 1630 sp->log = st->log; 1631#endif /* NPFLOG > 0 */ 1632 sp->timeout = st->timeout; 1633 sp->state_flags = htons(st->state_flags); 1634 if (READ_ONCE(st->sync_defer) != NULL) 1635 sp->state_flags |= htons(PFSTATE_ACK); 1636 if (!SLIST_EMPTY(&st->src_nodes)) 1637 sp->sync_flags |= PFSYNC_FLAG_SRCNODE; 1638 1639 sp->id = st->id; 1640 sp->creatorid = st->creatorid; 1641 pf_state_peer_hton(&st->src, &sp->src); 1642 pf_state_peer_hton(&st->dst, &sp->dst); 1643 1644 if (st->rule.ptr == NULL) 1645 
sp->rule = htonl(-1); 1646 else 1647 sp->rule = htonl(st->rule.ptr->nr); 1648 if (st->anchor.ptr == NULL) 1649 sp->anchor = htonl(-1); 1650 else 1651 sp->anchor = htonl(st->anchor.ptr->nr); 1652 sp->nat_rule = htonl(-1); /* left for compat, nat_rule is gone */ 1653 1654 pf_state_counter_hton(st->packets[0], sp->packets[0]); 1655 pf_state_counter_hton(st->packets[1], sp->packets[1]); 1656 pf_state_counter_hton(st->bytes[0], sp->bytes[0]); 1657 pf_state_counter_hton(st->bytes[1], sp->bytes[1]); 1658 1659 sp->max_mss = htons(st->max_mss); 1660 sp->min_ttl = st->min_ttl; 1661 sp->set_tos = st->set_tos; 1662 sp->set_prio[0] = st->set_prio[0]; 1663 sp->set_prio[1] = st->set_prio[1]; 1664} 1665 1666int 1667pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s, 1668 struct pf_state_peer *d) 1669{ 1670 if (s->scrub.scrub_flag && d->scrub == NULL) 1671 return (pf_normalize_tcp_alloc(d)); 1672 1673 return (0); 1674} 1675 1676#if NPFSYNC > 0 1677int 1678pf_state_import(const struct pfsync_state *sp, int flags) 1679{ 1680 struct pf_state *st = NULL; 1681 struct pf_state_key *skw = NULL, *sks = NULL; 1682 struct pf_rule *r = NULL; 1683 struct pfi_kif *kif; 1684 int pool_flags; 1685 int error = ENOMEM; 1686 int n = 0; 1687 1688 PF_ASSERT_LOCKED(); 1689 1690 if (sp->creatorid == 0) { 1691 DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__, 1692 ntohl(sp->creatorid)); 1693 return (EINVAL); 1694 } 1695 1696 if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) { 1697 DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__, 1698 sp->ifname); 1699 if (flags & PFSYNC_SI_IOCTL) 1700 return (EINVAL); 1701 return (0); /* skip this state */ 1702 } 1703 1704 if (sp->af == 0) 1705 return (0); /* skip this state */ 1706 1707 /* 1708 * If the ruleset checksums match or the state is coming from the ioctl, 1709 * it's safe to associate the state with the rule of that number. 
1710 */ 1711 if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) && 1712 (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) && 1713 ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) { 1714 TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries) 1715 if (ntohl(sp->rule) == n++) 1716 break; 1717 } else 1718 r = &pf_default_rule; 1719 1720 if ((r->max_states && r->states_cur >= r->max_states)) 1721 goto cleanup; 1722 1723 if (flags & PFSYNC_SI_IOCTL) 1724 pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO; 1725 else 1726 pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO; 1727 1728 if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL) 1729 goto cleanup; 1730 1731 if ((skw = pf_alloc_state_key(pool_flags)) == NULL) 1732 goto cleanup; 1733 1734 if ((sp->key[PF_SK_WIRE].af && 1735 (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) || 1736 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0], 1737 &sp->key[PF_SK_STACK].addr[0], sp->af) || 1738 PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1], 1739 &sp->key[PF_SK_STACK].addr[1], sp->af) || 1740 sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] || 1741 sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] || 1742 sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) { 1743 if ((sks = pf_alloc_state_key(pool_flags)) == NULL) 1744 goto cleanup; 1745 } else 1746 sks = pf_state_key_ref(skw); 1747 1748 /* allocate memory for scrub info */ 1749 if (pf_state_alloc_scrub_memory(&sp->src, &st->src) || 1750 pf_state_alloc_scrub_memory(&sp->dst, &st->dst)) 1751 goto cleanup; 1752 1753 /* copy to state key(s) */ 1754 skw->addr[0] = sp->key[PF_SK_WIRE].addr[0]; 1755 skw->addr[1] = sp->key[PF_SK_WIRE].addr[1]; 1756 skw->port[0] = sp->key[PF_SK_WIRE].port[0]; 1757 skw->port[1] = sp->key[PF_SK_WIRE].port[1]; 1758 skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain); 1759 skw->proto = sp->proto; 1760 if (!(skw->af = sp->key[PF_SK_WIRE].af)) 1761 skw->af = sp->af; 1762 skw->hash = pf_pkt_hash(skw->af, skw->proto, 1763 &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]); 1764 1765 if (sks != skw) { 1766 sks->addr[0] = sp->key[PF_SK_STACK].addr[0]; 1767 sks->addr[1] = sp->key[PF_SK_STACK].addr[1]; 1768 sks->port[0] = sp->key[PF_SK_STACK].port[0]; 1769 sks->port[1] = sp->key[PF_SK_STACK].port[1]; 1770 sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain); 1771 if (!(sks->af = sp->key[PF_SK_STACK].af)) 1772 sks->af = sp->af; 1773 if (sks->af != skw->af) { 1774 switch (sp->proto) { 1775 case IPPROTO_ICMP: 1776 sks->proto = IPPROTO_ICMPV6; 1777 break; 1778 case IPPROTO_ICMPV6: 1779 sks->proto = IPPROTO_ICMP; 1780 break; 1781 default: 1782 sks->proto = sp->proto; 1783 } 1784 } else 1785 sks->proto = sp->proto; 1786 1787 if (((sks->af != AF_INET) && (sks->af != AF_INET6)) || 1788 ((skw->af != AF_INET) && (skw->af != AF_INET6))) { 1789 error = EINVAL; 1790 goto cleanup; 1791 } 1792 1793 sks->hash = pf_pkt_hash(sks->af, sks->proto, 1794 &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]); 1795 1796 } else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) { 1797 error = EINVAL; 1798 goto cleanup; 1799 } 1800 st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]); 1801 st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]); 1802 1803 /* copy to state */ 1804 st->rt_addr = sp->rt_addr; 1805 st->rt = sp->rt; 1806 st->creation = getuptime() - ntohl(sp->creation); 1807 st->expire = getuptime(); 1808 if (ntohl(sp->expire)) { 1809 u_int32_t timeout; 1810 1811 timeout = r->timeout[sp->timeout]; 1812 if (!timeout) 1813 timeout = pf_default_rule.timeout[sp->timeout]; 1814 
1815 /* sp->expire may have been adaptively scaled by export. */ 1816 st->expire -= timeout - ntohl(sp->expire); 1817 } 1818 1819 st->direction = sp->direction; 1820 st->log = sp->log; 1821 st->timeout = sp->timeout; 1822 st->state_flags = ntohs(sp->state_flags); 1823 st->max_mss = ntohs(sp->max_mss); 1824 st->min_ttl = sp->min_ttl; 1825 st->set_tos = sp->set_tos; 1826 st->set_prio[0] = sp->set_prio[0]; 1827 st->set_prio[1] = sp->set_prio[1]; 1828 1829 st->id = sp->id; 1830 st->creatorid = sp->creatorid; 1831 pf_state_peer_ntoh(&sp->src, &st->src); 1832 pf_state_peer_ntoh(&sp->dst, &st->dst); 1833 1834 st->rule.ptr = r; 1835 st->anchor.ptr = NULL; 1836 1837 PF_REF_INIT(st->refcnt); 1838 mtx_init(&st->mtx, IPL_NET); 1839 1840 /* XXX when we have anchors, use STATE_INC_COUNTERS */ 1841 r->states_cur++; 1842 r->states_tot++; 1843 1844 st->sync_state = PFSYNC_S_NONE; 1845 st->pfsync_time = getuptime(); 1846#if NPFSYNC > 0 1847 pfsync_init_state(st, skw, sks, flags); 1848#endif 1849 1850 if (pf_state_insert(kif, &skw, &sks, st) != 0) { 1851 /* XXX when we have anchors, use STATE_DEC_COUNTERS */ 1852 r->states_cur--; 1853 error = EEXIST; 1854 goto cleanup_state; 1855 } 1856 1857 return (0); 1858 1859 cleanup: 1860 if (skw != NULL) 1861 pf_state_key_unref(skw); 1862 if (sks != NULL) 1863 pf_state_key_unref(sks); 1864 1865 cleanup_state: /* pf_state_insert frees the state keys */ 1866 if (st) { 1867 if (st->dst.scrub) 1868 pool_put(&pf_state_scrub_pl, st->dst.scrub); 1869 if (st->src.scrub) 1870 pool_put(&pf_state_scrub_pl, st->src.scrub); 1871 pool_put(&pf_state_pl, st); 1872 } 1873 return (error); 1874} 1875#endif /* NPFSYNC > 0 */ 1876 1877/* END state table stuff */ 1878 1879void pf_purge_states(void *); 1880struct task pf_purge_states_task = 1881 TASK_INITIALIZER(pf_purge_states, NULL); 1882 1883void pf_purge_states_tick(void *); 1884struct timeout pf_purge_states_to = 1885 TIMEOUT_INITIALIZER(pf_purge_states_tick, NULL); 1886 1887unsigned int pf_purge_expired_states(unsigned int, unsigned int); 1888 1889/* 1890 * how many states to scan this interval. 1891 * 1892 * this is set when the timeout fires, and reduced by the task. the 1893 * task will reschedule itself until the limit is reduced to zero, 1894 * and then it adds the timeout again. 1895 */ 1896unsigned int pf_purge_states_limit; 1897 1898/* 1899 * limit how many states are processed with locks held per run of 1900 * the state purge task. 
1901 */ 1902unsigned int pf_purge_states_collect = 64; 1903 1904 void 1905pf_purge_states_tick(void *null) 1906 { 1907 unsigned int limit = pf_status.states; 1908 unsigned int interval = pf_default_rule.timeout[PFTM_INTERVAL]; 1909 1910 if (limit == 0) { 1911 timeout_add_sec(&pf_purge_states_to, 1); 1912 return; 1913 } 1914 1915 /* 1916 * process a fraction of the state table every second 1917 */ 1918 1919 if (interval > 1) 1920 limit /= interval; 1921 1922 pf_purge_states_limit = limit; 1923 task_add(systqmp, &pf_purge_states_task); 1924} 1925 1926void 1927pf_purge_states(void *null) 1928{ 1929 unsigned int limit; 1930 unsigned int scanned; 1931 1932 limit = pf_purge_states_limit; 1933 if (limit < pf_purge_states_collect) 1934 limit = pf_purge_states_collect; 1935 1936 scanned = pf_purge_expired_states(limit, pf_purge_states_collect); 1937 if (scanned >= pf_purge_states_limit) { 1938 /* we've run out of states to scan this "interval" */ 1939 timeout_add_sec(&pf_purge_states_to, 1); 1940 return; 1941 } 1942 1943 pf_purge_states_limit -= scanned; 1944 task_add(systqmp, &pf_purge_states_task); 1945} 1946 1947void pf_purge_tick(void *); 1948struct timeout pf_purge_to = 1949 TIMEOUT_INITIALIZER(pf_purge_tick, NULL); 1950 1951void pf_purge(void *); 1952struct task pf_purge_task = 1953 TASK_INITIALIZER(pf_purge, NULL); 1954 1955void 1956pf_purge_tick(void *null) 1957{ 1958 task_add(systqmp, &pf_purge_task); 1959} 1960 1961void 1962pf_purge(void *null) 1963{ 1964 unsigned int interval = max(1, pf_default_rule.timeout[PFTM_INTERVAL]); 1965 1966 PF_LOCK(); 1967 1968 pf_purge_expired_src_nodes(); 1969 pf_source_purge(); 1970 1971 PF_UNLOCK(); 1972 1973 /* 1974 * Fragments don't require PF_LOCK(), they use their own lock. 1975 */ 1976 pf_purge_expired_fragments(); 1977 1978 /* interpret the interval as idle time between runs */ 1979 timeout_add_sec(&pf_purge_to, interval); 1980} 1981 1982int32_t 1983pf_state_expires(const struct pf_state *st, uint8_t stimeout) 1984{ 1985 u_int32_t timeout; 1986 u_int32_t start; 1987 u_int32_t end; 1988 u_int32_t states; 1989 1990 /* 1991 * pf_state_expires is used by the state purge task to 1992 * decide if a state is a candidate for cleanup, and by the 1993 * pfsync state export code to populate an expiry time. 1994 * 1995 * this function may be called by the state purge task while 1996 * the state is being modified. avoid inconsistent reads of 1997 * state->timeout by having the caller do the read (and any 1998 * checks it needs to do on the same variable) and then pass 1999 * their view of the timeout in here for this function to use. 2000 * the only consequence of using a stale timeout value is 2001 * that the state won't be a candidate for purging until the 2002 * next pass of the purge task. 
2003 */ 2004 2005 /* handle all PFTM_* >= PFTM_MAX here */ 2006 if (stimeout >= PFTM_MAX) 2007 return (0); 2008 2009 KASSERT(stimeout < PFTM_MAX); 2010 2011 timeout = st->rule.ptr->timeout[stimeout]; 2012 if (!timeout) 2013 timeout = pf_default_rule.timeout[stimeout]; 2014 2015 start = st->rule.ptr->timeout[PFTM_ADAPTIVE_START]; 2016 if (start) { 2017 end = st->rule.ptr->timeout[PFTM_ADAPTIVE_END]; 2018 states = st->rule.ptr->states_cur; 2019 } else { 2020 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START]; 2021 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END]; 2022 states = pf_status.states; 2023 } 2024 if (end && states > start && start < end) { 2025 if (states >= end) 2026 return (0); 2027 2028 timeout = (u_int64_t)timeout * (end - states) / (end - start); 2029 } 2030 2031 return (st->expire + timeout); 2032} 2033 2034void 2035pf_purge_expired_src_nodes(void) 2036{ 2037 struct pf_src_node *cur, *next; 2038 2039 PF_ASSERT_LOCKED(); 2040 2041 RB_FOREACH_SAFE(cur, pf_src_tree, &tree_src_tracking, next) { 2042 if (cur->states == 0 && cur->expire <= getuptime()) { 2043 pf_remove_src_node(cur); 2044 } 2045 } 2046} 2047 2048void 2049pf_src_tree_remove_state(struct pf_state *st) 2050{ 2051 u_int32_t timeout; 2052 struct pf_sn_item *sni; 2053 2054 while ((sni = SLIST_FIRST(&st->src_nodes)) != NULL) { 2055 SLIST_REMOVE_HEAD(&st->src_nodes, next); 2056 if (st->src.tcp_est) 2057 --sni->sn->conn; 2058 if (--sni->sn->states == 0) { 2059 timeout = st->rule.ptr->timeout[PFTM_SRC_NODE]; 2060 if (!timeout) 2061 timeout = 2062 pf_default_rule.timeout[PFTM_SRC_NODE]; 2063 sni->sn->expire = getuptime() + timeout; 2064 } 2065 pool_put(&pf_sn_item_pl, sni); 2066 } 2067} 2068 2069void 2070pf_remove_state(struct pf_state *st) 2071{ 2072 struct pf_state_link *pfl; 2073 2074 PF_ASSERT_LOCKED(); 2075 2076 mtx_enter(&st->mtx); 2077 if (st->timeout == PFTM_UNLINKED) { 2078 mtx_leave(&st->mtx); 2079 return; 2080 } 2081 st->timeout = PFTM_UNLINKED; 2082 mtx_leave(&st->mtx); 2083 2084 /* handle load balancing related tasks */ 2085 pf_postprocess_addr(st); 2086 2087 if (st->src.state == PF_TCPS_PROXY_DST) { 2088 pf_send_tcp(st->rule.ptr, st->key[PF_SK_WIRE]->af, 2089 &st->key[PF_SK_WIRE]->addr[1], 2090 &st->key[PF_SK_WIRE]->addr[0], 2091 st->key[PF_SK_WIRE]->port[1], 2092 st->key[PF_SK_WIRE]->port[0], 2093 st->src.seqhi, st->src.seqlo + 1, 2094 TH_RST|TH_ACK, 0, 0, 0, 1, st->tag, 2095 st->key[PF_SK_WIRE]->rdomain, NULL); 2096 } 2097 if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP) 2098 pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED); 2099 2100 while ((pfl = SLIST_FIRST(&st->linkage)) != NULL) { 2101 struct pf_state_link_list *list; 2102 unsigned int gen; 2103 2104 SLIST_REMOVE_HEAD(&st->linkage, pfl_linkage); 2105 2106 switch (pfl->pfl_type) { 2107 case PF_STATE_LINK_TYPE_STATELIM: { 2108 struct pf_statelim *stlim; 2109 2110 stlim = pf_statelim_find(st->statelim); 2111 KASSERTMSG(stlim != NULL, 2112 "pf_state %p pfl %p cannot find statelim %u", 2113 st, pfl, st->statelim); 2114 2115 gen = pf_statelim_enter(stlim); 2116 stlim->pfstlim_inuse--; 2117 pf_statelim_leave(stlim, gen); 2118 2119 list = &stlim->pfstlim_states; 2120 break; 2121 } 2122 case PF_STATE_LINK_TYPE_SOURCELIM: { 2123 struct pf_sourcelim *srlim; 2124 struct pf_source key, *sr; 2125 2126 srlim = pf_sourcelim_find(st->sourcelim); 2127 KASSERTMSG(srlim != NULL, 2128 "pf_state %p pfl %p cannot find sourcelim %u", 2129 st, pfl, st->sourcelim); 2130 2131 pf_source_key(srlim, &key, 2132 st->key[PF_SK_WIRE]->af, 2133 st->key[PF_SK_WIRE]->rdomain, 2134 
&st->key[PF_SK_WIRE]->addr[0 /* XXX or 1? */]); 2135 2136 sr = pf_source_find(srlim, &key); 2137 KASSERTMSG(sr != NULL, 2138 "pf_state %p pfl %p cannot find source in %u", 2139 st, pfl, st->sourcelim); 2140 2141 gen = pf_sourcelim_enter(srlim); 2142 srlim->pfsrlim_counters.inuse--; 2143 pf_sourcelim_leave(srlim, gen); 2144 pf_source_rele(sr); 2145 2146 list = &sr->pfsr_states; 2147 break; 2148 } 2149 default: 2150 panic("%s: unexpected link type on pfl %p", 2151 __func__, pfl); 2152 } 2153 2154 PF_STATE_ASSERT_LOCKED(); 2155 TAILQ_REMOVE(list, pfl, pfl_link); 2156 pool_put(&pf_state_link_pl, pfl); 2157 } 2158 2159 RBT_REMOVE(pf_state_tree_id, &tree_id, st); 2160#if NPFLOW > 0 2161 if (st->state_flags & PFSTATE_PFLOW) 2162 export_pflow(st); 2163#endif /* NPFLOW > 0 */ 2164#if NPFSYNC > 0 2165 pfsync_delete_state(st); 2166#endif /* NPFSYNC > 0 */ 2167 pf_src_tree_remove_state(st); 2168 pf_detach_state(st); 2169} 2170 2171void 2172pf_remove_divert_state(struct inpcb *inp) 2173{ 2174 struct pf_state_key *sk; 2175 struct pf_state_item *si; 2176 2177 PF_ASSERT_UNLOCKED(); 2178 2179 if (READ_ONCE(inp->inp_pf_sk) == NULL) 2180 return; 2181 2182 mtx_enter(&pf_inp_mtx); 2183 sk = pf_state_key_ref(inp->inp_pf_sk); 2184 mtx_leave(&pf_inp_mtx); 2185 if (sk == NULL) 2186 return; 2187 2188 PF_LOCK(); 2189 PF_STATE_ENTER_WRITE(); 2190 TAILQ_FOREACH(si, &sk->sk_states, si_entry) { 2191 struct pf_state *sist = si->si_st; 2192 if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr && 2193 (sist->rule.ptr->divert.type == PF_DIVERT_TO || 2194 sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) { 2195 if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP && 2196 sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) { 2197 /* 2198 * If the local address is translated, keep 2199 * the state for "tcp.closed" seconds to 2200 * prevent its source port from being reused. 
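				 * An illustrative case: a divert rule on a
				 * connection whose local side was translated
				 * (PF_SK_WIRE and PF_SK_STACK differ).  If
				 * the state were removed as soon as the
				 * socket went away, the translated source
				 * port could be handed out again while the
				 * remote peer still considers the old
				 * connection live, so the code below pushes
				 * both peers to TIME_WAIT and lets
				 * PFTM_TCP_CLOSED expire the state instead.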
2201 */ 2202 if (sist->src.state < TCPS_FIN_WAIT_2 || 2203 sist->dst.state < TCPS_FIN_WAIT_2) { 2204 pf_set_protostate(sist, PF_PEER_BOTH, 2205 TCPS_TIME_WAIT); 2206 pf_update_state_timeout(sist, 2207 PFTM_TCP_CLOSED); 2208 sist->expire = getuptime(); 2209 } 2210 sist->state_flags |= PFSTATE_INP_UNLINKED; 2211 } else 2212 pf_remove_state(sist); 2213 break; 2214 } 2215 } 2216 PF_STATE_EXIT_WRITE(); 2217 PF_UNLOCK(); 2218 2219 pf_state_key_unref(sk); 2220} 2221 2222void 2223pf_free_state(struct pf_state *st) 2224{ 2225 struct pf_rule_item *ri; 2226 2227 PF_ASSERT_LOCKED(); 2228 2229#if NPFSYNC > 0 2230 if (pfsync_state_in_use(st)) 2231 return; 2232#endif /* NPFSYNC > 0 */ 2233 2234 KASSERT(st->timeout == PFTM_UNLINKED); 2235 if (--st->rule.ptr->states_cur == 0 && 2236 st->rule.ptr->src_nodes == 0) 2237 pf_rm_rule(NULL, st->rule.ptr); 2238 if (st->anchor.ptr != NULL) 2239 if (--st->anchor.ptr->states_cur == 0) 2240 pf_rm_rule(NULL, st->anchor.ptr); 2241 while ((ri = SLIST_FIRST(&st->match_rules))) { 2242 SLIST_REMOVE_HEAD(&st->match_rules, entry); 2243 if (--ri->r->states_cur == 0 && 2244 ri->r->src_nodes == 0) 2245 pf_rm_rule(NULL, ri->r); 2246 pool_put(&pf_rule_item_pl, ri); 2247 } 2248 pf_normalize_tcp_cleanup(st); 2249 pfi_kif_unref(st->kif, PFI_KIF_REF_STATE); 2250 pf_state_list_remove(&pf_state_list, st); 2251 if (st->tag) 2252 pf_tag_unref(st->tag); 2253 pf_state_unref(st); 2254 counters_inc(pf_status_fcounters, FCNT_STATE_REMOVALS); 2255 pf_status.states--; 2256} 2257 2258unsigned int 2259pf_purge_expired_states(const unsigned int limit, const unsigned int collect) 2260{ 2261 /* 2262 * this task/thread/context/whatever is the only thing that 2263 * removes states from the pf_state_list, so the cur reference 2264 * it holds between calls is guaranteed to still be in the 2265 * list. 2266 */ 2267 static struct pf_state *cur = NULL; 2268 2269 struct pf_state *head, *tail; 2270 struct pf_state *st; 2271 SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl); 2272 time_t now; 2273 unsigned int scanned; 2274 unsigned int collected = 0; 2275 2276 PF_ASSERT_UNLOCKED(); 2277 2278 rw_enter_read(&pf_state_list.pfs_rwl); 2279 2280 mtx_enter(&pf_state_list.pfs_mtx); 2281 head = TAILQ_FIRST(&pf_state_list.pfs_list); 2282 tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue); 2283 mtx_leave(&pf_state_list.pfs_mtx); 2284 2285 if (head == NULL) { 2286 /* the list is empty */ 2287 rw_exit_read(&pf_state_list.pfs_rwl); 2288 return (limit); 2289 } 2290 2291 /* (re)start at the front of the list */ 2292 if (cur == NULL) 2293 cur = head; 2294 2295 now = getuptime(); 2296 2297 for (scanned = 0; scanned < limit; scanned++) { 2298 uint8_t stimeout = cur->timeout; 2299 unsigned int limited = 0; 2300 2301 if ((stimeout == PFTM_UNLINKED) || 2302 (pf_state_expires(cur, stimeout) <= now)) { 2303 st = pf_state_ref(cur); 2304 SLIST_INSERT_HEAD(&gcl, st, gc_list); 2305 2306 if (++collected >= collect) 2307 limited = 1; 2308 } 2309 2310 /* don't iterate past the end of our view of the list */ 2311 if (cur == tail) { 2312 cur = NULL; 2313 break; 2314 } 2315 2316 cur = TAILQ_NEXT(cur, entry_list); 2317 2318 /* don't spend too much time here. 
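		 * the SPCF_SHOULDYIELD test bails out once the scheduler
		 * has asked this thread to yield the cpu, and the collect
		 * limit caps how many states are queued for freeing in one
		 * go; the static cur pointer above remembers where the scan
		 * stopped, so the next run of the task resumes from there.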
*/ 2319 if (ISSET(READ_ONCE(curcpu()->ci_schedstate.spc_schedflags), 2320 SPCF_SHOULDYIELD) || limited) 2321 break; 2322 } 2323 2324 rw_exit_read(&pf_state_list.pfs_rwl); 2325 2326 if (SLIST_EMPTY(&gcl)) 2327 return (scanned); 2328 2329 rw_enter_write(&pf_state_list.pfs_rwl); 2330 PF_LOCK(); 2331 PF_STATE_ENTER_WRITE(); 2332 SLIST_FOREACH(st, &gcl, gc_list) { 2333 if (st->timeout != PFTM_UNLINKED) 2334 pf_remove_state(st); 2335 2336 pf_free_state(st); 2337 } 2338 PF_STATE_EXIT_WRITE(); 2339 PF_UNLOCK(); 2340 rw_exit_write(&pf_state_list.pfs_rwl); 2341 2342 while ((st = SLIST_FIRST(&gcl)) != NULL) { 2343 SLIST_REMOVE_HEAD(&gcl, gc_list); 2344 pf_state_unref(st); 2345 } 2346 2347 return (scanned); 2348} 2349 2350int 2351pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait) 2352{ 2353 if (aw->type != PF_ADDR_TABLE) 2354 return (0); 2355 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL) 2356 return (1); 2357 return (0); 2358} 2359 2360void 2361pf_tbladdr_remove(struct pf_addr_wrap *aw) 2362{ 2363 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL) 2364 return; 2365 pfr_detach_table(aw->p.tbl); 2366 aw->p.tbl = NULL; 2367} 2368 2369void 2370pf_tbladdr_copyout(struct pf_addr_wrap *aw) 2371{ 2372 struct pfr_ktable *kt = aw->p.tbl; 2373 2374 if (aw->type != PF_ADDR_TABLE || kt == NULL) 2375 return; 2376 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL) 2377 kt = kt->pfrkt_root; 2378 aw->p.tbl = NULL; 2379 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ? 2380 kt->pfrkt_cnt : -1; 2381} 2382 2383void 2384pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af) 2385{ 2386 switch (af) { 2387 case AF_INET: { 2388 u_int32_t a = ntohl(addr->addr32[0]); 2389 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255, 2390 (a>>8)&255, a&255); 2391 if (p) { 2392 p = ntohs(p); 2393 addlog(":%u", p); 2394 } 2395 break; 2396 } 2397#ifdef INET6 2398 case AF_INET6: { 2399 u_int16_t b; 2400 u_int8_t i, curstart, curend, maxstart, maxend; 2401 curstart = curend = maxstart = maxend = 255; 2402 for (i = 0; i < 8; i++) { 2403 if (!addr->addr16[i]) { 2404 if (curstart == 255) 2405 curstart = i; 2406 curend = i; 2407 } else { 2408 if ((curend - curstart) > 2409 (maxend - maxstart)) { 2410 maxstart = curstart; 2411 maxend = curend; 2412 } 2413 curstart = curend = 255; 2414 } 2415 } 2416 if ((curend - curstart) > 2417 (maxend - maxstart)) { 2418 maxstart = curstart; 2419 maxend = curend; 2420 } 2421 for (i = 0; i < 8; i++) { 2422 if (i >= maxstart && i <= maxend) { 2423 if (i == 0) 2424 addlog(":"); 2425 if (i == maxend) 2426 addlog(":"); 2427 } else { 2428 b = ntohs(addr->addr16[i]); 2429 addlog("%x", b); 2430 if (i < 7) 2431 addlog(":"); 2432 } 2433 } 2434 if (p) { 2435 p = ntohs(p); 2436 addlog("[%u]", p); 2437 } 2438 break; 2439 } 2440#endif /* INET6 */ 2441 } 2442} 2443 2444void 2445pf_print_state(struct pf_state *st) 2446{ 2447 pf_print_state_parts(st, NULL, NULL); 2448} 2449 2450void 2451pf_print_state_parts(struct pf_state *st, 2452 struct pf_state_key *skwp, struct pf_state_key *sksp) 2453{ 2454 struct pf_state_key *skw, *sks; 2455 u_int8_t proto, dir; 2456 2457 /* Do our best to fill these, but they're skipped if NULL */ 2458 skw = skwp ? skwp : (st ? st->key[PF_SK_WIRE] : NULL); 2459 sks = sksp ? sksp : (st ? st->key[PF_SK_STACK] : NULL); 2460 proto = skw ? skw->proto : (sks ? sks->proto : 0); 2461 dir = st ? 
st->direction : 0; 2462 2463 switch (proto) { 2464 case IPPROTO_IPV4: 2465 addlog("IPv4"); 2466 break; 2467 case IPPROTO_IPV6: 2468 addlog("IPv6"); 2469 break; 2470 case IPPROTO_TCP: 2471 addlog("TCP"); 2472 break; 2473 case IPPROTO_UDP: 2474 addlog("UDP"); 2475 break; 2476 case IPPROTO_ICMP: 2477 addlog("ICMP"); 2478 break; 2479 case IPPROTO_ICMPV6: 2480 addlog("ICMPv6"); 2481 break; 2482 default: 2483 addlog("%u", proto); 2484 break; 2485 } 2486 switch (dir) { 2487 case PF_IN: 2488 addlog(" in"); 2489 break; 2490 case PF_OUT: 2491 addlog(" out"); 2492 break; 2493 } 2494 if (skw) { 2495 addlog(" wire: (%d) ", skw->rdomain); 2496 pf_print_host(&skw->addr[0], skw->port[0], skw->af); 2497 addlog(" "); 2498 pf_print_host(&skw->addr[1], skw->port[1], skw->af); 2499 } 2500 if (sks) { 2501 addlog(" stack: (%d) ", sks->rdomain); 2502 if (sks != skw) { 2503 pf_print_host(&sks->addr[0], sks->port[0], sks->af); 2504 addlog(" "); 2505 pf_print_host(&sks->addr[1], sks->port[1], sks->af); 2506 } else 2507 addlog("-"); 2508 } 2509 if (st) { 2510 if (proto == IPPROTO_TCP) { 2511 addlog(" [lo=%u high=%u win=%u modulator=%u", 2512 st->src.seqlo, st->src.seqhi, 2513 st->src.max_win, st->src.seqdiff); 2514 if (st->src.wscale && st->dst.wscale) 2515 addlog(" wscale=%u", 2516 st->src.wscale & PF_WSCALE_MASK); 2517 addlog("]"); 2518 addlog(" [lo=%u high=%u win=%u modulator=%u", 2519 st->dst.seqlo, st->dst.seqhi, 2520 st->dst.max_win, st->dst.seqdiff); 2521 if (st->src.wscale && st->dst.wscale) 2522 addlog(" wscale=%u", 2523 st->dst.wscale & PF_WSCALE_MASK); 2524 addlog("]"); 2525 } 2526 addlog(" %u:%u", st->src.state, st->dst.state); 2527 if (st->rule.ptr) 2528 addlog(" @%d", st->rule.ptr->nr); 2529 } 2530} 2531 2532void 2533pf_print_flags(u_int8_t f) 2534{ 2535 if (f) 2536 addlog(" "); 2537 if (f & TH_FIN) 2538 addlog("F"); 2539 if (f & TH_SYN) 2540 addlog("S"); 2541 if (f & TH_RST) 2542 addlog("R"); 2543 if (f & TH_PUSH) 2544 addlog("P"); 2545 if (f & TH_ACK) 2546 addlog("A"); 2547 if (f & TH_URG) 2548 addlog("U"); 2549 if (f & TH_ECE) 2550 addlog("E"); 2551 if (f & TH_CWR) 2552 addlog("W"); 2553} 2554 2555#define PF_SET_SKIP_STEPS(i) \ 2556 do { \ 2557 while (head[i] != cur) { \ 2558 head[i]->skip[i].ptr = cur; \ 2559 head[i] = TAILQ_NEXT(head[i], entries); \ 2560 } \ 2561 } while (0) 2562 2563void 2564pf_calc_skip_steps(struct pf_rulequeue *rules) 2565{ 2566 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT]; 2567 int i; 2568 2569 cur = TAILQ_FIRST(rules); 2570 prev = cur; 2571 for (i = 0; i < PF_SKIP_COUNT; ++i) 2572 head[i] = cur; 2573 while (cur != NULL) { 2574 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot) 2575 PF_SET_SKIP_STEPS(PF_SKIP_IFP); 2576 if (cur->direction != prev->direction) 2577 PF_SET_SKIP_STEPS(PF_SKIP_DIR); 2578 if (cur->onrdomain != prev->onrdomain || 2579 cur->ifnot != prev->ifnot) 2580 PF_SET_SKIP_STEPS(PF_SKIP_RDOM); 2581 if (cur->af != prev->af) 2582 PF_SET_SKIP_STEPS(PF_SKIP_AF); 2583 if (cur->proto != prev->proto) 2584 PF_SET_SKIP_STEPS(PF_SKIP_PROTO); 2585 if (cur->src.neg != prev->src.neg || 2586 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr)) 2587 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR); 2588 if (cur->dst.neg != prev->dst.neg || 2589 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr)) 2590 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR); 2591 if (cur->src.port[0] != prev->src.port[0] || 2592 cur->src.port[1] != prev->src.port[1] || 2593 cur->src.port_op != prev->src.port_op) 2594 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT); 2595 if (cur->dst.port[0] != prev->dst.port[0] || 2596 
cur->dst.port[1] != prev->dst.port[1] || 2597 cur->dst.port_op != prev->dst.port_op) 2598 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT); 2599 2600 prev = cur; 2601 cur = TAILQ_NEXT(cur, entries); 2602 } 2603 for (i = 0; i < PF_SKIP_COUNT; ++i) 2604 PF_SET_SKIP_STEPS(i); 2605} 2606 2607int 2608pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2) 2609{ 2610 if (aw1->type != aw2->type) 2611 return (1); 2612 switch (aw1->type) { 2613 case PF_ADDR_ADDRMASK: 2614 case PF_ADDR_RANGE: 2615 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6)) 2616 return (1); 2617 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6)) 2618 return (1); 2619 return (0); 2620 case PF_ADDR_DYNIFTL: 2621 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt); 2622 case PF_ADDR_NONE: 2623 case PF_ADDR_NOROUTE: 2624 case PF_ADDR_URPFFAILED: 2625 return (0); 2626 case PF_ADDR_TABLE: 2627 return (aw1->p.tbl != aw2->p.tbl); 2628 case PF_ADDR_RTLABEL: 2629 return (aw1->v.rtlabel != aw2->v.rtlabel); 2630 default: 2631 addlog("invalid address type: %d\n", aw1->type); 2632 return (1); 2633 } 2634} 2635 2636/* This algorithm computes 'a + b - c' in ones-complement using a trick to 2637 * emulate at most one ones-complement subtraction. This thereby limits net 2638 * carries/borrows to at most one, eliminating a reduction step and saving one 2639 * each of +, >>, & and ~. 2640 * 2641 * def. x mod y = x - (x//y)*y for integer x,y 2642 * def. sum = x mod 2^16 2643 * def. accumulator = (x >> 16) mod 2^16 2644 * 2645 * The trick works as follows: subtracting exactly one u_int16_t from the 2646 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the 2647 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the 2648 * ones-complement borrow: 2649 * 2650 * (sum + accumulator) mod 2^16 2651 * = { assume underflow: accumulator := 2^16 - 1 } 2652 * (sum + 2^16 - 1) mod 2^16 2653 * = { mod } 2654 * (sum - 1) mod 2^16 2655 * 2656 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's 2657 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown 2658 * to zero as that requires subtraction of at least 2^16, which exceeds a 2659 * single u_int16_t's range. 2660 * 2661 * We use the following theorem to derive the implementation: 2662 * 2663 * th. (x + (y mod z)) mod z = (x + y) mod z (0) 2664 * proof. 2665 * (x + (y mod z)) mod z 2666 * = { def mod } 2667 * (x + y - (y//z)*z) mod z 2668 * = { (a + b*c) mod c = a mod c } 2669 * (x + y) mod z [end of proof] 2670 * 2671 * ... and thereby obtain: 2672 * 2673 * (sum + accumulator) mod 2^16 2674 * = { def. accumulator, def. sum } 2675 * (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16 2676 * = { (0), twice } 2677 * (x + (x >> 16)) mod 2^16 2678 * = { x mod 2^n = x & (2^n - 1) } 2679 * (x + (x >> 16)) & 0xffff 2680 * 2681 * Note: this serves also as a reduction step for at most one add (as the 2682 * trailing mod 2^16 prevents further reductions by destroying carries). 
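 *
 * An illustrative run of the code below (non-udp case): with
 * *cksum = 0x0002, was = 0x0001 and now = 0x0005,
 *
 *	x = 0x0002 + 0x0001 - 0x0005	= 0xfffffffe	(borrow wraps)
 *	x = (0xfffe + 0xffff) & 0xffff	= 0xfffd
 *
 * which is exactly the ones-complement result
 * 0x0002 +' 0x0001 +' ~0x0005 = 0x0002 +' 0x0001 +' 0xfffa = 0xfffd.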
2683 */ 2684__inline void 2685pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now, 2686 u_int8_t proto) 2687{ 2688 u_int32_t x; 2689 const int udp = proto == IPPROTO_UDP; 2690 2691 x = *cksum + was - now; 2692 x = (x + (x >> 16)) & 0xffff; 2693 2694 /* optimise: eliminate a branch when not udp */ 2695 if (udp && *cksum == 0x0000) 2696 return; 2697 if (udp && x == 0x0000) 2698 x = 0xffff; 2699 2700 *cksum = (u_int16_t)(x); 2701} 2702 2703#ifdef INET6 2704/* pre: coverage(cksum) is superset of coverage(covered_cksum) */ 2705static __inline void 2706pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto) 2707{ 2708 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto); 2709} 2710 2711/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */ 2712static __inline void 2713pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto) 2714{ 2715 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto); 2716} 2717#endif /* INET6 */ 2718 2719/* pre: *a is 16-bit aligned within its packet 2720 * 2721 * This algorithm emulates 16-bit ones-complement sums on a twos-complement 2722 * machine by conserving ones-complement's otherwise discarded carries in the 2723 * upper bits of x. These accumulated carries when added to the lower 16-bits 2724 * over at least zero 'reduction' steps then complete the ones-complement sum. 2725 * 2726 * def. sum = x mod 2^16 2727 * def. accumulator = (x >> 16) 2728 * 2729 * At most two reduction steps 2730 * 2731 * x := sum + accumulator 2732 * = { def sum, def accumulator } 2733 * x := x mod 2^16 + (x >> 16) 2734 * = { x mod 2^n = x & (2^n - 1) } 2735 * x := (x & 0xffff) + (x >> 16) 2736 * 2737 * are necessary to incorporate the accumulated carries (at most one per add) 2738 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits. 2739 * 2740 * The function is also invariant over the endian of the host. Why? 2741 * 2742 * Define the unary transpose operator ~ on a bitstring in python slice 2743 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P. 2744 * 2745 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e. 2746 * 2747 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length) 2748 * 2749 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two 2750 * 'half-adds'. Under ones-complement addition, each half-add carries to the 2751 * other, so the sum of each half-add is unaffected by their relative 2752 * order. Therefore: 2753 * 2754 * ~m +_1 ~n 2755 * = { half-adds invariant under transposition } 2756 * ~s 2757 * = { substitute } 2758 * ~(m +_1 n) [end of proof] 2759 * 2760 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine 2761 * with the converse endian does not alter the result. 2762 * 2763 * proof. 
2764 * { converse machine endian: load/store transposes, P := 8 } 2765 * ~(~m +_1 ~n) 2766 * = { ~ over +_1 } 2767 * ~~m +_1 ~~n 2768 * = { ~ is an involution } 2769 * m +_1 n [end of proof] 2770 * 2771 */ 2772#define NEG(x) ((u_int16_t)~(x)) 2773void 2774pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a, 2775 const struct pf_addr *an, sa_family_t af, u_int8_t proto) 2776{ 2777 u_int32_t x; 2778 const u_int16_t *n = an->addr16; 2779 const u_int16_t *o = a->addr16; 2780 const int udp = proto == IPPROTO_UDP; 2781 2782 switch (af) { 2783 case AF_INET: 2784 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]); 2785 break; 2786#ifdef INET6 2787 case AF_INET6: 2788 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +\ 2789 o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +\ 2790 o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +\ 2791 o[6] + NEG(n[6]) + o[7] + NEG(n[7]); 2792 break; 2793#endif /* INET6 */ 2794 default: 2795 unhandled_af(af); 2796 } 2797 2798 x = (x & 0xffff) + (x >> 16); 2799 x = (x & 0xffff) + (x >> 16); 2800 2801 /* optimise: eliminate a branch when not udp */ 2802 if (udp && *cksum == 0x0000) 2803 return; 2804 if (udp && x == 0x0000) 2805 x = 0xffff; 2806 2807 *cksum = (u_int16_t)(x); 2808} 2809 2810int 2811pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi) 2812{ 2813 int rewrite = 0; 2814 2815 if (*f != v) { 2816 u_int16_t old = htons(hi ? (*f << 8) : *f); 2817 u_int16_t new = htons(hi ? ( v << 8) : v); 2818 2819 pf_cksum_fixup(pd->pcksum, old, new, pd->proto); 2820 *f = v; 2821 rewrite = 1; 2822 } 2823 2824 return (rewrite); 2825} 2826 2827/* pre: *f is 16-bit aligned within its packet */ 2828int 2829pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v) 2830{ 2831 int rewrite = 0; 2832 2833 if (*f != v) { 2834 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto); 2835 *f = v; 2836 rewrite = 1; 2837 } 2838 2839 return (rewrite); 2840} 2841 2842int 2843pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi) 2844{ 2845 int rewrite = 0; 2846 u_int8_t *fb = (u_int8_t*)f; 2847 u_int8_t *vb = (u_int8_t*)&v; 2848 2849 if (hi && ALIGNED_POINTER(f, u_int16_t)) { 2850 return (pf_patch_16(pd, f, v)); /* optimise */ 2851 } 2852 2853 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2854 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2855 2856 return (rewrite); 2857} 2858 2859/* pre: *f is 16-bit aligned within its packet */ 2860/* pre: pd->proto != IPPROTO_UDP */ 2861int 2862pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v) 2863{ 2864 int rewrite = 0; 2865 u_int16_t *pc = pd->pcksum; 2866 u_int8_t proto = pd->proto; 2867 2868 /* optimise: inline udp fixup code is unused; let compiler scrub it */ 2869 if (proto == IPPROTO_UDP) 2870 panic("%s: udp", __func__); 2871 2872 /* optimise: skip *f != v guard; true for all use-cases */ 2873 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto); 2874 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto); 2875 2876 *f = v; 2877 rewrite = 1; 2878 2879 return (rewrite); 2880} 2881 2882int 2883pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi) 2884{ 2885 int rewrite = 0; 2886 u_int8_t *fb = (u_int8_t*)f; 2887 u_int8_t *vb = (u_int8_t*)&v; 2888 2889 if (hi && ALIGNED_POINTER(f, u_int32_t)) { 2890 return (pf_patch_32(pd, f, v)); /* optimise */ 2891 } 2892 2893 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2894 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2895 rewrite += pf_patch_8(pd, fb++, *vb++, hi); 2896 rewrite += pf_patch_8(pd, fb++, *vb++,!hi); 2897 2898 return (rewrite); 2899} 2900 2901int 
2902pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir, 2903 u_int16_t *virtual_id, u_int16_t *virtual_type) 2904{ 2905 /* 2906 * ICMP types marked with PF_OUT are typically responses to 2907 * PF_IN, and will match states in the opposite direction. 2908 * PF_IN ICMP types need to match a state with that type. 2909 */ 2910 *icmp_dir = PF_OUT; 2911 2912 /* Queries (and responses) */ 2913 switch (pd->af) { 2914 case AF_INET: 2915 switch (type) { 2916 case ICMP_ECHO: 2917 *icmp_dir = PF_IN; 2918 /* FALLTHROUGH */ 2919 case ICMP_ECHOREPLY: 2920 *virtual_type = ICMP_ECHO; 2921 *virtual_id = pd->hdr.icmp.icmp_id; 2922 break; 2923 2924 case ICMP_TSTAMP: 2925 *icmp_dir = PF_IN; 2926 /* FALLTHROUGH */ 2927 case ICMP_TSTAMPREPLY: 2928 *virtual_type = ICMP_TSTAMP; 2929 *virtual_id = pd->hdr.icmp.icmp_id; 2930 break; 2931 2932 case ICMP_IREQ: 2933 *icmp_dir = PF_IN; 2934 /* FALLTHROUGH */ 2935 case ICMP_IREQREPLY: 2936 *virtual_type = ICMP_IREQ; 2937 *virtual_id = pd->hdr.icmp.icmp_id; 2938 break; 2939 2940 case ICMP_MASKREQ: 2941 *icmp_dir = PF_IN; 2942 /* FALLTHROUGH */ 2943 case ICMP_MASKREPLY: 2944 *virtual_type = ICMP_MASKREQ; 2945 *virtual_id = pd->hdr.icmp.icmp_id; 2946 break; 2947 2948 case ICMP_IPV6_WHEREAREYOU: 2949 *icmp_dir = PF_IN; 2950 /* FALLTHROUGH */ 2951 case ICMP_IPV6_IAMHERE: 2952 *virtual_type = ICMP_IPV6_WHEREAREYOU; 2953 *virtual_id = 0; /* Nothing sane to match on! */ 2954 break; 2955 2956 case ICMP_MOBILE_REGREQUEST: 2957 *icmp_dir = PF_IN; 2958 /* FALLTHROUGH */ 2959 case ICMP_MOBILE_REGREPLY: 2960 *virtual_type = ICMP_MOBILE_REGREQUEST; 2961 *virtual_id = 0; /* Nothing sane to match on! */ 2962 break; 2963 2964 case ICMP_ROUTERSOLICIT: 2965 *icmp_dir = PF_IN; 2966 /* FALLTHROUGH */ 2967 case ICMP_ROUTERADVERT: 2968 *virtual_type = ICMP_ROUTERSOLICIT; 2969 *virtual_id = 0; /* Nothing sane to match on! */ 2970 break; 2971 2972 /* These ICMP types map to other connections */ 2973 case ICMP_UNREACH: 2974 case ICMP_SOURCEQUENCH: 2975 case ICMP_REDIRECT: 2976 case ICMP_TIMXCEED: 2977 case ICMP_PARAMPROB: 2978 /* These will not be used, but set them anyway */ 2979 *icmp_dir = PF_IN; 2980 *virtual_type = htons(type); 2981 *virtual_id = 0; 2982 return (1); /* These types match to another state */ 2983 2984 /* 2985 * All remaining ICMP types get their own states, 2986 * and will only match in one direction. 2987 */ 2988 default: 2989 *icmp_dir = PF_IN; 2990 *virtual_type = type; 2991 *virtual_id = 0; 2992 break; 2993 } 2994 break; 2995#ifdef INET6 2996 case AF_INET6: 2997 switch (type) { 2998 case ICMP6_ECHO_REQUEST: 2999 *icmp_dir = PF_IN; 3000 /* FALLTHROUGH */ 3001 case ICMP6_ECHO_REPLY: 3002 *virtual_type = ICMP6_ECHO_REQUEST; 3003 *virtual_id = pd->hdr.icmp6.icmp6_id; 3004 break; 3005 3006 case MLD_LISTENER_QUERY: 3007 case MLD_LISTENER_REPORT: { 3008 struct mld_hdr *mld = &pd->hdr.mld; 3009 u_int32_t h; 3010 3011 /* 3012 * Listener Report can be sent by clients 3013 * without an associated Listener Query. 3014 * In addition to that, when Report is sent as a 3015 * reply to a Query its source and destination 3016 * address are different. 
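			 *
			 * Hashing the multicast group address into a fake
			 * id below lets a Query and the Reports for the
			 * same group land on the same state, since MLD has
			 * no query/report identifier of its own to match
			 * on.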
3017 */ 3018 *icmp_dir = PF_IN; 3019 *virtual_type = MLD_LISTENER_QUERY; 3020 /* generate fake id for these messages */ 3021 h = mld->mld_addr.s6_addr32[0] ^ 3022 mld->mld_addr.s6_addr32[1] ^ 3023 mld->mld_addr.s6_addr32[2] ^ 3024 mld->mld_addr.s6_addr32[3]; 3025 *virtual_id = (h >> 16) ^ (h & 0xffff); 3026 break; 3027 } 3028 3029 /* 3030 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as 3031 * ICMP6_WRU 3032 */ 3033 case ICMP6_WRUREQUEST: 3034 *icmp_dir = PF_IN; 3035 /* FALLTHROUGH */ 3036 case ICMP6_WRUREPLY: 3037 *virtual_type = ICMP6_WRUREQUEST; 3038 *virtual_id = 0; /* Nothing sane to match on! */ 3039 break; 3040 3041 case MLD_MTRACE: 3042 *icmp_dir = PF_IN; 3043 /* FALLTHROUGH */ 3044 case MLD_MTRACE_RESP: 3045 *virtual_type = MLD_MTRACE; 3046 *virtual_id = 0; /* Nothing sane to match on! */ 3047 break; 3048 3049 case ND_NEIGHBOR_SOLICIT: 3050 *icmp_dir = PF_IN; 3051 /* FALLTHROUGH */ 3052 case ND_NEIGHBOR_ADVERT: { 3053 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns; 3054 u_int32_t h; 3055 3056 *virtual_type = ND_NEIGHBOR_SOLICIT; 3057 /* generate fake id for these messages */ 3058 h = nd->nd_ns_target.s6_addr32[0] ^ 3059 nd->nd_ns_target.s6_addr32[1] ^ 3060 nd->nd_ns_target.s6_addr32[2] ^ 3061 nd->nd_ns_target.s6_addr32[3]; 3062 *virtual_id = (h >> 16) ^ (h & 0xffff); 3063 /* 3064 * the extra work here deals with 'keep state' option 3065 * at pass rule for unsolicited advertisement. By 3066 * returning 1 (state_icmp = 1) we override 'keep 3067 * state' to 'no state' so we don't create state for 3068 * unsolicited advertisements. No one expects answer to 3069 * unsolicited advertisements so we should be good. 3070 */ 3071 if (type == ND_NEIGHBOR_ADVERT) { 3072 *virtual_type = htons(*virtual_type); 3073 return (1); 3074 } 3075 break; 3076 } 3077 3078 /* 3079 * These ICMP types map to other connections. 3080 * ND_REDIRECT can't be in this list because the triggering 3081 * packet header is optional. 3082 */ 3083 case ICMP6_DST_UNREACH: 3084 case ICMP6_PACKET_TOO_BIG: 3085 case ICMP6_TIME_EXCEEDED: 3086 case ICMP6_PARAM_PROB: 3087 /* These will not be used, but set them anyway */ 3088 *icmp_dir = PF_IN; 3089 *virtual_type = htons(type); 3090 *virtual_id = 0; 3091 return (1); /* These types match to another state */ 3092 /* 3093 * All remaining ICMP6 types get their own states, 3094 * and will only match in one direction. 
3095 */ 3096 default: 3097 *icmp_dir = PF_IN; 3098 *virtual_type = type; 3099 *virtual_id = 0; 3100 break; 3101 } 3102 break; 3103#endif /* INET6 */ 3104 } 3105 *virtual_type = htons(*virtual_type); 3106 return (0); /* These types match to their own state */ 3107} 3108 3109void 3110pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp, 3111 struct pf_addr *oa, struct pf_addr *na, u_int16_t np) 3112{ 3113 /* note: doesn't trouble to fixup quoted checksums, if any */ 3114 3115 /* change quoted protocol port */ 3116 if (qp != NULL) 3117 pf_patch_16(pd, qp, np); 3118 3119 /* change quoted ip address */ 3120 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto); 3121 pf_addrcpy(qa, na, pd->af); 3122 3123 /* change network-header's ip address */ 3124 if (oa) 3125 pf_translate_a(pd, oa, na); 3126} 3127 3128/* pre: *a is 16-bit aligned within its packet */ 3129/* *a is a network header src/dst address */ 3130int 3131pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an) 3132{ 3133 int rewrite = 0; 3134 3135 /* warning: !PF_ANEQ != PF_AEQ */ 3136 if (!PF_ANEQ(a, an, pd->af)) 3137 return (0); 3138 3139 /* fixup transport pseudo-header, if any */ 3140 switch (pd->proto) { 3141 case IPPROTO_TCP: /* FALLTHROUGH */ 3142 case IPPROTO_UDP: /* FALLTHROUGH */ 3143 case IPPROTO_ICMPV6: 3144 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto); 3145 break; 3146 default: 3147 break; /* assume no pseudo-header */ 3148 } 3149 3150 pf_addrcpy(a, an, pd->af); 3151 rewrite = 1; 3152 3153 return (rewrite); 3154} 3155 3156#ifdef INET6 3157/* pf_translate_af() may change pd->m, adjust local copies after calling */ 3158int 3159pf_translate_af(struct pf_pdesc *pd) 3160{ 3161 static const struct pf_addr zero; 3162 struct ip *ip4; 3163 struct ip6_hdr *ip6; 3164 int copyback = 0; 3165 u_int hlen, ohlen, dlen; 3166 u_int16_t *pc; 3167 u_int8_t af_proto, naf_proto; 3168 3169 hlen = (pd->naf == AF_INET) ? 
sizeof(*ip4) : sizeof(*ip6); 3170 ohlen = pd->off; 3171 dlen = pd->tot_len - pd->off; 3172 pc = pd->pcksum; 3173 3174 af_proto = naf_proto = pd->proto; 3175 if (naf_proto == IPPROTO_ICMP) 3176 af_proto = IPPROTO_ICMPV6; 3177 if (naf_proto == IPPROTO_ICMPV6) 3178 af_proto = IPPROTO_ICMP; 3179 3180 /* uncover stale pseudo-header */ 3181 switch (af_proto) { 3182 case IPPROTO_ICMPV6: 3183 /* optimise: unchanged for TCP/UDP */ 3184 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto); 3185 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto); 3186 /* FALLTHROUGH */ 3187 case IPPROTO_UDP: /* FALLTHROUGH */ 3188 case IPPROTO_TCP: 3189 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto); 3190 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto); 3191 copyback = 1; 3192 break; 3193 default: 3194 break; /* assume no pseudo-header */ 3195 } 3196 3197 /* replace the network header */ 3198 m_adj(pd->m, pd->off); 3199 pd->src = NULL; 3200 pd->dst = NULL; 3201 3202 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) { 3203 pd->m = NULL; 3204 return (-1); 3205 } 3206 3207 pd->off = hlen; 3208 pd->tot_len += hlen - ohlen; 3209 3210 switch (pd->naf) { 3211 case AF_INET: 3212 ip4 = mtod(pd->m, struct ip *); 3213 memset(ip4, 0, hlen); 3214 ip4->ip_v = IPVERSION; 3215 ip4->ip_hl = hlen >> 2; 3216 ip4->ip_tos = pd->tos; 3217 ip4->ip_len = htons(hlen + dlen); 3218 ip4->ip_id = htons(ip_randomid()); 3219 ip4->ip_off = htons(IP_DF); 3220 ip4->ip_ttl = pd->ttl; 3221 ip4->ip_p = pd->proto; 3222 ip4->ip_src = pd->nsaddr.v4; 3223 ip4->ip_dst = pd->ndaddr.v4; 3224 break; 3225 case AF_INET6: 3226 ip6 = mtod(pd->m, struct ip6_hdr *); 3227 memset(ip6, 0, hlen); 3228 ip6->ip6_vfc = IPV6_VERSION; 3229 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20); 3230 ip6->ip6_plen = htons(dlen); 3231 ip6->ip6_nxt = pd->proto; 3232 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM) 3233 ip6->ip6_hlim = IPV6_DEFHLIM; 3234 else 3235 ip6->ip6_hlim = pd->ttl; 3236 ip6->ip6_src = pd->nsaddr.v6; 3237 ip6->ip6_dst = pd->ndaddr.v6; 3238 break; 3239 default: 3240 unhandled_af(pd->naf); 3241 } 3242 3243 /* UDP over IPv6 must be checksummed per rfc2460 p27 */ 3244 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 && 3245 pd->naf == AF_INET6) { 3246 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT; 3247 } 3248 3249 /* cover fresh pseudo-header */ 3250 switch (naf_proto) { 3251 case IPPROTO_ICMPV6: 3252 /* optimise: unchanged for TCP/UDP */ 3253 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto); 3254 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto); 3255 /* FALLTHROUGH */ 3256 case IPPROTO_UDP: /* FALLTHROUGH */ 3257 case IPPROTO_TCP: 3258 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto); 3259 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto); 3260 copyback = 1; 3261 break; 3262 default: 3263 break; /* assume no pseudo-header */ 3264 } 3265 3266 /* flush pd->pcksum */ 3267 if (copyback) 3268 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 3269 3270 return (0); 3271} 3272 3273int 3274pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd, 3275 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst, 3276 sa_family_t af, sa_family_t naf) 3277{ 3278 struct mbuf *n = NULL; 3279 struct ip *ip4; 3280 struct ip6_hdr *ip6; 3281 u_int hlen, ohlen, dlen; 3282 int d; 3283 3284 if (af == naf || (af != AF_INET && af != AF_INET6) || 3285 (naf != AF_INET && naf != AF_INET6)) 3286 return (-1); 3287 3288 /* split the mbuf chain on the quoted ip/ip6 header boundary */ 3289 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL) 3290 return 
(-1); 3291 3292 /* new quoted header */ 3293 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6); 3294 /* old quoted header */ 3295 ohlen = pd2->off - ipoff2; 3296 3297 /* trim old quoted header */ 3298 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto); 3299 m_adj(n, ohlen); 3300 3301 /* prepend a new, translated, quoted header */ 3302 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL) 3303 return (-1); 3304 3305 switch (naf) { 3306 case AF_INET: 3307 ip4 = mtod(n, struct ip *); 3308 memset(ip4, 0, sizeof(*ip4)); 3309 ip4->ip_v = IPVERSION; 3310 ip4->ip_hl = sizeof(*ip4) >> 2; 3311 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen); 3312 ip4->ip_id = htons(ip_randomid()); 3313 ip4->ip_off = htons(IP_DF); 3314 ip4->ip_ttl = pd2->ttl; 3315 if (pd2->proto == IPPROTO_ICMPV6) 3316 ip4->ip_p = IPPROTO_ICMP; 3317 else 3318 ip4->ip_p = pd2->proto; 3319 ip4->ip_src = src->v4; 3320 ip4->ip_dst = dst->v4; 3321 in_hdr_cksum_out(n, NULL); 3322 break; 3323 case AF_INET6: 3324 ip6 = mtod(n, struct ip6_hdr *); 3325 memset(ip6, 0, sizeof(*ip6)); 3326 ip6->ip6_vfc = IPV6_VERSION; 3327 ip6->ip6_plen = htons(pd2->tot_len - ohlen); 3328 if (pd2->proto == IPPROTO_ICMP) 3329 ip6->ip6_nxt = IPPROTO_ICMPV6; 3330 else 3331 ip6->ip6_nxt = pd2->proto; 3332 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM) 3333 ip6->ip6_hlim = IPV6_DEFHLIM; 3334 else 3335 ip6->ip6_hlim = pd2->ttl; 3336 ip6->ip6_src = src->v6; 3337 ip6->ip6_dst = dst->v6; 3338 break; 3339 } 3340 3341 /* cover new quoted header */ 3342 /* optimise: any new AF_INET header of ours sums to zero */ 3343 if (naf != AF_INET) { 3344 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto); 3345 } 3346 3347 /* reattach modified quoted packet to outer header */ 3348 { 3349 int nlen = n->m_pkthdr.len; 3350 m_cat(m, n); 3351 m->m_pkthdr.len += nlen; 3352 } 3353 3354 /* account for altered length */ 3355 d = hlen - ohlen; 3356 3357 if (pd->proto == IPPROTO_ICMPV6) { 3358 /* fixup pseudo-header */ 3359 dlen = pd->tot_len - pd->off; 3360 pf_cksum_fixup(pd->pcksum, 3361 htons(dlen), htons(dlen + d), pd->proto); 3362 } 3363 3364 pd->tot_len += d; 3365 pd2->tot_len += d; 3366 pd2->off += d; 3367 3368 /* note: not bothering to update network headers as 3369 these due for rewrite by pf_translate_af() */ 3370 3371 return (0); 3372} 3373 3374 3375#define PTR_IP(field) (offsetof(struct ip, field)) 3376#define PTR_IP6(field) (offsetof(struct ip6_hdr, field)) 3377 3378int 3379pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg) 3380{ 3381 struct icmp *icmp4; 3382 struct icmp6_hdr *icmp6; 3383 u_int32_t mtu; 3384 int32_t ptr = -1; 3385 u_int8_t type; 3386 u_int8_t code; 3387 3388 switch (af) { 3389 case AF_INET: 3390 icmp6 = arg; 3391 type = icmp6->icmp6_type; 3392 code = icmp6->icmp6_code; 3393 mtu = ntohl(icmp6->icmp6_mtu); 3394 3395 switch (type) { 3396 case ICMP6_ECHO_REQUEST: 3397 type = ICMP_ECHO; 3398 break; 3399 case ICMP6_ECHO_REPLY: 3400 type = ICMP_ECHOREPLY; 3401 break; 3402 case ICMP6_DST_UNREACH: 3403 type = ICMP_UNREACH; 3404 switch (code) { 3405 case ICMP6_DST_UNREACH_NOROUTE: 3406 case ICMP6_DST_UNREACH_BEYONDSCOPE: 3407 case ICMP6_DST_UNREACH_ADDR: 3408 code = ICMP_UNREACH_HOST; 3409 break; 3410 case ICMP6_DST_UNREACH_ADMIN: 3411 code = ICMP_UNREACH_HOST_PROHIB; 3412 break; 3413 case ICMP6_DST_UNREACH_NOPORT: 3414 code = ICMP_UNREACH_PORT; 3415 break; 3416 default: 3417 return (-1); 3418 } 3419 break; 3420 case ICMP6_PACKET_TOO_BIG: 3421 type = ICMP_UNREACH; 3422 code = ICMP_UNREACH_NEEDFRAG; 3423 mtu -= 20; 3424 break; 3425 case 
ICMP6_TIME_EXCEEDED: 3426 type = ICMP_TIMXCEED; 3427 break; 3428 case ICMP6_PARAM_PROB: 3429 switch (code) { 3430 case ICMP6_PARAMPROB_HEADER: 3431 type = ICMP_PARAMPROB; 3432 code = ICMP_PARAMPROB_ERRATPTR; 3433 ptr = ntohl(icmp6->icmp6_pptr); 3434 3435 if (ptr == PTR_IP6(ip6_vfc)) 3436 ; /* preserve */ 3437 else if (ptr == PTR_IP6(ip6_vfc) + 1) 3438 ptr = PTR_IP(ip_tos); 3439 else if (ptr == PTR_IP6(ip6_plen) || 3440 ptr == PTR_IP6(ip6_plen) + 1) 3441 ptr = PTR_IP(ip_len); 3442 else if (ptr == PTR_IP6(ip6_nxt)) 3443 ptr = PTR_IP(ip_p); 3444 else if (ptr == PTR_IP6(ip6_hlim)) 3445 ptr = PTR_IP(ip_ttl); 3446 else if (ptr >= PTR_IP6(ip6_src) && 3447 ptr < PTR_IP6(ip6_dst)) 3448 ptr = PTR_IP(ip_src); 3449 else if (ptr >= PTR_IP6(ip6_dst) && 3450 ptr < sizeof(struct ip6_hdr)) 3451 ptr = PTR_IP(ip_dst); 3452 else { 3453 return (-1); 3454 } 3455 break; 3456 case ICMP6_PARAMPROB_NEXTHEADER: 3457 type = ICMP_UNREACH; 3458 code = ICMP_UNREACH_PROTOCOL; 3459 break; 3460 default: 3461 return (-1); 3462 } 3463 break; 3464 default: 3465 return (-1); 3466 } 3467 3468 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI); 3469 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO); 3470 3471 /* aligns well with a icmpv4 nextmtu */ 3472 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu)); 3473 3474 /* icmpv4 pptr is a one most significant byte */ 3475 if (ptr >= 0) 3476 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24)); 3477 break; 3478 case AF_INET6: 3479 icmp4 = arg; 3480 type = icmp4->icmp_type; 3481 code = icmp4->icmp_code; 3482 mtu = ntohs(icmp4->icmp_nextmtu); 3483 3484 switch (type) { 3485 case ICMP_ECHO: 3486 type = ICMP6_ECHO_REQUEST; 3487 break; 3488 case ICMP_ECHOREPLY: 3489 type = ICMP6_ECHO_REPLY; 3490 break; 3491 case ICMP_UNREACH: 3492 type = ICMP6_DST_UNREACH; 3493 switch (code) { 3494 case ICMP_UNREACH_NET: 3495 case ICMP_UNREACH_HOST: 3496 case ICMP_UNREACH_NET_UNKNOWN: 3497 case ICMP_UNREACH_HOST_UNKNOWN: 3498 case ICMP_UNREACH_ISOLATED: 3499 case ICMP_UNREACH_TOSNET: 3500 case ICMP_UNREACH_TOSHOST: 3501 code = ICMP6_DST_UNREACH_NOROUTE; 3502 break; 3503 case ICMP_UNREACH_PORT: 3504 code = ICMP6_DST_UNREACH_NOPORT; 3505 break; 3506 case ICMP_UNREACH_NET_PROHIB: 3507 case ICMP_UNREACH_HOST_PROHIB: 3508 case ICMP_UNREACH_FILTER_PROHIB: 3509 case ICMP_UNREACH_PRECEDENCE_CUTOFF: 3510 code = ICMP6_DST_UNREACH_ADMIN; 3511 break; 3512 case ICMP_UNREACH_PROTOCOL: 3513 type = ICMP6_PARAM_PROB; 3514 code = ICMP6_PARAMPROB_NEXTHEADER; 3515 ptr = offsetof(struct ip6_hdr, ip6_nxt); 3516 break; 3517 case ICMP_UNREACH_NEEDFRAG: 3518 type = ICMP6_PACKET_TOO_BIG; 3519 code = 0; 3520 mtu += 20; 3521 break; 3522 default: 3523 return (-1); 3524 } 3525 break; 3526 case ICMP_TIMXCEED: 3527 type = ICMP6_TIME_EXCEEDED; 3528 break; 3529 case ICMP_PARAMPROB: 3530 type = ICMP6_PARAM_PROB; 3531 switch (code) { 3532 case ICMP_PARAMPROB_ERRATPTR: 3533 code = ICMP6_PARAMPROB_HEADER; 3534 break; 3535 case ICMP_PARAMPROB_LENGTH: 3536 code = ICMP6_PARAMPROB_HEADER; 3537 break; 3538 default: 3539 return (-1); 3540 } 3541 3542 ptr = icmp4->icmp_pptr; 3543 if (ptr == 0 || ptr == PTR_IP(ip_tos)) 3544 ; /* preserve */ 3545 else if (ptr == PTR_IP(ip_len) || 3546 ptr == PTR_IP(ip_len) + 1) 3547 ptr = PTR_IP6(ip6_plen); 3548 else if (ptr == PTR_IP(ip_ttl)) 3549 ptr = PTR_IP6(ip6_hlim); 3550 else if (ptr == PTR_IP(ip_p)) 3551 ptr = PTR_IP6(ip6_nxt); 3552 else if (ptr >= PTR_IP(ip_src) && 3553 ptr < PTR_IP(ip_dst)) 3554 ptr = PTR_IP6(ip6_src); 3555 else if (ptr >= PTR_IP(ip_dst) && 3556 ptr < sizeof(struct ip)) 3557 ptr = PTR_IP6(ip6_dst); 
3558 else { 3559 return (-1); 3560 } 3561 break; 3562 default: 3563 return (-1); 3564 } 3565 3566 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI); 3567 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO); 3568 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu)); 3569 if (ptr >= 0) 3570 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr)); 3571 break; 3572 } 3573 3574 return (0); 3575} 3576#endif /* INET6 */ 3577 3578/* 3579 * Need to modulate the sequence numbers in the TCP SACK option 3580 * (credits to Krzysztof Pfaff for report and patch) 3581 */ 3582int 3583pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst) 3584{ 3585 struct sackblk sack; 3586 int copyback = 0, i; 3587 int olen, optsoff; 3588 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh; 3589 3590 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 3591 optsoff = pd->off + sizeof(struct tcphdr); 3592#define TCPOLEN_MINSACK (TCPOLEN_SACK + 2) 3593 if (olen < TCPOLEN_MINSACK || 3594 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af)) 3595 return (0); 3596 3597 eoh = opts + olen; 3598 opt = opts; 3599 while ((opt = pf_find_tcpopt(opt, opts, olen, 3600 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL) 3601 { 3602 size_t safelen = MIN(opt[1], (eoh - opt)); 3603 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) { 3604 size_t startoff = (opt + i) - opts; 3605 memcpy(&sack, &opt[i], sizeof(sack)); 3606 pf_patch_32_unaligned(pd, &sack.start, 3607 htonl(ntohl(sack.start) - dst->seqdiff), 3608 PF_ALGNMNT(startoff)); 3609 pf_patch_32_unaligned(pd, &sack.end, 3610 htonl(ntohl(sack.end) - dst->seqdiff), 3611 PF_ALGNMNT(startoff + sizeof(sack.start))); 3612 memcpy(&opt[i], &sack, sizeof(sack)); 3613 } 3614 copyback = 1; 3615 opt += opt[1]; 3616 } 3617 3618 if (copyback) 3619 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT); 3620 return (copyback); 3621} 3622 3623struct mbuf * 3624pf_build_tcp(const struct pf_rule *r, sa_family_t af, 3625 const struct pf_addr *saddr, const struct pf_addr *daddr, 3626 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack, 3627 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag, 3628 u_int16_t rtag, u_int sack, u_int rdom, u_short *reason) 3629{ 3630 struct mbuf *m; 3631 int len, tlen; 3632 struct ip *h; 3633#ifdef INET6 3634 struct ip6_hdr *h6; 3635#endif /* INET6 */ 3636 struct tcphdr *th; 3637 char *opt; 3638 3639 /* maximum segment size tcp option */ 3640 tlen = sizeof(struct tcphdr); 3641 if (mss) 3642 tlen += 4; 3643 if (sack) 3644 tlen += 2; 3645 3646 switch (af) { 3647 case AF_INET: 3648 len = sizeof(struct ip) + tlen; 3649 break; 3650#ifdef INET6 3651 case AF_INET6: 3652 len = sizeof(struct ip6_hdr) + tlen; 3653 break; 3654#endif /* INET6 */ 3655 default: 3656 unhandled_af(af); 3657 } 3658 3659 /* create outgoing mbuf */ 3660 m = m_gethdr(M_DONTWAIT, MT_HEADER); 3661 if (m == NULL) { 3662 REASON_SET(reason, PFRES_MEMORY); 3663 return (NULL); 3664 } 3665 if (tag) 3666 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3667 m->m_pkthdr.pf.tag = rtag; 3668 m->m_pkthdr.ph_rtableid = rdom; 3669 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3670 m->m_pkthdr.pf.prio = r->set_prio[0]; 3671 if (r && r->qid) 3672 m->m_pkthdr.pf.qid = r->qid; 3673 m->m_data += max_linkhdr; 3674 m->m_pkthdr.len = m->m_len = len; 3675 m->m_pkthdr.ph_ifidx = 0; 3676 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT; 3677 memset(m->m_data, 0, len); 3678 switch (af) { 3679 case AF_INET: 3680 h = mtod(m, struct ip *); 3681 h->ip_p = IPPROTO_TCP; 3682 h->ip_len = htons(tlen); 3683 h->ip_v = 4; 3684 h->ip_hl = sizeof(*h) >> 2; 3685 
		h->ip_tos = IPTOS_LOWDELAY;
3686		h->ip_len = htons(len);
3687		h->ip_off = htons(atomic_load_int(&ip_mtudisc) ? IP_DF : 0);
3688		h->ip_ttl = ttl ? ttl : atomic_load_int(&ip_defttl);
3689		h->ip_sum = 0;
3690		h->ip_src.s_addr = saddr->v4.s_addr;
3691		h->ip_dst.s_addr = daddr->v4.s_addr;
3692
3693		th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
3694		break;
3695#ifdef INET6
3696	case AF_INET6:
3697		h6 = mtod(m, struct ip6_hdr *);
3698		h6->ip6_nxt = IPPROTO_TCP;
3699		h6->ip6_plen = htons(tlen);
3700		h6->ip6_vfc |= IPV6_VERSION;
3701		h6->ip6_hlim = IPV6_DEFHLIM;
3702		memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
3703		memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
3704
3705		th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
3706		break;
3707#endif /* INET6 */
3708	default:
3709		unhandled_af(af);
3710	}
3711
3712	/* TCP header */
3713	th->th_sport = sport;
3714	th->th_dport = dport;
3715	th->th_seq = htonl(seq);
3716	th->th_ack = htonl(ack);
3717	th->th_off = tlen >> 2;
3718	th->th_flags = flags;
3719	th->th_win = htons(win);
3720
3721	opt = (char *)(th + 1);
3722	if (mss) {
3723		opt[0] = TCPOPT_MAXSEG;
3724		opt[1] = 4;
3725		mss = htons(mss);
3726		memcpy((opt + 2), &mss, 2);
3727		opt += 4;
3728	}
3729	if (sack) {
3730		opt[0] = TCPOPT_SACK_PERMITTED;
3731		opt[1] = 2;
3732		opt += 2;
3733	}
3734
3735	return (m);
3736}
3737
3738void
3739pf_send_tcp(const struct pf_rule *r, sa_family_t af,
3740    const struct pf_addr *saddr, const struct pf_addr *daddr,
3741    u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
3742    u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
3743    u_int16_t rtag, u_int rdom, u_short *reason)
3744{
3745	struct mbuf *m;
3746
3747	if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
3748	    flags, win, mss, ttl, tag, rtag, 0, rdom, reason)) == NULL)
3749		return;
3750
3751	switch (af) {
3752	case AF_INET:
3753		ip_send(m);
3754		break;
3755#ifdef INET6
3756	case AF_INET6:
3757		ip6_send(m);
3758		break;
3759#endif /* INET6 */
3760	}
3761}
3762
3763static void
3764pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *st,
3765    struct pf_state_peer *src, struct pf_state_peer *dst, u_short *reason)
3766{
3767	/*
3768	 * We are sending a challenge ACK as a response to a SYN packet that
3769	 * matches an existing state (modulo the TCP window check); the
3770	 * packet must therefore be sent on behalf of the destination.
3771	 *
3772	 * We expect the sender either to remain silent or to send an RST
3773	 * packet, so that both the firewall and the remote peer can purge
3774	 * the dead state from memory.
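	 *
	 * (The ACK below is built purely from the state's own view of
	 * the connection, seq = dst->seqlo and ack = src->seqlo, which
	 * mirrors the challenge ACK idea of RFC 5961: a peer that has
	 * really lost the connection answers with an RST and the dead
	 * state can then be purged.)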
3775 */ 3776 pf_send_tcp(st->rule.ptr, pd->af, pd->dst, pd->src, 3777 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo, 3778 src->seqlo, TH_ACK, 0, 0, st->rule.ptr->return_ttl, 1, 0, 3779 pd->rdomain, reason); 3780} 3781 3782void 3783pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param, 3784 sa_family_t af, struct pf_rule *r, u_int rdomain) 3785{ 3786 struct mbuf *m0; 3787 3788 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL) 3789 return; 3790 3791 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED; 3792 m0->m_pkthdr.ph_rtableid = rdomain; 3793 if (r && (r->scrub_flags & PFSTATE_SETPRIO)) 3794 m0->m_pkthdr.pf.prio = r->set_prio[0]; 3795 if (r && r->qid) 3796 m0->m_pkthdr.pf.qid = r->qid; 3797 3798 switch (af) { 3799 case AF_INET: 3800 icmp_error(m0, type, code, 0, param); 3801 break; 3802#ifdef INET6 3803 case AF_INET6: 3804 icmp6_error(m0, type, code, param); 3805 break; 3806#endif /* INET6 */ 3807 } 3808} 3809 3810/* 3811 * Return ((n = 0) == (a = b [with mask m])) 3812 * Note: n != 0 => returns (a != b [with mask m]) 3813 */ 3814int 3815pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m, 3816 struct pf_addr *b, sa_family_t af) 3817{ 3818 switch (af) { 3819 case AF_INET: 3820 if ((a->addr32[0] & m->addr32[0]) == 3821 (b->addr32[0] & m->addr32[0])) 3822 return (n == 0); 3823 break; 3824#ifdef INET6 3825 case AF_INET6: 3826 if (((a->addr32[0] & m->addr32[0]) == 3827 (b->addr32[0] & m->addr32[0])) && 3828 ((a->addr32[1] & m->addr32[1]) == 3829 (b->addr32[1] & m->addr32[1])) && 3830 ((a->addr32[2] & m->addr32[2]) == 3831 (b->addr32[2] & m->addr32[2])) && 3832 ((a->addr32[3] & m->addr32[3]) == 3833 (b->addr32[3] & m->addr32[3]))) 3834 return (n == 0); 3835 break; 3836#endif /* INET6 */ 3837 } 3838 3839 return (n != 0); 3840} 3841 3842/* 3843 * Return 1 if b <= a <= e, otherwise return 0. 
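 * E.g. for AF_INET with b = 10.0.0.10 and e = 10.0.0.20, a = 10.0.0.15
 * returns 1 and a = 10.0.0.21 returns 0; for AF_INET6 the loop below
 * compares the four 32-bit words most significant first.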
3844 */ 3845int 3846pf_match_addr_range(struct pf_addr *b, struct pf_addr *e, 3847 struct pf_addr *a, sa_family_t af) 3848{ 3849 switch (af) { 3850 case AF_INET: 3851 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) || 3852 (ntohl(a->addr32[0]) > ntohl(e->addr32[0]))) 3853 return (0); 3854 break; 3855#ifdef INET6 3856 case AF_INET6: { 3857 int i; 3858 3859 /* check a >= b */ 3860 for (i = 0; i < 4; ++i) 3861 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i])) 3862 break; 3863 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i])) 3864 return (0); 3865 /* check a <= e */ 3866 for (i = 0; i < 4; ++i) 3867 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i])) 3868 break; 3869 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i])) 3870 return (0); 3871 break; 3872 } 3873#endif /* INET6 */ 3874 } 3875 return (1); 3876} 3877 3878int 3879pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p) 3880{ 3881 switch (op) { 3882 case PF_OP_IRG: 3883 return ((p > a1) && (p < a2)); 3884 case PF_OP_XRG: 3885 return ((p < a1) || (p > a2)); 3886 case PF_OP_RRG: 3887 return ((p >= a1) && (p <= a2)); 3888 case PF_OP_EQ: 3889 return (p == a1); 3890 case PF_OP_NE: 3891 return (p != a1); 3892 case PF_OP_LT: 3893 return (p < a1); 3894 case PF_OP_LE: 3895 return (p <= a1); 3896 case PF_OP_GT: 3897 return (p > a1); 3898 case PF_OP_GE: 3899 return (p >= a1); 3900 } 3901 return (0); /* never reached */ 3902} 3903 3904int 3905pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p) 3906{ 3907 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p))); 3908} 3909 3910int 3911pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u) 3912{ 3913 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3914 return (0); 3915 return (pf_match(op, a1, a2, u)); 3916} 3917 3918int 3919pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g) 3920{ 3921 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE) 3922 return (0); 3923 return (pf_match(op, a1, a2, g)); 3924} 3925 3926int 3927pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag) 3928{ 3929 if (*tag == -1) 3930 *tag = m->m_pkthdr.pf.tag; 3931 3932 return ((!r->match_tag_not && r->match_tag == *tag) || 3933 (r->match_tag_not && r->match_tag != *tag)); 3934} 3935 3936int 3937pf_match_rcvif(struct mbuf *m, struct pf_rule *r) 3938{ 3939 struct ifnet *ifp; 3940 struct pfi_kif *kif = NULL; 3941 3942 if (m->m_pkthdr.ph_ifidx == 0) 3943 return (0); 3944 3945 smr_read_enter(); 3946 ifp = if_get_smr(m->m_pkthdr.ph_ifidx); 3947 if (ifp != NULL) { 3948 kif = (struct pfi_kif *)ifp->if_pf_kif; 3949#if NCARP > 0 3950 if (ifp->if_type == IFT_CARP) { 3951 struct ifnet *ifp0 = if_get_smr(ifp->if_carpdevidx); 3952 if (ifp0 != NULL) 3953 kif = (struct pfi_kif *)ifp0->if_pf_kif; 3954 } 3955#endif /* NCARP */ 3956 } 3957 smr_read_leave(); 3958 3959 if (kif == NULL) { 3960 DPFPRINTF(LOG_ERR, 3961 "%s: kif == NULL, @%d via %s", __func__, 3962 r->nr, r->rcv_ifname); 3963 return (0); 3964 } 3965 3966 return (pfi_kif_match(r->rcv_kif, kif)); 3967} 3968 3969void 3970pf_tag_packet(struct mbuf *m, int tag, int rtableid) 3971{ 3972 if (tag > 0) 3973 m->m_pkthdr.pf.tag = tag; 3974 if (rtableid >= 0) 3975 m->m_pkthdr.ph_rtableid = (u_int)rtableid; 3976} 3977 3978void 3979pf_anchor_stack_init(void) 3980{ 3981 struct pf_anchor_stackframe *stack; 3982 3983 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3984 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0]; 3985 cpumem_leave(pf_anchor_stack, stack); 3986} 3987 3988int 3989pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf) 3990{ 3991 struct 
pf_anchor_stackframe *stack; 3992 int rv; 3993 3994 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 3995 rv = (sf == &stack[PF_ANCHOR_STACK_MAX]); 3996 cpumem_leave(pf_anchor_stack, stack); 3997 3998 return (rv); 3999} 4000 4001int 4002pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf) 4003{ 4004 struct pf_anchor_stackframe *stack; 4005 int rv; 4006 4007 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 4008 rv = (sf == &stack[0]); 4009 cpumem_leave(pf_anchor_stack, stack); 4010 4011 return (rv); 4012} 4013 4014struct pf_anchor_stackframe * 4015pf_anchor_stack_top(void) 4016{ 4017 struct pf_anchor_stackframe *stack; 4018 struct pf_anchor_stackframe *top_sf; 4019 4020 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 4021 top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top; 4022 cpumem_leave(pf_anchor_stack, stack); 4023 4024 return (top_sf); 4025} 4026 4027int 4028pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *anchor, 4029 struct pf_rule *r, struct pf_anchor *child, int jump_target) 4030{ 4031 struct pf_anchor_stackframe *stack; 4032 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top(); 4033 4034 top_sf++; 4035 if (pf_anchor_stack_is_full(top_sf)) 4036 return (-1); 4037 4038 top_sf->sf_rs = rs; 4039 top_sf->sf_anchor = anchor; 4040 top_sf->sf_r = r; 4041 top_sf->sf_child = child; 4042 top_sf->sf_jump_target = jump_target; 4043 4044 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 4045 4046 if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX])) 4047 panic("%s: top frame outside of anchor stack range", __func__); 4048 4049 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf; 4050 cpumem_leave(pf_anchor_stack, stack); 4051 4052 return (0); 4053} 4054 4055int 4056pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **anchor, 4057 struct pf_rule **r, struct pf_anchor **child, int *jump_target) 4058{ 4059 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top(); 4060 struct pf_anchor_stackframe *stack; 4061 int on_top; 4062 4063 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack); 4064 if (pf_anchor_stack_is_empty(top_sf)) { 4065 on_top = -1; 4066 } else { 4067 if ((top_sf <= &stack[0]) || 4068 (top_sf >= &stack[PF_ANCHOR_STACK_MAX])) 4069 panic("%s: top frame outside of anchor stack range", 4070 __func__); 4071 4072 *rs = top_sf->sf_rs; 4073 *anchor = top_sf->sf_anchor; 4074 *r = top_sf->sf_r; 4075 *child = top_sf->sf_child; 4076 *jump_target = top_sf->sf_jump_target; 4077 top_sf--; 4078 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf; 4079 on_top = 0; 4080 } 4081 cpumem_leave(pf_anchor_stack, stack); 4082 4083 return (on_top); 4084} 4085 4086void 4087pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr, 4088 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af) 4089{ 4090 switch (af) { 4091 case AF_INET: 4092 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 4093 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 4094 break; 4095#ifdef INET6 4096 case AF_INET6: 4097 naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) | 4098 ((rmask->addr32[0] ^ 0xffffffff ) & saddr->addr32[0]); 4099 naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) | 4100 ((rmask->addr32[1] ^ 0xffffffff ) & saddr->addr32[1]); 4101 naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) | 4102 ((rmask->addr32[2] ^ 0xffffffff ) & saddr->addr32[2]); 4103 naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) | 4104 ((rmask->addr32[3] ^ 0xffffffff 
) & saddr->addr32[3]); 4105 break; 4106#endif /* INET6 */ 4107 default: 4108 unhandled_af(af); 4109 } 4110} 4111 4112void 4113pf_addr_inc(struct pf_addr *addr, sa_family_t af) 4114{ 4115 switch (af) { 4116 case AF_INET: 4117 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1); 4118 break; 4119#ifdef INET6 4120 case AF_INET6: 4121 if (addr->addr32[3] == 0xffffffff) { 4122 addr->addr32[3] = 0; 4123 if (addr->addr32[2] == 0xffffffff) { 4124 addr->addr32[2] = 0; 4125 if (addr->addr32[1] == 0xffffffff) { 4126 addr->addr32[1] = 0; 4127 addr->addr32[0] = 4128 htonl(ntohl(addr->addr32[0]) + 1); 4129 } else 4130 addr->addr32[1] = 4131 htonl(ntohl(addr->addr32[1]) + 1); 4132 } else 4133 addr->addr32[2] = 4134 htonl(ntohl(addr->addr32[2]) + 1); 4135 } else 4136 addr->addr32[3] = 4137 htonl(ntohl(addr->addr32[3]) + 1); 4138 break; 4139#endif /* INET6 */ 4140 default: 4141 unhandled_af(af); 4142 } 4143} 4144 4145int 4146pf_socket_lookup(struct pf_pdesc *pd) 4147{ 4148 struct pf_addr *saddr, *daddr; 4149 u_int16_t sport, dport; 4150 struct inpcbtable *table; 4151 struct inpcb *inp; 4152 4153 pd->lookup.uid = -1; 4154 pd->lookup.gid = -1; 4155 pd->lookup.pid = NO_PID; 4156 switch (pd->virtual_proto) { 4157 case IPPROTO_TCP: 4158 sport = pd->hdr.tcp.th_sport; 4159 dport = pd->hdr.tcp.th_dport; 4160 PF_ASSERT_LOCKED(); 4161 NET_ASSERT_LOCKED(); 4162 table = &tcbtable; 4163 break; 4164 case IPPROTO_UDP: 4165 sport = pd->hdr.udp.uh_sport; 4166 dport = pd->hdr.udp.uh_dport; 4167 PF_ASSERT_LOCKED(); 4168 NET_ASSERT_LOCKED(); 4169 table = &udbtable; 4170 break; 4171 default: 4172 return (-1); 4173 } 4174 if (pd->dir == PF_IN) { 4175 saddr = pd->src; 4176 daddr = pd->dst; 4177 } else { 4178 u_int16_t p; 4179 4180 p = sport; 4181 sport = dport; 4182 dport = p; 4183 saddr = pd->dst; 4184 daddr = pd->src; 4185 } 4186 switch (pd->af) { 4187 case AF_INET: 4188 /* 4189 * Fails when rtable is changed while evaluating the ruleset 4190 * The socket looked up will not match the one hit in the end. 
4191 */ 4192 inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport, 4193 pd->rdomain); 4194 if (inp == NULL) { 4195 inp = in_pcblookup_listen(table, daddr->v4, dport, 4196 NULL, pd->rdomain); 4197 if (inp == NULL) 4198 return (-1); 4199 } 4200 break; 4201#ifdef INET6 4202 case AF_INET6: 4203 if (pd->virtual_proto == IPPROTO_UDP) 4204 table = &udb6table; 4205 if (pd->virtual_proto == IPPROTO_TCP) 4206 table = &tcb6table; 4207 inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6, 4208 dport, pd->rdomain); 4209 if (inp == NULL) { 4210 inp = in6_pcblookup_listen(table, &daddr->v6, dport, 4211 NULL, pd->rdomain); 4212 if (inp == NULL) 4213 return (-1); 4214 } 4215 break; 4216#endif /* INET6 */ 4217 default: 4218 unhandled_af(pd->af); 4219 } 4220 pd->lookup.uid = inp->inp_socket->so_euid; 4221 pd->lookup.gid = inp->inp_socket->so_egid; 4222 pd->lookup.pid = inp->inp_socket->so_cpid; 4223 in_pcbunref(inp); 4224 return (1); 4225} 4226 4227/* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity" 4228 * /\ (eoh - r) >= min_typelen >= 2 "safety" ) 4229 * 4230 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen 4231 */ 4232u_int8_t* 4233pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type, 4234 u_int8_t min_typelen) 4235{ 4236 u_int8_t *eoh = opts + hlen; 4237 4238 if (min_typelen < 2) 4239 return (NULL); 4240 4241 while ((eoh - opt) >= min_typelen) { 4242 switch (*opt) { 4243 case TCPOPT_EOL: 4244 /* FALLTHROUGH - Workaround the failure of some 4245 systems to NOP-pad their bzero'd option buffers, 4246 producing spurious EOLs */ 4247 case TCPOPT_NOP: 4248 opt++; 4249 continue; 4250 default: 4251 if (opt[0] == type && 4252 opt[1] >= min_typelen) 4253 return (opt); 4254 } 4255 4256 opt += MAX(opt[1], 2); /* evade infinite loops */ 4257 } 4258 4259 return (NULL); 4260} 4261 4262u_int8_t 4263pf_get_wscale(struct pf_pdesc *pd) 4264{ 4265 int olen; 4266 u_int8_t opts[MAX_TCPOPTLEN], *opt; 4267 u_int8_t wscale = 0; 4268 4269 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 4270 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m, 4271 pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) 4272 return (0); 4273 4274 opt = opts; 4275 while ((opt = pf_find_tcpopt(opt, opts, olen, 4276 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) { 4277 wscale = opt[2]; 4278 wscale = MIN(wscale, TCP_MAX_WINSHIFT); 4279 wscale |= PF_WSCALE_FLAG; 4280 4281 opt += opt[1]; 4282 } 4283 4284 return (wscale); 4285} 4286 4287u_int16_t 4288pf_get_mss(struct pf_pdesc *pd, uint16_t mssdflt) 4289{ 4290 int olen; 4291 u_int8_t opts[MAX_TCPOPTLEN], *opt; 4292 u_int16_t mss; 4293 4294 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr); 4295 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m, 4296 pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af)) 4297 return (0); 4298 4299 mss = mssdflt; 4300 opt = opts; 4301 while ((opt = pf_find_tcpopt(opt, opts, olen, 4302 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) { 4303 memcpy(&mss, (opt + 2), 2); 4304 mss = ntohs(mss); 4305 4306 opt += opt[1]; 4307 } 4308 return (mss); 4309} 4310 4311u_int16_t 4312pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, uint16_t offer, 4313 uint16_t mssdflt) 4314{ 4315 struct ifnet *ifp; 4316 struct sockaddr_in *dst; 4317#ifdef INET6 4318 struct sockaddr_in6 *dst6; 4319#endif /* INET6 */ 4320 struct rtentry *rt = NULL; 4321 struct sockaddr_storage ss; 4322 int hlen, mss; 4323 4324 memset(&ss, 0, sizeof(ss)); 4325 4326 switch (af) { 4327 case AF_INET: 4328 hlen = sizeof(struct ip); 4329 dst = 
(struct sockaddr_in *)&ss; 4330 dst->sin_family = AF_INET; 4331 dst->sin_len = sizeof(*dst); 4332 dst->sin_addr = addr->v4; 4333 rt = rtalloc(sintosa(dst), 0, rtableid); 4334 break; 4335#ifdef INET6 4336 case AF_INET6: 4337 hlen = sizeof(struct ip6_hdr); 4338 dst6 = (struct sockaddr_in6 *)&ss; 4339 dst6->sin6_family = AF_INET6; 4340 dst6->sin6_len = sizeof(*dst6); 4341 dst6->sin6_addr = addr->v6; 4342 rt = rtalloc(sin6tosa(dst6), 0, rtableid); 4343 break; 4344#endif /* INET6 */ 4345 } 4346 4347 mss = mssdflt; 4348 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) { 4349 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr); 4350 mss = imax(mss, mssdflt); 4351 if_put(ifp); 4352 } 4353 rtfree(rt); 4354 mss = imin(mss, offer); 4355 mss = imax(mss, 64); /* sanity - at least max opt space */ 4356 return (mss); 4357} 4358 4359static __inline int 4360pf_set_rt_ifp(struct pf_state *st, struct pf_addr *saddr, sa_family_t af, 4361 struct pf_src_node **sns) 4362{ 4363 struct pf_rule *r = st->rule.ptr; 4364 int rv; 4365 4366 if (!r->rt) 4367 return (0); 4368 4369 rv = pf_map_addr(af, r, saddr, &st->rt_addr, NULL, sns, 4370 &r->route, PF_SN_ROUTE); 4371 if (rv == 0) 4372 st->rt = r->rt; 4373 4374 return (rv); 4375} 4376 4377u_int32_t 4378pf_tcp_iss(struct pf_pdesc *pd) 4379{ 4380 SHA2_CTX ctx; 4381 union { 4382 uint8_t bytes[SHA512_DIGEST_LENGTH]; 4383 uint32_t words[1]; 4384 } digest; 4385 4386 if (pf_tcp_secret_init == 0) { 4387 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret)); 4388 SHA512Init(&pf_tcp_secret_ctx); 4389 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret, 4390 sizeof(pf_tcp_secret)); 4391 pf_tcp_secret_init = 1; 4392 } 4393 ctx = pf_tcp_secret_ctx; 4394 4395 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain)); 4396 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short)); 4397 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short)); 4398 switch (pd->af) { 4399 case AF_INET: 4400 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr)); 4401 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr)); 4402 break; 4403#ifdef INET6 4404 case AF_INET6: 4405 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr)); 4406 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr)); 4407 break; 4408#endif /* INET6 */ 4409 } 4410 SHA512Final(digest.bytes, &ctx); 4411 pf_tcp_iss_off += 4096; 4412 return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off); 4413} 4414 4415void 4416pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a) 4417{ 4418 if (r->qid) 4419 a->qid = r->qid; 4420 if (r->pqid) 4421 a->pqid = r->pqid; 4422 if (r->rtableid >= 0) 4423 a->rtableid = r->rtableid; 4424#if NPFLOG > 0 4425 a->log |= r->log; 4426#endif /* NPFLOG > 0 */ 4427 if (r->scrub_flags & PFSTATE_SETTOS) 4428 a->set_tos = r->set_tos; 4429 if (r->min_ttl) 4430 a->min_ttl = r->min_ttl; 4431 if (r->max_mss) 4432 a->max_mss = r->max_mss; 4433 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID| 4434 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO)); 4435 if (r->scrub_flags & PFSTATE_SETPRIO) { 4436 a->set_prio[0] = r->set_prio[0]; 4437 a->set_prio[1] = r->set_prio[1]; 4438 } 4439 if (r->rule_flag & PFRULE_SETDELAY) 4440 a->delay = r->delay; 4441} 4442 4443#define PF_TEST_ATTRIB(t, a) \ 4444 if (t) { \ 4445 r = a; \ 4446 continue; \ 4447 } else do { \ 4448 } while (0) 4449 4450enum pf_test_status 4451pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset) 4452{ 4453 struct pf_rule *r; 4454 struct pf_anchor *child = NULL; 4455 int target; 4456 4457 pf_anchor_stack_init(); 
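	/*
	 * Descriptive note on the traversal below (summarizing the code
	 * that follows, not part of the upstream comments): the active
	 * ruleset is walked iteratively rather than recursively.  When a
	 * rule points at an anchor, pf_anchor_stack_push() saves the
	 * current ruleset, anchor rule and a jump target, and evaluation
	 * restarts at enter_ruleset for the child ruleset.  When a child
	 * ruleset is exhausted, pf_anchor_stack_pop() restores the saved
	 * frame and control returns to next_child (wildcard anchors) or
	 * next_rule.  A matching rule with "quick" set ends evaluation
	 * immediately via PF_TEST_QUICK.
	 */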
4458enter_ruleset: 4459 r = TAILQ_FIRST(ruleset->rules.active.ptr); 4460 while (r != NULL) { 4461 struct pf_statelim *stlim = NULL; 4462 struct pf_sourcelim *srlim = NULL; 4463 struct pf_source *sr = NULL; 4464 unsigned int gen; 4465 4466 PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED, 4467 TAILQ_NEXT(r, entries)); 4468 r->evaluations++; 4469 PF_TEST_ATTRIB( 4470 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot), 4471 r->skip[PF_SKIP_IFP].ptr); 4472 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir), 4473 r->skip[PF_SKIP_DIR].ptr); 4474 PF_TEST_ATTRIB((r->onrdomain >= 0 && 4475 (r->onrdomain == ctx->pd->rdomain) == r->ifnot), 4476 r->skip[PF_SKIP_RDOM].ptr); 4477 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af), 4478 r->skip[PF_SKIP_AF].ptr); 4479 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto), 4480 r->skip[PF_SKIP_PROTO].ptr); 4481 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr, 4482 ctx->pd->naf, r->src.neg, ctx->pd->kif, 4483 ctx->act.rtableid)), 4484 r->skip[PF_SKIP_SRC_ADDR].ptr); 4485 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr, 4486 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)), 4487 r->skip[PF_SKIP_DST_ADDR].ptr); 4488 4489 switch (ctx->pd->virtual_proto) { 4490 case PF_VPROTO_FRAGMENT: 4491 /* tcp/udp only. port_op always 0 in other cases */ 4492 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op), 4493 TAILQ_NEXT(r, entries)); 4494 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP && 4495 r->flagset), 4496 TAILQ_NEXT(r, entries)); 4497 /* icmp only. type/code always 0 in other cases */ 4498 PF_TEST_ATTRIB((r->type || r->code), 4499 TAILQ_NEXT(r, entries)); 4500 /* tcp/udp only. {uid|gid}.op always 0 in other cases */ 4501 PF_TEST_ATTRIB((r->gid.op || r->uid.op), 4502 TAILQ_NEXT(r, entries)); 4503 break; 4504 4505 case IPPROTO_TCP: 4506 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) != 4507 r->flags), 4508 TAILQ_NEXT(r, entries)); 4509 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY && 4510 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd), 4511 r->os_fingerprint)), 4512 TAILQ_NEXT(r, entries)); 4513 /* FALLTHROUGH */ 4514 4515 case IPPROTO_UDP: 4516 /* tcp/udp only. port_op always 0 in other cases */ 4517 PF_TEST_ATTRIB((r->src.port_op && 4518 !pf_match_port(r->src.port_op, r->src.port[0], 4519 r->src.port[1], ctx->pd->nsport)), 4520 r->skip[PF_SKIP_SRC_PORT].ptr); 4521 PF_TEST_ATTRIB((r->dst.port_op && 4522 !pf_match_port(r->dst.port_op, r->dst.port[0], 4523 r->dst.port[1], ctx->pd->ndport)), 4524 r->skip[PF_SKIP_DST_PORT].ptr); 4525 /* tcp/udp only. uid.op always 0 in other cases */ 4526 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done || 4527 (ctx->pd->lookup.done = 4528 pf_socket_lookup(ctx->pd), 1)) && 4529 !pf_match_uid(r->uid.op, r->uid.uid[0], 4530 r->uid.uid[1], ctx->pd->lookup.uid)), 4531 TAILQ_NEXT(r, entries)); 4532 /* tcp/udp only. gid.op always 0 in other cases */ 4533 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done || 4534 (ctx->pd->lookup.done = 4535 pf_socket_lookup(ctx->pd), 1)) && 4536 !pf_match_gid(r->gid.op, r->gid.gid[0], 4537 r->gid.gid[1], ctx->pd->lookup.gid)), 4538 TAILQ_NEXT(r, entries)); 4539 break; 4540 4541 case IPPROTO_ICMP: 4542 /* icmp only. type always 0 in other cases */ 4543 PF_TEST_ATTRIB((r->type && 4544 r->type != ctx->icmptype + 1), 4545 TAILQ_NEXT(r, entries)); 4546 /* icmp only. type always 0 in other cases */ 4547 PF_TEST_ATTRIB((r->code && 4548 r->code != ctx->icmpcode + 1), 4549 TAILQ_NEXT(r, entries)); 4550 /* icmp only. 
don't create states on replies */ 4551 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 4552 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 4553 ctx->icmp_dir != PF_IN), 4554 TAILQ_NEXT(r, entries)); 4555 break; 4556 4557 case IPPROTO_ICMPV6: 4558 /* icmp only. type always 0 in other cases */ 4559 PF_TEST_ATTRIB((r->type && 4560 r->type != ctx->icmptype + 1), 4561 TAILQ_NEXT(r, entries)); 4562 /* icmp only. type always 0 in other cases */ 4563 PF_TEST_ATTRIB((r->code && 4564 r->code != ctx->icmpcode + 1), 4565 TAILQ_NEXT(r, entries)); 4566 /* icmp only. don't create states on replies */ 4567 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp && 4568 (r->rule_flag & PFRULE_STATESLOPPY) == 0 && 4569 ctx->icmp_dir != PF_IN && 4570 ctx->icmptype != ND_NEIGHBOR_ADVERT), 4571 TAILQ_NEXT(r, entries)); 4572 break; 4573 4574 default: 4575 break; 4576 } 4577 4578 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT && 4579 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT), 4580 TAILQ_NEXT(r, entries)); 4581 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)), 4582 TAILQ_NEXT(r, entries)); 4583 PF_TEST_ATTRIB((r->prob && 4584 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1), 4585 TAILQ_NEXT(r, entries)); 4586 PF_TEST_ATTRIB((r->match_tag && 4587 !pf_match_tag(ctx->pd->m, r, &ctx->tag)), 4588 TAILQ_NEXT(r, entries)); 4589 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) == 4590 r->rcvifnot), 4591 TAILQ_NEXT(r, entries)); 4592 PF_TEST_ATTRIB((r->prio && 4593 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) != 4594 ctx->pd->m->m_pkthdr.pf.prio), 4595 TAILQ_NEXT(r, entries)); 4596 4597 if (r->statelim.id != PF_STATELIM_ID_NONE) { 4598 stlim = pf_statelim_find(r->statelim.id); 4599 4600 /* 4601 * Treat a missing limiter like an exhausted limiter. 4602 * There is no "backend" to get a resource out of 4603 * so the rule can't create state. 4604 */ 4605 PF_TEST_ATTRIB(stlim == NULL, 4606 TAILQ_NEXT(r, entries)); 4607 4608 /* 4609 * An overcommitted pool means this rule 4610 * can't create state. 4611 */ 4612 if (stlim->pfstlim_inuse >= stlim->pfstlim_limit) { 4613 gen = pf_statelim_enter(stlim); 4614 stlim->pfstlim_counters.hardlimited++; 4615 pf_statelim_leave(stlim, gen); 4616 if (r->statelim.limiter_action == PF_LIMITER_BLOCK) { 4617 ctx->limiter_drop = 1; 4618 REASON_SET(&ctx->reason, PFRES_MAXSTATES); 4619 break; /* stop rule processing */ 4620 } 4621 4622 r = TAILQ_NEXT(r, entries); 4623 continue; 4624 } 4625 4626 /* 4627 * Is access to the pool rate limited? 4628 */ 4629 if (stlim->pfstlim_rate.limit != 0) { 4630 uint64_t ts = getnsecuptime(); 4631 uint64_t diff = ts - stlim->pfstlim_rate_ts; 4632 4633 if (diff < stlim->pfstlim_rate_token) { 4634 gen = pf_statelim_enter(stlim); 4635 stlim->pfstlim_counters.ratelimited++; 4636 pf_statelim_leave(stlim, gen); 4637 if (r->statelim.limiter_action == 4638 PF_LIMITER_BLOCK) { 4639 ctx->limiter_drop = 1; 4640 REASON_SET(&ctx->reason, 4641 PFRES_MAXSTATES); 4642 /* stop rule processing */ 4643 break; 4644 } 4645 r = TAILQ_NEXT(r, entries); 4646 continue; 4647 } 4648 4649 if (diff > stlim->pfstlim_rate_bucket) { 4650 stlim->pfstlim_rate_ts = 4651 ts - stlim->pfstlim_rate_bucket; 4652 } 4653 } 4654 } 4655 4656 if (r->sourcelim.id != PF_SOURCELIM_ID_NONE) { 4657 struct pf_source key; 4658 4659 srlim = pf_sourcelim_find(r->sourcelim.id); 4660 4661 /* 4662 * Treat a missing pool like an overcommitted pool. 4663 * There is no "backend" to get a resource out of 4664 * so the rule can't create state. 
4665 */ 4666 PF_TEST_ATTRIB(srlim == NULL, 4667 TAILQ_NEXT(r, entries)); 4668 4669 pf_source_key(srlim, &key, 4670 ctx->pd->af, ctx->pd->rdomain, ctx->pd->src); 4671 sr = pf_source_find(srlim, &key); 4672 if (sr != NULL) { 4673 /* 4674 * An overcommitted limiter means this rule 4675 * can't create state. 4676 */ 4677 if (sr->pfsr_inuse >= srlim->pfsrlim_limit) { 4678 sr->pfsr_counters.hardlimited++; 4679 gen = pf_sourcelim_enter(srlim); 4680 srlim->pfsrlim_counters.hardlimited++; 4681 pf_sourcelim_leave(srlim, gen); 4682 if (r->sourcelim.limiter_action == 4683 PF_LIMITER_BLOCK) { 4684 ctx->limiter_drop = 1; 4685 REASON_SET(&ctx->reason, 4686 PFRES_SRCLIMIT); 4687 /* stop rule processing */ 4688 break; 4689 } 4690 r = TAILQ_NEXT(r, entries); 4691 continue; 4692 } 4693 4694 /* 4695 * Is access to the pool rate limited? 4696 */ 4697 if (srlim->pfsrlim_rate.limit != 0) { 4698 uint64_t ts = getnsecuptime(); 4699 uint64_t diff = ts - sr->pfsr_rate_ts; 4700 4701 if (diff < srlim->pfsrlim_rate_token) { 4702 sr->pfsr_counters.ratelimited++; 4703 gen = pf_sourcelim_enter(srlim); 4704 srlim->pfsrlim_counters.ratelimited++; 4705 pf_sourcelim_leave(srlim, gen); 4706 if (r->sourcelim.limiter_action == 4707 PF_LIMITER_BLOCK) { 4708 ctx->limiter_drop = 1; 4709 REASON_SET(&ctx->reason, 4710 PFRES_SRCLIMIT); 4711 /* stop rules */ 4712 break; 4713 } 4714 r = TAILQ_NEXT(r, entries); 4715 continue; 4716 } 4717 4718 if (diff > srlim->pfsrlim_rate_bucket) { 4719 sr->pfsr_rate_ts = ts - 4720 srlim->pfsrlim_rate_bucket; 4721 } 4722 } 4723 } else { 4724 /* 4725 * a new source entry will (should) 4726 * admit a state. 4727 */ 4728 4729 if (srlim->pfsrlim_nsources >= 4730 srlim->pfsrlim_entries) { 4731 gen = pf_sourcelim_enter(srlim); 4732 srlim->pfsrlim_counters.addrlimited++; 4733 pf_sourcelim_leave(srlim, gen); 4734 if (r->sourcelim.limiter_action == 4735 PF_LIMITER_BLOCK) { 4736 ctx->limiter_drop = 1; 4737 REASON_SET(&ctx->reason, 4738 PFRES_SRCLIMIT); 4739 /* stop rules processing */ 4740 break; 4741 } 4742 r = TAILQ_NEXT(r, entries); 4743 continue; 4744 } 4745 } 4746 } 4747 4748 /* must be last! 
*/ 4749 if (r->pktrate.limit) { 4750 pf_add_threshold(&r->pktrate); 4751 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)), 4752 TAILQ_NEXT(r, entries)); 4753 } 4754 4755 /* FALLTHROUGH */ 4756 if (r->tag) 4757 ctx->tag = r->tag; 4758 if (r->anchor == NULL) { 4759 4760 if (r->rule_flag & PFRULE_ONCE) { 4761 u_int32_t rule_flag; 4762 4763 rule_flag = r->rule_flag; 4764 if (((rule_flag & PFRULE_EXPIRED) == 0) && 4765 atomic_cas_uint(&r->rule_flag, rule_flag, 4766 rule_flag | PFRULE_EXPIRED) == rule_flag) { 4767 r->exptime = gettime(); 4768 } else { 4769 r = TAILQ_NEXT(r, entries); 4770 continue; 4771 } 4772 } 4773 4774 if (r->action == PF_MATCH) { 4775 if ((ctx->ri = pool_get(&pf_rule_item_pl, 4776 PR_NOWAIT)) == NULL) { 4777 REASON_SET(&ctx->reason, PFRES_MEMORY); 4778 return (PF_TEST_FAIL); 4779 } 4780 ctx->ri->r = r; 4781 /* order is irrelevant */ 4782 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry); 4783 ctx->ri = NULL; 4784 pf_rule_to_actions(r, &ctx->act); 4785 if (r->rule_flag & PFRULE_AFTO) 4786 ctx->pd->naf = r->naf; 4787 if (pf_get_transaddr(r, ctx->pd, ctx->sns, 4788 &ctx->nr) == -1) { 4789 REASON_SET(&ctx->reason, 4790 PFRES_TRANSLATE); 4791 return (PF_TEST_FAIL); 4792 } 4793#if NPFLOG > 0 4794 if (r->log) { 4795 REASON_SET(&ctx->reason, PFRES_MATCH); 4796 pflog_packet(ctx->pd, ctx->reason, r, 4797 ctx->a, ruleset, NULL); 4798 } 4799#endif /* NPFLOG > 0 */ 4800 } else { 4801 /* 4802 * found matching r 4803 */ 4804 *ctx->rm = r; 4805 /* 4806 * anchor, with ruleset, where r belongs to 4807 */ 4808 *ctx->am = ctx->a; 4809 /* 4810 * ruleset where r belongs to 4811 */ 4812 *ctx->rsm = ruleset; 4813 /* 4814 * ruleset, where anchor belongs to. 4815 */ 4816 ctx->arsm = ctx->aruleset; 4817 /* 4818 * state/source pools 4819 */ 4820 4821 ctx->statelim = stlim; 4822 ctx->sourcelim = srlim; 4823 ctx->source = sr; 4824 } 4825 4826#if NPFLOG > 0 4827 if (ctx->act.log & PF_LOG_MATCHES) 4828 pf_log_matches(ctx->pd, r, ctx->a, ruleset, 4829 &ctx->rules); 4830#endif /* NPFLOG > 0 */ 4831 4832 if (r->quick) 4833 return (PF_TEST_QUICK); 4834 } else { 4835 ctx->aruleset = &r->anchor->ruleset; 4836 if (r->anchor_wildcard) { 4837 RB_FOREACH(child, pf_anchor_node, 4838 &r->anchor->children) { 4839 if (pf_anchor_stack_push(ruleset, 4840 ctx->a, r, child, 4841 PF_NEXT_CHILD) != 0) 4842 return (PF_TEST_FAIL); 4843 4844 ctx->a = r; 4845 ruleset = &child->ruleset; 4846 goto enter_ruleset; 4847next_child: 4848 continue; /* with RB_FOREACH() */ 4849 } 4850 } else { 4851 if (pf_anchor_stack_push(ruleset, ctx->a, 4852 r, child, PF_NEXT_RULE) != 0) 4853 return (PF_TEST_FAIL); 4854 4855 ctx->a = r; 4856 ruleset = &r->anchor->ruleset; 4857 child = NULL; 4858 goto enter_ruleset; 4859next_rule: 4860 ; 4861 } 4862 } 4863 r = TAILQ_NEXT(r, entries); 4864 } 4865 4866 if (pf_anchor_stack_pop(&ruleset, &ctx->a, &r, &child, 4867 &target) == 0) { 4868 4869 /* stop if any rule matched within quick anchors. 
*/ 4870 if (r->quick && *ctx->am == r) 4871 return (PF_TEST_QUICK); 4872 4873 switch (target) { 4874 case PF_NEXT_CHILD: 4875 goto next_child; 4876 case PF_NEXT_RULE: 4877 goto next_rule; 4878 default: 4879 panic("%s: unknown jump target", __func__); 4880 } 4881 } 4882 4883 return (PF_TEST_OK); 4884} 4885 4886int 4887pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm, 4888 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason) 4889{ 4890 struct pf_rule *r = NULL; 4891 struct pf_rule *a = NULL; 4892 struct pf_ruleset *ruleset = NULL; 4893 struct pf_state_key *skw = NULL, *sks = NULL; 4894 int rewrite = 0; 4895 u_int16_t virtual_type, virtual_id; 4896 int action = PF_DROP; 4897 struct pf_test_ctx ctx; 4898 int rv; 4899 4900 PF_ASSERT_LOCKED(); 4901 4902 memset(&ctx, 0, sizeof(ctx)); 4903 ctx.pd = pd; 4904 ctx.rm = rm; 4905 ctx.am = am; 4906 ctx.rsm = rsm; 4907 ctx.th = &pd->hdr.tcp; 4908 ctx.act.rtableid = pd->rdomain; 4909 ctx.tag = -1; 4910 SLIST_INIT(&ctx.rules); 4911 4912 if (pd->dir == PF_IN && if_congested()) { 4913 REASON_SET(&ctx.reason, PFRES_CONGEST); 4914 return (PF_DROP); 4915 } 4916 4917 switch (pd->virtual_proto) { 4918 case IPPROTO_ICMP: 4919 ctx.icmptype = pd->hdr.icmp.icmp_type; 4920 ctx.icmpcode = pd->hdr.icmp.icmp_code; 4921 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 4922 &ctx.icmp_dir, &virtual_id, &virtual_type); 4923 if (ctx.icmp_dir == PF_IN) { 4924 pd->osport = pd->nsport = virtual_id; 4925 pd->odport = pd->ndport = virtual_type; 4926 } else { 4927 pd->osport = pd->nsport = virtual_type; 4928 pd->odport = pd->ndport = virtual_id; 4929 } 4930 break; 4931#ifdef INET6 4932 case IPPROTO_ICMPV6: 4933 ctx.icmptype = pd->hdr.icmp6.icmp6_type; 4934 ctx.icmpcode = pd->hdr.icmp6.icmp6_code; 4935 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype, 4936 &ctx.icmp_dir, &virtual_id, &virtual_type); 4937 if (ctx.icmp_dir == PF_IN) { 4938 pd->osport = pd->nsport = virtual_id; 4939 pd->odport = pd->ndport = virtual_type; 4940 } else { 4941 pd->osport = pd->nsport = virtual_type; 4942 pd->odport = pd->ndport = virtual_id; 4943 } 4944 break; 4945#endif /* INET6 */ 4946 } 4947 4948 ruleset = &pf_main_ruleset; 4949 rv = pf_match_rule(&ctx, ruleset); 4950 if (rv == PF_TEST_FAIL || ctx.limiter_drop == 1) { 4951 REASON_SET(reason, ctx.reason); 4952 goto cleanup; 4953 } 4954 4955 r = *ctx.rm; /* matching rule */ 4956 a = *ctx.am; /* rule that defines an anchor containing 'r' */ 4957 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */ 4958 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */ 4959 4960 /* apply actions for last matching pass/block rule */ 4961 pf_rule_to_actions(r, &ctx.act); 4962 if (r->rule_flag & PFRULE_AFTO) 4963 pd->naf = r->naf; 4964 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) { 4965 REASON_SET(&ctx.reason, PFRES_TRANSLATE); 4966 goto cleanup; 4967 } 4968 REASON_SET(&ctx.reason, PFRES_MATCH); 4969 4970#if NPFLOG > 0 4971 if (r->log) 4972 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL); 4973 if (ctx.act.log & PF_LOG_MATCHES) 4974 pf_log_matches(pd, r, a, ruleset, &ctx.rules); 4975#endif /* NPFLOG > 0 */ 4976 4977 if (pd->virtual_proto != PF_VPROTO_FRAGMENT && 4978 (r->action == PF_DROP) && 4979 ((r->rule_flag & PFRULE_RETURNRST) || 4980 (r->rule_flag & PFRULE_RETURNICMP) || 4981 (r->rule_flag & PFRULE_RETURN))) { 4982 if (pd->proto == IPPROTO_TCP && 4983 ((r->rule_flag & PFRULE_RETURNRST) || 4984 (r->rule_flag & PFRULE_RETURN)) && 4985 !(ctx.th->th_flags & TH_RST)) { 4986 u_int32_t ack = 4987 
ntohl(ctx.th->th_seq) + pd->p_len; 4988 4989 if (pf_check_tcp_cksum(pd->m, pd->off, 4990 pd->tot_len - pd->off, pd->af)) 4991 REASON_SET(&ctx.reason, PFRES_PROTCKSUM); 4992 else { 4993 if (ctx.th->th_flags & TH_SYN) 4994 ack++; 4995 if (ctx.th->th_flags & TH_FIN) 4996 ack++; 4997 pf_send_tcp(r, pd->af, pd->dst, 4998 pd->src, ctx.th->th_dport, 4999 ctx.th->th_sport, ntohl(ctx.th->th_ack), 5000 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl, 5001 1, 0, pd->rdomain, &ctx.reason); 5002 } 5003 } else if ((pd->proto != IPPROTO_ICMP || 5004 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET && 5005 r->return_icmp) 5006 pf_send_icmp(pd->m, r->return_icmp >> 8, 5007 r->return_icmp & 255, 0, pd->af, r, pd->rdomain); 5008 else if ((pd->proto != IPPROTO_ICMPV6 || 5009 (ctx.icmptype >= ICMP6_ECHO_REQUEST && 5010 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 && 5011 r->return_icmp6) 5012 pf_send_icmp(pd->m, r->return_icmp6 >> 8, 5013 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain); 5014 } 5015 5016 if (r->action == PF_DROP) 5017 goto cleanup; 5018 5019 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid); 5020 if (ctx.act.rtableid >= 0 && 5021 rtable_l2(ctx.act.rtableid) != pd->rdomain) 5022 pd->destchg = 1; 5023 5024 if (r->action == PF_PASS && pd->badopts != 0 && ! r->allow_opts) { 5025 REASON_SET(&ctx.reason, PFRES_IPOPTIONS); 5026#if NPFLOG > 0 5027 pd->pflog |= PF_LOG_FORCE; 5028#endif /* NPFLOG > 0 */ 5029 DPFPRINTF(LOG_NOTICE, "dropping packet with " 5030 "ip/ipv6 options in pf_test_rule()"); 5031 goto cleanup; 5032 } 5033 5034 if (pd->virtual_proto != PF_VPROTO_FRAGMENT 5035 && !ctx.state_icmp && r->keep_state) { 5036 5037 if (r->rule_flag & PFRULE_SRCTRACK && 5038 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE, 5039 pd->af, pd->src, NULL, NULL) != 0) { 5040 REASON_SET(&ctx.reason, PFRES_SRCLIMIT); 5041 goto cleanup; 5042 } 5043 5044 if (r->max_states && (r->states_cur >= r->max_states)) { 5045 pf_status.lcounters[LCNT_STATES]++; 5046 REASON_SET(&ctx.reason, PFRES_MAXSTATES); 5047 goto cleanup; 5048 } 5049 5050 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks, 5051 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns, 5052 &ctx); 5053 5054 if (action != PF_PASS) 5055 goto cleanup; 5056 5057 if (pd->proto == IPPROTO_TCP && 5058 r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 5059 action = pf_synproxy_ack(r, pd, sm, &ctx.act); 5060 if (action != PF_PASS) 5061 return (action); /* PF_SYNPROXY_DROP */ 5062 } 5063 5064 if (sks != skw) { 5065 struct pf_state_key *sk; 5066 5067 if (pd->dir == PF_IN) 5068 sk = sks; 5069 else 5070 sk = skw; 5071 rewrite += pf_translate(pd, 5072 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx], 5073 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx], 5074 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx], 5075 sk->port[pd->af == pd->naf ? 
pd->didx : pd->sidx], 5076 virtual_type, ctx.icmp_dir); 5077 } 5078 5079#ifdef INET6 5080 if (rewrite && skw->af != sks->af) 5081 action = PF_AFRT; 5082#endif /* INET6 */ 5083 5084 } else { 5085 action = PF_PASS; 5086 5087 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 5088 SLIST_REMOVE_HEAD(&ctx.rules, entry); 5089 pool_put(&pf_rule_item_pl, ctx.ri); 5090 } 5091 } 5092 5093 /* copy back packet headers if needed */ 5094 if (rewrite && pd->hdrlen) { 5095 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 5096 } 5097 5098#if NPFSYNC > 0 5099 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) && 5100 pd->dir == PF_OUT && pfsync_is_up()) { 5101 /* 5102 * We want the state created, but we dont 5103 * want to send this in case a partner 5104 * firewall has to know about it to allow 5105 * replies through it. 5106 */ 5107 if (pfsync_defer(*sm, pd->m)) 5108 return (PF_DEFER); 5109 } 5110#endif /* NPFSYNC > 0 */ 5111 5112 return (action); 5113 5114cleanup: 5115 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) { 5116 SLIST_REMOVE_HEAD(&ctx.rules, entry); 5117 pool_put(&pf_rule_item_pl, ctx.ri); 5118 } 5119 5120 return (action); 5121} 5122 5123static __inline int 5124pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a, 5125 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks, 5126 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules, 5127 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX], 5128 struct pf_test_ctx *ctx) 5129{ 5130 struct pf_state *st = NULL; 5131 struct pf_statelim *stlim = NULL; 5132 struct pf_sourcelim *srlim = NULL; 5133 struct pf_source *sr = NULL; 5134 struct pf_state_link *pfl; 5135 struct tcphdr *th = &pd->hdr.tcp; 5136 u_short reason; 5137 u_int i; 5138 5139 st = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO); 5140 if (st == NULL) { 5141 REASON_SET(&reason, PFRES_MEMORY); 5142 goto csfailed; 5143 } 5144 st->rule.ptr = r; 5145 st->anchor.ptr = a; 5146 st->natrule.ptr = nr; 5147 if (r->allow_opts) 5148 st->state_flags |= PFSTATE_ALLOWOPTS; 5149 if (r->rule_flag & PFRULE_STATESLOPPY) 5150 st->state_flags |= PFSTATE_SLOPPY; 5151 if (r->rule_flag & PFRULE_PFLOW) 5152 st->state_flags |= PFSTATE_PFLOW; 5153 if (r->rule_flag & PFRULE_NOSYNC) 5154 st->state_flags |= PFSTATE_NOSYNC; 5155#if NPFLOG > 0 5156 st->log = act->log & PF_LOG_ALL; 5157#endif /* NPFLOG > 0 */ 5158 st->qid = act->qid; 5159 st->pqid = act->pqid; 5160 st->rtableid[pd->didx] = act->rtableid; 5161 st->rtableid[pd->sidx] = -1; /* return traffic is routed normally */ 5162 st->min_ttl = act->min_ttl; 5163 st->set_tos = act->set_tos; 5164 st->max_mss = act->max_mss; 5165 st->state_flags |= act->flags; 5166#if NPFSYNC > 0 5167 st->sync_state = PFSYNC_S_NONE; 5168#endif /* NPFSYNC > 0 */ 5169 st->set_prio[0] = act->set_prio[0]; 5170 st->set_prio[1] = act->set_prio[1]; 5171 st->delay = act->delay; 5172 SLIST_INIT(&st->src_nodes); 5173 SLIST_INIT(&st->linkage); 5174 5175 /* 5176 * must initialize refcnt, before pf_state_insert() gets called. 5177 * pf_state_inserts() grabs reference for pfsync! 
5178 */ 5179 PF_REF_INIT(st->refcnt); 5180 mtx_init(&st->mtx, IPL_NET); 5181 5182 switch (pd->proto) { 5183 case IPPROTO_TCP: 5184 st->src.seqlo = ntohl(th->th_seq); 5185 st->src.seqhi = st->src.seqlo + pd->p_len + 1; 5186 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 5187 r->keep_state == PF_STATE_MODULATE) { 5188 /* Generate sequence number modulator */ 5189 st->src.seqdiff = pf_tcp_iss(pd) - st->src.seqlo; 5190 if (st->src.seqdiff == 0) 5191 st->src.seqdiff = 1; 5192 pf_patch_32(pd, &th->th_seq, 5193 htonl(st->src.seqlo + st->src.seqdiff)); 5194 *rewrite = 1; 5195 } else 5196 st->src.seqdiff = 0; 5197 if (th->th_flags & TH_SYN) { 5198 st->src.seqhi++; 5199 st->src.wscale = pf_get_wscale(pd); 5200 } 5201 st->src.max_win = MAX(ntohs(th->th_win), 1); 5202 if (st->src.wscale & PF_WSCALE_MASK) { 5203 /* Remove scale factor from initial window */ 5204 int win = st->src.max_win; 5205 win += 1 << (st->src.wscale & PF_WSCALE_MASK); 5206 st->src.max_win = (win - 1) >> 5207 (st->src.wscale & PF_WSCALE_MASK); 5208 } 5209 if (th->th_flags & TH_FIN) 5210 st->src.seqhi++; 5211 st->dst.seqhi = 1; 5212 st->dst.max_win = 1; 5213 pf_set_protostate(st, PF_PEER_SRC, TCPS_SYN_SENT); 5214 pf_set_protostate(st, PF_PEER_DST, TCPS_CLOSED); 5215 st->timeout = PFTM_TCP_FIRST_PACKET; 5216 atomic_inc_int(&pf_status.states_halfopen); 5217 break; 5218 case IPPROTO_UDP: 5219 pf_set_protostate(st, PF_PEER_SRC, PFUDPS_SINGLE); 5220 pf_set_protostate(st, PF_PEER_DST, PFUDPS_NO_TRAFFIC); 5221 st->timeout = PFTM_UDP_FIRST_PACKET; 5222 break; 5223 case IPPROTO_ICMP: 5224#ifdef INET6 5225 case IPPROTO_ICMPV6: 5226#endif /* INET6 */ 5227 st->timeout = PFTM_ICMP_FIRST_PACKET; 5228 break; 5229 default: 5230 pf_set_protostate(st, PF_PEER_SRC, PFOTHERS_SINGLE); 5231 pf_set_protostate(st, PF_PEER_DST, PFOTHERS_NO_TRAFFIC); 5232 st->timeout = PFTM_OTHER_FIRST_PACKET; 5233 } 5234 5235 st->creation = st->expire = getuptime(); 5236 5237 if (pd->proto == IPPROTO_TCP) { 5238 if (st->state_flags & PFSTATE_SCRUB_TCP && 5239 pf_normalize_tcp_init(pd, &st->src)) { 5240 REASON_SET(&reason, PFRES_MEMORY); 5241 goto csfailed; 5242 } 5243 if (st->state_flags & PFSTATE_SCRUB_TCP && st->src.scrub && 5244 pf_normalize_tcp_stateful(pd, &reason, st, 5245 &st->src, &st->dst, rewrite)) { 5246 /* This really shouldn't happen!!! 
*/ 5247 DPFPRINTF(LOG_ERR, 5248 "%s: tcp normalize failed on first pkt", __func__); 5249 goto csfailed; 5250 } 5251 } 5252 st->direction = pd->dir; 5253 5254 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) { 5255 REASON_SET(&reason, PFRES_MEMORY); 5256 goto csfailed; 5257 } 5258 5259 if (pf_set_rt_ifp(st, pd->src, (*skw)->af, sns) != 0) { 5260 REASON_SET(&reason, PFRES_NOROUTE); 5261 goto csfailed; 5262 } 5263 5264 for (i = 0; i < PF_SN_MAX; i++) 5265 if (sns[i] != NULL) { 5266 struct pf_sn_item *sni; 5267 5268 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT); 5269 if (sni == NULL) { 5270 REASON_SET(&reason, PFRES_MEMORY); 5271 goto csfailed; 5272 } 5273 sni->sn = sns[i]; 5274 SLIST_INSERT_HEAD(&st->src_nodes, sni, next); 5275 sni->sn->states++; 5276 } 5277 5278 stlim = ctx->statelim; 5279 if (stlim != NULL) { 5280 unsigned int gen; 5281 5282 PF_ASSERT_LOCKED(); 5283 pfl = pool_get(&pf_state_link_pl, PR_NOWAIT); 5284 if (pfl == NULL) { 5285 REASON_SET(&reason, PFRES_MEMORY); 5286 goto csfailed; 5287 } 5288 5289 gen = pf_statelim_enter(stlim); 5290 stlim->pfstlim_counters.admitted++; 5291 stlim->pfstlim_inuse++; 5292 pf_statelim_leave(stlim, gen); 5293 5294 stlim->pfstlim_rate_ts += stlim->pfstlim_rate_token; 5295 5296 st->statelim = stlim->pfstlim_id; 5297 pfl->pfl_state = st; 5298 pfl->pfl_type = PF_STATE_LINK_TYPE_STATELIM; 5299 5300 TAILQ_INSERT_TAIL(&stlim->pfstlim_states, pfl, pfl_link); 5301 SLIST_INSERT_HEAD(&st->linkage, pfl, pfl_linkage); 5302 } 5303 5304 srlim = ctx->sourcelim; 5305 if (srlim != NULL) { 5306 unsigned int gen; 5307 5308 sr = ctx->source; 5309 if (sr == NULL) { 5310 sr = pool_get(&pf_source_pl, PR_NOWAIT|PR_ZERO); 5311 if (sr == NULL) { 5312 gen = pf_sourcelim_enter(srlim); 5313 srlim->pfsrlim_counters.addrnomem++; 5314 pf_sourcelim_leave(srlim, gen); 5315 REASON_SET(&reason, PFRES_MEMORY); 5316 goto csfailed; 5317 } 5318 5319 sr->pfsr_parent = srlim; 5320 pf_source_key(srlim, sr, 5321 ctx->pd->af, ctx->pd->rdomain, ctx->pd->src); 5322 TAILQ_INIT(&sr->pfsr_states); 5323 5324 if (RBT_INSERT(pf_source_tree, 5325 &srlim->pfsrlim_sources, sr) != NULL) { 5326 panic("%s: source pool %u (%p) " 5327 "insert collision %p?!", __func__, 5328 srlim->pfsrlim_id, srlim, sr); 5329 } 5330 5331 if (RBT_INSERT(pf_source_ioc_tree, 5332 &srlim->pfsrlim_ioc_sources, sr) != NULL) { 5333 panic("%s: source pool %u (%p) ioc " 5334 "insert collision (%p)?!", __func__, 5335 srlim->pfsrlim_id, srlim, sr); 5336 } 5337 5338 sr->pfsr_empty_ts = getuptime(); 5339 TAILQ_INSERT_TAIL(&pf_source_gc, sr, 5340 pfsr_empty_gc); 5341 5342 gen = pf_sourcelim_enter(srlim); 5343 srlim->pfsrlim_nsources++; 5344 srlim->pfsrlim_counters.addrallocs++; 5345 pf_sourcelim_leave(srlim, gen); 5346 } else { 5347 KASSERT(sr->pfsr_parent == srlim); 5348 } 5349 5350 PF_ASSERT_LOCKED(); 5351 pfl = pool_get(&pf_state_link_pl, PR_NOWAIT); 5352 if (pfl == NULL) { 5353 REASON_SET(&reason, PFRES_MEMORY); 5354 goto csfailed; 5355 } 5356 5357 pf_source_used(sr); 5358 5359 sr->pfsr_counters.admitted++; 5360 5361 gen = pf_sourcelim_enter(srlim); 5362 srlim->pfsrlim_counters.inuse++; 5363 srlim->pfsrlim_counters.admitted++; 5364 pf_sourcelim_leave(srlim, gen); 5365 5366 st->sourcelim = srlim->pfsrlim_id; 5367 pfl->pfl_state = st; 5368 pfl->pfl_type = PF_STATE_LINK_TYPE_SOURCELIM; 5369 5370 TAILQ_INSERT_TAIL(&sr->pfsr_states, pfl, pfl_link); 5371 SLIST_INSERT_HEAD(&st->linkage, pfl, pfl_linkage); 5372 } 5373 5374#if NPFSYNC > 0 5375 pfsync_init_state(st, *skw, *sks, 0); 5376#endif 5377 5378 if (pf_state_insert(BOUND_IFACE(r, pd->kif), 
skw, sks, st)) { 5379 *sks = *skw = NULL; 5380 REASON_SET(&reason, PFRES_STATEINS); 5381 goto csfailed; 5382 } else 5383 *sm = st; 5384 5385 /* 5386 * Make state responsible for rules it binds here. 5387 */ 5388 memcpy(&st->match_rules, rules, sizeof(st->match_rules)); 5389 memset(rules, 0, sizeof(*rules)); 5390 STATE_INC_COUNTERS(st); 5391 5392 if (tag > 0) { 5393 pf_tag_ref(tag); 5394 st->tag = tag; 5395 } 5396 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) == 5397 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) { 5398 int rtid; 5399 uint16_t mss, mssdflt; 5400 5401 rtid = (act->rtableid >= 0) ? act->rtableid : pd->rdomain; 5402 pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 5403 st->src.seqhi = arc4random(); 5404 /* Find mss option */ 5405 mssdflt = atomic_load_int(&tcp_mssdflt); 5406 mss = pf_get_mss(pd, mssdflt); 5407 mss = pf_calc_mss(pd->src, pd->af, rtid, mss, mssdflt); 5408 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss, mssdflt); 5409 st->src.mss = mss; 5410 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 5411 th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1, 5412 TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain, 5413 &reason); 5414 REASON_SET(&reason, PFRES_SYNPROXY); 5415 return (PF_SYNPROXY_DROP); 5416 } 5417 5418 return (PF_PASS); 5419 5420csfailed: 5421 if (st) { 5422 struct pf_state_link *npfl; 5423 5424 SLIST_FOREACH_SAFE(pfl, &st->linkage, pfl_linkage, npfl) { 5425 struct pf_state_link_list *list; 5426 unsigned int gen; 5427 5428 /* who needs KASSERTS when we have NULL derefs */ 5429 5430 switch (pfl->pfl_type) { 5431 case PF_STATE_LINK_TYPE_STATELIM: 5432 gen = pf_statelim_enter(stlim); 5433 stlim->pfstlim_inuse--; 5434 pf_statelim_leave(stlim, gen); 5435 5436 stlim->pfstlim_rate_ts -= 5437 stlim->pfstlim_rate_token; 5438 list = &stlim->pfstlim_states; 5439 break; 5440 case PF_STATE_LINK_TYPE_SOURCELIM: 5441 gen = pf_sourcelim_enter(srlim); 5442 srlim->pfsrlim_counters.inuse--; 5443 pf_sourcelim_leave(srlim, gen); 5444 5445 sr->pfsr_rate_ts -= 5446 srlim->pfsrlim_rate_token; 5447 pf_source_rele(sr); 5448 5449 list = &sr->pfsr_states; 5450 break; 5451 default: 5452 panic("%s: unexpected link type on pfl %p", 5453 __func__, pfl); 5454 } 5455 5456 TAILQ_REMOVE(list, pfl, pfl_link); 5457 PF_ASSERT_LOCKED(); 5458 pool_put(&pf_state_link_pl, pfl); 5459 } 5460 5461 pf_normalize_tcp_cleanup(st); /* safe even w/o init */ 5462 pf_src_tree_remove_state(st); 5463 pool_put(&pf_state_pl, st); 5464 } 5465 5466 for (i = 0; i < PF_SN_MAX; i++) 5467 if (sns[i] != NULL) 5468 pf_remove_src_node(sns[i]); 5469 5470 return (PF_DROP); 5471} 5472 5473int 5474pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport, 5475 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type, 5476 int icmp_dir) 5477{ 5478 int rewrite = 0; 5479 int afto = pd->af != pd->naf; 5480 5481 if (afto || PF_ANEQ(daddr, pd->dst, pd->af)) 5482 pd->destchg = 1; 5483 5484 switch (pd->proto) { 5485 case IPPROTO_TCP: /* FALLTHROUGH */ 5486 case IPPROTO_UDP: 5487 rewrite += pf_patch_16(pd, pd->sport, sport); 5488 rewrite += pf_patch_16(pd, pd->dport, dport); 5489 break; 5490 5491 case IPPROTO_ICMP: 5492 if (pd->af != AF_INET) 5493 return (0); 5494 5495#ifdef INET6 5496 if (afto) { 5497 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp)) 5498 return (0); 5499 pd->proto = IPPROTO_ICMPV6; 5500 rewrite = 1; 5501 } 5502#endif /* INET6 */ 5503 if (virtual_type == htons(ICMP_ECHO)) { 5504 u_int16_t icmpid = (icmp_dir == PF_IN) ? 
sport : dport; 5505 rewrite += pf_patch_16(pd, 5506 &pd->hdr.icmp.icmp_id, icmpid); 5507 } 5508 break; 5509 5510#ifdef INET6 5511 case IPPROTO_ICMPV6: 5512 if (pd->af != AF_INET6) 5513 return (0); 5514 5515 if (afto) { 5516 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6)) 5517 return (0); 5518 pd->proto = IPPROTO_ICMP; 5519 rewrite = 1; 5520 } 5521 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) { 5522 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport; 5523 rewrite += pf_patch_16(pd, 5524 &pd->hdr.icmp6.icmp6_id, icmpid); 5525 } 5526 break; 5527#endif /* INET6 */ 5528 } 5529 5530 if (!afto) { 5531 rewrite += pf_translate_a(pd, pd->src, saddr); 5532 rewrite += pf_translate_a(pd, pd->dst, daddr); 5533 } 5534 5535 return (rewrite); 5536} 5537 5538int 5539pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason, 5540 int *copyback, int reverse) 5541{ 5542 struct tcphdr *th = &pd->hdr.tcp; 5543 struct pf_state_peer *src, *dst; 5544 u_int16_t win = ntohs(th->th_win); 5545 u_int32_t ack, end, data_end, seq, orig_seq; 5546 u_int8_t sws, dws, psrc, pdst; 5547 int ackskew; 5548 5549 if ((pd->dir == (*stp)->direction && !reverse) || 5550 (pd->dir != (*stp)->direction && reverse)) { 5551 src = &(*stp)->src; 5552 dst = &(*stp)->dst; 5553 psrc = PF_PEER_SRC; 5554 pdst = PF_PEER_DST; 5555 } else { 5556 src = &(*stp)->dst; 5557 dst = &(*stp)->src; 5558 psrc = PF_PEER_DST; 5559 pdst = PF_PEER_SRC; 5560 } 5561 5562 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) { 5563 sws = src->wscale & PF_WSCALE_MASK; 5564 dws = dst->wscale & PF_WSCALE_MASK; 5565 } else 5566 sws = dws = 0; 5567 5568 /* 5569 * Sequence tracking algorithm from Guido van Rooij's paper: 5570 * http://www.madison-gurkha.com/publications/tcp_filtering/ 5571 * tcp_filtering.ps 5572 */ 5573 5574 orig_seq = seq = ntohl(th->th_seq); 5575 if (src->seqlo == 0) { 5576 /* First packet from this end. 
Set its state */ 5577 5578 if (((*stp)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) && 5579 src->scrub == NULL) { 5580 if (pf_normalize_tcp_init(pd, src)) { 5581 REASON_SET(reason, PFRES_MEMORY); 5582 return (PF_DROP); 5583 } 5584 } 5585 5586 /* Deferred generation of sequence number modulator */ 5587 if (dst->seqdiff && !src->seqdiff) { 5588 /* use random iss for the TCP server */ 5589 while ((src->seqdiff = arc4random() - seq) == 0) 5590 continue; 5591 ack = ntohl(th->th_ack) - dst->seqdiff; 5592 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 5593 pf_patch_32(pd, &th->th_ack, htonl(ack)); 5594 *copyback = 1; 5595 } else { 5596 ack = ntohl(th->th_ack); 5597 } 5598 5599 end = seq + pd->p_len; 5600 if (th->th_flags & TH_SYN) { 5601 end++; 5602 if (dst->wscale & PF_WSCALE_FLAG) { 5603 src->wscale = pf_get_wscale(pd); 5604 if (src->wscale & PF_WSCALE_FLAG) { 5605 /* Remove scale factor from initial 5606 * window */ 5607 sws = src->wscale & PF_WSCALE_MASK; 5608 win = ((u_int32_t)win + (1 << sws) - 1) 5609 >> sws; 5610 dws = dst->wscale & PF_WSCALE_MASK; 5611 } else { 5612 /* fixup other window */ 5613 dst->max_win = MIN(TCP_MAXWIN, 5614 (u_int32_t)dst->max_win << 5615 (dst->wscale & PF_WSCALE_MASK)); 5616 /* in case of a retrans SYN|ACK */ 5617 dst->wscale = 0; 5618 } 5619 } 5620 } 5621 data_end = end; 5622 if (th->th_flags & TH_FIN) 5623 end++; 5624 5625 src->seqlo = seq; 5626 if (src->state < TCPS_SYN_SENT) 5627 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 5628 5629 /* 5630 * May need to slide the window (seqhi may have been set by 5631 * the crappy stack check or if we picked up the connection 5632 * after establishment) 5633 */ 5634 if (src->seqhi == 1 || 5635 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi)) 5636 src->seqhi = end + MAX(1, dst->max_win << dws); 5637 if (win > src->max_win) 5638 src->max_win = win; 5639 5640 } else { 5641 ack = ntohl(th->th_ack) - dst->seqdiff; 5642 if (src->seqdiff) { 5643 /* Modulate sequence numbers */ 5644 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff)); 5645 pf_patch_32(pd, &th->th_ack, htonl(ack)); 5646 *copyback = 1; 5647 } 5648 end = seq + pd->p_len; 5649 if (th->th_flags & TH_SYN) 5650 end++; 5651 data_end = end; 5652 if (th->th_flags & TH_FIN) 5653 end++; 5654 } 5655 5656 if ((th->th_flags & TH_ACK) == 0) { 5657 /* Let it pass through the ack skew check */ 5658 ack = dst->seqlo; 5659 } else if ((ack == 0 && 5660 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) || 5661 /* broken tcp stacks do not set ack */ 5662 (dst->state < TCPS_SYN_SENT)) { 5663 /* 5664 * Many stacks (ours included) will set the ACK number in an 5665 * FIN|ACK if the SYN times out -- no sequence to ACK. 5666 */ 5667 ack = dst->seqlo; 5668 } 5669 5670 if (seq == end) { 5671 /* Ease sequencing restrictions on no data packets */ 5672 seq = src->seqlo; 5673 data_end = end = seq; 5674 } 5675 5676 ackskew = dst->seqlo - ack; 5677 5678 5679 /* 5680 * Need to demodulate the sequence numbers in any TCP SACK options 5681 * (Selective ACK). We could optionally validate the SACK values 5682 * against the current ACK window, either forwards or backwards, but 5683 * I'm not confident that SACK has been implemented properly 5684 * everywhere. It wouldn't surprise me if several stacks accidentally 5685 * SACK too far backwards of previously ACKed data. There really aren't 5686 * any security implications of bad SACKing unless the target stack 5687 * doesn't validate the option length correctly. 
Someone trying to 5688 * spoof into a TCP connection won't bother blindly sending SACK 5689 * options anyway. 5690 */ 5691 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) { 5692 if (pf_modulate_sack(pd, dst)) 5693 *copyback = 1; 5694 } 5695 5696 5697#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */ 5698 if (SEQ_GEQ(src->seqhi, data_end) && 5699 /* Last octet inside other's window space */ 5700 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) && 5701 /* Retrans: not more than one window back */ 5702 (ackskew >= -MAXACKWINDOW) && 5703 /* Acking not more than one reassembled fragment backwards */ 5704 (ackskew <= (MAXACKWINDOW << sws)) && 5705 /* Acking not more than one window forward */ 5706 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo || 5707 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) || 5708 /* Require an exact/+1 sequence match on resets when possible */ 5709 (SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) && 5710 SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 && 5711 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) { 5712 /* Allow resets to match sequence window if ack is perfect match */ 5713 5714 if (dst->scrub || src->scrub) { 5715 if (pf_normalize_tcp_stateful(pd, reason, *stp, src, 5716 dst, copyback)) 5717 return (PF_DROP); 5718 } 5719 5720 /* update max window */ 5721 if (src->max_win < win) 5722 src->max_win = win; 5723 /* synchronize sequencing */ 5724 if (SEQ_GT(end, src->seqlo)) 5725 src->seqlo = end; 5726 /* slide the window of what the other end can send */ 5727 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 5728 dst->seqhi = ack + MAX((win << sws), 1); 5729 5730 /* update states */ 5731 if (th->th_flags & TH_SYN) 5732 if (src->state < TCPS_SYN_SENT) 5733 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 5734 if (th->th_flags & TH_FIN) 5735 if (src->state < TCPS_CLOSING) 5736 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5737 if (th->th_flags & TH_ACK) { 5738 if (dst->state == TCPS_SYN_SENT) { 5739 pf_set_protostate(*stp, pdst, 5740 TCPS_ESTABLISHED); 5741 if (src->state == TCPS_ESTABLISHED && 5742 !SLIST_EMPTY(&(*stp)->src_nodes) && 5743 pf_src_connlimit(stp)) { 5744 REASON_SET(reason, PFRES_SRCLIMIT); 5745 return (PF_DROP); 5746 } 5747 } else if (dst->state == TCPS_CLOSING) 5748 pf_set_protostate(*stp, pdst, 5749 TCPS_FIN_WAIT_2); 5750 } 5751 if (th->th_flags & TH_RST) 5752 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5753 5754 /* update expire time */ 5755 (*stp)->expire = getuptime(); 5756 if (src->state >= TCPS_FIN_WAIT_2 && 5757 dst->state >= TCPS_FIN_WAIT_2) 5758 pf_update_state_timeout(*stp, PFTM_TCP_CLOSED); 5759 else if (src->state >= TCPS_CLOSING && 5760 dst->state >= TCPS_CLOSING) 5761 pf_update_state_timeout(*stp, PFTM_TCP_FIN_WAIT); 5762 else if (src->state < TCPS_ESTABLISHED || 5763 dst->state < TCPS_ESTABLISHED) 5764 pf_update_state_timeout(*stp, PFTM_TCP_OPENING); 5765 else if (src->state >= TCPS_CLOSING || 5766 dst->state >= TCPS_CLOSING) 5767 pf_update_state_timeout(*stp, PFTM_TCP_CLOSING); 5768 else 5769 pf_update_state_timeout(*stp, PFTM_TCP_ESTABLISHED); 5770 5771 /* Fall through to PASS packet */ 5772 } else if ((dst->state < TCPS_SYN_SENT || 5773 dst->state >= TCPS_FIN_WAIT_2 || 5774 src->state >= TCPS_FIN_WAIT_2) && 5775 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) && 5776 /* Within a window forward of the originating packet */ 5777 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) { 5778 /* Within a window backward of the originating packet */ 5779 5780 /* 5781 * This 
currently handles three situations: 5782 * 1) Stupid stacks will shotgun SYNs before their peer 5783 * replies. 5784 * 2) When PF catches an already established stream (the 5785 * firewall rebooted, the state table was flushed, routes 5786 * changed...) 5787 * 3) Packets get funky immediately after the connection 5788 * closes (this should catch Solaris spurious ACK|FINs 5789 * that web servers like to spew after a close) 5790 * 5791 * This must be a little more careful than the above code 5792 * since packet floods will also be caught here. We don't 5793 * update the TTL here to mitigate the damage of a packet 5794 * flood and so the same code can handle awkward establishment 5795 * and a loosened connection close. 5796 * In the establishment case, a correct peer response will 5797 * validate the connection, go through the normal state code 5798 * and keep updating the state TTL. 5799 */ 5800 5801 if (pf_status.debug >= LOG_NOTICE) { 5802 log(LOG_NOTICE, "pf: loose state match: "); 5803 pf_print_state(*stp); 5804 pf_print_flags(th->th_flags); 5805 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 5806 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack, 5807 pd->p_len, ackskew, (*stp)->packets[0], 5808 (*stp)->packets[1], 5809 pd->dir == PF_IN ? "in" : "out", 5810 pd->dir == (*stp)->direction ? "fwd" : "rev"); 5811 } 5812 5813 if (dst->scrub || src->scrub) { 5814 if (pf_normalize_tcp_stateful(pd, reason, *stp, src, 5815 dst, copyback)) 5816 return (PF_DROP); 5817 } 5818 5819 /* update max window */ 5820 if (src->max_win < win) 5821 src->max_win = win; 5822 /* synchronize sequencing */ 5823 if (SEQ_GT(end, src->seqlo)) 5824 src->seqlo = end; 5825 /* slide the window of what the other end can send */ 5826 if (SEQ_GEQ(ack + (win << sws), dst->seqhi)) 5827 dst->seqhi = ack + MAX((win << sws), 1); 5828 5829 /* 5830 * Cannot set dst->seqhi here since this could be a shotgunned 5831 * SYN and not an already established connection. 5832 */ 5833 if (th->th_flags & TH_FIN) 5834 if (src->state < TCPS_CLOSING) 5835 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5836 if (th->th_flags & TH_RST) 5837 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5838 5839 /* Fall through to PASS packet */ 5840 } else { 5841 if ((*stp)->dst.state == TCPS_SYN_SENT && 5842 (*stp)->src.state == TCPS_SYN_SENT) { 5843 /* Send RST for state mismatches during handshake */ 5844 if (!(th->th_flags & TH_RST)) 5845 pf_send_tcp((*stp)->rule.ptr, pd->af, 5846 pd->dst, pd->src, th->th_dport, 5847 th->th_sport, ntohl(th->th_ack), 0, 5848 TH_RST, 0, 0, 5849 (*stp)->rule.ptr->return_ttl, 1, 0, 5850 pd->rdomain, reason); 5851 src->seqlo = 0; 5852 src->seqhi = 1; 5853 src->max_win = 1; 5854 } else if (pf_status.debug >= LOG_NOTICE) { 5855 log(LOG_NOTICE, "pf: BAD state: "); 5856 pf_print_state(*stp); 5857 pf_print_flags(th->th_flags); 5858 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d " 5859 "pkts=%llu:%llu dir=%s,%s\n", 5860 seq, orig_seq, ack, pd->p_len, ackskew, 5861 (*stp)->packets[0], (*stp)->packets[1], 5862 pd->dir == PF_IN ? "in" : "out", 5863 pd->dir == (*stp)->direction ? "fwd" : "rev"); 5864 addlog("pf: State failure on: %c %c %c %c | %c %c\n", 5865 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1', 5866 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ? 5867 ' ': '2', 5868 (ackskew >= -MAXACKWINDOW) ? ' ' : '3', 5869 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4', 5870 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ? 5871 ' ' :'5', 5872 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' 
' :'6'); 5873 } 5874 REASON_SET(reason, PFRES_BADSTATE); 5875 return (PF_DROP); 5876 } 5877 5878 return (PF_PASS); 5879} 5880 5881int 5882pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **stp, 5883 u_short *reason) 5884{ 5885 struct tcphdr *th = &pd->hdr.tcp; 5886 struct pf_state_peer *src, *dst; 5887 u_int8_t psrc, pdst; 5888 5889 if (pd->dir == (*stp)->direction) { 5890 src = &(*stp)->src; 5891 dst = &(*stp)->dst; 5892 psrc = PF_PEER_SRC; 5893 pdst = PF_PEER_DST; 5894 } else { 5895 src = &(*stp)->dst; 5896 dst = &(*stp)->src; 5897 psrc = PF_PEER_DST; 5898 pdst = PF_PEER_SRC; 5899 } 5900 5901 if (th->th_flags & TH_SYN) 5902 if (src->state < TCPS_SYN_SENT) 5903 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT); 5904 if (th->th_flags & TH_FIN) 5905 if (src->state < TCPS_CLOSING) 5906 pf_set_protostate(*stp, psrc, TCPS_CLOSING); 5907 if (th->th_flags & TH_ACK) { 5908 if (dst->state == TCPS_SYN_SENT) { 5909 pf_set_protostate(*stp, pdst, TCPS_ESTABLISHED); 5910 if (src->state == TCPS_ESTABLISHED && 5911 !SLIST_EMPTY(&(*stp)->src_nodes) && 5912 pf_src_connlimit(stp)) { 5913 REASON_SET(reason, PFRES_SRCLIMIT); 5914 return (PF_DROP); 5915 } 5916 } else if (dst->state == TCPS_CLOSING) { 5917 pf_set_protostate(*stp, pdst, TCPS_FIN_WAIT_2); 5918 } else if (src->state == TCPS_SYN_SENT && 5919 dst->state < TCPS_SYN_SENT) { 5920 /* 5921 * Handle a special sloppy case where we only see one 5922 * half of the connection. If there is a ACK after 5923 * the initial SYN without ever seeing a packet from 5924 * the destination, set the connection to established. 5925 */ 5926 pf_set_protostate(*stp, PF_PEER_BOTH, 5927 TCPS_ESTABLISHED); 5928 if (!SLIST_EMPTY(&(*stp)->src_nodes) && 5929 pf_src_connlimit(stp)) { 5930 REASON_SET(reason, PFRES_SRCLIMIT); 5931 return (PF_DROP); 5932 } 5933 } else if (src->state == TCPS_CLOSING && 5934 dst->state == TCPS_ESTABLISHED && 5935 dst->seqlo == 0) { 5936 /* 5937 * Handle the closing of half connections where we 5938 * don't see the full bidirectional FIN/ACK+ACK 5939 * handshake. 
5940 */ 5941 pf_set_protostate(*stp, pdst, TCPS_CLOSING); 5942 } 5943 } 5944 if (th->th_flags & TH_RST) 5945 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT); 5946 5947 /* update expire time */ 5948 (*stp)->expire = getuptime(); 5949 if (src->state >= TCPS_FIN_WAIT_2 && 5950 dst->state >= TCPS_FIN_WAIT_2) 5951 pf_update_state_timeout(*stp, PFTM_TCP_CLOSED); 5952 else if (src->state >= TCPS_CLOSING && 5953 dst->state >= TCPS_CLOSING) 5954 pf_update_state_timeout(*stp, PFTM_TCP_FIN_WAIT); 5955 else if (src->state < TCPS_ESTABLISHED || 5956 dst->state < TCPS_ESTABLISHED) 5957 pf_update_state_timeout(*stp, PFTM_TCP_OPENING); 5958 else if (src->state >= TCPS_CLOSING || 5959 dst->state >= TCPS_CLOSING) 5960 pf_update_state_timeout(*stp, PFTM_TCP_CLOSING); 5961 else 5962 pf_update_state_timeout(*stp, PFTM_TCP_ESTABLISHED); 5963 5964 return (PF_PASS); 5965} 5966 5967static __inline int 5968pf_synproxy(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) 5969{ 5970 struct pf_state_key *sk = (*stp)->key[pd->didx]; 5971 5972 if ((*stp)->src.state == PF_TCPS_PROXY_SRC) { 5973 struct tcphdr *th = &pd->hdr.tcp; 5974 5975 if (pd->dir != (*stp)->direction) { 5976 REASON_SET(reason, PFRES_SYNPROXY); 5977 return (PF_SYNPROXY_DROP); 5978 } 5979 if (th->th_flags & TH_SYN) { 5980 if (ntohl(th->th_seq) != (*stp)->src.seqlo) { 5981 REASON_SET(reason, PFRES_SYNPROXY); 5982 return (PF_DROP); 5983 } 5984 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst, 5985 pd->src, th->th_dport, th->th_sport, 5986 (*stp)->src.seqhi, ntohl(th->th_seq) + 1, 5987 TH_SYN|TH_ACK, 0, (*stp)->src.mss, 0, 1, 5988 0, pd->rdomain, reason); 5989 REASON_SET(reason, PFRES_SYNPROXY); 5990 return (PF_SYNPROXY_DROP); 5991 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK || 5992 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) || 5993 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) { 5994 REASON_SET(reason, PFRES_SYNPROXY); 5995 return (PF_DROP); 5996 } else if (!SLIST_EMPTY(&(*stp)->src_nodes) && 5997 pf_src_connlimit(stp)) { 5998 REASON_SET(reason, PFRES_SRCLIMIT); 5999 return (PF_DROP); 6000 } else 6001 pf_set_protostate(*stp, PF_PEER_SRC, 6002 PF_TCPS_PROXY_DST); 6003 } 6004 if ((*stp)->src.state == PF_TCPS_PROXY_DST) { 6005 struct tcphdr *th = &pd->hdr.tcp; 6006 6007 if (pd->dir == (*stp)->direction) { 6008 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) || 6009 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) || 6010 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) { 6011 REASON_SET(reason, PFRES_SYNPROXY); 6012 return (PF_DROP); 6013 } 6014 (*stp)->src.max_win = MAX(ntohs(th->th_win), 1); 6015 if ((*stp)->dst.seqhi == 1) 6016 (*stp)->dst.seqhi = arc4random(); 6017 pf_send_tcp((*stp)->rule.ptr, pd->af, 6018 &sk->addr[pd->sidx], &sk->addr[pd->didx], 6019 sk->port[pd->sidx], sk->port[pd->didx], 6020 (*stp)->dst.seqhi, 0, TH_SYN, 0, 6021 (*stp)->src.mss, 0, 0, (*stp)->tag, 6022 sk->rdomain, reason); 6023 REASON_SET(reason, PFRES_SYNPROXY); 6024 return (PF_SYNPROXY_DROP); 6025 } else if (((th->th_flags & (TH_SYN|TH_ACK)) != 6026 (TH_SYN|TH_ACK)) || 6027 (ntohl(th->th_ack) != (*stp)->dst.seqhi + 1)) { 6028 REASON_SET(reason, PFRES_SYNPROXY); 6029 return (PF_DROP); 6030 } else { 6031 (*stp)->dst.max_win = MAX(ntohs(th->th_win), 1); 6032 (*stp)->dst.seqlo = ntohl(th->th_seq); 6033 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst, 6034 pd->src, th->th_dport, th->th_sport, 6035 ntohl(th->th_ack), ntohl(th->th_seq) + 1, 6036 TH_ACK, (*stp)->src.max_win, 0, 0, 0, 6037 (*stp)->tag, pd->rdomain, reason); 6038 pf_send_tcp((*stp)->rule.ptr, pd->af, 
6039 &sk->addr[pd->sidx], &sk->addr[pd->didx], 6040 sk->port[pd->sidx], sk->port[pd->didx], 6041 (*stp)->src.seqhi + 1, (*stp)->src.seqlo + 1, 6042 TH_ACK, (*stp)->dst.max_win, 0, 0, 1, 6043 0, sk->rdomain, reason); 6044 (*stp)->src.seqdiff = (*stp)->dst.seqhi - 6045 (*stp)->src.seqlo; 6046 (*stp)->dst.seqdiff = (*stp)->src.seqhi - 6047 (*stp)->dst.seqlo; 6048 (*stp)->src.seqhi = (*stp)->src.seqlo + 6049 (*stp)->dst.max_win; 6050 (*stp)->dst.seqhi = (*stp)->dst.seqlo + 6051 (*stp)->src.max_win; 6052 (*stp)->src.wscale = (*stp)->dst.wscale = 0; 6053 pf_set_protostate(*stp, PF_PEER_BOTH, 6054 TCPS_ESTABLISHED); 6055 REASON_SET(reason, PFRES_SYNPROXY); 6056 return (PF_SYNPROXY_DROP); 6057 } 6058 } 6059 return (PF_PASS); 6060} 6061 6062static __inline int 6063pf_synproxy_ack(struct pf_rule *r, struct pf_pdesc *pd, struct pf_state **sm, 6064 struct pf_rule_actions *act) 6065{ 6066 struct tcphdr *th = &pd->hdr.tcp; 6067 struct pf_state *s; 6068 u_int16_t mss, mssdflt; 6069 int rtid; 6070 u_short reason; 6071 6072 if ((th->th_flags & (TH_SYN|TH_ACK)) != TH_SYN) 6073 return (PF_PASS); 6074 6075 s = *sm; 6076 rtid = (act->rtableid >= 0) ? act->rtableid : pd->rdomain; 6077 6078 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC); 6079 s->src.seqhi = arc4random(); 6080 /* Find mss option */ 6081 mssdflt = atomic_load_int(&tcp_mssdflt); 6082 mss = pf_get_mss(pd, mssdflt); 6083 mss = pf_calc_mss(pd->src, pd->af, rtid, mss, mssdflt); 6084 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss, mssdflt); 6085 s->src.mss = mss; 6086 6087 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport, 6088 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1, 6089 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain, NULL); 6090 6091 REASON_SET(&reason, PFRES_SYNPROXY); 6092 return (PF_SYNPROXY_DROP); 6093} 6094 6095int 6096pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason) 6097{ 6098 int copyback = 0; 6099 struct pf_state_peer *src, *dst; 6100 int action; 6101 u_int8_t psrc, pdst; 6102 6103 action = PF_PASS; 6104 if (pd->dir == (*stp)->direction) { 6105 src = &(*stp)->src; 6106 dst = &(*stp)->dst; 6107 psrc = PF_PEER_SRC; 6108 pdst = PF_PEER_DST; 6109 } else { 6110 src = &(*stp)->dst; 6111 dst = &(*stp)->src; 6112 psrc = PF_PEER_DST; 6113 pdst = PF_PEER_SRC; 6114 } 6115 6116 switch (pd->virtual_proto) { 6117 case IPPROTO_TCP: 6118 if ((action = pf_synproxy(pd, stp, reason)) != PF_PASS) 6119 return (action); 6120 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) { 6121 6122 if (dst->state >= TCPS_FIN_WAIT_2 && 6123 src->state >= TCPS_FIN_WAIT_2) { 6124 if (pf_status.debug >= LOG_NOTICE) { 6125 log(LOG_NOTICE, "pf: state reuse "); 6126 pf_print_state(*stp); 6127 pf_print_flags(pd->hdr.tcp.th_flags); 6128 addlog("\n"); 6129 } 6130 /* XXX make sure it's the same direction ?? */ 6131 pf_update_state_timeout(*stp, PFTM_PURGE); 6132 pf_state_unref(*stp); 6133 *stp = NULL; 6134 return (PF_DROP); 6135 } else if (dst->state >= TCPS_ESTABLISHED && 6136 src->state >= TCPS_ESTABLISHED) { 6137 /* 6138 * SYN matches existing state??? 6139 * Typically happens when sender boots up after 6140 * sudden panic. Certain protocols (NFSv3) are 6141 * always using same port numbers. Challenge 6142 * ACK enables all parties (firewall and peers) 6143 * to get in sync again. 
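 * Roughly: the challenge ACK carries the sequence numbers this state
 * still expects.  If the old connection is really gone, the peer
 * answers with a matching RST, the state moves to TIME_WAIT, and the
 * retransmitted SYN can then set up a fresh state.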
6144 */ 6145 pf_send_challenge_ack(pd, *stp, src, dst, 6146 reason); 6147 return (PF_DROP); 6148 } 6149 } 6150 6151 if ((*stp)->state_flags & PFSTATE_SLOPPY) { 6152 if (pf_tcp_track_sloppy(pd, stp, reason) == PF_DROP) 6153 return (PF_DROP); 6154 } else { 6155 if (pf_tcp_track_full(pd, stp, reason, &copyback, 6156 PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP) 6157 return (PF_DROP); 6158 } 6159 break; 6160 case IPPROTO_UDP: 6161 /* update states */ 6162 if (src->state < PFUDPS_SINGLE) 6163 pf_set_protostate(*stp, psrc, PFUDPS_SINGLE); 6164 if (dst->state == PFUDPS_SINGLE) 6165 pf_set_protostate(*stp, pdst, PFUDPS_MULTIPLE); 6166 6167 /* update expire time */ 6168 (*stp)->expire = getuptime(); 6169 if (src->state == PFUDPS_MULTIPLE && 6170 dst->state == PFUDPS_MULTIPLE) 6171 pf_update_state_timeout(*stp, PFTM_UDP_MULTIPLE); 6172 else 6173 pf_update_state_timeout(*stp, PFTM_UDP_SINGLE); 6174 break; 6175 default: 6176 /* update states */ 6177 if (src->state < PFOTHERS_SINGLE) 6178 pf_set_protostate(*stp, psrc, PFOTHERS_SINGLE); 6179 if (dst->state == PFOTHERS_SINGLE) 6180 pf_set_protostate(*stp, pdst, PFOTHERS_MULTIPLE); 6181 6182 /* update expire time */ 6183 (*stp)->expire = getuptime(); 6184 if (src->state == PFOTHERS_MULTIPLE && 6185 dst->state == PFOTHERS_MULTIPLE) 6186 pf_update_state_timeout(*stp, PFTM_OTHER_MULTIPLE); 6187 else 6188 pf_update_state_timeout(*stp, PFTM_OTHER_SINGLE); 6189 break; 6190 } 6191 6192 /* translate source/destination address, if necessary */ 6193 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) { 6194 struct pf_state_key *nk; 6195 int afto, sidx, didx; 6196 6197 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6198 nk = (*stp)->key[pd->sidx]; 6199 else 6200 nk = (*stp)->key[pd->didx]; 6201 6202 afto = pd->af != nk->af; 6203 sidx = afto ? pd->didx : pd->sidx; 6204 didx = afto ? 
pd->sidx : pd->didx; 6205 6206#ifdef INET6 6207 if (afto) { 6208 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af); 6209 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af); 6210 pd->naf = nk->af; 6211 action = PF_AFRT; 6212 } 6213#endif /* INET6 */ 6214 6215 if (!afto) 6216 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 6217 6218 if (pd->sport != NULL) 6219 pf_patch_16(pd, pd->sport, nk->port[sidx]); 6220 6221 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) || 6222 pd->rdomain != nk->rdomain) 6223 pd->destchg = 1; 6224 6225 if (!afto) 6226 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 6227 6228 if (pd->dport != NULL) 6229 pf_patch_16(pd, pd->dport, nk->port[didx]); 6230 6231 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6232 copyback = 1; 6233 } 6234 6235 if (copyback && pd->hdrlen > 0) { 6236 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 6237 } 6238 6239 return (action); 6240} 6241 6242int 6243pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key, 6244 struct pf_state **stp, u_int16_t icmpid, u_int16_t type, 6245 int icmp_dir, int *iidx, int multi, int inner) 6246{ 6247 int direction, action; 6248 6249 key->af = pd->af; 6250 key->proto = pd->proto; 6251 key->rdomain = pd->rdomain; 6252 if (icmp_dir == PF_IN) { 6253 *iidx = pd->sidx; 6254 key->port[pd->sidx] = icmpid; 6255 key->port[pd->didx] = type; 6256 } else { 6257 *iidx = pd->didx; 6258 key->port[pd->sidx] = type; 6259 key->port[pd->didx] = icmpid; 6260 } 6261 6262 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx, 6263 pd->dst, pd->af, multi)) 6264 return (PF_DROP); 6265 6266 key->hash = pf_pkt_hash(key->af, key->proto, 6267 &key->addr[0], &key->addr[1], 0, 0); 6268 6269 action = pf_find_state(pd, key, stp); 6270 if (action != PF_MATCH) 6271 return (action); 6272 6273 if ((*stp)->state_flags & PFSTATE_SLOPPY) 6274 return (-1); 6275 6276 /* Is this ICMP message flowing in right direction? */ 6277 if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af) 6278 direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ? 6279 PF_IN : PF_OUT; 6280 else 6281 direction = (*stp)->direction; 6282 if ((((!inner && direction == pd->dir) || 6283 (inner && direction != pd->dir)) ? 6284 PF_IN : PF_OUT) != icmp_dir) { 6285 if (pf_status.debug >= LOG_NOTICE) { 6286 log(LOG_NOTICE, 6287 "pf: icmp type %d in wrong direction (%d): ", 6288 ntohs(type), icmp_dir); 6289 pf_print_state(*stp); 6290 addlog("\n"); 6291 } 6292 return (PF_DROP); 6293 } 6294 return (-1); 6295} 6296 6297int 6298pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **stp, 6299 u_short *reason) 6300{ 6301 u_int16_t virtual_id, virtual_type; 6302 u_int8_t icmptype, icmpcode; 6303 int icmp_dir, iidx, ret, copyback = 0; 6304 6305 struct pf_state_key_cmp key; 6306 6307 switch (pd->proto) { 6308 case IPPROTO_ICMP: 6309 icmptype = pd->hdr.icmp.icmp_type; 6310 icmpcode = pd->hdr.icmp.icmp_code; 6311 break; 6312#ifdef INET6 6313 case IPPROTO_ICMPV6: 6314 icmptype = pd->hdr.icmp6.icmp6_type; 6315 icmpcode = pd->hdr.icmp6.icmp6_code; 6316 break; 6317#endif /* INET6 */ 6318 default: 6319 panic("unhandled proto %d", pd->proto); 6320 } 6321 6322 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id, 6323 &virtual_type) == 0) { 6324 /* 6325 * ICMP query/reply message not related to a TCP/UDP packet. 6326 * Search for an ICMP state. 6327 */ 6328 ret = pf_icmp_state_lookup(pd, &key, stp, 6329 virtual_id, virtual_type, icmp_dir, &iidx, 6330 0, 0); 6331 /* IPv6? 
try matching a multicast address */ 6332 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT) 6333 ret = pf_icmp_state_lookup(pd, &key, stp, virtual_id, 6334 virtual_type, icmp_dir, &iidx, 1, 0); 6335 if (ret >= 0) 6336 return (ret); 6337 6338 (*stp)->expire = getuptime(); 6339 pf_update_state_timeout(*stp, PFTM_ICMP_ERROR_REPLY); 6340 6341 /* translate source/destination address, if necessary */ 6342 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) { 6343 struct pf_state_key *nk; 6344 int afto, sidx, didx; 6345 6346 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6347 nk = (*stp)->key[pd->sidx]; 6348 else 6349 nk = (*stp)->key[pd->didx]; 6350 6351 afto = pd->af != nk->af; 6352 sidx = afto ? pd->didx : pd->sidx; 6353 didx = afto ? pd->sidx : pd->didx; 6354 iidx = afto ? !iidx : iidx; 6355#ifdef INET6 6356 if (afto) { 6357 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], 6358 nk->af); 6359 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], 6360 nk->af); 6361 pd->naf = nk->af; 6362 } 6363#endif /* INET6 */ 6364 if (!afto) { 6365 pf_translate_a(pd, pd->src, &nk->addr[sidx]); 6366 pf_translate_a(pd, pd->dst, &nk->addr[didx]); 6367 } 6368 6369 if (pd->rdomain != nk->rdomain) 6370 pd->destchg = 1; 6371 if (!afto && PF_ANEQ(pd->dst, 6372 &nk->addr[didx], pd->af)) 6373 pd->destchg = 1; 6374 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6375 6376 switch (pd->af) { 6377 case AF_INET: 6378#ifdef INET6 6379 if (afto) { 6380 if (pf_translate_icmp_af(pd, AF_INET6, 6381 &pd->hdr.icmp)) 6382 return (PF_DROP); 6383 pd->proto = IPPROTO_ICMPV6; 6384 } 6385#endif /* INET6 */ 6386 pf_patch_16(pd, 6387 &pd->hdr.icmp.icmp_id, nk->port[iidx]); 6388 6389 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6390 &pd->hdr.icmp, M_NOWAIT); 6391 copyback = 1; 6392 break; 6393#ifdef INET6 6394 case AF_INET6: 6395 if (afto) { 6396 if (pf_translate_icmp_af(pd, AF_INET, 6397 &pd->hdr.icmp6)) 6398 return (PF_DROP); 6399 pd->proto = IPPROTO_ICMP; 6400 } 6401 6402 pf_patch_16(pd, 6403 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]); 6404 6405 m_copyback(pd->m, pd->off, 6406 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 6407 M_NOWAIT); 6408 copyback = 1; 6409 break; 6410#endif /* INET6 */ 6411 } 6412#ifdef INET6 6413 if (afto) 6414 return (PF_AFRT); 6415#endif /* INET6 */ 6416 } 6417 } else { 6418 /* 6419 * ICMP error message in response to a TCP/UDP packet. 6420 * Extract the inner TCP/UDP header and search for that state. 6421 */ 6422 struct pf_pdesc pd2; 6423 struct ip h2; 6424#ifdef INET6 6425 struct ip6_hdr h2_6; 6426#endif /* INET6 */ 6427 int ipoff2; 6428 6429 /* Initialize pd2 fields valid for both packets with pd. */ 6430 memset(&pd2, 0, sizeof(pd2)); 6431 pd2.af = pd->af; 6432 pd2.dir = pd->dir; 6433 pd2.kif = pd->kif; 6434 pd2.m = pd->m; 6435 pd2.rdomain = pd->rdomain; 6436 /* Payload packet is from the opposite direction. */ 6437 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0; 6438 pd2.didx = (pd2.dir == PF_IN) ? 
0 : 1; 6439 switch (pd->af) { 6440 case AF_INET: 6441 /* offset of h2 in mbuf chain */ 6442 ipoff2 = pd->off + ICMP_MINLEN; 6443 6444 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2), 6445 reason, pd2.af)) { 6446 DPFPRINTF(LOG_NOTICE, 6447 "ICMP error message too short (ip)"); 6448 return (PF_DROP); 6449 } 6450 /* 6451 * ICMP error messages don't refer to non-first 6452 * fragments 6453 */ 6454 if (h2.ip_off & htons(IP_OFFMASK)) { 6455 REASON_SET(reason, PFRES_FRAG); 6456 return (PF_DROP); 6457 } 6458 6459 /* offset of protocol header that follows h2 */ 6460 pd2.off = ipoff2; 6461 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS) 6462 return (PF_DROP); 6463 6464 pd2.tot_len = ntohs(h2.ip_len); 6465 pd2.ttl = h2.ip_ttl; 6466 pd2.src = (struct pf_addr *)&h2.ip_src; 6467 pd2.dst = (struct pf_addr *)&h2.ip_dst; 6468 break; 6469#ifdef INET6 6470 case AF_INET6: 6471 ipoff2 = pd->off + sizeof(struct icmp6_hdr); 6472 6473 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6), 6474 reason, pd2.af)) { 6475 DPFPRINTF(LOG_NOTICE, 6476 "ICMP error message too short (ip6)"); 6477 return (PF_DROP); 6478 } 6479 6480 pd2.off = ipoff2; 6481 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS) 6482 return (PF_DROP); 6483 6484 pd2.tot_len = ntohs(h2_6.ip6_plen) + 6485 sizeof(struct ip6_hdr); 6486 pd2.ttl = h2_6.ip6_hlim; 6487 pd2.src = (struct pf_addr *)&h2_6.ip6_src; 6488 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst; 6489 break; 6490#endif /* INET6 */ 6491 default: 6492 unhandled_af(pd->af); 6493 } 6494 6495 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) { 6496 if (pf_status.debug >= LOG_NOTICE) { 6497 log(LOG_NOTICE, 6498 "pf: BAD ICMP %d:%d outer dst: ", 6499 icmptype, icmpcode); 6500 pf_print_host(pd->src, 0, pd->af); 6501 addlog(" -> "); 6502 pf_print_host(pd->dst, 0, pd->af); 6503 addlog(" inner src: "); 6504 pf_print_host(pd2.src, 0, pd2.af); 6505 addlog(" -> "); 6506 pf_print_host(pd2.dst, 0, pd2.af); 6507 addlog("\n"); 6508 } 6509 REASON_SET(reason, PFRES_BADSTATE); 6510 return (PF_DROP); 6511 } 6512 6513 switch (pd2.proto) { 6514 case IPPROTO_TCP: { 6515 struct tcphdr *th = &pd2.hdr.tcp; 6516 u_int32_t seq; 6517 struct pf_state_peer *src, *dst; 6518 u_int8_t dws; 6519 int action; 6520 6521 /* 6522 * Only the first 8 bytes of the TCP header can be 6523 * expected. Don't access any TCP header fields after 6524 * th_seq, an ackskew test is not possible. 
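 * (An ICMP error is only required to quote the IP header plus the
 * first 8 bytes of the offending datagram; for TCP that covers
 * th_sport, th_dport and th_seq, but not th_ack, flags or window.)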
6525 */ 6526 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, reason, 6527 pd2.af)) { 6528 DPFPRINTF(LOG_NOTICE, 6529 "ICMP error message too short (tcp)"); 6530 return (PF_DROP); 6531 } 6532 6533 key.af = pd2.af; 6534 key.proto = IPPROTO_TCP; 6535 key.rdomain = pd2.rdomain; 6536 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 6537 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 6538 key.port[pd2.sidx] = th->th_sport; 6539 key.port[pd2.didx] = th->th_dport; 6540 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 6541 pd2.src, pd2.dst, th->th_sport, th->th_dport); 6542 6543 action = pf_find_state(&pd2, &key, stp); 6544 if (action != PF_MATCH) 6545 return (action); 6546 6547 if (pd2.dir == (*stp)->direction) { 6548 if (PF_REVERSED_KEY((*stp)->key, pd->af)) { 6549 src = &(*stp)->src; 6550 dst = &(*stp)->dst; 6551 } else { 6552 src = &(*stp)->dst; 6553 dst = &(*stp)->src; 6554 } 6555 } else { 6556 if (PF_REVERSED_KEY((*stp)->key, pd->af)) { 6557 src = &(*stp)->dst; 6558 dst = &(*stp)->src; 6559 } else { 6560 src = &(*stp)->src; 6561 dst = &(*stp)->dst; 6562 } 6563 } 6564 6565 if (src->wscale && dst->wscale) 6566 dws = dst->wscale & PF_WSCALE_MASK; 6567 else 6568 dws = 0; 6569 6570 /* Demodulate sequence number */ 6571 seq = ntohl(th->th_seq) - src->seqdiff; 6572 if (src->seqdiff) { 6573 pf_patch_32(pd, &th->th_seq, htonl(seq)); 6574 copyback = 1; 6575 } 6576 6577 if (!((*stp)->state_flags & PFSTATE_SLOPPY) && 6578 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq, 6579 src->seqlo - (dst->max_win << dws)))) { 6580 if (pf_status.debug >= LOG_NOTICE) { 6581 log(LOG_NOTICE, 6582 "pf: BAD ICMP %d:%d ", 6583 icmptype, icmpcode); 6584 pf_print_host(pd->src, 0, pd->af); 6585 addlog(" -> "); 6586 pf_print_host(pd->dst, 0, pd->af); 6587 addlog(" state: "); 6588 pf_print_state(*stp); 6589 addlog(" seq=%u\n", seq); 6590 } 6591 REASON_SET(reason, PFRES_BADSTATE); 6592 return (PF_DROP); 6593 } else { 6594 if (pf_status.debug >= LOG_DEBUG) { 6595 log(LOG_DEBUG, 6596 "pf: OK ICMP %d:%d ", 6597 icmptype, icmpcode); 6598 pf_print_host(pd->src, 0, pd->af); 6599 addlog(" -> "); 6600 pf_print_host(pd->dst, 0, pd->af); 6601 addlog(" state: "); 6602 pf_print_state(*stp); 6603 addlog(" seq=%u\n", seq); 6604 } 6605 } 6606 6607 /* translate source/destination address, if necessary */ 6608 if ((*stp)->key[PF_SK_WIRE] != 6609 (*stp)->key[PF_SK_STACK]) { 6610 struct pf_state_key *nk; 6611 int afto, sidx, didx; 6612 6613 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6614 nk = (*stp)->key[pd->sidx]; 6615 else 6616 nk = (*stp)->key[pd->didx]; 6617 6618 afto = pd->af != nk->af; 6619 sidx = afto ? pd2.didx : pd2.sidx; 6620 didx = afto ? pd2.sidx : pd2.didx; 6621 6622#ifdef INET6 6623 if (afto) { 6624 if (pf_translate_icmp_af(pd, nk->af, 6625 &pd->hdr.icmp)) 6626 return (PF_DROP); 6627 m_copyback(pd->m, pd->off, 6628 sizeof(struct icmp6_hdr), 6629 &pd->hdr.icmp6, M_NOWAIT); 6630 if (pf_change_icmp_af(pd->m, ipoff2, 6631 pd, &pd2, &nk->addr[sidx], 6632 &nk->addr[didx], pd->af, nk->af)) 6633 return (PF_DROP); 6634 pd->m->m_pkthdr.ph_rtableid = 6635 nk->rdomain; 6636 pd->destchg = 1; 6637 pf_addrcpy(&pd->nsaddr, 6638 &nk->addr[pd2.sidx], nk->af); 6639 pf_addrcpy(&pd->ndaddr, 6640 &nk->addr[pd2.didx], nk->af); 6641 if (nk->af == AF_INET) { 6642 pd->proto = IPPROTO_ICMP; 6643 } else { 6644 pd->proto = IPPROTO_ICMPV6; 6645 /* 6646 * IPv4 becomes IPv6 so we must 6647 * copy IPv4 src addr to least 6648 * 32bits in IPv6 address to 6649 * keep traceroute/icmp 6650 * working. 
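 * (The low 32 bits keep the synthesized IPv6 source distinct for each
 * IPv4 router generating the error, so traceroute shows individual
 * hops instead of a single translator address.)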
6651 */ 6652 pd->nsaddr.addr32[3] = 6653 pd->src->addr32[0]; 6654 } 6655 pd->naf = nk->af; 6656 6657 pf_patch_16(pd, 6658 &th->th_sport, nk->port[sidx]); 6659 pf_patch_16(pd, 6660 &th->th_dport, nk->port[didx]); 6661 6662 m_copyback(pd2.m, pd2.off, 8, th, 6663 M_NOWAIT); 6664 return (PF_AFRT); 6665 } 6666#endif /* INET6 */ 6667 if (PF_ANEQ(pd2.src, 6668 &nk->addr[pd2.sidx], pd2.af) || 6669 nk->port[pd2.sidx] != th->th_sport) 6670 pf_translate_icmp(pd, pd2.src, 6671 &th->th_sport, pd->dst, 6672 &nk->addr[pd2.sidx], 6673 nk->port[pd2.sidx]); 6674 6675 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6676 pd2.af) || pd2.rdomain != nk->rdomain) 6677 pd->destchg = 1; 6678 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6679 6680 if (PF_ANEQ(pd2.dst, 6681 &nk->addr[pd2.didx], pd2.af) || 6682 nk->port[pd2.didx] != th->th_dport) 6683 pf_translate_icmp(pd, pd2.dst, 6684 &th->th_dport, pd->src, 6685 &nk->addr[pd2.didx], 6686 nk->port[pd2.didx]); 6687 copyback = 1; 6688 } 6689 6690 if (copyback) { 6691 switch (pd2.af) { 6692 case AF_INET: 6693 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6694 &pd->hdr.icmp, M_NOWAIT); 6695 m_copyback(pd2.m, ipoff2, sizeof(h2), 6696 &h2, M_NOWAIT); 6697 break; 6698#ifdef INET6 6699 case AF_INET6: 6700 m_copyback(pd->m, pd->off, 6701 sizeof(struct icmp6_hdr), 6702 &pd->hdr.icmp6, M_NOWAIT); 6703 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 6704 &h2_6, M_NOWAIT); 6705 break; 6706#endif /* INET6 */ 6707 } 6708 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT); 6709 } 6710 break; 6711 } 6712 case IPPROTO_UDP: { 6713 struct udphdr *uh = &pd2.hdr.udp; 6714 int action; 6715 6716 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh), 6717 reason, pd2.af)) { 6718 DPFPRINTF(LOG_NOTICE, 6719 "ICMP error message too short (udp)"); 6720 return (PF_DROP); 6721 } 6722 6723 key.af = pd2.af; 6724 key.proto = IPPROTO_UDP; 6725 key.rdomain = pd2.rdomain; 6726 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 6727 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 6728 key.port[pd2.sidx] = uh->uh_sport; 6729 key.port[pd2.didx] = uh->uh_dport; 6730 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 6731 pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport); 6732 6733 action = pf_find_state(&pd2, &key, stp); 6734 if (action != PF_MATCH) 6735 return (action); 6736 6737 /* translate source/destination address, if necessary */ 6738 if ((*stp)->key[PF_SK_WIRE] != 6739 (*stp)->key[PF_SK_STACK]) { 6740 struct pf_state_key *nk; 6741 int afto, sidx, didx; 6742 6743 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6744 nk = (*stp)->key[pd->sidx]; 6745 else 6746 nk = (*stp)->key[pd->didx]; 6747 6748 afto = pd->af != nk->af; 6749 sidx = afto ? pd2.didx : pd2.sidx; 6750 didx = afto ? pd2.sidx : pd2.didx; 6751 6752#ifdef INET6 6753 if (afto) { 6754 if (pf_translate_icmp_af(pd, nk->af, 6755 &pd->hdr.icmp)) 6756 return (PF_DROP); 6757 m_copyback(pd->m, pd->off, 6758 sizeof(struct icmp6_hdr), 6759 &pd->hdr.icmp6, M_NOWAIT); 6760 if (pf_change_icmp_af(pd->m, ipoff2, 6761 pd, &pd2, &nk->addr[sidx], 6762 &nk->addr[didx], pd->af, nk->af)) 6763 return (PF_DROP); 6764 pd->m->m_pkthdr.ph_rtableid = 6765 nk->rdomain; 6766 pd->destchg = 1; 6767 pf_addrcpy(&pd->nsaddr, 6768 &nk->addr[pd2.sidx], nk->af); 6769 pf_addrcpy(&pd->ndaddr, 6770 &nk->addr[pd2.didx], nk->af); 6771 if (nk->af == AF_INET) { 6772 pd->proto = IPPROTO_ICMP; 6773 } else { 6774 pd->proto = IPPROTO_ICMPV6; 6775 /* 6776 * IPv4 becomes IPv6 so we must 6777 * copy IPv4 src addr to least 6778 * 32bits in IPv6 address to 6779 * keep traceroute/icmp 6780 * working. 
6781 */ 6782 pd->nsaddr.addr32[3] = 6783 pd->src->addr32[0]; 6784 } 6785 pd->naf = nk->af; 6786 6787 pf_patch_16(pd, 6788 &uh->uh_sport, nk->port[sidx]); 6789 pf_patch_16(pd, 6790 &uh->uh_dport, nk->port[didx]); 6791 6792 m_copyback(pd2.m, pd2.off, sizeof(*uh), 6793 uh, M_NOWAIT); 6794 return (PF_AFRT); 6795 } 6796#endif /* INET6 */ 6797 6798 if (PF_ANEQ(pd2.src, 6799 &nk->addr[pd2.sidx], pd2.af) || 6800 nk->port[pd2.sidx] != uh->uh_sport) 6801 pf_translate_icmp(pd, pd2.src, 6802 &uh->uh_sport, pd->dst, 6803 &nk->addr[pd2.sidx], 6804 nk->port[pd2.sidx]); 6805 6806 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6807 pd2.af) || pd2.rdomain != nk->rdomain) 6808 pd->destchg = 1; 6809 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6810 6811 if (PF_ANEQ(pd2.dst, 6812 &nk->addr[pd2.didx], pd2.af) || 6813 nk->port[pd2.didx] != uh->uh_dport) 6814 pf_translate_icmp(pd, pd2.dst, 6815 &uh->uh_dport, pd->src, 6816 &nk->addr[pd2.didx], 6817 nk->port[pd2.didx]); 6818 6819 switch (pd2.af) { 6820 case AF_INET: 6821 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6822 &pd->hdr.icmp, M_NOWAIT); 6823 m_copyback(pd2.m, ipoff2, sizeof(h2), 6824 &h2, M_NOWAIT); 6825 break; 6826#ifdef INET6 6827 case AF_INET6: 6828 m_copyback(pd->m, pd->off, 6829 sizeof(struct icmp6_hdr), 6830 &pd->hdr.icmp6, M_NOWAIT); 6831 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 6832 &h2_6, M_NOWAIT); 6833 break; 6834#endif /* INET6 */ 6835 } 6836 /* Avoid recomputing quoted UDP checksum. 6837 * note: udp6 0 csum invalid per rfc2460 p27. 6838 * but presumed nothing cares in this context */ 6839 pf_patch_16(pd, &uh->uh_sum, 0); 6840 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh, 6841 M_NOWAIT); 6842 copyback = 1; 6843 } 6844 break; 6845 } 6846 case IPPROTO_ICMP: { 6847 struct icmp *iih = &pd2.hdr.icmp; 6848 6849 if (pd2.af != AF_INET) { 6850 REASON_SET(reason, PFRES_NORM); 6851 return (PF_DROP); 6852 } 6853 6854 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN, 6855 reason, pd2.af)) { 6856 DPFPRINTF(LOG_NOTICE, 6857 "ICMP error message too short (icmp)"); 6858 return (PF_DROP); 6859 } 6860 6861 pf_icmp_mapping(&pd2, iih->icmp_type, 6862 &icmp_dir, &virtual_id, &virtual_type); 6863 6864 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6865 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 6866 if (ret >= 0) 6867 return (ret); 6868 6869 /* translate source/destination address, if necessary */ 6870 if ((*stp)->key[PF_SK_WIRE] != 6871 (*stp)->key[PF_SK_STACK]) { 6872 struct pf_state_key *nk; 6873 int afto, sidx, didx; 6874 6875 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6876 nk = (*stp)->key[pd->sidx]; 6877 else 6878 nk = (*stp)->key[pd->didx]; 6879 6880 afto = pd->af != nk->af; 6881 sidx = afto ? pd2.didx : pd2.sidx; 6882 didx = afto ? pd2.sidx : pd2.didx; 6883 iidx = afto ? 
!iidx : iidx; 6884 6885#ifdef INET6 6886 if (afto) { 6887 if (nk->af != AF_INET6) 6888 return (PF_DROP); 6889 if (pf_translate_icmp_af(pd, nk->af, 6890 &pd->hdr.icmp)) 6891 return (PF_DROP); 6892 m_copyback(pd->m, pd->off, 6893 sizeof(struct icmp6_hdr), 6894 &pd->hdr.icmp6, M_NOWAIT); 6895 if (pf_change_icmp_af(pd->m, ipoff2, 6896 pd, &pd2, &nk->addr[sidx], 6897 &nk->addr[didx], pd->af, nk->af)) 6898 return (PF_DROP); 6899 pd->proto = IPPROTO_ICMPV6; 6900 if (pf_translate_icmp_af(pd, 6901 nk->af, iih)) 6902 return (PF_DROP); 6903 if (virtual_type == htons(ICMP_ECHO)) 6904 pf_patch_16(pd, &iih->icmp_id, 6905 nk->port[iidx]); 6906 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, 6907 iih, M_NOWAIT); 6908 pd->m->m_pkthdr.ph_rtableid = 6909 nk->rdomain; 6910 pd->destchg = 1; 6911 pf_addrcpy(&pd->nsaddr, 6912 &nk->addr[pd2.sidx], nk->af); 6913 pf_addrcpy(&pd->ndaddr, 6914 &nk->addr[pd2.didx], nk->af); 6915 /* 6916 * IPv4 becomes IPv6 so we must copy 6917 * IPv4 src addr to least 32bits in 6918 * IPv6 address to keep traceroute 6919 * working. 6920 */ 6921 pd->nsaddr.addr32[3] = 6922 pd->src->addr32[0]; 6923 pd->naf = nk->af; 6924 return (PF_AFRT); 6925 } 6926#endif /* INET6 */ 6927 6928 if (PF_ANEQ(pd2.src, 6929 &nk->addr[pd2.sidx], pd2.af) || 6930 (virtual_type == htons(ICMP_ECHO) && 6931 nk->port[iidx] != iih->icmp_id)) 6932 pf_translate_icmp(pd, pd2.src, 6933 (virtual_type == htons(ICMP_ECHO)) ? 6934 &iih->icmp_id : NULL, 6935 pd->dst, &nk->addr[pd2.sidx], 6936 (virtual_type == htons(ICMP_ECHO)) ? 6937 nk->port[iidx] : 0); 6938 6939 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 6940 pd2.af) || pd2.rdomain != nk->rdomain) 6941 pd->destchg = 1; 6942 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 6943 6944 if (PF_ANEQ(pd2.dst, 6945 &nk->addr[pd2.didx], pd2.af)) 6946 pf_translate_icmp(pd, pd2.dst, NULL, 6947 pd->src, &nk->addr[pd2.didx], 0); 6948 6949 m_copyback(pd->m, pd->off, ICMP_MINLEN, 6950 &pd->hdr.icmp, M_NOWAIT); 6951 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2, 6952 M_NOWAIT); 6953 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih, 6954 M_NOWAIT); 6955 copyback = 1; 6956 } 6957 break; 6958 } 6959#ifdef INET6 6960 case IPPROTO_ICMPV6: { 6961 struct icmp6_hdr *iih = &pd2.hdr.icmp6; 6962 6963 if (pd2.af != AF_INET6) { 6964 REASON_SET(reason, PFRES_NORM); 6965 return (PF_DROP); 6966 } 6967 6968 if (!pf_pull_hdr(pd2.m, pd2.off, iih, 6969 sizeof(struct icmp6_hdr), reason, pd2.af)) { 6970 DPFPRINTF(LOG_NOTICE, 6971 "ICMP error message too short (icmp6)"); 6972 return (PF_DROP); 6973 } 6974 6975 pf_icmp_mapping(&pd2, iih->icmp6_type, 6976 &icmp_dir, &virtual_id, &virtual_type); 6977 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6978 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1); 6979 /* IPv6? try matching a multicast address */ 6980 if (ret == PF_DROP && pd2.af == AF_INET6 && 6981 icmp_dir == PF_OUT) 6982 ret = pf_icmp_state_lookup(&pd2, &key, stp, 6983 virtual_id, virtual_type, icmp_dir, &iidx, 6984 1, 1); 6985 if (ret >= 0) 6986 return (ret); 6987 6988 /* translate source/destination address, if necessary */ 6989 if ((*stp)->key[PF_SK_WIRE] != 6990 (*stp)->key[PF_SK_STACK]) { 6991 struct pf_state_key *nk; 6992 int afto, sidx, didx; 6993 6994 if (PF_REVERSED_KEY((*stp)->key, pd->af)) 6995 nk = (*stp)->key[pd->sidx]; 6996 else 6997 nk = (*stp)->key[pd->didx]; 6998 6999 afto = pd->af != nk->af; 7000 sidx = afto ? pd2.didx : pd2.sidx; 7001 didx = afto ? pd2.sidx : pd2.didx; 7002 iidx = afto ? 
!iidx : iidx; 7003 7004 if (afto) { 7005 if (nk->af != AF_INET) 7006 return (PF_DROP); 7007 if (pf_translate_icmp_af(pd, nk->af, 7008 &pd->hdr.icmp)) 7009 return (PF_DROP); 7010 m_copyback(pd->m, pd->off, 7011 sizeof(struct icmp6_hdr), 7012 &pd->hdr.icmp6, M_NOWAIT); 7013 if (pf_change_icmp_af(pd->m, ipoff2, 7014 pd, &pd2, &nk->addr[sidx], 7015 &nk->addr[didx], pd->af, nk->af)) 7016 return (PF_DROP); 7017 pd->proto = IPPROTO_ICMP; 7018 if (pf_translate_icmp_af(pd, 7019 nk->af, iih)) 7020 return (PF_DROP); 7021 if (virtual_type == 7022 htons(ICMP6_ECHO_REQUEST)) 7023 pf_patch_16(pd, &iih->icmp6_id, 7024 nk->port[iidx]); 7025 m_copyback(pd2.m, pd2.off, 7026 sizeof(struct icmp6_hdr), iih, 7027 M_NOWAIT); 7028 pd->m->m_pkthdr.ph_rtableid = 7029 nk->rdomain; 7030 pd->destchg = 1; 7031 pf_addrcpy(&pd->nsaddr, 7032 &nk->addr[pd2.sidx], nk->af); 7033 pf_addrcpy(&pd->ndaddr, 7034 &nk->addr[pd2.didx], nk->af); 7035 pd->naf = nk->af; 7036 return (PF_AFRT); 7037 } 7038 7039 if (PF_ANEQ(pd2.src, 7040 &nk->addr[pd2.sidx], pd2.af) || 7041 ((virtual_type == 7042 htons(ICMP6_ECHO_REQUEST)) && 7043 nk->port[pd2.sidx] != iih->icmp6_id)) 7044 pf_translate_icmp(pd, pd2.src, 7045 (virtual_type == 7046 htons(ICMP6_ECHO_REQUEST)) 7047 ? &iih->icmp6_id : NULL, 7048 pd->dst, &nk->addr[pd2.sidx], 7049 (virtual_type == 7050 htons(ICMP6_ECHO_REQUEST)) 7051 ? nk->port[iidx] : 0); 7052 7053 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 7054 pd2.af) || pd2.rdomain != nk->rdomain) 7055 pd->destchg = 1; 7056 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 7057 7058 if (PF_ANEQ(pd2.dst, 7059 &nk->addr[pd2.didx], pd2.af)) 7060 pf_translate_icmp(pd, pd2.dst, NULL, 7061 pd->src, &nk->addr[pd2.didx], 0); 7062 7063 m_copyback(pd->m, pd->off, 7064 sizeof(struct icmp6_hdr), &pd->hdr.icmp6, 7065 M_NOWAIT); 7066 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6, 7067 M_NOWAIT); 7068 m_copyback(pd2.m, pd2.off, 7069 sizeof(struct icmp6_hdr), iih, M_NOWAIT); 7070 copyback = 1; 7071 } 7072 break; 7073 } 7074#endif /* INET6 */ 7075 default: { 7076 int action; 7077 7078 key.af = pd2.af; 7079 key.proto = pd2.proto; 7080 key.rdomain = pd2.rdomain; 7081 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af); 7082 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af); 7083 key.port[0] = key.port[1] = 0; 7084 key.hash = pf_pkt_hash(pd2.af, pd2.proto, 7085 pd2.src, pd2.dst, 0, 0); 7086 7087 action = pf_find_state(&pd2, &key, stp); 7088 if (action != PF_MATCH) 7089 return (action); 7090 7091 /* translate source/destination address, if necessary */ 7092 if ((*stp)->key[PF_SK_WIRE] != 7093 (*stp)->key[PF_SK_STACK]) { 7094 struct pf_state_key *nk = 7095 (*stp)->key[pd->didx]; 7096 7097 if (PF_ANEQ(pd2.src, 7098 &nk->addr[pd2.sidx], pd2.af)) 7099 pf_translate_icmp(pd, pd2.src, NULL, 7100 pd->dst, &nk->addr[pd2.sidx], 0); 7101 7102 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx], 7103 pd2.af) || pd2.rdomain != nk->rdomain) 7104 pd->destchg = 1; 7105 pd->m->m_pkthdr.ph_rtableid = nk->rdomain; 7106 7107 if (PF_ANEQ(pd2.dst, 7108 &nk->addr[pd2.didx], pd2.af)) 7109 pf_translate_icmp(pd, pd2.dst, NULL, 7110 pd->src, &nk->addr[pd2.didx], 0); 7111 7112 switch (pd2.af) { 7113 case AF_INET: 7114 m_copyback(pd->m, pd->off, ICMP_MINLEN, 7115 &pd->hdr.icmp, M_NOWAIT); 7116 m_copyback(pd2.m, ipoff2, sizeof(h2), 7117 &h2, M_NOWAIT); 7118 break; 7119#ifdef INET6 7120 case AF_INET6: 7121 m_copyback(pd->m, pd->off, 7122 sizeof(struct icmp6_hdr), 7123 &pd->hdr.icmp6, M_NOWAIT); 7124 m_copyback(pd2.m, ipoff2, sizeof(h2_6), 7125 &h2_6, M_NOWAIT); 7126 break; 7127#endif /* INET6 */ 7128 } 7129 
copyback = 1; 7130 } 7131 break; 7132 } 7133 } 7134 } 7135 if (copyback) { 7136 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT); 7137 } 7138 7139 return (PF_PASS); 7140} 7141 7142/* 7143 * ipoff and off are measured from the start of the mbuf chain. 7144 * h must be at "ipoff" on the mbuf chain. 7145 */ 7146void * 7147pf_pull_hdr(struct mbuf *m, int off, void *p, int len, 7148 u_short *reasonp, sa_family_t af) 7149{ 7150 int iplen = 0; 7151 7152 switch (af) { 7153 case AF_INET: { 7154 struct ip *h = mtod(m, struct ip *); 7155 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3; 7156 7157 if (fragoff) { 7158 REASON_SET(reasonp, PFRES_FRAG); 7159 return (NULL); 7160 } 7161 iplen = ntohs(h->ip_len); 7162 break; 7163 } 7164#ifdef INET6 7165 case AF_INET6: { 7166 struct ip6_hdr *h = mtod(m, struct ip6_hdr *); 7167 7168 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 7169 break; 7170 } 7171#endif /* INET6 */ 7172 } 7173 if (m->m_pkthdr.len < off + len || iplen < off + len) { 7174 REASON_SET(reasonp, PFRES_SHORT); 7175 return (NULL); 7176 } 7177 m_copydata(m, off, len, p); 7178 return (p); 7179} 7180 7181int 7182pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif, 7183 int rtableid) 7184{ 7185 struct sockaddr_storage ss; 7186 struct sockaddr_in *dst; 7187 int ret = 1; 7188 int check_mpath; 7189#ifdef INET6 7190 struct sockaddr_in6 *dst6; 7191#endif /* INET6 */ 7192 struct rtentry *rt = NULL; 7193 7194 check_mpath = 0; 7195 memset(&ss, 0, sizeof(ss)); 7196 switch (af) { 7197 case AF_INET: 7198 dst = (struct sockaddr_in *)&ss; 7199 dst->sin_family = AF_INET; 7200 dst->sin_len = sizeof(*dst); 7201 dst->sin_addr = addr->v4; 7202 if (atomic_load_int(&ipmultipath)) 7203 check_mpath = 1; 7204 break; 7205#ifdef INET6 7206 case AF_INET6: 7207 /* 7208 * Skip check for addresses with embedded interface scope, 7209 * as they would always match anyway. 
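 * Scoped (e.g. link-local) addresses carry the interface index
 * embedded in the address itself, so the lookup would trivially
 * succeed for that interface and the check adds nothing.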
7210 */ 7211 if (IN6_IS_SCOPE_EMBED(&addr->v6)) 7212 goto out; 7213 dst6 = (struct sockaddr_in6 *)&ss; 7214 dst6->sin6_family = AF_INET6; 7215 dst6->sin6_len = sizeof(*dst6); 7216 dst6->sin6_addr = addr->v6; 7217 if (atomic_load_int(&ip6_multipath)) 7218 check_mpath = 1; 7219 break; 7220#endif /* INET6 */ 7221 } 7222 7223 /* Skip checks for ipsec interfaces */ 7224 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC) 7225 goto out; 7226 7227 rt = rtalloc(sstosa(&ss), 0, rtableid); 7228 if (rt != NULL) { 7229 /* No interface given, this is a no-route check */ 7230 if (kif == NULL) 7231 goto out; 7232 7233 if (kif->pfik_ifp == NULL) { 7234 ret = 0; 7235 goto out; 7236 } 7237 7238 /* Perform uRPF check if passed input interface */ 7239 ret = 0; 7240 do { 7241 if (rt->rt_ifidx == kif->pfik_ifp->if_index) { 7242 ret = 1; 7243#if NCARP > 0 7244 } else { 7245 struct ifnet *ifp; 7246 7247 smr_read_enter(); 7248 ifp = if_get_smr(rt->rt_ifidx); 7249 if (ifp != NULL && ifp->if_type == IFT_CARP && 7250 ifp->if_carpdevidx == 7251 kif->pfik_ifp->if_index) 7252 ret = 1; 7253 smr_read_leave(); 7254#endif /* NCARP */ 7255 } 7256 7257 rt = rtable_iterate(rt); 7258 } while (check_mpath == 1 && rt != NULL && ret == 0); 7259 } else 7260 ret = 0; 7261out: 7262 rtfree(rt); 7263 return (ret); 7264} 7265 7266int 7267pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw, 7268 int rtableid) 7269{ 7270 struct sockaddr_storage ss; 7271 struct sockaddr_in *dst; 7272#ifdef INET6 7273 struct sockaddr_in6 *dst6; 7274#endif /* INET6 */ 7275 struct rtentry *rt; 7276 int ret = 0; 7277 7278 memset(&ss, 0, sizeof(ss)); 7279 switch (af) { 7280 case AF_INET: 7281 dst = (struct sockaddr_in *)&ss; 7282 dst->sin_family = AF_INET; 7283 dst->sin_len = sizeof(*dst); 7284 dst->sin_addr = addr->v4; 7285 break; 7286#ifdef INET6 7287 case AF_INET6: 7288 dst6 = (struct sockaddr_in6 *)&ss; 7289 dst6->sin6_family = AF_INET6; 7290 dst6->sin6_len = sizeof(*dst6); 7291 dst6->sin6_addr = addr->v6; 7292 break; 7293#endif /* INET6 */ 7294 } 7295 7296 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid); 7297 if (rt != NULL) { 7298 if (rt->rt_labelid == aw->v.rtlabel) 7299 ret = 1; 7300 rtfree(rt); 7301 } 7302 7303 return (ret); 7304} 7305 7306/* pf_route() may change pd->m, adjust local copies after calling */ 7307void 7308pf_route(struct pf_pdesc *pd, struct pf_state *st) 7309{ 7310 struct mbuf *m0; 7311 struct mbuf_list ml; 7312 struct sockaddr_in *dst, sin; 7313 struct rtentry *rt = NULL; 7314 struct ip *ip; 7315 struct ifnet *ifp = NULL; 7316 unsigned int rtableid; 7317 7318 if (pd->m->m_pkthdr.pf.routed++ > 3) { 7319 m_freem(pd->m); 7320 pd->m = NULL; 7321 return; 7322 } 7323 7324 if (st->rt == PF_DUPTO) { 7325 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 7326 return; 7327 } else { 7328 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) 7329 return; 7330 m0 = pd->m; 7331 pd->m = NULL; 7332 } 7333 7334 if (m0->m_len < sizeof(struct ip)) { 7335 DPFPRINTF(LOG_ERR, 7336 "%s: m0->m_len < sizeof(struct ip)", __func__); 7337 goto bad; 7338 } 7339 7340 ip = mtod(m0, struct ip *); 7341 7342 if (pd->dir == PF_IN) { 7343 if (ip->ip_ttl <= IPTTLDEC) { 7344 if (st->rt != PF_DUPTO) { 7345 pf_send_icmp(m0, ICMP_TIMXCEED, 7346 ICMP_TIMXCEED_INTRANS, 0, 7347 pd->af, st->rule.ptr, pd->rdomain); 7348 } 7349 goto bad; 7350 } 7351 ip->ip_ttl -= IPTTLDEC; 7352 } 7353 7354 memset(&sin, 0, sizeof(sin)); 7355 dst = &sin; 7356 dst->sin_family = AF_INET; 7357 dst->sin_len = sizeof(*dst); 7358 dst->sin_addr = st->rt_addr.v4; 
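/*
 * Look up the route towards the route-to address in the packet's
 * rtable; rtalloc_mpath() hashes on the source address, so multipath
 * routes spread different flows across the available gateways.
 */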
7359 rtableid = m0->m_pkthdr.ph_rtableid; 7360 7361 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid); 7362 if (!rtisvalid(rt)) { 7363 if (st->rt != PF_DUPTO) { 7364 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST, 7365 0, pd->af, st->rule.ptr, pd->rdomain); 7366 } 7367 ipstat_inc(ips_noroute); 7368 goto bad; 7369 } 7370 7371 ifp = if_get(rt->rt_ifidx); 7372 if (ifp == NULL) 7373 goto bad; 7374 7375 /* A locally generated packet may have invalid source address. */ 7376 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET && 7377 (ifp->if_flags & IFF_LOOPBACK) == 0) 7378 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr; 7379 7380 if (st->rt != PF_DUPTO && pd->dir == PF_IN) { 7381 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS) 7382 goto bad; 7383 else if (m0 == NULL) 7384 goto done; 7385 if (m0->m_len < sizeof(struct ip)) { 7386 DPFPRINTF(LOG_ERR, 7387 "%s: m0->m_len < sizeof(struct ip)", __func__); 7388 goto bad; 7389 } 7390 ip = mtod(m0, struct ip *); 7391 } 7392 7393 if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) || 7394 m0 == NULL) 7395 goto done; 7396 7397 /* 7398 * Too large for interface; fragment if possible. 7399 * Must be able to put at least 8 bytes per fragment. 7400 */ 7401 if (ip->ip_off & htons(IP_DF)) { 7402 ipstat_inc(ips_cantfrag); 7403 if (st->rt != PF_DUPTO) 7404 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG, 7405 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); 7406 goto bad; 7407 } 7408 7409 if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) || 7410 if_output_ml(ifp, &ml, sintosa(dst), rt)) 7411 goto done; 7412 ipstat_inc(ips_fragmented); 7413 7414done: 7415 if_put(ifp); 7416 rtfree(rt); 7417 return; 7418 7419bad: 7420 m_freem(m0); 7421 goto done; 7422} 7423 7424#ifdef INET6 7425/* pf_route6() may change pd->m, adjust local copies after calling */ 7426void 7427pf_route6(struct pf_pdesc *pd, struct pf_state *st) 7428{ 7429 struct mbuf *m0; 7430 struct sockaddr_in6 *dst, sin6; 7431 struct rtentry *rt = NULL; 7432 struct ip6_hdr *ip6; 7433 struct ifnet *ifp = NULL; 7434 struct m_tag *mtag; 7435 unsigned int rtableid; 7436 7437 if (pd->m->m_pkthdr.pf.routed++ > 3) { 7438 m_freem(pd->m); 7439 pd->m = NULL; 7440 return; 7441 } 7442 7443 if (st->rt == PF_DUPTO) { 7444 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL) 7445 return; 7446 } else { 7447 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir)) 7448 return; 7449 m0 = pd->m; 7450 pd->m = NULL; 7451 } 7452 7453 if (m0->m_len < sizeof(struct ip6_hdr)) { 7454 DPFPRINTF(LOG_ERR, 7455 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 7456 goto bad; 7457 } 7458 ip6 = mtod(m0, struct ip6_hdr *); 7459 7460 if (pd->dir == PF_IN) { 7461 if (ip6->ip6_hlim <= IPV6_HLIMDEC) { 7462 if (st->rt != PF_DUPTO) { 7463 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED, 7464 ICMP6_TIME_EXCEED_TRANSIT, 0, 7465 pd->af, st->rule.ptr, pd->rdomain); 7466 } 7467 goto bad; 7468 } 7469 ip6->ip6_hlim -= IPV6_HLIMDEC; 7470 } 7471 7472 memset(&sin6, 0, sizeof(sin6)); 7473 dst = &sin6; 7474 dst->sin6_family = AF_INET6; 7475 dst->sin6_len = sizeof(*dst); 7476 dst->sin6_addr = st->rt_addr.v6; 7477 rtableid = m0->m_pkthdr.ph_rtableid; 7478 7479 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0], 7480 rtableid); 7481 if (!rtisvalid(rt)) { 7482 if (st->rt != PF_DUPTO) { 7483 pf_send_icmp(m0, ICMP6_DST_UNREACH, 7484 ICMP6_DST_UNREACH_NOROUTE, 0, 7485 pd->af, st->rule.ptr, pd->rdomain); 7486 } 7487 ip6stat_inc(ip6s_noroute); 7488 goto bad; 7489 } 7490 7491 ifp = if_get(rt->rt_ifidx); 7492 if (ifp == 
NULL) 7493 goto bad; 7494 7495 /* A locally generated packet may have invalid source address. */ 7496 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) && 7497 (ifp->if_flags & IFF_LOOPBACK) == 0) 7498 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr; 7499 7500 if (st->rt != PF_DUPTO && pd->dir == PF_IN) { 7501 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS) 7502 goto bad; 7503 else if (m0 == NULL) 7504 goto done; 7505 if (m0->m_len < sizeof(struct ip6_hdr)) { 7506 DPFPRINTF(LOG_ERR, 7507 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__); 7508 goto bad; 7509 } 7510 } 7511 7512 /* 7513 * If packet has been reassembled by PF earlier, we have to 7514 * use pf_refragment6() here to turn it back to fragments. 7515 */ 7516 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) { 7517 (void) pf_refragment6(&m0, mtag, dst, ifp, rt); 7518 goto done; 7519 } 7520 7521 if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) || 7522 m0 == NULL) 7523 goto done; 7524 7525 ip6stat_inc(ip6s_cantfrag); 7526 if (st->rt != PF_DUPTO) 7527 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0, 7528 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain); 7529 goto bad; 7530 7531done: 7532 if_put(ifp); 7533 rtfree(rt); 7534 return; 7535 7536bad: 7537 m_freem(m0); 7538 goto done; 7539} 7540#endif /* INET6 */ 7541 7542/* 7543 * check TCP checksum and set mbuf flag 7544 * off is the offset where the protocol header starts 7545 * len is the total length of protocol header plus payload 7546 * returns 0 when the checksum is valid, otherwise returns 1. 7547 * if the _OUT flag is set the checksum isn't done yet, consider these ok 7548 */ 7549int 7550pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af) 7551{ 7552 u_int16_t sum; 7553 7554 if (m->m_pkthdr.csum_flags & 7555 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) { 7556 return (0); 7557 } 7558 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD || 7559 off < sizeof(struct ip) || 7560 m->m_pkthdr.len < off + len) { 7561 return (1); 7562 } 7563 7564 /* need to do it in software */ 7565 tcpstat_inc(tcps_inswcsum); 7566 7567 switch (af) { 7568 case AF_INET: 7569 if (m->m_len < sizeof(struct ip)) 7570 return (1); 7571 7572 sum = in4_cksum(m, IPPROTO_TCP, off, len); 7573 break; 7574#ifdef INET6 7575 case AF_INET6: 7576 if (m->m_len < sizeof(struct ip6_hdr)) 7577 return (1); 7578 7579 sum = in6_cksum(m, IPPROTO_TCP, off, len); 7580 break; 7581#endif /* INET6 */ 7582 default: 7583 unhandled_af(af); 7584 } 7585 if (sum) { 7586 tcpstat_inc(tcps_rcvbadsum); 7587 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD; 7588 return (1); 7589 } 7590 7591 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK; 7592 return (0); 7593} 7594 7595struct pf_divert * 7596pf_find_divert(struct mbuf *m) 7597{ 7598 struct m_tag *mtag; 7599 7600 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) 7601 return (NULL); 7602 7603 return ((struct pf_divert *)(mtag + 1)); 7604} 7605 7606struct pf_divert * 7607pf_get_divert(struct mbuf *m) 7608{ 7609 struct m_tag *mtag; 7610 7611 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) { 7612 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert), 7613 M_NOWAIT); 7614 if (mtag == NULL) 7615 return (NULL); 7616 memset(mtag + 1, 0, sizeof(struct pf_divert)); 7617 m_tag_prepend(m, mtag); 7618 } 7619 7620 return ((struct pf_divert *)(mtag + 1)); 7621} 7622 7623int 7624pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end, 7625 u_short *reason) 7626{ 7627 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)]; 7628 7629 /* IP header in payload 
of ICMP packet may be too short */ 7630 if (pd->m->m_pkthdr.len < end) { 7631 DPFPRINTF(LOG_NOTICE, "IP option too short"); 7632 REASON_SET(reason, PFRES_SHORT); 7633 return (PF_DROP); 7634 } 7635 7636 KASSERT(end - off <= sizeof(opts)); 7637 m_copydata(pd->m, off, end - off, opts); 7638 end -= off; 7639 off = 0; 7640 7641 while (off < end) { 7642 type = opts[off]; 7643 if (type == IPOPT_EOL) 7644 break; 7645 if (type == IPOPT_NOP) { 7646 off++; 7647 continue; 7648 } 7649 if (off + 2 > end) { 7650 DPFPRINTF(LOG_NOTICE, "IP length opt"); 7651 REASON_SET(reason, PFRES_IPOPTIONS); 7652 return (PF_DROP); 7653 } 7654 length = opts[off + 1]; 7655 if (length < 2) { 7656 DPFPRINTF(LOG_NOTICE, "IP short opt"); 7657 REASON_SET(reason, PFRES_IPOPTIONS); 7658 return (PF_DROP); 7659 } 7660 if (off + length > end) { 7661 DPFPRINTF(LOG_NOTICE, "IP long opt"); 7662 REASON_SET(reason, PFRES_IPOPTIONS); 7663 return (PF_DROP); 7664 } 7665 switch (type) { 7666 case IPOPT_RA: 7667 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 7668 break; 7669 default: 7670 SET(pd->badopts, PF_OPT_OTHER); 7671 break; 7672 } 7673 off += length; 7674 } 7675 7676 return (PF_PASS); 7677} 7678 7679int 7680pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason) 7681{ 7682 struct ip6_ext ext; 7683 u_int32_t hlen, end; 7684 int hdr_cnt; 7685 7686 hlen = h->ip_hl << 2; 7687 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) { 7688 REASON_SET(reason, PFRES_SHORT); 7689 return (PF_DROP); 7690 } 7691 if (hlen != sizeof(struct ip)) { 7692 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip), 7693 pd->off + hlen, reason) != PF_PASS) 7694 return (PF_DROP); 7695 /* header options which contain only padding is fishy */ 7696 if (pd->badopts == 0) 7697 SET(pd->badopts, PF_OPT_OTHER); 7698 } 7699 end = pd->off + ntohs(h->ip_len); 7700 pd->off += hlen; 7701 pd->proto = h->ip_p; 7702 /* IGMP packets have router alert options, allow them */ 7703 if (pd->proto == IPPROTO_IGMP) { 7704 /* 7705 * According to RFC 1112 ttl must be set to 1 in all IGMP 7706 * packets sent to 224.0.0.1 7707 */ 7708 if ((h->ip_ttl != 1) && 7709 (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) { 7710 DPFPRINTF(LOG_NOTICE, "Invalid IGMP"); 7711 REASON_SET(reason, PFRES_IPOPTIONS); 7712 return (PF_DROP); 7713 } 7714 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 7715 } 7716 /* stop walking over non initial fragments */ 7717 if ((h->ip_off & htons(IP_OFFMASK)) != 0) 7718 return (PF_PASS); 7719 7720 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 7721 switch (pd->proto) { 7722 case IPPROTO_AH: 7723 /* fragments may be short */ 7724 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 && 7725 end < pd->off + sizeof(ext)) 7726 return (PF_PASS); 7727 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7728 reason, AF_INET)) { 7729 DPFPRINTF(LOG_NOTICE, "IP short exthdr"); 7730 return (PF_DROP); 7731 } 7732 pd->off += (ext.ip6e_len + 2) * 4; 7733 pd->proto = ext.ip6e_nxt; 7734 break; 7735 default: 7736 return (PF_PASS); 7737 } 7738 } 7739 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit"); 7740 REASON_SET(reason, PFRES_IPOPTIONS); 7741 return (PF_DROP); 7742} 7743 7744#ifdef INET6 7745int 7746pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end, 7747 u_short *reason) 7748{ 7749 struct ip6_opt opt; 7750 struct ip6_opt_jumbo jumbo; 7751 7752 while (off < end) { 7753 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type, 7754 sizeof(opt.ip6o_type), reason, AF_INET6)) { 7755 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type"); 7756 return (PF_DROP); 7757 } 7758 if 
(opt.ip6o_type == IP6OPT_PAD1) { 7759 off++; 7760 continue; 7761 } 7762 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt), 7763 reason, AF_INET6)) { 7764 DPFPRINTF(LOG_NOTICE, "IPv6 short opt"); 7765 return (PF_DROP); 7766 } 7767 if (off + sizeof(opt) + opt.ip6o_len > end) { 7768 DPFPRINTF(LOG_NOTICE, "IPv6 long opt"); 7769 REASON_SET(reason, PFRES_IPOPTIONS); 7770 return (PF_DROP); 7771 } 7772 switch (opt.ip6o_type) { 7773 case IP6OPT_PADN: 7774 break; 7775 case IP6OPT_JUMBO: 7776 SET(pd->badopts, PF_OPT_JUMBO); 7777 if (pd->jumbolen != 0) { 7778 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo"); 7779 REASON_SET(reason, PFRES_IPOPTIONS); 7780 return (PF_DROP); 7781 } 7782 if (ntohs(h->ip6_plen) != 0) { 7783 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen"); 7784 REASON_SET(reason, PFRES_IPOPTIONS); 7785 return (PF_DROP); 7786 } 7787 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo), 7788 reason, AF_INET6)) { 7789 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo"); 7790 return (PF_DROP); 7791 } 7792 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len, 7793 sizeof(pd->jumbolen)); 7794 pd->jumbolen = ntohl(pd->jumbolen); 7795 if (pd->jumbolen < IPV6_MAXPACKET) { 7796 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen"); 7797 REASON_SET(reason, PFRES_IPOPTIONS); 7798 return (PF_DROP); 7799 } 7800 break; 7801 case IP6OPT_ROUTER_ALERT: 7802 SET(pd->badopts, PF_OPT_ROUTER_ALERT); 7803 break; 7804 default: 7805 SET(pd->badopts, PF_OPT_OTHER); 7806 break; 7807 } 7808 off += sizeof(opt) + opt.ip6o_len; 7809 } 7810 7811 return (PF_PASS); 7812} 7813 7814int 7815pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason) 7816{ 7817 struct ip6_frag frag; 7818 struct ip6_ext ext; 7819 struct icmp6_hdr icmp6; 7820 struct ip6_rthdr rthdr; 7821 u_int32_t end; 7822 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0; 7823 7824 pd->off += sizeof(struct ip6_hdr); 7825 end = pd->off + ntohs(h->ip6_plen); 7826 pd->fragoff = pd->extoff = pd->jumbolen = 0; 7827 pd->proto = h->ip6_nxt; 7828 7829 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) { 7830 switch (pd->proto) { 7831 case IPPROTO_ROUTING: 7832 case IPPROTO_DSTOPTS: 7833 SET(pd->badopts, PF_OPT_OTHER); 7834 break; 7835 case IPPROTO_HOPOPTS: 7836 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7837 reason, AF_INET6)) { 7838 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 7839 return (PF_DROP); 7840 } 7841 if (pf_walk_option6(pd, h, pd->off + sizeof(ext), 7842 pd->off + (ext.ip6e_len + 1) * 8, reason) 7843 != PF_PASS) 7844 return (PF_DROP); 7845 /* option header which contains only padding is fishy */ 7846 if (pd->badopts == 0) 7847 SET(pd->badopts, PF_OPT_OTHER); 7848 break; 7849 } 7850 switch (pd->proto) { 7851 case IPPROTO_FRAGMENT: 7852 if (fraghdr_cnt++) { 7853 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment"); 7854 REASON_SET(reason, PFRES_FRAG); 7855 return (PF_DROP); 7856 } 7857 /* jumbo payload packets cannot be fragmented */ 7858 if (pd->jumbolen != 0) { 7859 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo"); 7860 REASON_SET(reason, PFRES_FRAG); 7861 return (PF_DROP); 7862 } 7863 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag), 7864 reason, AF_INET6)) { 7865 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment"); 7866 return (PF_DROP); 7867 } 7868 /* stop walking over non initial fragments */ 7869 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) { 7870 pd->fragoff = pd->off; 7871 return (PF_PASS); 7872 } 7873 /* RFC6946: reassemble only non atomic fragments */ 7874 if (frag.ip6f_offlg & IP6F_MORE_FRAG) 7875 pd->fragoff = pd->off; 7876 pd->off += sizeof(frag); 7877 pd->proto = 
frag.ip6f_nxt; 7878 break; 7879 case IPPROTO_ROUTING: 7880 if (rthdr_cnt++) { 7881 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr"); 7882 REASON_SET(reason, PFRES_IPOPTIONS); 7883 return (PF_DROP); 7884 } 7885 /* fragments may be short */ 7886 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) { 7887 pd->off = pd->fragoff; 7888 pd->proto = IPPROTO_FRAGMENT; 7889 return (PF_PASS); 7890 } 7891 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr), 7892 reason, AF_INET6)) { 7893 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr"); 7894 return (PF_DROP); 7895 } 7896 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) { 7897 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0"); 7898 REASON_SET(reason, PFRES_IPOPTIONS); 7899 return (PF_DROP); 7900 } 7901 /* FALLTHROUGH */ 7902 case IPPROTO_HOPOPTS: 7903 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */ 7904 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) { 7905 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first"); 7906 REASON_SET(reason, PFRES_IPOPTIONS); 7907 return (PF_DROP); 7908 } 7909 /* FALLTHROUGH */ 7910 case IPPROTO_AH: 7911 case IPPROTO_DSTOPTS: 7912 /* fragments may be short */ 7913 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) { 7914 pd->off = pd->fragoff; 7915 pd->proto = IPPROTO_FRAGMENT; 7916 return (PF_PASS); 7917 } 7918 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext), 7919 reason, AF_INET6)) { 7920 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr"); 7921 return (PF_DROP); 7922 } 7923 /* reassembly needs the ext header before the frag */ 7924 if (pd->fragoff == 0) 7925 pd->extoff = pd->off; 7926 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 && 7927 ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) { 7928 DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo"); 7929 REASON_SET(reason, PFRES_IPOPTIONS); 7930 return (PF_DROP); 7931 } 7932 if (pd->proto == IPPROTO_AH) 7933 pd->off += (ext.ip6e_len + 2) * 4; 7934 else 7935 pd->off += (ext.ip6e_len + 1) * 8; 7936 pd->proto = ext.ip6e_nxt; 7937 break; 7938 case IPPROTO_ICMPV6: 7939 /* fragments may be short, ignore inner header then */ 7940 if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) { 7941 pd->off = pd->fragoff; 7942 pd->proto = IPPROTO_FRAGMENT; 7943 return (PF_PASS); 7944 } 7945 if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6), 7946 reason, AF_INET6)) { 7947 DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr"); 7948 return (PF_DROP); 7949 } 7950 /* ICMP multicast packets have router alert options */ 7951 switch (icmp6.icmp6_type) { 7952 case MLD_LISTENER_QUERY: 7953 case MLD_LISTENER_REPORT: 7954 case MLD_LISTENER_DONE: 7955 case MLDV2_LISTENER_REPORT: 7956 /* 7957 * According to RFC 2710 all MLD messages are 7958 * sent with hop-limit (ttl) set to 1, and link 7959 * local source address. If either one is 7960 * missing then MLD message is invalid and 7961 * should be discarded. 7962 */ 7963 if ((h->ip6_hlim != 1) || 7964 !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) { 7965 DPFPRINTF(LOG_NOTICE, "Invalid MLD"); 7966 REASON_SET(reason, PFRES_IPOPTIONS); 7967 return (PF_DROP); 7968 } 7969 CLR(pd->badopts, PF_OPT_ROUTER_ALERT); 7970 break; 7971 } 7972 return (PF_PASS); 7973 case IPPROTO_TCP: 7974 case IPPROTO_UDP: 7975 /* fragments may be short, ignore inner header then */ 7976 if (pd->fragoff != 0 && end < pd->off + 7977 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) : 7978 pd->proto == IPPROTO_UDP ? 
sizeof(struct udphdr) : 7979 sizeof(struct icmp6_hdr))) { 7980 pd->off = pd->fragoff; 7981 pd->proto = IPPROTO_FRAGMENT; 7982 } 7983 /* FALLTHROUGH */ 7984 default: 7985 return (PF_PASS); 7986 } 7987 } 7988 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit"); 7989 REASON_SET(reason, PFRES_IPOPTIONS); 7990 return (PF_DROP); 7991} 7992#endif /* INET6 */ 7993 7994u_int16_t 7995pf_pkt_hash(sa_family_t af, uint8_t proto, 7996 const struct pf_addr *src, const struct pf_addr *dst, 7997 uint16_t sport, uint16_t dport) 7998{ 7999 uint32_t hash; 8000 8001 hash = src->addr32[0] ^ dst->addr32[0]; 8002#ifdef INET6 8003 if (af == AF_INET6) { 8004 hash ^= src->addr32[1] ^ dst->addr32[1]; 8005 hash ^= src->addr32[2] ^ dst->addr32[2]; 8006 hash ^= src->addr32[3] ^ dst->addr32[3]; 8007 } 8008#endif 8009 8010 switch (proto) { 8011 case IPPROTO_TCP: 8012 case IPPROTO_UDP: 8013 hash ^= sport ^ dport; 8014 break; 8015 } 8016 8017 return stoeplitz_n32(hash); 8018} 8019 8020int 8021pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir, 8022 struct pfi_kif *kif, struct mbuf *m, u_short *reason) 8023{ 8024 memset(pd, 0, sizeof(*pd)); 8025 pd->dir = dir; 8026 pd->kif = kif; /* kif is NULL when called by pflog */ 8027 pd->m = m; 8028 pd->sidx = (dir == PF_IN) ? 0 : 1; 8029 pd->didx = (dir == PF_IN) ? 1 : 0; 8030 pd->af = pd->naf = af; 8031 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid); 8032 8033 switch (pd->af) { 8034 case AF_INET: { 8035 struct ip *h; 8036 8037 /* Check for illegal packets */ 8038 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) { 8039 REASON_SET(reason, PFRES_SHORT); 8040 return (PF_DROP); 8041 } 8042 8043 h = mtod(pd->m, struct ip *); 8044 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) { 8045 REASON_SET(reason, PFRES_SHORT); 8046 return (PF_DROP); 8047 } 8048 8049 if (pf_walk_header(pd, h, reason) != PF_PASS) 8050 return (PF_DROP); 8051 8052 pd->src = (struct pf_addr *)&h->ip_src; 8053 pd->dst = (struct pf_addr *)&h->ip_dst; 8054 pd->tot_len = ntohs(h->ip_len); 8055 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK; 8056 pd->ttl = h->ip_ttl; 8057 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ? 8058 PF_VPROTO_FRAGMENT : pd->proto; 8059 8060 break; 8061 } 8062#ifdef INET6 8063 case AF_INET6: { 8064 struct ip6_hdr *h; 8065 8066 /* Check for illegal packets */ 8067 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) { 8068 REASON_SET(reason, PFRES_SHORT); 8069 return (PF_DROP); 8070 } 8071 8072 h = mtod(pd->m, struct ip6_hdr *); 8073 if (pd->m->m_pkthdr.len < 8074 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) { 8075 REASON_SET(reason, PFRES_SHORT); 8076 return (PF_DROP); 8077 } 8078 8079 if (pf_walk_header6(pd, h, reason) != PF_PASS) 8080 return (PF_DROP); 8081 8082#if 1 8083 /* 8084 * we do not support jumbogram yet. if we keep going, zero 8085 * ip6_plen will do something bad, so drop the packet for now. 8086 */ 8087 if (pd->jumbolen != 0) { 8088 REASON_SET(reason, PFRES_NORM); 8089 return (PF_DROP); 8090 } 8091#endif /* 1 */ 8092 8093 pd->src = (struct pf_addr *)&h->ip6_src; 8094 pd->dst = (struct pf_addr *)&h->ip6_dst; 8095 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr); 8096 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20; 8097 pd->ttl = h->ip6_hlim; 8098 pd->virtual_proto = (pd->fragoff != 0) ? 
8099 PF_VPROTO_FRAGMENT : pd->proto; 8100 8101 break; 8102 } 8103#endif /* INET6 */ 8104 default: 8105 panic("pf_setup_pdesc called with illegal af %u", pd->af); 8106 8107 } 8108 8109 pf_addrcpy(&pd->nsaddr, pd->src, pd->af); 8110 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af); 8111 8112 switch (pd->virtual_proto) { 8113 case IPPROTO_TCP: { 8114 struct tcphdr *th = &pd->hdr.tcp; 8115 8116 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th), 8117 reason, pd->af)) 8118 return (PF_DROP); 8119 pd->hdrlen = sizeof(*th); 8120 if (th->th_dport == 0 || 8121 pd->off + (th->th_off << 2) > pd->tot_len || 8122 (th->th_off << 2) < sizeof(struct tcphdr)) { 8123 REASON_SET(reason, PFRES_SHORT); 8124 return (PF_DROP); 8125 } 8126 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2); 8127 pd->sport = &th->th_sport; 8128 pd->dport = &th->th_dport; 8129 pd->pcksum = &th->th_sum; 8130 break; 8131 } 8132 case IPPROTO_UDP: { 8133 struct udphdr *uh = &pd->hdr.udp; 8134 8135 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh), 8136 reason, pd->af)) 8137 return (PF_DROP); 8138 pd->hdrlen = sizeof(*uh); 8139 if (uh->uh_dport == 0 || 8140 pd->off + ntohs(uh->uh_ulen) > pd->tot_len || 8141 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) { 8142 REASON_SET(reason, PFRES_SHORT); 8143 return (PF_DROP); 8144 } 8145 pd->sport = &uh->uh_sport; 8146 pd->dport = &uh->uh_dport; 8147 pd->pcksum = &uh->uh_sum; 8148 break; 8149 } 8150 case IPPROTO_ICMP: { 8151 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN, 8152 reason, pd->af)) 8153 return (PF_DROP); 8154 pd->hdrlen = ICMP_MINLEN; 8155 if (pd->off + pd->hdrlen > pd->tot_len) { 8156 REASON_SET(reason, PFRES_SHORT); 8157 return (PF_DROP); 8158 } 8159 pd->pcksum = &pd->hdr.icmp.icmp_cksum; 8160 break; 8161 } 8162#ifdef INET6 8163 case IPPROTO_ICMPV6: { 8164 size_t icmp_hlen = sizeof(struct icmp6_hdr); 8165 8166 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 8167 reason, pd->af)) 8168 return (PF_DROP); 8169 /* ICMP headers we look further into to match state */ 8170 switch (pd->hdr.icmp6.icmp6_type) { 8171 case MLD_LISTENER_QUERY: 8172 case MLD_LISTENER_REPORT: 8173 icmp_hlen = sizeof(struct mld_hdr); 8174 break; 8175 case ND_NEIGHBOR_SOLICIT: 8176 case ND_NEIGHBOR_ADVERT: 8177 icmp_hlen = sizeof(struct nd_neighbor_solicit); 8178 /* FALLTHROUGH */ 8179 case ND_ROUTER_SOLICIT: 8180 case ND_ROUTER_ADVERT: 8181 case ND_REDIRECT: 8182 if (pd->ttl != 255) { 8183 REASON_SET(reason, PFRES_NORM); 8184 return (PF_DROP); 8185 } 8186 break; 8187 } 8188 if (icmp_hlen > sizeof(struct icmp6_hdr) && 8189 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen, 8190 reason, pd->af)) 8191 return (PF_DROP); 8192 pd->hdrlen = icmp_hlen; 8193 if (pd->off + pd->hdrlen > pd->tot_len) { 8194 REASON_SET(reason, PFRES_SHORT); 8195 return (PF_DROP); 8196 } 8197 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum; 8198 break; 8199 } 8200#endif /* INET6 */ 8201 } 8202 8203 if (pd->sport) 8204 pd->osport = pd->nsport = *pd->sport; 8205 if (pd->dport) 8206 pd->odport = pd->ndport = *pd->dport; 8207 8208 pd->hash = pf_pkt_hash(pd->af, pd->proto, 8209 pd->src, pd->dst, pd->osport, pd->odport); 8210 8211 return (PF_PASS); 8212} 8213 8214void 8215pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st, 8216 struct pf_rule *r, struct pf_rule *a) 8217{ 8218 int dirndx; 8219 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT] 8220 [action != PF_PASS] += pd->tot_len; 8221 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT] 8222 [action != PF_PASS]++; 8223 8224 if (action == PF_PASS || action 
void
pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st,
    struct pf_rule *r, struct pf_rule *a)
{
	int dirndx;

	pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS] += pd->tot_len;
	pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
	    [action != PF_PASS]++;

	if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
		dirndx = (pd->dir == PF_OUT);
		r->packets[dirndx]++;
		r->bytes[dirndx] += pd->tot_len;
		if (a != NULL) {
			a->packets[dirndx]++;
			a->bytes[dirndx] += pd->tot_len;
		}
		if (st != NULL) {
			struct pf_rule_item *ri;
			struct pf_sn_item *sni;

			SLIST_FOREACH(sni, &st->src_nodes, next) {
				sni->sn->packets[dirndx]++;
				sni->sn->bytes[dirndx] += pd->tot_len;
			}
			dirndx = (pd->dir == st->direction) ? 0 : 1;
			st->packets[dirndx]++;
			st->bytes[dirndx] += pd->tot_len;

			SLIST_FOREACH(ri, &st->match_rules, entry) {
				ri->r->packets[dirndx]++;
				ri->r->bytes[dirndx] += pd->tot_len;

				if (ri->r->src.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->src.addr.p.tbl,
					    &st->key[(st->direction == PF_IN)]->
					    addr[(st->direction == PF_OUT)],
					    pd, ri->r->action, ri->r->src.neg);
				if (ri->r->dst.addr.type == PF_ADDR_TABLE)
					pfr_update_stats(ri->r->dst.addr.p.tbl,
					    &st->key[(st->direction == PF_IN)]->
					    addr[(st->direction == PF_IN)],
					    pd, ri->r->action, ri->r->dst.neg);
			}
		}
		if (r->src.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->src.addr.p.tbl,
			    (st == NULL) ? pd->src :
			    &st->key[(st->direction == PF_IN)]->
			    addr[(st->direction == PF_OUT)],
			    pd, r->action, r->src.neg);
		if (r->dst.addr.type == PF_ADDR_TABLE)
			pfr_update_stats(r->dst.addr.p.tbl,
			    (st == NULL) ? pd->dst :
			    &st->key[(st->direction == PF_IN)]->
			    addr[(st->direction == PF_IN)],
			    pd, r->action, r->dst.neg);
	}
}
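
/*
 * Main packet filter entry point: run one packet through pf on
 * interface ifp.  The packet is normalized, matched against the state
 * table and, when no state is found, against the ruleset.  *m0 is
 * updated and set to NULL when pf consumes the mbuf; the verdict
 * (PF_PASS, PF_DROP, ...) is returned.
 */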
int
pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
{
	struct pfi_kif *kif = NULL;
	u_short action, reason = 0;
	struct pf_rule *a = NULL, *r = &pf_default_rule;
	struct pf_state *st = NULL;
	struct pf_state_key_cmp key;
	struct pf_ruleset *ruleset = NULL;
	struct pf_pdesc pd;
	int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
	u_int32_t qid, pqid = 0;
	int have_pf_lock = 0;

	if (!pf_status.running)
		return (PF_PASS);

#if NCARP > 0
	if (ifp->if_type == IFT_CARP) {
		struct ifnet *ifp0;

		smr_read_enter();
		ifp0 = if_get_smr(ifp->if_carpdevidx);
		if (ifp0 != NULL)
			kif = (struct pfi_kif *)ifp0->if_pf_kif;
		smr_read_leave();
	} else
#endif /* NCARP */
		kif = (struct pfi_kif *)ifp->if_pf_kif;

	if (kif == NULL) {
		DPFPRINTF(LOG_ERR,
		    "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
		return (PF_DROP);
	}
	if (kif->pfik_flags & PFI_IFLAG_SKIP)
		return (PF_PASS);

#ifdef DIAGNOSTIC
	if (((*m0)->m_flags & M_PKTHDR) == 0)
		panic("non-M_PKTHDR is passed to pf_test");
#endif /* DIAGNOSTIC */

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
		return (PF_PASS);

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) {
		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET;
		return (PF_PASS);
	}

	if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
		(*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
		return (PF_PASS);
	}

	action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason);
	if (action != PF_PASS) {
#if NPFLOG > 0
		pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		goto done;
	}

	/* packet normalization and reassembly */
	switch (pd.af) {
	case AF_INET:
		action = pf_normalize_ip(&pd, &reason);
		break;
#ifdef INET6
	case AF_INET6:
		action = pf_normalize_ip6(&pd, &reason);
		break;
#endif /* INET6 */
	}
	*m0 = pd.m;
	/* if packet sits in reassembly queue, return without error */
	if (pd.m == NULL)
		return PF_PASS;

	if (action != PF_PASS) {
#if NPFLOG > 0
		pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
		goto done;
	}

	/* if packet has been reassembled, update packet description */
	if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) {
		action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason);
		if (action != PF_PASS) {
#if NPFLOG > 0
			pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
			goto done;
		}
	}
	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;

	/*
	 * Avoid pcb-lookups from the forwarding path.  They should never
	 * match and would cause MP locking problems.
	 */
	if (fwdir == PF_FWD) {
		pd.lookup.done = -1;
		pd.lookup.uid = -1;
		pd.lookup.gid = -1;
		pd.lookup.pid = NO_PID;
	}
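
	/*
	 * Dispatch on the virtual protocol: fragments and ICMP get their
	 * own handling, TCP is checked for SYN floods and syncookies, and
	 * everything else goes through the generic state lookup below.
	 */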
8377 */ 8378 if (fwdir == PF_FWD) { 8379 pd.lookup.done = -1; 8380 pd.lookup.uid = -1; 8381 pd.lookup.gid = -1; 8382 pd.lookup.pid = NO_PID; 8383 } 8384 8385 switch (pd.virtual_proto) { 8386 8387 case PF_VPROTO_FRAGMENT: { 8388 /* 8389 * handle fragments that aren't reassembled by 8390 * normalization 8391 */ 8392 PF_LOCK(); 8393 have_pf_lock = 1; 8394 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, &reason); 8395 st = pf_state_ref(st); 8396 if (action != PF_PASS) 8397 REASON_SET(&reason, PFRES_FRAG); 8398 break; 8399 } 8400 8401 case IPPROTO_ICMP: { 8402 if (pd.af != AF_INET) { 8403 action = PF_DROP; 8404 REASON_SET(&reason, PFRES_NORM); 8405 DPFPRINTF(LOG_NOTICE, 8406 "dropping IPv6 packet with ICMPv4 payload"); 8407 break; 8408 } 8409 PF_STATE_ENTER_READ(); 8410 action = pf_test_state_icmp(&pd, &st, &reason); 8411 st = pf_state_ref(st); 8412 PF_STATE_EXIT_READ(); 8413 if (action == PF_PASS || action == PF_AFRT) { 8414#if NPFSYNC > 0 8415 pfsync_update_state(st); 8416#endif /* NPFSYNC > 0 */ 8417 r = st->rule.ptr; 8418 a = st->anchor.ptr; 8419#if NPFLOG > 0 8420 pd.pflog |= st->log; 8421#endif /* NPFLOG > 0 */ 8422 } else if (st == NULL) { 8423 PF_LOCK(); 8424 have_pf_lock = 1; 8425 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 8426 &reason); 8427 st = pf_state_ref(st); 8428 } 8429 break; 8430 } 8431 8432#ifdef INET6 8433 case IPPROTO_ICMPV6: { 8434 if (pd.af != AF_INET6) { 8435 action = PF_DROP; 8436 REASON_SET(&reason, PFRES_NORM); 8437 DPFPRINTF(LOG_NOTICE, 8438 "dropping IPv4 packet with ICMPv6 payload"); 8439 break; 8440 } 8441 PF_STATE_ENTER_READ(); 8442 action = pf_test_state_icmp(&pd, &st, &reason); 8443 st = pf_state_ref(st); 8444 PF_STATE_EXIT_READ(); 8445 if (action == PF_PASS || action == PF_AFRT) { 8446#if NPFSYNC > 0 8447 pfsync_update_state(st); 8448#endif /* NPFSYNC > 0 */ 8449 r = st->rule.ptr; 8450 a = st->anchor.ptr; 8451#if NPFLOG > 0 8452 pd.pflog |= st->log; 8453#endif /* NPFLOG > 0 */ 8454 } else if (st == NULL) { 8455 PF_LOCK(); 8456 have_pf_lock = 1; 8457 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, 8458 &reason); 8459 st = pf_state_ref(st); 8460 } 8461 break; 8462 } 8463#endif /* INET6 */ 8464 8465 case IPPROTO_TCP: 8466 if (pd.dir == PF_IN && 8467 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN && 8468 pf_synflood_check(&pd)) { 8469 PF_LOCK(); 8470 have_pf_lock = 1; 8471 pf_syncookie_send(&pd, &reason); 8472 action = PF_DROP; 8473 break; 8474 } 8475 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0) 8476 pqid = 1; 8477 action = pf_normalize_tcp(&pd); 8478 if (action == PF_DROP) 8479 break; 8480 8481 /* FALLTHROUGH */ 8482 default: 8483 key.af = pd.af; 8484 key.proto = pd.virtual_proto; 8485 key.rdomain = pd.rdomain; 8486 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af); 8487 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af); 8488 key.port[pd.sidx] = pd.osport; 8489 key.port[pd.didx] = pd.odport; 8490 key.hash = pd.hash; 8491 8492 PF_STATE_ENTER_READ(); 8493 action = pf_find_state(&pd, &key, &st); 8494 st = pf_state_ref(st); 8495 PF_STATE_EXIT_READ(); 8496 8497 /* check for syncookies if tcp ack and no active state */ 8498 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP && 8499 (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 && 8500 st->dst.state >= TCPS_FIN_WAIT_2)) && 8501 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK && 8502 pf_syncookie_validate(&pd)) { 8503 struct mbuf *msyn; 8504 msyn = pf_syncookie_recreate_syn(&pd, &reason); 8505 if (msyn) { 8506 action = pf_test(af, fwdir, ifp, &msyn); 8507 m_freem(msyn); 8508 if (action == 
	case IPPROTO_TCP:
		if (pd.dir == PF_IN &&
		    (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
		    pf_synflood_check(&pd)) {
			PF_LOCK();
			have_pf_lock = 1;
			pf_syncookie_send(&pd, &reason);
			action = PF_DROP;
			break;
		}
		if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0)
			pqid = 1;
		action = pf_normalize_tcp(&pd);
		if (action == PF_DROP)
			break;

		/* FALLTHROUGH */
	default:
		key.af = pd.af;
		key.proto = pd.virtual_proto;
		key.rdomain = pd.rdomain;
		pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af);
		pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af);
		key.port[pd.sidx] = pd.osport;
		key.port[pd.didx] = pd.odport;
		key.hash = pd.hash;

		PF_STATE_ENTER_READ();
		action = pf_find_state(&pd, &key, &st);
		st = pf_state_ref(st);
		PF_STATE_EXIT_READ();

		/* check for syncookies if tcp ack and no active state */
		if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP &&
		    (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 &&
		    st->dst.state >= TCPS_FIN_WAIT_2)) &&
		    (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK &&
		    pf_syncookie_validate(&pd)) {
			struct mbuf *msyn;
			msyn = pf_syncookie_recreate_syn(&pd, &reason);
			if (msyn) {
				action = pf_test(af, fwdir, ifp, &msyn);
				m_freem(msyn);
				if (action == PF_PASS || action == PF_AFRT) {
					PF_STATE_ENTER_READ();
					pf_state_unref(st);
					action = pf_find_state(&pd, &key, &st);
					st = pf_state_ref(st);
					PF_STATE_EXIT_READ();
					if (st == NULL)
						return (PF_DROP);
					st->src.seqhi = st->dst.seqhi =
					    ntohl(pd.hdr.tcp.th_ack) - 1;
					st->src.seqlo =
					    ntohl(pd.hdr.tcp.th_seq) - 1;
					pf_set_protostate(st, PF_PEER_SRC,
					    PF_TCPS_PROXY_DST);
				}
			} else
				action = PF_DROP;
		}

		if (action == PF_MATCH)
			action = pf_test_state(&pd, &st, &reason);

		if (action == PF_PASS || action == PF_AFRT) {
#if NPFSYNC > 0
			pfsync_update_state(st);
#endif /* NPFSYNC > 0 */
			r = st->rule.ptr;
			a = st->anchor.ptr;
#if NPFLOG > 0
			pd.pflog |= st->log;
#endif /* NPFLOG > 0 */
		} else if (st == NULL) {
			PF_LOCK();
			have_pf_lock = 1;
			action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
			    &reason);
			st = pf_state_ref(st);
		}

		if (pd.virtual_proto == IPPROTO_TCP) {
			if (st) {
				if (st->max_mss)
					pf_normalize_mss(&pd, st->max_mss);
			} else if (r->max_mss)
				pf_normalize_mss(&pd, r->max_mss);
		}

		break;
	}

	if (have_pf_lock != 0)
		PF_UNLOCK();

	/*
	 * At the moment, we rely on NET_LOCK() to prevent removal of items
	 * we've collected above ('r', 'anchor' and 'ruleset').  They'll have
	 * to be refcounted when NET_LOCK() is gone.
	 */

done:
	if (action != PF_DROP) {
		if (st) {
			/* The non-state case is handled in pf_test_rule() */
			if (action == PF_PASS && pd.badopts != 0 &&
			    !(st->state_flags & PFSTATE_ALLOWOPTS)) {
				action = PF_DROP;
				REASON_SET(&reason, PFRES_IPOPTIONS);
#if NPFLOG > 0
				pd.pflog |= PF_LOG_FORCE;
#endif /* NPFLOG > 0 */
				DPFPRINTF(LOG_NOTICE, "dropping packet with "
				    "ip/ipv6 options in pf_test()");
			}

			pf_scrub(pd.m, st->state_flags, pd.af, st->min_ttl,
			    st->set_tos);
			pf_tag_packet(pd.m, st->tag, st->rtableid[pd.didx]);
			if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
				qid = st->pqid;
				if (st->state_flags & PFSTATE_SETPRIO) {
					pd.m->m_pkthdr.pf.prio =
					    st->set_prio[1];
				}
			} else {
				qid = st->qid;
				if (st->state_flags & PFSTATE_SETPRIO) {
					pd.m->m_pkthdr.pf.prio =
					    st->set_prio[0];
				}
			}
			pd.m->m_pkthdr.pf.delay = st->delay;
		} else {
			pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl,
			    r->set_tos);
			if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
				qid = r->pqid;
				if (r->scrub_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = r->set_prio[1];
			} else {
				qid = r->qid;
				if (r->scrub_flags & PFSTATE_SETPRIO)
					pd.m->m_pkthdr.pf.prio = r->set_prio[0];
			}
			pd.m->m_pkthdr.pf.delay = r->delay;
		}
	}

	if (action == PF_PASS && qid)
		pd.m->m_pkthdr.pf.qid = qid;
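	/*
	 * Link the stack-side state key to the mbuf (inbound) or the
	 * inpcb to the state key (outbound) so pf and the stack can find
	 * each other's objects later, and stamp a flow id derived from
	 * the wire-side key hash if the packet does not carry one yet.
	 */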
	if (st != NULL) {
		struct mbuf *m = pd.m;
		struct inpcb *inp = m->m_pkthdr.pf.inp;

		if (pd.dir == PF_IN) {
			KASSERT(inp == NULL);
			pf_mbuf_link_state_key(m, st->key[PF_SK_STACK]);
		} else if (pd.dir == PF_OUT)
			pf_state_key_link_inpcb(st->key[PF_SK_STACK], inp);

		if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
			m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash;
			SET(m->m_pkthdr.csum_flags, M_FLOWID);
		}
	}

	/*
	 * connections redirected to loopback should not match sockets
	 * bound specifically to loopback due to security implications,
	 * see in_pcblookup_listen().
	 */
	if (pd.destchg)
		if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >>
		    IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) ||
		    (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)))
			pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
	/* We need to redo the route lookup on outgoing routes. */
	if (pd.destchg && pd.dir == PF_OUT)
		pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE;

	if (pd.dir == PF_IN && action == PF_PASS &&
	    (r->divert.type == PF_DIVERT_TO ||
	    r->divert.type == PF_DIVERT_REPLY)) {
		struct pf_divert *divert;

		if ((divert = pf_get_divert(pd.m))) {
			pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
			divert->addr = r->divert.addr;
			divert->port = r->divert.port;
			divert->rdomain = pd.rdomain;
			divert->type = r->divert.type;
		}
	}

	if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET)
		action = PF_DIVERT;

#if NPFLOG > 0
	if (pd.pflog) {
		struct pf_rule_item *ri;

		if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL)
			pflog_packet(&pd, reason, r, a, ruleset, NULL);
		if (st) {
			SLIST_FOREACH(ri, &st->match_rules, entry)
				if (ri->r->log & PF_LOG_ALL)
					pflog_packet(&pd, reason, ri->r, a,
					    ruleset, NULL);
		}
	}
#endif /* NPFLOG > 0 */

	pf_counters_inc(action, &pd, st, r, a);

	switch (action) {
	case PF_SYNPROXY_DROP:
		m_freem(pd.m);
		/* FALLTHROUGH */
	case PF_DEFER:
		pd.m = NULL;
		action = PF_PASS;
		break;
	case PF_DIVERT:
		switch (pd.af) {
		case AF_INET:
			divert_packet(pd.m, pd.dir, r->divert.port);
			pd.m = NULL;
			break;
#ifdef INET6
		case AF_INET6:
			divert6_packet(pd.m, pd.dir, r->divert.port);
			pd.m = NULL;
			break;
#endif /* INET6 */
		}
		action = PF_PASS;
		break;
#ifdef INET6
	case PF_AFRT:
		if (pf_translate_af(&pd)) {
			action = PF_DROP;
			goto out;
		}
		pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
		switch (pd.naf) {
		case AF_INET:
			if (pd.dir == PF_IN) {
				int flags = IP_REDIRECT;

				switch (atomic_load_int(&ip_forwarding)) {
				case 2:
					SET(flags, IP_FORWARDING_IPSEC);
					/* FALLTHROUGH */
				case 1:
					SET(flags, IP_FORWARDING);
					break;
				default:
					ipstat_inc(ips_cantforward);
					action = PF_DROP;
					goto out;
				}
				if (atomic_load_int(&ip_directedbcast))
					SET(flags, IP_ALLOWBROADCAST);
				ip_forward(pd.m, ifp, NULL, flags);
			} else
				ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
			break;
		case AF_INET6:
			if (pd.dir == PF_IN) {
				int flags = IPV6_REDIRECT;

				switch (atomic_load_int(&ip6_forwarding)) {
				case 2:
					SET(flags, IPV6_FORWARDING_IPSEC);
					/* FALLTHROUGH */
				case 1:
					SET(flags, IPV6_FORWARDING);
					break;
				default:
					ip6stat_inc(ip6s_cantforward);
					action = PF_DROP;
					goto out;
				}
				ip6_forward(pd.m, NULL, flags);
			} else
				ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
			break;
		}
		pd.m = NULL;
		action = PF_PASS;
		break;
#endif /* INET6 */
	case PF_DROP:
		m_freem(pd.m);
		pd.m = NULL;
		break;
	default:
		if (st && st->rt) {
			switch (pd.af) {
			case AF_INET:
				pf_route(&pd, st);
				break;
#ifdef INET6
			case AF_INET6:
				pf_route6(&pd, st);
				break;
#endif /* INET6 */
			}
		}
		break;
	}

#ifdef INET6
	/* if reassembled packet passed, create new fragments */
	if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD &&
	    pd.af == AF_INET6) {
		struct m_tag *mtag;

		if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)))
			action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
	}
#endif /* INET6 */
	if (st && action != PF_DROP) {
		if (!st->if_index_in && dir == PF_IN)
			st->if_index_in = ifp->if_index;
		else if (!st->if_index_out && dir == PF_OUT)
			st->if_index_out = ifp->if_index;
	}

#ifdef INET6
out:
#endif /* INET6 */
	*m0 = pd.m;

	pf_state_unref(st);

	return (action);
}
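
/*
 * Decide whether the stack should treat this packet as destined for a
 * local socket: diverted packets and packets whose state key has been
 * linked to an inpcb count as ours (1); otherwise return -1 and leave
 * the decision to the normal address lookup.
 */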
int
pf_ouraddr(struct mbuf *m)
{
	struct pf_state_key *sk;

	if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
		return (1);

	sk = m->m_pkthdr.pf.statekey;
	if (sk != NULL) {
		if (READ_ONCE(sk->sk_inp) != NULL)
			return (1);
	}

	return (-1);
}

/*
 * must be called whenever any addressing information such as
 * address, port, protocol has changed
 */
void
pf_pkt_addr_changed(struct mbuf *m)
{
	pf_mbuf_unlink_state_key(m);
	pf_mbuf_unlink_inpcb(m);
}

struct inpcb *
pf_inp_lookup(struct mbuf *m)
{
	struct inpcb *inp = NULL;
	struct pf_state_key *sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk))
		pf_mbuf_unlink_state_key(m);
	else if (READ_ONCE(sk->sk_inp) != NULL) {
		mtx_enter(&pf_inp_mtx);
		inp = in_pcbref(sk->sk_inp);
		mtx_leave(&pf_inp_mtx);
	}

	return (inp);
}

void
pf_inp_link(struct mbuf *m, struct inpcb *inp)
{
	struct pf_state_key *sk = m->m_pkthdr.pf.statekey;

	if (!pf_state_key_isvalid(sk)) {
		pf_mbuf_unlink_state_key(m);
		return;
	}

	/*
	 * we don't need to grab PF-lock here.  At worst case we link inp to
	 * state, which might be just being marked as deleted by another
	 * thread.
	 */
	pf_state_key_link_inpcb(sk, inp);

	/* The statekey has finished finding the inp, it is no longer needed. */
	pf_mbuf_unlink_state_key(m);
}

void
pf_inp_unlink(struct inpcb *inp)
{
	struct pf_state_key *sk;

	if (READ_ONCE(inp->inp_pf_sk) == NULL)
		return;

	mtx_enter(&pf_inp_mtx);
	sk = inp->inp_pf_sk;
	if (sk == NULL) {
		mtx_leave(&pf_inp_mtx);
		return;
	}
	KASSERT(sk->sk_inp == inp);
	sk->sk_inp = NULL;
	inp->inp_pf_sk = NULL;
	mtx_leave(&pf_inp_mtx);

	pf_state_key_unref(sk);
	in_pcbunref(inp);
}
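
/*
 * Pair a state key with the key for the reverse direction so the
 * lookup for reply packets can reuse it.  Both keys take a reference
 * on each other; atomic_cas_ptr() keeps concurrent callers from
 * overwriting an existing pairing, and a key may be paired with
 * itself.
 */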
void
pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
{
	struct pf_state_key *old_reverse;

	old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev);
	if (old_reverse != NULL)
		KASSERT(old_reverse == skrev);
	else {
		pf_state_key_ref(skrev);

		/*
		 * NOTE: if sk == skrev, then KASSERT() below holds true, we
		 * still want to grab a reference in such case, because
		 * pf_state_key_unlink_reverse() does not check whether keys
		 * are identical or not.
		 */
		old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk);
		if (old_reverse != NULL)
			KASSERT(old_reverse == sk);

		pf_state_key_ref(sk);
	}
}

#if NPFLOG > 0
void
pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
    struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
{
	struct pf_rule_item *ri;

	/* if this is the log(matches) rule, packet has been logged already */
	if (rm->log & PF_LOG_MATCHES)
		return;

	SLIST_FOREACH(ri, matchrules, entry)
		if (ri->r->log & PF_LOG_MATCHES)
			pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
}
#endif /* NPFLOG > 0 */

struct pf_state_key *
pf_state_key_ref(struct pf_state_key *sk)
{
	if (sk != NULL)
		PF_REF_TAKE(sk->sk_refcnt);

	return (sk);
}

void
pf_state_key_unref(struct pf_state_key *sk)
{
	if (PF_REF_RELE(sk->sk_refcnt)) {
		/* state key must be removed from tree */
		KASSERT(!pf_state_key_isvalid(sk));
		/* state key must be unlinked from reverse key */
		KASSERT(sk->sk_reverse == NULL);
		/* state key must be unlinked from socket */
		KASSERT(sk->sk_inp == NULL);
		pool_put(&pf_state_key_pl, sk);
	}
}

int
pf_state_key_isvalid(struct pf_state_key *sk)
{
	return ((sk != NULL) && (sk->sk_removed == 0));
}

void
pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk)
{
	KASSERT(m->m_pkthdr.pf.statekey == NULL);
	m->m_pkthdr.pf.statekey = pf_state_key_ref(sk);
}

void
pf_mbuf_unlink_state_key(struct mbuf *m)
{
	struct pf_state_key *sk = m->m_pkthdr.pf.statekey;

	if (sk != NULL) {
		m->m_pkthdr.pf.statekey = NULL;
		pf_state_key_unref(sk);
	}
}

void
pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp)
{
	KASSERT(m->m_pkthdr.pf.inp == NULL);
	m->m_pkthdr.pf.inp = in_pcbref(inp);
}

void
pf_mbuf_unlink_inpcb(struct mbuf *m)
{
	struct inpcb *inp = m->m_pkthdr.pf.inp;

	if (inp != NULL) {
		m->m_pkthdr.pf.inp = NULL;
		in_pcbunref(inp);
	}
}

void
pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp)
{
	if (inp == NULL || READ_ONCE(sk->sk_inp) != NULL)
		return;

	mtx_enter(&pf_inp_mtx);
	if (inp->inp_pf_sk != NULL || sk->sk_inp != NULL) {
		mtx_leave(&pf_inp_mtx);
		return;
	}
	sk->sk_inp = in_pcbref(inp);
	inp->inp_pf_sk = pf_state_key_ref(sk);
	mtx_leave(&pf_inp_mtx);
}

void
pf_state_key_unlink_inpcb(struct pf_state_key *sk)
{
	struct inpcb *inp;

	if (READ_ONCE(sk->sk_inp) == NULL)
		return;

	mtx_enter(&pf_inp_mtx);
	inp = sk->sk_inp;
	if (inp == NULL) {
		mtx_leave(&pf_inp_mtx);
		return;
	}
	KASSERT(inp->inp_pf_sk == sk);
	sk->sk_inp = NULL;
	inp->inp_pf_sk = NULL;
	mtx_leave(&pf_inp_mtx);

	pf_state_key_unref(sk);
	in_pcbunref(inp);
}

void
pf_state_key_unlink_reverse(struct pf_state_key *sk)
{
	struct pf_state_key *skrev = sk->sk_reverse;

	/* Note that sk and skrev may be equal, then we unref twice. */
	if (skrev != NULL) {
		KASSERT(skrev->sk_reverse == sk);
		sk->sk_reverse = NULL;
		skrev->sk_reverse = NULL;
		pf_state_key_unref(skrev);
		pf_state_key_unref(sk);
	}
}
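
/*
 * pf_state reference counting.  The final release asserts that the
 * state has been unlinked from the pfsync and state lists and drops
 * the references on both state keys before the state goes back to
 * its pool.
 */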
struct pf_state *
pf_state_ref(struct pf_state *st)
{
	if (st != NULL)
		PF_REF_TAKE(st->refcnt);
	return (st);
}

void
pf_state_unref(struct pf_state *st)
{
	if ((st != NULL) && PF_REF_RELE(st->refcnt)) {
		/* never inserted or removed */
#if NPFSYNC > 0
		KASSERT((TAILQ_NEXT(st, sync_list) == NULL) ||
		    ((TAILQ_NEXT(st, sync_list) == _Q_INVALID) &&
		    (st->sync_state >= PFSYNC_S_NONE)));
#endif /* NPFSYNC */
		KASSERT((TAILQ_NEXT(st, entry_list) == NULL) ||
		    (TAILQ_NEXT(st, entry_list) == _Q_INVALID));

		pf_state_key_unref(st->key[PF_SK_WIRE]);
		pf_state_key_unref(st->key[PF_SK_STACK]);

		KASSERT(SLIST_EMPTY(&st->linkage));

		pool_put(&pf_state_pl, st);
	}
}

int
pf_delay_pkt(struct mbuf *m, u_int ifidx)
{
	struct pf_pktdelay *pdy;

	if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) {
		m_freem(m);
		return (ENOBUFS);
	}
	pdy->ifidx = ifidx;
	pdy->m = m;
	timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy);
	timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay);
	m->m_pkthdr.pf.delay = 0;
	return (0);
}

void
pf_pktenqueue_delayed(void *arg)
{
	struct pf_pktdelay *pdy = arg;
	struct ifnet *ifp;

	ifp = if_get(pdy->ifidx);
	if (ifp != NULL) {
		if_enqueue(ifp, pdy->m);
		if_put(ifp);
	} else
		m_freem(pdy->m);

	pool_put(&pf_pktdelay_pl, pdy);
}

void
pf_status_init(void)
{
	memset(&pf_status, 0, sizeof(pf_status));
	pf_status.debug = LOG_ERR;
	pf_status.reass = PF_REASS_ENABLED;

	/* XXX do our best to avoid a conflict */
	pf_status.hostid = arc4random();

	pf_status_fcounters = counters_alloc(FCNT_MAX);
}

void
pf_status_clear(void)
{
	PF_ASSERT_LOCKED();
	counters_zero(pf_status_fcounters, FCNT_MAX);
}

void
pf_status_read(struct pf_status *pfs)
{
	uint64_t scratch[FCNT_MAX];

	NET_LOCK();
	PF_LOCK();
	PF_FRAG_LOCK();
	memcpy(pfs, &pf_status, sizeof(struct pf_status));
	PF_FRAG_UNLOCK();
	pfi_update_status(pfs->ifname, pfs);
	PF_UNLOCK();
	NET_UNLOCK();

	counters_read(pf_status_fcounters, pfs->fcounters, FCNT_MAX, scratch);
}