/*	$OpenBSD: pf.c,v 1.1236 2026/02/05 03:26:00 dlg Exp $ */

/*
 * Copyright (c) 2001 Daniel Hartmeier
 * Copyright (c) 2002 - 2013 Henning Brauer <henning@openbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *    - Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *    - Redistributions in binary form must reproduce the above
 *      copyright notice, this list of conditions and the following
 *      disclaimer in the documentation and/or other materials provided
 *      with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 *
 */

#include "carp.h"
#include "pflog.h"
#include "pfsync.h"
#include "pflow.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/time.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/percpu.h>
#include <sys/syslog.h>

#include <crypto/sha2.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/toeplitz.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_seq.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/udp.h>
#include <netinet/udp_var.h>
#include <netinet/ip_divert.h>

#ifdef INET6
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/icmp6.h>
#endif /* INET6 */

#include <net/pfvar.h>
#include <net/pfvar_priv.h>

#if NPFLOW > 0
#include <net/if_pflow.h>
#endif /* NPFLOW > 0 */

#if NPFSYNC > 0
#include <net/if_pfsync.h>
#endif /* NPFSYNC > 0 */

/*
 * Global variables
 */
struct pf_state_tree	 pf_statetbl;
struct pf_queuehead	 pf_queues[2];
struct pf_queuehead	*pf_queues_active;
struct pf_queuehead	*pf_queues_inactive;

struct pf_status	 pf_status;
static struct cpumem	*pf_status_fcounters;

struct mutex		 pf_inp_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);

int			 pf_hdr_limit = 20; /* arbitrary limit, tune in ddb */

SHA2_CTX		 pf_tcp_secret_ctx;
u_char			 pf_tcp_secret[16];
int			 pf_tcp_secret_init;
int			 pf_tcp_iss_off;

enum pf_test_status {
	PF_TEST_FAIL = -1,
	PF_TEST_OK,
	PF_TEST_QUICK
};

struct pf_test_ctx {
	struct pf_pdesc		*pd;
	struct pf_rule_actions	 act;
	u_int8_t		 icmpcode;
	u_int8_t		 icmptype;
	int			 icmp_dir;
	int			 state_icmp;
	int			 tag;
	int			 limiter_drop;
	u_short			 reason;
	struct pf_rule_item	*ri;
	struct pf_src_node	*sns[PF_SN_MAX];
	struct pf_rule_slist	 rules;
	struct pf_rule		*nr;
	struct pf_rule		**rm;
	struct pf_rule		*a;
	struct pf_rule		**am;
	struct pf_ruleset	**rsm;
	struct pf_ruleset	*arsm;
	struct pf_ruleset	*aruleset;
	struct tcphdr		*th;
	struct pf_statelim	*statelim;
	struct pf_sourcelim	*sourcelim;
	struct pf_source	*source;
};

struct pool		 pf_src_tree_pl, pf_rule_pl, pf_queue_pl;
struct pool		 pf_state_pl, pf_state_key_pl, pf_state_item_pl;
struct pool		 pf_rule_item_pl, pf_sn_item_pl, pf_pktdelay_pl;
struct pool		 pf_statelim_pl, pf_sourcelim_pl, pf_source_pl;
struct pool		 pf_state_link_pl;

void			 pf_add_threshold(struct pf_threshold *);
int			 pf_check_threshold(struct pf_threshold *);
int			 pf_check_tcp_cksum(struct mbuf *, int, int,
			    sa_family_t);
__inline void		 pf_cksum_fixup(u_int16_t *, u_int16_t, u_int16_t,
			    u_int8_t);
void			 pf_cksum_fixup_a(u_int16_t *, const struct pf_addr *,
			    const struct pf_addr *, sa_family_t, u_int8_t);
int			 pf_modulate_sack(struct pf_pdesc *,
			    struct pf_state_peer *);
int			 pf_icmp_mapping(struct pf_pdesc *, u_int8_t, int *,
			    u_int16_t *, u_int16_t *);
int			 pf_change_icmp_af(struct mbuf *, int,
			    struct pf_pdesc *, struct pf_pdesc *,
			    struct pf_addr *, struct pf_addr *, sa_family_t,
			    sa_family_t);
int			 pf_translate_a(struct pf_pdesc *, struct pf_addr *,
			    struct pf_addr *);
void			 pf_translate_icmp(struct pf_pdesc *, struct pf_addr *,
			    u_int16_t *, struct pf_addr *, struct pf_addr *,
			    u_int16_t);
int			 pf_translate_icmp_af(struct pf_pdesc*, int, void *);
void			 pf_send_icmp(struct mbuf *, u_int8_t, u_int8_t, int,
			    sa_family_t, struct pf_rule *, u_int);
void			 pf_detach_state(struct pf_state *);
struct pf_state_key	*pf_state_key_attach(struct pf_state_key *,
			    struct pf_state *, int);
void			 pf_state_key_detach(struct pf_state *, int);
u_int32_t		 pf_tcp_iss(struct pf_pdesc *);
void			 pf_rule_to_actions(struct pf_rule *,
			    struct pf_rule_actions *);
int			 pf_test_rule(struct pf_pdesc *, struct pf_rule **,
			    struct pf_state **, struct pf_rule **,
			    struct pf_ruleset **, u_short *);
static __inline int	 pf_create_state(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_rule *,
			    struct pf_state_key **, struct pf_state_key **,
			    int *, struct pf_state **, int,
			    struct pf_rule_slist *, struct pf_rule_actions *,
			    struct pf_src_node **, struct pf_test_ctx *);
static __inline int	 pf_state_key_addr_setup(struct pf_pdesc *, void *,
			    int, struct pf_addr *, int, struct pf_addr *,
			    int, int);
int			 pf_state_key_setup(struct pf_pdesc *, struct
			    pf_state_key **, struct pf_state_key **, int);
int			 pf_tcp_track_full(struct pf_pdesc *,
			    struct pf_state **, u_short *, int *, int);
int			 pf_tcp_track_sloppy(struct pf_pdesc *,
			    struct pf_state **, u_short *);
static __inline int	 pf_synproxy_ack(struct pf_rule *, struct pf_pdesc *,
			    struct pf_state **, struct pf_rule_actions *);
static __inline int	 pf_synproxy(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_test_state(struct pf_pdesc *, struct pf_state **,
			    u_short *);
int			 pf_icmp_state_lookup(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **,
			    u_int16_t, u_int16_t, int, int *, int, int);
int			 pf_test_state_icmp(struct pf_pdesc *,
			    struct pf_state **, u_short *);
u_int16_t		 pf_calc_mss(struct pf_addr *, sa_family_t, int,
			    uint16_t, uint16_t);
static __inline int	 pf_set_rt_ifp(struct pf_state *, struct pf_addr *,
			    sa_family_t, struct pf_src_node **);
struct pf_divert	*pf_get_divert(struct mbuf *);
int			 pf_walk_option(struct pf_pdesc *, struct ip *,
			    int, int, u_short *);
int			 pf_walk_header(struct pf_pdesc *, struct ip *,
			    u_short *);
int			 pf_walk_option6(struct pf_pdesc *, struct ip6_hdr *,
			    int, int, u_short *);
int			 pf_walk_header6(struct pf_pdesc *, struct ip6_hdr *,
			    u_short *);
void			 pf_print_state_parts(struct pf_state *,
			    struct pf_state_key *, struct pf_state_key *);
int			 pf_addr_wrap_neq(struct pf_addr_wrap *,
			    struct pf_addr_wrap *);
int			 pf_compare_state_keys(struct pf_state_key *,
			    struct pf_state_key *, struct pfi_kif *, u_int);
u_int16_t		 pf_pkt_hash(sa_family_t, uint8_t,
			    const struct pf_addr *, const struct pf_addr *,
			    uint16_t, uint16_t);
int			 pf_find_state(struct pf_pdesc *,
			    struct pf_state_key_cmp *, struct pf_state **);
int			 pf_src_connlimit(struct pf_state **);
int			 pf_match_rcvif(struct mbuf *, struct pf_rule *);
enum pf_test_status	 pf_match_rule(struct pf_test_ctx *,
			    struct pf_ruleset *);
void			 pf_counters_inc(int, struct pf_pdesc *,
			    struct pf_state *, struct pf_rule *,
			    struct pf_rule *);

int			 pf_state_insert(struct pfi_kif *,
			    struct pf_state_key **, struct pf_state_key **,
			    struct pf_state *);

int			 pf_state_key_isvalid(struct pf_state_key *);
struct pf_state_key	*pf_state_key_ref(struct pf_state_key *);
void			 pf_state_key_unref(struct pf_state_key *);
void			 pf_state_key_link_reverse(struct pf_state_key *,
			    struct pf_state_key *);
void			 pf_state_key_unlink_reverse(struct pf_state_key *);
void			 pf_state_key_link_inpcb(struct pf_state_key *,
			    struct inpcb *);
void			 pf_state_key_unlink_inpcb(struct pf_state_key *);
void			 pf_pktenqueue_delayed(void *);
int32_t			 pf_state_expires(const struct pf_state *, uint8_t);

#if NPFLOG > 0
void			 pf_log_matches(struct pf_pdesc *, struct pf_rule *,
			    struct pf_rule *, struct pf_ruleset *,
			    struct pf_rule_slist *);
#endif /* NPFLOG > 0 */

extern struct pool pfr_ktable_pl;
extern struct pool pfr_kentry_pl;

struct pf_pool_limit pf_pool_limits[PF_LIMIT_MAX] = {
	{ &pf_state_pl, PFSTATE_HIWAT, PFSTATE_HIWAT },
	{ &pf_src_tree_pl, PFSNODE_HIWAT, PFSNODE_HIWAT },
	{ &pf_frent_pl, PFFRAG_FRENT_HIWAT, PFFRAG_FRENT_HIWAT },
	{ &pfr_ktable_pl, PFR_KTABLE_HIWAT, PFR_KTABLE_HIWAT },
	{ &pfr_kentry_pl, PFR_KENTRY_HIWAT, PFR_KENTRY_HIWAT },
	{ &pf_pktdelay_pl, PF_PKTDELAY_MAXPKTS, PF_PKTDELAY_MAXPKTS },
	{ &pf_anchor_pl, PF_ANCHOR_HIWAT, PF_ANCHOR_HIWAT }
};

#define BOUND_IFACE(r, k) \
	((r)->rule_flag & PFRULE_IFBOUND) ? (k) : pfi_all

#define STATE_INC_COUNTERS(s)					\
	do {							\
		struct pf_rule_item *mrm;			\
		s->rule.ptr->states_cur++;			\
		s->rule.ptr->states_tot++;			\
		if (s->anchor.ptr != NULL) {			\
			s->anchor.ptr->states_cur++;		\
			s->anchor.ptr->states_tot++;		\
		}						\
		SLIST_FOREACH(mrm, &s->match_rules, entry)	\
			mrm->r->states_cur++;			\
	} while (0)

static __inline int	 pf_src_compare(struct pf_src_node *,
			    struct pf_src_node *);
static inline int	 pf_state_compare_key(const struct pf_state_key *,
			    const struct pf_state_key *);
static inline int	 pf_state_compare_id(const struct pf_state *,
			    const struct pf_state *);
#ifdef INET6
static __inline void	 pf_cksum_uncover(u_int16_t *, u_int16_t, u_int8_t);
static __inline void	 pf_cksum_cover(u_int16_t *, u_int16_t, u_int8_t);
#endif /* INET6 */
static __inline void	 pf_set_protostate(struct pf_state *, int, u_int8_t);

static inline int
pf_statelim_id_cmp(const struct pf_statelim *a, const struct pf_statelim *b)
{
	if (a->pfstlim_id > b->pfstlim_id)
		return (1);
	if (a->pfstlim_id < b->pfstlim_id)
		return (-1);

	return (0);
}

RBT_GENERATE(pf_statelim_id_tree, pf_statelim, pfstlim_id_tree,
    pf_statelim_id_cmp);

static inline int
pf_statelim_nm_cmp(const struct pf_statelim *a, const struct pf_statelim *b)
{
	return (strncmp(a->pfstlim_nm, b->pfstlim_nm, sizeof(a->pfstlim_nm)));
}

RBT_GENERATE(pf_statelim_nm_tree, pf_statelim, pfstlim_nm_tree,
    pf_statelim_nm_cmp);

struct pf_statelim_id_tree pf_statelim_id_tree_active =
    RBT_INITIALIZER(pf_statelim_id_tree_active);
struct pf_statelim_list pf_statelim_list_active =
    TAILQ_HEAD_INITIALIZER(pf_statelim_list_active);

struct pf_statelim_id_tree pf_statelim_id_tree_inactive =
    RBT_INITIALIZER(pf_statelim_id_tree_inactive);
struct pf_statelim_nm_tree pf_statelim_nm_tree_inactive =
    RBT_INITIALIZER(pf_statelim_nm_tree_inactive);
struct pf_statelim_list pf_statelim_list_inactive =
    TAILQ_HEAD_INITIALIZER(pf_statelim_list_inactive);

static inline int
pf_sourcelim_id_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b)
{
	if (a->pfsrlim_id > b->pfsrlim_id)
		return (1);
	if (a->pfsrlim_id < b->pfsrlim_id)
		return (-1);

	return (0);
}

RBT_GENERATE(pf_sourcelim_id_tree, pf_sourcelim, pfsrlim_id_tree,
    pf_sourcelim_id_cmp);

static inline int
pf_sourcelim_nm_cmp(const struct pf_sourcelim *a, const struct pf_sourcelim *b)
{
	return (strncmp(a->pfsrlim_nm, b->pfsrlim_nm, sizeof(a->pfsrlim_nm)));
}

RBT_GENERATE(pf_sourcelim_nm_tree, pf_sourcelim, pfsrlim_nm_tree,
    pf_sourcelim_nm_cmp);

static inline int
pf_source_cmp(const struct pf_source *a, const struct pf_source *b)
{
	if (a->pfsr_af > b->pfsr_af)
		return (1);
	if (a->pfsr_af < b->pfsr_af)
		return (-1);
	if (a->pfsr_rdomain > b->pfsr_rdomain)
		return (1);
	if (a->pfsr_rdomain < b->pfsr_rdomain)
		return (-1);

	return (pf_addr_compare(&a->pfsr_addr, &b->pfsr_addr, a->pfsr_af));
}

RBT_GENERATE(pf_source_tree, pf_source, pfsr_tree, pf_source_cmp);

static inline int
pf_source_ioc_cmp(const struct pf_source *a, const struct pf_source *b)
{
	size_t i;

	if (a->pfsr_af > b->pfsr_af)
		return (1);
	if (a->pfsr_af < b->pfsr_af)
		return (-1);
	if (a->pfsr_rdomain > b->pfsr_rdomain)
		return (1);
	if (a->pfsr_rdomain < b->pfsr_rdomain)
		return (-1);

	for (i = 0; i < nitems(a->pfsr_addr.addr32); i++) {
		uint32_t wa = ntohl(a->pfsr_addr.addr32[i]);
		uint32_t wb = ntohl(b->pfsr_addr.addr32[i]);

		if (wa > wb)
			return (1);
		if (wa < wb)
			return (-1);
	}

	return (0);
}

RBT_GENERATE(pf_source_ioc_tree, pf_source, pfsr_ioc_tree, pf_source_ioc_cmp);

struct pf_sourcelim_id_tree pf_sourcelim_id_tree_active =
    RBT_INITIALIZER(pf_sourcelim_id_tree_active);
struct pf_sourcelim_list pf_sourcelim_list_active =
    TAILQ_HEAD_INITIALIZER(pf_sourcelim_list_active);

struct pf_sourcelim_id_tree pf_sourcelim_id_tree_inactive =
    RBT_INITIALIZER(pf_sourcelim_id_tree_inactive);
struct pf_sourcelim_nm_tree pf_sourcelim_nm_tree_inactive =
    RBT_INITIALIZER(pf_sourcelim_nm_tree_inactive);
struct pf_sourcelim_list pf_sourcelim_list_inactive =
    TAILQ_HEAD_INITIALIZER(pf_sourcelim_list_inactive);

static inline struct pf_statelim *
pf_statelim_find(uint32_t id)
{
	struct pf_statelim key;

	/* only the id is used in cmp, so don't have to zero all the things */
	key.pfstlim_id = id;

	return (RBT_FIND(pf_statelim_id_tree,
	    &pf_statelim_id_tree_active, &key));
}

static inline struct pf_sourcelim *
pf_sourcelim_find(uint32_t id)
{
	struct pf_sourcelim key;

	/* only the id is used in cmp, so don't have to zero all the things */
	key.pfsrlim_id = id;

	return (RBT_FIND(pf_sourcelim_id_tree,
	    &pf_sourcelim_id_tree_active, &key));
}

struct pf_source_list pf_source_gc = TAILQ_HEAD_INITIALIZER(pf_source_gc);

static void
pf_source_purge(void)
{
	struct pf_source *sr, *nsr;
	time_t now = getuptime();

	TAILQ_FOREACH_SAFE(sr, &pf_source_gc, pfsr_empty_gc, nsr) {
		struct pf_sourcelim *srlim = sr->pfsr_parent;

		if (now <= sr->pfsr_empty_ts + srlim->pfsrlim_rate.seconds + 1)
			continue;

		TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc);

		RBT_REMOVE(pf_source_tree, &srlim->pfsrlim_sources, sr);
		RBT_REMOVE(pf_source_ioc_tree, &srlim->pfsrlim_ioc_sources, sr);
		srlim->pfsrlim_nsources--;

		pool_put(&pf_source_pl, sr);
	}
}

static void
pf_source_pfr_addr(struct pfr_addr *p, const struct pf_source *sr)
{
	struct pf_sourcelim *srlim = sr->pfsr_parent;

	memset(p, 0, sizeof(*p));

	p->pfra_af = sr->pfsr_af;
	switch (sr->pfsr_af) {
	case AF_INET:
		p->pfra_net = srlim->pfsrlim_ipv4_prefix;
		p->pfra_ip4addr = sr->pfsr_addr.v4;
		break;
#ifdef INET6
	case AF_INET6:
		p->pfra_net = srlim->pfsrlim_ipv6_prefix;
		p->pfra_ip6addr = sr->pfsr_addr.v6;
		break;
#endif /* INET6 */
	}
}

static void
pf_source_used(struct pf_source *sr)
{
	struct pf_sourcelim *srlim = sr->pfsr_parent;
	struct pfr_ktable *t;
	unsigned int used;

	used = sr->pfsr_inuse++;
	sr->pfsr_rate_ts += srlim->pfsrlim_rate_token;

	if (used == 0)
		TAILQ_REMOVE(&pf_source_gc, sr, pfsr_empty_gc);
	else if ((t = srlim->pfsrlim_overload.table) != NULL &&
	    used >= srlim->pfsrlim_overload.hwm && !sr->pfsr_intable) {
		struct pfr_addr p;

		pf_source_pfr_addr(&p, sr);

		pfr_insert_kentry(t, &p, gettime());
		sr->pfsr_intable = 1;
	}
}

static void
pf_source_rele(struct pf_source *sr)
{
	struct pf_sourcelim *srlim = sr->pfsr_parent;
	struct pfr_ktable *t;
	unsigned int used;

	used = --sr->pfsr_inuse;

	t = srlim->pfsrlim_overload.table;
	if (t != NULL && sr->pfsr_intable &&
	    used < srlim->pfsrlim_overload.lwm) {
		struct pfr_addr p;

		pf_source_pfr_addr(&p, sr);

		pfr_remove_kentry(t, &p);
		sr->pfsr_intable = 0;
	}

	if (used == 0) {
		TAILQ_INSERT_TAIL(&pf_source_gc, sr, pfsr_empty_gc);
		sr->pfsr_empty_ts = getuptime() + srlim->pfsrlim_rate.seconds;
	}
}

static inline void
pf_source_key(struct pf_sourcelim *srlim, struct pf_source *key,
    sa_family_t af, unsigned int rdomain, const struct pf_addr *addr)
{
	size_t i;

	/* only af+addr is used for lookup. */
	key->pfsr_af = af;
	key->pfsr_rdomain = rdomain;
	switch (af) {
	case AF_INET:
		key->pfsr_addr.addr32[0] =
		    srlim->pfsrlim_ipv4_mask.v4.s_addr &
		    addr->v4.s_addr;

		for (i = 1; i < nitems(key->pfsr_addr.addr32); i++)
			key->pfsr_addr.addr32[i] = htonl(0);
		break;
#ifdef INET6
	case AF_INET6:
		for (i = 0; i < nitems(key->pfsr_addr.addr32); i++) {
			key->pfsr_addr.addr32[i] =
			    srlim->pfsrlim_ipv6_mask.addr32[i] &
			    addr->addr32[i];
		}
		break;
#endif
	default:
		unhandled_af(af);
		/* NOTREACHED */
	}
}
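
/*
 * For example (illustrative values, not from this file): with
 * pfsrlim_ipv4_mask built from a /24 prefix, sources 192.0.2.57 and
 * 192.0.2.99 both mask down to the key address 192.0.2.0, so they
 * land on the same pf_source entry and share one limiter bucket.
 */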

static inline struct pf_source *
pf_source_find(struct pf_sourcelim *srlim, const struct pf_source *key)
{
	return (RBT_FIND(pf_source_tree, &srlim->pfsrlim_sources, key));
}

struct pf_src_tree tree_src_tracking;

struct pf_state_tree_id tree_id;
struct pf_state_list pf_state_list = PF_STATE_LIST_INITIALIZER(pf_state_list);

RB_GENERATE(pf_src_tree, pf_src_node, entry, pf_src_compare);
RBT_GENERATE(pf_state_tree, pf_state_key, sk_entry, pf_state_compare_key);
RBT_GENERATE(pf_state_tree_id, pf_state, entry_id, pf_state_compare_id);

int
pf_addr_compare(const struct pf_addr *a, const struct pf_addr *b,
    sa_family_t af)
{
	switch (af) {
	case AF_INET:
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#ifdef INET6
	case AF_INET6:
		if (a->addr32[3] > b->addr32[3])
			return (1);
		if (a->addr32[3] < b->addr32[3])
			return (-1);
		if (a->addr32[2] > b->addr32[2])
			return (1);
		if (a->addr32[2] < b->addr32[2])
			return (-1);
		if (a->addr32[1] > b->addr32[1])
			return (1);
		if (a->addr32[1] < b->addr32[1])
			return (-1);
		if (a->addr32[0] > b->addr32[0])
			return (1);
		if (a->addr32[0] < b->addr32[0])
			return (-1);
		break;
#endif /* INET6 */
	}
	return (0);
}

static __inline int
pf_src_compare(struct pf_src_node *a, struct pf_src_node *b)
{
	int	diff;

	if (a->rule.ptr > b->rule.ptr)
		return (1);
	if (a->rule.ptr < b->rule.ptr)
		return (-1);
	if ((diff = a->type - b->type) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr, &b->addr, a->af)) != 0)
		return (diff);
	return (0);
}

static __inline void
pf_set_protostate(struct pf_state *st, int which, u_int8_t newstate)
{
	if (which == PF_PEER_DST || which == PF_PEER_BOTH)
		st->dst.state = newstate;
	if (which == PF_PEER_DST)
		return;

	if (st->src.state == newstate)
		return;
	if (st->creatorid == pf_status.hostid &&
	    st->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
	    !(TCPS_HAVEESTABLISHED(st->src.state) ||
	    st->src.state == TCPS_CLOSED) &&
	    (TCPS_HAVEESTABLISHED(newstate) || newstate == TCPS_CLOSED))
		atomic_dec_int(&pf_status.states_halfopen);

	st->src.state = newstate;
}

void
pf_addrcpy(struct pf_addr *dst, struct pf_addr *src, sa_family_t af)
{
	switch (af) {
	case AF_INET:
		dst->addr32[0] = src->addr32[0];
		break;
#ifdef INET6
	case AF_INET6:
		dst->addr32[0] = src->addr32[0];
		dst->addr32[1] = src->addr32[1];
		dst->addr32[2] = src->addr32[2];
		dst->addr32[3] = src->addr32[3];
		break;
#endif /* INET6 */
	default:
		unhandled_af(af);
	}
}

void
pf_init_threshold(struct pf_threshold *threshold,
    u_int32_t limit, u_int32_t seconds)
{
	threshold->limit = limit * PF_THRESHOLD_MULT;
	threshold->seconds = seconds;
	threshold->count = 0;
	threshold->last = getuptime();
}

void
pf_add_threshold(struct pf_threshold *threshold)
{
	u_int32_t t = getuptime(), diff = t - threshold->last;

	if (diff >= threshold->seconds)
		threshold->count = 0;
	else
		threshold->count -= threshold->count * diff /
		    threshold->seconds;
	threshold->count += PF_THRESHOLD_MULT;
	threshold->last = t;
}

int
pf_check_threshold(struct pf_threshold *threshold)
{
	return (threshold->count > threshold->limit);
}
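
/*
 * Worked example of the fixed-point rate tracking above (illustrative
 * numbers): with "max-src-conn-rate 10/5", pf_init_threshold() stores
 * limit = 10 * PF_THRESHOLD_MULT and seconds = 5.  Each hit first
 * decays count linearly by diff/seconds, then adds PF_THRESHOLD_MULT,
 * so a count of 8000 (with PF_THRESHOLD_MULT being 1000) observed 2
 * seconds after the previous hit decays to 8000 - 8000*2/5 = 4800
 * before the new hit is added.  pf_check_threshold() trips once count
 * exceeds limit.
 */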

void
pf_state_list_insert(struct pf_state_list *pfs, struct pf_state *st)
{
	/*
	 * we can always put states on the end of the list.
	 *
	 * things reading the list should take a read lock, then
	 * the mutex, get the head and tail pointers, release the
	 * mutex, and then they can iterate between the head and tail.
	 */

	pf_state_ref(st); /* get a ref for the list */

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_INSERT_TAIL(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);
}
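
/*
 * A minimal reader following the protocol above might look like this
 * (sketch only; the pf_state_queue head type name is assumed here):
 *
 *	rw_enter_read(&pfs->pfs_rwl);
 *	mtx_enter(&pfs->pfs_mtx);
 *	head = TAILQ_FIRST(&pfs->pfs_list);
 *	tail = TAILQ_LAST(&pfs->pfs_list, pf_state_queue);
 *	mtx_leave(&pfs->pfs_mtx);
 *	for (st = head; st != NULL; st = TAILQ_NEXT(st, entry_list)) {
 *		...
 *		if (st == tail)
 *			break;
 *	}
 *	rw_exit_read(&pfs->pfs_rwl);
 */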

void
pf_state_list_remove(struct pf_state_list *pfs, struct pf_state *st)
{
	/* states can only be removed when the write lock is held */
	rw_assert_wrlock(&pfs->pfs_rwl);

	mtx_enter(&pfs->pfs_mtx);
	TAILQ_REMOVE(&pfs->pfs_list, st, entry_list);
	mtx_leave(&pfs->pfs_mtx);

	pf_state_unref(st); /* list no longer references the state */
}

void
pf_update_state_timeout(struct pf_state *st, int to)
{
	mtx_enter(&st->mtx);
	if (st->timeout != PFTM_UNLINKED)
		st->timeout = to;
	mtx_leave(&st->mtx);
}

int
pf_src_connlimit(struct pf_state **stp)
{
	int			 bad = 0;
	struct pf_src_node	*sn;

	if ((sn = pf_get_src_node((*stp), PF_SN_NONE)) == NULL)
		return (0);

	sn->conn++;
	(*stp)->src.tcp_est = 1;
	pf_add_threshold(&sn->conn_rate);

	if ((*stp)->rule.ptr->max_src_conn &&
	    (*stp)->rule.ptr->max_src_conn < sn->conn) {
		pf_status.lcounters[LCNT_SRCCONN]++;
		bad++;
	}

	if ((*stp)->rule.ptr->max_src_conn_rate.limit &&
	    pf_check_threshold(&sn->conn_rate)) {
		pf_status.lcounters[LCNT_SRCCONNRATE]++;
		bad++;
	}

	if (!bad)
		return (0);

	if ((*stp)->rule.ptr->overload_tbl) {
		struct pfr_addr p;
		u_int32_t	killed = 0;

		pf_status.lcounters[LCNT_OVERLOAD_TABLE]++;
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE,
			    "pf: pf_src_connlimit: blocking address ");
			pf_print_host(&sn->addr, 0,
			    (*stp)->key[PF_SK_WIRE]->af);
		}

		memset(&p, 0, sizeof(p));
		p.pfra_af = (*stp)->key[PF_SK_WIRE]->af;
		switch ((*stp)->key[PF_SK_WIRE]->af) {
		case AF_INET:
			p.pfra_net = 32;
			p.pfra_ip4addr = sn->addr.v4;
			break;
#ifdef INET6
		case AF_INET6:
			p.pfra_net = 128;
			p.pfra_ip6addr = sn->addr.v6;
			break;
#endif /* INET6 */
		}

		pfr_insert_kentry((*stp)->rule.ptr->overload_tbl,
		    &p, gettime());

		/* kill existing states if that's required. */
		if ((*stp)->rule.ptr->flush) {
			struct pf_state_key *sk;
			struct pf_state *st;

			pf_status.lcounters[LCNT_OVERLOAD_FLUSH]++;
			RBT_FOREACH(st, pf_state_tree_id, &tree_id) {
				sk = st->key[PF_SK_WIRE];
				/*
				 * Kill states from this source. (Only those
				 * from the same rule if PF_FLUSH_GLOBAL is not
				 * set)
				 */
				if (sk->af ==
				    (*stp)->key[PF_SK_WIRE]->af &&
				    (((*stp)->direction == PF_OUT &&
				    PF_AEQ(&sn->addr, &sk->addr[1], sk->af)) ||
				    ((*stp)->direction == PF_IN &&
				    PF_AEQ(&sn->addr, &sk->addr[0], sk->af))) &&
				    ((*stp)->rule.ptr->flush &
				    PF_FLUSH_GLOBAL ||
				    (*stp)->rule.ptr == st->rule.ptr)) {
					pf_update_state_timeout(st, PFTM_PURGE);
					pf_set_protostate(st, PF_PEER_BOTH,
					    TCPS_CLOSED);
					killed++;
				}
			}
			if (pf_status.debug >= LOG_NOTICE)
				addlog(", %u states killed", killed);
		}
		if (pf_status.debug >= LOG_NOTICE)
			addlog("\n");
	}

	/* kill this state */
	pf_update_state_timeout(*stp, PFTM_PURGE);
	pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_CLOSED);
	return (1);
}

int
pf_insert_src_node(struct pf_src_node **sn, struct pf_rule *rule,
    enum pf_sn_types type, sa_family_t af, struct pf_addr *src,
    struct pf_addr *raddr, struct pfi_kif *kif)
{
	struct pf_src_node	k;

	if (*sn == NULL) {
		k.af = af;
		k.type = type;
		pf_addrcpy(&k.addr, src, af);
		k.rule.ptr = rule;
		pf_status.scounters[SCNT_SRC_NODE_SEARCH]++;
		*sn = RB_FIND(pf_src_tree, &tree_src_tracking, &k);
	}
	if (*sn == NULL) {
		if (!rule->max_src_nodes ||
		    rule->src_nodes < rule->max_src_nodes)
			(*sn) = pool_get(&pf_src_tree_pl, PR_NOWAIT | PR_ZERO);
		else
			pf_status.lcounters[LCNT_SRCNODES]++;
		if ((*sn) == NULL)
			return (-1);

		pf_init_threshold(&(*sn)->conn_rate,
		    rule->max_src_conn_rate.limit,
		    rule->max_src_conn_rate.seconds);

		(*sn)->type = type;
		(*sn)->af = af;
		(*sn)->rule.ptr = rule;
		pf_addrcpy(&(*sn)->addr, src, af);
		if (raddr)
			pf_addrcpy(&(*sn)->raddr, raddr, af);
		if (RB_INSERT(pf_src_tree,
		    &tree_src_tracking, *sn) != NULL) {
			if (pf_status.debug >= LOG_NOTICE) {
				log(LOG_NOTICE,
				    "pf: src_tree insert failed: ");
				pf_print_host(&(*sn)->addr, 0, af);
				addlog("\n");
			}
			pool_put(&pf_src_tree_pl, *sn);
			return (-1);
		}
		(*sn)->creation = getuptime();
		(*sn)->rule.ptr->src_nodes++;
		if (kif != NULL) {
			(*sn)->kif = kif;
			pfi_kif_ref(kif, PFI_KIF_REF_SRCNODE);
		}
		pf_status.scounters[SCNT_SRC_NODE_INSERT]++;
		pf_status.src_nodes++;
	} else {
		if (rule->max_src_states &&
		    (*sn)->states >= rule->max_src_states) {
			pf_status.lcounters[LCNT_SRCSTATES]++;
			return (-1);
		}
	}
	return (0);
}

void
pf_remove_src_node(struct pf_src_node *sn)
{
	if (sn->states > 0 || sn->expire > getuptime())
		return;

	sn->rule.ptr->src_nodes--;
	if (sn->rule.ptr->states_cur == 0 &&
	    sn->rule.ptr->src_nodes == 0)
		pf_rm_rule(NULL, sn->rule.ptr);
	RB_REMOVE(pf_src_tree, &tree_src_tracking, sn);
	pf_status.scounters[SCNT_SRC_NODE_REMOVALS]++;
	pf_status.src_nodes--;
	pfi_kif_unref(sn->kif, PFI_KIF_REF_SRCNODE);
	pool_put(&pf_src_tree_pl, sn);
}

struct pf_src_node *
pf_get_src_node(struct pf_state *st, enum pf_sn_types type)
{
	struct pf_sn_item	*sni;

	SLIST_FOREACH(sni, &st->src_nodes, next)
		if (sni->sn->type == type)
			return (sni->sn);
	return (NULL);
}

void
pf_state_rm_src_node(struct pf_state *st, struct pf_src_node *sn)
{
	struct pf_sn_item	*sni, *snin, *snip = NULL;

	for (sni = SLIST_FIRST(&st->src_nodes); sni; sni = snin) {
		snin = SLIST_NEXT(sni, next);
		if (sni->sn == sn) {
			if (snip)
				SLIST_REMOVE_AFTER(snip, next);
			else
				SLIST_REMOVE_HEAD(&st->src_nodes, next);
			pool_put(&pf_sn_item_pl, sni);
			sni = NULL;
			sn->states--;
		}
		if (sni != NULL)
			snip = sni;
	}
}

/* state table stuff */

static inline int
pf_state_compare_key(const struct pf_state_key *a,
    const struct pf_state_key *b)
{
	int	diff;

	if ((diff = a->hash - b->hash) != 0)
		return (diff);
	if ((diff = a->proto - b->proto) != 0)
		return (diff);
	if ((diff = a->af - b->af) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[0], &b->addr[0], a->af)) != 0)
		return (diff);
	if ((diff = pf_addr_compare(&a->addr[1], &b->addr[1], a->af)) != 0)
		return (diff);
	if ((diff = a->port[0] - b->port[0]) != 0)
		return (diff);
	if ((diff = a->port[1] - b->port[1]) != 0)
		return (diff);
	if ((diff = a->rdomain - b->rdomain) != 0)
		return (diff);
	return (0);
}

static inline int
pf_state_compare_id(const struct pf_state *a, const struct pf_state *b)
{
	if (a->id > b->id)
		return (1);
	if (a->id < b->id)
		return (-1);
	if (a->creatorid > b->creatorid)
		return (1);
	if (a->creatorid < b->creatorid)
		return (-1);

	return (0);
}

/*
 * on failure, pf_state_key_attach() releases the pf_state_key
 * reference and returns NULL.
 */
struct pf_state_key *
pf_state_key_attach(struct pf_state_key *sk, struct pf_state *st, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*cur;
	struct pf_state		*oldst = NULL;

	PF_ASSERT_LOCKED();

	KASSERT(st->key[idx] == NULL);
	sk->sk_removed = 0;
	cur = RBT_INSERT(pf_state_tree, &pf_statetbl, sk);
	if (cur != NULL) {
		sk->sk_removed = 1;
		/* key exists. check for same kif, if none, add to key */
		TAILQ_FOREACH(si, &cur->sk_states, si_entry) {
			struct pf_state *sist = si->si_st;
			if (sist->kif == st->kif &&
			    ((sist->key[PF_SK_WIRE]->af == sk->af &&
			    sist->direction == st->direction) ||
			    (sist->key[PF_SK_WIRE]->af !=
			    sist->key[PF_SK_STACK]->af &&
			    sk->af == sist->key[PF_SK_STACK]->af &&
			    sist->direction != st->direction))) {
				int reuse = 0;

				if (sk->proto == IPPROTO_TCP &&
				    sist->src.state >= TCPS_FIN_WAIT_2 &&
				    sist->dst.state >= TCPS_FIN_WAIT_2)
					reuse = 1;
				if (pf_status.debug >= LOG_NOTICE) {
					log(LOG_NOTICE,
					    "pf: %s key attach %s on %s: ",
					    (idx == PF_SK_WIRE) ?
					    "wire" : "stack",
					    reuse ? "reuse" : "failed",
					    st->kif->pfik_name);
					pf_print_state_parts(st,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog(", existing: ");
					pf_print_state_parts(sist,
					    (idx == PF_SK_WIRE) ? sk : NULL,
					    (idx == PF_SK_STACK) ? sk : NULL);
					addlog("\n");
				}
				if (reuse) {
					pf_set_protostate(sist, PF_PEER_BOTH,
					    TCPS_CLOSED);
					/* remove late or sks can go away */
					oldst = sist;
				} else {
					pf_state_key_unref(sk);
					return (NULL);	/* collision! */
				}
			}
		}

		/* reuse the existing state key */
		pf_state_key_unref(sk);
		sk = cur;
	}

	if ((si = pool_get(&pf_state_item_pl, PR_NOWAIT)) == NULL) {
		if (TAILQ_EMPTY(&sk->sk_states)) {
			KASSERT(cur == NULL);
			RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
			sk->sk_removed = 1;
			pf_state_key_unref(sk);
		}

		return (NULL);
	}

	st->key[idx] = pf_state_key_ref(sk); /* give a ref to state */
	si->si_st = pf_state_ref(st);

	/* list is sorted, if-bound states before floating */
	if (st->kif == pfi_all)
		TAILQ_INSERT_TAIL(&sk->sk_states, si, si_entry);
	else
		TAILQ_INSERT_HEAD(&sk->sk_states, si, si_entry);

	if (oldst)
		pf_remove_state(oldst);

	/* caller owns the pf_state ref, which owns a pf_state_key ref now */
	return (sk);
}

void
pf_detach_state(struct pf_state *st)
{
	KASSERT(st->key[PF_SK_WIRE] != NULL);
	pf_state_key_detach(st, PF_SK_WIRE);

	KASSERT(st->key[PF_SK_STACK] != NULL);
	if (st->key[PF_SK_STACK] != st->key[PF_SK_WIRE])
		pf_state_key_detach(st, PF_SK_STACK);
}

void
pf_state_key_detach(struct pf_state *st, int idx)
{
	struct pf_state_item	*si;
	struct pf_state_key	*sk;

	PF_ASSERT_LOCKED();

	sk = st->key[idx];
	if (sk == NULL)
		return;

	TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
		if (si->si_st == st)
			break;
	}
	if (si == NULL)
		return;

	TAILQ_REMOVE(&sk->sk_states, si, si_entry);
	pool_put(&pf_state_item_pl, si);

	if (TAILQ_EMPTY(&sk->sk_states)) {
		RBT_REMOVE(pf_state_tree, &pf_statetbl, sk);
		sk->sk_removed = 1;
		pf_state_key_unlink_reverse(sk);
		pf_state_key_unlink_inpcb(sk);
		pf_state_key_unref(sk);
	}

	pf_state_unref(st);
}

struct pf_state_key *
pf_alloc_state_key(int pool_flags)
{
	struct pf_state_key	*sk;

	if ((sk = pool_get(&pf_state_key_pl, pool_flags)) == NULL)
		return (NULL);

	PF_REF_INIT(sk->sk_refcnt);
	TAILQ_INIT(&sk->sk_states);
	sk->sk_removed = 1;

	return (sk);
}

static __inline int
pf_state_key_addr_setup(struct pf_pdesc *pd, void *arg, int sidx,
    struct pf_addr *saddr, int didx, struct pf_addr *daddr, int af, int multi)
{
	struct pf_state_key_cmp *key = arg;
#ifdef INET6
	struct pf_addr *target;

	if (af == AF_INET || pd->proto != IPPROTO_ICMPV6)
		goto copy;

	switch (pd->hdr.icmp6.icmp6_type) {
	case ND_NEIGHBOR_SOLICIT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		daddr = target;
		break;
	case ND_NEIGHBOR_ADVERT:
		if (multi)
			return (-1);
		target = (struct pf_addr *)&pd->hdr.nd_ns.nd_ns_target;
		saddr = target;
		if (IN6_IS_ADDR_MULTICAST(&pd->dst->v6)) {
			key->addr[didx].addr32[0] = 0;
			key->addr[didx].addr32[1] = 0;
			key->addr[didx].addr32[2] = 0;
			key->addr[didx].addr32[3] = 0;
			daddr = NULL; /* overwritten */
		}
		break;
	default:
		if (multi) {
			key->addr[sidx].addr32[0] = __IPV6_ADDR_INT32_MLL;
			key->addr[sidx].addr32[1] = 0;
			key->addr[sidx].addr32[2] = 0;
			key->addr[sidx].addr32[3] = __IPV6_ADDR_INT32_ONE;
			saddr = NULL; /* overwritten */
		}
	}
 copy:
#endif /* INET6 */
	if (saddr)
		pf_addrcpy(&key->addr[sidx], saddr, af);
	if (daddr)
		pf_addrcpy(&key->addr[didx], daddr, af);

	return (0);
}

int
pf_state_key_setup(struct pf_pdesc *pd, struct pf_state_key **skw,
    struct pf_state_key **sks, int rtableid)
{
	/* if returning error we MUST pool_put state keys ourselves */
	struct pf_state_key *sk1, *sk2;
	u_int wrdom = pd->rdomain;
	int afto = pd->af != pd->naf;

	if ((sk1 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL)
		return (ENOMEM);

	pf_state_key_addr_setup(pd, sk1, pd->sidx, pd->src, pd->didx, pd->dst,
	    pd->af, 0);
	sk1->port[pd->sidx] = pd->osport;
	sk1->port[pd->didx] = pd->odport;
	sk1->proto = pd->proto;
	sk1->af = pd->af;
	sk1->rdomain = pd->rdomain;
	sk1->hash = pf_pkt_hash(sk1->af, sk1->proto,
	    &sk1->addr[0], &sk1->addr[1], sk1->port[0], sk1->port[1]);
	if (rtableid >= 0)
		wrdom = rtable_l2(rtableid);

	if (PF_ANEQ(&pd->nsaddr, pd->src, pd->af) ||
	    PF_ANEQ(&pd->ndaddr, pd->dst, pd->af) ||
	    pd->nsport != pd->osport || pd->ndport != pd->odport ||
	    wrdom != pd->rdomain || afto) {	/* NAT/NAT64 */
		if ((sk2 = pf_alloc_state_key(PR_NOWAIT | PR_ZERO)) == NULL) {
			pf_state_key_unref(sk1);
			return (ENOMEM);
		}
		pf_state_key_addr_setup(pd, sk2, afto ? pd->didx : pd->sidx,
		    &pd->nsaddr, afto ? pd->sidx : pd->didx, &pd->ndaddr,
		    pd->naf, 0);
		sk2->port[afto ? pd->didx : pd->sidx] = pd->nsport;
		sk2->port[afto ? pd->sidx : pd->didx] = pd->ndport;
		if (afto) {
			switch (pd->proto) {
			case IPPROTO_ICMP:
				sk2->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sk2->proto = IPPROTO_ICMP;
				break;
			default:
				sk2->proto = pd->proto;
			}
		} else
			sk2->proto = pd->proto;
		sk2->af = pd->naf;
		sk2->rdomain = wrdom;
		sk2->hash = pf_pkt_hash(sk2->af, sk2->proto,
		    &sk2->addr[0], &sk2->addr[1], sk2->port[0], sk2->port[1]);
	} else
		sk2 = pf_state_key_ref(sk1);

	if (pd->dir == PF_IN) {
		*skw = sk1;
		*sks = sk2;
	} else {
		*sks = sk1;
		*skw = sk2;
	}

	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key setup: ");
		pf_print_state_parts(NULL, *skw, *sks);
		addlog("\n");
	}

	return (0);
}
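
/*
 * Illustrative example (addresses invented): for an outbound TCP
 * connection 192.168.1.10:34567 -> 203.0.113.5:80 translated by
 * "nat-to 198.51.100.2", sk1 above holds the pre-NAT tuple seen by the
 * stack and sk2 the post-NAT tuple on the wire, e.g.
 * 198.51.100.2:54321 -> 203.0.113.5:80.  Since pd->dir == PF_OUT, sk2
 * is returned as *skw and sk1 as *sks.  Without any translation, both
 * pointers reference the same key.
 */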

/*
 * pf_state_insert() does the following:
 * - links the pf_state up with pf_state_key(s).
 * - inserts the pf_state_keys into pf_state_tree.
 * - inserts the pf_state into pf_state_tree_id.
 * - tells pfsync about the state.
 *
 * pf_state_insert() owns the references to the pf_state_key structs
 * it is given. on failure to insert, these references are released.
 * on success, the caller owns a pf_state reference that allows it
 * to access the state keys.
 */

int
pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skwp,
    struct pf_state_key **sksp, struct pf_state *st)
{
	struct pf_state_key *skw = *skwp;
	struct pf_state_key *sks = *sksp;
	int same = (skw == sks);

	PF_ASSERT_LOCKED();

	st->kif = kif;
	PF_STATE_ENTER_WRITE();

	skw = pf_state_key_attach(skw, st, PF_SK_WIRE);
	if (skw == NULL) {
		pf_state_key_unref(sks);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}

	if (same) {
		/* pf_state_key_attach might have swapped skw */
		if (skw != sks) {
			pf_state_key_unref(sks);
			sks = pf_state_key_ref(skw);
		}
		st->key[PF_SK_STACK] = sks;
	} else if (pf_state_key_attach(sks, st, PF_SK_STACK) == NULL) {
		pf_state_key_detach(st, PF_SK_WIRE);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}

	if (st->id == 0 && st->creatorid == 0) {
		st->id = htobe64(pf_status.stateid++);
		st->creatorid = pf_status.hostid;
	}
	if (RBT_INSERT(pf_state_tree_id, &tree_id, st) != NULL) {
		if (pf_status.debug >= LOG_NOTICE) {
			log(LOG_NOTICE, "pf: state insert failed: "
			    "id: %016llx creatorid: %08x",
			    betoh64(st->id), ntohl(st->creatorid));
			addlog("\n");
		}
		pf_detach_state(st);
		PF_STATE_EXIT_WRITE();
		return (-1);
	}
	pf_state_list_insert(&pf_state_list, st);
	counters_inc(pf_status_fcounters, FCNT_STATE_INSERT);
	pf_status.states++;
	pfi_kif_ref(kif, PFI_KIF_REF_STATE);
	PF_STATE_EXIT_WRITE();

#if NPFSYNC > 0
	pfsync_insert_state(st);
#endif /* NPFSYNC > 0 */

	*skwp = skw;
	*sksp = sks;

	return (0);
}

struct pf_state *
pf_find_state_byid(struct pf_state_cmp *key)
{
	counters_inc(pf_status_fcounters, FCNT_STATE_SEARCH);

	return (RBT_FIND(pf_state_tree_id, &tree_id, (struct pf_state *)key));
}

int
pf_compare_state_keys(struct pf_state_key *a, struct pf_state_key *b,
    struct pfi_kif *kif, u_int dir)
{
	/* a (from hdr) and b (new) must be exact opposites of each other */
	if (a->af == b->af && a->proto == b->proto &&
	    PF_AEQ(&a->addr[0], &b->addr[1], a->af) &&
	    PF_AEQ(&a->addr[1], &b->addr[0], a->af) &&
	    a->port[0] == b->port[1] &&
	    a->port[1] == b->port[0] && a->rdomain == b->rdomain)
		return (0);
	else {
		/* mismatch. must not happen. */
		if (pf_status.debug >= LOG_ERR) {
			log(LOG_ERR,
			    "pf: state key linking mismatch! dir=%s, "
			    "if=%s, stored af=%u, a0: ",
			    dir == PF_OUT ? "OUT" : "IN",
			    kif->pfik_name, a->af);
			pf_print_host(&a->addr[0], a->port[0], a->af);
			addlog(", a1: ");
			pf_print_host(&a->addr[1], a->port[1], a->af);
			addlog(", proto=%u", a->proto);
			addlog(", found af=%u, a0: ", b->af);
			pf_print_host(&b->addr[0], b->port[0], b->af);
			addlog(", a1: ");
			pf_print_host(&b->addr[1], b->port[1], b->af);
			addlog(", proto=%u", b->proto);
			addlog("\n");
		}
		return (-1);
	}
}

int
pf_find_state(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
    struct pf_state **stp)
{
	struct pf_state_key	*sk, *pkt_sk;
	struct pf_state_item	*si;
	struct pf_state		*st = NULL;
	int			 didx;

	counters_inc(pf_status_fcounters, FCNT_STATE_SEARCH);
	if (pf_status.debug >= LOG_DEBUG) {
		log(LOG_DEBUG, "pf: key search, %s on %s: ",
		    pd->dir == PF_OUT ? "out" : "in", pd->kif->pfik_name);
		pf_print_state_parts(NULL, (struct pf_state_key *)key, NULL);
		addlog("\n");
	}

	pkt_sk = NULL;
	sk = NULL;
	if (pd->dir == PF_OUT) {
		/* first if block deals with outbound forwarded packet */
		pkt_sk = pd->m->m_pkthdr.pf.statekey;

		if (!pf_state_key_isvalid(pkt_sk)) {
			pf_mbuf_unlink_state_key(pd->m);
			pkt_sk = NULL;
		}

		if (pkt_sk && pf_state_key_isvalid(pkt_sk->sk_reverse))
			sk = pkt_sk->sk_reverse;

		if (pkt_sk == NULL) {
			struct inpcb *inp = pd->m->m_pkthdr.pf.inp;

			/* here we deal with local outbound packet */
			if (inp != NULL) {
				struct pf_state_key *inp_sk;

				mtx_enter(&pf_inp_mtx);
				inp_sk = inp->inp_pf_sk;
				if (pf_state_key_isvalid(inp_sk)) {
					sk = inp_sk;
					mtx_leave(&pf_inp_mtx);
				} else if (inp_sk != NULL) {
					KASSERT(inp_sk->sk_inp == inp);
					inp_sk->sk_inp = NULL;
					inp->inp_pf_sk = NULL;
					mtx_leave(&pf_inp_mtx);

					pf_state_key_unref(inp_sk);
					in_pcbunref(inp);
				} else
					mtx_leave(&pf_inp_mtx);
			}
		}
	}

	if (sk == NULL) {
		if ((sk = RBT_FIND(pf_state_tree, &pf_statetbl,
		    (struct pf_state_key *)key)) == NULL)
			return (PF_DROP);
		if (pd->dir == PF_OUT && pkt_sk &&
		    pf_compare_state_keys(pkt_sk, sk, pd->kif, pd->dir) == 0)
			pf_state_key_link_reverse(sk, pkt_sk);
		else if (pd->dir == PF_OUT)
			pf_state_key_link_inpcb(sk, pd->m->m_pkthdr.pf.inp);
	}

	/* remove firewall data from outbound packet */
	if (pd->dir == PF_OUT)
		pf_pkt_addr_changed(pd->m);

	didx = (pd->dir == PF_IN) ? PF_SK_WIRE : PF_SK_STACK;

	/* list is sorted, if-bound states before floating ones */
	TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
		struct pf_state *sist = si->si_st;
		if (sist->timeout == PFTM_PURGE)
			continue;
		if (sist->kif != pfi_all && sist->kif != pd->kif)
			continue;

		/* af-to needs to be handled specially */
		if (sist->key[PF_SK_WIRE]->af == sist->key[PF_SK_STACK]->af) {
			if (sk != sist->key[didx])
				continue;

			/* af-to case */
		} else {
			/*
			 * af-to creates state for incoming (PF_IN)
			 * connections, and then forces forwarding without
			 * creating an outgoing state. this means the one
			 * state covers both sides of the stack, so should
			 * only match when pd dir is PF_IN.
			 */
			if (pd->dir != PF_IN)
				continue;

			/* one of the st keys has to be sk */
		}

		st = sist;
		break;
	}

	if (st == NULL)
		return (PF_DROP);
	if (ISSET(st->state_flags, PFSTATE_INP_UNLINKED))
		return (PF_DROP);

	if (st->rule.ptr->pktrate.limit && pd->dir == st->direction) {
		pf_add_threshold(&st->rule.ptr->pktrate);
		if (pf_check_threshold(&st->rule.ptr->pktrate))
			return (PF_DROP);
	}

	*stp = st;

	return (PF_MATCH);
}

struct pf_state *
pf_find_state_all(struct pf_state_key_cmp *key, u_int dir, int *more)
{
	struct pf_state_key	*sk;
	struct pf_state_item	*si, *ret = NULL;

	counters_inc(pf_status_fcounters, FCNT_STATE_SEARCH);

	sk = RBT_FIND(pf_state_tree, &pf_statetbl, (struct pf_state_key *)key);

	if (sk != NULL) {
		TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
			struct pf_state *sist = si->si_st;
			if (dir == PF_INOUT ||
			    (sk == (dir == PF_IN ? sist->key[PF_SK_WIRE] :
			    sist->key[PF_SK_STACK]))) {
				if (more == NULL)
					return (sist);

				if (ret)
					(*more)++;
				else
					ret = si;
			}
		}
	}
	return (ret ? ret->si_st : NULL);
}

void
pf_state_peer_hton(const struct pf_state_peer *s, struct pfsync_state_peer *d)
{
	d->seqlo = htonl(s->seqlo);
	d->seqhi = htonl(s->seqhi);
	d->seqdiff = htonl(s->seqdiff);
	d->max_win = htons(s->max_win);
	d->mss = htons(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub) {
		d->scrub.pfss_flags =
		    htons(s->scrub->pfss_flags & PFSS_TIMESTAMP);
		d->scrub.pfss_ttl = (s)->scrub->pfss_ttl;
		d->scrub.pfss_ts_mod = htonl((s)->scrub->pfss_ts_mod);
		d->scrub.scrub_flag = PFSYNC_SCRUB_FLAG_VALID;
	}
}

void
pf_state_peer_ntoh(const struct pfsync_state_peer *s, struct pf_state_peer *d)
{
	d->seqlo = ntohl(s->seqlo);
	d->seqhi = ntohl(s->seqhi);
	d->seqdiff = ntohl(s->seqdiff);
	d->max_win = ntohs(s->max_win);
	d->mss = ntohs(s->mss);
	d->state = s->state;
	d->wscale = s->wscale;
	if (s->scrub.scrub_flag == PFSYNC_SCRUB_FLAG_VALID &&
	    d->scrub != NULL) {
		d->scrub->pfss_flags =
		    ntohs(s->scrub.pfss_flags) & PFSS_TIMESTAMP;
		d->scrub->pfss_ttl = s->scrub.pfss_ttl;
		d->scrub->pfss_ts_mod = ntohl(s->scrub.pfss_ts_mod);
	}
}

void
pf_state_export(struct pfsync_state *sp, struct pf_state *st)
{
	int32_t expire;

	memset(sp, 0, sizeof(struct pfsync_state));

	/* copy from state key */
	sp->key[PF_SK_WIRE].addr[0] = st->key[PF_SK_WIRE]->addr[0];
	sp->key[PF_SK_WIRE].addr[1] = st->key[PF_SK_WIRE]->addr[1];
	sp->key[PF_SK_WIRE].port[0] = st->key[PF_SK_WIRE]->port[0];
	sp->key[PF_SK_WIRE].port[1] = st->key[PF_SK_WIRE]->port[1];
	sp->key[PF_SK_WIRE].rdomain = htons(st->key[PF_SK_WIRE]->rdomain);
	sp->key[PF_SK_WIRE].af = st->key[PF_SK_WIRE]->af;
	sp->key[PF_SK_STACK].addr[0] = st->key[PF_SK_STACK]->addr[0];
	sp->key[PF_SK_STACK].addr[1] = st->key[PF_SK_STACK]->addr[1];
	sp->key[PF_SK_STACK].port[0] = st->key[PF_SK_STACK]->port[0];
	sp->key[PF_SK_STACK].port[1] = st->key[PF_SK_STACK]->port[1];
	sp->key[PF_SK_STACK].rdomain = htons(st->key[PF_SK_STACK]->rdomain);
	sp->key[PF_SK_STACK].af = st->key[PF_SK_STACK]->af;
	sp->rtableid[PF_SK_WIRE] = htonl(st->rtableid[PF_SK_WIRE]);
	sp->rtableid[PF_SK_STACK] = htonl(st->rtableid[PF_SK_STACK]);
	sp->proto = st->key[PF_SK_WIRE]->proto;
	sp->af = st->key[PF_SK_WIRE]->af;

	/* copy from state */
	strlcpy(sp->ifname, st->kif->pfik_name, sizeof(sp->ifname));
	sp->rt = st->rt;
	sp->rt_addr = st->rt_addr;
	sp->creation = htonl(getuptime() - st->creation);
	expire = pf_state_expires(st, st->timeout);
	if (expire <= getuptime())
		sp->expire = htonl(0);
	else
		sp->expire = htonl(expire - getuptime());

	sp->direction = st->direction;
#if NPFLOG > 0
	sp->log = st->log;
#endif /* NPFLOG > 0 */
	sp->timeout = st->timeout;
	sp->state_flags = htons(st->state_flags);
	if (READ_ONCE(st->sync_defer) != NULL)
		sp->state_flags |= htons(PFSTATE_ACK);
	if (!SLIST_EMPTY(&st->src_nodes))
		sp->sync_flags |= PFSYNC_FLAG_SRCNODE;

	sp->id = st->id;
	sp->creatorid = st->creatorid;
	pf_state_peer_hton(&st->src, &sp->src);
	pf_state_peer_hton(&st->dst, &sp->dst);

	if (st->rule.ptr == NULL)
		sp->rule = htonl(-1);
	else
		sp->rule = htonl(st->rule.ptr->nr);
	if (st->anchor.ptr == NULL)
		sp->anchor = htonl(-1);
	else
		sp->anchor = htonl(st->anchor.ptr->nr);
	sp->nat_rule = htonl(-1); /* left for compat, nat_rule is gone */

	pf_state_counter_hton(st->packets[0], sp->packets[0]);
	pf_state_counter_hton(st->packets[1], sp->packets[1]);
	pf_state_counter_hton(st->bytes[0], sp->bytes[0]);
	pf_state_counter_hton(st->bytes[1], sp->bytes[1]);

	sp->max_mss = htons(st->max_mss);
	sp->min_ttl = st->min_ttl;
	sp->set_tos = st->set_tos;
	sp->set_prio[0] = st->set_prio[0];
	sp->set_prio[1] = st->set_prio[1];
}

int
pf_state_alloc_scrub_memory(const struct pfsync_state_peer *s,
    struct pf_state_peer *d)
{
	if (s->scrub.scrub_flag && d->scrub == NULL)
		return (pf_normalize_tcp_alloc(d));

	return (0);
}

#if NPFSYNC > 0
int
pf_state_import(const struct pfsync_state *sp, int flags)
{
	struct pf_state		*st = NULL;
	struct pf_state_key	*skw = NULL, *sks = NULL;
	struct pf_rule		*r = NULL;
	struct pfi_kif		*kif;
	int			 pool_flags;
	int			 error = ENOMEM;
	int			 n = 0;

	PF_ASSERT_LOCKED();

	if (sp->creatorid == 0) {
		DPFPRINTF(LOG_NOTICE, "%s: invalid creator id: %08x", __func__,
		    ntohl(sp->creatorid));
		return (EINVAL);
	}

	if ((kif = pfi_kif_get(sp->ifname, NULL)) == NULL) {
		DPFPRINTF(LOG_NOTICE, "%s: unknown interface: %s", __func__,
		    sp->ifname);
		if (flags & PFSYNC_SI_IOCTL)
			return (EINVAL);
		return (0);	/* skip this state */
	}

	if (sp->af == 0)
		return (0);	/* skip this state */

	/*
	 * If the ruleset checksums match or the state is coming from the ioctl,
	 * it's safe to associate the state with the rule of that number.
	 */
	if (sp->rule != htonl(-1) && sp->anchor == htonl(-1) &&
	    (flags & (PFSYNC_SI_IOCTL | PFSYNC_SI_CKSUM)) &&
	    ntohl(sp->rule) < pf_main_ruleset.rules.active.rcount) {
		TAILQ_FOREACH(r, pf_main_ruleset.rules.active.ptr, entries)
			if (ntohl(sp->rule) == n++)
				break;
	} else
		r = &pf_default_rule;

	if ((r->max_states && r->states_cur >= r->max_states))
		goto cleanup;

	if (flags & PFSYNC_SI_IOCTL)
		pool_flags = PR_WAITOK | PR_LIMITFAIL | PR_ZERO;
	else
		pool_flags = PR_NOWAIT | PR_LIMITFAIL | PR_ZERO;

	if ((st = pool_get(&pf_state_pl, pool_flags)) == NULL)
		goto cleanup;

	if ((skw = pf_alloc_state_key(pool_flags)) == NULL)
		goto cleanup;

	if ((sp->key[PF_SK_WIRE].af &&
	    (sp->key[PF_SK_WIRE].af != sp->key[PF_SK_STACK].af)) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[0],
	    &sp->key[PF_SK_STACK].addr[0], sp->af) ||
	    PF_ANEQ(&sp->key[PF_SK_WIRE].addr[1],
	    &sp->key[PF_SK_STACK].addr[1], sp->af) ||
	    sp->key[PF_SK_WIRE].port[0] != sp->key[PF_SK_STACK].port[0] ||
	    sp->key[PF_SK_WIRE].port[1] != sp->key[PF_SK_STACK].port[1] ||
	    sp->key[PF_SK_WIRE].rdomain != sp->key[PF_SK_STACK].rdomain) {
		if ((sks = pf_alloc_state_key(pool_flags)) == NULL)
			goto cleanup;
	} else
		sks = pf_state_key_ref(skw);

	/* allocate memory for scrub info */
	if (pf_state_alloc_scrub_memory(&sp->src, &st->src) ||
	    pf_state_alloc_scrub_memory(&sp->dst, &st->dst))
		goto cleanup;

	/* copy to state key(s) */
	skw->addr[0] = sp->key[PF_SK_WIRE].addr[0];
	skw->addr[1] = sp->key[PF_SK_WIRE].addr[1];
	skw->port[0] = sp->key[PF_SK_WIRE].port[0];
	skw->port[1] = sp->key[PF_SK_WIRE].port[1];
	skw->rdomain = ntohs(sp->key[PF_SK_WIRE].rdomain);
	skw->proto = sp->proto;
	if (!(skw->af = sp->key[PF_SK_WIRE].af))
		skw->af = sp->af;
	skw->hash = pf_pkt_hash(skw->af, skw->proto,
	    &skw->addr[0], &skw->addr[1], skw->port[0], skw->port[1]);

	if (sks != skw) {
		sks->addr[0] = sp->key[PF_SK_STACK].addr[0];
		sks->addr[1] = sp->key[PF_SK_STACK].addr[1];
		sks->port[0] = sp->key[PF_SK_STACK].port[0];
		sks->port[1] = sp->key[PF_SK_STACK].port[1];
		sks->rdomain = ntohs(sp->key[PF_SK_STACK].rdomain);
		if (!(sks->af = sp->key[PF_SK_STACK].af))
			sks->af = sp->af;
		if (sks->af != skw->af) {
			switch (sp->proto) {
			case IPPROTO_ICMP:
				sks->proto = IPPROTO_ICMPV6;
				break;
			case IPPROTO_ICMPV6:
				sks->proto = IPPROTO_ICMP;
				break;
			default:
				sks->proto = sp->proto;
			}
		} else
			sks->proto = sp->proto;

		if (((sks->af != AF_INET) && (sks->af != AF_INET6)) ||
		    ((skw->af != AF_INET) && (skw->af != AF_INET6))) {
			error = EINVAL;
			goto cleanup;
		}

		sks->hash = pf_pkt_hash(sks->af, sks->proto,
		    &sks->addr[0], &sks->addr[1], sks->port[0], sks->port[1]);

	} else if ((sks->af != AF_INET) && (sks->af != AF_INET6)) {
		error = EINVAL;
		goto cleanup;
	}
	st->rtableid[PF_SK_WIRE] = ntohl(sp->rtableid[PF_SK_WIRE]);
	st->rtableid[PF_SK_STACK] = ntohl(sp->rtableid[PF_SK_STACK]);

	/* copy to state */
	st->rt_addr = sp->rt_addr;
	st->rt = sp->rt;
	st->creation = getuptime() - ntohl(sp->creation);
	st->expire = getuptime();
	if (ntohl(sp->expire)) {
		u_int32_t timeout;

		timeout = r->timeout[sp->timeout];
		if (!timeout)
			timeout = pf_default_rule.timeout[sp->timeout];

		/* sp->expire may have been adaptively scaled by export. */
		st->expire -= timeout - ntohl(sp->expire);
	}

	st->direction = sp->direction;
	st->log = sp->log;
	st->timeout = sp->timeout;
	st->state_flags = ntohs(sp->state_flags);
	st->max_mss = ntohs(sp->max_mss);
	st->min_ttl = sp->min_ttl;
	st->set_tos = sp->set_tos;
	st->set_prio[0] = sp->set_prio[0];
	st->set_prio[1] = sp->set_prio[1];

	st->id = sp->id;
	st->creatorid = sp->creatorid;
	pf_state_peer_ntoh(&sp->src, &st->src);
	pf_state_peer_ntoh(&sp->dst, &st->dst);

	st->rule.ptr = r;
	st->anchor.ptr = NULL;

	PF_REF_INIT(st->refcnt);
	mtx_init(&st->mtx, IPL_NET);

	/* XXX when we have anchors, use STATE_INC_COUNTERS */
	r->states_cur++;
	r->states_tot++;

	st->sync_state = PFSYNC_S_NONE;
	st->pfsync_time = getuptime();
#if NPFSYNC > 0
	pfsync_init_state(st, skw, sks, flags);
#endif

	if (pf_state_insert(kif, &skw, &sks, st) != 0) {
		/* XXX when we have anchors, use STATE_DEC_COUNTERS */
		r->states_cur--;
		error = EEXIST;
		goto cleanup_state;
	}

	return (0);

 cleanup:
	if (skw != NULL)
		pf_state_key_unref(skw);
	if (sks != NULL)
		pf_state_key_unref(sks);

 cleanup_state: /* pf_state_insert frees the state keys */
	if (st) {
		if (st->dst.scrub)
			pool_put(&pf_state_scrub_pl, st->dst.scrub);
		if (st->src.scrub)
			pool_put(&pf_state_scrub_pl, st->src.scrub);
		pool_put(&pf_state_pl, st);
	}
	return (error);
}
#endif /* NPFSYNC > 0 */

/* END state table stuff */

void		 pf_purge_states(void *);
struct task	 pf_purge_states_task =
		     TASK_INITIALIZER(pf_purge_states, NULL);

void		 pf_purge_states_tick(void *);
struct timeout	 pf_purge_states_to =
		     TIMEOUT_INITIALIZER(pf_purge_states_tick, NULL);

unsigned int	 pf_purge_expired_states(unsigned int, unsigned int);

/*
 * how many states to scan this interval.
 *
 * this is set when the timeout fires, and reduced by the task. the
 * task will reschedule itself until the limit is reduced to zero,
 * and then it adds the timeout again.
 */
unsigned int pf_purge_states_limit;

/*
 * limit how many states are processed with locks held per run of
 * the state purge task.
 */
unsigned int pf_purge_states_collect = 64;
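
/*
 * Worked example (illustrative numbers): with 60000 states and the
 * default "set timeout interval 10", pf_purge_states_tick() below sets
 * a budget of 60000/10 = 6000 states to scan for this pass, and the
 * purge task consumes that budget while holding the pf locks for at
 * most pf_purge_states_collect (64) states at a time.
 */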
1903
1904 void
1905pf_purge_states_tick(void *null)
1906 {
1907 unsigned int limit = pf_status.states;
1908 unsigned int interval = pf_default_rule.timeout[PFTM_INTERVAL];
1909
1910 if (limit == 0) {
1911 timeout_add_sec(&pf_purge_states_to, 1);
1912 return;
1913 }
1914
1915 /*
1916 * process a fraction of the state table every second
1917 */
1918
1919 if (interval > 1)
1920 limit /= interval;
1921
1922 pf_purge_states_limit = limit;
1923 task_add(systqmp, &pf_purge_states_task);
1924}
1925
1926void
1927pf_purge_states(void *null)
1928{
1929 unsigned int limit;
1930 unsigned int scanned;
1931
1932 limit = pf_purge_states_limit;
1933 if (limit < pf_purge_states_collect)
1934 limit = pf_purge_states_collect;
1935
1936 scanned = pf_purge_expired_states(limit, pf_purge_states_collect);
1937 if (scanned >= pf_purge_states_limit) {
1938 /* we've run out of states to scan this "interval" */
1939 timeout_add_sec(&pf_purge_states_to, 1);
1940 return;
1941 }
1942
1943 pf_purge_states_limit -= scanned;
1944 task_add(systqmp, &pf_purge_states_task);
1945}
1946
1947void pf_purge_tick(void *);
1948struct timeout pf_purge_to =
1949 TIMEOUT_INITIALIZER(pf_purge_tick, NULL);
1950
1951void pf_purge(void *);
1952struct task pf_purge_task =
1953 TASK_INITIALIZER(pf_purge, NULL);
1954
1955void
1956pf_purge_tick(void *null)
1957{
1958 task_add(systqmp, &pf_purge_task);
1959}
1960
1961void
1962pf_purge(void *null)
1963{
1964 unsigned int interval = max(1, pf_default_rule.timeout[PFTM_INTERVAL]);
1965
1966 PF_LOCK();
1967
1968 pf_purge_expired_src_nodes();
1969 pf_source_purge();
1970
1971 PF_UNLOCK();
1972
1973 /*
1974 * Fragments don't require PF_LOCK(), they use their own lock.
1975 */
1976 pf_purge_expired_fragments();
1977
1978 /* interpret the interval as idle time between runs */
1979 timeout_add_sec(&pf_purge_to, interval);
1980}
1981
1982int32_t
1983pf_state_expires(const struct pf_state *st, uint8_t stimeout)
1984{
1985 u_int32_t timeout;
1986 u_int32_t start;
1987 u_int32_t end;
1988 u_int32_t states;
1989
1990 /*
1991 * pf_state_expires is used by the state purge task to
1992 * decide if a state is a candidate for cleanup, and by the
1993 * pfsync state export code to populate an expiry time.
1994 *
1995 * this function may be called by the state purge task while
1996 * the state is being modified. avoid inconsistent reads of
1997 * state->timeout by having the caller do the read (and any
1998 * checks it needs to do on the same variable) and then pass
1999 * their view of the timeout in here for this function to use.
2000 * the only consequence of using a stale timeout value is
2001 * that the state won't be a candidate for purging until the
2002 * next pass of the purge task.
2003 */
2004
2005 /* handle all PFTM_* >= PFTM_MAX here */
2006 if (stimeout >= PFTM_MAX)
2007 return (0);
2010
2011 timeout = st->rule.ptr->timeout[stimeout];
2012 if (!timeout)
2013 timeout = pf_default_rule.timeout[stimeout];
2014
2015 start = st->rule.ptr->timeout[PFTM_ADAPTIVE_START];
2016 if (start) {
2017 end = st->rule.ptr->timeout[PFTM_ADAPTIVE_END];
2018 states = st->rule.ptr->states_cur;
2019 } else {
2020 start = pf_default_rule.timeout[PFTM_ADAPTIVE_START];
2021 end = pf_default_rule.timeout[PFTM_ADAPTIVE_END];
2022 states = pf_status.states;
2023 }
2024 if (end && states > start && start < end) {
2025 if (states >= end)
2026 return (0);
2027
2028 timeout = (u_int64_t)timeout * (end - states) / (end - start);
2029 }
2030
2031 return (st->expire + timeout);
2032}
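
/*
 * Editor's sketch (not part of pf; pf_adaptive_timeout_model() is a
 * made-up name): the adaptive scaling above, worked through.  With
 * adaptive.start 6000, adaptive.end 12000 and a base timeout of 60
 * seconds, 9000 states scale the timeout to
 * 60 * (12000 - 9000) / (12000 - 6000) = 30 seconds; at or beyond
 * 12000 states the timeout collapses to 0 and the state becomes a
 * purge candidate on the next pass.
 */
static inline u_int32_t
pf_adaptive_timeout_model(u_int32_t timeout, u_int32_t start,
    u_int32_t end, u_int32_t states)
{
	if (end && states > start && start < end) {
		if (states >= end)
			return (0);
		timeout = (u_int64_t)timeout * (end - states) /
		    (end - start);
	}
	return (timeout);
}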
2033
2034void
2035pf_purge_expired_src_nodes(void)
2036{
2037 struct pf_src_node *cur, *next;
2038
2039 PF_ASSERT_LOCKED();
2040
2041 RB_FOREACH_SAFE(cur, pf_src_tree, &tree_src_tracking, next) {
2042 if (cur->states == 0 && cur->expire <= getuptime()) {
2043 pf_remove_src_node(cur);
2044 }
2045 }
2046}
2047
2048void
2049pf_src_tree_remove_state(struct pf_state *st)
2050{
2051 u_int32_t timeout;
2052 struct pf_sn_item *sni;
2053
2054 while ((sni = SLIST_FIRST(&st->src_nodes)) != NULL) {
2055 SLIST_REMOVE_HEAD(&st->src_nodes, next);
2056 if (st->src.tcp_est)
2057 --sni->sn->conn;
2058 if (--sni->sn->states == 0) {
2059 timeout = st->rule.ptr->timeout[PFTM_SRC_NODE];
2060 if (!timeout)
2061 timeout =
2062 pf_default_rule.timeout[PFTM_SRC_NODE];
2063 sni->sn->expire = getuptime() + timeout;
2064 }
2065 pool_put(&pf_sn_item_pl, sni);
2066 }
2067}
2068
2069void
2070pf_remove_state(struct pf_state *st)
2071{
2072 struct pf_state_link *pfl;
2073
2074 PF_ASSERT_LOCKED();
2075
2076 mtx_enter(&st->mtx);
2077 if (st->timeout == PFTM_UNLINKED) {
2078 mtx_leave(&st->mtx);
2079 return;
2080 }
2081 st->timeout = PFTM_UNLINKED;
2082 mtx_leave(&st->mtx);
2083
2084 /* handle load balancing related tasks */
2085 pf_postprocess_addr(st);
2086
2087 if (st->src.state == PF_TCPS_PROXY_DST) {
2088 pf_send_tcp(st->rule.ptr, st->key[PF_SK_WIRE]->af,
2089 &st->key[PF_SK_WIRE]->addr[1],
2090 &st->key[PF_SK_WIRE]->addr[0],
2091 st->key[PF_SK_WIRE]->port[1],
2092 st->key[PF_SK_WIRE]->port[0],
2093 st->src.seqhi, st->src.seqlo + 1,
2094 TH_RST|TH_ACK, 0, 0, 0, 1, st->tag,
2095 st->key[PF_SK_WIRE]->rdomain, NULL);
2096 }
2097 if (st->key[PF_SK_STACK]->proto == IPPROTO_TCP)
2098 pf_set_protostate(st, PF_PEER_BOTH, TCPS_CLOSED);
2099
2100 while ((pfl = SLIST_FIRST(&st->linkage)) != NULL) {
2101 struct pf_state_link_list *list;
2102 unsigned int gen;
2103
2104 SLIST_REMOVE_HEAD(&st->linkage, pfl_linkage);
2105
2106 switch (pfl->pfl_type) {
2107 case PF_STATE_LINK_TYPE_STATELIM: {
2108 struct pf_statelim *stlim;
2109
2110 stlim = pf_statelim_find(st->statelim);
2111 KASSERTMSG(stlim != NULL,
2112 "pf_state %p pfl %p cannot find statelim %u",
2113 st, pfl, st->statelim);
2114
2115 gen = pf_statelim_enter(stlim);
2116 stlim->pfstlim_inuse--;
2117 pf_statelim_leave(stlim, gen);
2118
2119 list = &stlim->pfstlim_states;
2120 break;
2121 }
2122 case PF_STATE_LINK_TYPE_SOURCELIM: {
2123 struct pf_sourcelim *srlim;
2124 struct pf_source key, *sr;
2125
2126 srlim = pf_sourcelim_find(st->sourcelim);
2127 KASSERTMSG(srlim != NULL,
2128 "pf_state %p pfl %p cannot find sourcelim %u",
2129 st, pfl, st->sourcelim);
2130
2131 pf_source_key(srlim, &key,
2132 st->key[PF_SK_WIRE]->af,
2133 st->key[PF_SK_WIRE]->rdomain,
2134 &st->key[PF_SK_WIRE]->addr[0 /* XXX or 1? */]);
2135
2136 sr = pf_source_find(srlim, &key);
2137 KASSERTMSG(sr != NULL,
2138 "pf_state %p pfl %p cannot find source in %u",
2139 st, pfl, st->sourcelim);
2140
2141 gen = pf_sourcelim_enter(srlim);
2142 srlim->pfsrlim_counters.inuse--;
2143 pf_sourcelim_leave(srlim, gen);
2144 pf_source_rele(sr);
2145
2146 list = &sr->pfsr_states;
2147 break;
2148 }
2149 default:
2150 panic("%s: unexpected link type on pfl %p",
2151 __func__, pfl);
2152 }
2153
2154 PF_STATE_ASSERT_LOCKED();
2155 TAILQ_REMOVE(list, pfl, pfl_link);
2156 pool_put(&pf_state_link_pl, pfl);
2157 }
2158
2159 RBT_REMOVE(pf_state_tree_id, &tree_id, st);
2160#if NPFLOW > 0
2161 if (st->state_flags & PFSTATE_PFLOW)
2162 export_pflow(st);
2163#endif /* NPFLOW > 0 */
2164#if NPFSYNC > 0
2165 pfsync_delete_state(st);
2166#endif /* NPFSYNC > 0 */
2167 pf_src_tree_remove_state(st);
2168 pf_detach_state(st);
2169}
2170
2171void
2172pf_remove_divert_state(struct inpcb *inp)
2173{
2174 struct pf_state_key *sk;
2175 struct pf_state_item *si;
2176
2177 PF_ASSERT_UNLOCKED();
2178
2179 if (READ_ONCE(inp->inp_pf_sk) == NULL)
2180 return;
2181
2182 mtx_enter(&pf_inp_mtx);
2183 sk = pf_state_key_ref(inp->inp_pf_sk);
2184 mtx_leave(&pf_inp_mtx);
2185 if (sk == NULL)
2186 return;
2187
2188 PF_LOCK();
2189 PF_STATE_ENTER_WRITE();
2190 TAILQ_FOREACH(si, &sk->sk_states, si_entry) {
2191 struct pf_state *sist = si->si_st;
2192 if (sk == sist->key[PF_SK_STACK] && sist->rule.ptr &&
2193 (sist->rule.ptr->divert.type == PF_DIVERT_TO ||
2194 sist->rule.ptr->divert.type == PF_DIVERT_REPLY)) {
2195 if (sist->key[PF_SK_STACK]->proto == IPPROTO_TCP &&
2196 sist->key[PF_SK_WIRE] != sist->key[PF_SK_STACK]) {
2197 /*
2198 * If the local address is translated, keep
2199 * the state for "tcp.closed" seconds to
2200 * prevent its source port from being reused.
2201 */
2202 if (sist->src.state < TCPS_FIN_WAIT_2 ||
2203 sist->dst.state < TCPS_FIN_WAIT_2) {
2204 pf_set_protostate(sist, PF_PEER_BOTH,
2205 TCPS_TIME_WAIT);
2206 pf_update_state_timeout(sist,
2207 PFTM_TCP_CLOSED);
2208 sist->expire = getuptime();
2209 }
2210 sist->state_flags |= PFSTATE_INP_UNLINKED;
2211 } else
2212 pf_remove_state(sist);
2213 break;
2214 }
2215 }
2216 PF_STATE_EXIT_WRITE();
2217 PF_UNLOCK();
2218
2219 pf_state_key_unref(sk);
2220}
2221
2222void
2223pf_free_state(struct pf_state *st)
2224{
2225 struct pf_rule_item *ri;
2226
2227 PF_ASSERT_LOCKED();
2228
2229#if NPFSYNC > 0
2230 if (pfsync_state_in_use(st))
2231 return;
2232#endif /* NPFSYNC > 0 */
2233
2234 KASSERT(st->timeout == PFTM_UNLINKED);
2235 if (--st->rule.ptr->states_cur == 0 &&
2236 st->rule.ptr->src_nodes == 0)
2237 pf_rm_rule(NULL, st->rule.ptr);
2238 if (st->anchor.ptr != NULL)
2239 if (--st->anchor.ptr->states_cur == 0)
2240 pf_rm_rule(NULL, st->anchor.ptr);
2241 while ((ri = SLIST_FIRST(&st->match_rules))) {
2242 SLIST_REMOVE_HEAD(&st->match_rules, entry);
2243 if (--ri->r->states_cur == 0 &&
2244 ri->r->src_nodes == 0)
2245 pf_rm_rule(NULL, ri->r);
2246 pool_put(&pf_rule_item_pl, ri);
2247 }
2248 pf_normalize_tcp_cleanup(st);
2249 pfi_kif_unref(st->kif, PFI_KIF_REF_STATE);
2250 pf_state_list_remove(&pf_state_list, st);
2251 if (st->tag)
2252 pf_tag_unref(st->tag);
2253 pf_state_unref(st);
2254 counters_inc(pf_status_fcounters, FCNT_STATE_REMOVALS);
2255 pf_status.states--;
2256}
2257
2258unsigned int
2259pf_purge_expired_states(const unsigned int limit, const unsigned int collect)
2260{
2261 /*
2262 * this task/thread/context/whatever is the only thing that
2263 * removes states from the pf_state_list, so the cur reference
2264 * it holds between calls is guaranteed to still be in the
2265 * list.
2266 */
2267 static struct pf_state *cur = NULL;
2268
2269 struct pf_state *head, *tail;
2270 struct pf_state *st;
2271 SLIST_HEAD(pf_state_gcl, pf_state) gcl = SLIST_HEAD_INITIALIZER(gcl);
2272 time_t now;
2273 unsigned int scanned;
2274 unsigned int collected = 0;
2275
2276 PF_ASSERT_UNLOCKED();
2277
2278 rw_enter_read(&pf_state_list.pfs_rwl);
2279
2280 mtx_enter(&pf_state_list.pfs_mtx);
2281 head = TAILQ_FIRST(&pf_state_list.pfs_list);
2282 tail = TAILQ_LAST(&pf_state_list.pfs_list, pf_state_queue);
2283 mtx_leave(&pf_state_list.pfs_mtx);
2284
2285 if (head == NULL) {
2286 /* the list is empty */
2287 rw_exit_read(&pf_state_list.pfs_rwl);
2288 return (limit);
2289 }
2290
2291 /* (re)start at the front of the list */
2292 if (cur == NULL)
2293 cur = head;
2294
2295 now = getuptime();
2296
2297 for (scanned = 0; scanned < limit; scanned++) {
2298 uint8_t stimeout = cur->timeout;
2299 unsigned int limited = 0;
2300
2301 if ((stimeout == PFTM_UNLINKED) ||
2302 (pf_state_expires(cur, stimeout) <= now)) {
2303 st = pf_state_ref(cur);
2304 SLIST_INSERT_HEAD(&gcl, st, gc_list);
2305
2306 if (++collected >= collect)
2307 limited = 1;
2308 }
2309
2310 /* don't iterate past the end of our view of the list */
2311 if (cur == tail) {
2312 cur = NULL;
2313 break;
2314 }
2315
2316 cur = TAILQ_NEXT(cur, entry_list);
2317
2318 /* don't spend too much time here. */
2319 if (ISSET(READ_ONCE(curcpu()->ci_schedstate.spc_schedflags),
2320 SPCF_SHOULDYIELD) || limited)
2321 break;
2322 }
2323
2324 rw_exit_read(&pf_state_list.pfs_rwl);
2325
2326 if (SLIST_EMPTY(&gcl))
2327 return (scanned);
2328
2329 rw_enter_write(&pf_state_list.pfs_rwl);
2330 PF_LOCK();
2331 PF_STATE_ENTER_WRITE();
2332 SLIST_FOREACH(st, &gcl, gc_list) {
2333 if (st->timeout != PFTM_UNLINKED)
2334 pf_remove_state(st);
2335
2336 pf_free_state(st);
2337 }
2338 PF_STATE_EXIT_WRITE();
2339 PF_UNLOCK();
2340 rw_exit_write(&pf_state_list.pfs_rwl);
2341
2342 while ((st = SLIST_FIRST(&gcl)) != NULL) {
2343 SLIST_REMOVE_HEAD(&gcl, gc_list);
2344 pf_state_unref(st);
2345 }
2346
2347 return (scanned);
2348}
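
/*
 * Editor's note: expiry above runs in two phases.  Candidates are
 * collected onto gcl holding only the list read-lock plus a per-state
 * reference, so packet processing keeps running meanwhile; only then
 * are the write locks taken to unlink and free the whole batch, and
 * the 'collect' limit bounds how much work is queued for that phase.
 */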
2349
2350int
2351pf_tbladdr_setup(struct pf_ruleset *rs, struct pf_addr_wrap *aw, int wait)
2352{
2353 if (aw->type != PF_ADDR_TABLE)
2354 return (0);
2355 if ((aw->p.tbl = pfr_attach_table(rs, aw->v.tblname, wait)) == NULL)
2356 return (1);
2357 return (0);
2358}
2359
2360void
2361pf_tbladdr_remove(struct pf_addr_wrap *aw)
2362{
2363 if (aw->type != PF_ADDR_TABLE || aw->p.tbl == NULL)
2364 return;
2365 pfr_detach_table(aw->p.tbl);
2366 aw->p.tbl = NULL;
2367}
2368
2369void
2370pf_tbladdr_copyout(struct pf_addr_wrap *aw)
2371{
2372 struct pfr_ktable *kt = aw->p.tbl;
2373
2374 if (aw->type != PF_ADDR_TABLE || kt == NULL)
2375 return;
2376 if (!(kt->pfrkt_flags & PFR_TFLAG_ACTIVE) && kt->pfrkt_root != NULL)
2377 kt = kt->pfrkt_root;
2378 aw->p.tbl = NULL;
2379 aw->p.tblcnt = (kt->pfrkt_flags & PFR_TFLAG_ACTIVE) ?
2380 kt->pfrkt_cnt : -1;
2381}
2382
2383void
2384pf_print_host(struct pf_addr *addr, u_int16_t p, sa_family_t af)
2385{
2386 switch (af) {
2387 case AF_INET: {
2388 u_int32_t a = ntohl(addr->addr32[0]);
2389 addlog("%u.%u.%u.%u", (a>>24)&255, (a>>16)&255,
2390 (a>>8)&255, a&255);
2391 if (p) {
2392 p = ntohs(p);
2393 addlog(":%u", p);
2394 }
2395 break;
2396 }
2397#ifdef INET6
2398 case AF_INET6: {
2399 u_int16_t b;
2400 u_int8_t i, curstart, curend, maxstart, maxend;
2401 curstart = curend = maxstart = maxend = 255;
2402 for (i = 0; i < 8; i++) {
2403 if (!addr->addr16[i]) {
2404 if (curstart == 255)
2405 curstart = i;
2406 curend = i;
2407 } else {
2408 if ((curend - curstart) >
2409 (maxend - maxstart)) {
2410 maxstart = curstart;
2411 maxend = curend;
2412 }
2413 curstart = curend = 255;
2414 }
2415 }
2416 if ((curend - curstart) >
2417 (maxend - maxstart)) {
2418 maxstart = curstart;
2419 maxend = curend;
2420 }
2421 for (i = 0; i < 8; i++) {
2422 if (i >= maxstart && i <= maxend) {
2423 if (i == 0)
2424 addlog(":");
2425 if (i == maxend)
2426 addlog(":");
2427 } else {
2428 b = ntohs(addr->addr16[i]);
2429 addlog("%x", b);
2430 if (i < 7)
2431 addlog(":");
2432 }
2433 }
2434 if (p) {
2435 p = ntohs(p);
2436 addlog("[%u]", p);
2437 }
2438 break;
2439 }
2440#endif /* INET6 */
2441 }
2442}
2443
2444void
2445pf_print_state(struct pf_state *st)
2446{
2447 pf_print_state_parts(st, NULL, NULL);
2448}
2449
2450void
2451pf_print_state_parts(struct pf_state *st,
2452 struct pf_state_key *skwp, struct pf_state_key *sksp)
2453{
2454 struct pf_state_key *skw, *sks;
2455 u_int8_t proto, dir;
2456
2457 /* Do our best to fill these, but they're skipped if NULL */
2458 skw = skwp ? skwp : (st ? st->key[PF_SK_WIRE] : NULL);
2459 sks = sksp ? sksp : (st ? st->key[PF_SK_STACK] : NULL);
2460 proto = skw ? skw->proto : (sks ? sks->proto : 0);
2461 dir = st ? st->direction : 0;
2462
2463 switch (proto) {
2464 case IPPROTO_IPV4:
2465 addlog("IPv4");
2466 break;
2467 case IPPROTO_IPV6:
2468 addlog("IPv6");
2469 break;
2470 case IPPROTO_TCP:
2471 addlog("TCP");
2472 break;
2473 case IPPROTO_UDP:
2474 addlog("UDP");
2475 break;
2476 case IPPROTO_ICMP:
2477 addlog("ICMP");
2478 break;
2479 case IPPROTO_ICMPV6:
2480 addlog("ICMPv6");
2481 break;
2482 default:
2483 addlog("%u", proto);
2484 break;
2485 }
2486 switch (dir) {
2487 case PF_IN:
2488 addlog(" in");
2489 break;
2490 case PF_OUT:
2491 addlog(" out");
2492 break;
2493 }
2494 if (skw) {
2495 addlog(" wire: (%d) ", skw->rdomain);
2496 pf_print_host(&skw->addr[0], skw->port[0], skw->af);
2497 addlog(" ");
2498 pf_print_host(&skw->addr[1], skw->port[1], skw->af);
2499 }
2500 if (sks) {
2501 addlog(" stack: (%d) ", sks->rdomain);
2502 if (sks != skw) {
2503 pf_print_host(&sks->addr[0], sks->port[0], sks->af);
2504 addlog(" ");
2505 pf_print_host(&sks->addr[1], sks->port[1], sks->af);
2506 } else
2507 addlog("-");
2508 }
2509 if (st) {
2510 if (proto == IPPROTO_TCP) {
2511 addlog(" [lo=%u high=%u win=%u modulator=%u",
2512 st->src.seqlo, st->src.seqhi,
2513 st->src.max_win, st->src.seqdiff);
2514 if (st->src.wscale && st->dst.wscale)
2515 addlog(" wscale=%u",
2516 st->src.wscale & PF_WSCALE_MASK);
2517 addlog("]");
2518 addlog(" [lo=%u high=%u win=%u modulator=%u",
2519 st->dst.seqlo, st->dst.seqhi,
2520 st->dst.max_win, st->dst.seqdiff);
2521 if (st->src.wscale && st->dst.wscale)
2522 addlog(" wscale=%u",
2523 st->dst.wscale & PF_WSCALE_MASK);
2524 addlog("]");
2525 }
2526 addlog(" %u:%u", st->src.state, st->dst.state);
2527 if (st->rule.ptr)
2528 addlog(" @%d", st->rule.ptr->nr);
2529 }
2530}
2531
2532void
2533pf_print_flags(u_int8_t f)
2534{
2535 if (f)
2536 addlog(" ");
2537 if (f & TH_FIN)
2538 addlog("F");
2539 if (f & TH_SYN)
2540 addlog("S");
2541 if (f & TH_RST)
2542 addlog("R");
2543 if (f & TH_PUSH)
2544 addlog("P");
2545 if (f & TH_ACK)
2546 addlog("A");
2547 if (f & TH_URG)
2548 addlog("U");
2549 if (f & TH_ECE)
2550 addlog("E");
2551 if (f & TH_CWR)
2552 addlog("W");
2553}
2554
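/*
 * Editor's note: skip steps are a ruleset-evaluation optimisation.
 * For each rule and each PF_SKIP_* criterion, skip[i] points at the
 * next rule whose value for that criterion differs.  When a rule
 * fails to match on, say, the interface, evaluation can jump straight
 * past the whole run of consecutive rules naming the same interface
 * instead of testing each one.  pf_calc_skip_steps() walks the
 * ruleset once, and PF_SET_SKIP_STEPS() back-patches the pointers of
 * every rule in the current run whenever a criterion changes value.
 */
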
2555#define PF_SET_SKIP_STEPS(i) \
2556 do { \
2557 while (head[i] != cur) { \
2558 head[i]->skip[i].ptr = cur; \
2559 head[i] = TAILQ_NEXT(head[i], entries); \
2560 } \
2561 } while (0)
2562
2563void
2564pf_calc_skip_steps(struct pf_rulequeue *rules)
2565{
2566 struct pf_rule *cur, *prev, *head[PF_SKIP_COUNT];
2567 int i;
2568
2569 cur = TAILQ_FIRST(rules);
2570 prev = cur;
2571 for (i = 0; i < PF_SKIP_COUNT; ++i)
2572 head[i] = cur;
2573 while (cur != NULL) {
2574 if (cur->kif != prev->kif || cur->ifnot != prev->ifnot)
2575 PF_SET_SKIP_STEPS(PF_SKIP_IFP);
2576 if (cur->direction != prev->direction)
2577 PF_SET_SKIP_STEPS(PF_SKIP_DIR);
2578 if (cur->onrdomain != prev->onrdomain ||
2579 cur->ifnot != prev->ifnot)
2580 PF_SET_SKIP_STEPS(PF_SKIP_RDOM);
2581 if (cur->af != prev->af)
2582 PF_SET_SKIP_STEPS(PF_SKIP_AF);
2583 if (cur->proto != prev->proto)
2584 PF_SET_SKIP_STEPS(PF_SKIP_PROTO);
2585 if (cur->src.neg != prev->src.neg ||
2586 pf_addr_wrap_neq(&cur->src.addr, &prev->src.addr))
2587 PF_SET_SKIP_STEPS(PF_SKIP_SRC_ADDR);
2588 if (cur->dst.neg != prev->dst.neg ||
2589 pf_addr_wrap_neq(&cur->dst.addr, &prev->dst.addr))
2590 PF_SET_SKIP_STEPS(PF_SKIP_DST_ADDR);
2591 if (cur->src.port[0] != prev->src.port[0] ||
2592 cur->src.port[1] != prev->src.port[1] ||
2593 cur->src.port_op != prev->src.port_op)
2594 PF_SET_SKIP_STEPS(PF_SKIP_SRC_PORT);
2595 if (cur->dst.port[0] != prev->dst.port[0] ||
2596 cur->dst.port[1] != prev->dst.port[1] ||
2597 cur->dst.port_op != prev->dst.port_op)
2598 PF_SET_SKIP_STEPS(PF_SKIP_DST_PORT);
2599
2600 prev = cur;
2601 cur = TAILQ_NEXT(cur, entries);
2602 }
2603 for (i = 0; i < PF_SKIP_COUNT; ++i)
2604 PF_SET_SKIP_STEPS(i);
2605}
2606
2607int
2608pf_addr_wrap_neq(struct pf_addr_wrap *aw1, struct pf_addr_wrap *aw2)
2609{
2610 if (aw1->type != aw2->type)
2611 return (1);
2612 switch (aw1->type) {
2613 case PF_ADDR_ADDRMASK:
2614 case PF_ADDR_RANGE:
2615 if (PF_ANEQ(&aw1->v.a.addr, &aw2->v.a.addr, AF_INET6))
2616 return (1);
2617 if (PF_ANEQ(&aw1->v.a.mask, &aw2->v.a.mask, AF_INET6))
2618 return (1);
2619 return (0);
2620 case PF_ADDR_DYNIFTL:
2621 return (aw1->p.dyn->pfid_kt != aw2->p.dyn->pfid_kt);
2622 case PF_ADDR_NONE:
2623 case PF_ADDR_NOROUTE:
2624 case PF_ADDR_URPFFAILED:
2625 return (0);
2626 case PF_ADDR_TABLE:
2627 return (aw1->p.tbl != aw2->p.tbl);
2628 case PF_ADDR_RTLABEL:
2629 return (aw1->v.rtlabel != aw2->v.rtlabel);
2630 default:
2631 addlog("invalid address type: %d\n", aw1->type);
2632 return (1);
2633 }
2634}
2635
2636/* This algorithm computes 'a + b - c' in ones-complement using a trick to
2637 * emulate at most one ones-complement subtraction. This thereby limits net
2638 * carries/borrows to at most one, eliminating a reduction step and saving one
2639 * each of +, >>, & and ~.
2640 *
2641 * def. x mod y = x - (x//y)*y for integer x,y
2642 * def. sum = x mod 2^16
2643 * def. accumulator = (x >> 16) mod 2^16
2644 *
2645 * The trick works as follows: subtracting exactly one u_int16_t from the
2646 * u_int32_t x incurs at most one underflow, wrapping its upper 16-bits, the
2647 * accumulator, to 2^16 - 1. Adding this to the 16-bit sum preserves the
2648 * ones-complement borrow:
2649 *
2650 * (sum + accumulator) mod 2^16
2651 * = { assume underflow: accumulator := 2^16 - 1 }
2652 * (sum + 2^16 - 1) mod 2^16
2653 * = { mod }
2654 * (sum - 1) mod 2^16
2655 *
2656 * Although this breaks for sum = 0, giving 0xffff, which is ones-complement's
2657 * other zero, not -1, that cannot occur: the 16-bit sum cannot be underflown
2658 * to zero as that requires subtraction of at least 2^16, which exceeds a
2659 * single u_int16_t's range.
2660 *
2661 * We use the following theorem to derive the implementation:
2662 *
2663 * th. (x + (y mod z)) mod z = (x + y) mod z (0)
2664 * proof.
2665 * (x + (y mod z)) mod z
2666 * = { def mod }
2667 * (x + y - (y//z)*z) mod z
2668 * = { (a + b*c) mod c = a mod c }
2669 * (x + y) mod z [end of proof]
2670 *
2671 * ... and thereby obtain:
2672 *
2673 * (sum + accumulator) mod 2^16
2674 * = { def. accumulator, def. sum }
2675 * (x mod 2^16 + (x >> 16) mod 2^16) mod 2^16
2676 * = { (0), twice }
2677 * (x + (x >> 16)) mod 2^16
2678 * = { x mod 2^n = x & (2^n - 1) }
2679 * (x + (x >> 16)) & 0xffff
2680 *
2681 * Note: this serves also as a reduction step for at most one add (as the
2682 * trailing mod 2^16 prevents further reductions by destroying carries).
2683 */
2684__inline void
2685pf_cksum_fixup(u_int16_t *cksum, u_int16_t was, u_int16_t now,
2686 u_int8_t proto)
2687{
2688 u_int32_t x;
2689 const int udp = proto == IPPROTO_UDP;
2690
2691 x = *cksum + was - now;
2692 x = (x + (x >> 16)) & 0xffff;
2693
2694 /* optimise: eliminate a branch when not udp */
2695 if (udp && *cksum == 0x0000)
2696 return;
2697 if (udp && x == 0x0000)
2698 x = 0xffff;
2699
2700 *cksum = (u_int16_t)(x);
2701}
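
/*
 * Editor's sketch (not part of pf; cksum_fixup_model() is a made-up
 * name): the fixup above without the UDP special case, plus a worked
 * borrow.  With *cksum 0x0001, was 0x0000 and now 0x0003, x wraps to
 * 0xfffffffe; the wrapped upper half 0xffff added to the lower half
 * 0xfffe folds to 0xfffd, which is -2 in ones-complement, exactly
 * 1 - 3 as required.
 */
static inline u_int16_t
cksum_fixup_model(u_int16_t cksum, u_int16_t was, u_int16_t now)
{
	u_int32_t x = (u_int32_t)cksum + was - now;

	/* a single fold: the net carry/borrow is at most one */
	return ((u_int16_t)((x + (x >> 16)) & 0xffff));
}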
2702
2703#ifdef INET6
2704/* pre: coverage(cksum) is superset of coverage(covered_cksum) */
2705static __inline void
2706pf_cksum_uncover(u_int16_t *cksum, u_int16_t covered_cksum, u_int8_t proto)
2707{
2708 pf_cksum_fixup(cksum, ~covered_cksum, 0x0, proto);
2709}
2710
2711/* pre: disjoint(coverage(cksum), coverage(uncovered_cksum)) */
2712static __inline void
2713pf_cksum_cover(u_int16_t *cksum, u_int16_t uncovered_cksum, u_int8_t proto)
2714{
2715 pf_cksum_fixup(cksum, 0x0, ~uncovered_cksum, proto);
2716}
2717#endif /* INET6 */
2718
2719/* pre: *a is 16-bit aligned within its packet
2720 *
2721 * This algorithm emulates 16-bit ones-complement sums on a twos-complement
2722 * machine by conserving ones-complement's otherwise discarded carries in the
2723 * upper bits of x. These accumulated carries when added to the lower 16-bits
2724 * over at least zero 'reduction' steps then complete the ones-complement sum.
2725 *
2726 * def. sum = x mod 2^16
2727 * def. accumulator = (x >> 16)
2728 *
2729 * At most two reduction steps
2730 *
2731 * x := sum + accumulator
2732 * = { def sum, def accumulator }
2733 * x := x mod 2^16 + (x >> 16)
2734 * = { x mod 2^n = x & (2^n - 1) }
2735 * x := (x & 0xffff) + (x >> 16)
2736 *
2737 * are necessary to incorporate the accumulated carries (at most one per add)
2738 * i.e. to reduce x < 2^16 from at most 16 carries in the upper 16 bits.
2739 *
2740 * The function is also invariant over the endian of the host. Why?
2741 *
2742 * Define the unary transpose operator ~ on a bitstring in python slice
2743 * notation as lambda m: m[P:] + m[:P] , for some constant pivot P.
2744 *
2745 * th. ~ distributes over ones-complement addition, denoted by +_1, i.e.
2746 *
2747 * ~m +_1 ~n = ~(m +_1 n) (for all bitstrings m,n of equal length)
2748 *
2749 * proof. Regard the bitstrings in m +_1 n as split at P, forming at most two
2750 * 'half-adds'. Under ones-complement addition, each half-add carries to the
2751 * other, so the sum of each half-add is unaffected by their relative
2752 * order. Therefore:
2753 *
2754 * ~m +_1 ~n
2755 * = { half-adds invariant under transposition }
2756 * ~s
2757 * = { substitute }
2758 * ~(m +_1 n) [end of proof]
2759 *
2760 * th. Summing two in-memory ones-complement 16-bit variables m,n on a machine
2761 * with the converse endian does not alter the result.
2762 *
2763 * proof.
2764 * { converse machine endian: load/store transposes, P := 8 }
2765 * ~(~m +_1 ~n)
2766 * = { ~ over +_1 }
2767 * ~~m +_1 ~~n
2768 * = { ~ is an involution }
2769 * m +_1 n [end of proof]
2770 *
2771 */
2772#define NEG(x) ((u_int16_t)~(x))
2773void
2774pf_cksum_fixup_a(u_int16_t *cksum, const struct pf_addr *a,
2775 const struct pf_addr *an, sa_family_t af, u_int8_t proto)
2776{
2777 u_int32_t x;
2778 const u_int16_t *n = an->addr16;
2779 const u_int16_t *o = a->addr16;
2780 const int udp = proto == IPPROTO_UDP;
2781
2782 switch (af) {
2783 case AF_INET:
2784 x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]);
2785 break;
2786#ifdef INET6
2787 case AF_INET6:
2788		x = *cksum + o[0] + NEG(n[0]) + o[1] + NEG(n[1]) +
2789		    o[2] + NEG(n[2]) + o[3] + NEG(n[3]) +
2790		    o[4] + NEG(n[4]) + o[5] + NEG(n[5]) +
2791		    o[6] + NEG(n[6]) + o[7] + NEG(n[7]);
2792 break;
2793#endif /* INET6 */
2794 default:
2795 unhandled_af(af);
2796 }
2797
2798 x = (x & 0xffff) + (x >> 16);
2799 x = (x & 0xffff) + (x >> 16);
2800
2801 /* optimise: eliminate a branch when not udp */
2802 if (udp && *cksum == 0x0000)
2803 return;
2804 if (udp && x == 0x0000)
2805 x = 0xffff;
2806
2807 *cksum = (u_int16_t)(x);
2808}
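
/*
 * Editor's sketch (not part of pf; cksum_fold_model() is a made-up
 * name): why the function above folds twice.  The AF_INET6 case adds
 * seventeen 16-bit terms (the checksum plus eight old and eight new
 * address words), leaving up to 16 carries in the upper half of x;
 * the first fold's own addition may carry once more, so a second fold
 * is needed before x is guaranteed to fit in 16 bits.
 */
static inline u_int16_t
cksum_fold_model(u_int32_t x)
{
	x = (x & 0xffff) + (x >> 16);	/* absorb accumulated carries */
	x = (x & 0xffff) + (x >> 16);	/* absorb the fold's own carry */
	return ((u_int16_t)x);
}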
2809
2810int
2811pf_patch_8(struct pf_pdesc *pd, u_int8_t *f, u_int8_t v, bool hi)
2812{
2813 int rewrite = 0;
2814
2815 if (*f != v) {
2816 u_int16_t old = htons(hi ? (*f << 8) : *f);
2817 u_int16_t new = htons(hi ? ( v << 8) : v);
2818
2819 pf_cksum_fixup(pd->pcksum, old, new, pd->proto);
2820 *f = v;
2821 rewrite = 1;
2822 }
2823
2824 return (rewrite);
2825}
2826
2827/* pre: *f is 16-bit aligned within its packet */
2828int
2829pf_patch_16(struct pf_pdesc *pd, u_int16_t *f, u_int16_t v)
2830{
2831 int rewrite = 0;
2832
2833 if (*f != v) {
2834 pf_cksum_fixup(pd->pcksum, *f, v, pd->proto);
2835 *f = v;
2836 rewrite = 1;
2837 }
2838
2839 return (rewrite);
2840}
2841
2842int
2843pf_patch_16_unaligned(struct pf_pdesc *pd, void *f, u_int16_t v, bool hi)
2844{
2845 int rewrite = 0;
2846 u_int8_t *fb = (u_int8_t*)f;
2847 u_int8_t *vb = (u_int8_t*)&v;
2848
2849 if (hi && ALIGNED_POINTER(f, u_int16_t)) {
2850 return (pf_patch_16(pd, f, v)); /* optimise */
2851 }
2852
2853 rewrite += pf_patch_8(pd, fb++, *vb++, hi);
2854 rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
2855
2856 return (rewrite);
2857}
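
/*
 * Editor's note: 'hi' records whether the field starts on an even
 * byte offset in the packet, i.e. whether its first byte sits in the
 * high half of a 16-bit checksum word (PF_ALGNMNT() derives this from
 * a byte offset).  For a two-byte field at an odd offset the bytes
 * straddle two checksum words: the first byte is patched as the low
 * half of one word (hi == 0) and the second as the high half of the
 * next, which is why the calls above alternate hi and !hi.
 */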
2858
2859/* pre: *f is 16-bit aligned within its packet */
2860/* pre: pd->proto != IPPROTO_UDP */
2861int
2862pf_patch_32(struct pf_pdesc *pd, u_int32_t *f, u_int32_t v)
2863{
2864 int rewrite = 0;
2865 u_int16_t *pc = pd->pcksum;
2866 u_int8_t proto = pd->proto;
2867
2868 /* optimise: inline udp fixup code is unused; let compiler scrub it */
2869 if (proto == IPPROTO_UDP)
2870 panic("%s: udp", __func__);
2871
2872 /* optimise: skip *f != v guard; true for all use-cases */
2873 pf_cksum_fixup(pc, *f / (1 << 16), v / (1 << 16), proto);
2874 pf_cksum_fixup(pc, *f % (1 << 16), v % (1 << 16), proto);
2875
2876 *f = v;
2877 rewrite = 1;
2878
2879 return (rewrite);
2880}
2881
2882int
2883pf_patch_32_unaligned(struct pf_pdesc *pd, void *f, u_int32_t v, bool hi)
2884{
2885 int rewrite = 0;
2886 u_int8_t *fb = (u_int8_t*)f;
2887 u_int8_t *vb = (u_int8_t*)&v;
2888
2889 if (hi && ALIGNED_POINTER(f, u_int32_t)) {
2890 return (pf_patch_32(pd, f, v)); /* optimise */
2891 }
2892
2893 rewrite += pf_patch_8(pd, fb++, *vb++, hi);
2894 rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
2895 rewrite += pf_patch_8(pd, fb++, *vb++, hi);
2896 rewrite += pf_patch_8(pd, fb++, *vb++,!hi);
2897
2898 return (rewrite);
2899}
2900
2901int
2902pf_icmp_mapping(struct pf_pdesc *pd, u_int8_t type, int *icmp_dir,
2903 u_int16_t *virtual_id, u_int16_t *virtual_type)
2904{
2905 /*
2906 * ICMP types marked with PF_OUT are typically responses to
2907 * PF_IN, and will match states in the opposite direction.
2908 * PF_IN ICMP types need to match a state with that type.
2909 */
2910 *icmp_dir = PF_OUT;
2911
2912 /* Queries (and responses) */
2913 switch (pd->af) {
2914 case AF_INET:
2915 switch (type) {
2916 case ICMP_ECHO:
2917 *icmp_dir = PF_IN;
2918 /* FALLTHROUGH */
2919 case ICMP_ECHOREPLY:
2920 *virtual_type = ICMP_ECHO;
2921 *virtual_id = pd->hdr.icmp.icmp_id;
2922 break;
2923
2924 case ICMP_TSTAMP:
2925 *icmp_dir = PF_IN;
2926 /* FALLTHROUGH */
2927 case ICMP_TSTAMPREPLY:
2928 *virtual_type = ICMP_TSTAMP;
2929 *virtual_id = pd->hdr.icmp.icmp_id;
2930 break;
2931
2932 case ICMP_IREQ:
2933 *icmp_dir = PF_IN;
2934 /* FALLTHROUGH */
2935 case ICMP_IREQREPLY:
2936 *virtual_type = ICMP_IREQ;
2937 *virtual_id = pd->hdr.icmp.icmp_id;
2938 break;
2939
2940 case ICMP_MASKREQ:
2941 *icmp_dir = PF_IN;
2942 /* FALLTHROUGH */
2943 case ICMP_MASKREPLY:
2944 *virtual_type = ICMP_MASKREQ;
2945 *virtual_id = pd->hdr.icmp.icmp_id;
2946 break;
2947
2948 case ICMP_IPV6_WHEREAREYOU:
2949 *icmp_dir = PF_IN;
2950 /* FALLTHROUGH */
2951 case ICMP_IPV6_IAMHERE:
2952 *virtual_type = ICMP_IPV6_WHEREAREYOU;
2953 *virtual_id = 0; /* Nothing sane to match on! */
2954 break;
2955
2956 case ICMP_MOBILE_REGREQUEST:
2957 *icmp_dir = PF_IN;
2958 /* FALLTHROUGH */
2959 case ICMP_MOBILE_REGREPLY:
2960 *virtual_type = ICMP_MOBILE_REGREQUEST;
2961 *virtual_id = 0; /* Nothing sane to match on! */
2962 break;
2963
2964 case ICMP_ROUTERSOLICIT:
2965 *icmp_dir = PF_IN;
2966 /* FALLTHROUGH */
2967 case ICMP_ROUTERADVERT:
2968 *virtual_type = ICMP_ROUTERSOLICIT;
2969 *virtual_id = 0; /* Nothing sane to match on! */
2970 break;
2971
2972 /* These ICMP types map to other connections */
2973 case ICMP_UNREACH:
2974 case ICMP_SOURCEQUENCH:
2975 case ICMP_REDIRECT:
2976 case ICMP_TIMXCEED:
2977 case ICMP_PARAMPROB:
2978 /* These will not be used, but set them anyway */
2979 *icmp_dir = PF_IN;
2980 *virtual_type = htons(type);
2981 *virtual_id = 0;
2982 return (1); /* These types match to another state */
2983
2984 /*
2985 * All remaining ICMP types get their own states,
2986 * and will only match in one direction.
2987 */
2988 default:
2989 *icmp_dir = PF_IN;
2990 *virtual_type = type;
2991 *virtual_id = 0;
2992 break;
2993 }
2994 break;
2995#ifdef INET6
2996 case AF_INET6:
2997 switch (type) {
2998 case ICMP6_ECHO_REQUEST:
2999 *icmp_dir = PF_IN;
3000 /* FALLTHROUGH */
3001 case ICMP6_ECHO_REPLY:
3002 *virtual_type = ICMP6_ECHO_REQUEST;
3003 *virtual_id = pd->hdr.icmp6.icmp6_id;
3004 break;
3005
3006 case MLD_LISTENER_QUERY:
3007 case MLD_LISTENER_REPORT: {
3008 struct mld_hdr *mld = &pd->hdr.mld;
3009 u_int32_t h;
3010
3011 /*
3012 * Listener Report can be sent by clients
3013 * without an associated Listener Query.
3014		 * Moreover, when a Report is sent as a reply
3015		 * to a Query, its source and destination
3016		 * addresses differ.
3017 */
3018 *icmp_dir = PF_IN;
3019 *virtual_type = MLD_LISTENER_QUERY;
3020 /* generate fake id for these messages */
3021 h = mld->mld_addr.s6_addr32[0] ^
3022 mld->mld_addr.s6_addr32[1] ^
3023 mld->mld_addr.s6_addr32[2] ^
3024 mld->mld_addr.s6_addr32[3];
3025 *virtual_id = (h >> 16) ^ (h & 0xffff);
3026 break;
3027 }
3028
3029 /*
3030 * ICMP6_FQDN and ICMP6_NI query/reply are the same type as
3031 * ICMP6_WRU
3032 */
3033 case ICMP6_WRUREQUEST:
3034 *icmp_dir = PF_IN;
3035 /* FALLTHROUGH */
3036 case ICMP6_WRUREPLY:
3037 *virtual_type = ICMP6_WRUREQUEST;
3038 *virtual_id = 0; /* Nothing sane to match on! */
3039 break;
3040
3041 case MLD_MTRACE:
3042 *icmp_dir = PF_IN;
3043 /* FALLTHROUGH */
3044 case MLD_MTRACE_RESP:
3045 *virtual_type = MLD_MTRACE;
3046 *virtual_id = 0; /* Nothing sane to match on! */
3047 break;
3048
3049 case ND_NEIGHBOR_SOLICIT:
3050 *icmp_dir = PF_IN;
3051 /* FALLTHROUGH */
3052 case ND_NEIGHBOR_ADVERT: {
3053 struct nd_neighbor_solicit *nd = &pd->hdr.nd_ns;
3054 u_int32_t h;
3055
3056 *virtual_type = ND_NEIGHBOR_SOLICIT;
3057 /* generate fake id for these messages */
3058 h = nd->nd_ns_target.s6_addr32[0] ^
3059 nd->nd_ns_target.s6_addr32[1] ^
3060 nd->nd_ns_target.s6_addr32[2] ^
3061 nd->nd_ns_target.s6_addr32[3];
3062 *virtual_id = (h >> 16) ^ (h & 0xffff);
3063 /*
3064			 * The extra work here deals with the 'keep state'
3065			 * option on pass rules for unsolicited advertisements.
3066			 * By returning 1 (state_icmp = 1) we override 'keep
3067			 * state' with 'no state', so no state is created for
3068			 * unsolicited advertisements; nobody expects an answer
3069			 * to one, so we should be fine.
3070 */
3071 if (type == ND_NEIGHBOR_ADVERT) {
3072 *virtual_type = htons(*virtual_type);
3073 return (1);
3074 }
3075 break;
3076 }
3077
3078 /*
3079 * These ICMP types map to other connections.
3080 * ND_REDIRECT can't be in this list because the triggering
3081 * packet header is optional.
3082 */
3083 case ICMP6_DST_UNREACH:
3084 case ICMP6_PACKET_TOO_BIG:
3085 case ICMP6_TIME_EXCEEDED:
3086 case ICMP6_PARAM_PROB:
3087 /* These will not be used, but set them anyway */
3088 *icmp_dir = PF_IN;
3089 *virtual_type = htons(type);
3090 *virtual_id = 0;
3091 return (1); /* These types match to another state */
3092 /*
3093 * All remaining ICMP6 types get their own states,
3094 * and will only match in one direction.
3095 */
3096 default:
3097 *icmp_dir = PF_IN;
3098 *virtual_type = type;
3099 *virtual_id = 0;
3100 break;
3101 }
3102 break;
3103#endif /* INET6 */
3104 }
3105 *virtual_type = htons(*virtual_type);
3106 return (0); /* These types match to their own state */
3107}
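
/*
 * Editor's sketch (not part of pf; pf_icmp_mapping_example() is a
 * made-up name): typical use for an IPv4 echo.  Request and reply
 * both map to virtual_type ICMP_ECHO keyed on icmp_id, so the reply
 * finds the request's state instead of needing its own; a non-zero
 * return flags error messages that match some other connection's
 * state entirely.
 */
static inline int
pf_icmp_mapping_example(struct pf_pdesc *pd)
{
	int dir;
	u_int16_t vid, vtype;

	if (pf_icmp_mapping(pd, pd->hdr.icmp.icmp_type,
	    &dir, &vid, &vtype))
		return (-1);	/* maps to another state (icmp error) */
	return (dir == PF_IN);	/* 1: the query side creates state */
}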
3108
3109void
3110pf_translate_icmp(struct pf_pdesc *pd, struct pf_addr *qa, u_int16_t *qp,
3111 struct pf_addr *oa, struct pf_addr *na, u_int16_t np)
3112{
3113	/* note: doesn't bother to fix up quoted checksums, if any */
3114
3115 /* change quoted protocol port */
3116 if (qp != NULL)
3117 pf_patch_16(pd, qp, np);
3118
3119 /* change quoted ip address */
3120 pf_cksum_fixup_a(pd->pcksum, qa, na, pd->af, pd->proto);
3121 pf_addrcpy(qa, na, pd->af);
3122
3123 /* change network-header's ip address */
3124 if (oa)
3125 pf_translate_a(pd, oa, na);
3126}
3127
3128/* pre: *a is 16-bit aligned within its packet */
3129/* *a is a network header src/dst address */
3130int
3131pf_translate_a(struct pf_pdesc *pd, struct pf_addr *a, struct pf_addr *an)
3132{
3133 int rewrite = 0;
3134
3135 /* warning: !PF_ANEQ != PF_AEQ */
3136 if (!PF_ANEQ(a, an, pd->af))
3137 return (0);
3138
3139 /* fixup transport pseudo-header, if any */
3140 switch (pd->proto) {
3141 case IPPROTO_TCP: /* FALLTHROUGH */
3142 case IPPROTO_UDP: /* FALLTHROUGH */
3143 case IPPROTO_ICMPV6:
3144 pf_cksum_fixup_a(pd->pcksum, a, an, pd->af, pd->proto);
3145 break;
3146 default:
3147 break; /* assume no pseudo-header */
3148 }
3149
3150 pf_addrcpy(a, an, pd->af);
3151 rewrite = 1;
3152
3153 return (rewrite);
3154}
3155
3156#ifdef INET6
3157/* pf_translate_af() may change pd->m, adjust local copies after calling */
3158int
3159pf_translate_af(struct pf_pdesc *pd)
3160{
3161 static const struct pf_addr zero;
3162 struct ip *ip4;
3163 struct ip6_hdr *ip6;
3164 int copyback = 0;
3165 u_int hlen, ohlen, dlen;
3166 u_int16_t *pc;
3167 u_int8_t af_proto, naf_proto;
3168
3169 hlen = (pd->naf == AF_INET) ? sizeof(*ip4) : sizeof(*ip6);
3170 ohlen = pd->off;
3171 dlen = pd->tot_len - pd->off;
3172 pc = pd->pcksum;
3173
3174 af_proto = naf_proto = pd->proto;
3175 if (naf_proto == IPPROTO_ICMP)
3176 af_proto = IPPROTO_ICMPV6;
3177 if (naf_proto == IPPROTO_ICMPV6)
3178 af_proto = IPPROTO_ICMP;
3179
3180 /* uncover stale pseudo-header */
3181 switch (af_proto) {
3182 case IPPROTO_ICMPV6:
3183 /* optimise: unchanged for TCP/UDP */
3184 pf_cksum_fixup(pc, htons(af_proto), 0x0, af_proto);
3185 pf_cksum_fixup(pc, htons(dlen), 0x0, af_proto);
3186 /* FALLTHROUGH */
3187 case IPPROTO_UDP: /* FALLTHROUGH */
3188 case IPPROTO_TCP:
3189 pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto);
3190 pf_cksum_fixup_a(pc, pd->dst, &zero, pd->af, af_proto);
3191 copyback = 1;
3192 break;
3193 default:
3194 break; /* assume no pseudo-header */
3195 }
3196
3197 /* replace the network header */
3198 m_adj(pd->m, pd->off);
3199 pd->src = NULL;
3200 pd->dst = NULL;
3201
3202 if ((M_PREPEND(pd->m, hlen, M_DONTWAIT)) == NULL) {
3203 pd->m = NULL;
3204 return (-1);
3205 }
3206
3207 pd->off = hlen;
3208 pd->tot_len += hlen - ohlen;
3209
3210 switch (pd->naf) {
3211 case AF_INET:
3212 ip4 = mtod(pd->m, struct ip *);
3213 memset(ip4, 0, hlen);
3214 ip4->ip_v = IPVERSION;
3215 ip4->ip_hl = hlen >> 2;
3216 ip4->ip_tos = pd->tos;
3217 ip4->ip_len = htons(hlen + dlen);
3218 ip4->ip_id = htons(ip_randomid());
3219 ip4->ip_off = htons(IP_DF);
3220 ip4->ip_ttl = pd->ttl;
3221 ip4->ip_p = pd->proto;
3222 ip4->ip_src = pd->nsaddr.v4;
3223 ip4->ip_dst = pd->ndaddr.v4;
3224 break;
3225 case AF_INET6:
3226 ip6 = mtod(pd->m, struct ip6_hdr *);
3227 memset(ip6, 0, hlen);
3228 ip6->ip6_vfc = IPV6_VERSION;
3229 ip6->ip6_flow |= htonl((u_int32_t)pd->tos << 20);
3230 ip6->ip6_plen = htons(dlen);
3231 ip6->ip6_nxt = pd->proto;
3232 if (!pd->ttl || pd->ttl > IPV6_DEFHLIM)
3233 ip6->ip6_hlim = IPV6_DEFHLIM;
3234 else
3235 ip6->ip6_hlim = pd->ttl;
3236 ip6->ip6_src = pd->nsaddr.v6;
3237 ip6->ip6_dst = pd->ndaddr.v6;
3238 break;
3239 default:
3240 unhandled_af(pd->naf);
3241 }
3242
3243 /* UDP over IPv6 must be checksummed per rfc2460 p27 */
3244 if (naf_proto == IPPROTO_UDP && *pc == 0x0000 &&
3245 pd->naf == AF_INET6) {
3246 pd->m->m_pkthdr.csum_flags |= M_UDP_CSUM_OUT;
3247 }
3248
3249 /* cover fresh pseudo-header */
3250 switch (naf_proto) {
3251 case IPPROTO_ICMPV6:
3252 /* optimise: unchanged for TCP/UDP */
3253 pf_cksum_fixup(pc, 0x0, htons(naf_proto), naf_proto);
3254 pf_cksum_fixup(pc, 0x0, htons(dlen), naf_proto);
3255 /* FALLTHROUGH */
3256 case IPPROTO_UDP: /* FALLTHROUGH */
3257 case IPPROTO_TCP:
3258 pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto);
3259 pf_cksum_fixup_a(pc, &zero, &pd->ndaddr, pd->naf, naf_proto);
3260 copyback = 1;
3261 break;
3262 default:
3263 break; /* assume no pseudo-header */
3264 }
3265
3266 /* flush pd->pcksum */
3267 if (copyback)
3268 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
3269
3270 return (0);
3271}
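
/*
 * Editor's note: the checksum handling above in miniature.  The
 * transport checksum covers a pseudo-header of addresses, protocol
 * and length; rather than recompute it from scratch for the address
 * family translation, the code first cancels the stale terms by
 * fixing them up towards zero:
 *
 *	pf_cksum_fixup_a(pc, pd->src, &zero, pd->af, af_proto);
 *
 * and, once the new network header is in place, covers the fresh
 * terms by fixing them up from zero:
 *
 *	pf_cksum_fixup_a(pc, &zero, &pd->nsaddr, pd->naf, naf_proto);
 *
 * leaving the payload's original checksum contribution untouched.
 */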
3272
3273int
3274pf_change_icmp_af(struct mbuf *m, int ipoff2, struct pf_pdesc *pd,
3275 struct pf_pdesc *pd2, struct pf_addr *src, struct pf_addr *dst,
3276 sa_family_t af, sa_family_t naf)
3277{
3278 struct mbuf *n = NULL;
3279 struct ip *ip4;
3280 struct ip6_hdr *ip6;
3281 u_int hlen, ohlen, dlen;
3282 int d;
3283
3284 if (af == naf || (af != AF_INET && af != AF_INET6) ||
3285 (naf != AF_INET && naf != AF_INET6))
3286 return (-1);
3287
3288 /* split the mbuf chain on the quoted ip/ip6 header boundary */
3289 if ((n = m_split(m, ipoff2, M_DONTWAIT)) == NULL)
3290 return (-1);
3291
3292 /* new quoted header */
3293 hlen = naf == AF_INET ? sizeof(*ip4) : sizeof(*ip6);
3294 /* old quoted header */
3295 ohlen = pd2->off - ipoff2;
3296
3297 /* trim old quoted header */
3298 pf_cksum_uncover(pd->pcksum, in_cksum(n, ohlen), pd->proto);
3299 m_adj(n, ohlen);
3300
3301 /* prepend a new, translated, quoted header */
3302 if ((M_PREPEND(n, hlen, M_DONTWAIT)) == NULL)
3303 return (-1);
3304
3305 switch (naf) {
3306 case AF_INET:
3307 ip4 = mtod(n, struct ip *);
3308 memset(ip4, 0, sizeof(*ip4));
3309 ip4->ip_v = IPVERSION;
3310 ip4->ip_hl = sizeof(*ip4) >> 2;
3311 ip4->ip_len = htons(sizeof(*ip4) + pd2->tot_len - ohlen);
3312 ip4->ip_id = htons(ip_randomid());
3313 ip4->ip_off = htons(IP_DF);
3314 ip4->ip_ttl = pd2->ttl;
3315 if (pd2->proto == IPPROTO_ICMPV6)
3316 ip4->ip_p = IPPROTO_ICMP;
3317 else
3318 ip4->ip_p = pd2->proto;
3319 ip4->ip_src = src->v4;
3320 ip4->ip_dst = dst->v4;
3321 in_hdr_cksum_out(n, NULL);
3322 break;
3323 case AF_INET6:
3324 ip6 = mtod(n, struct ip6_hdr *);
3325 memset(ip6, 0, sizeof(*ip6));
3326 ip6->ip6_vfc = IPV6_VERSION;
3327 ip6->ip6_plen = htons(pd2->tot_len - ohlen);
3328 if (pd2->proto == IPPROTO_ICMP)
3329 ip6->ip6_nxt = IPPROTO_ICMPV6;
3330 else
3331 ip6->ip6_nxt = pd2->proto;
3332 if (!pd2->ttl || pd2->ttl > IPV6_DEFHLIM)
3333 ip6->ip6_hlim = IPV6_DEFHLIM;
3334 else
3335 ip6->ip6_hlim = pd2->ttl;
3336 ip6->ip6_src = src->v6;
3337 ip6->ip6_dst = dst->v6;
3338 break;
3339 }
3340
3341 /* cover new quoted header */
3342 /* optimise: any new AF_INET header of ours sums to zero */
3343 if (naf != AF_INET) {
3344 pf_cksum_cover(pd->pcksum, in_cksum(n, hlen), pd->proto);
3345 }
3346
3347 /* reattach modified quoted packet to outer header */
3348 {
3349 int nlen = n->m_pkthdr.len;
3350 m_cat(m, n);
3351 m->m_pkthdr.len += nlen;
3352 }
3353
3354 /* account for altered length */
3355 d = hlen - ohlen;
3356
3357 if (pd->proto == IPPROTO_ICMPV6) {
3358 /* fixup pseudo-header */
3359 dlen = pd->tot_len - pd->off;
3360 pf_cksum_fixup(pd->pcksum,
3361 htons(dlen), htons(dlen + d), pd->proto);
3362 }
3363
3364 pd->tot_len += d;
3365 pd2->tot_len += d;
3366 pd2->off += d;
3367
3368	/* note: not bothering to update network headers as
3369	   these are due for rewrite by pf_translate_af() */
3370
3371 return (0);
3372}
3373
3374
3375#define PTR_IP(field) (offsetof(struct ip, field))
3376#define PTR_IP6(field) (offsetof(struct ip6_hdr, field))
3377
3378int
3379pf_translate_icmp_af(struct pf_pdesc *pd, int af, void *arg)
3380{
3381 struct icmp *icmp4;
3382 struct icmp6_hdr *icmp6;
3383 u_int32_t mtu;
3384 int32_t ptr = -1;
3385 u_int8_t type;
3386 u_int8_t code;
3387
3388 switch (af) {
3389 case AF_INET:
3390 icmp6 = arg;
3391 type = icmp6->icmp6_type;
3392 code = icmp6->icmp6_code;
3393 mtu = ntohl(icmp6->icmp6_mtu);
3394
3395 switch (type) {
3396 case ICMP6_ECHO_REQUEST:
3397 type = ICMP_ECHO;
3398 break;
3399 case ICMP6_ECHO_REPLY:
3400 type = ICMP_ECHOREPLY;
3401 break;
3402 case ICMP6_DST_UNREACH:
3403 type = ICMP_UNREACH;
3404 switch (code) {
3405 case ICMP6_DST_UNREACH_NOROUTE:
3406 case ICMP6_DST_UNREACH_BEYONDSCOPE:
3407 case ICMP6_DST_UNREACH_ADDR:
3408 code = ICMP_UNREACH_HOST;
3409 break;
3410 case ICMP6_DST_UNREACH_ADMIN:
3411 code = ICMP_UNREACH_HOST_PROHIB;
3412 break;
3413 case ICMP6_DST_UNREACH_NOPORT:
3414 code = ICMP_UNREACH_PORT;
3415 break;
3416 default:
3417 return (-1);
3418 }
3419 break;
3420 case ICMP6_PACKET_TOO_BIG:
3421 type = ICMP_UNREACH;
3422 code = ICMP_UNREACH_NEEDFRAG;
3423 mtu -= 20;
3424 break;
3425 case ICMP6_TIME_EXCEEDED:
3426 type = ICMP_TIMXCEED;
3427 break;
3428 case ICMP6_PARAM_PROB:
3429 switch (code) {
3430 case ICMP6_PARAMPROB_HEADER:
3431 type = ICMP_PARAMPROB;
3432 code = ICMP_PARAMPROB_ERRATPTR;
3433 ptr = ntohl(icmp6->icmp6_pptr);
3434
3435 if (ptr == PTR_IP6(ip6_vfc))
3436 ; /* preserve */
3437 else if (ptr == PTR_IP6(ip6_vfc) + 1)
3438 ptr = PTR_IP(ip_tos);
3439 else if (ptr == PTR_IP6(ip6_plen) ||
3440 ptr == PTR_IP6(ip6_plen) + 1)
3441 ptr = PTR_IP(ip_len);
3442 else if (ptr == PTR_IP6(ip6_nxt))
3443 ptr = PTR_IP(ip_p);
3444 else if (ptr == PTR_IP6(ip6_hlim))
3445 ptr = PTR_IP(ip_ttl);
3446 else if (ptr >= PTR_IP6(ip6_src) &&
3447 ptr < PTR_IP6(ip6_dst))
3448 ptr = PTR_IP(ip_src);
3449 else if (ptr >= PTR_IP6(ip6_dst) &&
3450 ptr < sizeof(struct ip6_hdr))
3451 ptr = PTR_IP(ip_dst);
3452 else {
3453 return (-1);
3454 }
3455 break;
3456 case ICMP6_PARAMPROB_NEXTHEADER:
3457 type = ICMP_UNREACH;
3458 code = ICMP_UNREACH_PROTOCOL;
3459 break;
3460 default:
3461 return (-1);
3462 }
3463 break;
3464 default:
3465 return (-1);
3466 }
3467
3468 pf_patch_8(pd, &icmp6->icmp6_type, type, PF_HI);
3469 pf_patch_8(pd, &icmp6->icmp6_code, code, PF_LO);
3470
3471		/* aligns well with an icmpv4 nextmtu */
3472 pf_patch_32(pd, &icmp6->icmp6_mtu, htonl(mtu));
3473
3474		/* icmpv4 pptr is a single byte, the most significant one */
3475 if (ptr >= 0)
3476 pf_patch_32(pd, &icmp6->icmp6_pptr, htonl(ptr << 24));
3477 break;
3478 case AF_INET6:
3479 icmp4 = arg;
3480 type = icmp4->icmp_type;
3481 code = icmp4->icmp_code;
3482 mtu = ntohs(icmp4->icmp_nextmtu);
3483
3484 switch (type) {
3485 case ICMP_ECHO:
3486 type = ICMP6_ECHO_REQUEST;
3487 break;
3488 case ICMP_ECHOREPLY:
3489 type = ICMP6_ECHO_REPLY;
3490 break;
3491 case ICMP_UNREACH:
3492 type = ICMP6_DST_UNREACH;
3493 switch (code) {
3494 case ICMP_UNREACH_NET:
3495 case ICMP_UNREACH_HOST:
3496 case ICMP_UNREACH_NET_UNKNOWN:
3497 case ICMP_UNREACH_HOST_UNKNOWN:
3498 case ICMP_UNREACH_ISOLATED:
3499 case ICMP_UNREACH_TOSNET:
3500 case ICMP_UNREACH_TOSHOST:
3501 code = ICMP6_DST_UNREACH_NOROUTE;
3502 break;
3503 case ICMP_UNREACH_PORT:
3504 code = ICMP6_DST_UNREACH_NOPORT;
3505 break;
3506 case ICMP_UNREACH_NET_PROHIB:
3507 case ICMP_UNREACH_HOST_PROHIB:
3508 case ICMP_UNREACH_FILTER_PROHIB:
3509 case ICMP_UNREACH_PRECEDENCE_CUTOFF:
3510 code = ICMP6_DST_UNREACH_ADMIN;
3511 break;
3512 case ICMP_UNREACH_PROTOCOL:
3513 type = ICMP6_PARAM_PROB;
3514 code = ICMP6_PARAMPROB_NEXTHEADER;
3515 ptr = offsetof(struct ip6_hdr, ip6_nxt);
3516 break;
3517 case ICMP_UNREACH_NEEDFRAG:
3518 type = ICMP6_PACKET_TOO_BIG;
3519 code = 0;
3520 mtu += 20;
3521 break;
3522 default:
3523 return (-1);
3524 }
3525 break;
3526 case ICMP_TIMXCEED:
3527 type = ICMP6_TIME_EXCEEDED;
3528 break;
3529 case ICMP_PARAMPROB:
3530 type = ICMP6_PARAM_PROB;
3531 switch (code) {
3532 case ICMP_PARAMPROB_ERRATPTR:
3533 code = ICMP6_PARAMPROB_HEADER;
3534 break;
3535 case ICMP_PARAMPROB_LENGTH:
3536 code = ICMP6_PARAMPROB_HEADER;
3537 break;
3538 default:
3539 return (-1);
3540 }
3541
3542 ptr = icmp4->icmp_pptr;
3543 if (ptr == 0 || ptr == PTR_IP(ip_tos))
3544 ; /* preserve */
3545 else if (ptr == PTR_IP(ip_len) ||
3546 ptr == PTR_IP(ip_len) + 1)
3547 ptr = PTR_IP6(ip6_plen);
3548 else if (ptr == PTR_IP(ip_ttl))
3549 ptr = PTR_IP6(ip6_hlim);
3550 else if (ptr == PTR_IP(ip_p))
3551 ptr = PTR_IP6(ip6_nxt);
3552 else if (ptr >= PTR_IP(ip_src) &&
3553 ptr < PTR_IP(ip_dst))
3554 ptr = PTR_IP6(ip6_src);
3555 else if (ptr >= PTR_IP(ip_dst) &&
3556 ptr < sizeof(struct ip))
3557 ptr = PTR_IP6(ip6_dst);
3558 else {
3559 return (-1);
3560 }
3561 break;
3562 default:
3563 return (-1);
3564 }
3565
3566 pf_patch_8(pd, &icmp4->icmp_type, type, PF_HI);
3567 pf_patch_8(pd, &icmp4->icmp_code, code, PF_LO);
3568 pf_patch_16(pd, &icmp4->icmp_nextmtu, htons(mtu));
3569 if (ptr >= 0)
3570 pf_patch_32(pd, &icmp4->icmp_void, htonl(ptr));
3571 break;
3572 }
3573
3574 return (0);
3575}
3576#endif /* INET6 */
3577
3578/*
3579 * Need to modulate the sequence numbers in the TCP SACK option
3580 * (credits to Krzysztof Pfaff for report and patch)
3581 */
3582int
3583pf_modulate_sack(struct pf_pdesc *pd, struct pf_state_peer *dst)
3584{
3585 struct sackblk sack;
3586 int copyback = 0, i;
3587 int olen, optsoff;
3588 u_int8_t opts[MAX_TCPOPTLEN], *opt, *eoh;
3589
3590 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
3591 optsoff = pd->off + sizeof(struct tcphdr);
3592#define TCPOLEN_MINSACK (TCPOLEN_SACK + 2)
3593 if (olen < TCPOLEN_MINSACK ||
3594 !pf_pull_hdr(pd->m, optsoff, opts, olen, NULL, pd->af))
3595 return (0);
3596
3597 eoh = opts + olen;
3598 opt = opts;
3599 while ((opt = pf_find_tcpopt(opt, opts, olen,
3600 TCPOPT_SACK, TCPOLEN_MINSACK)) != NULL)
3601 {
3602 size_t safelen = MIN(opt[1], (eoh - opt));
3603 for (i = 2; i + TCPOLEN_SACK <= safelen; i += TCPOLEN_SACK) {
3604 size_t startoff = (opt + i) - opts;
3605 memcpy(&sack, &opt[i], sizeof(sack));
3606 pf_patch_32_unaligned(pd, &sack.start,
3607 htonl(ntohl(sack.start) - dst->seqdiff),
3608 PF_ALGNMNT(startoff));
3609 pf_patch_32_unaligned(pd, &sack.end,
3610 htonl(ntohl(sack.end) - dst->seqdiff),
3611 PF_ALGNMNT(startoff + sizeof(sack.start)));
3612 memcpy(&opt[i], &sack, sizeof(sack));
3613 }
3614 copyback = 1;
3615 opt += opt[1];
3616 }
3617
3618 if (copyback)
3619 m_copyback(pd->m, optsoff, olen, opts, M_NOWAIT);
3620 return (copyback);
3621}
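
/*
 * Editor's sketch (not part of pf; pf_sack_demodulate_model() is a
 * made-up name): what the loop above does to one SACK edge.  With a
 * sequence modulator (seqdiff) of 1000, a peer-advertised block
 * [4000, 5000) is rewritten to [3000, 4000) so it lines up with the
 * unmodulated sequence space the other endpoint actually sees.
 */
static inline u_int32_t
pf_sack_demodulate_model(u_int32_t edge, u_int32_t seqdiff)
{
	/* edge is in network byte order, as in the TCP option */
	return (htonl(ntohl(edge) - seqdiff));
}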
3622
3623struct mbuf *
3624pf_build_tcp(const struct pf_rule *r, sa_family_t af,
3625 const struct pf_addr *saddr, const struct pf_addr *daddr,
3626 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
3627 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
3628 u_int16_t rtag, u_int sack, u_int rdom, u_short *reason)
3629{
3630 struct mbuf *m;
3631 int len, tlen;
3632 struct ip *h;
3633#ifdef INET6
3634 struct ip6_hdr *h6;
3635#endif /* INET6 */
3636 struct tcphdr *th;
3637 char *opt;
3638
3639 /* maximum segment size tcp option */
3640 tlen = sizeof(struct tcphdr);
3641 if (mss)
3642 tlen += 4;
3643 if (sack)
3644 tlen += 2;
3645
3646 switch (af) {
3647 case AF_INET:
3648 len = sizeof(struct ip) + tlen;
3649 break;
3650#ifdef INET6
3651 case AF_INET6:
3652 len = sizeof(struct ip6_hdr) + tlen;
3653 break;
3654#endif /* INET6 */
3655 default:
3656 unhandled_af(af);
3657 }
3658
3659 /* create outgoing mbuf */
3660 m = m_gethdr(M_DONTWAIT, MT_HEADER);
3661 if (m == NULL) {
3662 REASON_SET(reason, PFRES_MEMORY);
3663 return (NULL);
3664 }
3665 if (tag)
3666 m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
3667 m->m_pkthdr.pf.tag = rtag;
3668 m->m_pkthdr.ph_rtableid = rdom;
3669 if (r && (r->scrub_flags & PFSTATE_SETPRIO))
3670 m->m_pkthdr.pf.prio = r->set_prio[0];
3671 if (r && r->qid)
3672 m->m_pkthdr.pf.qid = r->qid;
3673 m->m_data += max_linkhdr;
3674 m->m_pkthdr.len = m->m_len = len;
3675 m->m_pkthdr.ph_ifidx = 0;
3676 m->m_pkthdr.csum_flags |= M_TCP_CSUM_OUT;
3677 memset(m->m_data, 0, len);
3678 switch (af) {
3679 case AF_INET:
3680 h = mtod(m, struct ip *);
3681 h->ip_p = IPPROTO_TCP;
3683 h->ip_v = 4;
3684 h->ip_hl = sizeof(*h) >> 2;
3685 h->ip_tos = IPTOS_LOWDELAY;
3686 h->ip_len = htons(len);
3687 h->ip_off = htons(atomic_load_int(&ip_mtudisc) ? IP_DF : 0);
3688 h->ip_ttl = ttl ? ttl : atomic_load_int(&ip_defttl);
3689 h->ip_sum = 0;
3690 h->ip_src.s_addr = saddr->v4.s_addr;
3691 h->ip_dst.s_addr = daddr->v4.s_addr;
3692
3693 th = (struct tcphdr *)((caddr_t)h + sizeof(struct ip));
3694 break;
3695#ifdef INET6
3696 case AF_INET6:
3697 h6 = mtod(m, struct ip6_hdr *);
3698 h6->ip6_nxt = IPPROTO_TCP;
3699 h6->ip6_plen = htons(tlen);
3700 h6->ip6_vfc |= IPV6_VERSION;
3701 h6->ip6_hlim = IPV6_DEFHLIM;
3702 memcpy(&h6->ip6_src, &saddr->v6, sizeof(struct in6_addr));
3703 memcpy(&h6->ip6_dst, &daddr->v6, sizeof(struct in6_addr));
3704
3705 th = (struct tcphdr *)((caddr_t)h6 + sizeof(struct ip6_hdr));
3706 break;
3707#endif /* INET6 */
3708 default:
3709 unhandled_af(af);
3710 }
3711
3712 /* TCP header */
3713 th->th_sport = sport;
3714 th->th_dport = dport;
3715 th->th_seq = htonl(seq);
3716 th->th_ack = htonl(ack);
3717 th->th_off = tlen >> 2;
3718 th->th_flags = flags;
3719 th->th_win = htons(win);
3720
3721 opt = (char *)(th + 1);
3722 if (mss) {
3723 opt[0] = TCPOPT_MAXSEG;
3724 opt[1] = 4;
3725 mss = htons(mss);
3726 memcpy((opt + 2), &mss, 2);
3727 opt += 4;
3728 }
3729 if (sack) {
3730 opt[0] = TCPOPT_SACK_PERMITTED;
3731 opt[1] = 2;
3732 opt += 2;
3733 }
3734
3735 return (m);
3736}
3737
3738void
3739pf_send_tcp(const struct pf_rule *r, sa_family_t af,
3740 const struct pf_addr *saddr, const struct pf_addr *daddr,
3741 u_int16_t sport, u_int16_t dport, u_int32_t seq, u_int32_t ack,
3742 u_int8_t flags, u_int16_t win, u_int16_t mss, u_int8_t ttl, int tag,
3743 u_int16_t rtag, u_int rdom, u_short *reason)
3744{
3745 struct mbuf *m;
3746
3747 if ((m = pf_build_tcp(r, af, saddr, daddr, sport, dport, seq, ack,
3748 flags, win, mss, ttl, tag, rtag, 0, rdom, reason)) == NULL)
3749 return;
3750
3751 switch (af) {
3752 case AF_INET:
3753 ip_send(m);
3754 break;
3755#ifdef INET6
3756 case AF_INET6:
3757 ip6_send(m);
3758 break;
3759#endif /* INET6 */
3760 }
3761}
3762
3763static void
3764pf_send_challenge_ack(struct pf_pdesc *pd, struct pf_state *st,
3765 struct pf_state_peer *src, struct pf_state_peer *dst, u_short *reason)
3766{
3767 /*
3768	 * We are sending a challenge ACK in response to a SYN packet
3769	 * that matches an existing state (modulo the TCP window check).
3770	 * The packet must therefore be sent on behalf of the destination.
3771	 *
3772	 * We expect the sender to either stay silent or send an RST, so
3773	 * that both the firewall and the remote peer can purge the dead
3774	 * state from memory.
3775 */
3776 pf_send_tcp(st->rule.ptr, pd->af, pd->dst, pd->src,
3777 pd->hdr.tcp.th_dport, pd->hdr.tcp.th_sport, dst->seqlo,
3778 src->seqlo, TH_ACK, 0, 0, st->rule.ptr->return_ttl, 1, 0,
3779 pd->rdomain, reason);
3780}
3781
3782void
3783pf_send_icmp(struct mbuf *m, u_int8_t type, u_int8_t code, int param,
3784 sa_family_t af, struct pf_rule *r, u_int rdomain)
3785{
3786 struct mbuf *m0;
3787
3788 if ((m0 = m_copym(m, 0, M_COPYALL, M_NOWAIT)) == NULL)
3789 return;
3790
3791 m0->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
3792 m0->m_pkthdr.ph_rtableid = rdomain;
3793 if (r && (r->scrub_flags & PFSTATE_SETPRIO))
3794 m0->m_pkthdr.pf.prio = r->set_prio[0];
3795 if (r && r->qid)
3796 m0->m_pkthdr.pf.qid = r->qid;
3797
3798 switch (af) {
3799 case AF_INET:
3800 icmp_error(m0, type, code, 0, param);
3801 break;
3802#ifdef INET6
3803 case AF_INET6:
3804 icmp6_error(m0, type, code, param);
3805 break;
3806#endif /* INET6 */
3807 }
3808}
3809
3810/*
3811 * Return ((n == 0) == (a == b [with mask m]))
3812 * i.e. n == 0 returns (a == b), n != 0 returns (a != b), under mask m.
3813 */
3814int
3815pf_match_addr(u_int8_t n, struct pf_addr *a, struct pf_addr *m,
3816 struct pf_addr *b, sa_family_t af)
3817{
3818 switch (af) {
3819 case AF_INET:
3820 if ((a->addr32[0] & m->addr32[0]) ==
3821 (b->addr32[0] & m->addr32[0]))
3822 return (n == 0);
3823 break;
3824#ifdef INET6
3825 case AF_INET6:
3826 if (((a->addr32[0] & m->addr32[0]) ==
3827 (b->addr32[0] & m->addr32[0])) &&
3828 ((a->addr32[1] & m->addr32[1]) ==
3829 (b->addr32[1] & m->addr32[1])) &&
3830 ((a->addr32[2] & m->addr32[2]) ==
3831 (b->addr32[2] & m->addr32[2])) &&
3832 ((a->addr32[3] & m->addr32[3]) ==
3833 (b->addr32[3] & m->addr32[3])))
3834 return (n == 0);
3835 break;
3836#endif /* INET6 */
3837 }
3838
3839 return (n != 0);
3840}
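
/*
 * Editor's sketch (not part of pf): pf_match_addr() in use.  n is the
 * rule's negation flag.  A rule "from !10.0.0.0/8" matches a packet
 * from 192.168.1.1 because (192.168.1.1 & 255.0.0.0) differs from
 * (10.0.0.0 & 255.0.0.0) and n != 0; the same packet fails the
 * non-negated "from 10.0.0.0/8" (n == 0) for the same reason.
 */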
3841
3842/*
3843 * Return 1 if b <= a <= e, otherwise return 0.
3844 */
3845int
3846pf_match_addr_range(struct pf_addr *b, struct pf_addr *e,
3847 struct pf_addr *a, sa_family_t af)
3848{
3849 switch (af) {
3850 case AF_INET:
3851 if ((ntohl(a->addr32[0]) < ntohl(b->addr32[0])) ||
3852 (ntohl(a->addr32[0]) > ntohl(e->addr32[0])))
3853 return (0);
3854 break;
3855#ifdef INET6
3856 case AF_INET6: {
3857 int i;
3858
3859 /* check a >= b */
3860 for (i = 0; i < 4; ++i)
3861 if (ntohl(a->addr32[i]) > ntohl(b->addr32[i]))
3862 break;
3863 else if (ntohl(a->addr32[i]) < ntohl(b->addr32[i]))
3864 return (0);
3865 /* check a <= e */
3866 for (i = 0; i < 4; ++i)
3867 if (ntohl(a->addr32[i]) < ntohl(e->addr32[i]))
3868 break;
3869 else if (ntohl(a->addr32[i]) > ntohl(e->addr32[i]))
3870 return (0);
3871 break;
3872 }
3873#endif /* INET6 */
3874 }
3875 return (1);
3876}
3877
3878int
3879pf_match(u_int8_t op, u_int32_t a1, u_int32_t a2, u_int32_t p)
3880{
3881 switch (op) {
3882 case PF_OP_IRG:
3883 return ((p > a1) && (p < a2));
3884 case PF_OP_XRG:
3885 return ((p < a1) || (p > a2));
3886 case PF_OP_RRG:
3887 return ((p >= a1) && (p <= a2));
3888 case PF_OP_EQ:
3889 return (p == a1);
3890 case PF_OP_NE:
3891 return (p != a1);
3892 case PF_OP_LT:
3893 return (p < a1);
3894 case PF_OP_LE:
3895 return (p <= a1);
3896 case PF_OP_GT:
3897 return (p > a1);
3898 case PF_OP_GE:
3899 return (p >= a1);
3900 }
3901 return (0); /* never reached */
3902}
3903
3904int
3905pf_match_port(u_int8_t op, u_int16_t a1, u_int16_t a2, u_int16_t p)
3906{
3907 return (pf_match(op, ntohs(a1), ntohs(a2), ntohs(p)));
3908}
3909
3910int
3911pf_match_uid(u_int8_t op, uid_t a1, uid_t a2, uid_t u)
3912{
3913 if (u == -1 && op != PF_OP_EQ && op != PF_OP_NE)
3914 return (0);
3915 return (pf_match(op, a1, a2, u));
3916}
3917
3918int
3919pf_match_gid(u_int8_t op, gid_t a1, gid_t a2, gid_t g)
3920{
3921 if (g == -1 && op != PF_OP_EQ && op != PF_OP_NE)
3922 return (0);
3923 return (pf_match(op, a1, a2, g));
3924}
3925
3926int
3927pf_match_tag(struct mbuf *m, struct pf_rule *r, int *tag)
3928{
3929 if (*tag == -1)
3930 *tag = m->m_pkthdr.pf.tag;
3931
3932 return ((!r->match_tag_not && r->match_tag == *tag) ||
3933 (r->match_tag_not && r->match_tag != *tag));
3934}
3935
3936int
3937pf_match_rcvif(struct mbuf *m, struct pf_rule *r)
3938{
3939 struct ifnet *ifp;
3940 struct pfi_kif *kif = NULL;
3941
3942 if (m->m_pkthdr.ph_ifidx == 0)
3943 return (0);
3944
3945 smr_read_enter();
3946 ifp = if_get_smr(m->m_pkthdr.ph_ifidx);
3947 if (ifp != NULL) {
3948 kif = (struct pfi_kif *)ifp->if_pf_kif;
3949#if NCARP > 0
3950 if (ifp->if_type == IFT_CARP) {
3951 struct ifnet *ifp0 = if_get_smr(ifp->if_carpdevidx);
3952 if (ifp0 != NULL)
3953 kif = (struct pfi_kif *)ifp0->if_pf_kif;
3954 }
3955#endif /* NCARP */
3956 }
3957 smr_read_leave();
3958
3959 if (kif == NULL) {
3960 DPFPRINTF(LOG_ERR,
3961 "%s: kif == NULL, @%d via %s", __func__,
3962 r->nr, r->rcv_ifname);
3963 return (0);
3964 }
3965
3966 return (pfi_kif_match(r->rcv_kif, kif));
3967}
3968
3969void
3970pf_tag_packet(struct mbuf *m, int tag, int rtableid)
3971{
3972 if (tag > 0)
3973 m->m_pkthdr.pf.tag = tag;
3974 if (rtableid >= 0)
3975 m->m_pkthdr.ph_rtableid = (u_int)rtableid;
3976}
3977
3978void
3979pf_anchor_stack_init(void)
3980{
3981 struct pf_anchor_stackframe *stack;
3982
3983 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3984 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = &stack[0];
3985 cpumem_leave(pf_anchor_stack, stack);
3986}
3987
3988int
3989pf_anchor_stack_is_full(struct pf_anchor_stackframe *sf)
3990{
3991 struct pf_anchor_stackframe *stack;
3992 int rv;
3993
3994 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
3995 rv = (sf == &stack[PF_ANCHOR_STACK_MAX]);
3996 cpumem_leave(pf_anchor_stack, stack);
3997
3998 return (rv);
3999}
4000
4001int
4002pf_anchor_stack_is_empty(struct pf_anchor_stackframe *sf)
4003{
4004 struct pf_anchor_stackframe *stack;
4005 int rv;
4006
4007 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
4008 rv = (sf == &stack[0]);
4009 cpumem_leave(pf_anchor_stack, stack);
4010
4011 return (rv);
4012}
4013
4014struct pf_anchor_stackframe *
4015pf_anchor_stack_top(void)
4016{
4017 struct pf_anchor_stackframe *stack;
4018 struct pf_anchor_stackframe *top_sf;
4019
4020 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
4021 top_sf = stack[PF_ANCHOR_STACK_MAX].sf_stack_top;
4022 cpumem_leave(pf_anchor_stack, stack);
4023
4024 return (top_sf);
4025}
4026
4027int
4028pf_anchor_stack_push(struct pf_ruleset *rs, struct pf_rule *anchor,
4029 struct pf_rule *r, struct pf_anchor *child, int jump_target)
4030{
4031 struct pf_anchor_stackframe *stack;
4032 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
4033
4034 top_sf++;
4035 if (pf_anchor_stack_is_full(top_sf))
4036 return (-1);
4037
4038 top_sf->sf_rs = rs;
4039 top_sf->sf_anchor = anchor;
4040 top_sf->sf_r = r;
4041 top_sf->sf_child = child;
4042 top_sf->sf_jump_target = jump_target;
4043
4044 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
4045
4046 if ((top_sf <= &stack[0]) || (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
4047 panic("%s: top frame outside of anchor stack range", __func__);
4048
4049 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
4050 cpumem_leave(pf_anchor_stack, stack);
4051
4052 return (0);
4053}
4054
4055int
4056pf_anchor_stack_pop(struct pf_ruleset **rs, struct pf_rule **anchor,
4057 struct pf_rule **r, struct pf_anchor **child, int *jump_target)
4058{
4059 struct pf_anchor_stackframe *top_sf = pf_anchor_stack_top();
4060 struct pf_anchor_stackframe *stack;
4061 int on_top;
4062
4063 stack = (struct pf_anchor_stackframe *)cpumem_enter(pf_anchor_stack);
4064 if (pf_anchor_stack_is_empty(top_sf)) {
4065 on_top = -1;
4066 } else {
4067 if ((top_sf <= &stack[0]) ||
4068 (top_sf >= &stack[PF_ANCHOR_STACK_MAX]))
4069 panic("%s: top frame outside of anchor stack range",
4070 __func__);
4071
4072 *rs = top_sf->sf_rs;
4073 *anchor = top_sf->sf_anchor;
4074 *r = top_sf->sf_r;
4075 *child = top_sf->sf_child;
4076 *jump_target = top_sf->sf_jump_target;
4077 top_sf--;
4078 stack[PF_ANCHOR_STACK_MAX].sf_stack_top = top_sf;
4079 on_top = 0;
4080 }
4081 cpumem_leave(pf_anchor_stack, stack);
4082
4083 return (on_top);
4084}
4085
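/*
 * [editor's note, added] pf_poolmask() below computes
 *	naddr = (raddr & rmask) | (saddr & ~rmask)
 * keeping the network bits of the redirection address and the host
 * bits of the source address.  Hedged IPv4 example: raddr 10.0.0.0,
 * rmask 255.255.255.0 and saddr 192.168.7.42 yield 10.0.0.42.
 */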
4086void
4087pf_poolmask(struct pf_addr *naddr, struct pf_addr *raddr,
4088 struct pf_addr *rmask, struct pf_addr *saddr, sa_family_t af)
4089{
4090 switch (af) {
4091 case AF_INET:
4092 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
4093 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
4094 		break;
4095 #ifdef INET6
4096 	case AF_INET6:
4097 		naddr->addr32[0] = (raddr->addr32[0] & rmask->addr32[0]) |
4098 		    ((rmask->addr32[0] ^ 0xffffffff) & saddr->addr32[0]);
4099 		naddr->addr32[1] = (raddr->addr32[1] & rmask->addr32[1]) |
4100 		    ((rmask->addr32[1] ^ 0xffffffff) & saddr->addr32[1]);
4101 		naddr->addr32[2] = (raddr->addr32[2] & rmask->addr32[2]) |
4102 		    ((rmask->addr32[2] ^ 0xffffffff) & saddr->addr32[2]);
4103 		naddr->addr32[3] = (raddr->addr32[3] & rmask->addr32[3]) |
4104 		    ((rmask->addr32[3] ^ 0xffffffff) & saddr->addr32[3]);
4105 break;
4106#endif /* INET6 */
4107 default:
4108 unhandled_af(af);
4109 }
4110}
4111
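/*
 * [editor's note, added] pf_addr_inc() below increments an address
 * in host byte order; for IPv6 the four 32-bit words act as one
 * 128-bit counter, the nested tests propagating the carry from
 * addr32[3] up towards addr32[0].
 */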
4112void
4113pf_addr_inc(struct pf_addr *addr, sa_family_t af)
4114{
4115 switch (af) {
4116 case AF_INET:
4117 addr->addr32[0] = htonl(ntohl(addr->addr32[0]) + 1);
4118 break;
4119#ifdef INET6
4120 case AF_INET6:
4121 if (addr->addr32[3] == 0xffffffff) {
4122 addr->addr32[3] = 0;
4123 if (addr->addr32[2] == 0xffffffff) {
4124 addr->addr32[2] = 0;
4125 if (addr->addr32[1] == 0xffffffff) {
4126 addr->addr32[1] = 0;
4127 addr->addr32[0] =
4128 htonl(ntohl(addr->addr32[0]) + 1);
4129 } else
4130 addr->addr32[1] =
4131 htonl(ntohl(addr->addr32[1]) + 1);
4132 } else
4133 addr->addr32[2] =
4134 htonl(ntohl(addr->addr32[2]) + 1);
4135 } else
4136 addr->addr32[3] =
4137 htonl(ntohl(addr->addr32[3]) + 1);
4138 break;
4139#endif /* INET6 */
4140 default:
4141 unhandled_af(af);
4142 }
4143}
4144
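/*
 * [editor's note, added] pf_socket_lookup() below maps a TCP/UDP
 * packet to a local PCB so "user"/"group" rules can compare against
 * the socket's effective uid/gid.  Outbound packets swap source and
 * destination first, since the local socket then sits on the
 * sending side.  Returns 1 on success, -1 if no socket (or an
 * unsupported protocol) is found.
 */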
4145int
4146pf_socket_lookup(struct pf_pdesc *pd)
4147{
4148 struct pf_addr *saddr, *daddr;
4149 u_int16_t sport, dport;
4150 struct inpcbtable *table;
4151 struct inpcb *inp;
4152
4153 pd->lookup.uid = -1;
4154 pd->lookup.gid = -1;
4155 pd->lookup.pid = NO_PID;
4156 switch (pd->virtual_proto) {
4157 case IPPROTO_TCP:
4158 sport = pd->hdr.tcp.th_sport;
4159 dport = pd->hdr.tcp.th_dport;
4160 PF_ASSERT_LOCKED();
4161 NET_ASSERT_LOCKED();
4162 table = &tcbtable;
4163 break;
4164 case IPPROTO_UDP:
4165 sport = pd->hdr.udp.uh_sport;
4166 dport = pd->hdr.udp.uh_dport;
4167 PF_ASSERT_LOCKED();
4168 NET_ASSERT_LOCKED();
4169 table = &udbtable;
4170 break;
4171 default:
4172 return (-1);
4173 }
4174 if (pd->dir == PF_IN) {
4175 saddr = pd->src;
4176 daddr = pd->dst;
4177 } else {
4178 u_int16_t p;
4179
4180 p = sport;
4181 sport = dport;
4182 dport = p;
4183 saddr = pd->dst;
4184 daddr = pd->src;
4185 }
4186 switch (pd->af) {
4187 case AF_INET:
4188 /*
4189 		 * Fails when the rtable is changed while evaluating the ruleset.
4190 		 * The socket looked up will not match the one hit in the end.
4191 */
4192 inp = in_pcblookup(table, saddr->v4, sport, daddr->v4, dport,
4193 pd->rdomain);
4194 if (inp == NULL) {
4195 inp = in_pcblookup_listen(table, daddr->v4, dport,
4196 NULL, pd->rdomain);
4197 if (inp == NULL)
4198 return (-1);
4199 }
4200 break;
4201#ifdef INET6
4202 case AF_INET6:
4203 if (pd->virtual_proto == IPPROTO_UDP)
4204 table = &udb6table;
4205 if (pd->virtual_proto == IPPROTO_TCP)
4206 table = &tcb6table;
4207 inp = in6_pcblookup(table, &saddr->v6, sport, &daddr->v6,
4208 dport, pd->rdomain);
4209 if (inp == NULL) {
4210 inp = in6_pcblookup_listen(table, &daddr->v6, dport,
4211 NULL, pd->rdomain);
4212 if (inp == NULL)
4213 return (-1);
4214 }
4215 break;
4216#endif /* INET6 */
4217 default:
4218 unhandled_af(pd->af);
4219 }
4220 pd->lookup.uid = inp->inp_socket->so_euid;
4221 pd->lookup.gid = inp->inp_socket->so_egid;
4222 pd->lookup.pid = inp->inp_socket->so_cpid;
4223 in_pcbunref(inp);
4224 return (1);
4225}
4226
4227/* post: r => (r[0] == type /\ r[1] >= min_typelen >= 2 "validity"
4228 * /\ (eoh - r) >= min_typelen >= 2 "safety" )
4229 *
4230 * warning: r + r[1] may exceed opts bounds for r[1] > min_typelen
4231 */
4232u_int8_t*
4233pf_find_tcpopt(u_int8_t *opt, u_int8_t *opts, size_t hlen, u_int8_t type,
4234 u_int8_t min_typelen)
4235{
4236 u_int8_t *eoh = opts + hlen;
4237
4238 if (min_typelen < 2)
4239 return (NULL);
4240
4241 while ((eoh - opt) >= min_typelen) {
4242 switch (*opt) {
4243 case TCPOPT_EOL:
4244 			/* FALLTHROUGH - Work around the failure of some
4245 systems to NOP-pad their bzero'd option buffers,
4246 producing spurious EOLs */
4247 case TCPOPT_NOP:
4248 opt++;
4249 continue;
4250 default:
4251 if (opt[0] == type &&
4252 opt[1] >= min_typelen)
4253 return (opt);
4254 }
4255
4256 opt += MAX(opt[1], 2); /* evade infinite loops */
4257 }
4258
4259 return (NULL);
4260}
4261
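/*
 * [editor's note, added] A hedged, non-compiled usage sketch for
 * pf_find_tcpopt() above, mirroring how pf_get_wscale() below
 * resumes the scan with opt += opt[1]:
 */
#if 0
static u_int8_t
example_wscale(void)
{
	u_int8_t opts[] = { TCPOPT_NOP, TCPOPT_NOP,
	    TCPOPT_WINDOW, TCPOLEN_WINDOW, 7 };
	u_int8_t *opt;

	/* skips the two NOPs, returns a pointer at the WINDOW kind */
	opt = pf_find_tcpopt(opts, opts, sizeof(opts),
	    TCPOPT_WINDOW, TCPOLEN_WINDOW);
	return (opt != NULL ? opt[2] : 0);	/* shift count 7 */
}
#endif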
4262u_int8_t
4263pf_get_wscale(struct pf_pdesc *pd)
4264{
4265 int olen;
4266 u_int8_t opts[MAX_TCPOPTLEN], *opt;
4267 u_int8_t wscale = 0;
4268
4269 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
4270 if (olen < TCPOLEN_WINDOW || !pf_pull_hdr(pd->m,
4271 pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
4272 return (0);
4273
4274 opt = opts;
4275 while ((opt = pf_find_tcpopt(opt, opts, olen,
4276 TCPOPT_WINDOW, TCPOLEN_WINDOW)) != NULL) {
4277 wscale = opt[2];
4278 wscale = MIN(wscale, TCP_MAX_WINSHIFT);
4279 wscale |= PF_WSCALE_FLAG;
4280
4281 opt += opt[1];
4282 }
4283
4284 return (wscale);
4285}
4286
4287u_int16_t
4288pf_get_mss(struct pf_pdesc *pd, uint16_t mssdflt)
4289{
4290 int olen;
4291 u_int8_t opts[MAX_TCPOPTLEN], *opt;
4292 u_int16_t mss;
4293
4294 olen = (pd->hdr.tcp.th_off << 2) - sizeof(struct tcphdr);
4295 if (olen < TCPOLEN_MAXSEG || !pf_pull_hdr(pd->m,
4296 pd->off + sizeof(struct tcphdr), opts, olen, NULL, pd->af))
4297 return (0);
4298
4299 mss = mssdflt;
4300 opt = opts;
4301 while ((opt = pf_find_tcpopt(opt, opts, olen,
4302 TCPOPT_MAXSEG, TCPOLEN_MAXSEG)) != NULL) {
4303 memcpy(&mss, (opt + 2), 2);
4304 mss = ntohs(mss);
4305
4306 opt += opt[1];
4307 }
4308 return (mss);
4309}
4310
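/*
 * [editor's note, added] pf_calc_mss() below clamps an offered MSS
 * to what the route's interface MTU allows: MTU minus network and
 * TCP headers.  Hedged IPv4 example on a 1500 byte MTU: 1500 - 20 -
 * 20 = 1460, so an offer of 8960 is cut to 1460 while 1380 passes
 * unchanged (and anything below 64 is raised to 64).
 */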
4311u_int16_t
4312pf_calc_mss(struct pf_addr *addr, sa_family_t af, int rtableid, uint16_t offer,
4313 uint16_t mssdflt)
4314{
4315 struct ifnet *ifp;
4316 struct sockaddr_in *dst;
4317#ifdef INET6
4318 struct sockaddr_in6 *dst6;
4319#endif /* INET6 */
4320 struct rtentry *rt = NULL;
4321 struct sockaddr_storage ss;
4322 int hlen, mss;
4323
4324 memset(&ss, 0, sizeof(ss));
4325
4326 switch (af) {
4327 case AF_INET:
4328 hlen = sizeof(struct ip);
4329 dst = (struct sockaddr_in *)&ss;
4330 dst->sin_family = AF_INET;
4331 dst->sin_len = sizeof(*dst);
4332 dst->sin_addr = addr->v4;
4333 rt = rtalloc(sintosa(dst), 0, rtableid);
4334 break;
4335#ifdef INET6
4336 case AF_INET6:
4337 hlen = sizeof(struct ip6_hdr);
4338 dst6 = (struct sockaddr_in6 *)&ss;
4339 dst6->sin6_family = AF_INET6;
4340 dst6->sin6_len = sizeof(*dst6);
4341 dst6->sin6_addr = addr->v6;
4342 rt = rtalloc(sin6tosa(dst6), 0, rtableid);
4343 break;
4344#endif /* INET6 */
4345 }
4346
4347 mss = mssdflt;
4348 if (rt != NULL && (ifp = if_get(rt->rt_ifidx)) != NULL) {
4349 mss = ifp->if_mtu - hlen - sizeof(struct tcphdr);
4350 mss = imax(mss, mssdflt);
4351 if_put(ifp);
4352 }
4353 rtfree(rt);
4354 mss = imin(mss, offer);
4355 mss = imax(mss, 64); /* sanity - at least max opt space */
4356 return (mss);
4357}
4358
4359static __inline int
4360pf_set_rt_ifp(struct pf_state *st, struct pf_addr *saddr, sa_family_t af,
4361 struct pf_src_node **sns)
4362{
4363 struct pf_rule *r = st->rule.ptr;
4364 int rv;
4365
4366 if (!r->rt)
4367 return (0);
4368
4369 rv = pf_map_addr(af, r, saddr, &st->rt_addr, NULL, sns,
4370 &r->route, PF_SN_ROUTE);
4371 if (rv == 0)
4372 st->rt = r->rt;
4373
4374 return (rv);
4375}
4376
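/*
 * [editor's note, added] pf_tcp_iss() below derives initial sequence
 * numbers for the modulator and synproxy from a keyed SHA-512 hash:
 * the context is pre-seeded with a boot-time random secret, then the
 * routing domain, ports and addresses of the connection are mixed
 * in.  Adding the global tcp_iss and a 4096-per-call offset keeps
 * successive ISNs for the same tuple moving forward.
 */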
4377u_int32_t
4378pf_tcp_iss(struct pf_pdesc *pd)
4379{
4380 SHA2_CTX ctx;
4381 union {
4382 uint8_t bytes[SHA512_DIGEST_LENGTH];
4383 uint32_t words[1];
4384 } digest;
4385
4386 if (pf_tcp_secret_init == 0) {
4387 arc4random_buf(pf_tcp_secret, sizeof(pf_tcp_secret));
4388 SHA512Init(&pf_tcp_secret_ctx);
4389 SHA512Update(&pf_tcp_secret_ctx, pf_tcp_secret,
4390 sizeof(pf_tcp_secret));
4391 pf_tcp_secret_init = 1;
4392 }
4393 ctx = pf_tcp_secret_ctx;
4394
4395 SHA512Update(&ctx, &pd->rdomain, sizeof(pd->rdomain));
4396 SHA512Update(&ctx, &pd->hdr.tcp.th_sport, sizeof(u_short));
4397 SHA512Update(&ctx, &pd->hdr.tcp.th_dport, sizeof(u_short));
4398 switch (pd->af) {
4399 case AF_INET:
4400 SHA512Update(&ctx, &pd->src->v4, sizeof(struct in_addr));
4401 SHA512Update(&ctx, &pd->dst->v4, sizeof(struct in_addr));
4402 break;
4403#ifdef INET6
4404 case AF_INET6:
4405 SHA512Update(&ctx, &pd->src->v6, sizeof(struct in6_addr));
4406 SHA512Update(&ctx, &pd->dst->v6, sizeof(struct in6_addr));
4407 break;
4408#endif /* INET6 */
4409 }
4410 SHA512Final(digest.bytes, &ctx);
4411 pf_tcp_iss_off += 4096;
4412 return (digest.words[0] + READ_ONCE(tcp_iss) + pf_tcp_iss_off);
4413}
4414
4415void
4416pf_rule_to_actions(struct pf_rule *r, struct pf_rule_actions *a)
4417{
4418 if (r->qid)
4419 a->qid = r->qid;
4420 if (r->pqid)
4421 a->pqid = r->pqid;
4422 if (r->rtableid >= 0)
4423 a->rtableid = r->rtableid;
4424#if NPFLOG > 0
4425 a->log |= r->log;
4426#endif /* NPFLOG > 0 */
4427 if (r->scrub_flags & PFSTATE_SETTOS)
4428 a->set_tos = r->set_tos;
4429 if (r->min_ttl)
4430 a->min_ttl = r->min_ttl;
4431 if (r->max_mss)
4432 a->max_mss = r->max_mss;
4433 a->flags |= (r->scrub_flags & (PFSTATE_NODF|PFSTATE_RANDOMID|
4434 PFSTATE_SETTOS|PFSTATE_SCRUB_TCP|PFSTATE_SETPRIO));
4435 if (r->scrub_flags & PFSTATE_SETPRIO) {
4436 a->set_prio[0] = r->set_prio[0];
4437 a->set_prio[1] = r->set_prio[1];
4438 }
4439 if (r->rule_flag & PFRULE_SETDELAY)
4440 a->delay = r->delay;
4441}
4442
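/*
 * [editor's note, added] PF_TEST_ATTRIB(t, a) is loop control for
 * pf_match_rule(): if test t shows the current rule cannot match, r
 * advances to a (TAILQ_NEXT() or a precomputed skip-list target) and
 * the while loop continues with that rule.  The trailing
 * "else do {} while (0)" makes the macro swallow its semicolon and
 * act as a single statement.
 */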
4443#define PF_TEST_ATTRIB(t, a) \
4444 if (t) { \
4445 r = a; \
4446 continue; \
4447 } else do { \
4448 } while (0)
4449
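/*
 * [editor's note, added] pf_match_rule() below walks the active
 * ruleset, descending into anchors via the per-CPU anchor stack.
 * Rules may also reference a state limiter ("statelim") or a
 * per-source-address limiter ("sourcelim"); an exhausted or
 * rate-limited pool either skips the rule or, with
 * PF_LIMITER_BLOCK, aborts evaluation and drops the packet.  The
 * rate checks behave like a token bucket: each admitted state
 * pushes rate_ts forward by one token interval, and a new state is
 * refused while now - rate_ts is still shorter than one token.
 */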
4450enum pf_test_status
4451pf_match_rule(struct pf_test_ctx *ctx, struct pf_ruleset *ruleset)
4452{
4453 struct pf_rule *r;
4454 struct pf_anchor *child = NULL;
4455 int target;
4456
4457 pf_anchor_stack_init();
4458enter_ruleset:
4459 r = TAILQ_FIRST(ruleset->rules.active.ptr);
4460 while (r != NULL) {
4461 struct pf_statelim *stlim = NULL;
4462 struct pf_sourcelim *srlim = NULL;
4463 struct pf_source *sr = NULL;
4464 unsigned int gen;
4465
4466 PF_TEST_ATTRIB(r->rule_flag & PFRULE_EXPIRED,
4467 TAILQ_NEXT(r, entries));
4468 r->evaluations++;
4469 PF_TEST_ATTRIB(
4470 (pfi_kif_match(r->kif, ctx->pd->kif) == r->ifnot),
4471 r->skip[PF_SKIP_IFP].ptr);
4472 PF_TEST_ATTRIB((r->direction && r->direction != ctx->pd->dir),
4473 r->skip[PF_SKIP_DIR].ptr);
4474 PF_TEST_ATTRIB((r->onrdomain >= 0 &&
4475 (r->onrdomain == ctx->pd->rdomain) == r->ifnot),
4476 r->skip[PF_SKIP_RDOM].ptr);
4477 PF_TEST_ATTRIB((r->af && r->af != ctx->pd->af),
4478 r->skip[PF_SKIP_AF].ptr);
4479 PF_TEST_ATTRIB((r->proto && r->proto != ctx->pd->proto),
4480 r->skip[PF_SKIP_PROTO].ptr);
4481 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->src.addr, &ctx->pd->nsaddr,
4482 ctx->pd->naf, r->src.neg, ctx->pd->kif,
4483 ctx->act.rtableid)),
4484 r->skip[PF_SKIP_SRC_ADDR].ptr);
4485 PF_TEST_ATTRIB((PF_MISMATCHAW(&r->dst.addr, &ctx->pd->ndaddr,
4486 ctx->pd->af, r->dst.neg, NULL, ctx->act.rtableid)),
4487 r->skip[PF_SKIP_DST_ADDR].ptr);
4488
4489 switch (ctx->pd->virtual_proto) {
4490 case PF_VPROTO_FRAGMENT:
4491 /* tcp/udp only. port_op always 0 in other cases */
4492 PF_TEST_ATTRIB((r->src.port_op || r->dst.port_op),
4493 TAILQ_NEXT(r, entries));
4494 PF_TEST_ATTRIB((ctx->pd->proto == IPPROTO_TCP &&
4495 r->flagset),
4496 TAILQ_NEXT(r, entries));
4497 /* icmp only. type/code always 0 in other cases */
4498 PF_TEST_ATTRIB((r->type || r->code),
4499 TAILQ_NEXT(r, entries));
4500 /* tcp/udp only. {uid|gid}.op always 0 in other cases */
4501 PF_TEST_ATTRIB((r->gid.op || r->uid.op),
4502 TAILQ_NEXT(r, entries));
4503 break;
4504
4505 case IPPROTO_TCP:
4506 PF_TEST_ATTRIB(((r->flagset & ctx->th->th_flags) !=
4507 r->flags),
4508 TAILQ_NEXT(r, entries));
4509 PF_TEST_ATTRIB((r->os_fingerprint != PF_OSFP_ANY &&
4510 !pf_osfp_match(pf_osfp_fingerprint(ctx->pd),
4511 r->os_fingerprint)),
4512 TAILQ_NEXT(r, entries));
4513 /* FALLTHROUGH */
4514
4515 case IPPROTO_UDP:
4516 /* tcp/udp only. port_op always 0 in other cases */
4517 PF_TEST_ATTRIB((r->src.port_op &&
4518 !pf_match_port(r->src.port_op, r->src.port[0],
4519 r->src.port[1], ctx->pd->nsport)),
4520 r->skip[PF_SKIP_SRC_PORT].ptr);
4521 PF_TEST_ATTRIB((r->dst.port_op &&
4522 !pf_match_port(r->dst.port_op, r->dst.port[0],
4523 r->dst.port[1], ctx->pd->ndport)),
4524 r->skip[PF_SKIP_DST_PORT].ptr);
4525 /* tcp/udp only. uid.op always 0 in other cases */
4526 PF_TEST_ATTRIB((r->uid.op && (ctx->pd->lookup.done ||
4527 (ctx->pd->lookup.done =
4528 pf_socket_lookup(ctx->pd), 1)) &&
4529 !pf_match_uid(r->uid.op, r->uid.uid[0],
4530 r->uid.uid[1], ctx->pd->lookup.uid)),
4531 TAILQ_NEXT(r, entries));
4532 /* tcp/udp only. gid.op always 0 in other cases */
4533 PF_TEST_ATTRIB((r->gid.op && (ctx->pd->lookup.done ||
4534 (ctx->pd->lookup.done =
4535 pf_socket_lookup(ctx->pd), 1)) &&
4536 !pf_match_gid(r->gid.op, r->gid.gid[0],
4537 r->gid.gid[1], ctx->pd->lookup.gid)),
4538 TAILQ_NEXT(r, entries));
4539 break;
4540
4541 case IPPROTO_ICMP:
4542 /* icmp only. type always 0 in other cases */
4543 PF_TEST_ATTRIB((r->type &&
4544 r->type != ctx->icmptype + 1),
4545 TAILQ_NEXT(r, entries));
4546 /* icmp only. type always 0 in other cases */
4547 PF_TEST_ATTRIB((r->code &&
4548 r->code != ctx->icmpcode + 1),
4549 TAILQ_NEXT(r, entries));
4550 /* icmp only. don't create states on replies */
4551 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
4552 (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
4553 ctx->icmp_dir != PF_IN),
4554 TAILQ_NEXT(r, entries));
4555 break;
4556
4557 case IPPROTO_ICMPV6:
4558 /* icmp only. type always 0 in other cases */
4559 PF_TEST_ATTRIB((r->type &&
4560 r->type != ctx->icmptype + 1),
4561 TAILQ_NEXT(r, entries));
4562 /* icmp only. type always 0 in other cases */
4563 PF_TEST_ATTRIB((r->code &&
4564 r->code != ctx->icmpcode + 1),
4565 TAILQ_NEXT(r, entries));
4566 /* icmp only. don't create states on replies */
4567 PF_TEST_ATTRIB((r->keep_state && !ctx->state_icmp &&
4568 (r->rule_flag & PFRULE_STATESLOPPY) == 0 &&
4569 ctx->icmp_dir != PF_IN &&
4570 ctx->icmptype != ND_NEIGHBOR_ADVERT),
4571 TAILQ_NEXT(r, entries));
4572 break;
4573
4574 default:
4575 break;
4576 }
4577
4578 PF_TEST_ATTRIB((r->rule_flag & PFRULE_FRAGMENT &&
4579 ctx->pd->virtual_proto != PF_VPROTO_FRAGMENT),
4580 TAILQ_NEXT(r, entries));
4581 PF_TEST_ATTRIB((r->tos && !(r->tos == ctx->pd->tos)),
4582 TAILQ_NEXT(r, entries));
4583 PF_TEST_ATTRIB((r->prob &&
4584 r->prob <= arc4random_uniform(UINT_MAX - 1) + 1),
4585 TAILQ_NEXT(r, entries));
4586 PF_TEST_ATTRIB((r->match_tag &&
4587 !pf_match_tag(ctx->pd->m, r, &ctx->tag)),
4588 TAILQ_NEXT(r, entries));
4589 PF_TEST_ATTRIB((r->rcv_kif && pf_match_rcvif(ctx->pd->m, r) ==
4590 r->rcvifnot),
4591 TAILQ_NEXT(r, entries));
4592 PF_TEST_ATTRIB((r->prio &&
4593 (r->prio == PF_PRIO_ZERO ? 0 : r->prio) !=
4594 ctx->pd->m->m_pkthdr.pf.prio),
4595 TAILQ_NEXT(r, entries));
4596
4597 if (r->statelim.id != PF_STATELIM_ID_NONE) {
4598 stlim = pf_statelim_find(r->statelim.id);
4599
4600 /*
4601 * Treat a missing limiter like an exhausted limiter.
4602 			 * There is no "backend" to get a resource out of,
4603 * so the rule can't create state.
4604 */
4605 PF_TEST_ATTRIB(stlim == NULL,
4606 TAILQ_NEXT(r, entries));
4607
4608 /*
4609 * An overcommitted pool means this rule
4610 * can't create state.
4611 */
4612 if (stlim->pfstlim_inuse >= stlim->pfstlim_limit) {
4613 gen = pf_statelim_enter(stlim);
4614 stlim->pfstlim_counters.hardlimited++;
4615 pf_statelim_leave(stlim, gen);
4616 if (r->statelim.limiter_action == PF_LIMITER_BLOCK) {
4617 ctx->limiter_drop = 1;
4618 REASON_SET(&ctx->reason, PFRES_MAXSTATES);
4619 break; /* stop rule processing */
4620 }
4621
4622 r = TAILQ_NEXT(r, entries);
4623 continue;
4624 }
4625
4626 /*
4627 * Is access to the pool rate limited?
4628 */
4629 if (stlim->pfstlim_rate.limit != 0) {
4630 uint64_t ts = getnsecuptime();
4631 uint64_t diff = ts - stlim->pfstlim_rate_ts;
4632
4633 if (diff < stlim->pfstlim_rate_token) {
4634 gen = pf_statelim_enter(stlim);
4635 stlim->pfstlim_counters.ratelimited++;
4636 pf_statelim_leave(stlim, gen);
4637 if (r->statelim.limiter_action ==
4638 PF_LIMITER_BLOCK) {
4639 ctx->limiter_drop = 1;
4640 REASON_SET(&ctx->reason,
4641 PFRES_MAXSTATES);
4642 /* stop rule processing */
4643 break;
4644 }
4645 r = TAILQ_NEXT(r, entries);
4646 continue;
4647 }
4648
4649 if (diff > stlim->pfstlim_rate_bucket) {
4650 stlim->pfstlim_rate_ts =
4651 ts - stlim->pfstlim_rate_bucket;
4652 }
4653 }
4654 }
4655
4656 if (r->sourcelim.id != PF_SOURCELIM_ID_NONE) {
4657 struct pf_source key;
4658
4659 srlim = pf_sourcelim_find(r->sourcelim.id);
4660
4661 /*
4662 * Treat a missing pool like an overcommitted pool.
4663 * There is no "backend" to get a resource out of
4664 			 * There is no "backend" to get a resource out of,
4665 */
4666 PF_TEST_ATTRIB(srlim == NULL,
4667 TAILQ_NEXT(r, entries));
4668
4669 pf_source_key(srlim, &key,
4670 ctx->pd->af, ctx->pd->rdomain, ctx->pd->src);
4671 sr = pf_source_find(srlim, &key);
4672 if (sr != NULL) {
4673 /*
4674 * An overcommitted limiter means this rule
4675 * can't create state.
4676 */
4677 if (sr->pfsr_inuse >= srlim->pfsrlim_limit) {
4678 sr->pfsr_counters.hardlimited++;
4679 gen = pf_sourcelim_enter(srlim);
4680 srlim->pfsrlim_counters.hardlimited++;
4681 pf_sourcelim_leave(srlim, gen);
4682 if (r->sourcelim.limiter_action ==
4683 PF_LIMITER_BLOCK) {
4684 ctx->limiter_drop = 1;
4685 REASON_SET(&ctx->reason,
4686 PFRES_SRCLIMIT);
4687 /* stop rule processing */
4688 break;
4689 }
4690 r = TAILQ_NEXT(r, entries);
4691 continue;
4692 }
4693
4694 /*
4695 * Is access to the pool rate limited?
4696 */
4697 if (srlim->pfsrlim_rate.limit != 0) {
4698 uint64_t ts = getnsecuptime();
4699 uint64_t diff = ts - sr->pfsr_rate_ts;
4700
4701 if (diff < srlim->pfsrlim_rate_token) {
4702 sr->pfsr_counters.ratelimited++;
4703 gen = pf_sourcelim_enter(srlim);
4704 srlim->pfsrlim_counters.ratelimited++;
4705 pf_sourcelim_leave(srlim, gen);
4706 if (r->sourcelim.limiter_action ==
4707 PF_LIMITER_BLOCK) {
4708 ctx->limiter_drop = 1;
4709 REASON_SET(&ctx->reason,
4710 PFRES_SRCLIMIT);
4711 					/* stop rule processing */
4712 break;
4713 }
4714 r = TAILQ_NEXT(r, entries);
4715 continue;
4716 }
4717
4718 if (diff > srlim->pfsrlim_rate_bucket) {
4719 sr->pfsr_rate_ts = ts -
4720 srlim->pfsrlim_rate_bucket;
4721 }
4722 }
4723 } else {
4724 /*
4725 				 * A new source entry will (should)
4726 * admit a state.
4727 */
4728
4729 if (srlim->pfsrlim_nsources >=
4730 srlim->pfsrlim_entries) {
4731 gen = pf_sourcelim_enter(srlim);
4732 srlim->pfsrlim_counters.addrlimited++;
4733 pf_sourcelim_leave(srlim, gen);
4734 if (r->sourcelim.limiter_action ==
4735 PF_LIMITER_BLOCK) {
4736 ctx->limiter_drop = 1;
4737 REASON_SET(&ctx->reason,
4738 PFRES_SRCLIMIT);
4739 					/* stop rule processing */
4740 break;
4741 }
4742 r = TAILQ_NEXT(r, entries);
4743 continue;
4744 }
4745 }
4746 }
4747
4748 /* must be last! */
4749 if (r->pktrate.limit) {
4750 pf_add_threshold(&r->pktrate);
4751 PF_TEST_ATTRIB((pf_check_threshold(&r->pktrate)),
4752 TAILQ_NEXT(r, entries));
4753 }
4754
4755 		/* all of the above tests passed: the rule matches */
4756 if (r->tag)
4757 ctx->tag = r->tag;
4758 if (r->anchor == NULL) {
4759
4760 if (r->rule_flag & PFRULE_ONCE) {
4761 u_int32_t rule_flag;
4762
4763 rule_flag = r->rule_flag;
4764 if (((rule_flag & PFRULE_EXPIRED) == 0) &&
4765 atomic_cas_uint(&r->rule_flag, rule_flag,
4766 rule_flag | PFRULE_EXPIRED) == rule_flag) {
4767 r->exptime = gettime();
4768 } else {
4769 r = TAILQ_NEXT(r, entries);
4770 continue;
4771 }
4772 }
4773
4774 if (r->action == PF_MATCH) {
4775 if ((ctx->ri = pool_get(&pf_rule_item_pl,
4776 PR_NOWAIT)) == NULL) {
4777 REASON_SET(&ctx->reason, PFRES_MEMORY);
4778 return (PF_TEST_FAIL);
4779 }
4780 ctx->ri->r = r;
4781 /* order is irrelevant */
4782 SLIST_INSERT_HEAD(&ctx->rules, ctx->ri, entry);
4783 ctx->ri = NULL;
4784 pf_rule_to_actions(r, &ctx->act);
4785 if (r->rule_flag & PFRULE_AFTO)
4786 ctx->pd->naf = r->naf;
4787 if (pf_get_transaddr(r, ctx->pd, ctx->sns,
4788 &ctx->nr) == -1) {
4789 REASON_SET(&ctx->reason,
4790 PFRES_TRANSLATE);
4791 return (PF_TEST_FAIL);
4792 }
4793#if NPFLOG > 0
4794 if (r->log) {
4795 REASON_SET(&ctx->reason, PFRES_MATCH);
4796 pflog_packet(ctx->pd, ctx->reason, r,
4797 ctx->a, ruleset, NULL);
4798 }
4799#endif /* NPFLOG > 0 */
4800 } else {
4801 /*
4802 * found matching r
4803 */
4804 *ctx->rm = r;
4805 				/*
4806 				 * the anchor rule whose ruleset r belongs to
4807 				 */
4808 				*ctx->am = ctx->a;
4809 				/*
4810 				 * the ruleset that r belongs to
4811 				 */
4812 				*ctx->rsm = ruleset;
4813 				/*
4814 				 * the ruleset that the anchor rule belongs to
4815 				 */
4816 ctx->arsm = ctx->aruleset;
4817 /*
4818 				 * state/source limiter pools
4819 */
4820
4821 ctx->statelim = stlim;
4822 ctx->sourcelim = srlim;
4823 ctx->source = sr;
4824 }
4825
4826#if NPFLOG > 0
4827 if (ctx->act.log & PF_LOG_MATCHES)
4828 pf_log_matches(ctx->pd, r, ctx->a, ruleset,
4829 &ctx->rules);
4830#endif /* NPFLOG > 0 */
4831
4832 if (r->quick)
4833 return (PF_TEST_QUICK);
4834 } else {
4835 ctx->aruleset = &r->anchor->ruleset;
4836 if (r->anchor_wildcard) {
4837 RB_FOREACH(child, pf_anchor_node,
4838 &r->anchor->children) {
4839 if (pf_anchor_stack_push(ruleset,
4840 ctx->a, r, child,
4841 PF_NEXT_CHILD) != 0)
4842 return (PF_TEST_FAIL);
4843
4844 ctx->a = r;
4845 ruleset = &child->ruleset;
4846 goto enter_ruleset;
4847next_child:
4848 continue; /* with RB_FOREACH() */
4849 }
4850 } else {
4851 if (pf_anchor_stack_push(ruleset, ctx->a,
4852 r, child, PF_NEXT_RULE) != 0)
4853 return (PF_TEST_FAIL);
4854
4855 ctx->a = r;
4856 ruleset = &r->anchor->ruleset;
4857 child = NULL;
4858 goto enter_ruleset;
4859next_rule:
4860 ;
4861 }
4862 }
4863 r = TAILQ_NEXT(r, entries);
4864 }
4865
4866 if (pf_anchor_stack_pop(&ruleset, &ctx->a, &r, &child,
4867 &target) == 0) {
4868
4869 /* stop if any rule matched within quick anchors. */
4870 if (r->quick && *ctx->am == r)
4871 return (PF_TEST_QUICK);
4872
4873 switch (target) {
4874 case PF_NEXT_CHILD:
4875 goto next_child;
4876 case PF_NEXT_RULE:
4877 goto next_rule;
4878 default:
4879 panic("%s: unknown jump target", __func__);
4880 }
4881 }
4882
4883 return (PF_TEST_OK);
4884}
4885
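/*
 * [editor's note, added] pf_test_rule() below drives ruleset
 * evaluation for a packet without state: it seeds the test context,
 * runs pf_match_rule(), applies the winning rule's actions
 * (including TCP RST/ICMP error returns for blocking rules) and,
 * for pass rules with keep state, calls pf_create_state().  With
 * pfsync active, an outbound state may additionally be deferred
 * until a peer firewall has acknowledged it.
 */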
4886int
4887pf_test_rule(struct pf_pdesc *pd, struct pf_rule **rm, struct pf_state **sm,
4888 struct pf_rule **am, struct pf_ruleset **rsm, u_short *reason)
4889{
4890 struct pf_rule *r = NULL;
4891 struct pf_rule *a = NULL;
4892 struct pf_ruleset *ruleset = NULL;
4893 struct pf_state_key *skw = NULL, *sks = NULL;
4894 int rewrite = 0;
4895 u_int16_t virtual_type, virtual_id;
4896 int action = PF_DROP;
4897 struct pf_test_ctx ctx;
4898 int rv;
4899
4900 PF_ASSERT_LOCKED();
4901
4902 memset(&ctx, 0, sizeof(ctx));
4903 ctx.pd = pd;
4904 ctx.rm = rm;
4905 ctx.am = am;
4906 ctx.rsm = rsm;
4907 ctx.th = &pd->hdr.tcp;
4908 ctx.act.rtableid = pd->rdomain;
4909 ctx.tag = -1;
4910 SLIST_INIT(&ctx.rules);
4911
4912 if (pd->dir == PF_IN && if_congested()) {
4913 REASON_SET(&ctx.reason, PFRES_CONGEST);
4914 return (PF_DROP);
4915 }
4916
4917 switch (pd->virtual_proto) {
4918 case IPPROTO_ICMP:
4919 ctx.icmptype = pd->hdr.icmp.icmp_type;
4920 ctx.icmpcode = pd->hdr.icmp.icmp_code;
4921 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
4922 &ctx.icmp_dir, &virtual_id, &virtual_type);
4923 if (ctx.icmp_dir == PF_IN) {
4924 pd->osport = pd->nsport = virtual_id;
4925 pd->odport = pd->ndport = virtual_type;
4926 } else {
4927 pd->osport = pd->nsport = virtual_type;
4928 pd->odport = pd->ndport = virtual_id;
4929 }
4930 break;
4931#ifdef INET6
4932 case IPPROTO_ICMPV6:
4933 ctx.icmptype = pd->hdr.icmp6.icmp6_type;
4934 ctx.icmpcode = pd->hdr.icmp6.icmp6_code;
4935 ctx.state_icmp = pf_icmp_mapping(pd, ctx.icmptype,
4936 &ctx.icmp_dir, &virtual_id, &virtual_type);
4937 if (ctx.icmp_dir == PF_IN) {
4938 pd->osport = pd->nsport = virtual_id;
4939 pd->odport = pd->ndport = virtual_type;
4940 } else {
4941 pd->osport = pd->nsport = virtual_type;
4942 pd->odport = pd->ndport = virtual_id;
4943 }
4944 break;
4945#endif /* INET6 */
4946 }
4947
4948 ruleset = &pf_main_ruleset;
4949 rv = pf_match_rule(&ctx, ruleset);
4950 if (rv == PF_TEST_FAIL || ctx.limiter_drop == 1) {
4951 REASON_SET(reason, ctx.reason);
4952 goto cleanup;
4953 }
4954
4955 r = *ctx.rm; /* matching rule */
4956 a = *ctx.am; /* rule that defines an anchor containing 'r' */
4957 ruleset = *ctx.rsm;/* ruleset of the anchor defined by the rule 'a' */
4958 ctx.aruleset = ctx.arsm;/* ruleset of the 'a' rule itself */
4959
4960 /* apply actions for last matching pass/block rule */
4961 pf_rule_to_actions(r, &ctx.act);
4962 if (r->rule_flag & PFRULE_AFTO)
4963 pd->naf = r->naf;
4964 if (pf_get_transaddr(r, pd, ctx.sns, &ctx.nr) == -1) {
4965 REASON_SET(&ctx.reason, PFRES_TRANSLATE);
4966 goto cleanup;
4967 }
4968 REASON_SET(&ctx.reason, PFRES_MATCH);
4969
4970#if NPFLOG > 0
4971 if (r->log)
4972 pflog_packet(pd, ctx.reason, r, a, ruleset, NULL);
4973 if (ctx.act.log & PF_LOG_MATCHES)
4974 pf_log_matches(pd, r, a, ruleset, &ctx.rules);
4975#endif /* NPFLOG > 0 */
4976
4977 if (pd->virtual_proto != PF_VPROTO_FRAGMENT &&
4978 (r->action == PF_DROP) &&
4979 ((r->rule_flag & PFRULE_RETURNRST) ||
4980 (r->rule_flag & PFRULE_RETURNICMP) ||
4981 (r->rule_flag & PFRULE_RETURN))) {
4982 if (pd->proto == IPPROTO_TCP &&
4983 ((r->rule_flag & PFRULE_RETURNRST) ||
4984 (r->rule_flag & PFRULE_RETURN)) &&
4985 !(ctx.th->th_flags & TH_RST)) {
4986 u_int32_t ack =
4987 ntohl(ctx.th->th_seq) + pd->p_len;
4988
4989 if (pf_check_tcp_cksum(pd->m, pd->off,
4990 pd->tot_len - pd->off, pd->af))
4991 REASON_SET(&ctx.reason, PFRES_PROTCKSUM);
4992 else {
4993 if (ctx.th->th_flags & TH_SYN)
4994 ack++;
4995 if (ctx.th->th_flags & TH_FIN)
4996 ack++;
4997 pf_send_tcp(r, pd->af, pd->dst,
4998 pd->src, ctx.th->th_dport,
4999 ctx.th->th_sport, ntohl(ctx.th->th_ack),
5000 ack, TH_RST|TH_ACK, 0, 0, r->return_ttl,
5001 1, 0, pd->rdomain, &ctx.reason);
5002 }
5003 } else if ((pd->proto != IPPROTO_ICMP ||
5004 ICMP_INFOTYPE(ctx.icmptype)) && pd->af == AF_INET &&
5005 r->return_icmp)
5006 pf_send_icmp(pd->m, r->return_icmp >> 8,
5007 r->return_icmp & 255, 0, pd->af, r, pd->rdomain);
5008 else if ((pd->proto != IPPROTO_ICMPV6 ||
5009 (ctx.icmptype >= ICMP6_ECHO_REQUEST &&
5010 ctx.icmptype != ND_REDIRECT)) && pd->af == AF_INET6 &&
5011 r->return_icmp6)
5012 pf_send_icmp(pd->m, r->return_icmp6 >> 8,
5013 r->return_icmp6 & 255, 0, pd->af, r, pd->rdomain);
5014 }
5015
5016 if (r->action == PF_DROP)
5017 goto cleanup;
5018
5019 pf_tag_packet(pd->m, ctx.tag, ctx.act.rtableid);
5020 if (ctx.act.rtableid >= 0 &&
5021 rtable_l2(ctx.act.rtableid) != pd->rdomain)
5022 pd->destchg = 1;
5023
5024 	if (r->action == PF_PASS && pd->badopts != 0 && !r->allow_opts) {
5025 REASON_SET(&ctx.reason, PFRES_IPOPTIONS);
5026#if NPFLOG > 0
5027 pd->pflog |= PF_LOG_FORCE;
5028#endif /* NPFLOG > 0 */
5029 DPFPRINTF(LOG_NOTICE, "dropping packet with "
5030 "ip/ipv6 options in pf_test_rule()");
5031 goto cleanup;
5032 }
5033
5034 if (pd->virtual_proto != PF_VPROTO_FRAGMENT
5035 && !ctx.state_icmp && r->keep_state) {
5036
5037 if (r->rule_flag & PFRULE_SRCTRACK &&
5038 pf_insert_src_node(&ctx.sns[PF_SN_NONE], r, PF_SN_NONE,
5039 pd->af, pd->src, NULL, NULL) != 0) {
5040 REASON_SET(&ctx.reason, PFRES_SRCLIMIT);
5041 goto cleanup;
5042 }
5043
5044 if (r->max_states && (r->states_cur >= r->max_states)) {
5045 pf_status.lcounters[LCNT_STATES]++;
5046 REASON_SET(&ctx.reason, PFRES_MAXSTATES);
5047 goto cleanup;
5048 }
5049
5050 action = pf_create_state(pd, r, a, ctx.nr, &skw, &sks,
5051 &rewrite, sm, ctx.tag, &ctx.rules, &ctx.act, ctx.sns,
5052 &ctx);
5053
5054 if (action != PF_PASS)
5055 goto cleanup;
5056
5057 if (pd->proto == IPPROTO_TCP &&
5058 r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
5059 action = pf_synproxy_ack(r, pd, sm, &ctx.act);
5060 if (action != PF_PASS)
5061 return (action); /* PF_SYNPROXY_DROP */
5062 }
5063
5064 if (sks != skw) {
5065 struct pf_state_key *sk;
5066
5067 if (pd->dir == PF_IN)
5068 sk = sks;
5069 else
5070 sk = skw;
5071 rewrite += pf_translate(pd,
5072 &sk->addr[pd->af == pd->naf ? pd->sidx : pd->didx],
5073 sk->port[pd->af == pd->naf ? pd->sidx : pd->didx],
5074 &sk->addr[pd->af == pd->naf ? pd->didx : pd->sidx],
5075 sk->port[pd->af == pd->naf ? pd->didx : pd->sidx],
5076 virtual_type, ctx.icmp_dir);
5077 }
5078
5079#ifdef INET6
5080 if (rewrite && skw->af != sks->af)
5081 action = PF_AFRT;
5082#endif /* INET6 */
5083
5084 } else {
5085 action = PF_PASS;
5086
5087 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
5088 SLIST_REMOVE_HEAD(&ctx.rules, entry);
5089 pool_put(&pf_rule_item_pl, ctx.ri);
5090 }
5091 }
5092
5093 /* copy back packet headers if needed */
5094 if (rewrite && pd->hdrlen) {
5095 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
5096 }
5097
5098#if NPFSYNC > 0
5099 if (*sm != NULL && !ISSET((*sm)->state_flags, PFSTATE_NOSYNC) &&
5100 pd->dir == PF_OUT && pfsync_is_up()) {
5101 /*
5102 		 * We want the state created, but we don't
5103 		 * want to send this packet yet, in case a
5104 		 * partner firewall has to learn the state
5105 		 * first to allow replies through it.
5106 */
5107 if (pfsync_defer(*sm, pd->m))
5108 return (PF_DEFER);
5109 }
5110#endif /* NPFSYNC > 0 */
5111
5112 return (action);
5113
5114cleanup:
5115 while ((ctx.ri = SLIST_FIRST(&ctx.rules))) {
5116 SLIST_REMOVE_HEAD(&ctx.rules, entry);
5117 pool_put(&pf_rule_item_pl, ctx.ri);
5118 }
5119
5120 return (action);
5121}
5122
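/*
 * [editor's note, added] pf_create_state() below allocates and fills
 * the state entry: protocol-specific peer tracking (for TCP the
 * seqlo/seqhi window bounds and the optional sequence modulator),
 * source node linkage, and membership in any state/source limiter
 * the matching rule named, before inserting it into the state
 * table.  The csfailed: path unwinds those same steps in reverse.
 */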
5123static __inline int
5124pf_create_state(struct pf_pdesc *pd, struct pf_rule *r, struct pf_rule *a,
5125 struct pf_rule *nr, struct pf_state_key **skw, struct pf_state_key **sks,
5126 int *rewrite, struct pf_state **sm, int tag, struct pf_rule_slist *rules,
5127 struct pf_rule_actions *act, struct pf_src_node *sns[PF_SN_MAX],
5128 struct pf_test_ctx *ctx)
5129{
5130 struct pf_state *st = NULL;
5131 struct pf_statelim *stlim = NULL;
5132 struct pf_sourcelim *srlim = NULL;
5133 struct pf_source *sr = NULL;
5134 struct pf_state_link *pfl;
5135 struct tcphdr *th = &pd->hdr.tcp;
5136 u_short reason;
5137 u_int i;
5138
5139 st = pool_get(&pf_state_pl, PR_NOWAIT | PR_ZERO);
5140 if (st == NULL) {
5141 REASON_SET(&reason, PFRES_MEMORY);
5142 goto csfailed;
5143 }
5144 st->rule.ptr = r;
5145 st->anchor.ptr = a;
5146 st->natrule.ptr = nr;
5147 if (r->allow_opts)
5148 st->state_flags |= PFSTATE_ALLOWOPTS;
5149 if (r->rule_flag & PFRULE_STATESLOPPY)
5150 st->state_flags |= PFSTATE_SLOPPY;
5151 if (r->rule_flag & PFRULE_PFLOW)
5152 st->state_flags |= PFSTATE_PFLOW;
5153 if (r->rule_flag & PFRULE_NOSYNC)
5154 st->state_flags |= PFSTATE_NOSYNC;
5155#if NPFLOG > 0
5156 st->log = act->log & PF_LOG_ALL;
5157#endif /* NPFLOG > 0 */
5158 st->qid = act->qid;
5159 st->pqid = act->pqid;
5160 st->rtableid[pd->didx] = act->rtableid;
5161 st->rtableid[pd->sidx] = -1; /* return traffic is routed normally */
5162 st->min_ttl = act->min_ttl;
5163 st->set_tos = act->set_tos;
5164 st->max_mss = act->max_mss;
5165 st->state_flags |= act->flags;
5166#if NPFSYNC > 0
5167 st->sync_state = PFSYNC_S_NONE;
5168#endif /* NPFSYNC > 0 */
5169 st->set_prio[0] = act->set_prio[0];
5170 st->set_prio[1] = act->set_prio[1];
5171 st->delay = act->delay;
5172 SLIST_INIT(&st->src_nodes);
5173 SLIST_INIT(&st->linkage);
5174
5175 /*
5176 	 * The refcnt must be initialized before pf_state_insert() is
5177 	 * called: pf_state_insert() grabs a reference for pfsync!
5178 */
5179 PF_REF_INIT(st->refcnt);
5180 mtx_init(&st->mtx, IPL_NET);
5181
5182 switch (pd->proto) {
5183 case IPPROTO_TCP:
5184 st->src.seqlo = ntohl(th->th_seq);
5185 st->src.seqhi = st->src.seqlo + pd->p_len + 1;
5186 if ((th->th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
5187 r->keep_state == PF_STATE_MODULATE) {
5188 /* Generate sequence number modulator */
5189 st->src.seqdiff = pf_tcp_iss(pd) - st->src.seqlo;
5190 if (st->src.seqdiff == 0)
5191 st->src.seqdiff = 1;
5192 pf_patch_32(pd, &th->th_seq,
5193 htonl(st->src.seqlo + st->src.seqdiff));
5194 *rewrite = 1;
5195 } else
5196 st->src.seqdiff = 0;
5197 if (th->th_flags & TH_SYN) {
5198 st->src.seqhi++;
5199 st->src.wscale = pf_get_wscale(pd);
5200 }
5201 st->src.max_win = MAX(ntohs(th->th_win), 1);
5202 if (st->src.wscale & PF_WSCALE_MASK) {
5203 /* Remove scale factor from initial window */
5204 int win = st->src.max_win;
5205 win += 1 << (st->src.wscale & PF_WSCALE_MASK);
5206 st->src.max_win = (win - 1) >>
5207 (st->src.wscale & PF_WSCALE_MASK);
5208 }
5209 if (th->th_flags & TH_FIN)
5210 st->src.seqhi++;
5211 st->dst.seqhi = 1;
5212 st->dst.max_win = 1;
5213 pf_set_protostate(st, PF_PEER_SRC, TCPS_SYN_SENT);
5214 pf_set_protostate(st, PF_PEER_DST, TCPS_CLOSED);
5215 st->timeout = PFTM_TCP_FIRST_PACKET;
5216 atomic_inc_int(&pf_status.states_halfopen);
5217 break;
5218 case IPPROTO_UDP:
5219 pf_set_protostate(st, PF_PEER_SRC, PFUDPS_SINGLE);
5220 pf_set_protostate(st, PF_PEER_DST, PFUDPS_NO_TRAFFIC);
5221 st->timeout = PFTM_UDP_FIRST_PACKET;
5222 break;
5223 case IPPROTO_ICMP:
5224#ifdef INET6
5225 case IPPROTO_ICMPV6:
5226#endif /* INET6 */
5227 st->timeout = PFTM_ICMP_FIRST_PACKET;
5228 break;
5229 default:
5230 pf_set_protostate(st, PF_PEER_SRC, PFOTHERS_SINGLE);
5231 pf_set_protostate(st, PF_PEER_DST, PFOTHERS_NO_TRAFFIC);
5232 st->timeout = PFTM_OTHER_FIRST_PACKET;
5233 }
5234
5235 st->creation = st->expire = getuptime();
5236
5237 if (pd->proto == IPPROTO_TCP) {
5238 if (st->state_flags & PFSTATE_SCRUB_TCP &&
5239 pf_normalize_tcp_init(pd, &st->src)) {
5240 REASON_SET(&reason, PFRES_MEMORY);
5241 goto csfailed;
5242 }
5243 if (st->state_flags & PFSTATE_SCRUB_TCP && st->src.scrub &&
5244 pf_normalize_tcp_stateful(pd, &reason, st,
5245 &st->src, &st->dst, rewrite)) {
5246 /* This really shouldn't happen!!! */
5247 DPFPRINTF(LOG_ERR,
5248 "%s: tcp normalize failed on first pkt", __func__);
5249 goto csfailed;
5250 }
5251 }
5252 st->direction = pd->dir;
5253
5254 if (pf_state_key_setup(pd, skw, sks, act->rtableid)) {
5255 REASON_SET(&reason, PFRES_MEMORY);
5256 goto csfailed;
5257 }
5258
5259 if (pf_set_rt_ifp(st, pd->src, (*skw)->af, sns) != 0) {
5260 REASON_SET(&reason, PFRES_NOROUTE);
5261 goto csfailed;
5262 }
5263
5264 for (i = 0; i < PF_SN_MAX; i++)
5265 if (sns[i] != NULL) {
5266 struct pf_sn_item *sni;
5267
5268 sni = pool_get(&pf_sn_item_pl, PR_NOWAIT);
5269 if (sni == NULL) {
5270 REASON_SET(&reason, PFRES_MEMORY);
5271 goto csfailed;
5272 }
5273 sni->sn = sns[i];
5274 SLIST_INSERT_HEAD(&st->src_nodes, sni, next);
5275 sni->sn->states++;
5276 }
5277
5278 stlim = ctx->statelim;
5279 if (stlim != NULL) {
5280 unsigned int gen;
5281
5282 PF_ASSERT_LOCKED();
5283 pfl = pool_get(&pf_state_link_pl, PR_NOWAIT);
5284 if (pfl == NULL) {
5285 REASON_SET(&reason, PFRES_MEMORY);
5286 goto csfailed;
5287 }
5288
5289 gen = pf_statelim_enter(stlim);
5290 stlim->pfstlim_counters.admitted++;
5291 stlim->pfstlim_inuse++;
5292 pf_statelim_leave(stlim, gen);
5293
5294 stlim->pfstlim_rate_ts += stlim->pfstlim_rate_token;
5295
5296 st->statelim = stlim->pfstlim_id;
5297 pfl->pfl_state = st;
5298 pfl->pfl_type = PF_STATE_LINK_TYPE_STATELIM;
5299
5300 TAILQ_INSERT_TAIL(&stlim->pfstlim_states, pfl, pfl_link);
5301 SLIST_INSERT_HEAD(&st->linkage, pfl, pfl_linkage);
5302 }
5303
5304 srlim = ctx->sourcelim;
5305 if (srlim != NULL) {
5306 unsigned int gen;
5307
5308 sr = ctx->source;
5309 if (sr == NULL) {
5310 sr = pool_get(&pf_source_pl, PR_NOWAIT|PR_ZERO);
5311 if (sr == NULL) {
5312 gen = pf_sourcelim_enter(srlim);
5313 srlim->pfsrlim_counters.addrnomem++;
5314 pf_sourcelim_leave(srlim, gen);
5315 REASON_SET(&reason, PFRES_MEMORY);
5316 goto csfailed;
5317 }
5318
5319 sr->pfsr_parent = srlim;
5320 pf_source_key(srlim, sr,
5321 ctx->pd->af, ctx->pd->rdomain, ctx->pd->src);
5322 TAILQ_INIT(&sr->pfsr_states);
5323
5324 if (RBT_INSERT(pf_source_tree,
5325 &srlim->pfsrlim_sources, sr) != NULL) {
5326 panic("%s: source pool %u (%p) "
5327 "insert collision %p?!", __func__,
5328 srlim->pfsrlim_id, srlim, sr);
5329 }
5330
5331 if (RBT_INSERT(pf_source_ioc_tree,
5332 &srlim->pfsrlim_ioc_sources, sr) != NULL) {
5333 panic("%s: source pool %u (%p) ioc "
5334 "insert collision (%p)?!", __func__,
5335 srlim->pfsrlim_id, srlim, sr);
5336 }
5337
5338 sr->pfsr_empty_ts = getuptime();
5339 TAILQ_INSERT_TAIL(&pf_source_gc, sr,
5340 pfsr_empty_gc);
5341
5342 gen = pf_sourcelim_enter(srlim);
5343 srlim->pfsrlim_nsources++;
5344 srlim->pfsrlim_counters.addrallocs++;
5345 pf_sourcelim_leave(srlim, gen);
5346 } else {
5347 KASSERT(sr->pfsr_parent == srlim);
5348 }
5349
5350 PF_ASSERT_LOCKED();
5351 pfl = pool_get(&pf_state_link_pl, PR_NOWAIT);
5352 if (pfl == NULL) {
5353 REASON_SET(&reason, PFRES_MEMORY);
5354 goto csfailed;
5355 }
5356
5357 pf_source_used(sr);
5358
5359 sr->pfsr_counters.admitted++;
5360
5361 gen = pf_sourcelim_enter(srlim);
5362 srlim->pfsrlim_counters.inuse++;
5363 srlim->pfsrlim_counters.admitted++;
5364 pf_sourcelim_leave(srlim, gen);
5365
5366 st->sourcelim = srlim->pfsrlim_id;
5367 pfl->pfl_state = st;
5368 pfl->pfl_type = PF_STATE_LINK_TYPE_SOURCELIM;
5369
5370 TAILQ_INSERT_TAIL(&sr->pfsr_states, pfl, pfl_link);
5371 SLIST_INSERT_HEAD(&st->linkage, pfl, pfl_linkage);
5372 }
5373
5374#if NPFSYNC > 0
5375 pfsync_init_state(st, *skw, *sks, 0);
5376#endif
5377
5378 if (pf_state_insert(BOUND_IFACE(r, pd->kif), skw, sks, st)) {
5379 *sks = *skw = NULL;
5380 REASON_SET(&reason, PFRES_STATEINS);
5381 goto csfailed;
5382 } else
5383 *sm = st;
5384
5385 /*
5386 * Make state responsible for rules it binds here.
5387 */
5388 memcpy(&st->match_rules, rules, sizeof(st->match_rules));
5389 memset(rules, 0, sizeof(*rules));
5390 STATE_INC_COUNTERS(st);
5391
5392 if (tag > 0) {
5393 pf_tag_ref(tag);
5394 st->tag = tag;
5395 }
5396 if (pd->proto == IPPROTO_TCP && (th->th_flags & (TH_SYN|TH_ACK)) ==
5397 TH_SYN && r->keep_state == PF_STATE_SYNPROXY && pd->dir == PF_IN) {
5398 int rtid;
5399 uint16_t mss, mssdflt;
5400
5401 rtid = (act->rtableid >= 0) ? act->rtableid : pd->rdomain;
5402 pf_set_protostate(st, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
5403 st->src.seqhi = arc4random();
5404 /* Find mss option */
5405 mssdflt = atomic_load_int(&tcp_mssdflt);
5406 mss = pf_get_mss(pd, mssdflt);
5407 mss = pf_calc_mss(pd->src, pd->af, rtid, mss, mssdflt);
5408 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss, mssdflt);
5409 st->src.mss = mss;
5410 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
5411 th->th_sport, st->src.seqhi, ntohl(th->th_seq) + 1,
5412 TH_SYN|TH_ACK, 0, st->src.mss, 0, 1, 0, pd->rdomain,
5413 &reason);
5414 REASON_SET(&reason, PFRES_SYNPROXY);
5415 return (PF_SYNPROXY_DROP);
5416 }
5417
5418 return (PF_PASS);
5419
5420csfailed:
5421 if (st) {
5422 struct pf_state_link *npfl;
5423
5424 SLIST_FOREACH_SAFE(pfl, &st->linkage, pfl_linkage, npfl) {
5425 struct pf_state_link_list *list;
5426 unsigned int gen;
5427
5428 /* who needs KASSERTS when we have NULL derefs */
5429
5430 switch (pfl->pfl_type) {
5431 case PF_STATE_LINK_TYPE_STATELIM:
5432 gen = pf_statelim_enter(stlim);
5433 stlim->pfstlim_inuse--;
5434 pf_statelim_leave(stlim, gen);
5435
5436 stlim->pfstlim_rate_ts -=
5437 stlim->pfstlim_rate_token;
5438 list = &stlim->pfstlim_states;
5439 break;
5440 case PF_STATE_LINK_TYPE_SOURCELIM:
5441 gen = pf_sourcelim_enter(srlim);
5442 srlim->pfsrlim_counters.inuse--;
5443 pf_sourcelim_leave(srlim, gen);
5444
5445 sr->pfsr_rate_ts -=
5446 srlim->pfsrlim_rate_token;
5447 pf_source_rele(sr);
5448
5449 list = &sr->pfsr_states;
5450 break;
5451 default:
5452 panic("%s: unexpected link type on pfl %p",
5453 __func__, pfl);
5454 }
5455
5456 TAILQ_REMOVE(list, pfl, pfl_link);
5457 PF_ASSERT_LOCKED();
5458 pool_put(&pf_state_link_pl, pfl);
5459 }
5460
5461 pf_normalize_tcp_cleanup(st); /* safe even w/o init */
5462 pf_src_tree_remove_state(st);
5463 pool_put(&pf_state_pl, st);
5464 }
5465
5466 for (i = 0; i < PF_SN_MAX; i++)
5467 if (sns[i] != NULL)
5468 pf_remove_src_node(sns[i]);
5469
5470 return (PF_DROP);
5471}
5472
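/*
 * [editor's note, added] pf_translate() below rewrites headers to
 * the translated addresses and ports, returning the number of
 * patches applied (0 means nothing changed).  For ICMP/ICMPv6 echo,
 * the query id plays the role of a port.  On an address family
 * translation (af != naf) the ICMP header is converted here, but
 * the address rewrite is left to the caller.
 */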
5473int
5474pf_translate(struct pf_pdesc *pd, struct pf_addr *saddr, u_int16_t sport,
5475 struct pf_addr *daddr, u_int16_t dport, u_int16_t virtual_type,
5476 int icmp_dir)
5477{
5478 int rewrite = 0;
5479 int afto = pd->af != pd->naf;
5480
5481 if (afto || PF_ANEQ(daddr, pd->dst, pd->af))
5482 pd->destchg = 1;
5483
5484 switch (pd->proto) {
5485 case IPPROTO_TCP: /* FALLTHROUGH */
5486 case IPPROTO_UDP:
5487 rewrite += pf_patch_16(pd, pd->sport, sport);
5488 rewrite += pf_patch_16(pd, pd->dport, dport);
5489 break;
5490
5491 case IPPROTO_ICMP:
5492 if (pd->af != AF_INET)
5493 return (0);
5494
5495#ifdef INET6
5496 if (afto) {
5497 if (pf_translate_icmp_af(pd, AF_INET6, &pd->hdr.icmp))
5498 return (0);
5499 pd->proto = IPPROTO_ICMPV6;
5500 rewrite = 1;
5501 }
5502#endif /* INET6 */
5503 if (virtual_type == htons(ICMP_ECHO)) {
5504 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
5505 rewrite += pf_patch_16(pd,
5506 &pd->hdr.icmp.icmp_id, icmpid);
5507 }
5508 break;
5509
5510#ifdef INET6
5511 case IPPROTO_ICMPV6:
5512 if (pd->af != AF_INET6)
5513 return (0);
5514
5515 if (afto) {
5516 if (pf_translate_icmp_af(pd, AF_INET, &pd->hdr.icmp6))
5517 return (0);
5518 pd->proto = IPPROTO_ICMP;
5519 rewrite = 1;
5520 }
5521 if (virtual_type == htons(ICMP6_ECHO_REQUEST)) {
5522 u_int16_t icmpid = (icmp_dir == PF_IN) ? sport : dport;
5523 rewrite += pf_patch_16(pd,
5524 &pd->hdr.icmp6.icmp6_id, icmpid);
5525 }
5526 break;
5527#endif /* INET6 */
5528 }
5529
5530 if (!afto) {
5531 rewrite += pf_translate_a(pd, pd->src, saddr);
5532 rewrite += pf_translate_a(pd, pd->dst, daddr);
5533 }
5534
5535 return (rewrite);
5536}
5537
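/*
 * [editor's note, added] pf_tcp_track_full() below enforces the van
 * Rooij sequence-window checks.  Condensed, a packet is accepted
 * when
 *	data_end <= src->seqhi				('1')
 *	seq >= src->seqlo - (dst->max_win << dws)	('2')
 *	-MAXACKWINDOW <= ackskew <= MAXACKWINDOW << sws	('3'/'4')
 * with extra tolerance for RSTs; the digits match the failure codes
 * printed by the "BAD state" log at the bottom of the function.
 */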
5538int
5539pf_tcp_track_full(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason,
5540 int *copyback, int reverse)
5541{
5542 struct tcphdr *th = &pd->hdr.tcp;
5543 struct pf_state_peer *src, *dst;
5544 u_int16_t win = ntohs(th->th_win);
5545 u_int32_t ack, end, data_end, seq, orig_seq;
5546 u_int8_t sws, dws, psrc, pdst;
5547 int ackskew;
5548
5549 if ((pd->dir == (*stp)->direction && !reverse) ||
5550 (pd->dir != (*stp)->direction && reverse)) {
5551 src = &(*stp)->src;
5552 dst = &(*stp)->dst;
5553 psrc = PF_PEER_SRC;
5554 pdst = PF_PEER_DST;
5555 } else {
5556 src = &(*stp)->dst;
5557 dst = &(*stp)->src;
5558 psrc = PF_PEER_DST;
5559 pdst = PF_PEER_SRC;
5560 }
5561
5562 if (src->wscale && dst->wscale && !(th->th_flags & TH_SYN)) {
5563 sws = src->wscale & PF_WSCALE_MASK;
5564 dws = dst->wscale & PF_WSCALE_MASK;
5565 } else
5566 sws = dws = 0;
5567
5568 /*
5569 * Sequence tracking algorithm from Guido van Rooij's paper:
5570 * http://www.madison-gurkha.com/publications/tcp_filtering/
5571 * tcp_filtering.ps
5572 */
5573
5574 orig_seq = seq = ntohl(th->th_seq);
5575 if (src->seqlo == 0) {
5576 /* First packet from this end. Set its state */
5577
5578 if (((*stp)->state_flags & PFSTATE_SCRUB_TCP || dst->scrub) &&
5579 src->scrub == NULL) {
5580 if (pf_normalize_tcp_init(pd, src)) {
5581 REASON_SET(reason, PFRES_MEMORY);
5582 return (PF_DROP);
5583 }
5584 }
5585
5586 /* Deferred generation of sequence number modulator */
5587 if (dst->seqdiff && !src->seqdiff) {
5588 /* use random iss for the TCP server */
5589 while ((src->seqdiff = arc4random() - seq) == 0)
5590 continue;
5591 ack = ntohl(th->th_ack) - dst->seqdiff;
5592 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
5593 pf_patch_32(pd, &th->th_ack, htonl(ack));
5594 *copyback = 1;
5595 } else {
5596 ack = ntohl(th->th_ack);
5597 }
5598
5599 end = seq + pd->p_len;
5600 if (th->th_flags & TH_SYN) {
5601 end++;
5602 if (dst->wscale & PF_WSCALE_FLAG) {
5603 src->wscale = pf_get_wscale(pd);
5604 if (src->wscale & PF_WSCALE_FLAG) {
5605 /* Remove scale factor from initial
5606 * window */
5607 sws = src->wscale & PF_WSCALE_MASK;
5608 win = ((u_int32_t)win + (1 << sws) - 1)
5609 >> sws;
5610 dws = dst->wscale & PF_WSCALE_MASK;
5611 } else {
5612 /* fixup other window */
5613 dst->max_win = MIN(TCP_MAXWIN,
5614 (u_int32_t)dst->max_win <<
5615 (dst->wscale & PF_WSCALE_MASK));
5616 /* in case of a retrans SYN|ACK */
5617 dst->wscale = 0;
5618 }
5619 }
5620 }
5621 data_end = end;
5622 if (th->th_flags & TH_FIN)
5623 end++;
5624
5625 src->seqlo = seq;
5626 if (src->state < TCPS_SYN_SENT)
5627 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
5628
5629 /*
5630 * May need to slide the window (seqhi may have been set by
5631 * the crappy stack check or if we picked up the connection
5632 * after establishment)
5633 */
5634 if (src->seqhi == 1 ||
5635 SEQ_GEQ(end + MAX(1, dst->max_win << dws), src->seqhi))
5636 src->seqhi = end + MAX(1, dst->max_win << dws);
5637 if (win > src->max_win)
5638 src->max_win = win;
5639
5640 } else {
5641 ack = ntohl(th->th_ack) - dst->seqdiff;
5642 if (src->seqdiff) {
5643 /* Modulate sequence numbers */
5644 pf_patch_32(pd, &th->th_seq, htonl(seq + src->seqdiff));
5645 pf_patch_32(pd, &th->th_ack, htonl(ack));
5646 *copyback = 1;
5647 }
5648 end = seq + pd->p_len;
5649 if (th->th_flags & TH_SYN)
5650 end++;
5651 data_end = end;
5652 if (th->th_flags & TH_FIN)
5653 end++;
5654 }
5655
5656 if ((th->th_flags & TH_ACK) == 0) {
5657 /* Let it pass through the ack skew check */
5658 ack = dst->seqlo;
5659 } else if ((ack == 0 &&
5660 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)) ||
5661 /* broken tcp stacks do not set ack */
5662 (dst->state < TCPS_SYN_SENT)) {
5663 /*
5664 * Many stacks (ours included) will set the ACK number in an
5665 * FIN|ACK if the SYN times out -- no sequence to ACK.
5666 */
5667 ack = dst->seqlo;
5668 }
5669
5670 if (seq == end) {
5671 /* Ease sequencing restrictions on no data packets */
5672 seq = src->seqlo;
5673 data_end = end = seq;
5674 }
5675
5676 ackskew = dst->seqlo - ack;
5677
5678
5679 /*
5680 * Need to demodulate the sequence numbers in any TCP SACK options
5681 * (Selective ACK). We could optionally validate the SACK values
5682 * against the current ACK window, either forwards or backwards, but
5683 * I'm not confident that SACK has been implemented properly
5684 * everywhere. It wouldn't surprise me if several stacks accidentally
5685 * SACK too far backwards of previously ACKed data. There really aren't
5686 * any security implications of bad SACKing unless the target stack
5687 * doesn't validate the option length correctly. Someone trying to
5688 * spoof into a TCP connection won't bother blindly sending SACK
5689 * options anyway.
5690 */
5691 if (dst->seqdiff && (th->th_off << 2) > sizeof(struct tcphdr)) {
5692 if (pf_modulate_sack(pd, dst))
5693 *copyback = 1;
5694 }
5695
5696
5697#define MAXACKWINDOW (0xffff + 1500) /* 1500 is an arbitrary fudge factor */
5698 if (SEQ_GEQ(src->seqhi, data_end) &&
5699 /* Last octet inside other's window space */
5700 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) &&
5701 /* Retrans: not more than one window back */
5702 (ackskew >= -MAXACKWINDOW) &&
5703 /* Acking not more than one reassembled fragment backwards */
5704 (ackskew <= (MAXACKWINDOW << sws)) &&
5705 /* Acking not more than one window forward */
5706 ((th->th_flags & TH_RST) == 0 || orig_seq == src->seqlo ||
5707 (orig_seq == src->seqlo + 1) || (orig_seq + 1 == src->seqlo) ||
5708 /* Require an exact/+1 sequence match on resets when possible */
5709 (SEQ_GEQ(orig_seq, src->seqlo - (dst->max_win << dws)) &&
5710 SEQ_LEQ(orig_seq, src->seqlo + 1) && ackskew == 0 &&
5711 (th->th_flags & (TH_ACK|TH_RST)) == (TH_ACK|TH_RST)))) {
5712 /* Allow resets to match sequence window if ack is perfect match */
5713
5714 if (dst->scrub || src->scrub) {
5715 if (pf_normalize_tcp_stateful(pd, reason, *stp, src,
5716 dst, copyback))
5717 return (PF_DROP);
5718 }
5719
5720 /* update max window */
5721 if (src->max_win < win)
5722 src->max_win = win;
5723 /* synchronize sequencing */
5724 if (SEQ_GT(end, src->seqlo))
5725 src->seqlo = end;
5726 /* slide the window of what the other end can send */
5727 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
5728 dst->seqhi = ack + MAX((win << sws), 1);
5729
5730 /* update states */
5731 if (th->th_flags & TH_SYN)
5732 if (src->state < TCPS_SYN_SENT)
5733 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
5734 if (th->th_flags & TH_FIN)
5735 if (src->state < TCPS_CLOSING)
5736 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
5737 if (th->th_flags & TH_ACK) {
5738 if (dst->state == TCPS_SYN_SENT) {
5739 pf_set_protostate(*stp, pdst,
5740 TCPS_ESTABLISHED);
5741 if (src->state == TCPS_ESTABLISHED &&
5742 !SLIST_EMPTY(&(*stp)->src_nodes) &&
5743 pf_src_connlimit(stp)) {
5744 REASON_SET(reason, PFRES_SRCLIMIT);
5745 return (PF_DROP);
5746 }
5747 } else if (dst->state == TCPS_CLOSING)
5748 pf_set_protostate(*stp, pdst,
5749 TCPS_FIN_WAIT_2);
5750 }
5751 if (th->th_flags & TH_RST)
5752 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
5753
5754 /* update expire time */
5755 (*stp)->expire = getuptime();
5756 if (src->state >= TCPS_FIN_WAIT_2 &&
5757 dst->state >= TCPS_FIN_WAIT_2)
5758 pf_update_state_timeout(*stp, PFTM_TCP_CLOSED);
5759 else if (src->state >= TCPS_CLOSING &&
5760 dst->state >= TCPS_CLOSING)
5761 pf_update_state_timeout(*stp, PFTM_TCP_FIN_WAIT);
5762 else if (src->state < TCPS_ESTABLISHED ||
5763 dst->state < TCPS_ESTABLISHED)
5764 pf_update_state_timeout(*stp, PFTM_TCP_OPENING);
5765 else if (src->state >= TCPS_CLOSING ||
5766 dst->state >= TCPS_CLOSING)
5767 pf_update_state_timeout(*stp, PFTM_TCP_CLOSING);
5768 else
5769 pf_update_state_timeout(*stp, PFTM_TCP_ESTABLISHED);
5770
5771 /* Fall through to PASS packet */
5772 } else if ((dst->state < TCPS_SYN_SENT ||
5773 dst->state >= TCPS_FIN_WAIT_2 ||
5774 src->state >= TCPS_FIN_WAIT_2) &&
5775 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) &&
5776 /* Within a window forward of the originating packet */
5777 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW)) {
5778 /* Within a window backward of the originating packet */
5779
5780 /*
5781 * This currently handles three situations:
5782 * 1) Stupid stacks will shotgun SYNs before their peer
5783 * replies.
5784 * 2) When PF catches an already established stream (the
5785 * firewall rebooted, the state table was flushed, routes
5786 * changed...)
5787 * 3) Packets get funky immediately after the connection
5788 * closes (this should catch Solaris spurious ACK|FINs
5789 * that web servers like to spew after a close)
5790 *
5791 * This must be a little more careful than the above code
5792 * since packet floods will also be caught here. We don't
5793 * update the TTL here to mitigate the damage of a packet
5794 * flood and so the same code can handle awkward establishment
5795 * and a loosened connection close.
5796 * In the establishment case, a correct peer response will
5797 * validate the connection, go through the normal state code
5798 * and keep updating the state TTL.
5799 */
5800
5801 if (pf_status.debug >= LOG_NOTICE) {
5802 log(LOG_NOTICE, "pf: loose state match: ");
5803 pf_print_state(*stp);
5804 pf_print_flags(th->th_flags);
5805 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5806 "pkts=%llu:%llu dir=%s,%s\n", seq, orig_seq, ack,
5807 pd->p_len, ackskew, (*stp)->packets[0],
5808 (*stp)->packets[1],
5809 pd->dir == PF_IN ? "in" : "out",
5810 pd->dir == (*stp)->direction ? "fwd" : "rev");
5811 }
5812
5813 if (dst->scrub || src->scrub) {
5814 if (pf_normalize_tcp_stateful(pd, reason, *stp, src,
5815 dst, copyback))
5816 return (PF_DROP);
5817 }
5818
5819 /* update max window */
5820 if (src->max_win < win)
5821 src->max_win = win;
5822 /* synchronize sequencing */
5823 if (SEQ_GT(end, src->seqlo))
5824 src->seqlo = end;
5825 /* slide the window of what the other end can send */
5826 if (SEQ_GEQ(ack + (win << sws), dst->seqhi))
5827 dst->seqhi = ack + MAX((win << sws), 1);
5828
5829 /*
5830 * Cannot set dst->seqhi here since this could be a shotgunned
5831 * SYN and not an already established connection.
5832 */
5833 if (th->th_flags & TH_FIN)
5834 if (src->state < TCPS_CLOSING)
5835 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
5836 if (th->th_flags & TH_RST)
5837 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
5838
5839 /* Fall through to PASS packet */
5840 } else {
5841 if ((*stp)->dst.state == TCPS_SYN_SENT &&
5842 (*stp)->src.state == TCPS_SYN_SENT) {
5843 /* Send RST for state mismatches during handshake */
5844 if (!(th->th_flags & TH_RST))
5845 pf_send_tcp((*stp)->rule.ptr, pd->af,
5846 pd->dst, pd->src, th->th_dport,
5847 th->th_sport, ntohl(th->th_ack), 0,
5848 TH_RST, 0, 0,
5849 (*stp)->rule.ptr->return_ttl, 1, 0,
5850 pd->rdomain, reason);
5851 src->seqlo = 0;
5852 src->seqhi = 1;
5853 src->max_win = 1;
5854 } else if (pf_status.debug >= LOG_NOTICE) {
5855 log(LOG_NOTICE, "pf: BAD state: ");
5856 pf_print_state(*stp);
5857 pf_print_flags(th->th_flags);
5858 addlog(" seq=%u (%u) ack=%u len=%u ackskew=%d "
5859 "pkts=%llu:%llu dir=%s,%s\n",
5860 seq, orig_seq, ack, pd->p_len, ackskew,
5861 (*stp)->packets[0], (*stp)->packets[1],
5862 pd->dir == PF_IN ? "in" : "out",
5863 pd->dir == (*stp)->direction ? "fwd" : "rev");
5864 addlog("pf: State failure on: %c %c %c %c | %c %c\n",
5865 SEQ_GEQ(src->seqhi, data_end) ? ' ' : '1',
5866 SEQ_GEQ(seq, src->seqlo - (dst->max_win << dws)) ?
5867 ' ': '2',
5868 (ackskew >= -MAXACKWINDOW) ? ' ' : '3',
5869 (ackskew <= (MAXACKWINDOW << sws)) ? ' ' : '4',
5870 SEQ_GEQ(src->seqhi + MAXACKWINDOW, data_end) ?
5871 ' ' :'5',
5872 SEQ_GEQ(seq, src->seqlo - MAXACKWINDOW) ?' ' :'6');
5873 }
5874 REASON_SET(reason, PFRES_BADSTATE);
5875 return (PF_DROP);
5876 }
5877
5878 return (PF_PASS);
5879}
5880
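/*
 * [editor's note, added] pf_tcp_track_sloppy() below is the reduced
 * tracker for "sloppy" states (e.g. asymmetric paths where only one
 * direction is seen): it advances the per-peer TCP state machine
 * from the observed flags alone and performs no sequence-window
 * validation.
 */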
5881int
5882pf_tcp_track_sloppy(struct pf_pdesc *pd, struct pf_state **stp,
5883 u_short *reason)
5884{
5885 struct tcphdr *th = &pd->hdr.tcp;
5886 struct pf_state_peer *src, *dst;
5887 u_int8_t psrc, pdst;
5888
5889 if (pd->dir == (*stp)->direction) {
5890 src = &(*stp)->src;
5891 dst = &(*stp)->dst;
5892 psrc = PF_PEER_SRC;
5893 pdst = PF_PEER_DST;
5894 } else {
5895 src = &(*stp)->dst;
5896 dst = &(*stp)->src;
5897 psrc = PF_PEER_DST;
5898 pdst = PF_PEER_SRC;
5899 }
5900
5901 if (th->th_flags & TH_SYN)
5902 if (src->state < TCPS_SYN_SENT)
5903 pf_set_protostate(*stp, psrc, TCPS_SYN_SENT);
5904 if (th->th_flags & TH_FIN)
5905 if (src->state < TCPS_CLOSING)
5906 pf_set_protostate(*stp, psrc, TCPS_CLOSING);
5907 if (th->th_flags & TH_ACK) {
5908 if (dst->state == TCPS_SYN_SENT) {
5909 pf_set_protostate(*stp, pdst, TCPS_ESTABLISHED);
5910 if (src->state == TCPS_ESTABLISHED &&
5911 !SLIST_EMPTY(&(*stp)->src_nodes) &&
5912 pf_src_connlimit(stp)) {
5913 REASON_SET(reason, PFRES_SRCLIMIT);
5914 return (PF_DROP);
5915 }
5916 } else if (dst->state == TCPS_CLOSING) {
5917 pf_set_protostate(*stp, pdst, TCPS_FIN_WAIT_2);
5918 } else if (src->state == TCPS_SYN_SENT &&
5919 dst->state < TCPS_SYN_SENT) {
5920 /*
5921 * Handle a special sloppy case where we only see one
5922			 * half of the connection. If there is an ACK after
5923 * the initial SYN without ever seeing a packet from
5924 * the destination, set the connection to established.
5925 */
5926 pf_set_protostate(*stp, PF_PEER_BOTH,
5927 TCPS_ESTABLISHED);
5928 if (!SLIST_EMPTY(&(*stp)->src_nodes) &&
5929 pf_src_connlimit(stp)) {
5930 REASON_SET(reason, PFRES_SRCLIMIT);
5931 return (PF_DROP);
5932 }
5933 } else if (src->state == TCPS_CLOSING &&
5934 dst->state == TCPS_ESTABLISHED &&
5935 dst->seqlo == 0) {
5936 /*
5937 * Handle the closing of half connections where we
5938 * don't see the full bidirectional FIN/ACK+ACK
5939 * handshake.
5940 */
5941 pf_set_protostate(*stp, pdst, TCPS_CLOSING);
5942 }
5943 }
5944 if (th->th_flags & TH_RST)
5945 pf_set_protostate(*stp, PF_PEER_BOTH, TCPS_TIME_WAIT);
5946
5947 /* update expire time */
5948 (*stp)->expire = getuptime();
5949 if (src->state >= TCPS_FIN_WAIT_2 &&
5950 dst->state >= TCPS_FIN_WAIT_2)
5951 pf_update_state_timeout(*stp, PFTM_TCP_CLOSED);
5952 else if (src->state >= TCPS_CLOSING &&
5953 dst->state >= TCPS_CLOSING)
5954 pf_update_state_timeout(*stp, PFTM_TCP_FIN_WAIT);
5955 else if (src->state < TCPS_ESTABLISHED ||
5956 dst->state < TCPS_ESTABLISHED)
5957 pf_update_state_timeout(*stp, PFTM_TCP_OPENING);
5958 else if (src->state >= TCPS_CLOSING ||
5959 dst->state >= TCPS_CLOSING)
5960 pf_update_state_timeout(*stp, PFTM_TCP_CLOSING);
5961 else
5962 pf_update_state_timeout(*stp, PFTM_TCP_ESTABLISHED);
5963
5964 return (PF_PASS);
5965}
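/*
 * Sloppy tracking, by contrast, skips the sequence window checks
 * entirely and only follows the flag-driven state machine above.  It
 * exists for asymmetric setups where pf sees just one half of a
 * connection.  It is selected per rule; an illustrative pf.conf line:
 *
 *	pass in proto tcp to port 80 keep state (sloppy)
 */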
5966
5967static __inline int
5968pf_synproxy(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason)
5969{
5970 struct pf_state_key *sk = (*stp)->key[pd->didx];
5971
5972 if ((*stp)->src.state == PF_TCPS_PROXY_SRC) {
5973 struct tcphdr *th = &pd->hdr.tcp;
5974
5975 if (pd->dir != (*stp)->direction) {
5976 REASON_SET(reason, PFRES_SYNPROXY);
5977 return (PF_SYNPROXY_DROP);
5978 }
5979 if (th->th_flags & TH_SYN) {
5980 if (ntohl(th->th_seq) != (*stp)->src.seqlo) {
5981 REASON_SET(reason, PFRES_SYNPROXY);
5982 return (PF_DROP);
5983 }
5984 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst,
5985 pd->src, th->th_dport, th->th_sport,
5986 (*stp)->src.seqhi, ntohl(th->th_seq) + 1,
5987 TH_SYN|TH_ACK, 0, (*stp)->src.mss, 0, 1,
5988 0, pd->rdomain, reason);
5989 REASON_SET(reason, PFRES_SYNPROXY);
5990 return (PF_SYNPROXY_DROP);
5991 } else if ((th->th_flags & (TH_ACK|TH_RST|TH_FIN)) != TH_ACK ||
5992 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) ||
5993 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) {
5994 REASON_SET(reason, PFRES_SYNPROXY);
5995 return (PF_DROP);
5996 } else if (!SLIST_EMPTY(&(*stp)->src_nodes) &&
5997 pf_src_connlimit(stp)) {
5998 REASON_SET(reason, PFRES_SRCLIMIT);
5999 return (PF_DROP);
6000 } else
6001 pf_set_protostate(*stp, PF_PEER_SRC,
6002 PF_TCPS_PROXY_DST);
6003 }
6004 if ((*stp)->src.state == PF_TCPS_PROXY_DST) {
6005 struct tcphdr *th = &pd->hdr.tcp;
6006
6007 if (pd->dir == (*stp)->direction) {
6008 if (((th->th_flags & (TH_SYN|TH_ACK)) != TH_ACK) ||
6009 (ntohl(th->th_ack) != (*stp)->src.seqhi + 1) ||
6010 (ntohl(th->th_seq) != (*stp)->src.seqlo + 1)) {
6011 REASON_SET(reason, PFRES_SYNPROXY);
6012 return (PF_DROP);
6013 }
6014 (*stp)->src.max_win = MAX(ntohs(th->th_win), 1);
6015 if ((*stp)->dst.seqhi == 1)
6016 (*stp)->dst.seqhi = arc4random();
6017 pf_send_tcp((*stp)->rule.ptr, pd->af,
6018 &sk->addr[pd->sidx], &sk->addr[pd->didx],
6019 sk->port[pd->sidx], sk->port[pd->didx],
6020 (*stp)->dst.seqhi, 0, TH_SYN, 0,
6021 (*stp)->src.mss, 0, 0, (*stp)->tag,
6022 sk->rdomain, reason);
6023 REASON_SET(reason, PFRES_SYNPROXY);
6024 return (PF_SYNPROXY_DROP);
6025 } else if (((th->th_flags & (TH_SYN|TH_ACK)) !=
6026 (TH_SYN|TH_ACK)) ||
6027 (ntohl(th->th_ack) != (*stp)->dst.seqhi + 1)) {
6028 REASON_SET(reason, PFRES_SYNPROXY);
6029 return (PF_DROP);
6030 } else {
6031 (*stp)->dst.max_win = MAX(ntohs(th->th_win), 1);
6032 (*stp)->dst.seqlo = ntohl(th->th_seq);
6033 pf_send_tcp((*stp)->rule.ptr, pd->af, pd->dst,
6034 pd->src, th->th_dport, th->th_sport,
6035 ntohl(th->th_ack), ntohl(th->th_seq) + 1,
6036 TH_ACK, (*stp)->src.max_win, 0, 0, 0,
6037 (*stp)->tag, pd->rdomain, reason);
6038 pf_send_tcp((*stp)->rule.ptr, pd->af,
6039 &sk->addr[pd->sidx], &sk->addr[pd->didx],
6040 sk->port[pd->sidx], sk->port[pd->didx],
6041 (*stp)->src.seqhi + 1, (*stp)->src.seqlo + 1,
6042 TH_ACK, (*stp)->dst.max_win, 0, 0, 1,
6043 0, sk->rdomain, reason);
6044 (*stp)->src.seqdiff = (*stp)->dst.seqhi -
6045 (*stp)->src.seqlo;
6046 (*stp)->dst.seqdiff = (*stp)->src.seqhi -
6047 (*stp)->dst.seqlo;
6048 (*stp)->src.seqhi = (*stp)->src.seqlo +
6049 (*stp)->dst.max_win;
6050 (*stp)->dst.seqhi = (*stp)->dst.seqlo +
6051 (*stp)->src.max_win;
6052 (*stp)->src.wscale = (*stp)->dst.wscale = 0;
6053 pf_set_protostate(*stp, PF_PEER_BOTH,
6054 TCPS_ESTABLISHED);
6055 REASON_SET(reason, PFRES_SYNPROXY);
6056 return (PF_SYNPROXY_DROP);
6057 }
6058 }
6059 return (PF_PASS);
6060}
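/*
 * Synproxy in brief: in PF_TCPS_PROXY_SRC pf answers the client's
 * SYN itself and validates the final ACK of the handshake; only then
 * (PF_TCPS_PROXY_DST) does it SYN to the real destination, and once
 * that side answers it ACKs both ends and splices the two half
 * connections together, using the src/dst seqdiff values computed
 * above to translate sequence numbers for the rest of the flow.  The
 * server never sees a SYN from a client that cannot complete a
 * handshake.  Enabled per rule, e.g.:
 *
 *	pass in proto tcp to port 80 synproxy state
 */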
6061
6062static __inline int
6063pf_synproxy_ack(struct pf_rule *r, struct pf_pdesc *pd, struct pf_state **sm,
6064 struct pf_rule_actions *act)
6065{
6066 struct tcphdr *th = &pd->hdr.tcp;
6067 struct pf_state *s;
6068 u_int16_t mss, mssdflt;
6069 int rtid;
6070 u_short reason;
6071
6072 if ((th->th_flags & (TH_SYN|TH_ACK)) != TH_SYN)
6073 return (PF_PASS);
6074
6075 s = *sm;
6076 rtid = (act->rtableid >= 0) ? act->rtableid : pd->rdomain;
6077
6078 pf_set_protostate(s, PF_PEER_SRC, PF_TCPS_PROXY_SRC);
6079 s->src.seqhi = arc4random();
6080 /* Find mss option */
6081 mssdflt = atomic_load_int(&tcp_mssdflt);
6082 mss = pf_get_mss(pd, mssdflt);
6083 mss = pf_calc_mss(pd->src, pd->af, rtid, mss, mssdflt);
6084 mss = pf_calc_mss(pd->dst, pd->af, rtid, mss, mssdflt);
6085 s->src.mss = mss;
6086
6087 pf_send_tcp(r, pd->af, pd->dst, pd->src, th->th_dport,
6088 th->th_sport, s->src.seqhi, ntohl(th->th_seq) + 1,
6089 TH_SYN|TH_ACK, 0, s->src.mss, 0, 1, 0, pd->rdomain, NULL);
6090
6091 REASON_SET(&reason, PFRES_SYNPROXY);
6092 return (PF_SYNPROXY_DROP);
6093}
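/*
 * The proxied SYN+ACK above advertises an MSS clamped by
 * pf_calc_mss() against the route MTU towards each endpoint, so the
 * spliced connection should never produce segments that need
 * fragmentation in either direction.
 */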
6094
6095int
6096pf_test_state(struct pf_pdesc *pd, struct pf_state **stp, u_short *reason)
6097{
6098 int copyback = 0;
6099 struct pf_state_peer *src, *dst;
6100 int action;
6101 u_int8_t psrc, pdst;
6102
6103 action = PF_PASS;
6104 if (pd->dir == (*stp)->direction) {
6105 src = &(*stp)->src;
6106 dst = &(*stp)->dst;
6107 psrc = PF_PEER_SRC;
6108 pdst = PF_PEER_DST;
6109 } else {
6110 src = &(*stp)->dst;
6111 dst = &(*stp)->src;
6112 psrc = PF_PEER_DST;
6113 pdst = PF_PEER_SRC;
6114 }
6115
6116 switch (pd->virtual_proto) {
6117 case IPPROTO_TCP:
6118 if ((action = pf_synproxy(pd, stp, reason)) != PF_PASS)
6119 return (action);
6120 if ((pd->hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN) {
6121
6122 if (dst->state >= TCPS_FIN_WAIT_2 &&
6123 src->state >= TCPS_FIN_WAIT_2) {
6124 if (pf_status.debug >= LOG_NOTICE) {
6125 log(LOG_NOTICE, "pf: state reuse ");
6126 pf_print_state(*stp);
6127 pf_print_flags(pd->hdr.tcp.th_flags);
6128 addlog("\n");
6129 }
6130 /* XXX make sure it's the same direction ?? */
6131 pf_update_state_timeout(*stp, PFTM_PURGE);
6132 pf_state_unref(*stp);
6133 *stp = NULL;
6134 return (PF_DROP);
6135 } else if (dst->state >= TCPS_ESTABLISHED &&
6136 src->state >= TCPS_ESTABLISHED) {
6137 /*
6138 * SYN matches existing state???
6139				 * Typically happens when the sender boots
6140				 * up after a sudden panic. Certain protocols
6141				 * (NFSv3) always use the same port numbers.
6141				 * A challenge
6142 * ACK enables all parties (firewall and peers)
6143 * to get in sync again.
6144 */
6145 pf_send_challenge_ack(pd, *stp, src, dst,
6146 reason);
6147 return (PF_DROP);
6148 }
6149 }
6150
6151 if ((*stp)->state_flags & PFSTATE_SLOPPY) {
6152 if (pf_tcp_track_sloppy(pd, stp, reason) == PF_DROP)
6153 return (PF_DROP);
6154 } else {
6155		if (pf_tcp_track_full(pd, stp, reason, &copyback,
6156 PF_REVERSED_KEY((*stp)->key, pd->af)) == PF_DROP)
6157 return (PF_DROP);
6158 }
6159 break;
6160 case IPPROTO_UDP:
6161 /* update states */
6162 if (src->state < PFUDPS_SINGLE)
6163 pf_set_protostate(*stp, psrc, PFUDPS_SINGLE);
6164 if (dst->state == PFUDPS_SINGLE)
6165 pf_set_protostate(*stp, pdst, PFUDPS_MULTIPLE);
6166
6167 /* update expire time */
6168 (*stp)->expire = getuptime();
6169 if (src->state == PFUDPS_MULTIPLE &&
6170 dst->state == PFUDPS_MULTIPLE)
6171 pf_update_state_timeout(*stp, PFTM_UDP_MULTIPLE);
6172 else
6173 pf_update_state_timeout(*stp, PFTM_UDP_SINGLE);
6174 break;
6175 default:
6176 /* update states */
6177 if (src->state < PFOTHERS_SINGLE)
6178 pf_set_protostate(*stp, psrc, PFOTHERS_SINGLE);
6179 if (dst->state == PFOTHERS_SINGLE)
6180 pf_set_protostate(*stp, pdst, PFOTHERS_MULTIPLE);
6181
6182 /* update expire time */
6183 (*stp)->expire = getuptime();
6184 if (src->state == PFOTHERS_MULTIPLE &&
6185 dst->state == PFOTHERS_MULTIPLE)
6186 pf_update_state_timeout(*stp, PFTM_OTHER_MULTIPLE);
6187 else
6188 pf_update_state_timeout(*stp, PFTM_OTHER_SINGLE);
6189 break;
6190 }
6191
6192 /* translate source/destination address, if necessary */
6193 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) {
6194 struct pf_state_key *nk;
6195 int afto, sidx, didx;
6196
6197 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6198 nk = (*stp)->key[pd->sidx];
6199 else
6200 nk = (*stp)->key[pd->didx];
6201
6202 afto = pd->af != nk->af;
6203 sidx = afto ? pd->didx : pd->sidx;
6204 didx = afto ? pd->sidx : pd->didx;
6205
6206#ifdef INET6
6207 if (afto) {
6208 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx], nk->af);
6209 pf_addrcpy(&pd->ndaddr, &nk->addr[didx], nk->af);
6210 pd->naf = nk->af;
6211 action = PF_AFRT;
6212 }
6213#endif /* INET6 */
6214
6215 if (!afto)
6216 pf_translate_a(pd, pd->src, &nk->addr[sidx]);
6217
6218 if (pd->sport != NULL)
6219 pf_patch_16(pd, pd->sport, nk->port[sidx]);
6220
6221 if (afto || PF_ANEQ(pd->dst, &nk->addr[didx], pd->af) ||
6222 pd->rdomain != nk->rdomain)
6223 pd->destchg = 1;
6224
6225 if (!afto)
6226 pf_translate_a(pd, pd->dst, &nk->addr[didx]);
6227
6228 if (pd->dport != NULL)
6229 pf_patch_16(pd, pd->dport, nk->port[didx]);
6230
6231 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6232 copyback = 1;
6233 }
6234
6235 if (copyback && pd->hdrlen > 0) {
6236 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
6237 }
6238
6239 return (action);
6240}
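/*
 * The PF_AFRT path above is what implements af-to (NAT64-style
 * address family translation): the state's other key carries the
 * peer's af, and pd->naf/nsaddr/ndaddr tell the caller to re-inject
 * the packet into the other IP stack.  An illustrative pf.conf rule
 * (prefix and address are placeholders):
 *
 *	pass in inet6 from any to 64:ff9b::/96 af-to inet from 198.51.100.1
 */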
6241
6242int
6243pf_icmp_state_lookup(struct pf_pdesc *pd, struct pf_state_key_cmp *key,
6244 struct pf_state **stp, u_int16_t icmpid, u_int16_t type,
6245 int icmp_dir, int *iidx, int multi, int inner)
6246{
6247 int direction, action;
6248
6249 key->af = pd->af;
6250 key->proto = pd->proto;
6251 key->rdomain = pd->rdomain;
6252 if (icmp_dir == PF_IN) {
6253 *iidx = pd->sidx;
6254 key->port[pd->sidx] = icmpid;
6255 key->port[pd->didx] = type;
6256 } else {
6257 *iidx = pd->didx;
6258 key->port[pd->sidx] = type;
6259 key->port[pd->didx] = icmpid;
6260 }
6261
6262 if (pf_state_key_addr_setup(pd, key, pd->sidx, pd->src, pd->didx,
6263 pd->dst, pd->af, multi))
6264 return (PF_DROP);
6265
6266 key->hash = pf_pkt_hash(key->af, key->proto,
6267 &key->addr[0], &key->addr[1], 0, 0);
6268
6269 action = pf_find_state(pd, key, stp);
6270 if (action != PF_MATCH)
6271 return (action);
6272
6273 if ((*stp)->state_flags & PFSTATE_SLOPPY)
6274 return (-1);
6275
6276	/* Is this ICMP message flowing in the right direction? */
6277 if ((*stp)->key[PF_SK_WIRE]->af != (*stp)->key[PF_SK_STACK]->af)
6278 direction = (pd->af == (*stp)->key[PF_SK_WIRE]->af) ?
6279 PF_IN : PF_OUT;
6280 else
6281 direction = (*stp)->direction;
6282 if ((((!inner && direction == pd->dir) ||
6283 (inner && direction != pd->dir)) ?
6284 PF_IN : PF_OUT) != icmp_dir) {
6285 if (pf_status.debug >= LOG_NOTICE) {
6286 log(LOG_NOTICE,
6287 "pf: icmp type %d in wrong direction (%d): ",
6288 ntohs(type), icmp_dir);
6289 pf_print_state(*stp);
6290 addlog("\n");
6291 }
6292 return (PF_DROP);
6293 }
6294 return (-1);
6295}
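/*
 * ICMP queries carry no ports, so pf_icmp_state_lookup() reuses the
 * state key port slots: the ICMP id sits on the querier's side and
 * the virtual type on the other.  An echo reply therefore maps to
 * the same key as the request that created the state, and the
 * direction check above catches replies flowing the wrong way.
 */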
6296
6297int
6298pf_test_state_icmp(struct pf_pdesc *pd, struct pf_state **stp,
6299 u_short *reason)
6300{
6301 u_int16_t virtual_id, virtual_type;
6302 u_int8_t icmptype, icmpcode;
6303 int icmp_dir, iidx, ret, copyback = 0;
6304
6305 struct pf_state_key_cmp key;
6306
6307 switch (pd->proto) {
6308 case IPPROTO_ICMP:
6309 icmptype = pd->hdr.icmp.icmp_type;
6310 icmpcode = pd->hdr.icmp.icmp_code;
6311 break;
6312#ifdef INET6
6313 case IPPROTO_ICMPV6:
6314 icmptype = pd->hdr.icmp6.icmp6_type;
6315 icmpcode = pd->hdr.icmp6.icmp6_code;
6316 break;
6317#endif /* INET6 */
6318 default:
6319 panic("unhandled proto %d", pd->proto);
6320 }
6321
6322 if (pf_icmp_mapping(pd, icmptype, &icmp_dir, &virtual_id,
6323 &virtual_type) == 0) {
6324 /*
6325 * ICMP query/reply message not related to a TCP/UDP packet.
6326 * Search for an ICMP state.
6327 */
6328 ret = pf_icmp_state_lookup(pd, &key, stp,
6329 virtual_id, virtual_type, icmp_dir, &iidx,
6330 0, 0);
6331 /* IPv6? try matching a multicast address */
6332 if (ret == PF_DROP && pd->af == AF_INET6 && icmp_dir == PF_OUT)
6333 ret = pf_icmp_state_lookup(pd, &key, stp, virtual_id,
6334 virtual_type, icmp_dir, &iidx, 1, 0);
6335 if (ret >= 0)
6336 return (ret);
6337
6338 (*stp)->expire = getuptime();
6339 pf_update_state_timeout(*stp, PFTM_ICMP_ERROR_REPLY);
6340
6341 /* translate source/destination address, if necessary */
6342 if ((*stp)->key[PF_SK_WIRE] != (*stp)->key[PF_SK_STACK]) {
6343 struct pf_state_key *nk;
6344 int afto, sidx, didx;
6345
6346 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6347 nk = (*stp)->key[pd->sidx];
6348 else
6349 nk = (*stp)->key[pd->didx];
6350
6351 afto = pd->af != nk->af;
6352 sidx = afto ? pd->didx : pd->sidx;
6353 didx = afto ? pd->sidx : pd->didx;
6354 iidx = afto ? !iidx : iidx;
6355#ifdef INET6
6356 if (afto) {
6357 pf_addrcpy(&pd->nsaddr, &nk->addr[sidx],
6358 nk->af);
6359 pf_addrcpy(&pd->ndaddr, &nk->addr[didx],
6360 nk->af);
6361 pd->naf = nk->af;
6362 }
6363#endif /* INET6 */
6364 if (!afto) {
6365 pf_translate_a(pd, pd->src, &nk->addr[sidx]);
6366 pf_translate_a(pd, pd->dst, &nk->addr[didx]);
6367 }
6368
6369 if (pd->rdomain != nk->rdomain)
6370 pd->destchg = 1;
6371 if (!afto && PF_ANEQ(pd->dst,
6372 &nk->addr[didx], pd->af))
6373 pd->destchg = 1;
6374 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6375
6376 switch (pd->af) {
6377 case AF_INET:
6378#ifdef INET6
6379 if (afto) {
6380 if (pf_translate_icmp_af(pd, AF_INET6,
6381 &pd->hdr.icmp))
6382 return (PF_DROP);
6383 pd->proto = IPPROTO_ICMPV6;
6384 }
6385#endif /* INET6 */
6386 pf_patch_16(pd,
6387 &pd->hdr.icmp.icmp_id, nk->port[iidx]);
6388
6389 m_copyback(pd->m, pd->off, ICMP_MINLEN,
6390 &pd->hdr.icmp, M_NOWAIT);
6391 copyback = 1;
6392 break;
6393#ifdef INET6
6394 case AF_INET6:
6395 if (afto) {
6396 if (pf_translate_icmp_af(pd, AF_INET,
6397 &pd->hdr.icmp6))
6398 return (PF_DROP);
6399 pd->proto = IPPROTO_ICMP;
6400 }
6401
6402 pf_patch_16(pd,
6403 &pd->hdr.icmp6.icmp6_id, nk->port[iidx]);
6404
6405 m_copyback(pd->m, pd->off,
6406 sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
6407 M_NOWAIT);
6408 copyback = 1;
6409 break;
6410#endif /* INET6 */
6411 }
6412#ifdef INET6
6413 if (afto)
6414 return (PF_AFRT);
6415#endif /* INET6 */
6416 }
6417 } else {
6418 /*
6419 * ICMP error message in response to a TCP/UDP packet.
6420 * Extract the inner TCP/UDP header and search for that state.
6421 */
6422 struct pf_pdesc pd2;
6423 struct ip h2;
6424#ifdef INET6
6425 struct ip6_hdr h2_6;
6426#endif /* INET6 */
6427 int ipoff2;
6428
6429		/* Initialize the pd2 fields valid for both packets from pd. */
6430 memset(&pd2, 0, sizeof(pd2));
6431 pd2.af = pd->af;
6432 pd2.dir = pd->dir;
6433 pd2.kif = pd->kif;
6434 pd2.m = pd->m;
6435 pd2.rdomain = pd->rdomain;
6436 /* Payload packet is from the opposite direction. */
6437 pd2.sidx = (pd2.dir == PF_IN) ? 1 : 0;
6438 pd2.didx = (pd2.dir == PF_IN) ? 0 : 1;
6439 switch (pd->af) {
6440 case AF_INET:
6441 /* offset of h2 in mbuf chain */
6442 ipoff2 = pd->off + ICMP_MINLEN;
6443
6444 if (!pf_pull_hdr(pd2.m, ipoff2, &h2, sizeof(h2),
6445 reason, pd2.af)) {
6446 DPFPRINTF(LOG_NOTICE,
6447 "ICMP error message too short (ip)");
6448 return (PF_DROP);
6449 }
6450 /*
6451 * ICMP error messages don't refer to non-first
6452 * fragments
6453 */
6454 if (h2.ip_off & htons(IP_OFFMASK)) {
6455 REASON_SET(reason, PFRES_FRAG);
6456 return (PF_DROP);
6457 }
6458
6459 /* offset of protocol header that follows h2 */
6460 pd2.off = ipoff2;
6461 if (pf_walk_header(&pd2, &h2, reason) != PF_PASS)
6462 return (PF_DROP);
6463
6464 pd2.tot_len = ntohs(h2.ip_len);
6465 pd2.ttl = h2.ip_ttl;
6466 pd2.src = (struct pf_addr *)&h2.ip_src;
6467 pd2.dst = (struct pf_addr *)&h2.ip_dst;
6468 break;
6469#ifdef INET6
6470 case AF_INET6:
6471 ipoff2 = pd->off + sizeof(struct icmp6_hdr);
6472
6473 if (!pf_pull_hdr(pd2.m, ipoff2, &h2_6, sizeof(h2_6),
6474 reason, pd2.af)) {
6475 DPFPRINTF(LOG_NOTICE,
6476 "ICMP error message too short (ip6)");
6477 return (PF_DROP);
6478 }
6479
6480 pd2.off = ipoff2;
6481 if (pf_walk_header6(&pd2, &h2_6, reason) != PF_PASS)
6482 return (PF_DROP);
6483
6484 pd2.tot_len = ntohs(h2_6.ip6_plen) +
6485 sizeof(struct ip6_hdr);
6486 pd2.ttl = h2_6.ip6_hlim;
6487 pd2.src = (struct pf_addr *)&h2_6.ip6_src;
6488 pd2.dst = (struct pf_addr *)&h2_6.ip6_dst;
6489 break;
6490#endif /* INET6 */
6491 default:
6492 unhandled_af(pd->af);
6493 }
6494
6495 if (PF_ANEQ(pd->dst, pd2.src, pd->af)) {
6496 if (pf_status.debug >= LOG_NOTICE) {
6497 log(LOG_NOTICE,
6498 "pf: BAD ICMP %d:%d outer dst: ",
6499 icmptype, icmpcode);
6500 pf_print_host(pd->src, 0, pd->af);
6501 addlog(" -> ");
6502 pf_print_host(pd->dst, 0, pd->af);
6503 addlog(" inner src: ");
6504 pf_print_host(pd2.src, 0, pd2.af);
6505 addlog(" -> ");
6506 pf_print_host(pd2.dst, 0, pd2.af);
6507 addlog("\n");
6508 }
6509 REASON_SET(reason, PFRES_BADSTATE);
6510 return (PF_DROP);
6511 }
6512
6513 switch (pd2.proto) {
6514 case IPPROTO_TCP: {
6515 struct tcphdr *th = &pd2.hdr.tcp;
6516 u_int32_t seq;
6517 struct pf_state_peer *src, *dst;
6518 u_int8_t dws;
6519 int action;
6520
6521 /*
6522			 * Only the first 8 bytes of the TCP header are
6523			 * guaranteed to be present. Don't access any TCP
6524			 * header fields after th_seq; an ackskew test is
6524			 * not possible.
6525 */
6526 if (!pf_pull_hdr(pd2.m, pd2.off, th, 8, reason,
6527 pd2.af)) {
6528 DPFPRINTF(LOG_NOTICE,
6529 "ICMP error message too short (tcp)");
6530 return (PF_DROP);
6531 }
6532
6533 key.af = pd2.af;
6534 key.proto = IPPROTO_TCP;
6535 key.rdomain = pd2.rdomain;
6536 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
6537 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
6538 key.port[pd2.sidx] = th->th_sport;
6539 key.port[pd2.didx] = th->th_dport;
6540 key.hash = pf_pkt_hash(pd2.af, pd2.proto,
6541 pd2.src, pd2.dst, th->th_sport, th->th_dport);
6542
6543 action = pf_find_state(&pd2, &key, stp);
6544 if (action != PF_MATCH)
6545 return (action);
6546
6547 if (pd2.dir == (*stp)->direction) {
6548 if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
6549 src = &(*stp)->src;
6550 dst = &(*stp)->dst;
6551 } else {
6552 src = &(*stp)->dst;
6553 dst = &(*stp)->src;
6554 }
6555 } else {
6556 if (PF_REVERSED_KEY((*stp)->key, pd->af)) {
6557 src = &(*stp)->dst;
6558 dst = &(*stp)->src;
6559 } else {
6560 src = &(*stp)->src;
6561 dst = &(*stp)->dst;
6562 }
6563 }
6564
6565 if (src->wscale && dst->wscale)
6566 dws = dst->wscale & PF_WSCALE_MASK;
6567 else
6568 dws = 0;
6569
6570 /* Demodulate sequence number */
6571 seq = ntohl(th->th_seq) - src->seqdiff;
6572 if (src->seqdiff) {
6573 pf_patch_32(pd, &th->th_seq, htonl(seq));
6574 copyback = 1;
6575 }
6576
6577 if (!((*stp)->state_flags & PFSTATE_SLOPPY) &&
6578 (!SEQ_GEQ(src->seqhi, seq) || !SEQ_GEQ(seq,
6579 src->seqlo - (dst->max_win << dws)))) {
6580 if (pf_status.debug >= LOG_NOTICE) {
6581 log(LOG_NOTICE,
6582 "pf: BAD ICMP %d:%d ",
6583 icmptype, icmpcode);
6584 pf_print_host(pd->src, 0, pd->af);
6585 addlog(" -> ");
6586 pf_print_host(pd->dst, 0, pd->af);
6587 addlog(" state: ");
6588 pf_print_state(*stp);
6589 addlog(" seq=%u\n", seq);
6590 }
6591 REASON_SET(reason, PFRES_BADSTATE);
6592 return (PF_DROP);
6593 } else {
6594 if (pf_status.debug >= LOG_DEBUG) {
6595 log(LOG_DEBUG,
6596 "pf: OK ICMP %d:%d ",
6597 icmptype, icmpcode);
6598 pf_print_host(pd->src, 0, pd->af);
6599 addlog(" -> ");
6600 pf_print_host(pd->dst, 0, pd->af);
6601 addlog(" state: ");
6602 pf_print_state(*stp);
6603 addlog(" seq=%u\n", seq);
6604 }
6605 }
6606
6607 /* translate source/destination address, if necessary */
6608 if ((*stp)->key[PF_SK_WIRE] !=
6609 (*stp)->key[PF_SK_STACK]) {
6610 struct pf_state_key *nk;
6611 int afto, sidx, didx;
6612
6613 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6614 nk = (*stp)->key[pd->sidx];
6615 else
6616 nk = (*stp)->key[pd->didx];
6617
6618 afto = pd->af != nk->af;
6619 sidx = afto ? pd2.didx : pd2.sidx;
6620 didx = afto ? pd2.sidx : pd2.didx;
6621
6622#ifdef INET6
6623 if (afto) {
6624 if (pf_translate_icmp_af(pd, nk->af,
6625 &pd->hdr.icmp))
6626 return (PF_DROP);
6627 m_copyback(pd->m, pd->off,
6628 sizeof(struct icmp6_hdr),
6629 &pd->hdr.icmp6, M_NOWAIT);
6630 if (pf_change_icmp_af(pd->m, ipoff2,
6631 pd, &pd2, &nk->addr[sidx],
6632 &nk->addr[didx], pd->af, nk->af))
6633 return (PF_DROP);
6634 pd->m->m_pkthdr.ph_rtableid =
6635 nk->rdomain;
6636 pd->destchg = 1;
6637 pf_addrcpy(&pd->nsaddr,
6638 &nk->addr[pd2.sidx], nk->af);
6639 pf_addrcpy(&pd->ndaddr,
6640 &nk->addr[pd2.didx], nk->af);
6641 if (nk->af == AF_INET) {
6642 pd->proto = IPPROTO_ICMP;
6643 } else {
6644 pd->proto = IPPROTO_ICMPV6;
6645 /*
6646 * IPv4 becomes IPv6 so we must
6647					 * copy the IPv4 src addr to the
6648					 * low 32 bits of the IPv6 address to
6649 * keep traceroute/icmp
6650 * working.
6651 */
6652 pd->nsaddr.addr32[3] =
6653 pd->src->addr32[0];
6654 }
6655 pd->naf = nk->af;
6656
6657 pf_patch_16(pd,
6658 &th->th_sport, nk->port[sidx]);
6659 pf_patch_16(pd,
6660 &th->th_dport, nk->port[didx]);
6661
6662 m_copyback(pd2.m, pd2.off, 8, th,
6663 M_NOWAIT);
6664 return (PF_AFRT);
6665 }
6666#endif /* INET6 */
6667 if (PF_ANEQ(pd2.src,
6668 &nk->addr[pd2.sidx], pd2.af) ||
6669 nk->port[pd2.sidx] != th->th_sport)
6670 pf_translate_icmp(pd, pd2.src,
6671 &th->th_sport, pd->dst,
6672 &nk->addr[pd2.sidx],
6673 nk->port[pd2.sidx]);
6674
6675 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
6676 pd2.af) || pd2.rdomain != nk->rdomain)
6677 pd->destchg = 1;
6678 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6679
6680 if (PF_ANEQ(pd2.dst,
6681 &nk->addr[pd2.didx], pd2.af) ||
6682 nk->port[pd2.didx] != th->th_dport)
6683 pf_translate_icmp(pd, pd2.dst,
6684 &th->th_dport, pd->src,
6685 &nk->addr[pd2.didx],
6686 nk->port[pd2.didx]);
6687 copyback = 1;
6688 }
6689
6690 if (copyback) {
6691 switch (pd2.af) {
6692 case AF_INET:
6693 m_copyback(pd->m, pd->off, ICMP_MINLEN,
6694 &pd->hdr.icmp, M_NOWAIT);
6695 m_copyback(pd2.m, ipoff2, sizeof(h2),
6696 &h2, M_NOWAIT);
6697 break;
6698#ifdef INET6
6699 case AF_INET6:
6700 m_copyback(pd->m, pd->off,
6701 sizeof(struct icmp6_hdr),
6702 &pd->hdr.icmp6, M_NOWAIT);
6703 m_copyback(pd2.m, ipoff2, sizeof(h2_6),
6704 &h2_6, M_NOWAIT);
6705 break;
6706#endif /* INET6 */
6707 }
6708 m_copyback(pd2.m, pd2.off, 8, th, M_NOWAIT);
6709 }
6710 break;
6711 }
6712 case IPPROTO_UDP: {
6713 struct udphdr *uh = &pd2.hdr.udp;
6714 int action;
6715
6716 if (!pf_pull_hdr(pd2.m, pd2.off, uh, sizeof(*uh),
6717 reason, pd2.af)) {
6718 DPFPRINTF(LOG_NOTICE,
6719 "ICMP error message too short (udp)");
6720 return (PF_DROP);
6721 }
6722
6723 key.af = pd2.af;
6724 key.proto = IPPROTO_UDP;
6725 key.rdomain = pd2.rdomain;
6726 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
6727 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
6728 key.port[pd2.sidx] = uh->uh_sport;
6729 key.port[pd2.didx] = uh->uh_dport;
6730 key.hash = pf_pkt_hash(pd2.af, pd2.proto,
6731 pd2.src, pd2.dst, uh->uh_sport, uh->uh_dport);
6732
6733 action = pf_find_state(&pd2, &key, stp);
6734 if (action != PF_MATCH)
6735 return (action);
6736
6737 /* translate source/destination address, if necessary */
6738 if ((*stp)->key[PF_SK_WIRE] !=
6739 (*stp)->key[PF_SK_STACK]) {
6740 struct pf_state_key *nk;
6741 int afto, sidx, didx;
6742
6743 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6744 nk = (*stp)->key[pd->sidx];
6745 else
6746 nk = (*stp)->key[pd->didx];
6747
6748 afto = pd->af != nk->af;
6749 sidx = afto ? pd2.didx : pd2.sidx;
6750 didx = afto ? pd2.sidx : pd2.didx;
6751
6752#ifdef INET6
6753 if (afto) {
6754 if (pf_translate_icmp_af(pd, nk->af,
6755 &pd->hdr.icmp))
6756 return (PF_DROP);
6757 m_copyback(pd->m, pd->off,
6758 sizeof(struct icmp6_hdr),
6759 &pd->hdr.icmp6, M_NOWAIT);
6760 if (pf_change_icmp_af(pd->m, ipoff2,
6761 pd, &pd2, &nk->addr[sidx],
6762 &nk->addr[didx], pd->af, nk->af))
6763 return (PF_DROP);
6764 pd->m->m_pkthdr.ph_rtableid =
6765 nk->rdomain;
6766 pd->destchg = 1;
6767 pf_addrcpy(&pd->nsaddr,
6768 &nk->addr[pd2.sidx], nk->af);
6769 pf_addrcpy(&pd->ndaddr,
6770 &nk->addr[pd2.didx], nk->af);
6771 if (nk->af == AF_INET) {
6772 pd->proto = IPPROTO_ICMP;
6773 } else {
6774 pd->proto = IPPROTO_ICMPV6;
6775 /*
6776 * IPv4 becomes IPv6 so we must
6777					 * copy the IPv4 src addr to the
6778					 * low 32 bits of the IPv6 address to
6779 * keep traceroute/icmp
6780 * working.
6781 */
6782 pd->nsaddr.addr32[3] =
6783 pd->src->addr32[0];
6784 }
6785 pd->naf = nk->af;
6786
6787 pf_patch_16(pd,
6788 &uh->uh_sport, nk->port[sidx]);
6789 pf_patch_16(pd,
6790 &uh->uh_dport, nk->port[didx]);
6791
6792 m_copyback(pd2.m, pd2.off, sizeof(*uh),
6793 uh, M_NOWAIT);
6794 return (PF_AFRT);
6795 }
6796#endif /* INET6 */
6797
6798 if (PF_ANEQ(pd2.src,
6799 &nk->addr[pd2.sidx], pd2.af) ||
6800 nk->port[pd2.sidx] != uh->uh_sport)
6801 pf_translate_icmp(pd, pd2.src,
6802 &uh->uh_sport, pd->dst,
6803 &nk->addr[pd2.sidx],
6804 nk->port[pd2.sidx]);
6805
6806 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
6807 pd2.af) || pd2.rdomain != nk->rdomain)
6808 pd->destchg = 1;
6809 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6810
6811 if (PF_ANEQ(pd2.dst,
6812 &nk->addr[pd2.didx], pd2.af) ||
6813 nk->port[pd2.didx] != uh->uh_dport)
6814 pf_translate_icmp(pd, pd2.dst,
6815 &uh->uh_dport, pd->src,
6816 &nk->addr[pd2.didx],
6817 nk->port[pd2.didx]);
6818
6819 switch (pd2.af) {
6820 case AF_INET:
6821 m_copyback(pd->m, pd->off, ICMP_MINLEN,
6822 &pd->hdr.icmp, M_NOWAIT);
6823 m_copyback(pd2.m, ipoff2, sizeof(h2),
6824 &h2, M_NOWAIT);
6825 break;
6826#ifdef INET6
6827 case AF_INET6:
6828 m_copyback(pd->m, pd->off,
6829 sizeof(struct icmp6_hdr),
6830 &pd->hdr.icmp6, M_NOWAIT);
6831 m_copyback(pd2.m, ipoff2, sizeof(h2_6),
6832 &h2_6, M_NOWAIT);
6833 break;
6834#endif /* INET6 */
6835 }
6836			/* Avoid recomputing the quoted UDP checksum.
6837			 * note: a zero udp6 checksum is invalid per RFC 2460
6838			 * p. 27, but presumably nothing cares in this context */
6839 pf_patch_16(pd, &uh->uh_sum, 0);
6840 m_copyback(pd2.m, pd2.off, sizeof(*uh), uh,
6841 M_NOWAIT);
6842 copyback = 1;
6843 }
6844 break;
6845 }
6846 case IPPROTO_ICMP: {
6847 struct icmp *iih = &pd2.hdr.icmp;
6848
6849 if (pd2.af != AF_INET) {
6850 REASON_SET(reason, PFRES_NORM);
6851 return (PF_DROP);
6852 }
6853
6854 if (!pf_pull_hdr(pd2.m, pd2.off, iih, ICMP_MINLEN,
6855 reason, pd2.af)) {
6856 DPFPRINTF(LOG_NOTICE,
6857 "ICMP error message too short (icmp)");
6858 return (PF_DROP);
6859 }
6860
6861 pf_icmp_mapping(&pd2, iih->icmp_type,
6862 &icmp_dir, &virtual_id, &virtual_type);
6863
6864 ret = pf_icmp_state_lookup(&pd2, &key, stp,
6865 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
6866 if (ret >= 0)
6867 return (ret);
6868
6869 /* translate source/destination address, if necessary */
6870 if ((*stp)->key[PF_SK_WIRE] !=
6871 (*stp)->key[PF_SK_STACK]) {
6872 struct pf_state_key *nk;
6873 int afto, sidx, didx;
6874
6875 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6876 nk = (*stp)->key[pd->sidx];
6877 else
6878 nk = (*stp)->key[pd->didx];
6879
6880 afto = pd->af != nk->af;
6881 sidx = afto ? pd2.didx : pd2.sidx;
6882 didx = afto ? pd2.sidx : pd2.didx;
6883 iidx = afto ? !iidx : iidx;
6884
6885#ifdef INET6
6886 if (afto) {
6887 if (nk->af != AF_INET6)
6888 return (PF_DROP);
6889 if (pf_translate_icmp_af(pd, nk->af,
6890 &pd->hdr.icmp))
6891 return (PF_DROP);
6892 m_copyback(pd->m, pd->off,
6893 sizeof(struct icmp6_hdr),
6894 &pd->hdr.icmp6, M_NOWAIT);
6895 if (pf_change_icmp_af(pd->m, ipoff2,
6896 pd, &pd2, &nk->addr[sidx],
6897 &nk->addr[didx], pd->af, nk->af))
6898 return (PF_DROP);
6899 pd->proto = IPPROTO_ICMPV6;
6900 if (pf_translate_icmp_af(pd,
6901 nk->af, iih))
6902 return (PF_DROP);
6903 if (virtual_type == htons(ICMP_ECHO))
6904 pf_patch_16(pd, &iih->icmp_id,
6905 nk->port[iidx]);
6906 m_copyback(pd2.m, pd2.off, ICMP_MINLEN,
6907 iih, M_NOWAIT);
6908 pd->m->m_pkthdr.ph_rtableid =
6909 nk->rdomain;
6910 pd->destchg = 1;
6911 pf_addrcpy(&pd->nsaddr,
6912 &nk->addr[pd2.sidx], nk->af);
6913 pf_addrcpy(&pd->ndaddr,
6914 &nk->addr[pd2.didx], nk->af);
6915 /*
6916 * IPv4 becomes IPv6 so we must copy
6917				 * the IPv4 src addr to the low 32 bits of
6918				 * the IPv6 address to keep traceroute
6919 * working.
6920 */
6921 pd->nsaddr.addr32[3] =
6922 pd->src->addr32[0];
6923 pd->naf = nk->af;
6924 return (PF_AFRT);
6925 }
6926#endif /* INET6 */
6927
6928 if (PF_ANEQ(pd2.src,
6929 &nk->addr[pd2.sidx], pd2.af) ||
6930 (virtual_type == htons(ICMP_ECHO) &&
6931 nk->port[iidx] != iih->icmp_id))
6932 pf_translate_icmp(pd, pd2.src,
6933 (virtual_type == htons(ICMP_ECHO)) ?
6934 &iih->icmp_id : NULL,
6935 pd->dst, &nk->addr[pd2.sidx],
6936 (virtual_type == htons(ICMP_ECHO)) ?
6937 nk->port[iidx] : 0);
6938
6939 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
6940 pd2.af) || pd2.rdomain != nk->rdomain)
6941 pd->destchg = 1;
6942 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
6943
6944 if (PF_ANEQ(pd2.dst,
6945 &nk->addr[pd2.didx], pd2.af))
6946 pf_translate_icmp(pd, pd2.dst, NULL,
6947 pd->src, &nk->addr[pd2.didx], 0);
6948
6949 m_copyback(pd->m, pd->off, ICMP_MINLEN,
6950 &pd->hdr.icmp, M_NOWAIT);
6951 m_copyback(pd2.m, ipoff2, sizeof(h2), &h2,
6952 M_NOWAIT);
6953 m_copyback(pd2.m, pd2.off, ICMP_MINLEN, iih,
6954 M_NOWAIT);
6955 copyback = 1;
6956 }
6957 break;
6958 }
6959#ifdef INET6
6960 case IPPROTO_ICMPV6: {
6961 struct icmp6_hdr *iih = &pd2.hdr.icmp6;
6962
6963 if (pd2.af != AF_INET6) {
6964 REASON_SET(reason, PFRES_NORM);
6965 return (PF_DROP);
6966 }
6967
6968 if (!pf_pull_hdr(pd2.m, pd2.off, iih,
6969 sizeof(struct icmp6_hdr), reason, pd2.af)) {
6970 DPFPRINTF(LOG_NOTICE,
6971 "ICMP error message too short (icmp6)");
6972 return (PF_DROP);
6973 }
6974
6975 pf_icmp_mapping(&pd2, iih->icmp6_type,
6976 &icmp_dir, &virtual_id, &virtual_type);
6977 ret = pf_icmp_state_lookup(&pd2, &key, stp,
6978 virtual_id, virtual_type, icmp_dir, &iidx, 0, 1);
6979 /* IPv6? try matching a multicast address */
6980 if (ret == PF_DROP && pd2.af == AF_INET6 &&
6981 icmp_dir == PF_OUT)
6982 ret = pf_icmp_state_lookup(&pd2, &key, stp,
6983 virtual_id, virtual_type, icmp_dir, &iidx,
6984 1, 1);
6985 if (ret >= 0)
6986 return (ret);
6987
6988 /* translate source/destination address, if necessary */
6989 if ((*stp)->key[PF_SK_WIRE] !=
6990 (*stp)->key[PF_SK_STACK]) {
6991 struct pf_state_key *nk;
6992 int afto, sidx, didx;
6993
6994 if (PF_REVERSED_KEY((*stp)->key, pd->af))
6995 nk = (*stp)->key[pd->sidx];
6996 else
6997 nk = (*stp)->key[pd->didx];
6998
6999 afto = pd->af != nk->af;
7000 sidx = afto ? pd2.didx : pd2.sidx;
7001 didx = afto ? pd2.sidx : pd2.didx;
7002 iidx = afto ? !iidx : iidx;
7003
7004 if (afto) {
7005 if (nk->af != AF_INET)
7006 return (PF_DROP);
7007 if (pf_translate_icmp_af(pd, nk->af,
7008 &pd->hdr.icmp))
7009 return (PF_DROP);
7010 m_copyback(pd->m, pd->off,
7011 sizeof(struct icmp6_hdr),
7012 &pd->hdr.icmp6, M_NOWAIT);
7013 if (pf_change_icmp_af(pd->m, ipoff2,
7014 pd, &pd2, &nk->addr[sidx],
7015 &nk->addr[didx], pd->af, nk->af))
7016 return (PF_DROP);
7017 pd->proto = IPPROTO_ICMP;
7018 if (pf_translate_icmp_af(pd,
7019 nk->af, iih))
7020 return (PF_DROP);
7021 if (virtual_type ==
7022 htons(ICMP6_ECHO_REQUEST))
7023 pf_patch_16(pd, &iih->icmp6_id,
7024 nk->port[iidx]);
7025 m_copyback(pd2.m, pd2.off,
7026 sizeof(struct icmp6_hdr), iih,
7027 M_NOWAIT);
7028 pd->m->m_pkthdr.ph_rtableid =
7029 nk->rdomain;
7030 pd->destchg = 1;
7031 pf_addrcpy(&pd->nsaddr,
7032 &nk->addr[pd2.sidx], nk->af);
7033 pf_addrcpy(&pd->ndaddr,
7034 &nk->addr[pd2.didx], nk->af);
7035 pd->naf = nk->af;
7036 return (PF_AFRT);
7037 }
7038
7039 if (PF_ANEQ(pd2.src,
7040 &nk->addr[pd2.sidx], pd2.af) ||
7041 ((virtual_type ==
7042 htons(ICMP6_ECHO_REQUEST)) &&
7043 nk->port[pd2.sidx] != iih->icmp6_id))
7044 pf_translate_icmp(pd, pd2.src,
7045 (virtual_type ==
7046 htons(ICMP6_ECHO_REQUEST))
7047 ? &iih->icmp6_id : NULL,
7048 pd->dst, &nk->addr[pd2.sidx],
7049 (virtual_type ==
7050 htons(ICMP6_ECHO_REQUEST))
7051 ? nk->port[iidx] : 0);
7052
7053 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
7054 pd2.af) || pd2.rdomain != nk->rdomain)
7055 pd->destchg = 1;
7056 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
7057
7058 if (PF_ANEQ(pd2.dst,
7059 &nk->addr[pd2.didx], pd2.af))
7060 pf_translate_icmp(pd, pd2.dst, NULL,
7061 pd->src, &nk->addr[pd2.didx], 0);
7062
7063 m_copyback(pd->m, pd->off,
7064 sizeof(struct icmp6_hdr), &pd->hdr.icmp6,
7065 M_NOWAIT);
7066 m_copyback(pd2.m, ipoff2, sizeof(h2_6), &h2_6,
7067 M_NOWAIT);
7068 m_copyback(pd2.m, pd2.off,
7069 sizeof(struct icmp6_hdr), iih, M_NOWAIT);
7070 copyback = 1;
7071 }
7072 break;
7073 }
7074#endif /* INET6 */
7075 default: {
7076 int action;
7077
7078 key.af = pd2.af;
7079 key.proto = pd2.proto;
7080 key.rdomain = pd2.rdomain;
7081 pf_addrcpy(&key.addr[pd2.sidx], pd2.src, key.af);
7082 pf_addrcpy(&key.addr[pd2.didx], pd2.dst, key.af);
7083 key.port[0] = key.port[1] = 0;
7084 key.hash = pf_pkt_hash(pd2.af, pd2.proto,
7085 pd2.src, pd2.dst, 0, 0);
7086
7087 action = pf_find_state(&pd2, &key, stp);
7088 if (action != PF_MATCH)
7089 return (action);
7090
7091 /* translate source/destination address, if necessary */
7092 if ((*stp)->key[PF_SK_WIRE] !=
7093 (*stp)->key[PF_SK_STACK]) {
7094 struct pf_state_key *nk =
7095 (*stp)->key[pd->didx];
7096
7097 if (PF_ANEQ(pd2.src,
7098 &nk->addr[pd2.sidx], pd2.af))
7099 pf_translate_icmp(pd, pd2.src, NULL,
7100 pd->dst, &nk->addr[pd2.sidx], 0);
7101
7102 if (PF_ANEQ(pd2.dst, &nk->addr[pd2.didx],
7103 pd2.af) || pd2.rdomain != nk->rdomain)
7104 pd->destchg = 1;
7105 pd->m->m_pkthdr.ph_rtableid = nk->rdomain;
7106
7107 if (PF_ANEQ(pd2.dst,
7108 &nk->addr[pd2.didx], pd2.af))
7109 pf_translate_icmp(pd, pd2.dst, NULL,
7110 pd->src, &nk->addr[pd2.didx], 0);
7111
7112 switch (pd2.af) {
7113 case AF_INET:
7114 m_copyback(pd->m, pd->off, ICMP_MINLEN,
7115 &pd->hdr.icmp, M_NOWAIT);
7116 m_copyback(pd2.m, ipoff2, sizeof(h2),
7117 &h2, M_NOWAIT);
7118 break;
7119#ifdef INET6
7120 case AF_INET6:
7121 m_copyback(pd->m, pd->off,
7122 sizeof(struct icmp6_hdr),
7123 &pd->hdr.icmp6, M_NOWAIT);
7124 m_copyback(pd2.m, ipoff2, sizeof(h2_6),
7125 &h2_6, M_NOWAIT);
7126 break;
7127#endif /* INET6 */
7128 }
7129 copyback = 1;
7130 }
7131 break;
7132 }
7133 }
7134 }
7135 if (copyback) {
7136 m_copyback(pd->m, pd->off, pd->hdrlen, &pd->hdr, M_NOWAIT);
7137 }
7138
7139 return (PF_PASS);
7140}
7141
7142/*
7143 * off is measured from the start of the mbuf chain.
7144 * The IP header must sit at the start of the chain's first mbuf.
7145 */
7146void *
7147pf_pull_hdr(struct mbuf *m, int off, void *p, int len,
7148 u_short *reasonp, sa_family_t af)
7149{
7150 int iplen = 0;
7151
7152 switch (af) {
7153 case AF_INET: {
7154 struct ip *h = mtod(m, struct ip *);
7155 u_int16_t fragoff = (ntohs(h->ip_off) & IP_OFFMASK) << 3;
7156
7157 if (fragoff) {
7158 REASON_SET(reasonp, PFRES_FRAG);
7159 return (NULL);
7160 }
7161 iplen = ntohs(h->ip_len);
7162 break;
7163 }
7164#ifdef INET6
7165 case AF_INET6: {
7166 struct ip6_hdr *h = mtod(m, struct ip6_hdr *);
7167
7168 iplen = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
7169 break;
7170 }
7171#endif /* INET6 */
7172 }
7173 if (m->m_pkthdr.len < off + len || iplen < off + len) {
7174 REASON_SET(reasonp, PFRES_SHORT);
7175 return (NULL);
7176 }
7177 m_copydata(m, off, len, p);
7178 return (p);
7179}
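/*
 * Typical pf_pull_hdr() usage, as seen throughout this file: copy a
 * fixed-size header out of the mbuf chain into a local buffer and
 * drop the packet if it is truncated.  A sketch:
 *
 *	struct udphdr uh;
 *
 *	if (!pf_pull_hdr(pd->m, pd->off, &uh, sizeof(uh), reason, af))
 *		return (PF_DROP);
 */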
7180
7181int
7182pf_routable(struct pf_addr *addr, sa_family_t af, struct pfi_kif *kif,
7183 int rtableid)
7184{
7185 struct sockaddr_storage ss;
7186 struct sockaddr_in *dst;
7187 int ret = 1;
7188 int check_mpath;
7189#ifdef INET6
7190 struct sockaddr_in6 *dst6;
7191#endif /* INET6 */
7192 struct rtentry *rt = NULL;
7193
7194 check_mpath = 0;
7195 memset(&ss, 0, sizeof(ss));
7196 switch (af) {
7197 case AF_INET:
7198 dst = (struct sockaddr_in *)&ss;
7199 dst->sin_family = AF_INET;
7200 dst->sin_len = sizeof(*dst);
7201 dst->sin_addr = addr->v4;
7202 if (atomic_load_int(&ipmultipath))
7203 check_mpath = 1;
7204 break;
7205#ifdef INET6
7206 case AF_INET6:
7207 /*
7208 * Skip check for addresses with embedded interface scope,
7209 * as they would always match anyway.
7210 */
7211 if (IN6_IS_SCOPE_EMBED(&addr->v6))
7212 goto out;
7213 dst6 = (struct sockaddr_in6 *)&ss;
7214 dst6->sin6_family = AF_INET6;
7215 dst6->sin6_len = sizeof(*dst6);
7216 dst6->sin6_addr = addr->v6;
7217 if (atomic_load_int(&ip6_multipath))
7218 check_mpath = 1;
7219 break;
7220#endif /* INET6 */
7221 }
7222
7223 /* Skip checks for ipsec interfaces */
7224 if (kif != NULL && kif->pfik_ifp->if_type == IFT_ENC)
7225 goto out;
7226
7227 rt = rtalloc(sstosa(&ss), 0, rtableid);
7228 if (rt != NULL) {
7229 /* No interface given, this is a no-route check */
7230 if (kif == NULL)
7231 goto out;
7232
7233 if (kif->pfik_ifp == NULL) {
7234 ret = 0;
7235 goto out;
7236 }
7237
7238 /* Perform uRPF check if passed input interface */
7239 ret = 0;
7240 do {
7241 if (rt->rt_ifidx == kif->pfik_ifp->if_index) {
7242 ret = 1;
7243#if NCARP > 0
7244 } else {
7245 struct ifnet *ifp;
7246
7247 smr_read_enter();
7248 ifp = if_get_smr(rt->rt_ifidx);
7249 if (ifp != NULL && ifp->if_type == IFT_CARP &&
7250 ifp->if_carpdevidx ==
7251 kif->pfik_ifp->if_index)
7252 ret = 1;
7253 smr_read_leave();
7254#endif /* NCARP */
7255 }
7256
7257 rt = rtable_iterate(rt);
7258 } while (check_mpath == 1 && rt != NULL && ret == 0);
7259 } else
7260 ret = 0;
7261out:
7262 rtfree(rt);
7263 return (ret);
7264}
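/*
 * When kif is set, the loop above is a uRPF (unicast reverse path
 * forwarding) check: the packet's source address must be reachable
 * via the interface it arrived on, considering every multipath route
 * when ipmultipath/ip6_multipath is enabled and accepting carp(4)
 * interfaces layered on the matching parent.  With kif == NULL it
 * degrades to a plain "is there any route at all" test.
 */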
7265
7266int
7267pf_rtlabel_match(struct pf_addr *addr, sa_family_t af, struct pf_addr_wrap *aw,
7268 int rtableid)
7269{
7270 struct sockaddr_storage ss;
7271 struct sockaddr_in *dst;
7272#ifdef INET6
7273 struct sockaddr_in6 *dst6;
7274#endif /* INET6 */
7275 struct rtentry *rt;
7276 int ret = 0;
7277
7278 memset(&ss, 0, sizeof(ss));
7279 switch (af) {
7280 case AF_INET:
7281 dst = (struct sockaddr_in *)&ss;
7282 dst->sin_family = AF_INET;
7283 dst->sin_len = sizeof(*dst);
7284 dst->sin_addr = addr->v4;
7285 break;
7286#ifdef INET6
7287 case AF_INET6:
7288 dst6 = (struct sockaddr_in6 *)&ss;
7289 dst6->sin6_family = AF_INET6;
7290 dst6->sin6_len = sizeof(*dst6);
7291 dst6->sin6_addr = addr->v6;
7292 break;
7293#endif /* INET6 */
7294 }
7295
7296 rt = rtalloc(sstosa(&ss), RT_RESOLVE, rtableid);
7297 if (rt != NULL) {
7298 if (rt->rt_labelid == aw->v.rtlabel)
7299 ret = 1;
7300 rtfree(rt);
7301 }
7302
7303 return (ret);
7304}
7305
7306/* pf_route() may change pd->m, adjust local copies after calling */
7307void
7308pf_route(struct pf_pdesc *pd, struct pf_state *st)
7309{
7310 struct mbuf *m0;
7311 struct mbuf_list ml;
7312 struct sockaddr_in *dst, sin;
7313 struct rtentry *rt = NULL;
7314 struct ip *ip;
7315 struct ifnet *ifp = NULL;
7316 unsigned int rtableid;
7317
7318 if (pd->m->m_pkthdr.pf.routed++ > 3) {
7319 m_freem(pd->m);
7320 pd->m = NULL;
7321 return;
7322 }
7323
7324 if (st->rt == PF_DUPTO) {
7325 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
7326 return;
7327 } else {
7328 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
7329 return;
7330 m0 = pd->m;
7331 pd->m = NULL;
7332 }
7333
7334 if (m0->m_len < sizeof(struct ip)) {
7335 DPFPRINTF(LOG_ERR,
7336 "%s: m0->m_len < sizeof(struct ip)", __func__);
7337 goto bad;
7338 }
7339
7340 ip = mtod(m0, struct ip *);
7341
7342 if (pd->dir == PF_IN) {
7343 if (ip->ip_ttl <= IPTTLDEC) {
7344 if (st->rt != PF_DUPTO) {
7345 pf_send_icmp(m0, ICMP_TIMXCEED,
7346 ICMP_TIMXCEED_INTRANS, 0,
7347 pd->af, st->rule.ptr, pd->rdomain);
7348 }
7349 goto bad;
7350 }
7351 ip->ip_ttl -= IPTTLDEC;
7352 }
7353
7354 memset(&sin, 0, sizeof(sin));
7355 dst = &sin;
7356 dst->sin_family = AF_INET;
7357 dst->sin_len = sizeof(*dst);
7358 dst->sin_addr = st->rt_addr.v4;
7359 rtableid = m0->m_pkthdr.ph_rtableid;
7360
7361 rt = rtalloc_mpath(sintosa(dst), &ip->ip_src.s_addr, rtableid);
7362 if (!rtisvalid(rt)) {
7363 if (st->rt != PF_DUPTO) {
7364 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_HOST,
7365 0, pd->af, st->rule.ptr, pd->rdomain);
7366 }
7367 ipstat_inc(ips_noroute);
7368 goto bad;
7369 }
7370
7371 ifp = if_get(rt->rt_ifidx);
7372 if (ifp == NULL)
7373 goto bad;
7374
7375	/* A locally generated packet may have an invalid source address. */
7376 if ((ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET &&
7377 (ifp->if_flags & IFF_LOOPBACK) == 0)
7378 ip->ip_src = ifatoia(rt->rt_ifa)->ia_addr.sin_addr;
7379
7380 if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
7381 if (pf_test(AF_INET, PF_OUT, ifp, &m0) != PF_PASS)
7382 goto bad;
7383 else if (m0 == NULL)
7384 goto done;
7385 if (m0->m_len < sizeof(struct ip)) {
7386 DPFPRINTF(LOG_ERR,
7387 "%s: m0->m_len < sizeof(struct ip)", __func__);
7388 goto bad;
7389 }
7390 ip = mtod(m0, struct ip *);
7391 }
7392
7393 if (if_output_tso(ifp, &m0, sintosa(dst), rt, ifp->if_mtu) ||
7394 m0 == NULL)
7395 goto done;
7396
7397 /*
7398 * Too large for interface; fragment if possible.
7399 * Must be able to put at least 8 bytes per fragment.
7400 */
7401 if (ip->ip_off & htons(IP_DF)) {
7402 ipstat_inc(ips_cantfrag);
7403 if (st->rt != PF_DUPTO)
7404 pf_send_icmp(m0, ICMP_UNREACH, ICMP_UNREACH_NEEDFRAG,
7405 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
7406 goto bad;
7407 }
7408
7409 if (ip_fragment(m0, &ml, ifp, ifp->if_mtu) ||
7410 if_output_ml(ifp, &ml, sintosa(dst), rt))
7411 goto done;
7412 ipstat_inc(ips_fragmented);
7413
7414done:
7415 if_put(ifp);
7416 rtfree(rt);
7417 return;
7418
7419bad:
7420 m_freem(m0);
7421 goto done;
7422}
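/*
 * pf_route() serves the route-to, reply-to and dup-to rule options
 * for IPv4: dup-to works on a copy of the packet, reply-to only acts
 * on traffic flowing against the state's direction, and route-to
 * forwards past the routing table towards st->rt_addr.  Illustrative
 * pf.conf usage (the gateway address is a placeholder):
 *
 *	pass out route-to 192.0.2.1 to 198.51.100.0/24
 */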
7423
7424#ifdef INET6
7425/* pf_route6() may change pd->m, adjust local copies after calling */
7426void
7427pf_route6(struct pf_pdesc *pd, struct pf_state *st)
7428{
7429 struct mbuf *m0;
7430 struct sockaddr_in6 *dst, sin6;
7431 struct rtentry *rt = NULL;
7432 struct ip6_hdr *ip6;
7433 struct ifnet *ifp = NULL;
7434 struct m_tag *mtag;
7435 unsigned int rtableid;
7436
7437 if (pd->m->m_pkthdr.pf.routed++ > 3) {
7438 m_freem(pd->m);
7439 pd->m = NULL;
7440 return;
7441 }
7442
7443 if (st->rt == PF_DUPTO) {
7444 if ((m0 = m_dup_pkt(pd->m, max_linkhdr, M_NOWAIT)) == NULL)
7445 return;
7446 } else {
7447 if ((st->rt == PF_REPLYTO) == (st->direction == pd->dir))
7448 return;
7449 m0 = pd->m;
7450 pd->m = NULL;
7451 }
7452
7453 if (m0->m_len < sizeof(struct ip6_hdr)) {
7454 DPFPRINTF(LOG_ERR,
7455 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
7456 goto bad;
7457 }
7458 ip6 = mtod(m0, struct ip6_hdr *);
7459
7460 if (pd->dir == PF_IN) {
7461 if (ip6->ip6_hlim <= IPV6_HLIMDEC) {
7462 if (st->rt != PF_DUPTO) {
7463 pf_send_icmp(m0, ICMP6_TIME_EXCEEDED,
7464 ICMP6_TIME_EXCEED_TRANSIT, 0,
7465 pd->af, st->rule.ptr, pd->rdomain);
7466 }
7467 goto bad;
7468 }
7469 ip6->ip6_hlim -= IPV6_HLIMDEC;
7470 }
7471
7472 memset(&sin6, 0, sizeof(sin6));
7473 dst = &sin6;
7474 dst->sin6_family = AF_INET6;
7475 dst->sin6_len = sizeof(*dst);
7476 dst->sin6_addr = st->rt_addr.v6;
7477 rtableid = m0->m_pkthdr.ph_rtableid;
7478
7479 rt = rtalloc_mpath(sin6tosa(dst), &ip6->ip6_src.s6_addr32[0],
7480 rtableid);
7481 if (!rtisvalid(rt)) {
7482 if (st->rt != PF_DUPTO) {
7483 pf_send_icmp(m0, ICMP6_DST_UNREACH,
7484 ICMP6_DST_UNREACH_NOROUTE, 0,
7485 pd->af, st->rule.ptr, pd->rdomain);
7486 }
7487 ip6stat_inc(ip6s_noroute);
7488 goto bad;
7489 }
7490
7491 ifp = if_get(rt->rt_ifidx);
7492 if (ifp == NULL)
7493 goto bad;
7494
7495	/* A locally generated packet may have an invalid source address. */
7496 if (IN6_IS_ADDR_LOOPBACK(&ip6->ip6_src) &&
7497 (ifp->if_flags & IFF_LOOPBACK) == 0)
7498 ip6->ip6_src = ifatoia6(rt->rt_ifa)->ia_addr.sin6_addr;
7499
7500 if (st->rt != PF_DUPTO && pd->dir == PF_IN) {
7501 if (pf_test(AF_INET6, PF_OUT, ifp, &m0) != PF_PASS)
7502 goto bad;
7503 else if (m0 == NULL)
7504 goto done;
7505 if (m0->m_len < sizeof(struct ip6_hdr)) {
7506 DPFPRINTF(LOG_ERR,
7507 "%s: m0->m_len < sizeof(struct ip6_hdr)", __func__);
7508 goto bad;
7509 }
7510 }
7511
7512 /*
7513 * If packet has been reassembled by PF earlier, we have to
7514 * use pf_refragment6() here to turn it back to fragments.
7515 */
7516 if ((mtag = m_tag_find(m0, PACKET_TAG_PF_REASSEMBLED, NULL))) {
7517 (void) pf_refragment6(&m0, mtag, dst, ifp, rt);
7518 goto done;
7519 }
7520
7521 if (if_output_tso(ifp, &m0, sin6tosa(dst), rt, ifp->if_mtu) ||
7522 m0 == NULL)
7523 goto done;
7524
7525 ip6stat_inc(ip6s_cantfrag);
7526 if (st->rt != PF_DUPTO)
7527 pf_send_icmp(m0, ICMP6_PACKET_TOO_BIG, 0,
7528 ifp->if_mtu, pd->af, st->rule.ptr, pd->rdomain);
7529 goto bad;
7530
7531done:
7532 if_put(ifp);
7533 rtfree(rt);
7534 return;
7535
7536bad:
7537 m_freem(m0);
7538 goto done;
7539}
7540#endif /* INET6 */
7541
7542/*
7543 * check TCP checksum and set mbuf flag
7544 * off is the offset where the protocol header starts
7545 * len is the total length of protocol header plus payload
7546 * returns 0 when the checksum is valid, otherwise returns 1.
7547 * if the _OUT flag is set the checksum hasn't been computed yet;
7547 * consider these ok
7548 */
7549int
7550pf_check_tcp_cksum(struct mbuf *m, int off, int len, sa_family_t af)
7551{
7552 u_int16_t sum;
7553
7554 if (m->m_pkthdr.csum_flags &
7555 (M_TCP_CSUM_IN_OK | M_TCP_CSUM_OUT)) {
7556 return (0);
7557 }
7558 if (m->m_pkthdr.csum_flags & M_TCP_CSUM_IN_BAD ||
7559 off < sizeof(struct ip) ||
7560 m->m_pkthdr.len < off + len) {
7561 return (1);
7562 }
7563
7564 /* need to do it in software */
7565 tcpstat_inc(tcps_inswcsum);
7566
7567 switch (af) {
7568 case AF_INET:
7569 if (m->m_len < sizeof(struct ip))
7570 return (1);
7571
7572 sum = in4_cksum(m, IPPROTO_TCP, off, len);
7573 break;
7574#ifdef INET6
7575 case AF_INET6:
7576 if (m->m_len < sizeof(struct ip6_hdr))
7577 return (1);
7578
7579 sum = in6_cksum(m, IPPROTO_TCP, off, len);
7580 break;
7581#endif /* INET6 */
7582 default:
7583 unhandled_af(af);
7584 }
7585 if (sum) {
7586 tcpstat_inc(tcps_rcvbadsum);
7587 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_BAD;
7588 return (1);
7589 }
7590
7591 m->m_pkthdr.csum_flags |= M_TCP_CSUM_IN_OK;
7592 return (0);
7593}
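/*
 * The checksum logic above leans on hardware offload state: packets
 * already verified on receive (M_TCP_CSUM_IN_OK) or whose checksum
 * will be filled in on transmit (M_TCP_CSUM_OUT) are accepted without
 * touching the payload, so the software in4_cksum()/in6_cksum() pass
 * only runs for the leftovers.
 */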
7594
7595struct pf_divert *
7596pf_find_divert(struct mbuf *m)
7597{
7598 struct m_tag *mtag;
7599
7600 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL)
7601 return (NULL);
7602
7603 return ((struct pf_divert *)(mtag + 1));
7604}
7605
7606struct pf_divert *
7607pf_get_divert(struct mbuf *m)
7608{
7609 struct m_tag *mtag;
7610
7611 if ((mtag = m_tag_find(m, PACKET_TAG_PF_DIVERT, NULL)) == NULL) {
7612 mtag = m_tag_get(PACKET_TAG_PF_DIVERT, sizeof(struct pf_divert),
7613 M_NOWAIT);
7614 if (mtag == NULL)
7615 return (NULL);
7616 memset(mtag + 1, 0, sizeof(struct pf_divert));
7617 m_tag_prepend(m, mtag);
7618 }
7619
7620 return ((struct pf_divert *)(mtag + 1));
7621}
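/*
 * These divert tags back the divert-to/divert-reply rule options;
 * divert(4) sockets read the tag to recover the original addresses.
 * An illustrative rule for a local transparent proxy:
 *
 *	pass in proto tcp to port 80 divert-to 127.0.0.1 port 8080
 */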
7622
7623int
7624pf_walk_option(struct pf_pdesc *pd, struct ip *h, int off, int end,
7625 u_short *reason)
7626{
7627 uint8_t type, length, opts[15 * 4 - sizeof(struct ip)];
7628
7629 /* IP header in payload of ICMP packet may be too short */
7630 if (pd->m->m_pkthdr.len < end) {
7631 DPFPRINTF(LOG_NOTICE, "IP option too short");
7632 REASON_SET(reason, PFRES_SHORT);
7633 return (PF_DROP);
7634 }
7635
7636 KASSERT(end - off <= sizeof(opts));
7637 m_copydata(pd->m, off, end - off, opts);
7638 end -= off;
7639 off = 0;
7640
7641 while (off < end) {
7642 type = opts[off];
7643 if (type == IPOPT_EOL)
7644 break;
7645 if (type == IPOPT_NOP) {
7646 off++;
7647 continue;
7648 }
7649 if (off + 2 > end) {
7650 DPFPRINTF(LOG_NOTICE, "IP length opt");
7651 REASON_SET(reason, PFRES_IPOPTIONS);
7652 return (PF_DROP);
7653 }
7654 length = opts[off + 1];
7655 if (length < 2) {
7656 DPFPRINTF(LOG_NOTICE, "IP short opt");
7657 REASON_SET(reason, PFRES_IPOPTIONS);
7658 return (PF_DROP);
7659 }
7660 if (off + length > end) {
7661 DPFPRINTF(LOG_NOTICE, "IP long opt");
7662 REASON_SET(reason, PFRES_IPOPTIONS);
7663 return (PF_DROP);
7664 }
7665 switch (type) {
7666 case IPOPT_RA:
7667 SET(pd->badopts, PF_OPT_ROUTER_ALERT);
7668 break;
7669 default:
7670 SET(pd->badopts, PF_OPT_OTHER);
7671 break;
7672 }
7673 off += length;
7674 }
7675
7676 return (PF_PASS);
7677}
7678
7679int
7680pf_walk_header(struct pf_pdesc *pd, struct ip *h, u_short *reason)
7681{
7682 struct ip6_ext ext;
7683 u_int32_t hlen, end;
7684 int hdr_cnt;
7685
7686 hlen = h->ip_hl << 2;
7687 if (hlen < sizeof(struct ip) || hlen > ntohs(h->ip_len)) {
7688 REASON_SET(reason, PFRES_SHORT);
7689 return (PF_DROP);
7690 }
7691 if (hlen != sizeof(struct ip)) {
7692 if (pf_walk_option(pd, h, pd->off + sizeof(struct ip),
7693 pd->off + hlen, reason) != PF_PASS)
7694 return (PF_DROP);
7695		/* header options which contain only padding are fishy */
7696 if (pd->badopts == 0)
7697 SET(pd->badopts, PF_OPT_OTHER);
7698 }
7699 end = pd->off + ntohs(h->ip_len);
7700 pd->off += hlen;
7701 pd->proto = h->ip_p;
7702	/* IGMP packets have router alert options; allow them */
7703 if (pd->proto == IPPROTO_IGMP) {
7704 /*
7705 * According to RFC 1112 ttl must be set to 1 in all IGMP
7706 * packets sent to 224.0.0.1
7707 */
7708 if ((h->ip_ttl != 1) &&
7709 (h->ip_dst.s_addr == INADDR_ALLHOSTS_GROUP)) {
7710 DPFPRINTF(LOG_NOTICE, "Invalid IGMP");
7711 REASON_SET(reason, PFRES_IPOPTIONS);
7712 return (PF_DROP);
7713 }
7714 CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
7715 }
7716	/* stop walking over non-initial fragments */
7717 if ((h->ip_off & htons(IP_OFFMASK)) != 0)
7718 return (PF_PASS);
7719
7720 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
7721 switch (pd->proto) {
7722 case IPPROTO_AH:
7723 /* fragments may be short */
7724 if ((h->ip_off & htons(IP_MF | IP_OFFMASK)) != 0 &&
7725 end < pd->off + sizeof(ext))
7726 return (PF_PASS);
7727 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
7728 reason, AF_INET)) {
7729 DPFPRINTF(LOG_NOTICE, "IP short exthdr");
7730 return (PF_DROP);
7731 }
7732 pd->off += (ext.ip6e_len + 2) * 4;
7733 pd->proto = ext.ip6e_nxt;
7734 break;
7735 default:
7736 return (PF_PASS);
7737 }
7738 }
7739 DPFPRINTF(LOG_NOTICE, "IPv4 nested authentication header limit");
7740 REASON_SET(reason, PFRES_IPOPTIONS);
7741 return (PF_DROP);
7742}
7743
7744#ifdef INET6
7745int
7746pf_walk_option6(struct pf_pdesc *pd, struct ip6_hdr *h, int off, int end,
7747 u_short *reason)
7748{
7749 struct ip6_opt opt;
7750 struct ip6_opt_jumbo jumbo;
7751
7752 while (off < end) {
7753 if (!pf_pull_hdr(pd->m, off, &opt.ip6o_type,
7754 sizeof(opt.ip6o_type), reason, AF_INET6)) {
7755 DPFPRINTF(LOG_NOTICE, "IPv6 short opt type");
7756 return (PF_DROP);
7757 }
7758 if (opt.ip6o_type == IP6OPT_PAD1) {
7759 off++;
7760 continue;
7761 }
7762 if (!pf_pull_hdr(pd->m, off, &opt, sizeof(opt),
7763 reason, AF_INET6)) {
7764 DPFPRINTF(LOG_NOTICE, "IPv6 short opt");
7765 return (PF_DROP);
7766 }
7767 if (off + sizeof(opt) + opt.ip6o_len > end) {
7768 DPFPRINTF(LOG_NOTICE, "IPv6 long opt");
7769 REASON_SET(reason, PFRES_IPOPTIONS);
7770 return (PF_DROP);
7771 }
7772 switch (opt.ip6o_type) {
7773 case IP6OPT_PADN:
7774 break;
7775 case IP6OPT_JUMBO:
7776 SET(pd->badopts, PF_OPT_JUMBO);
7777 if (pd->jumbolen != 0) {
7778 DPFPRINTF(LOG_NOTICE, "IPv6 multiple jumbo");
7779 REASON_SET(reason, PFRES_IPOPTIONS);
7780 return (PF_DROP);
7781 }
7782 if (ntohs(h->ip6_plen) != 0) {
7783 DPFPRINTF(LOG_NOTICE, "IPv6 bad jumbo plen");
7784 REASON_SET(reason, PFRES_IPOPTIONS);
7785 return (PF_DROP);
7786 }
7787 if (!pf_pull_hdr(pd->m, off, &jumbo, sizeof(jumbo),
7788 reason, AF_INET6)) {
7789 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbo");
7790 return (PF_DROP);
7791 }
7792 memcpy(&pd->jumbolen, jumbo.ip6oj_jumbo_len,
7793 sizeof(pd->jumbolen));
7794 pd->jumbolen = ntohl(pd->jumbolen);
7795 if (pd->jumbolen < IPV6_MAXPACKET) {
7796 DPFPRINTF(LOG_NOTICE, "IPv6 short jumbolen");
7797 REASON_SET(reason, PFRES_IPOPTIONS);
7798 return (PF_DROP);
7799 }
7800 break;
7801 case IP6OPT_ROUTER_ALERT:
7802 SET(pd->badopts, PF_OPT_ROUTER_ALERT);
7803 break;
7804 default:
7805 SET(pd->badopts, PF_OPT_OTHER);
7806 break;
7807 }
7808 off += sizeof(opt) + opt.ip6o_len;
7809 }
7810
7811 return (PF_PASS);
7812}
7813
7814int
7815pf_walk_header6(struct pf_pdesc *pd, struct ip6_hdr *h, u_short *reason)
7816{
7817 struct ip6_frag frag;
7818 struct ip6_ext ext;
7819 struct icmp6_hdr icmp6;
7820 struct ip6_rthdr rthdr;
7821 u_int32_t end;
7822 int hdr_cnt, fraghdr_cnt = 0, rthdr_cnt = 0;
7823
7824 pd->off += sizeof(struct ip6_hdr);
7825 end = pd->off + ntohs(h->ip6_plen);
7826 pd->fragoff = pd->extoff = pd->jumbolen = 0;
7827 pd->proto = h->ip6_nxt;
7828
7829 for (hdr_cnt = 0; hdr_cnt < pf_hdr_limit; hdr_cnt++) {
7830 switch (pd->proto) {
7831 case IPPROTO_ROUTING:
7832 case IPPROTO_DSTOPTS:
7833 SET(pd->badopts, PF_OPT_OTHER);
7834 break;
7835 case IPPROTO_HOPOPTS:
7836 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
7837 reason, AF_INET6)) {
7838 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
7839 return (PF_DROP);
7840 }
7841 if (pf_walk_option6(pd, h, pd->off + sizeof(ext),
7842 pd->off + (ext.ip6e_len + 1) * 8, reason)
7843 != PF_PASS)
7844 return (PF_DROP);
7845 /* option header which contains only padding is fishy */
7846 if (pd->badopts == 0)
7847 SET(pd->badopts, PF_OPT_OTHER);
7848 break;
7849 }
7850 switch (pd->proto) {
7851 case IPPROTO_FRAGMENT:
7852 if (fraghdr_cnt++) {
7853 DPFPRINTF(LOG_NOTICE, "IPv6 multiple fragment");
7854 REASON_SET(reason, PFRES_FRAG);
7855 return (PF_DROP);
7856 }
7857 /* jumbo payload packets cannot be fragmented */
7858 if (pd->jumbolen != 0) {
7859 DPFPRINTF(LOG_NOTICE, "IPv6 fragmented jumbo");
7860 REASON_SET(reason, PFRES_FRAG);
7861 return (PF_DROP);
7862 }
7863 if (!pf_pull_hdr(pd->m, pd->off, &frag, sizeof(frag),
7864 reason, AF_INET6)) {
7865 DPFPRINTF(LOG_NOTICE, "IPv6 short fragment");
7866 return (PF_DROP);
7867 }
7868			/* stop walking over non-initial fragments */
7869 if (ntohs((frag.ip6f_offlg & IP6F_OFF_MASK)) != 0) {
7870 pd->fragoff = pd->off;
7871 return (PF_PASS);
7872 }
7873			/* RFC6946: reassemble only non-atomic fragments */
7874 if (frag.ip6f_offlg & IP6F_MORE_FRAG)
7875 pd->fragoff = pd->off;
7876 pd->off += sizeof(frag);
7877 pd->proto = frag.ip6f_nxt;
7878 break;
7879 case IPPROTO_ROUTING:
7880 if (rthdr_cnt++) {
7881 DPFPRINTF(LOG_NOTICE, "IPv6 multiple rthdr");
7882 REASON_SET(reason, PFRES_IPOPTIONS);
7883 return (PF_DROP);
7884 }
7885 /* fragments may be short */
7886 if (pd->fragoff != 0 && end < pd->off + sizeof(rthdr)) {
7887 pd->off = pd->fragoff;
7888 pd->proto = IPPROTO_FRAGMENT;
7889 return (PF_PASS);
7890 }
7891 if (!pf_pull_hdr(pd->m, pd->off, &rthdr, sizeof(rthdr),
7892 reason, AF_INET6)) {
7893 DPFPRINTF(LOG_NOTICE, "IPv6 short rthdr");
7894 return (PF_DROP);
7895 }
7896 if (rthdr.ip6r_type == IPV6_RTHDR_TYPE_0) {
7897 DPFPRINTF(LOG_NOTICE, "IPv6 rthdr0");
7898 REASON_SET(reason, PFRES_IPOPTIONS);
7899 return (PF_DROP);
7900 }
7901 /* FALLTHROUGH */
7902 case IPPROTO_HOPOPTS:
7903 /* RFC2460 4.1: Hop-by-Hop only after IPv6 header */
7904 if (pd->proto == IPPROTO_HOPOPTS && hdr_cnt > 0) {
7905 DPFPRINTF(LOG_NOTICE, "IPv6 hopopts not first");
7906 REASON_SET(reason, PFRES_IPOPTIONS);
7907 return (PF_DROP);
7908 }
7909 /* FALLTHROUGH */
7910 case IPPROTO_AH:
7911 case IPPROTO_DSTOPTS:
7912 /* fragments may be short */
7913 if (pd->fragoff != 0 && end < pd->off + sizeof(ext)) {
7914 pd->off = pd->fragoff;
7915 pd->proto = IPPROTO_FRAGMENT;
7916 return (PF_PASS);
7917 }
7918 if (!pf_pull_hdr(pd->m, pd->off, &ext, sizeof(ext),
7919 reason, AF_INET6)) {
7920 DPFPRINTF(LOG_NOTICE, "IPv6 short exthdr");
7921 return (PF_DROP);
7922 }
7923 /* reassembly needs the ext header before the frag */
7924 if (pd->fragoff == 0)
7925 pd->extoff = pd->off;
7926 if (pd->proto == IPPROTO_HOPOPTS && pd->fragoff == 0 &&
7927 ntohs(h->ip6_plen) == 0 && pd->jumbolen != 0) {
7928 DPFPRINTF(LOG_NOTICE, "IPv6 missing jumbo");
7929 REASON_SET(reason, PFRES_IPOPTIONS);
7930 return (PF_DROP);
7931 }
7932 if (pd->proto == IPPROTO_AH)
7933 pd->off += (ext.ip6e_len + 2) * 4;
7934 else
7935 pd->off += (ext.ip6e_len + 1) * 8;
7936 pd->proto = ext.ip6e_nxt;
7937 break;
7938 case IPPROTO_ICMPV6:
7939			/* fragments may be short; ignore the inner header then */
7940 if (pd->fragoff != 0 && end < pd->off + sizeof(icmp6)) {
7941 pd->off = pd->fragoff;
7942 pd->proto = IPPROTO_FRAGMENT;
7943 return (PF_PASS);
7944 }
7945 if (!pf_pull_hdr(pd->m, pd->off, &icmp6, sizeof(icmp6),
7946 reason, AF_INET6)) {
7947 DPFPRINTF(LOG_NOTICE, "IPv6 short icmp6hdr");
7948 return (PF_DROP);
7949 }
7950 /* ICMP multicast packets have router alert options */
7951 switch (icmp6.icmp6_type) {
7952 case MLD_LISTENER_QUERY:
7953 case MLD_LISTENER_REPORT:
7954 case MLD_LISTENER_DONE:
7955 case MLDV2_LISTENER_REPORT:
7956 /*
7957 * According to RFC 2710 all MLD messages are
7958 * sent with hop-limit (ttl) set to 1, and link
7959 * local source address. If either one is
7960 * missing then MLD message is invalid and
7961 * should be discarded.
7962			 * missing, the MLD message is invalid and
7963 if ((h->ip6_hlim != 1) ||
7964 !IN6_IS_ADDR_LINKLOCAL(&h->ip6_src)) {
7965 DPFPRINTF(LOG_NOTICE, "Invalid MLD");
7966 REASON_SET(reason, PFRES_IPOPTIONS);
7967 return (PF_DROP);
7968 }
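				/*
				 * Editor's note: valid MLD is the one
				 * legitimate user of the router alert
				 * hop-by-hop option (RFC 2710 requires it),
				 * so clear that flag from badopts here
				 * instead of letting it get the packet
				 * dropped later.
				 */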
7969 CLR(pd->badopts, PF_OPT_ROUTER_ALERT);
7970 break;
7971 }
7972 return (PF_PASS);
7973 case IPPROTO_TCP:
7974 case IPPROTO_UDP:
7975 /* fragments may be short, ignore inner header then */
7976 if (pd->fragoff != 0 && end < pd->off +
7977 (pd->proto == IPPROTO_TCP ? sizeof(struct tcphdr) :
7978 pd->proto == IPPROTO_UDP ? sizeof(struct udphdr) :
7979 sizeof(struct icmp6_hdr))) {
7980 pd->off = pd->fragoff;
7981 pd->proto = IPPROTO_FRAGMENT;
7982 }
7983 /* FALLTHROUGH */
7984 default:
7985 return (PF_PASS);
7986 }
7987 }
7988 DPFPRINTF(LOG_NOTICE, "IPv6 nested extension header limit");
7989 REASON_SET(reason, PFRES_IPOPTIONS);
7990 return (PF_DROP);
7991}
7992#endif /* INET6 */
7993
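/*
 * Editor's note: pf_pkt_hash() folds the addresses (and, for TCP/UDP, the
 * ports) together with XOR before hashing.  Because XOR is commutative,
 * both directions of a connection fold to the same 32-bit value, e.g.
 * (A ^ B, p ^ q) == (B ^ A, q ^ p), so replies hash the same as the
 * initial packet.  stoeplitz_n32() then runs the folded word through the
 * kernel's Toeplitz function to spread the bits.
 */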
7994u_int16_t
7995pf_pkt_hash(sa_family_t af, uint8_t proto,
7996 const struct pf_addr *src, const struct pf_addr *dst,
7997 uint16_t sport, uint16_t dport)
7998{
7999 uint32_t hash;
8000
8001 hash = src->addr32[0] ^ dst->addr32[0];
8002#ifdef INET6
8003 if (af == AF_INET6) {
8004 hash ^= src->addr32[1] ^ dst->addr32[1];
8005 hash ^= src->addr32[2] ^ dst->addr32[2];
8006 hash ^= src->addr32[3] ^ dst->addr32[3];
8007 }
8008#endif
8009
8010 switch (proto) {
8011 case IPPROTO_TCP:
8012 case IPPROTO_UDP:
8013 hash ^= sport ^ dport;
8014 break;
8015 }
8016
8017	return (stoeplitz_n32(hash));
8018}
8019
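/*
 * Editor's note: pf_setup_pdesc() fills in the packet descriptor that the
 * rest of pf works from: it validates the IP/IPv6 lengths, walks the
 * header chain, and records addresses, ports, protocol and a pointer to
 * the transport checksum.  Anything malformed is rejected here with
 * PF_DROP and a reason code before rules or states are consulted.
 */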
8020int
8021pf_setup_pdesc(struct pf_pdesc *pd, sa_family_t af, int dir,
8022 struct pfi_kif *kif, struct mbuf *m, u_short *reason)
8023{
8024 memset(pd, 0, sizeof(*pd));
8025 pd->dir = dir;
8026 pd->kif = kif; /* kif is NULL when called by pflog */
8027 pd->m = m;
8028 pd->sidx = (dir == PF_IN) ? 0 : 1;
8029 pd->didx = (dir == PF_IN) ? 1 : 0;
8030 pd->af = pd->naf = af;
8031 pd->rdomain = rtable_l2(pd->m->m_pkthdr.ph_rtableid);
8032
8033 switch (pd->af) {
8034 case AF_INET: {
8035 struct ip *h;
8036
8037 /* Check for illegal packets */
8038 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip)) {
8039 REASON_SET(reason, PFRES_SHORT);
8040 return (PF_DROP);
8041 }
8042
8043 h = mtod(pd->m, struct ip *);
8044 if (pd->m->m_pkthdr.len < ntohs(h->ip_len)) {
8045 REASON_SET(reason, PFRES_SHORT);
8046 return (PF_DROP);
8047 }
8048
8049 if (pf_walk_header(pd, h, reason) != PF_PASS)
8050 return (PF_DROP);
8051
8052 pd->src = (struct pf_addr *)&h->ip_src;
8053 pd->dst = (struct pf_addr *)&h->ip_dst;
8054 pd->tot_len = ntohs(h->ip_len);
8055 pd->tos = h->ip_tos & ~IPTOS_ECN_MASK;
8056 pd->ttl = h->ip_ttl;
8057 pd->virtual_proto = (h->ip_off & htons(IP_MF | IP_OFFMASK)) ?
8058 PF_VPROTO_FRAGMENT : pd->proto;
8059
8060 break;
8061 }
8062#ifdef INET6
8063 case AF_INET6: {
8064 struct ip6_hdr *h;
8065
8066 /* Check for illegal packets */
8067 if (pd->m->m_pkthdr.len < (int)sizeof(struct ip6_hdr)) {
8068 REASON_SET(reason, PFRES_SHORT);
8069 return (PF_DROP);
8070 }
8071
8072 h = mtod(pd->m, struct ip6_hdr *);
8073 if (pd->m->m_pkthdr.len <
8074 sizeof(struct ip6_hdr) + ntohs(h->ip6_plen)) {
8075 REASON_SET(reason, PFRES_SHORT);
8076 return (PF_DROP);
8077 }
8078
8079 if (pf_walk_header6(pd, h, reason) != PF_PASS)
8080 return (PF_DROP);
8081
8082#if 1
8083 /*
8084	 * we do not support jumbograms yet.  if we keep going, a zero
8085	 * ip6_plen will do something bad, so drop the packet for now.
8086 */
8087 if (pd->jumbolen != 0) {
8088 REASON_SET(reason, PFRES_NORM);
8089 return (PF_DROP);
8090 }
8091#endif /* 1 */
8092
8093 pd->src = (struct pf_addr *)&h->ip6_src;
8094 pd->dst = (struct pf_addr *)&h->ip6_dst;
8095 pd->tot_len = ntohs(h->ip6_plen) + sizeof(struct ip6_hdr);
8096 pd->tos = (ntohl(h->ip6_flow) & 0x0fc00000) >> 20;
8097 pd->ttl = h->ip6_hlim;
8098 pd->virtual_proto = (pd->fragoff != 0) ?
8099 PF_VPROTO_FRAGMENT : pd->proto;
8100
8101 break;
8102 }
8103#endif /* INET6 */
8104 default:
8105 panic("pf_setup_pdesc called with illegal af %u", pd->af);
8106
8107 }
8108
8109 pf_addrcpy(&pd->nsaddr, pd->src, pd->af);
8110 pf_addrcpy(&pd->ndaddr, pd->dst, pd->af);
8111
8112 switch (pd->virtual_proto) {
8113 case IPPROTO_TCP: {
8114 struct tcphdr *th = &pd->hdr.tcp;
8115
8116 if (!pf_pull_hdr(pd->m, pd->off, th, sizeof(*th),
8117 reason, pd->af))
8118 return (PF_DROP);
8119 pd->hdrlen = sizeof(*th);
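		/*
		 * Editor's note: th_off is the TCP header length in 32-bit
		 * words, so (th_off << 2) is its size in bytes; 5 (20 bytes)
		 * is the minimum, and anything smaller or running past
		 * tot_len is malformed.
		 */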
8120 if (th->th_dport == 0 ||
8121 pd->off + (th->th_off << 2) > pd->tot_len ||
8122 (th->th_off << 2) < sizeof(struct tcphdr)) {
8123 REASON_SET(reason, PFRES_SHORT);
8124 return (PF_DROP);
8125 }
8126 pd->p_len = pd->tot_len - pd->off - (th->th_off << 2);
8127 pd->sport = &th->th_sport;
8128 pd->dport = &th->th_dport;
8129 pd->pcksum = &th->th_sum;
8130 break;
8131 }
8132 case IPPROTO_UDP: {
8133 struct udphdr *uh = &pd->hdr.udp;
8134
8135 if (!pf_pull_hdr(pd->m, pd->off, uh, sizeof(*uh),
8136 reason, pd->af))
8137 return (PF_DROP);
8138 pd->hdrlen = sizeof(*uh);
8139 if (uh->uh_dport == 0 ||
8140 pd->off + ntohs(uh->uh_ulen) > pd->tot_len ||
8141 ntohs(uh->uh_ulen) < sizeof(struct udphdr)) {
8142 REASON_SET(reason, PFRES_SHORT);
8143 return (PF_DROP);
8144 }
8145 pd->sport = &uh->uh_sport;
8146 pd->dport = &uh->uh_dport;
8147 pd->pcksum = &uh->uh_sum;
8148 break;
8149 }
8150 case IPPROTO_ICMP: {
8151 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp, ICMP_MINLEN,
8152 reason, pd->af))
8153 return (PF_DROP);
8154 pd->hdrlen = ICMP_MINLEN;
8155 if (pd->off + pd->hdrlen > pd->tot_len) {
8156 REASON_SET(reason, PFRES_SHORT);
8157 return (PF_DROP);
8158 }
8159 pd->pcksum = &pd->hdr.icmp.icmp_cksum;
8160 break;
8161 }
8162#ifdef INET6
8163 case IPPROTO_ICMPV6: {
8164 size_t icmp_hlen = sizeof(struct icmp6_hdr);
8165
8166 if (!pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
8167 reason, pd->af))
8168 return (PF_DROP);
8169 /* ICMP headers we look further into to match state */
8170 switch (pd->hdr.icmp6.icmp6_type) {
8171 case MLD_LISTENER_QUERY:
8172 case MLD_LISTENER_REPORT:
8173 icmp_hlen = sizeof(struct mld_hdr);
8174 break;
8175 case ND_NEIGHBOR_SOLICIT:
8176 case ND_NEIGHBOR_ADVERT:
8177 icmp_hlen = sizeof(struct nd_neighbor_solicit);
8178 /* FALLTHROUGH */
8179 case ND_ROUTER_SOLICIT:
8180 case ND_ROUTER_ADVERT:
8181 case ND_REDIRECT:
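			/*
			 * Editor's note: RFC 4861 requires neighbor
			 * discovery packets to be sent with a hop limit of
			 * 255; a smaller value means the packet crossed a
			 * router and must be discarded.
			 */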
8182 if (pd->ttl != 255) {
8183 REASON_SET(reason, PFRES_NORM);
8184 return (PF_DROP);
8185 }
8186 break;
8187 }
8188 if (icmp_hlen > sizeof(struct icmp6_hdr) &&
8189 !pf_pull_hdr(pd->m, pd->off, &pd->hdr.icmp6, icmp_hlen,
8190 reason, pd->af))
8191 return (PF_DROP);
8192 pd->hdrlen = icmp_hlen;
8193 if (pd->off + pd->hdrlen > pd->tot_len) {
8194 REASON_SET(reason, PFRES_SHORT);
8195 return (PF_DROP);
8196 }
8197 pd->pcksum = &pd->hdr.icmp6.icmp6_cksum;
8198 break;
8199 }
8200#endif /* INET6 */
8201 }
8202
8203 if (pd->sport)
8204 pd->osport = pd->nsport = *pd->sport;
8205 if (pd->dport)
8206 pd->odport = pd->ndport = *pd->dport;
8207
8208 pd->hash = pf_pkt_hash(pd->af, pd->proto,
8209 pd->src, pd->dst, pd->osport, pd->odport);
8210
8211 return (PF_PASS);
8212}
8213
8214void
8215pf_counters_inc(int action, struct pf_pdesc *pd, struct pf_state *st,
8216 struct pf_rule *r, struct pf_rule *a)
8217{
8218 int dirndx;
8219 pd->kif->pfik_bytes[pd->af == AF_INET6][pd->dir == PF_OUT]
8220 [action != PF_PASS] += pd->tot_len;
8221 pd->kif->pfik_packets[pd->af == AF_INET6][pd->dir == PF_OUT]
8222 [action != PF_PASS]++;
8223
8224 if (action == PF_PASS || action == PF_AFRT || r->action == PF_DROP) {
8225 dirndx = (pd->dir == PF_OUT);
8226 r->packets[dirndx]++;
8227 r->bytes[dirndx] += pd->tot_len;
8228 if (a != NULL) {
8229 a->packets[dirndx]++;
8230 a->bytes[dirndx] += pd->tot_len;
8231 }
8232 if (st != NULL) {
8233 struct pf_rule_item *ri;
8234 struct pf_sn_item *sni;
8235
8236 SLIST_FOREACH(sni, &st->src_nodes, next) {
8237 sni->sn->packets[dirndx]++;
8238 sni->sn->bytes[dirndx] += pd->tot_len;
8239 }
8240 dirndx = (pd->dir == st->direction) ? 0 : 1;
8241 st->packets[dirndx]++;
8242 st->bytes[dirndx] += pd->tot_len;
8243
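			/*
			 * Editor's note: every rule on the state's match
			 * list gets its counters bumped too, and any src/dst
			 * that came from a table updates that table's
			 * per-address statistics via pfr_update_stats().
			 */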
8244 SLIST_FOREACH(ri, &st->match_rules, entry) {
8245 ri->r->packets[dirndx]++;
8246 ri->r->bytes[dirndx] += pd->tot_len;
8247
8248 if (ri->r->src.addr.type == PF_ADDR_TABLE)
8249 pfr_update_stats(ri->r->src.addr.p.tbl,
8250 &st->key[(st->direction == PF_IN)]->
8251 addr[(st->direction == PF_OUT)],
8252 pd, ri->r->action, ri->r->src.neg);
8253 if (ri->r->dst.addr.type == PF_ADDR_TABLE)
8254 pfr_update_stats(ri->r->dst.addr.p.tbl,
8255 &st->key[(st->direction == PF_IN)]->
8256 addr[(st->direction == PF_IN)],
8257 pd, ri->r->action, ri->r->dst.neg);
8258 }
8259 }
8260 if (r->src.addr.type == PF_ADDR_TABLE)
8261 pfr_update_stats(r->src.addr.p.tbl,
8262 (st == NULL) ? pd->src :
8263 &st->key[(st->direction == PF_IN)]->
8264 addr[(st->direction == PF_OUT)],
8265 pd, r->action, r->src.neg);
8266 if (r->dst.addr.type == PF_ADDR_TABLE)
8267 pfr_update_stats(r->dst.addr.p.tbl,
8268 (st == NULL) ? pd->dst :
8269 &st->key[(st->direction == PF_IN)]->
8270 addr[(st->direction == PF_IN)],
8271 pd, r->action, r->dst.neg);
8272 }
8273}
8274
8275int
8276pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
8277{
8278 struct pfi_kif *kif = NULL;
8279 u_short action, reason = 0;
8280 struct pf_rule *a = NULL, *r = &pf_default_rule;
8281 struct pf_state *st = NULL;
8282 struct pf_state_key_cmp key;
8283 struct pf_ruleset *ruleset = NULL;
8284 struct pf_pdesc pd;
8285 int dir = (fwdir == PF_FWD) ? PF_OUT : fwdir;
8286 u_int32_t qid, pqid = 0;
8287 int have_pf_lock = 0;
8288
8289 if (!pf_status.running)
8290 return (PF_PASS);
8291
8292#if NCARP > 0
8293 if (ifp->if_type == IFT_CARP) {
8294 struct ifnet *ifp0;
8295
8296 smr_read_enter();
8297 ifp0 = if_get_smr(ifp->if_carpdevidx);
8298 if (ifp0 != NULL)
8299 kif = (struct pfi_kif *)ifp0->if_pf_kif;
8300 smr_read_leave();
8301 } else
8302#endif /* NCARP */
8303 kif = (struct pfi_kif *)ifp->if_pf_kif;
8304
8305 if (kif == NULL) {
8306 DPFPRINTF(LOG_ERR,
8307 "%s: kif == NULL, if_xname %s", __func__, ifp->if_xname);
8308 return (PF_DROP);
8309 }
8310 if (kif->pfik_flags & PFI_IFLAG_SKIP)
8311 return (PF_PASS);
8312
8313#ifdef DIAGNOSTIC
8314 if (((*m0)->m_flags & M_PKTHDR) == 0)
8315 panic("non-M_PKTHDR is passed to pf_test");
8316#endif /* DIAGNOSTIC */
8317
8318 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_GENERATED)
8319 return (PF_PASS);
8320
8321 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_DIVERTED_PACKET) {
8322 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_DIVERTED_PACKET;
8323 return (PF_PASS);
8324 }
8325
8326 if ((*m0)->m_pkthdr.pf.flags & PF_TAG_REFRAGMENTED) {
8327 (*m0)->m_pkthdr.pf.flags &= ~PF_TAG_REFRAGMENTED;
8328 return (PF_PASS);
8329 }
8330
8331 action = pf_setup_pdesc(&pd, af, dir, kif, *m0, &reason);
8332 if (action != PF_PASS) {
8333#if NPFLOG > 0
8334 pd.pflog |= PF_LOG_FORCE;
8335#endif /* NPFLOG > 0 */
8336 goto done;
8337 }
8338
8339 /* packet normalization and reassembly */
8340 switch (pd.af) {
8341 case AF_INET:
8342 action = pf_normalize_ip(&pd, &reason);
8343 break;
8344#ifdef INET6
8345 case AF_INET6:
8346 action = pf_normalize_ip6(&pd, &reason);
8347 break;
8348#endif /* INET6 */
8349 }
8350 *m0 = pd.m;
8351 /* if packet sits in reassembly queue, return without error */
8352 if (pd.m == NULL)
8353		return (PF_PASS);
8354
8355 if (action != PF_PASS) {
8356#if NPFLOG > 0
8357 pd.pflog |= PF_LOG_FORCE;
8358#endif /* NPFLOG > 0 */
8359 goto done;
8360 }
8361
8362 /* if packet has been reassembled, update packet description */
8363 if (pf_status.reass && pd.virtual_proto == PF_VPROTO_FRAGMENT) {
8364 action = pf_setup_pdesc(&pd, af, dir, kif, pd.m, &reason);
8365 if (action != PF_PASS) {
8366#if NPFLOG > 0
8367 pd.pflog |= PF_LOG_FORCE;
8368#endif /* NPFLOG > 0 */
8369 goto done;
8370 }
8371 }
8372 pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
8373
8374 /*
8375 * Avoid pcb-lookups from the forwarding path. They should never
8376 * match and would cause MP locking problems.
8377 */
8378 if (fwdir == PF_FWD) {
8379 pd.lookup.done = -1;
8380 pd.lookup.uid = -1;
8381 pd.lookup.gid = -1;
8382 pd.lookup.pid = NO_PID;
8383 }
8384
8385 switch (pd.virtual_proto) {
8386
8387 case PF_VPROTO_FRAGMENT: {
8388 /*
8389 * handle fragments that aren't reassembled by
8390 * normalization
8391 */
8392 PF_LOCK();
8393 have_pf_lock = 1;
8394 action = pf_test_rule(&pd, &r, &st, &a, &ruleset, &reason);
8395 st = pf_state_ref(st);
8396 if (action != PF_PASS)
8397 REASON_SET(&reason, PFRES_FRAG);
8398 break;
8399 }
8400
8401 case IPPROTO_ICMP: {
8402 if (pd.af != AF_INET) {
8403 action = PF_DROP;
8404 REASON_SET(&reason, PFRES_NORM);
8405 DPFPRINTF(LOG_NOTICE,
8406 "dropping IPv6 packet with ICMPv4 payload");
8407 break;
8408 }
8409 PF_STATE_ENTER_READ();
8410 action = pf_test_state_icmp(&pd, &st, &reason);
8411 st = pf_state_ref(st);
8412 PF_STATE_EXIT_READ();
8413 if (action == PF_PASS || action == PF_AFRT) {
8414#if NPFSYNC > 0
8415 pfsync_update_state(st);
8416#endif /* NPFSYNC > 0 */
8417 r = st->rule.ptr;
8418 a = st->anchor.ptr;
8419#if NPFLOG > 0
8420 pd.pflog |= st->log;
8421#endif /* NPFLOG > 0 */
8422 } else if (st == NULL) {
8423 PF_LOCK();
8424 have_pf_lock = 1;
8425 action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
8426 &reason);
8427 st = pf_state_ref(st);
8428 }
8429 break;
8430 }
8431
8432#ifdef INET6
8433 case IPPROTO_ICMPV6: {
8434 if (pd.af != AF_INET6) {
8435 action = PF_DROP;
8436 REASON_SET(&reason, PFRES_NORM);
8437 DPFPRINTF(LOG_NOTICE,
8438 "dropping IPv4 packet with ICMPv6 payload");
8439 break;
8440 }
8441 PF_STATE_ENTER_READ();
8442 action = pf_test_state_icmp(&pd, &st, &reason);
8443 st = pf_state_ref(st);
8444 PF_STATE_EXIT_READ();
8445 if (action == PF_PASS || action == PF_AFRT) {
8446#if NPFSYNC > 0
8447 pfsync_update_state(st);
8448#endif /* NPFSYNC > 0 */
8449 r = st->rule.ptr;
8450 a = st->anchor.ptr;
8451#if NPFLOG > 0
8452 pd.pflog |= st->log;
8453#endif /* NPFLOG > 0 */
8454 } else if (st == NULL) {
8455 PF_LOCK();
8456 have_pf_lock = 1;
8457 action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
8458 &reason);
8459 st = pf_state_ref(st);
8460 }
8461 break;
8462 }
8463#endif /* INET6 */
8464
8465 case IPPROTO_TCP:
8466 if (pd.dir == PF_IN &&
8467 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK)) == TH_SYN &&
8468 pf_synflood_check(&pd)) {
8469 PF_LOCK();
8470 have_pf_lock = 1;
8471 pf_syncookie_send(&pd, &reason);
8472 action = PF_DROP;
8473 break;
8474 }
8475 if ((pd.hdr.tcp.th_flags & TH_ACK) && pd.p_len == 0)
8476 pqid = 1;
8477 action = pf_normalize_tcp(&pd);
8478 if (action == PF_DROP)
8479 break;
8480
8481 /* FALLTHROUGH */
8482 default:
8483 key.af = pd.af;
8484 key.proto = pd.virtual_proto;
8485 key.rdomain = pd.rdomain;
8486 pf_addrcpy(&key.addr[pd.sidx], pd.src, key.af);
8487 pf_addrcpy(&key.addr[pd.didx], pd.dst, key.af);
8488 key.port[pd.sidx] = pd.osport;
8489 key.port[pd.didx] = pd.odport;
8490 key.hash = pd.hash;
8491
8492 PF_STATE_ENTER_READ();
8493 action = pf_find_state(&pd, &key, &st);
8494 st = pf_state_ref(st);
8495 PF_STATE_EXIT_READ();
8496
8497 /* check for syncookies if tcp ack and no active state */
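		/*
		 * Editor's note: under synflood pf answered the SYN with a
		 * syncookie and kept no state.  When the peer's ACK proves
		 * the cookie valid, a copy of the original SYN is
		 * reconstructed and sent back through pf_test() to evaluate
		 * the ruleset and create state, after which the sequence
		 * numbers below are fixed up so pf can complete the
		 * handshake with the destination.
		 */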
8498 if (pd.dir == PF_IN && pd.virtual_proto == IPPROTO_TCP &&
8499 (st == NULL || (st->src.state >= TCPS_FIN_WAIT_2 &&
8500 st->dst.state >= TCPS_FIN_WAIT_2)) &&
8501 (pd.hdr.tcp.th_flags & (TH_SYN|TH_ACK|TH_RST)) == TH_ACK &&
8502 pf_syncookie_validate(&pd)) {
8503 struct mbuf *msyn;
8504 msyn = pf_syncookie_recreate_syn(&pd, &reason);
8505 if (msyn) {
8506 action = pf_test(af, fwdir, ifp, &msyn);
8507 m_freem(msyn);
8508 if (action == PF_PASS || action == PF_AFRT) {
8509 PF_STATE_ENTER_READ();
8510 pf_state_unref(st);
8511 action = pf_find_state(&pd, &key, &st);
8512 st = pf_state_ref(st);
8513 PF_STATE_EXIT_READ();
8514 if (st == NULL)
8515 return (PF_DROP);
8516 st->src.seqhi = st->dst.seqhi =
8517 ntohl(pd.hdr.tcp.th_ack) - 1;
8518 st->src.seqlo =
8519 ntohl(pd.hdr.tcp.th_seq) - 1;
8520 pf_set_protostate(st, PF_PEER_SRC,
8521 PF_TCPS_PROXY_DST);
8522 }
8523 } else
8524 action = PF_DROP;
8525 }
8526
8527 if (action == PF_MATCH)
8528 action = pf_test_state(&pd, &st, &reason);
8529
8530 if (action == PF_PASS || action == PF_AFRT) {
8531#if NPFSYNC > 0
8532 pfsync_update_state(st);
8533#endif /* NPFSYNC > 0 */
8534 r = st->rule.ptr;
8535 a = st->anchor.ptr;
8536#if NPFLOG > 0
8537 pd.pflog |= st->log;
8538#endif /* NPFLOG > 0 */
8539 } else if (st == NULL) {
8540 PF_LOCK();
8541 have_pf_lock = 1;
8542 action = pf_test_rule(&pd, &r, &st, &a, &ruleset,
8543 &reason);
8544 st = pf_state_ref(st);
8545 }
8546
8547 if (pd.virtual_proto == IPPROTO_TCP) {
8548 if (st) {
8549 if (st->max_mss)
8550 pf_normalize_mss(&pd, st->max_mss);
8551 } else if (r->max_mss)
8552 pf_normalize_mss(&pd, r->max_mss);
8553 }
8554
8555 break;
8556 }
8557
8558 if (have_pf_lock != 0)
8559 PF_UNLOCK();
8560
8561 /*
8562 * At the moment, we rely on NET_LOCK() to prevent removal of items
8563 * we've collected above ('r', 'anchor' and 'ruleset'). They'll have
8564 * to be refcounted when NET_LOCK() is gone.
8565 */
8566
8567done:
8568 if (action != PF_DROP) {
8569 if (st) {
8570 /* The non-state case is handled in pf_test_rule() */
8571 if (action == PF_PASS && pd.badopts != 0 &&
8572 !(st->state_flags & PFSTATE_ALLOWOPTS)) {
8573 action = PF_DROP;
8574 REASON_SET(&reason, PFRES_IPOPTIONS);
8575#if NPFLOG > 0
8576 pd.pflog |= PF_LOG_FORCE;
8577#endif /* NPFLOG > 0 */
8578 DPFPRINTF(LOG_NOTICE, "dropping packet with "
8579 "ip/ipv6 options in pf_test()");
8580 }
8581
8582 pf_scrub(pd.m, st->state_flags, pd.af, st->min_ttl,
8583 st->set_tos);
8584 pf_tag_packet(pd.m, st->tag, st->rtableid[pd.didx]);
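			/*
			 * Editor's note: pqid was set earlier for pure ACKs
			 * (TH_ACK with no payload); those and low-delay TOS
			 * packets go to the state's priority queue instead
			 * of its regular queue.
			 */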
8585 if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
8586 qid = st->pqid;
8587 if (st->state_flags & PFSTATE_SETPRIO) {
8588 pd.m->m_pkthdr.pf.prio =
8589 st->set_prio[1];
8590 }
8591 } else {
8592 qid = st->qid;
8593 if (st->state_flags & PFSTATE_SETPRIO) {
8594 pd.m->m_pkthdr.pf.prio =
8595 st->set_prio[0];
8596 }
8597 }
8598 pd.m->m_pkthdr.pf.delay = st->delay;
8599 } else {
8600 pf_scrub(pd.m, r->scrub_flags, pd.af, r->min_ttl,
8601 r->set_tos);
8602 if (pqid || (pd.tos & IPTOS_LOWDELAY)) {
8603 qid = r->pqid;
8604 if (r->scrub_flags & PFSTATE_SETPRIO)
8605 pd.m->m_pkthdr.pf.prio = r->set_prio[1];
8606 } else {
8607 qid = r->qid;
8608 if (r->scrub_flags & PFSTATE_SETPRIO)
8609 pd.m->m_pkthdr.pf.prio = r->set_prio[0];
8610 }
8611 pd.m->m_pkthdr.pf.delay = r->delay;
8612 }
8613 }
8614
8615 if (action == PF_PASS && qid)
8616 pd.m->m_pkthdr.pf.qid = qid;
8617 if (st != NULL) {
8618 struct mbuf *m = pd.m;
8619 struct inpcb *inp = m->m_pkthdr.pf.inp;
8620
8621 if (pd.dir == PF_IN) {
8622 KASSERT(inp == NULL);
8623 pf_mbuf_link_state_key(m, st->key[PF_SK_STACK]);
8624 } else if (pd.dir == PF_OUT)
8625 pf_state_key_link_inpcb(st->key[PF_SK_STACK], inp);
8626
8627 if (!ISSET(m->m_pkthdr.csum_flags, M_FLOWID)) {
8628 m->m_pkthdr.ph_flowid = st->key[PF_SK_WIRE]->hash;
8629 SET(m->m_pkthdr.csum_flags, M_FLOWID);
8630 }
8631 }
8632
8633 /*
8634 * connections redirected to loopback should not match sockets
8635 * bound specifically to loopback due to security implications,
8636 * see in_pcblookup_listen().
8637 */
8638 if (pd.destchg)
8639 if ((pd.af == AF_INET && (ntohl(pd.dst->v4.s_addr) >>
8640 IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) ||
8641 (pd.af == AF_INET6 && IN6_IS_ADDR_LOOPBACK(&pd.dst->v6)))
8642 pd.m->m_pkthdr.pf.flags |= PF_TAG_TRANSLATE_LOCALHOST;
8643 /* We need to redo the route lookup on outgoing routes. */
8644 if (pd.destchg && pd.dir == PF_OUT)
8645 pd.m->m_pkthdr.pf.flags |= PF_TAG_REROUTE;
8646
8647 if (pd.dir == PF_IN && action == PF_PASS &&
8648 (r->divert.type == PF_DIVERT_TO ||
8649 r->divert.type == PF_DIVERT_REPLY)) {
8650 struct pf_divert *divert;
8651
8652 if ((divert = pf_get_divert(pd.m))) {
8653 pd.m->m_pkthdr.pf.flags |= PF_TAG_DIVERTED;
8654 divert->addr = r->divert.addr;
8655 divert->port = r->divert.port;
8656 divert->rdomain = pd.rdomain;
8657 divert->type = r->divert.type;
8658 }
8659 }
8660
8661 if (action == PF_PASS && r->divert.type == PF_DIVERT_PACKET)
8662 action = PF_DIVERT;
8663
8664#if NPFLOG > 0
8665 if (pd.pflog) {
8666 struct pf_rule_item *ri;
8667
8668 if (pd.pflog & PF_LOG_FORCE || r->log & PF_LOG_ALL)
8669 pflog_packet(&pd, reason, r, a, ruleset, NULL);
8670 if (st) {
8671 SLIST_FOREACH(ri, &st->match_rules, entry)
8672 if (ri->r->log & PF_LOG_ALL)
8673 pflog_packet(&pd, reason, ri->r, a,
8674 ruleset, NULL);
8675 }
8676 }
8677#endif /* NPFLOG > 0 */
8678
8679 pf_counters_inc(action, &pd, st, r, a);
8680
8681 switch (action) {
8682 case PF_SYNPROXY_DROP:
8683 m_freem(pd.m);
8684 /* FALLTHROUGH */
8685 case PF_DEFER:
8686 pd.m = NULL;
8687 action = PF_PASS;
8688 break;
8689 case PF_DIVERT:
8690 switch (pd.af) {
8691 case AF_INET:
8692 divert_packet(pd.m, pd.dir, r->divert.port);
8693 pd.m = NULL;
8694 break;
8695#ifdef INET6
8696 case AF_INET6:
8697 divert6_packet(pd.m, pd.dir, r->divert.port);
8698 pd.m = NULL;
8699 break;
8700#endif /* INET6 */
8701 }
8702 action = PF_PASS;
8703 break;
8704#ifdef INET6
8705 case PF_AFRT:
8706 if (pf_translate_af(&pd)) {
8707 action = PF_DROP;
8708 goto out;
8709 }
8710 pd.m->m_pkthdr.pf.flags |= PF_TAG_GENERATED;
8711 switch (pd.naf) {
8712 case AF_INET:
8713 if (pd.dir == PF_IN) {
8714 int flags = IP_REDIRECT;
8715
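				/*
				 * Editor's note: the ip_forwarding sysctl is
				 * tri-state on OpenBSD: 0 off, 1 on, and 2
				 * forwards only packets handled by IPsec,
				 * hence the extra IP_FORWARDING_IPSEC flag
				 * below.
				 */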
8716 switch (atomic_load_int(&ip_forwarding)) {
8717 case 2:
8718 SET(flags, IP_FORWARDING_IPSEC);
8719 /* FALLTHROUGH */
8720 case 1:
8721 SET(flags, IP_FORWARDING);
8722 break;
8723 default:
8724 ipstat_inc(ips_cantforward);
8725 action = PF_DROP;
8726 goto out;
8727 }
8728 if (atomic_load_int(&ip_directedbcast))
8729 SET(flags, IP_ALLOWBROADCAST);
8730 ip_forward(pd.m, ifp, NULL, flags);
8731 } else
8732 ip_output(pd.m, NULL, NULL, 0, NULL, NULL, 0);
8733 break;
8734 case AF_INET6:
8735 if (pd.dir == PF_IN) {
8736 int flags = IPV6_REDIRECT;
8737
8738 switch (atomic_load_int(&ip6_forwarding)) {
8739 case 2:
8740 SET(flags, IPV6_FORWARDING_IPSEC);
8741 /* FALLTHROUGH */
8742 case 1:
8743 SET(flags, IPV6_FORWARDING);
8744 break;
8745 default:
8746 ip6stat_inc(ip6s_cantforward);
8747 action = PF_DROP;
8748 goto out;
8749 }
8750 ip6_forward(pd.m, NULL, flags);
8751 } else
8752 ip6_output(pd.m, NULL, NULL, 0, NULL, NULL);
8753 break;
8754 }
8755 pd.m = NULL;
8756 action = PF_PASS;
8757 break;
8758#endif /* INET6 */
8759 case PF_DROP:
8760 m_freem(pd.m);
8761 pd.m = NULL;
8762 break;
8763 default:
8764 if (st && st->rt) {
8765 switch (pd.af) {
8766 case AF_INET:
8767 pf_route(&pd, st);
8768 break;
8769#ifdef INET6
8770 case AF_INET6:
8771 pf_route6(&pd, st);
8772 break;
8773#endif /* INET6 */
8774 }
8775 }
8776 break;
8777 }
8778
8779#ifdef INET6
8780 /* if reassembled packet passed, create new fragments */
8781 if (pf_status.reass && action == PF_PASS && pd.m && fwdir == PF_FWD &&
8782 pd.af == AF_INET6) {
8783 struct m_tag *mtag;
8784
8785 if ((mtag = m_tag_find(pd.m, PACKET_TAG_PF_REASSEMBLED, NULL)))
8786 action = pf_refragment6(&pd.m, mtag, NULL, NULL, NULL);
8787 }
8788#endif /* INET6 */
8789 if (st && action != PF_DROP) {
8790 if (!st->if_index_in && dir == PF_IN)
8791 st->if_index_in = ifp->if_index;
8792 else if (!st->if_index_out && dir == PF_OUT)
8793 st->if_index_out = ifp->if_index;
8794 }
8795
8796#ifdef INET6
8797out:
8798#endif /* INET6 */
8799 *m0 = pd.m;
8800
8801 pf_state_unref(st);
8802
8803 return (action);
8804}
8805
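/*
 * Editor's note: returns 1 when pf already knows the packet is destined
 * to the local host (diverted, or its state key is linked to a socket),
 * and -1 when it cannot tell, in which case the caller falls back to a
 * normal address lookup.
 */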
8806int
8807pf_ouraddr(struct mbuf *m)
8808{
8809 struct pf_state_key *sk;
8810
8811 if (m->m_pkthdr.pf.flags & PF_TAG_DIVERTED)
8812 return (1);
8813
8814 sk = m->m_pkthdr.pf.statekey;
8815 if (sk != NULL) {
8816 if (READ_ONCE(sk->sk_inp) != NULL)
8817 return (1);
8818 }
8819
8820 return (-1);
8821}
8822
8823/*
8824 * must be called whenever any addressing information such as
8825 * address, port or protocol has changed
8826 */
8827void
8828pf_pkt_addr_changed(struct mbuf *m)
8829{
8830 pf_mbuf_unlink_state_key(m);
8831 pf_mbuf_unlink_inpcb(m);
8832}
8833
8834struct inpcb *
8835pf_inp_lookup(struct mbuf *m)
8836{
8837 struct inpcb *inp = NULL;
8838 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
8839
8840 if (!pf_state_key_isvalid(sk))
8841 pf_mbuf_unlink_state_key(m);
8842 else if (READ_ONCE(sk->sk_inp) != NULL) {
8843 mtx_enter(&pf_inp_mtx);
8844 inp = in_pcbref(sk->sk_inp);
8845 mtx_leave(&pf_inp_mtx);
8846 }
8847
8848 return (inp);
8849}
8850
8851void
8852pf_inp_link(struct mbuf *m, struct inpcb *inp)
8853{
8854 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
8855
8856 if (!pf_state_key_isvalid(sk)) {
8857 pf_mbuf_unlink_state_key(m);
8858 return;
8859 }
8860
8861 /*
8862	 * we don't need to grab the PF-lock here.  In the worst case we link
8863	 * the inp to a state that is just being marked as deleted by another
8864	 * thread.
8865 */
8866 pf_state_key_link_inpcb(sk, inp);
8867
8868	/* The statekey has found the inp; it is no longer needed. */
8869 pf_mbuf_unlink_state_key(m);
8870}
8871
8872void
8873pf_inp_unlink(struct inpcb *inp)
8874{
8875 struct pf_state_key *sk;
8876
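	/*
	 * Editor's note: the unlocked READ_ONCE() is a fast path that skips
	 * the mutex when no state key is linked; the pointer is read again
	 * under pf_inp_mtx to resolve the race with a concurrent unlink.
	 */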
8877 if (READ_ONCE(inp->inp_pf_sk) == NULL)
8878 return;
8879
8880 mtx_enter(&pf_inp_mtx);
8881 sk = inp->inp_pf_sk;
8882 if (sk == NULL) {
8883 mtx_leave(&pf_inp_mtx);
8884 return;
8885 }
8886 KASSERT(sk->sk_inp == inp);
8887 sk->sk_inp = NULL;
8888 inp->inp_pf_sk = NULL;
8889 mtx_leave(&pf_inp_mtx);
8890
8891 pf_state_key_unref(sk);
8892 in_pcbunref(inp);
8893}
8894
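/*
 * Editor's note: a state key and the key for the opposite direction are
 * linked to each other here so that, once a connection has been seen in
 * both directions, the reverse key can be found through sk_reverse
 * without another tree lookup.  The atomic_cas_ptr() lets two CPUs race
 * to link the same pair; the loser just asserts that the winner stored
 * the key it expected.
 */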
8895void
8896pf_state_key_link_reverse(struct pf_state_key *sk, struct pf_state_key *skrev)
8897{
8898 struct pf_state_key *old_reverse;
8899
8900 old_reverse = atomic_cas_ptr(&sk->sk_reverse, NULL, skrev);
8901 if (old_reverse != NULL)
8902 KASSERT(old_reverse == skrev);
8903 else {
8904 pf_state_key_ref(skrev);
8905
8906 /*
8907 * NOTE: if sk == skrev, then KASSERT() below holds true, we
8908 * still want to grab a reference in such case, because
8909 * pf_state_key_unlink_reverse() does not check whether keys
8910 * are identical or not.
8911 */
8912 old_reverse = atomic_cas_ptr(&skrev->sk_reverse, NULL, sk);
8913 if (old_reverse != NULL)
8914 KASSERT(old_reverse == sk);
8915
8916 pf_state_key_ref(sk);
8917 }
8918}
8919
8920#if NPFLOG > 0
8921void
8922pf_log_matches(struct pf_pdesc *pd, struct pf_rule *rm, struct pf_rule *am,
8923 struct pf_ruleset *ruleset, struct pf_rule_slist *matchrules)
8924{
8925 struct pf_rule_item *ri;
8926
8927 /* if this is the log(matches) rule, packet has been logged already */
8928 if (rm->log & PF_LOG_MATCHES)
8929 return;
8930
8931 SLIST_FOREACH(ri, matchrules, entry)
8932 if (ri->r->log & PF_LOG_MATCHES)
8933 pflog_packet(pd, PFRES_MATCH, rm, am, ruleset, ri->r);
8934}
8935#endif /* NPFLOG > 0 */
8936
8937struct pf_state_key *
8938pf_state_key_ref(struct pf_state_key *sk)
8939{
8940 if (sk != NULL)
8941 PF_REF_TAKE(sk->sk_refcnt);
8942
8943 return (sk);
8944}
8945
8946void
8947pf_state_key_unref(struct pf_state_key *sk)
8948{
8949 if (PF_REF_RELE(sk->sk_refcnt)) {
8950 /* state key must be removed from tree */
8951 KASSERT(!pf_state_key_isvalid(sk));
8952 /* state key must be unlinked from reverse key */
8953 KASSERT(sk->sk_reverse == NULL);
8954 /* state key must be unlinked from socket */
8955 KASSERT(sk->sk_inp == NULL);
8956 pool_put(&pf_state_key_pl, sk);
8957 }
8958}
8959
8960int
8961pf_state_key_isvalid(struct pf_state_key *sk)
8962{
8963 return ((sk != NULL) && (sk->sk_removed == 0));
8964}
8965
8966void
8967pf_mbuf_link_state_key(struct mbuf *m, struct pf_state_key *sk)
8968{
8969 KASSERT(m->m_pkthdr.pf.statekey == NULL);
8970 m->m_pkthdr.pf.statekey = pf_state_key_ref(sk);
8971}
8972
8973void
8974pf_mbuf_unlink_state_key(struct mbuf *m)
8975{
8976 struct pf_state_key *sk = m->m_pkthdr.pf.statekey;
8977
8978 if (sk != NULL) {
8979 m->m_pkthdr.pf.statekey = NULL;
8980 pf_state_key_unref(sk);
8981 }
8982}
8983
8984void
8985pf_mbuf_link_inpcb(struct mbuf *m, struct inpcb *inp)
8986{
8987 KASSERT(m->m_pkthdr.pf.inp == NULL);
8988 m->m_pkthdr.pf.inp = in_pcbref(inp);
8989}
8990
8991void
8992pf_mbuf_unlink_inpcb(struct mbuf *m)
8993{
8994 struct inpcb *inp = m->m_pkthdr.pf.inp;
8995
8996 if (inp != NULL) {
8997 m->m_pkthdr.pf.inp = NULL;
8998 in_pcbunref(inp);
8999 }
9000}
9001
9002void
9003pf_state_key_link_inpcb(struct pf_state_key *sk, struct inpcb *inp)
9004{
9005 if (inp == NULL || READ_ONCE(sk->sk_inp) != NULL)
9006 return;
9007
9008 mtx_enter(&pf_inp_mtx);
9009 if (inp->inp_pf_sk != NULL || sk->sk_inp != NULL) {
9010 mtx_leave(&pf_inp_mtx);
9011 return;
9012 }
9013 sk->sk_inp = in_pcbref(inp);
9014 inp->inp_pf_sk = pf_state_key_ref(sk);
9015 mtx_leave(&pf_inp_mtx);
9016}
9017
9018void
9019pf_state_key_unlink_inpcb(struct pf_state_key *sk)
9020{
9021 struct inpcb *inp;
9022
9023 if (READ_ONCE(sk->sk_inp) == NULL)
9024 return;
9025
9026 mtx_enter(&pf_inp_mtx);
9027 inp = sk->sk_inp;
9028 if (inp == NULL) {
9029 mtx_leave(&pf_inp_mtx);
9030 return;
9031 }
9032 KASSERT(inp->inp_pf_sk == sk);
9033 sk->sk_inp = NULL;
9034 inp->inp_pf_sk = NULL;
9035 mtx_leave(&pf_inp_mtx);
9036
9037 pf_state_key_unref(sk);
9038 in_pcbunref(inp);
9039}
9040
9041void
9042pf_state_key_unlink_reverse(struct pf_state_key *sk)
9043{
9044 struct pf_state_key *skrev = sk->sk_reverse;
9045
9046	/* Note that sk and skrev may be equal, in which case we unref twice. */
9047 if (skrev != NULL) {
9048 KASSERT(skrev->sk_reverse == sk);
9049 sk->sk_reverse = NULL;
9050 skrev->sk_reverse = NULL;
9051 pf_state_key_unref(skrev);
9052 pf_state_key_unref(sk);
9053 }
9054}
9055
9056struct pf_state *
9057pf_state_ref(struct pf_state *st)
9058{
9059 if (st != NULL)
9060 PF_REF_TAKE(st->refcnt);
9061 return (st);
9062}
9063
9064void
9065pf_state_unref(struct pf_state *st)
9066{
9067 if ((st != NULL) && PF_REF_RELE(st->refcnt)) {
9068 /* never inserted or removed */
9069#if NPFSYNC > 0
9070 KASSERT((TAILQ_NEXT(st, sync_list) == NULL) ||
9071 ((TAILQ_NEXT(st, sync_list) == _Q_INVALID) &&
9072 (st->sync_state >= PFSYNC_S_NONE)));
9073#endif /* NPFSYNC */
9074 KASSERT((TAILQ_NEXT(st, entry_list) == NULL) ||
9075 (TAILQ_NEXT(st, entry_list) == _Q_INVALID));
9076
9077 pf_state_key_unref(st->key[PF_SK_WIRE]);
9078 pf_state_key_unref(st->key[PF_SK_STACK]);
9079
9080 KASSERT(SLIST_EMPTY(&st->linkage));
9081
9082 pool_put(&pf_state_pl, st);
9083 }
9084}
9085
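/*
 * Editor's note: this implements the rule delay option.  The mbuf is
 * parked in a pf_pktdelay and handed back to if_enqueue() by a timeout
 * pf.delay milliseconds later; the delay field is zeroed so the packet
 * is not delayed a second time when it passes through again.
 */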
9086int
9087pf_delay_pkt(struct mbuf *m, u_int ifidx)
9088{
9089 struct pf_pktdelay *pdy;
9090
9091 if ((pdy = pool_get(&pf_pktdelay_pl, PR_NOWAIT)) == NULL) {
9092 m_freem(m);
9093 return (ENOBUFS);
9094 }
9095 pdy->ifidx = ifidx;
9096 pdy->m = m;
9097 timeout_set(&pdy->to, pf_pktenqueue_delayed, pdy);
9098 timeout_add_msec(&pdy->to, m->m_pkthdr.pf.delay);
9099 m->m_pkthdr.pf.delay = 0;
9100 return (0);
9101}
9102
9103void
9104pf_pktenqueue_delayed(void *arg)
9105{
9106 struct pf_pktdelay *pdy = arg;
9107 struct ifnet *ifp;
9108
9109 ifp = if_get(pdy->ifidx);
9110 if (ifp != NULL) {
9111 if_enqueue(ifp, pdy->m);
9112 if_put(ifp);
9113 } else
9114 m_freem(pdy->m);
9115
9116 pool_put(&pf_pktdelay_pl, pdy);
9117}
9118
9119void
9120pf_status_init(void)
9121{
9122 memset(&pf_status, 0, sizeof(pf_status));
9123 pf_status.debug = LOG_ERR;
9124 pf_status.reass = PF_REASS_ENABLED;
9125
9126 /* XXX do our best to avoid a conflict */
9127 pf_status.hostid = arc4random();
9128
9129 pf_status_fcounters = counters_alloc(FCNT_MAX);
9130}
9131
9132void
9133pf_status_clear(void)
9134{
9135 PF_ASSERT_LOCKED();
9136 counters_zero(pf_status_fcounters, FCNT_MAX);
9137}
9138
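/*
 * Editor's note: the fcounters live in per-CPU memory (counters_alloc()
 * in pf_status_init() above), so reading them means summing every CPU's
 * slice; the on-stack scratch array gives counters_read() the temporary
 * space it needs to do that without allocating.
 */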
9139void
9140pf_status_read(struct pf_status *pfs)
9141{
9142 uint64_t scratch[FCNT_MAX];
9143
9144 NET_LOCK();
9145 PF_LOCK();
9146 PF_FRAG_LOCK();
9147 memcpy(pfs, &pf_status, sizeof(struct pf_status));
9148 PF_FRAG_UNLOCK();
9149 pfi_update_status(pfs->ifname, pfs);
9150 PF_UNLOCK();
9151 NET_UNLOCK();
9152
9153 counters_read(pf_status_fcounters, pfs->fcounters, FCNT_MAX, scratch);
9154}