Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux

Merge tag 'ipsec-2025-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec

Steffen Klassert says:

====================
pull request (net): ipsec 2025-01-27

1) Fix incrementing the upper 32 bits of the sequence number for GSO skbs.
From Jianbo Liu.

2) Fix an out-of-bounds read on xfrm state lookup.
From Florian Westphal.

3) Fix secpath handling on packet offload mode.
From Alexandre Cassen.

4) Fix the usage of skb->sk in the xfrm layer.

5) Don't disable preemption while looking up cache state
to fix PREEMPT_RT.
From Sebastian Siewior.

* tag 'ipsec-2025-01-27' of git://git.kernel.org/pub/scm/linux/kernel/git/klassert/ipsec:
xfrm: Don't disable preemption while looking up cache state.
xfrm: Fix the usage of skb->sk
xfrm: delete intermediate secpath entry in packet offload mode
xfrm: state: fix out-of-bounds read during lookup
xfrm: replay: Fix the update of replay_esn->oseq_hi for GSO
====================

Link: https://patch.msgid.link/20250127060757.3946314-1-steffen.klassert@secunet.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>

+100 -38
+13 -3
include/net/xfrm.h
··· 1268 1268 1269 1269 if (xo) { 1270 1270 x = xfrm_input_state(skb); 1271 - if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) 1272 - return (xo->flags & CRYPTO_DONE) && 1273 - (xo->status & CRYPTO_SUCCESS); 1271 + if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET) { 1272 + bool check = (xo->flags & CRYPTO_DONE) && 1273 + (xo->status & CRYPTO_SUCCESS); 1274 + 1275 + /* The packets here are plain ones and secpath was 1276 + * needed to indicate that hardware already handled 1277 + * them and there is no need to do nothing in addition. 1278 + * 1279 + * Consume secpath which was set by drivers. 1280 + */ 1281 + secpath_reset(skb); 1282 + return check; 1283 + } 1274 1284 } 1275 1285 1276 1286 return __xfrm_check_nopolicy(net, skb, dir) ||
+1 -1
net/ipv4/esp4.c
··· 279 279 x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) 280 280 esp_output_tail_tcp(x, skb); 281 281 else 282 - xfrm_output_resume(skb->sk, skb, err); 282 + xfrm_output_resume(skb_to_full_sk(skb), skb, err); 283 283 } 284 284 } 285 285
+1 -1
net/ipv6/esp6.c
··· 315 315 x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP) 316 316 esp_output_tail_tcp(x, skb); 317 317 else 318 - xfrm_output_resume(skb->sk, skb, err); 318 + xfrm_output_resume(skb_to_full_sk(skb), skb, err); 319 319 } 320 320 } 321 321
+2 -2
net/ipv6/xfrm6_output.c
··· 82 82 83 83 toobig = skb->len > mtu && !skb_is_gso(skb); 84 84 85 - if (toobig && xfrm6_local_dontfrag(skb->sk)) { 85 + if (toobig && xfrm6_local_dontfrag(sk)) { 86 86 xfrm6_local_rxpmtu(skb, mtu); 87 87 kfree_skb(skb); 88 88 return -EMSGSIZE; 89 89 } else if (toobig && xfrm6_noneed_fragment(skb)) { 90 90 skb->ignore_df = 1; 91 91 goto skip_frag; 92 - } else if (!skb->ignore_df && toobig && skb->sk) { 92 + } else if (!skb->ignore_df && toobig && sk) { 93 93 xfrm_local_error(skb, mtu); 94 94 kfree_skb(skb); 95 95 return -EMSGSIZE;
+1 -1
net/xfrm/xfrm_interface_core.c
··· 506 506 skb_dst_set(skb, dst); 507 507 skb->dev = tdev; 508 508 509 - err = dst_output(xi->net, skb->sk, skb); 509 + err = dst_output(xi->net, skb_to_full_sk(skb), skb); 510 510 if (net_xmit_eval(err) == 0) { 511 511 dev_sw_netstats_tx_add(dev, 1, length); 512 512 } else {
+4 -3
net/xfrm/xfrm_output.c
··· 802 802 !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) { 803 803 skb->protocol = htons(ETH_P_IP); 804 804 805 - if (skb->sk) 805 + if (skb->sk && sk_fullsock(skb->sk)) 806 806 xfrm_local_error(skb, mtu); 807 807 else 808 808 icmp_send(skb, ICMP_DEST_UNREACH, ··· 838 838 { 839 839 int mtu, ret = 0; 840 840 struct dst_entry *dst = skb_dst(skb); 841 + struct sock *sk = skb_to_full_sk(skb); 841 842 842 843 if (skb->ignore_df) 843 844 goto out; ··· 853 852 skb->dev = dst->dev; 854 853 skb->protocol = htons(ETH_P_IPV6); 855 854 856 - if (xfrm6_local_dontfrag(skb->sk)) 855 + if (xfrm6_local_dontfrag(sk)) 857 856 ipv6_stub->xfrm6_local_rxpmtu(skb, mtu); 858 - else if (skb->sk) 857 + else if (sk) 859 858 xfrm_local_error(skb, mtu); 860 859 else 861 860 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+1 -1
net/xfrm/xfrm_policy.c
··· 2964 2964 skb_dst_drop(skb); 2965 2965 skb_dst_set(skb, dst); 2966 2966 2967 - dst_output(net, skb->sk, skb); 2967 + dst_output(net, skb_to_full_sk(skb), skb); 2968 2968 } 2969 2969 2970 2970 out:
+6 -4
net/xfrm/xfrm_replay.c
··· 714 714 oseq += skb_shinfo(skb)->gso_segs; 715 715 } 716 716 717 - if (unlikely(xo->seq.low < replay_esn->oseq)) { 718 - XFRM_SKB_CB(skb)->seq.output.hi = ++oseq_hi; 719 - xo->seq.hi = oseq_hi; 720 - replay_esn->oseq_hi = oseq_hi; 717 + if (unlikely(oseq < replay_esn->oseq)) { 718 + replay_esn->oseq_hi = ++oseq_hi; 719 + if (xo->seq.low < replay_esn->oseq) { 720 + XFRM_SKB_CB(skb)->seq.output.hi = oseq_hi; 721 + xo->seq.hi = oseq_hi; 722 + } 721 723 if (replay_esn->oseq_hi == 0) { 722 724 replay_esn->oseq--; 723 725 replay_esn->oseq_hi--;
+71 -22
net/xfrm/xfrm_state.c
··· 34 34 35 35 #define xfrm_state_deref_prot(table, net) \ 36 36 rcu_dereference_protected((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) 37 + #define xfrm_state_deref_check(table, net) \ 38 + rcu_dereference_check((table), lockdep_is_held(&(net)->xfrm.xfrm_state_lock)) 37 39 38 40 static void xfrm_state_gc_task(struct work_struct *work); 39 41 ··· 64 62 u32 reqid, 65 63 unsigned short family) 66 64 { 65 + lockdep_assert_held(&net->xfrm.xfrm_state_lock); 66 + 67 67 return __xfrm_dst_hash(daddr, saddr, reqid, family, net->xfrm.state_hmask); 68 68 } 69 69 ··· 74 70 const xfrm_address_t *saddr, 75 71 unsigned short family) 76 72 { 73 + lockdep_assert_held(&net->xfrm.xfrm_state_lock); 74 + 77 75 return __xfrm_src_hash(daddr, saddr, family, net->xfrm.state_hmask); 78 76 } 79 77 ··· 83 77 xfrm_spi_hash(struct net *net, const xfrm_address_t *daddr, 84 78 __be32 spi, u8 proto, unsigned short family) 85 79 { 80 + lockdep_assert_held(&net->xfrm.xfrm_state_lock); 81 + 86 82 return __xfrm_spi_hash(daddr, spi, proto, family, net->xfrm.state_hmask); 87 83 } 88 84 89 85 static unsigned int xfrm_seq_hash(struct net *net, u32 seq) 90 86 { 87 + lockdep_assert_held(&net->xfrm.xfrm_state_lock); 88 + 91 89 return __xfrm_seq_hash(seq, net->xfrm.state_hmask); 92 90 } 93 91 ··· 1118 1108 x->props.family = tmpl->encap_family; 1119 1109 } 1120 1110 1121 - static struct xfrm_state *__xfrm_state_lookup_all(struct net *net, u32 mark, 1111 + struct xfrm_hash_state_ptrs { 1112 + const struct hlist_head *bydst; 1113 + const struct hlist_head *bysrc; 1114 + const struct hlist_head *byspi; 1115 + unsigned int hmask; 1116 + }; 1117 + 1118 + static void xfrm_hash_ptrs_get(const struct net *net, struct xfrm_hash_state_ptrs *ptrs) 1119 + { 1120 + unsigned int sequence; 1121 + 1122 + do { 1123 + sequence = read_seqcount_begin(&net->xfrm.xfrm_state_hash_generation); 1124 + 1125 + ptrs->bydst = xfrm_state_deref_check(net->xfrm.state_bydst, net); 1126 + ptrs->bysrc = 
xfrm_state_deref_check(net->xfrm.state_bysrc, net); 1127 + ptrs->byspi = xfrm_state_deref_check(net->xfrm.state_byspi, net); 1128 + ptrs->hmask = net->xfrm.state_hmask; 1129 + } while (read_seqcount_retry(&net->xfrm.xfrm_state_hash_generation, sequence)); 1130 + } 1131 + 1132 + static struct xfrm_state *__xfrm_state_lookup_all(const struct xfrm_hash_state_ptrs *state_ptrs, 1133 + u32 mark, 1122 1134 const xfrm_address_t *daddr, 1123 1135 __be32 spi, u8 proto, 1124 1136 unsigned short family, 1125 1137 struct xfrm_dev_offload *xdo) 1126 1138 { 1127 - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); 1139 + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); 1128 1140 struct xfrm_state *x; 1129 1141 1130 - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { 1142 + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { 1131 1143 #ifdef CONFIG_XFRM_OFFLOAD 1132 1144 if (xdo->type == XFRM_DEV_OFFLOAD_PACKET) { 1133 1145 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ··· 1183 1151 return NULL; 1184 1152 } 1185 1153 1186 - static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, 1154 + static struct xfrm_state *__xfrm_state_lookup(const struct xfrm_hash_state_ptrs *state_ptrs, 1155 + u32 mark, 1187 1156 const xfrm_address_t *daddr, 1188 1157 __be32 spi, u8 proto, 1189 1158 unsigned short family) 1190 1159 { 1191 - unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); 1160 + unsigned int h = __xfrm_spi_hash(daddr, spi, proto, family, state_ptrs->hmask); 1192 1161 struct xfrm_state *x; 1193 1162 1194 - hlist_for_each_entry_rcu(x, net->xfrm.state_byspi + h, byspi) { 1163 + hlist_for_each_entry_rcu(x, state_ptrs->byspi + h, byspi) { 1195 1164 if (x->props.family != family || 1196 1165 x->id.spi != spi || 1197 1166 x->id.proto != proto || ··· 1214 1181 __be32 spi, u8 proto, 1215 1182 unsigned short family) 1216 1183 { 1184 + struct xfrm_hash_state_ptrs state_ptrs; 1217 1185 struct hlist_head 
*state_cache_input; 1218 1186 struct xfrm_state *x = NULL; 1219 - int cpu = get_cpu(); 1220 1187 1221 - state_cache_input = per_cpu_ptr(net->xfrm.state_cache_input, cpu); 1188 + state_cache_input = raw_cpu_ptr(net->xfrm.state_cache_input); 1222 1189 1223 1190 rcu_read_lock(); 1224 1191 hlist_for_each_entry_rcu(x, state_cache_input, state_cache_input) { ··· 1235 1202 goto out; 1236 1203 } 1237 1204 1238 - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); 1205 + xfrm_hash_ptrs_get(net, &state_ptrs); 1206 + 1207 + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); 1239 1208 1240 1209 if (x && x->km.state == XFRM_STATE_VALID) { 1241 1210 spin_lock_bh(&net->xfrm.xfrm_state_lock); ··· 1252 1217 1253 1218 out: 1254 1219 rcu_read_unlock(); 1255 - put_cpu(); 1256 1220 return x; 1257 1221 } 1258 1222 EXPORT_SYMBOL(xfrm_input_state_lookup); 1259 1223 1260 - static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, 1224 + static struct xfrm_state *__xfrm_state_lookup_byaddr(const struct xfrm_hash_state_ptrs *state_ptrs, 1225 + u32 mark, 1261 1226 const xfrm_address_t *daddr, 1262 1227 const xfrm_address_t *saddr, 1263 1228 u8 proto, unsigned short family) 1264 1229 { 1265 - unsigned int h = xfrm_src_hash(net, daddr, saddr, family); 1230 + unsigned int h = __xfrm_src_hash(daddr, saddr, family, state_ptrs->hmask); 1266 1231 struct xfrm_state *x; 1267 1232 1268 - hlist_for_each_entry_rcu(x, net->xfrm.state_bysrc + h, bysrc) { 1233 + hlist_for_each_entry_rcu(x, state_ptrs->bysrc + h, bysrc) { 1269 1234 if (x->props.family != family || 1270 1235 x->id.proto != proto || 1271 1236 !xfrm_addr_equal(&x->id.daddr, daddr, family) || ··· 1285 1250 static inline struct xfrm_state * 1286 1251 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) 1287 1252 { 1253 + struct xfrm_hash_state_ptrs state_ptrs; 1288 1254 struct net *net = xs_net(x); 1289 1255 u32 mark = x->mark.v & x->mark.m; 1290 1256 1257 + 
xfrm_hash_ptrs_get(net, &state_ptrs); 1258 + 1291 1259 if (use_spi) 1292 - return __xfrm_state_lookup(net, mark, &x->id.daddr, 1260 + return __xfrm_state_lookup(&state_ptrs, mark, &x->id.daddr, 1293 1261 x->id.spi, x->id.proto, family); 1294 1262 else 1295 - return __xfrm_state_lookup_byaddr(net, mark, 1263 + return __xfrm_state_lookup_byaddr(&state_ptrs, mark, 1296 1264 &x->id.daddr, 1297 1265 &x->props.saddr, 1298 1266 x->id.proto, family); ··· 1369 1331 unsigned short family, u32 if_id) 1370 1332 { 1371 1333 static xfrm_address_t saddr_wildcard = { }; 1334 + struct xfrm_hash_state_ptrs state_ptrs; 1372 1335 struct net *net = xp_net(pol); 1373 1336 unsigned int h, h_wildcard; 1374 1337 struct xfrm_state *x, *x0, *to_put; ··· 1434 1395 else if (acquire_in_progress) /* XXX: acquire_in_progress should not happen */ 1435 1396 WARN_ON(1); 1436 1397 1437 - h = xfrm_dst_hash(net, daddr, saddr, tmpl->reqid, encap_family); 1438 - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h, bydst) { 1398 + xfrm_hash_ptrs_get(net, &state_ptrs); 1399 + 1400 + h = __xfrm_dst_hash(daddr, saddr, tmpl->reqid, encap_family, state_ptrs.hmask); 1401 + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h, bydst) { 1439 1402 #ifdef CONFIG_XFRM_OFFLOAD 1440 1403 if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { 1441 1404 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ··· 1470 1429 if (best || acquire_in_progress) 1471 1430 goto found; 1472 1431 1473 - h_wildcard = xfrm_dst_hash(net, daddr, &saddr_wildcard, tmpl->reqid, encap_family); 1474 - hlist_for_each_entry_rcu(x, net->xfrm.state_bydst + h_wildcard, bydst) { 1432 + h_wildcard = __xfrm_dst_hash(daddr, &saddr_wildcard, tmpl->reqid, 1433 + encap_family, state_ptrs.hmask); 1434 + hlist_for_each_entry_rcu(x, state_ptrs.bydst + h_wildcard, bydst) { 1475 1435 #ifdef CONFIG_XFRM_OFFLOAD 1476 1436 if (pol->xdo.type == XFRM_DEV_OFFLOAD_PACKET) { 1477 1437 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) ··· 1510 1468 1511 1469 if (!x && !error && 
!acquire_in_progress) { 1512 1470 if (tmpl->id.spi && 1513 - (x0 = __xfrm_state_lookup_all(net, mark, daddr, 1471 + (x0 = __xfrm_state_lookup_all(&state_ptrs, mark, daddr, 1514 1472 tmpl->id.spi, tmpl->id.proto, 1515 1473 encap_family, 1516 1474 &pol->xdo)) != NULL) { ··· 2295 2253 xfrm_state_lookup(struct net *net, u32 mark, const xfrm_address_t *daddr, __be32 spi, 2296 2254 u8 proto, unsigned short family) 2297 2255 { 2256 + struct xfrm_hash_state_ptrs state_ptrs; 2298 2257 struct xfrm_state *x; 2299 2258 2300 2259 rcu_read_lock(); 2301 - x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); 2260 + xfrm_hash_ptrs_get(net, &state_ptrs); 2261 + 2262 + x = __xfrm_state_lookup(&state_ptrs, mark, daddr, spi, proto, family); 2302 2263 rcu_read_unlock(); 2303 2264 return x; 2304 2265 } ··· 2312 2267 const xfrm_address_t *daddr, const xfrm_address_t *saddr, 2313 2268 u8 proto, unsigned short family) 2314 2269 { 2270 + struct xfrm_hash_state_ptrs state_ptrs; 2315 2271 struct xfrm_state *x; 2316 2272 2317 2273 spin_lock_bh(&net->xfrm.xfrm_state_lock); 2318 - x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); 2274 + 2275 + xfrm_hash_ptrs_get(net, &state_ptrs); 2276 + 2277 + x = __xfrm_state_lookup_byaddr(&state_ptrs, mark, daddr, saddr, proto, family); 2319 2278 spin_unlock_bh(&net->xfrm.xfrm_state_lock); 2320 2279 return x; 2321 2280 }