diff options
4 files changed, 372 insertions, 0 deletions
diff --git a/queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch b/queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch new file mode 100644 index 0000000000..f09a0b02bb --- /dev/null +++ b/queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch @@ -0,0 +1,130 @@ +From ead7f9b8de65632ef8060b84b0c55049a33cfea1 Mon Sep 17 00:00:00 2001 +From: Paul Chaignon <paul.chaignon@gmail.com> +Date: Thu, 29 May 2025 12:28:35 +0200 +Subject: bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE + +From: Paul Chaignon <paul.chaignon@gmail.com> + +commit ead7f9b8de65632ef8060b84b0c55049a33cfea1 upstream. + +In Cilium, we use bpf_csum_diff + bpf_l4_csum_replace to, among other +things, update the L4 checksum after reverse SNATing IPv6 packets. That +use case is however not currently supported and leads to invalid +skb->csum values in some cases. This patch adds support for IPv6 address +changes in bpf_l4_csum_replace via a new flag. + +When calling bpf_l4_csum_replace in Cilium, it ends up calling +inet_proto_csum_replace_by_diff: + + 1: void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, + 2: __wsum diff, bool pseudohdr) + 3: { + 4: if (skb->ip_summed != CHECKSUM_PARTIAL) { + 5: csum_replace_by_diff(sum, diff); + 6: if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) + 7: skb->csum = ~csum_sub(diff, skb->csum); + 8: } else if (pseudohdr) { + 9: *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); + 10: } + 11: } + +The bug happens when we're in the CHECKSUM_COMPLETE state. We've just +updated one of the IPv6 addresses. The helper now updates the L4 header +checksum on line 5. Next, it updates skb->csum on line 7. It shouldn't. + +For an IPv6 packet, the updates of the IPv6 address and of the L4 +checksum will cancel each other. The checksums are set such that +computing a checksum over the packet including its checksum will result +in a sum of 0. So the same is true here when we update the L4 checksum +on line 5. 
We'll update it so as to cancel the previous IPv6 address +update. Hence skb->csum should remain untouched in this case. + +The same bug doesn't affect IPv4 packets because, in that case, three +fields are updated: the IPv4 address, the IP checksum, and the L4 +checksum. The change to the IPv4 address and one of the checksums still +cancel each other in skb->csum, but we're left with one checksum update +and should therefore update skb->csum accordingly. That's exactly what +inet_proto_csum_replace_by_diff does. + +This special case for IPv6 L4 checksums is also described atop +inet_proto_csum_replace16, the function we should be using in this case. + +This patch introduces a new bpf_l4_csum_replace flag, BPF_F_IPV6, +to indicate that we're updating the L4 checksum of an IPv6 packet. When +the flag is set, inet_proto_csum_replace_by_diff will skip the +skb->csum update. + +Fixes: 7d672345ed295 ("bpf: add generic bpf_csum_diff helper") +Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Link: https://patch.msgid.link/96a6bc3a443e6f0b21ff7b7834000e17fb549e05.1748509484.git.paul.chaignon@gmail.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +[ Note: Fixed conflict due to unrelated comment change. ] +Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/uapi/linux/bpf.h | 2 ++ + net/core/filter.c | 5 +++-- + tools/include/uapi/linux/bpf.h | 2 ++ + 3 files changed, 7 insertions(+), 2 deletions(-) + +--- a/include/uapi/linux/bpf.h ++++ b/include/uapi/linux/bpf.h +@@ -909,6 +909,7 @@ union bpf_attr { + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. ++ * Flag **BPF_F_IPV6** should be set for IPv6 packets. 
+ * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more +@@ -3937,6 +3938,7 @@ enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), ++ BPF_F_IPV6 = (1ULL << 7), + }; + + /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1953,10 +1953,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s + bool is_pseudo = flags & BPF_F_PSEUDO_HDR; + bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; + bool do_mforce = flags & BPF_F_MARK_ENFORCE; ++ bool is_ipv6 = flags & BPF_F_IPV6; + __sum16 *ptr; + + if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE | +- BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK))) ++ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6))) + return -EINVAL; + if (unlikely(offset > 0xffff || offset & 1)) + return -EFAULT; +@@ -1972,7 +1973,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s + if (unlikely(from != 0)) + return -EINVAL; + +- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false); ++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6); + break; + case 2: + inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); +--- a/tools/include/uapi/linux/bpf.h ++++ b/tools/include/uapi/linux/bpf.h +@@ -909,6 +909,7 @@ union bpf_attr { + * for updates resulting in a null checksum the value is set to + * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates + * the checksum is to be computed against a pseudo-header. ++ * Flag **BPF_F_IPV6** should be set for IPv6 packets. + * + * This helper works in combination with **bpf_csum_diff**\ (), + * which does not update the checksum in-place, but offers more +@@ -3937,6 +3938,7 @@ enum { + BPF_F_PSEUDO_HDR = (1ULL << 4), + BPF_F_MARK_MANGLED_0 = (1ULL << 5), + BPF_F_MARK_ENFORCE = (1ULL << 6), ++ BPF_F_IPV6 = (1ULL << 7), + }; + + /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. 
*/ diff --git a/queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch b/queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch new file mode 100644 index 0000000000..408a53c58e --- /dev/null +++ b/queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch @@ -0,0 +1,158 @@ +From 6043b794c7668c19dabc4a93c75b924a19474d59 Mon Sep 17 00:00:00 2001 +From: Paul Chaignon <paul.chaignon@gmail.com> +Date: Thu, 29 May 2025 12:28:05 +0200 +Subject: net: Fix checksum update for ILA adj-transport + +From: Paul Chaignon <paul.chaignon@gmail.com> + +commit 6043b794c7668c19dabc4a93c75b924a19474d59 upstream. + +During ILA address translations, the L4 checksums can be handled in +different ways. One of them, adj-transport, consists of parsing the +transport layer and updating any found checksum. This logic relies on +inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when +in state CHECKSUM_COMPLETE. + +This bug can be reproduced with a simple ILA to SIR mapping, assuming +packets are received with CHECKSUM_COMPLETE: + + $ ip a show dev eth0 + 14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 + link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet6 3333:0:0:1::c078/64 scope global + valid_lft forever preferred_lft forever + inet6 fd00:10:244:1::c078/128 scope global nodad + valid_lft forever preferred_lft forever + inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll + valid_lft forever preferred_lft forever + $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \ + csum-mode adj-transport ident-type luid dev eth0 + +Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on +[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with +SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed +skb->csum. 
The translation and drop are visible on pwru [1] traces: + + IFACE TUPLE FUNC + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM) + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem + +This is happening because inet_proto_csum_replace_by_diff is updating +skb->csum when it shouldn't. The L4 checksum is updated such that it +"cancels" the IPv6 address change in terms of checksum computation, so +the impact on skb->csum is null. 
+ +Note this would be different for an IPv4 packet since three fields +would be updated: the IPv4 address, the IP checksum, and the L4 +checksum. Two would cancel each other and skb->csum would still need +to be updated to take the L4 checksum change into account. + +This patch fixes it by passing an ipv6 flag to +inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're +in the IPv6 case. Note the behavior of the only other user of +inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in +this patch and fixed in the subsequent patch. + +With the fix, using the reproduction from above, I can confirm +skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP +SYN proceeds to the application after the ILA translation. + +Link: https://github.com/cilium/pwru [1] +Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module") +Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +[ Fixed conflict due to unrelated change in inet_proto_csum_replace_by_diff. 
] +Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/net/checksum.h | 2 +- + net/core/filter.c | 2 +- + net/core/utils.c | 4 ++-- + net/ipv6/ila/ila_common.c | 6 +++--- + 4 files changed, 7 insertions(+), 7 deletions(-) + +--- a/include/net/checksum.h ++++ b/include/net/checksum.h +@@ -152,7 +152,7 @@ void inet_proto_csum_replace16(__sum16 * + const __be32 *from, const __be32 *to, + bool pseudohdr); + void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, +- __wsum diff, bool pseudohdr); ++ __wsum diff, bool pseudohdr, bool ipv6); + + static __always_inline + void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1972,7 +1972,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s + if (unlikely(from != 0)) + return -EINVAL; + +- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); ++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false); + break; + case 2: + inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); +--- a/net/core/utils.c ++++ b/net/core/utils.c +@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 * + EXPORT_SYMBOL(inet_proto_csum_replace16); + + void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, +- __wsum diff, bool pseudohdr) ++ __wsum diff, bool pseudohdr, bool ipv6) + { + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); +- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) ++ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6) + skb->csum = ~csum_add(diff, ~skb->csum); + } else if (pseudohdr) { + *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); +--- a/net/ipv6/ila/ila_common.c ++++ b/net/ipv6/ila/ila_common.c +@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(st + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, +- diff, true); ++ diff, 
true, true); + } + break; + case NEXTHDR_UDP: +@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(st + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, +- diff, true); ++ diff, true, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } +@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(st + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, +- diff, true); ++ diff, true, true); + } + break; + } diff --git a/queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch b/queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch new file mode 100644 index 0000000000..04db0617a7 --- /dev/null +++ b/queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch @@ -0,0 +1,81 @@ +From farbere@amazon.com Mon Jun 23 10:41:14 2025 +From: Eliav Farber <farbere@amazon.com> +Date: Mon, 9 Jun 2025 04:32:59 +0000 +Subject: net/ipv4: fix type mismatch in inet_ehash_locks_alloc() causing build failure +To: <davem@davemloft.net>, <kuznet@ms2.inr.ac.ru>, <yoshfuji@linux-ipv6.org>, <kuba@kernel.org>, <kuniyu@amazon.com>, <sashal@kernel.org>, <edumazet@google.com>, <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org> +Cc: <stable@vger.kernel.org>, <farbere@amazon.com> +Message-ID: <20250609043259.10772-1-farbere@amazon.com> + +From: Eliav Farber <farbere@amazon.com> + +Fix compilation warning: + +In file included from ./include/linux/kernel.h:15, + from ./include/linux/list.h:9, + from ./include/linux/module.h:12, + from net/ipv4/inet_hashtables.c:12: +net/ipv4/inet_hashtables.c: In function ‘inet_ehash_locks_alloc’: +./include/linux/minmax.h:20:35: warning: comparison of distinct pointer types lacks a cast + 20 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + | ^~ +./include/linux/minmax.h:26:18: note: in expansion of macro ‘__typecheck’ + 
26 | (__typecheck(x, y) && __no_side_effects(x, y)) + | ^~~~~~~~~~~ +./include/linux/minmax.h:36:31: note: in expansion of macro ‘__safe_cmp’ + 36 | __builtin_choose_expr(__safe_cmp(x, y), \ + | ^~~~~~~~~~ +./include/linux/minmax.h:52:25: note: in expansion of macro ‘__careful_cmp’ + 52 | #define max(x, y) __careful_cmp(x, y, >) + | ^~~~~~~~~~~~~ +net/ipv4/inet_hashtables.c:946:19: note: in expansion of macro ‘max’ + 946 | nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz); + | ^~~ + CC block/badblocks.o + +When warnings are treated as errors, this causes the build to fail. + +The issue is a type mismatch between the operands passed to the max() +macro. Here, nblocks is an unsigned int, while the expression +num_online_nodes() * PAGE_SIZE / locksz is promoted to unsigned long. + +This happens because: + - num_online_nodes() returns int + - PAGE_SIZE is typically defined as an unsigned long (depending on the + architecture) + - locksz is unsigned int + +The resulting arithmetic expression is promoted to unsigned long. + +Thus, the max() macro compares values of different types: unsigned int +vs unsigned long. + +This issue was introduced in commit f8ece40786c9 ("tcp: bring back NUMA +dispersion in inet_ehash_locks_alloc()") during the update from kernel +v5.10.237 to v5.10.238. + +It does not exist in newer kernel branches (e.g., v5.15.185 and all 6.x +branches), because they include commit d03eba99f5bf ("minmax: allow +min()/max()/clamp() if the arguments have the same signedness.") + +Fix the issue by using max_t(unsigned int, ...) to explicitly cast both +operands to the same type, avoiding the type mismatch and ensuring +correctness. 
+ +Fixes: f8ece40786c9 ("tcp: bring back NUMA dispersion in inet_ehash_locks_alloc()") +Signed-off-by: Eliav Farber <farbere@amazon.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/ipv4/inet_hashtables.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/net/ipv4/inet_hashtables.c ++++ b/net/ipv4/inet_hashtables.c +@@ -943,7 +943,7 @@ int inet_ehash_locks_alloc(struct inet_h + nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus(); + + /* At least one page per NUMA node. */ +- nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz); ++ nblocks = max_t(unsigned int, nblocks, num_online_nodes() * PAGE_SIZE / locksz); + + nblocks = roundup_pow_of_two(nblocks); + diff --git a/queue-5.10/series b/queue-5.10/series index 97abd26dc4..d1a56357ae 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -334,3 +334,6 @@ arm64-spectre-increase-parameters-that-can-be-used-to-turn-off-bhb-mitigation-in arm64-bpf-add-bhb-mitigation-to-the-epilogue-for-cbpf-programs.patch arm64-bpf-only-mitigate-cbpf-programs-loaded-by-unprivileged-users.patch arm64-proton-pack-add-new-cpus-k-values-for-branch-mitigation.patch +net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch +net-fix-checksum-update-for-ila-adj-transport.patch +bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch |