about | summary | refs | log | tree | commit | diff | stats
diff options
-rw-r--r-- queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch | 130
-rw-r--r-- queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch | 158
-rw-r--r-- queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch | 81
-rw-r--r-- queue-5.10/series | 3
4 files changed, 372 insertions, 0 deletions
diff --git a/queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch b/queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch
new file mode 100644
index 0000000000..f09a0b02bb
--- /dev/null
+++ b/queue-5.10/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch
@@ -0,0 +1,130 @@
+From ead7f9b8de65632ef8060b84b0c55049a33cfea1 Mon Sep 17 00:00:00 2001
+From: Paul Chaignon <paul.chaignon@gmail.com>
+Date: Thu, 29 May 2025 12:28:35 +0200
+Subject: bpf: Fix L4 csum update on IPv6 in CHECKSUM_COMPLETE
+
+From: Paul Chaignon <paul.chaignon@gmail.com>
+
+commit ead7f9b8de65632ef8060b84b0c55049a33cfea1 upstream.
+
+In Cilium, we use bpf_csum_diff + bpf_l4_csum_replace to, among other
+things, update the L4 checksum after reverse SNATing IPv6 packets. That
+use case is however not currently supported and leads to invalid
+skb->csum values in some cases. This patch adds support for IPv6 address
+changes in bpf_l4_csum_replace via a new flag.
+
+When calling bpf_l4_csum_replace in Cilium, it ends up calling
+inet_proto_csum_replace_by_diff:
+
+ 1: void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+ 2: __wsum diff, bool pseudohdr)
+ 3: {
+ 4: if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ 5: csum_replace_by_diff(sum, diff);
+ 6: if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
+ 7: skb->csum = ~csum_sub(diff, skb->csum);
+ 8: } else if (pseudohdr) {
+ 9: *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+ 10: }
+ 11: }
+
+The bug happens when we're in the CHECKSUM_COMPLETE state. We've just
+updated one of the IPv6 addresses. The helper now updates the L4 header
+checksum on line 5. Next, it updates skb->csum on line 7. It shouldn't.
+
+For an IPv6 packet, the updates of the IPv6 address and of the L4
+checksum will cancel each other. The checksums are set such that
+computing a checksum over the packet including its checksum will result
+in a sum of 0. So the same is true here when we update the L4 checksum
+on line 5. We'll update it so as to cancel the previous IPv6 address
+update. Hence skb->csum should remain untouched in this case.
+
+The same bug doesn't affect IPv4 packets because, in that case, three
+fields are updated: the IPv4 address, the IP checksum, and the L4
+checksum. The change to the IPv4 address and one of the checksums still
+cancel each other in skb->csum, but we're left with one checksum update
+and should therefore update skb->csum accordingly. That's exactly what
+inet_proto_csum_replace_by_diff does.
+
+This special case for IPv6 L4 checksums is also described atop
+inet_proto_csum_replace16, the function we should be using in this case.
+
+This patch introduces a new bpf_l4_csum_replace flag, BPF_F_IPV6,
+to indicate that we're updating the L4 checksum of an IPv6 packet. When
+the flag is set, inet_proto_csum_replace_by_diff will skip the
+skb->csum update.
+
+Fixes: 7d672345ed295 ("bpf: add generic bpf_csum_diff helper")
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://patch.msgid.link/96a6bc3a443e6f0b21ff7b7834000e17fb549e05.1748509484.git.paul.chaignon@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Note: Fixed conflict due to unrelated comment change. ]
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/uapi/linux/bpf.h | 2 ++
+ net/core/filter.c | 5 +++--
+ tools/include/uapi/linux/bpf.h | 2 ++
+ 3 files changed, 7 insertions(+), 2 deletions(-)
+
+--- a/include/uapi/linux/bpf.h
++++ b/include/uapi/linux/bpf.h
+@@ -909,6 +909,7 @@ union bpf_attr {
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
++ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+@@ -3937,6 +3938,7 @@ enum {
+ BPF_F_PSEUDO_HDR = (1ULL << 4),
+ BPF_F_MARK_MANGLED_0 = (1ULL << 5),
+ BPF_F_MARK_ENFORCE = (1ULL << 6),
++ BPF_F_IPV6 = (1ULL << 7),
+ };
+
+ /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1953,10 +1953,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
+ bool is_pseudo = flags & BPF_F_PSEUDO_HDR;
+ bool is_mmzero = flags & BPF_F_MARK_MANGLED_0;
+ bool do_mforce = flags & BPF_F_MARK_ENFORCE;
++ bool is_ipv6 = flags & BPF_F_IPV6;
+ __sum16 *ptr;
+
+ if (unlikely(flags & ~(BPF_F_MARK_MANGLED_0 | BPF_F_MARK_ENFORCE |
+- BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK)))
++ BPF_F_PSEUDO_HDR | BPF_F_HDR_FIELD_MASK | BPF_F_IPV6)))
+ return -EINVAL;
+ if (unlikely(offset > 0xffff || offset & 1))
+ return -EFAULT;
+@@ -1972,7 +1973,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false);
++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, is_ipv6);
+ break;
+ case 2:
+ inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+--- a/tools/include/uapi/linux/bpf.h
++++ b/tools/include/uapi/linux/bpf.h
+@@ -909,6 +909,7 @@ union bpf_attr {
+ * for updates resulting in a null checksum the value is set to
+ * **CSUM_MANGLED_0** instead. Flag **BPF_F_PSEUDO_HDR** indicates
+ * the checksum is to be computed against a pseudo-header.
++ * Flag **BPF_F_IPV6** should be set for IPv6 packets.
+ *
+ * This helper works in combination with **bpf_csum_diff**\ (),
+ * which does not update the checksum in-place, but offers more
+@@ -3937,6 +3938,7 @@ enum {
+ BPF_F_PSEUDO_HDR = (1ULL << 4),
+ BPF_F_MARK_MANGLED_0 = (1ULL << 5),
+ BPF_F_MARK_ENFORCE = (1ULL << 6),
++ BPF_F_IPV6 = (1ULL << 7),
+ };
+
+ /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */
diff --git a/queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch b/queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch
new file mode 100644
index 0000000000..408a53c58e
--- /dev/null
+++ b/queue-5.10/net-fix-checksum-update-for-ila-adj-transport.patch
@@ -0,0 +1,158 @@
+From 6043b794c7668c19dabc4a93c75b924a19474d59 Mon Sep 17 00:00:00 2001
+From: Paul Chaignon <paul.chaignon@gmail.com>
+Date: Thu, 29 May 2025 12:28:05 +0200
+Subject: net: Fix checksum update for ILA adj-transport
+
+From: Paul Chaignon <paul.chaignon@gmail.com>
+
+commit 6043b794c7668c19dabc4a93c75b924a19474d59 upstream.
+
+During ILA address translations, the L4 checksums can be handled in
+different ways. One of them, adj-transport, consists in parsing the
+transport layer and updating any found checksum. This logic relies on
+inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when
+in state CHECKSUM_COMPLETE.
+
+This bug can be reproduced with a simple ILA to SIR mapping, assuming
+packets are received with CHECKSUM_COMPLETE:
+
+ $ ip a show dev eth0
+ 14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000
+ link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0
+ inet6 3333:0:0:1::c078/64 scope global
+ valid_lft forever preferred_lft forever
+ inet6 fd00:10:244:1::c078/128 scope global nodad
+ valid_lft forever preferred_lft forever
+ inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll
+ valid_lft forever preferred_lft forever
+ $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \
+ csum-mode adj-transport ident-type luid dev eth0
+
+Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on
+[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with
+SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed
+skb->csum. The translation and drop are visible on pwru [1] traces:
+
+ IFACE TUPLE FUNC
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow
+ eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM)
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head
+ eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem
+
+This is happening because inet_proto_csum_replace_by_diff is updating
+skb->csum when it shouldn't. The L4 checksum is updated such that it
+"cancels" the IPv6 address change in terms of checksum computation, so
+the impact on skb->csum is null.
+
+Note this would be different for an IPv4 packet since three fields
+would be updated: the IPv4 address, the IP checksum, and the L4
+checksum. Two would cancel each other and skb->csum would still need
+to be updated to take the L4 checksum change into account.
+
+This patch fixes it by passing an ipv6 flag to
+inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're
+in the IPv6 case. Note the behavior of the only other user of
+inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in
+this patch and fixed in the subsequent patch.
+
+With the fix, using the reproduction from above, I can confirm
+skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP
+SYN proceeds to the application after the ILA translation.
+
+Link: https://github.com/cilium/pwru [1]
+Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module")
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Acked-by: Daniel Borkmann <daniel@iogearbox.net>
+Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+[ Fixed conflict due to unrelated change in inet_proto_csum_replace_by_diff. ]
+Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ include/net/checksum.h | 2 +-
+ net/core/filter.c | 2 +-
+ net/core/utils.c | 4 ++--
+ net/ipv6/ila/ila_common.c | 6 +++---
+ 4 files changed, 7 insertions(+), 7 deletions(-)
+
+--- a/include/net/checksum.h
++++ b/include/net/checksum.h
+@@ -152,7 +152,7 @@ void inet_proto_csum_replace16(__sum16 *
+ const __be32 *from, const __be32 *to,
+ bool pseudohdr);
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+- __wsum diff, bool pseudohdr);
++ __wsum diff, bool pseudohdr, bool ipv6);
+
+ static __always_inline
+ void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb,
+--- a/net/core/filter.c
++++ b/net/core/filter.c
+@@ -1972,7 +1972,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s
+ if (unlikely(from != 0))
+ return -EINVAL;
+
+- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo);
++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false);
+ break;
+ case 2:
+ inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo);
+--- a/net/core/utils.c
++++ b/net/core/utils.c
+@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 *
+ EXPORT_SYMBOL(inet_proto_csum_replace16);
+
+ void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb,
+- __wsum diff, bool pseudohdr)
++ __wsum diff, bool pseudohdr, bool ipv6)
+ {
+ if (skb->ip_summed != CHECKSUM_PARTIAL) {
+ *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum)));
+- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr)
++ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6)
+ skb->csum = ~csum_add(diff, ~skb->csum);
+ } else if (pseudohdr) {
+ *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum)));
+--- a/net/ipv6/ila/ila_common.c
++++ b/net/ipv6/ila/ila_common.c
+@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(st
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&th->check, skb,
+- diff, true);
++ diff, true, true);
+ }
+ break;
+ case NEXTHDR_UDP:
+@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(st
+ if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) {
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&uh->check, skb,
+- diff, true);
++ diff, true, true);
+ if (!uh->check)
+ uh->check = CSUM_MANGLED_0;
+ }
+@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(st
+
+ diff = get_csum_diff(ip6h, p);
+ inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb,
+- diff, true);
++ diff, true, true);
+ }
+ break;
+ }
diff --git a/queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch b/queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch
new file mode 100644
index 0000000000..04db0617a7
--- /dev/null
+++ b/queue-5.10/net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch
@@ -0,0 +1,81 @@
+From farbere@amazon.com Mon Jun 23 10:41:14 2025
+From: Eliav Farber <farbere@amazon.com>
+Date: Mon, 9 Jun 2025 04:32:59 +0000
+Subject: net/ipv4: fix type mismatch in inet_ehash_locks_alloc() causing build failure
+To: <davem@davemloft.net>, <kuznet@ms2.inr.ac.ru>, <yoshfuji@linux-ipv6.org>, <kuba@kernel.org>, <kuniyu@amazon.com>, <sashal@kernel.org>, <edumazet@google.com>, <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>
+Cc: <stable@vger.kernel.org>, <farbere@amazon.com>
+Message-ID: <20250609043259.10772-1-farbere@amazon.com>
+
+From: Eliav Farber <farbere@amazon.com>
+
+Fix compilation warning:
+
+In file included from ./include/linux/kernel.h:15,
+ from ./include/linux/list.h:9,
+ from ./include/linux/module.h:12,
+ from net/ipv4/inet_hashtables.c:12:
+net/ipv4/inet_hashtables.c: In function ‘inet_ehash_locks_alloc’:
+./include/linux/minmax.h:20:35: warning: comparison of distinct pointer types lacks a cast
+ 20 | (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
+ | ^~
+./include/linux/minmax.h:26:18: note: in expansion of macro ‘__typecheck’
+ 26 | (__typecheck(x, y) && __no_side_effects(x, y))
+ | ^~~~~~~~~~~
+./include/linux/minmax.h:36:31: note: in expansion of macro ‘__safe_cmp’
+ 36 | __builtin_choose_expr(__safe_cmp(x, y), \
+ | ^~~~~~~~~~
+./include/linux/minmax.h:52:25: note: in expansion of macro ‘__careful_cmp’
+ 52 | #define max(x, y) __careful_cmp(x, y, >)
+ | ^~~~~~~~~~~~~
+net/ipv4/inet_hashtables.c:946:19: note: in expansion of macro ‘max’
+ 946 | nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz);
+ | ^~~
+ CC block/badblocks.o
+
+When warnings are treated as errors, this causes the build to fail.
+
+The issue is a type mismatch between the operands passed to the max()
+macro. Here, nblocks is an unsigned int, while the expression
+num_online_nodes() * PAGE_SIZE / locksz is promoted to unsigned long.
+
+This happens because:
+ - num_online_nodes() returns int
+ - PAGE_SIZE is typically defined as an unsigned long (depending on the
+ architecture)
+ - locksz is unsigned int
+
+The resulting arithmetic expression is promoted to unsigned long.
+
+Thus, the max() macro compares values of different types: unsigned int
+vs unsigned long.
+
+This issue was introduced in commit f8ece40786c9 ("tcp: bring back NUMA
+dispersion in inet_ehash_locks_alloc()") during the update from kernel
+v5.10.237 to v5.10.238.
+
+It does not exist in newer kernel branches (e.g., v5.15.185 and all 6.x
+branches), because they include commit d03eba99f5bf ("minmax: allow
+min()/max()/clamp() if the arguments have the same signedness.")
+
+Fix the issue by using max_t(unsigned int, ...) to explicitly cast both
+operands to the same type, avoiding the type mismatch and ensuring
+correctness.
+
+Fixes: f8ece40786c9 ("tcp: bring back NUMA dispersion in inet_ehash_locks_alloc()")
+Signed-off-by: Eliav Farber <farbere@amazon.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/ipv4/inet_hashtables.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -943,7 +943,7 @@ int inet_ehash_locks_alloc(struct inet_h
+ nblocks = max(2U * L1_CACHE_BYTES / locksz, 1U) * num_possible_cpus();
+
+ /* At least one page per NUMA node. */
+- nblocks = max(nblocks, num_online_nodes() * PAGE_SIZE / locksz);
++ nblocks = max_t(unsigned int, nblocks, num_online_nodes() * PAGE_SIZE / locksz);
+
+ nblocks = roundup_pow_of_two(nblocks);
+
diff --git a/queue-5.10/series b/queue-5.10/series
index 97abd26dc4..d1a56357ae 100644
--- a/queue-5.10/series
+++ b/queue-5.10/series
@@ -334,3 +334,6 @@ arm64-spectre-increase-parameters-that-can-be-used-to-turn-off-bhb-mitigation-in
arm64-bpf-add-bhb-mitigation-to-the-epilogue-for-cbpf-programs.patch
arm64-bpf-only-mitigate-cbpf-programs-loaded-by-unprivileged-users.patch
arm64-proton-pack-add-new-cpus-k-values-for-branch-mitigation.patch
+net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch
+net-fix-checksum-update-for-ila-adj-transport.patch
+bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch