diff options
12 files changed, 997 insertions, 14 deletions
diff --git a/queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch b/queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch index b965b30ad0..e0eebe7f6d 100644 --- a/queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch +++ b/queue-5.15/bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch @@ -63,13 +63,11 @@ Signed-off-by: Jakub Kicinski <kuba@kernel.org> Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> --- - include/uapi/linux/bpf.h | 2 ++ - net/core/filter.c | 5 +++-- - tools/include/uapi/linux/bpf.h | 2 ++ + include/uapi/linux/bpf.h | 2 ++ + net/core/filter.c | 5 +++-- + tools/include/uapi/linux/bpf.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) -diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h -index 0bdeeabbc5a8..2ac62d5ed466 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1695,6 +1695,7 @@ union bpf_attr { @@ -88,11 +86,9 @@ index 0bdeeabbc5a8..2ac62d5ed466 100644 }; /* BPF_FUNC_skb_set_tunnel_key and BPF_FUNC_skb_get_tunnel_key flags. 
*/ -diff --git a/net/core/filter.c b/net/core/filter.c -index 65b7fb9c3d29..169d9ba4e7a0 100644 --- a/net/core/filter.c +++ b/net/core/filter.c -@@ -1951,10 +1951,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, +@@ -1951,10 +1951,11 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s bool is_pseudo = flags & BPF_F_PSEUDO_HDR; bool is_mmzero = flags & BPF_F_MARK_MANGLED_0; bool do_mforce = flags & BPF_F_MARK_ENFORCE; @@ -105,7 +101,7 @@ index 65b7fb9c3d29..169d9ba4e7a0 100644 return -EINVAL; if (unlikely(offset > 0xffff || offset & 1)) return -EFAULT; -@@ -1970,7 +1971,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct sk_buff *, skb, u32, offset, +@@ -1970,7 +1971,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s if (unlikely(from != 0)) return -EINVAL; @@ -114,8 +110,6 @@ index 65b7fb9c3d29..169d9ba4e7a0 100644 break; case 2: inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); -diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h -index 54b8c899d21c..fe70f9ce8b00 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1695,6 +1695,7 @@ union bpf_attr { @@ -134,6 +128,3 @@ index 54b8c899d21c..fe70f9ce8b00 100644 }; /* BPF_FUNC_clone_redirect and BPF_FUNC_redirect flags. */ --- -2.43.0 - diff --git a/queue-5.15/ext4-avoid-remount-errors-with-abort-mount-option.patch b/queue-5.15/ext4-avoid-remount-errors-with-abort-mount-option.patch new file mode 100644 index 0000000000..169bc4c67c --- /dev/null +++ b/queue-5.15/ext4-avoid-remount-errors-with-abort-mount-option.patch @@ -0,0 +1,62 @@ +From 76486b104168ae59703190566e372badf433314b Mon Sep 17 00:00:00 2001 +From: Jan Kara <jack@suse.cz> +Date: Sat, 5 Oct 2024 00:15:56 +0200 +Subject: ext4: avoid remount errors with 'abort' mount option + +From: Jan Kara <jack@suse.cz> + +commit 76486b104168ae59703190566e372badf433314b upstream. 
+ +When we remount filesystem with 'abort' mount option while changing +other mount options as well (as the LTP test does), we can return error +from the system call after commit d3476f3dad4a ("ext4: don't set +SB_RDONLY after filesystem errors") because the application of mount +option changes detects shutdown filesystem and refuses to do anything. +The behavior of application of other mount options in presence of +'abort' mount option is currently rather arbitrary as some mount option +changes are handled before 'abort' and some after it. + +Move aborting of the filesystem to the end of remount handling so all +requested changes are properly applied before the filesystem is shutdown +to have a reasonably consistent behavior. + +Fixes: d3476f3dad4a ("ext4: don't set SB_RDONLY after filesystem errors") +Reported-by: Jan Stancek <jstancek@redhat.com> +Link: https://lore.kernel.org/all/Zvp6L+oFnfASaoHl@t14s +Signed-off-by: Jan Kara <jack@suse.cz> +Tested-by: Jan Stancek <jstancek@redhat.com> +Link: https://patch.msgid.link/20241004221556.19222-1-jack@suse.cz +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Signed-off-by: Amir Goldstein <amir73il@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/ext4/super.c | 11 ++++++++--- + 1 file changed, 8 insertions(+), 3 deletions(-) + +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -5849,9 +5849,6 @@ static int ext4_remount(struct super_blo + goto restore_opts; + } + +- if (test_opt2(sb, ABORT)) +- ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); +- + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | + (test_opt(sb, POSIX_ACL) ? SB_POSIXACL : 0); + +@@ -6027,6 +6024,14 @@ static int ext4_remount(struct super_blo + */ + *flags = (*flags & ~vfs_flags) | (sb->s_flags & vfs_flags); + ++ /* ++ * Handle aborting the filesystem as the last thing during remount to ++ * avoid obsure errors during remount when some option changes fail to ++ * apply due to shutdown filesystem. 
++ */ ++ if (test_opt2(sb, ABORT)) ++ ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); ++ + ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s. Quota mode: %s.", + orig_data, ext4_quota_mode(sb)); + kfree(orig_data); diff --git a/queue-5.15/ext4-make-abort-mount-option-handling-standard.patch b/queue-5.15/ext4-make-abort-mount-option-handling-standard.patch new file mode 100644 index 0000000000..d24dba0058 --- /dev/null +++ b/queue-5.15/ext4-make-abort-mount-option-handling-standard.patch @@ -0,0 +1,66 @@ +From 22b8d707b07e6e06f50fe1d9ca8756e1f894eb0d Mon Sep 17 00:00:00 2001 +From: Jan Kara <jack@suse.cz> +Date: Fri, 16 Jun 2023 18:50:50 +0200 +Subject: ext4: make 'abort' mount option handling standard + +From: Jan Kara <jack@suse.cz> + +commit 22b8d707b07e6e06f50fe1d9ca8756e1f894eb0d upstream. + +'abort' mount option is the only mount option that has special handling +and sets a bit in sbi->s_mount_flags. There is not strong reason for +that so just simplify the code and make 'abort' set a bit in +sbi->s_mount_opt2 as any other mount option. This simplifies the code +and will allow us to drop EXT4_MF_FS_ABORTED completely in the following +patch. 
+ +Signed-off-by: Jan Kara <jack@suse.cz> +Link: https://lore.kernel.org/r/20230616165109.21695-4-jack@suse.cz +Signed-off-by: Theodore Ts'o <tytso@mit.edu> +Stable-dep-of: 76486b104168 ("ext4: avoid remount errors with 'abort' mount option") +Signed-off-by: Amir Goldstein <amir73il@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/ext4/ext4.h | 1 + + fs/ext4/super.c | 6 ++---- + 2 files changed, 3 insertions(+), 4 deletions(-) + +--- a/fs/ext4/ext4.h ++++ b/fs/ext4/ext4.h +@@ -1255,6 +1255,7 @@ struct ext4_inode_info { + #define EXT4_MOUNT2_MB_OPTIMIZE_SCAN 0x00000080 /* Optimize group + * scanning in mballoc + */ ++#define EXT4_MOUNT2_ABORT 0x00000100 /* Abort filesystem */ + + #define clear_opt(sb, opt) EXT4_SB(sb)->s_mount_opt &= \ + ~EXT4_MOUNT_##opt +--- a/fs/ext4/super.c ++++ b/fs/ext4/super.c +@@ -2023,6 +2023,7 @@ static const struct mount_opts { + MOPT_SET | MOPT_2 | MOPT_EXT4_ONLY}, + {Opt_fc_debug_max_replay, 0, MOPT_GTE0}, + #endif ++ {Opt_abort, EXT4_MOUNT2_ABORT, MOPT_SET | MOPT_2}, + {Opt_err, 0, 0} + }; + +@@ -2143,9 +2144,6 @@ static int handle_mount_opt(struct super + case Opt_removed: + ext4_msg(sb, KERN_WARNING, "Ignoring removed %s option", opt); + return 1; +- case Opt_abort: +- ext4_set_mount_flag(sb, EXT4_MF_FS_ABORTED); +- return 1; + case Opt_i_version: + sb->s_flags |= SB_I_VERSION; + return 1; +@@ -5851,7 +5849,7 @@ static int ext4_remount(struct super_blo + goto restore_opts; + } + +- if (ext4_test_mount_flag(sb, EXT4_MF_FS_ABORTED)) ++ if (test_opt2(sb, ABORT)) + ext4_abort(sb, ESHUTDOWN, "Abort forced by user"); + + sb->s_flags = (sb->s_flags & ~SB_POSIXACL) | diff --git a/queue-5.15/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch b/queue-5.15/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch new file mode 100644 index 0000000000..2f9efce362 --- /dev/null +++ b/queue-5.15/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch @@ -0,0 +1,76 @@ +From 
be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001 +From: Gavin Guo <gavinguo@igalia.com> +Date: Mon, 21 Apr 2025 19:35:36 +0800 +Subject: mm/huge_memory: fix dereferencing invalid pmd migration entry + +From: Gavin Guo <gavinguo@igalia.com> + +commit be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 upstream. + +When migrating a THP, concurrent access to the PMD migration entry during +a deferred split scan can lead to an invalid address access, as +illustrated below. To prevent this invalid access, it is necessary to +check the PMD migration entry and return early. In this context, there is +no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the +equality of the target folio. Since the PMD migration entry is locked, it +cannot be served as the target. + +Mailing list discussion and explanation from Hugh Dickins: "An anon_vma +lookup points to a location which may contain the folio of interest, but +might instead contain another folio: and weeding out those other folios is +precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of +replacing the wrong folio" comment a few lines above it) is for." + +BUG: unable to handle page fault for address: ffffea60001db008 +CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60 +Call Trace: +<TASK> +try_to_migrate_one+0x28c/0x3730 +rmap_walk_anon+0x4f6/0x770 +unmap_folio+0x196/0x1f0 +split_huge_page_to_list_to_order+0x9f6/0x1560 +deferred_split_scan+0xac5/0x12a0 +shrinker_debugfs_scan_write+0x376/0x470 +full_proxy_write+0x15c/0x220 +vfs_write+0x2fc/0xcb0 +ksys_write+0x146/0x250 +do_syscall_64+0x6a/0x120 +entry_SYSCALL_64_after_hwframe+0x76/0x7e + +The bug is found by syzkaller on an internal kernel, then confirmed on +upstream. 
+ +Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com +Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/ +Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/ +Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") +Signed-off-by: Gavin Guo <gavinguo@igalia.com> +Acked-by: David Hildenbrand <david@redhat.com> +Acked-by: Hugh Dickins <hughd@google.com> +Acked-by: Zi Yan <ziy@nvidia.com> +Reviewed-by: Gavin Shan <gshan@redhat.com> +Cc: Florent Revest <revest@google.com> +Cc: Matthew Wilcox (Oracle) <willy@infradead.org> +Cc: Miaohe Lin <linmiaohe@huawei.com> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +[gavin: backport the migration checking logic to __split_huge_pmd] +Signed-off-by: Gavin Guo <gavinguo@igalia.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + mm/huge_memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2161,7 +2161,7 @@ void __split_huge_pmd(struct vm_area_str + VM_BUG_ON(freeze && !page); + if (page) { + VM_WARN_ON_ONCE(!PageLocked(page)); +- if (page != pmd_page(*pmd)) ++ if (is_pmd_migration_entry(*pmd) || page != pmd_page(*pmd)) + goto out; + } + diff --git a/queue-5.15/net-fix-checksum-update-for-ila-adj-transport.patch b/queue-5.15/net-fix-checksum-update-for-ila-adj-transport.patch new file mode 100644 index 0000000000..e76ff7221c --- /dev/null +++ b/queue-5.15/net-fix-checksum-update-for-ila-adj-transport.patch @@ -0,0 +1,158 @@ +From 6043b794c7668c19dabc4a93c75b924a19474d59 Mon Sep 17 00:00:00 2001 +From: Paul Chaignon <paul.chaignon@gmail.com> +Date: Thu, 29 May 2025 12:28:05 +0200 +Subject: net: Fix checksum update for ILA adj-transport + +From: Paul Chaignon <paul.chaignon@gmail.com> + +commit 6043b794c7668c19dabc4a93c75b924a19474d59 upstream. 
+ +During ILA address translations, the L4 checksums can be handled in +different ways. One of them, adj-transport, consists of parsing the +transport layer and updating any found checksum. This logic relies on +inet_proto_csum_replace_by_diff and produces an incorrect skb->csum when +in state CHECKSUM_COMPLETE. + +This bug can be reproduced with a simple ILA to SIR mapping, assuming +packets are received with CHECKSUM_COMPLETE: + + $ ip a show dev eth0 + 14: eth0@if15: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP group default qlen 1000 + link/ether 62:ae:35:9e:0f:8d brd ff:ff:ff:ff:ff:ff link-netnsid 0 + inet6 3333:0:0:1::c078/64 scope global + valid_lft forever preferred_lft forever + inet6 fd00:10:244:1::c078/128 scope global nodad + valid_lft forever preferred_lft forever + inet6 fe80::60ae:35ff:fe9e:f8d/64 scope link proto kernel_ll + valid_lft forever preferred_lft forever + $ ip ila add loc_match fd00:10:244:1 loc 3333:0:0:1 \ + csum-mode adj-transport ident-type luid dev eth0 + +Then I hit [fd00:10:244:1::c078]:8000 with a server listening only on +[3333:0:0:1::c078]:8000. With the bug, the SYN packet is dropped with +SKB_DROP_REASON_TCP_CSUM after inet_proto_csum_replace_by_diff changed +skb->csum. 
The translation and drop are visible on pwru [1] traces: + + IFACE TUPLE FUNC + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ipv6_rcv + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) ip6_rcv_core + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) nf_hook_slow + eth0:9 [fd00:10:244:3::3d8]:51420->[fd00:10:244:1::c078]:8000(tcp) inet_proto_csum_replace_by_diff + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_early_demux + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_route_input + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_input_finish + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ip6_protocol_deliver_rcu + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) raw6_local_deliver + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) ipv6_raw_deliver + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) tcp_v6_rcv + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) __skb_checksum_complete + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skb_reason(SKB_DROP_REASON_TCP_CSUM) + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_head_state + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_release_data + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) skb_free_head + eth0:9 [fd00:10:244:3::3d8]:51420->[3333:0:0:1::c078]:8000(tcp) kfree_skbmem + +This is happening because inet_proto_csum_replace_by_diff is updating +skb->csum when it shouldn't. The L4 checksum is updated such that it +"cancels" the IPv6 address change in terms of checksum computation, so +the impact on skb->csum is null. 
+ +Note this would be different for an IPv4 packet since three fields +would be updated: the IPv4 address, the IP checksum, and the L4 +checksum. Two would cancel each other and skb->csum would still need +to be updated to take the L4 checksum change into account. + +This patch fixes it by passing an ipv6 flag to +inet_proto_csum_replace_by_diff, to skip the skb->csum update if we're +in the IPv6 case. Note the behavior of the only other user of +inet_proto_csum_replace_by_diff, the BPF subsystem, is left as is in +this patch and fixed in the subsequent patch. + +With the fix, using the reproduction from above, I can confirm +skb->csum is not touched by inet_proto_csum_replace_by_diff and the TCP +SYN proceeds to the application after the ILA translation. + +Link: https://github.com/cilium/pwru [1] +Fixes: 65d7ab8de582 ("net: Identifier Locator Addressing module") +Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Link: https://patch.msgid.link/b5539869e3550d46068504feb02d37653d939c0b.1748509484.git.paul.chaignon@gmail.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +[ Fixed conflict due to unrelated change in inet_proto_csum_replace_by_diff. 
] +Signed-off-by: Paul Chaignon <paul.chaignon@gmail.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/net/checksum.h | 2 +- + net/core/filter.c | 2 +- + net/core/utils.c | 4 ++-- + net/ipv6/ila/ila_common.c | 6 +++--- + 4 files changed, 7 insertions(+), 7 deletions(-) + +--- a/include/net/checksum.h ++++ b/include/net/checksum.h +@@ -154,7 +154,7 @@ void inet_proto_csum_replace16(__sum16 * + const __be32 *from, const __be32 *to, + bool pseudohdr); + void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, +- __wsum diff, bool pseudohdr); ++ __wsum diff, bool pseudohdr, bool ipv6); + + static __always_inline + void inet_proto_csum_replace2(__sum16 *sum, struct sk_buff *skb, +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1970,7 +1970,7 @@ BPF_CALL_5(bpf_l4_csum_replace, struct s + if (unlikely(from != 0)) + return -EINVAL; + +- inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo); ++ inet_proto_csum_replace_by_diff(ptr, skb, to, is_pseudo, false); + break; + case 2: + inet_proto_csum_replace2(ptr, skb, from, to, is_pseudo); +--- a/net/core/utils.c ++++ b/net/core/utils.c +@@ -473,11 +473,11 @@ void inet_proto_csum_replace16(__sum16 * + EXPORT_SYMBOL(inet_proto_csum_replace16); + + void inet_proto_csum_replace_by_diff(__sum16 *sum, struct sk_buff *skb, +- __wsum diff, bool pseudohdr) ++ __wsum diff, bool pseudohdr, bool ipv6) + { + if (skb->ip_summed != CHECKSUM_PARTIAL) { + *sum = csum_fold(csum_add(diff, ~csum_unfold(*sum))); +- if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr) ++ if (skb->ip_summed == CHECKSUM_COMPLETE && pseudohdr && !ipv6) + skb->csum = ~csum_add(diff, ~skb->csum); + } else if (pseudohdr) { + *sum = ~csum_fold(csum_add(diff, csum_unfold(*sum))); +--- a/net/ipv6/ila/ila_common.c ++++ b/net/ipv6/ila/ila_common.c +@@ -86,7 +86,7 @@ static void ila_csum_adjust_transport(st + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&th->check, skb, +- diff, true); ++ diff, 
true, true); + } + break; + case NEXTHDR_UDP: +@@ -97,7 +97,7 @@ static void ila_csum_adjust_transport(st + if (uh->check || skb->ip_summed == CHECKSUM_PARTIAL) { + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&uh->check, skb, +- diff, true); ++ diff, true, true); + if (!uh->check) + uh->check = CSUM_MANGLED_0; + } +@@ -111,7 +111,7 @@ static void ila_csum_adjust_transport(st + + diff = get_csum_diff(ip6h, p); + inet_proto_csum_replace_by_diff(&ih->icmp6_cksum, skb, +- diff, true); ++ diff, true, true); + } + break; + } diff --git a/queue-5.15/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch b/queue-5.15/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch new file mode 100644 index 0000000000..a1f37f4de7 --- /dev/null +++ b/queue-5.15/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch @@ -0,0 +1,67 @@ +From stable+bounces-155162-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:54 2025 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 20 Jun 2025 15:46:17 +0000 +Subject: net_sched: sch_sfq: annotate data-races around q->perturb_period +To: stable@vger.kernel.org +Cc: Eric Dumazet <edumazet@google.com>, Simon Horman <horms@kernel.org>, Jakub Kicinski <kuba@kernel.org> +Message-ID: <20250620154623.331294-1-edumazet@google.com> + +From: Eric Dumazet <edumazet@google.com> + +commit a17ef9e6c2c1cf0fc6cd6ca6a9ce525c67d1da7f upstream. + +sfq_perturbation() reads q->perturb_period locklessly. +Add annotations to fix potential issues. 
+ +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Simon Horman <horms@kernel.org> +Link: https://lore.kernel.org/r/20240430180015.3111398-1-edumazet@google.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -611,6 +611,7 @@ static void sfq_perturbation(struct time + struct Qdisc *sch = q->sch; + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; ++ int period; + + get_random_bytes(&nkey, sizeof(nkey)); + spin_lock(root_lock); +@@ -619,8 +620,12 @@ static void sfq_perturbation(struct time + sfq_rehash(sch); + spin_unlock(root_lock); + +- if (q->perturb_period) +- mod_timer(&q->perturb_timer, jiffies + q->perturb_period); ++ /* q->perturb_period can change under us from ++ * sfq_change() and sfq_destroy(). ++ */ ++ period = READ_ONCE(q->perturb_period); ++ if (period) ++ mod_timer(&q->perturb_timer, jiffies + period); + } + + static int sfq_change(struct Qdisc *sch, struct nlattr *opt) +@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, + q->quantum = ctl->quantum; + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + } +- q->perturb_period = ctl->perturb_period * HZ; ++ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + if (ctl->flows) + q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { +@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sc + struct sfq_sched_data *q = qdisc_priv(sch); + + tcf_block_put(q->block); +- q->perturb_period = 0; ++ WRITE_ONCE(q->perturb_period, 0); + del_timer_sync(&q->perturb_timer); + sfq_free(q->ht); + sfq_free(q->slots); diff --git a/queue-5.15/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch b/queue-5.15/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch new file mode 100644 index 0000000000..fe83d93818 --- /dev/null 
+++ b/queue-5.15/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch @@ -0,0 +1,114 @@ +From stable+bounces-155164-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:57 2025 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 20 Jun 2025 15:46:19 +0000 +Subject: net_sched: sch_sfq: don't allow 1 packet limit +To: stable@vger.kernel.org +Cc: Octavian Purdila <tavip@google.com>, syzbot <syzkaller@googlegroups.com>, Eric Dumazet <edumazet@google.com>, Jakub Kicinski <kuba@kernel.org> +Message-ID: <20250620154623.331294-3-edumazet@google.com> + +From: Octavian Purdila <tavip@google.com> + +commit 10685681bafce6febb39770f3387621bf5d67d0b upstream. + +The current implementation does not work correctly with a limit of +1. iproute2 actually checks for this and this patch adds the check in +kernel as well. + +This fixes the following syzkaller reported crash: + +UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:210:6 +index 65535 is out of range for type 'struct sfq_head[128]' +CPU: 0 PID: 2569 Comm: syz-executor101 Not tainted 5.10.0-smp-DEV #1 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 +Call Trace: + __dump_stack lib/dump_stack.c:79 [inline] + dump_stack+0x125/0x19f lib/dump_stack.c:120 + ubsan_epilogue lib/ubsan.c:148 [inline] + __ubsan_handle_out_of_bounds+0xed/0x120 lib/ubsan.c:347 + sfq_link net/sched/sch_sfq.c:210 [inline] + sfq_dec+0x528/0x600 net/sched/sch_sfq.c:238 + sfq_dequeue+0x39b/0x9d0 net/sched/sch_sfq.c:500 + sfq_reset+0x13/0x50 net/sched/sch_sfq.c:525 + qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026 + tbf_reset+0x3d/0x100 net/sched/sch_tbf.c:319 + qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026 + dev_reset_queue+0x8c/0x140 net/sched/sch_generic.c:1296 + netdev_for_each_tx_queue include/linux/netdevice.h:2350 [inline] + dev_deactivate_many+0x6dc/0xc20 net/sched/sch_generic.c:1362 + __dev_close_many+0x214/0x350 net/core/dev.c:1468 + dev_close_many+0x207/0x510 net/core/dev.c:1506 + 
unregister_netdevice_many+0x40f/0x16b0 net/core/dev.c:10738 + unregister_netdevice_queue+0x2be/0x310 net/core/dev.c:10695 + unregister_netdevice include/linux/netdevice.h:2893 [inline] + __tun_detach+0x6b6/0x1600 drivers/net/tun.c:689 + tun_detach drivers/net/tun.c:705 [inline] + tun_chr_close+0x104/0x1b0 drivers/net/tun.c:3640 + __fput+0x203/0x840 fs/file_table.c:280 + task_work_run+0x129/0x1b0 kernel/task_work.c:185 + exit_task_work include/linux/task_work.h:33 [inline] + do_exit+0x5ce/0x2200 kernel/exit.c:931 + do_group_exit+0x144/0x310 kernel/exit.c:1046 + __do_sys_exit_group kernel/exit.c:1057 [inline] + __se_sys_exit_group kernel/exit.c:1055 [inline] + __x64_sys_exit_group+0x3b/0x40 kernel/exit.c:1055 + do_syscall_64+0x6c/0xd0 + entry_SYSCALL_64_after_hwframe+0x61/0xcb +RIP: 0033:0x7fe5e7b52479 +Code: Unable to access opcode bytes at RIP 0x7fe5e7b5244f. +RSP: 002b:00007ffd3c800398 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe5e7b52479 +RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 +RBP: 00007fe5e7bcd2d0 R08: ffffffffffffffb8 R09: 0000000000000014 +R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe5e7bcd2d0 +R13: 0000000000000000 R14: 00007fe5e7bcdd20 R15: 00007fe5e7b24270 + +The crash can also be reproduced with the following (with a tc +recompiled to allow for sfq limits of 1): + +tc qdisc add dev dummy0 handle 1: root tbf rate 1Kbit burst 100b lat 1s +../iproute2-6.9.0/tc/tc qdisc add dev dummy0 handle 2: parent 1:10 sfq limit 1 +ifconfig dummy0 up +ping -I dummy0 -f -c2 -W0.1 8.8.8.8 +sleep 1 + +Scenario that triggers the crash: + +* the first packet is sent and queued in TBF and SFQ; qdisc qlen is 1 + +* TBF dequeues: it peeks from SFQ which moves the packet to the + gso_skb list and keeps qdisc qlen set to 1. TBF is out of tokens so + it schedules itself for later. + +* the second packet is sent and TBF tries to queue it to SFQ. 
qdisc + qlen is now 2 and because the SFQ limit is 1 the packet is dropped + by SFQ. At this point qlen is 1, and all of the SFQ slots are empty, + however q->tail is not NULL. + +At this point, assuming no more packets are queued, when sch_dequeue +runs again it will decrement the qlen for the current empty slot +causing an underflow and the subsequent out of bounds access. + +Reported-by: syzbot <syzkaller@googlegroups.com> +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Signed-off-by: Octavian Purdila <tavip@google.com> +Reviewed-by: Eric Dumazet <edumazet@google.com> +Link: https://patch.msgid.link/20241204030520.2084663-2-tavip@google.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 4 ++++ + 1 file changed, 4 insertions(+) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch, + if (!p) + return -ENOMEM; + } ++ if (ctl->limit == 1) { ++ NL_SET_ERR_MSG_MOD(extack, "invalid limit"); ++ return -EINVAL; ++ } + sch_tree_lock(sch); + if (ctl->quantum) + q->quantum = ctl->quantum; diff --git a/queue-5.15/net_sched-sch_sfq-handle-bigger-packets.patch b/queue-5.15/net_sched-sch_sfq-handle-bigger-packets.patch new file mode 100644 index 0000000000..2caa08221b --- /dev/null +++ b/queue-5.15/net_sched-sch_sfq-handle-bigger-packets.patch @@ -0,0 +1,161 @@ +From stable+bounces-155163-greg=kroah.com@vger.kernel.org Fri Jun 20 17:50:01 2025 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 20 Jun 2025 15:46:18 +0000 +Subject: net_sched: sch_sfq: handle bigger packets +To: stable@vger.kernel.org +Cc: "Eric Dumazet" <edumazet@google.com>, "Toke Høiland-Jørgensen" <toke@redhat.com>, "Jakub Kicinski" <kuba@kernel.org> +Message-ID: <20250620154623.331294-2-edumazet@google.com> + +From: Eric Dumazet <edumazet@google.com> + +commit e4650d7ae4252f67e997a632adfae0dd74d3a99a upstream. 
+ +SFQ has an assumption on dealing with packets smaller than 64KB. + +Even before BIG TCP, TCA_STAB can provide arbitrary big values +in qdisc_pkt_len(skb) + +It is time to switch (struct sfq_slot)->allot to a 32bit field. + +sizeof(struct sfq_slot) is now 64 bytes, giving better cache locality. + +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com> +Link: https://patch.msgid.link/20241008111603.653140-1-edumazet@google.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 39 +++++++++++++-------------------------- + 1 file changed, 13 insertions(+), 26 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -77,12 +77,6 @@ + #define SFQ_EMPTY_SLOT 0xffff + #define SFQ_DEFAULT_HASH_DIVISOR 1024 + +-/* We use 16 bits to store allot, and want to handle packets up to 64K +- * Scale allot by 8 (1<<3) so that no overflow occurs. +- */ +-#define SFQ_ALLOT_SHIFT 3 +-#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) +- + /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ + typedef u16 sfq_index; + +@@ -104,7 +98,7 @@ struct sfq_slot { + sfq_index next; /* next slot in sfq RR chain */ + struct sfq_head dep; /* anchor in dep[] chains */ + unsigned short hash; /* hash value (index in ht[]) */ +- short allot; /* credit for this slot */ ++ int allot; /* credit for this slot */ + + unsigned int backlog; + struct red_vars vars; +@@ -120,7 +114,6 @@ struct sfq_sched_data { + siphash_key_t perturbation; + u8 cur_depth; /* depth of longest slot */ + u8 flags; +- unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ + struct tcf_proto __rcu *filter_list; + struct tcf_block *block; + sfq_index *ht; /* Hash table ('divisor' slots) */ +@@ -459,7 +452,7 @@ enqueue: + */ + q->tail = slot; + /* We could use a bigger initial quantum for new flows */ +- slot->allot = 
q->scaled_quantum; ++ slot->allot = q->quantum; + } + if (++sch->q.qlen <= q->limit) + return NET_XMIT_SUCCESS; +@@ -496,7 +489,7 @@ next_slot: + slot = &q->slots[a]; + if (slot->allot <= 0) { + q->tail = slot; +- slot->allot += q->scaled_quantum; ++ slot->allot += q->quantum; + goto next_slot; + } + skb = slot_dequeue_head(slot); +@@ -515,7 +508,7 @@ next_slot: + } + q->tail->next = next_a; + } else { +- slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); ++ slot->allot -= qdisc_pkt_len(skb); + } + return skb; + } +@@ -598,7 +591,7 @@ drop: + q->tail->next = x; + } + q->tail = slot; +- slot->allot = q->scaled_quantum; ++ slot->allot = q->quantum; + } + } + sch->q.qlen -= dropped; +@@ -628,7 +621,8 @@ static void sfq_perturbation(struct time + mod_timer(&q->perturb_timer, jiffies + period); + } + +-static int sfq_change(struct Qdisc *sch, struct nlattr *opt) ++static int sfq_change(struct Qdisc *sch, struct nlattr *opt, ++ struct netlink_ext_ack *extack) + { + struct sfq_sched_data *q = qdisc_priv(sch); + struct tc_sfq_qopt *ctl = nla_data(opt); +@@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, + (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) + return -EINVAL; + +- /* slot->allot is a short, make sure quantum is not too big. 
*/ +- if (ctl->quantum) { +- unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); +- +- if (scaled <= 0 || scaled > SHRT_MAX) +- return -EINVAL; ++ if ((int)ctl->quantum < 0) { ++ NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); ++ return -EINVAL; + } +- + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) + return -EINVAL; +@@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, + return -ENOMEM; + } + sch_tree_lock(sch); +- if (ctl->quantum) { ++ if (ctl->quantum) + q->quantum = ctl->quantum; +- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); +- } + WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + if (ctl->flows) + q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); +@@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, s + q->divisor = SFQ_DEFAULT_HASH_DIVISOR; + q->maxflows = SFQ_DEFAULT_FLOWS; + q->quantum = psched_mtu(qdisc_dev(sch)); +- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + q->perturb_period = 0; + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); + + if (opt) { +- int err = sfq_change(sch, opt); ++ int err = sfq_change(sch, opt, extack); + if (err) + return err; + } +@@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Q + if (idx != SFQ_EMPTY_SLOT) { + const struct sfq_slot *slot = &q->slots[idx]; + +- xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; ++ xstats.allot = slot->allot; + qs.qlen = slot->qlen; + qs.backlog = slot->backlog; + } diff --git a/queue-5.15/net_sched-sch_sfq-move-the-limit-validation.patch b/queue-5.15/net_sched-sch_sfq-move-the-limit-validation.patch new file mode 100644 index 0000000000..41c5601642 --- /dev/null +++ b/queue-5.15/net_sched-sch_sfq-move-the-limit-validation.patch @@ -0,0 +1,83 @@ +From stable+bounces-155166-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:20 2025 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 20 Jun 2025 15:46:21 +0000 +Subject: net_sched: sch_sfq: move the limit validation +To: 
stable@vger.kernel.org +Cc: Octavian Purdila <tavip@google.com>, syzbot <syzkaller@googlegroups.com>, Cong Wang <xiyou.wangcong@gmail.com>, "David S. Miller" <davem@davemloft.net> +Message-ID: <20250620154623.331294-5-edumazet@google.com> + +From: Octavian Purdila <tavip@google.com> + +commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 upstream. + +It is not sufficient to directly validate the limit on the data that +the user passes as it can be updated based on how the other parameters +are changed. + +Move the check at the end of the configuration update process to also +catch scenarios where the limit is indirectly updated, for example +with the following configurations: + +tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1 +tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1 + +This fixes the following syzkaller reported crash: + +------------[ cut here ]------------ +UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6 +index 65535 is out of range for type 'struct sfq_head[128]' +CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 +Call Trace: + <TASK> + __dump_stack lib/dump_stack.c:94 [inline] + dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120 + ubsan_epilogue lib/ubsan.c:231 [inline] + __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429 + sfq_link net/sched/sch_sfq.c:203 [inline] + sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231 + sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493 + sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518 + qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 + tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339 + qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 + dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311 + netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline] + dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375 + +Reported-by: syzbot 
<syzkaller@googlegroups.com> +Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit") +Signed-off-by: Octavian Purdila <tavip@google.com> +Acked-by: Cong Wang <xiyou.wangcong@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch, + if (!p) + return -ENOMEM; + } +- if (ctl->limit == 1) { +- NL_SET_ERR_MSG_MOD(extack, "invalid limit"); +- return -EINVAL; +- } + + sch_tree_lock(sch); + +@@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch, + limit = min_t(u32, ctl->limit, maxdepth * maxflows); + maxflows = min_t(u32, maxflows, limit); + } ++ if (limit == 1) { ++ sch_tree_unlock(sch); ++ kfree(p); ++ NL_SET_ERR_MSG_MOD(extack, "invalid limit"); ++ return -EINVAL; ++ } + + /* commit configuration */ + q->limit = limit; diff --git a/queue-5.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch b/queue-5.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch new file mode 100644 index 0000000000..927bb43944 --- /dev/null +++ b/queue-5.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch @@ -0,0 +1,75 @@ +From stable+bounces-155168-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:24 2025 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 20 Jun 2025 15:46:23 +0000 +Subject: net_sched: sch_sfq: reject invalid perturb period +To: stable@vger.kernel.org +Cc: Eric Dumazet <edumazet@google.com>, Gerrard Tai <gerrard.tai@starlabs.sg>, Jakub Kicinski <kuba@kernel.org> +Message-ID: <20250620154623.331294-7-edumazet@google.com> + +From: Eric Dumazet <edumazet@google.com> + +commit 7ca52541c05c832d32b112274f81a985101f9ba8 upstream. 
+ +Gerrard Tai reported that SFQ perturb_period has no range check yet, +and this can be used to trigger a race condition fixed in a separate patch. + +We want to make sure ctl->perturb_period * HZ will not overflow +and is positive. + +Tested: + +tc qd add dev lo root sfq perturb -10 # negative value : error +Error: sch_sfq: invalid perturb period. + +tc qd add dev lo root sfq perturb 1000000000 # too big : error +Error: sch_sfq: invalid perturb period. + +tc qd add dev lo root sfq perturb 2000000 # acceptable value +tc -s -d qd sh dev lo +qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg> +Signed-off-by: Eric Dumazet <edumazet@google.com> +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -653,6 +653,14 @@ static int sfq_change(struct Qdisc *sch, + NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); + return -EINVAL; + } ++ ++ if (ctl->perturb_period < 0 || ++ ctl->perturb_period > INT_MAX / HZ) { ++ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); ++ return -EINVAL; ++ } ++ perturb_period = ctl->perturb_period * HZ; ++ + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) + return -EINVAL; +@@ -669,14 +677,12 @@ static int sfq_change(struct Qdisc *sch, + headdrop = q->headdrop; + maxdepth = q->maxdepth; + maxflows = q->maxflows; +- perturb_period = q->perturb_period; + quantum = q->quantum; + flags = q->flags; + + /* update and validate configuration */ + if 
(ctl->quantum) + quantum = ctl->quantum; +- perturb_period = ctl->perturb_period * HZ; + if (ctl->flows) + maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { diff --git a/queue-5.15/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch b/queue-5.15/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch new file mode 100644 index 0000000000..2897142852 --- /dev/null +++ b/queue-5.15/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch @@ -0,0 +1,120 @@ +From stable+bounces-155165-greg=kroah.com@vger.kernel.org Fri Jun 20 17:53:18 2025 +From: Eric Dumazet <edumazet@google.com> +Date: Fri, 20 Jun 2025 15:46:20 +0000 +Subject: net_sched: sch_sfq: use a temporary work area for validating configuration +To: stable@vger.kernel.org +Cc: Octavian Purdila <tavip@google.com>, Cong Wang <xiyou.wangcong@gmail.com>, "David S. Miller" <davem@davemloft.net> +Message-ID: <20250620154623.331294-4-edumazet@google.com> + +From: Octavian Purdila <tavip@google.com> + +commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 upstream. + +Many configuration parameters have influence on others (e.g. divisor +-> flows -> limit, depth -> limit) and so it is difficult to correctly +do all of the validation before applying the configuration. And if a +validation error is detected late it is difficult to roll back a +partially applied configuration. + +To avoid these issues use a temporary work area to update and validate +the configuration and only then apply the configuration to the +internal state. + +Signed-off-by: Octavian Purdila <tavip@google.com> +Acked-by: Cong Wang <xiyou.wangcong@gmail.com> +Signed-off-by: David S. 
Miller <davem@davemloft.net> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 56 ++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 44 insertions(+), 12 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, + struct red_parms *p = NULL; + struct sk_buff *to_free = NULL; + struct sk_buff *tail = NULL; ++ unsigned int maxflows; ++ unsigned int quantum; ++ unsigned int divisor; ++ int perturb_period; ++ u8 headdrop; ++ u8 maxdepth; ++ int limit; ++ u8 flags; ++ + + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) + return -EINVAL; +@@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch, + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; + } ++ + sch_tree_lock(sch); ++ ++ limit = q->limit; ++ divisor = q->divisor; ++ headdrop = q->headdrop; ++ maxdepth = q->maxdepth; ++ maxflows = q->maxflows; ++ perturb_period = q->perturb_period; ++ quantum = q->quantum; ++ flags = q->flags; ++ ++ /* update and validate configuration */ + if (ctl->quantum) +- q->quantum = ctl->quantum; +- WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); ++ quantum = ctl->quantum; ++ perturb_period = ctl->perturb_period * HZ; + if (ctl->flows) +- q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); ++ maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { +- q->divisor = ctl->divisor; +- q->maxflows = min_t(u32, q->maxflows, q->divisor); ++ divisor = ctl->divisor; ++ maxflows = min_t(u32, maxflows, divisor); + } + if (ctl_v1) { + if (ctl_v1->depth) +- q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); ++ maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + if (p) { +- swap(q->red_parms, p); +- red_set_parms(q->red_parms, ++ red_set_parms(p, + ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, + ctl_v1->Plog, ctl_v1->Scell_log, + NULL, + ctl_v1->max_P); + } +- q->flags = ctl_v1->flags; +- q->headdrop = 
ctl_v1->headdrop; ++ flags = ctl_v1->flags; ++ headdrop = ctl_v1->headdrop; + } + if (ctl->limit) { +- q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); +- q->maxflows = min_t(u32, q->maxflows, q->limit); ++ limit = min_t(u32, ctl->limit, maxdepth * maxflows); ++ maxflows = min_t(u32, maxflows, limit); + } + ++ /* commit configuration */ ++ q->limit = limit; ++ q->divisor = divisor; ++ q->headdrop = headdrop; ++ q->maxdepth = maxdepth; ++ q->maxflows = maxflows; ++ WRITE_ONCE(q->perturb_period, perturb_period); ++ q->quantum = quantum; ++ q->flags = flags; ++ if (p) ++ swap(q->red_parms, p); ++ + qlen = sch->q.qlen; + while (sch->q.qlen > q->limit) { + dropped += sfq_drop(sch, &to_free); diff --git a/queue-5.15/series b/queue-5.15/series index ada521d4ba..fd9b42ca3a 100644 --- a/queue-5.15/series +++ b/queue-5.15/series @@ -395,4 +395,14 @@ arm64-spectre-increase-parameters-that-can-be-used-to-turn-off-bhb-mitigation-in arm64-bpf-add-bhb-mitigation-to-the-epilogue-for-cbpf-programs.patch arm64-bpf-only-mitigate-cbpf-programs-loaded-by-unprivileged-users.patch arm64-proton-pack-add-new-cpus-k-values-for-branch-mitigation.patch +net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch +net_sched-sch_sfq-handle-bigger-packets.patch +net_sched-sch_sfq-don-t-allow-1-packet-limit.patch +net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch +net_sched-sch_sfq-move-the-limit-validation.patch +net_sched-sch_sfq-reject-invalid-perturb-period.patch +mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch +ext4-make-abort-mount-option-handling-standard.patch +ext4-avoid-remount-errors-with-abort-mount-option.patch +net-fix-checksum-update-for-ila-adj-transport.patch bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch |