43 files changed, 3672 insertions, 0 deletions
diff --git a/queue-6.15/atm-revert-atm_account_tx-if-copy_from_iter_full-fails.patch b/queue-6.15/atm-revert-atm_account_tx-if-copy_from_iter_full-fails.patch new file mode 100644 index 00000000000..edb71bbf27d --- /dev/null +++ b/queue-6.15/atm-revert-atm_account_tx-if-copy_from_iter_full-fails.patch @@ -0,0 +1,75 @@ +From 7851263998d4269125fd6cb3fdbfc7c6db853859 Mon Sep 17 00:00:00 2001 +From: Kuniyuki Iwashima <kuniyu@google.com> +Date: Mon, 16 Jun 2025 11:21:15 -0700 +Subject: atm: Revert atm_account_tx() if copy_from_iter_full() fails. + +From: Kuniyuki Iwashima <kuniyu@google.com> + +commit 7851263998d4269125fd6cb3fdbfc7c6db853859 upstream. + +In vcc_sendmsg(), we account skb->truesize to sk->sk_wmem_alloc by +atm_account_tx(). + +It is expected to be reverted by atm_pop_raw() later called by +vcc->dev->ops->send(vcc, skb). + +However, vcc_sendmsg() misses the same revert when copy_from_iter_full() +fails, and then we will leak a socket. + +Let's factorise the revert part as atm_return_tx() and call it in +the failure path. + +Note that the corresponding sk_wmem_alloc operation can be found in +alloc_tx() as of the blamed commit. + + $ git blame -L:alloc_tx net/atm/common.c c55fa3cccbc2c~ + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Simon Horman <horms@kernel.org> +Closes: https://lore.kernel.org/netdev/20250614161959.GR414686@horms.kernel.org/ +Signed-off-by: Kuniyuki Iwashima <kuniyu@google.com> +Link: https://patch.msgid.link/20250616182147.963333-3-kuni1840@gmail.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/atmdev.h | 6 ++++++ + net/atm/common.c | 1 + + net/atm/raw.c | 2 +- + 3 files changed, 8 insertions(+), 1 deletion(-) + +--- a/include/linux/atmdev.h ++++ b/include/linux/atmdev.h +@@ -249,6 +249,12 @@ static inline void atm_account_tx(struct + ATM_SKB(skb)->atm_options = vcc->atm_options; + } + ++static inline void atm_return_tx(struct atm_vcc *vcc, struct sk_buff *skb) ++{ ++ WARN_ON_ONCE(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, ++ &sk_atm(vcc)->sk_wmem_alloc)); ++} ++ + static inline void atm_force_charge(struct atm_vcc *vcc,int truesize) + { + atomic_add(truesize, &sk_atm(vcc)->sk_rmem_alloc); +--- a/net/atm/common.c ++++ b/net/atm/common.c +@@ -635,6 +635,7 @@ int vcc_sendmsg(struct socket *sock, str + + skb->dev = NULL; /* for paths shared with net_device interfaces */ + if (!copy_from_iter_full(skb_put(skb, size), size, &m->msg_iter)) { ++ atm_return_tx(vcc, skb); + kfree_skb(skb); + error = -EFAULT; + goto out; +--- a/net/atm/raw.c ++++ b/net/atm/raw.c +@@ -36,7 +36,7 @@ static void atm_pop_raw(struct atm_vcc * + + pr_debug("(%d) %d -= %d\n", + vcc->vci, sk_wmem_alloc_get(sk), ATM_SKB(skb)->acct_truesize); +- WARN_ON(refcount_sub_and_test(ATM_SKB(skb)->acct_truesize, &sk->sk_wmem_alloc)); ++ atm_return_tx(vcc, skb); + dev_kfree_skb_any(skb); + sk->sk_write_space(sk); + } diff --git a/queue-6.15/cifs-deal-with-the-channel-loading-lag-while-picking-channels.patch b/queue-6.15/cifs-deal-with-the-channel-loading-lag-while-picking-channels.patch new file mode 100644 index 00000000000..e1ce7a886ec --- /dev/null +++ b/queue-6.15/cifs-deal-with-the-channel-loading-lag-while-picking-channels.patch @@ -0,0 +1,78 @@ +From 66d590b828b1fd9fa337047ae58fe1c4c6f43609 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N <sprasad@microsoft.com> +Date: Mon, 2 Jun 2025 22:37:12 +0530 +Subject: cifs: deal with the channel loading lag while picking channels + +From: Shyam Prasad N 
<sprasad@microsoft.com> + +commit 66d590b828b1fd9fa337047ae58fe1c4c6f43609 upstream. + +Our current approach to select a channel for sending requests is this: +1. iterate all channels to find the min and max queue depth +2. if min and max are not the same, pick the channel with min depth +3. if min and max are same, round robin, as all channels are equally loaded + +The problem with this approach is that there's a lag between selecting +a channel and sending the request (that increases the queue depth on the channel). +While these numbers will eventually catch up, there could be a skew in the +channel usage, depending on the application's I/O parallelism and the server's +speed of handling requests. + +With sufficient parallelism, this lag can artificially increase the queue depth, +thereby impacting the performance negatively. + +This change modifies step 1 above to start the iteration from the last +selected channel. This is to reduce the skew in channel usage even in the presence +of this lag. + +Fixes: ea90708d3cf3 ("cifs: use the least loaded channel for sending requests") +Cc: <stable@vger.kernel.org> +Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> +Signed-off-by: Steve French <stfrench@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/smb/client/transport.c | 14 +++++++------- + 1 file changed, 7 insertions(+), 7 deletions(-) + +--- a/fs/smb/client/transport.c ++++ b/fs/smb/client/transport.c +@@ -1018,14 +1018,16 @@ struct TCP_Server_Info *cifs_pick_channe + uint index = 0; + unsigned int min_in_flight = UINT_MAX, max_in_flight = 0; + struct TCP_Server_Info *server = NULL; +- int i; ++ int i, start, cur; + + if (!ses) + return NULL; + + spin_lock(&ses->chan_lock); ++ start = atomic_inc_return(&ses->chan_seq); + for (i = 0; i < ses->chan_count; i++) { +- server = ses->chans[i].server; ++ cur = (start + i) % ses->chan_count; ++ server = ses->chans[cur].server; + if (!server || server->terminate) + continue; + +@@ -1042,17 +1044,15 @@ struct TCP_Server_Info *cifs_pick_channe + */ + if (server->in_flight < min_in_flight) { + min_in_flight = server->in_flight; +- index = i; ++ index = cur; + } + if (server->in_flight > max_in_flight) + max_in_flight = server->in_flight; + } + + /* if all channels are equally loaded, fall back to round-robin */ +- if (min_in_flight == max_in_flight) { +- index = (uint)atomic_inc_return(&ses->chan_seq); +- index %= ses->chan_count; +- } ++ if (min_in_flight == max_in_flight) ++ index = (uint)start % ses->chan_count; + + server = ses->chans[index].server; + spin_unlock(&ses->chan_lock); diff --git a/queue-6.15/cifs-do-not-disable-interface-polling-on-failure.patch b/queue-6.15/cifs-do-not-disable-interface-polling-on-failure.patch new file mode 100644 index 00000000000..49c8d7eefc1 --- /dev/null +++ b/queue-6.15/cifs-do-not-disable-interface-polling-on-failure.patch @@ -0,0 +1,69 @@ +From 42ca547b13a20e7cbb04fbdf8d5f089ac4bb35b7 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N <sprasad@microsoft.com> +Date: Mon, 2 Jun 2025 22:37:17 +0530 +Subject: cifs: do not disable interface polling on failure + +From: Shyam Prasad N <sprasad@microsoft.com> + +commit 42ca547b13a20e7cbb04fbdf8d5f089ac4bb35b7 upstream. + +When a server has multichannel enabled, we keep polling the server +for interfaces periodically. However, when this query fails, we +disable the polling. This can be problematic as it takes away the +chance for the server to start advertising again.
+ +This change reschedules the delayed work, even if the current call +failed. That way, multichannel sessions can recover. + +Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> +Cc: stable@vger.kernel.org +Signed-off-by: Steve French <stfrench@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/smb/client/connect.c | 6 +----- + fs/smb/client/smb2pdu.c | 9 +++++---- + 2 files changed, 6 insertions(+), 9 deletions(-) + +--- a/fs/smb/client/connect.c ++++ b/fs/smb/client/connect.c +@@ -116,13 +116,9 @@ static void smb2_query_server_interfaces + rc = server->ops->query_server_interfaces(xid, tcon, false); + free_xid(xid); + +- if (rc) { +- if (rc == -EOPNOTSUPP) +- return; +- ++ if (rc) + cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n", + __func__, rc); +- } + + queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, + (SMB_INTERFACE_POLL_INTERVAL * HZ)); +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -423,6 +423,10 @@ skip_sess_setup: + free_xid(xid); + ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + ++ /* regardless of rc value, setup polling */ ++ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, ++ (SMB_INTERFACE_POLL_INTERVAL * HZ)); ++ + mutex_unlock(&ses->session_mutex); + + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { +@@ -443,11 +447,8 @@ skip_sess_setup: + if (ses->chan_max > ses->chan_count && + ses->iface_count && + !SERVER_IS_CHAN(server)) { +- if (ses->chan_count == 1) { ++ if (ses->chan_count == 1) + cifs_server_dbg(VFS, "supports multichannel now\n"); +- queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, +- (SMB_INTERFACE_POLL_INTERVAL * HZ)); +- } + + cifs_try_adding_channels(ses); + } diff --git a/queue-6.15/cifs-serialize-other-channels-when-query-server-interfaces-is-pending.patch b/queue-6.15/cifs-serialize-other-channels-when-query-server-interfaces-is-pending.patch new file mode 100644 index 00000000000..729d005d081 --- /dev/null +++ b/queue-6.15/cifs-serialize-other-channels-when-query-server-interfaces-is-pending.patch @@ -0,0 +1,86 @@ +From b5e3e6e28cf3853566ba5d816f79aba5be579158 Mon Sep 17 00:00:00 2001 +From: Shyam Prasad N <sprasad@microsoft.com> +Date: Mon, 2 Jun 2025 22:37:15 +0530 +Subject: cifs: serialize other channels when query server interfaces is pending + +From: Shyam Prasad N <sprasad@microsoft.com> + +commit b5e3e6e28cf3853566ba5d816f79aba5be579158 upstream. + +Today, during smb2_reconnect, session_mutex is released as soon as +the tcon is reconnected and is in a good state. However, in case +multichannel is enabled, there is also a query of server interfaces that +follows. We've seen that this query can race with reconnects of other +channels, causing them to step on each other with reconnects. + +This change extends the hold of session_mutex till after the query of +server interfaces is complete. In order to avoid recursive smb2_reconnect +checks during query ioctl, this change also introduces a session flag +for sessions where such a query is in progress. 
+ +Signed-off-by: Shyam Prasad N <sprasad@microsoft.com> +Cc: stable@vger.kernel.org +Signed-off-by: Steve French <stfrench@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/smb/client/cifsglob.h | 1 + + fs/smb/client/smb2pdu.c | 24 ++++++++++++++++++------ + 2 files changed, 19 insertions(+), 6 deletions(-) + +--- a/fs/smb/client/cifsglob.h ++++ b/fs/smb/client/cifsglob.h +@@ -1084,6 +1084,7 @@ struct cifs_chan { + }; + + #define CIFS_SES_FLAG_SCALE_CHANNELS (0x1) ++#define CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES (0x2) + + /* + * Session structure. One of these for each uid session with a particular host +--- a/fs/smb/client/smb2pdu.c ++++ b/fs/smb/client/smb2pdu.c +@@ -411,14 +411,19 @@ skip_sess_setup: + if (!rc && + (server->capabilities & SMB2_GLOBAL_CAP_MULTI_CHANNEL) && + server->ops->query_server_interfaces) { +- mutex_unlock(&ses->session_mutex); +- + /* +- * query server network interfaces, in case they change ++ * query server network interfaces, in case they change. ++ * Also mark the session as pending this update while the query ++ * is in progress. This will be used to avoid calling ++ * smb2_reconnect recursively. + */ ++ ses->flags |= CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; + xid = get_xid(); + rc = server->ops->query_server_interfaces(xid, tcon, false); + free_xid(xid); ++ ses->flags &= ~CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES; ++ ++ mutex_unlock(&ses->session_mutex); + + if (rc == -EOPNOTSUPP && ses->chan_count > 1) { + /* +@@ -560,11 +565,18 @@ static int smb2_ioctl_req_init(u32 opcod + struct TCP_Server_Info *server, + void **request_buf, unsigned int *total_len) + { +- /* Skip reconnect only for FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs */ +- if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO) { ++ /* ++ * Skip reconnect in one of the following cases: ++ * 1. For FSCTL_VALIDATE_NEGOTIATE_INFO IOCTLs ++ * 2. For FSCTL_QUERY_NETWORK_INTERFACE_INFO IOCTL when called from ++ * smb2_reconnect (indicated by CIFS_SES_FLAG_SCALE_CHANNELS ses flag) ++ */ ++ if (opcode == FSCTL_VALIDATE_NEGOTIATE_INFO || ++ (opcode == FSCTL_QUERY_NETWORK_INTERFACE_INFO && ++ (tcon->ses->flags & CIFS_SES_FLAGS_PENDING_QUERY_INTERFACES))) + return __smb2_plain_req_init(SMB2_IOCTL, tcon, server, + request_buf, total_len); +- } ++ + return smb2_plain_req_init(SMB2_IOCTL, tcon, server, + request_buf, total_len); + } diff --git a/queue-6.15/drivers-rapidio-rio_cm.c-prevent-possible-heap-overwrite.patch b/queue-6.15/drivers-rapidio-rio_cm.c-prevent-possible-heap-overwrite.patch new file mode 100644 index 00000000000..1c0f5b2a5aa --- /dev/null +++ b/queue-6.15/drivers-rapidio-rio_cm.c-prevent-possible-heap-overwrite.patch @@ -0,0 +1,47 @@ +From 50695153d7ddde3b1696dbf0085be0033bf3ddb3 Mon Sep 17 00:00:00 2001 +From: Andrew Morton <akpm@linux-foundation.org> +Date: Sat, 7 Jun 2025 17:43:18 -0700 +Subject: drivers/rapidio/rio_cm.c: prevent possible heap overwrite + +From: Andrew Morton <akpm@linux-foundation.org> + +commit 50695153d7ddde3b1696dbf0085be0033bf3ddb3 upstream. + +In + +riocm_cdev_ioctl(RIO_CM_CHAN_SEND) + -> cm_chan_msg_send() + -> riocm_ch_send() + +cm_chan_msg_send() checks that userspace didn't send too much data but +riocm_ch_send() failed to check that userspace sent sufficient data. The +result is that riocm_ch_send() can write to fields in the rio_ch_chan_hdr +which were outside the bounds of the space which cm_chan_msg_send() +allocated. + +Address this by teaching riocm_ch_send() to check that the entire +rio_ch_chan_hdr was copied in from userspace. 
+ +Reported-by: maher azz <maherazz04@gmail.com> +Cc: Matt Porter <mporter@kernel.crashing.org> +Cc: Alexandre Bounine <alex.bou9@gmail.com> +Cc: Linus Torvalds <torvalds@linuxfoundation.org> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/rapidio/rio_cm.c | 3 +++ + 1 file changed, 3 insertions(+) + +--- a/drivers/rapidio/rio_cm.c ++++ b/drivers/rapidio/rio_cm.c +@@ -789,6 +789,9 @@ static int riocm_ch_send(u16 ch_id, void + if (buf == NULL || ch_id == 0 || len == 0 || len > RIO_MAX_MSG_SIZE) + return -EINVAL; + ++ if (len < sizeof(struct rio_ch_chan_hdr)) ++ return -EINVAL; /* insufficient data from user */ ++ + ch = riocm_get_channel(ch_id); + if (!ch) { + riocm_error("%s(%d) ch_%d not found", current->comm, diff --git a/queue-6.15/fgraph-do-not-enable-function_graph-tracer-when-setting-funcgraph-args.patch b/queue-6.15/fgraph-do-not-enable-function_graph-tracer-when-setting-funcgraph-args.patch new file mode 100644 index 00000000000..2c45e66d2f5 --- /dev/null +++ b/queue-6.15/fgraph-do-not-enable-function_graph-tracer-when-setting-funcgraph-args.patch @@ -0,0 +1,131 @@ +From 327e28664307d49ce3fa71ba30dcc0007c270974 Mon Sep 17 00:00:00 2001 +From: Steven Rostedt <rostedt@goodmis.org> +Date: Wed, 18 Jun 2025 07:38:01 -0400 +Subject: fgraph: Do not enable function_graph tracer when setting funcgraph-args + +From: Steven Rostedt <rostedt@goodmis.org> + +commit 327e28664307d49ce3fa71ba30dcc0007c270974 upstream. + +When setting the funcgraph-args option when function graph tracer is not +enabled, it incorrectly enables it. Worse, it unregisters itself when it +was never registered. Then when it gets enabled again, it will register +itself a second time causing a WARNing. + + ~# echo 1 > /sys/kernel/tracing/options/funcgraph-args + ~# head -20 /sys/kernel/tracing/trace + # tracer: nop + # + # entries-in-buffer/entries-written: 813/26317372 #P:8 + # + # _-----=> irqs-off/BH-disabled + # / _----=> need-resched + # | / _---=> hardirq/softirq + # || / _--=> preempt-depth + # ||| / _-=> migrate-disable + # |||| / delay + # TASK-PID CPU# ||||| TIMESTAMP FUNCTION + # | | | ||||| | | + <idle>-0 [007] d..4. 358.966010: 7) 1.692 us | fetch_next_timer_interrupt(basej=4294981640, basem=357956000000, base_local=0xffff88823c3ae040, base_global=0xffff88823c3af300, tevt=0xffff888100e47cb8); + <idle>-0 [007] d..4. 358.966012: 7) | tmigr_cpu_deactivate(nextexp=357988000000) { + <idle>-0 [007] d..4. 358.966013: 7) | _raw_spin_lock(lock=0xffff88823c3b2320) { + <idle>-0 [007] d..4. 358.966014: 7) 0.981 us | preempt_count_add(val=1); + <idle>-0 [007] d..5. 358.966017: 7) 1.058 us | do_raw_spin_lock(lock=0xffff88823c3b2320); + <idle>-0 [007] d..4. 358.966019: 7) 5.824 us | } + <idle>-0 [007] d..5. 358.966021: 7) | tmigr_inactive_up(group=0xffff888100cb9000, child=0x0, data=0xffff888100e47bc0) { + <idle>-0 [007] d..5. 358.966022: 7) | tmigr_update_events(group=0xffff888100cb9000, child=0x0, data=0xffff888100e47bc0) { + +Notice the "tracer: nop" at the top there. The current tracer is the "nop" +tracer, but the content is obviously the function graph tracer.
+ +Enabling function graph tracing will cause it to register again and +trigger a warning in the accounting: + + ~# echo function_graph > /sys/kernel/tracing/current_tracer + -bash: echo: write error: Device or resource busy + +With the dmesg of: + + ------------[ cut here ]------------ + WARNING: CPU: 7 PID: 1095 at kernel/trace/ftrace.c:3509 ftrace_startup_subops+0xc1e/0x1000 + Modules linked in: kvm_intel kvm irqbypass + CPU: 7 UID: 0 PID: 1095 Comm: bash Not tainted 6.16.0-rc2-test-00006-gea03de4105d3 #24 PREEMPT + Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 + RIP: 0010:ftrace_startup_subops+0xc1e/0x1000 + Code: 48 b8 22 01 00 00 00 00 ad de 49 89 84 24 88 01 00 00 8b 44 24 08 89 04 24 e9 c3 f7 ff ff c7 04 24 ed ff ff ff e9 b7 f7 ff ff <0f> 0b c7 04 24 f0 ff ff ff e9 a9 f7 ff ff c7 04 24 f4 ff ff ff e9 + RSP: 0018:ffff888133cff948 EFLAGS: 00010202 + RAX: 0000000000000001 RBX: 1ffff1102679ff31 RCX: 0000000000000000 + RDX: 1ffffffff0b27a60 RSI: ffffffff8593d2f0 RDI: ffffffff85941140 + RBP: 00000000000c2041 R08: ffffffffffffffff R09: ffffed1020240221 + R10: ffff88810120110f R11: ffffed1020240214 R12: ffffffff8593d2f0 + R13: ffffffff8593d300 R14: ffffffff85941140 R15: ffffffff85631100 + FS: 00007f7ec6f28740(0000) GS:ffff8882b5251000(0000) knlGS:0000000000000000 + CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 + CR2: 00007f7ec6f181c0 CR3: 000000012f1d0005 CR4: 0000000000172ef0 + Call Trace: + <TASK> + ? __pfx_ftrace_startup_subops+0x10/0x10 + ? find_held_lock+0x2b/0x80 + ? ftrace_stub_direct_tramp+0x10/0x10 + ? ftrace_stub_direct_tramp+0x10/0x10 + ? trace_preempt_on+0xd0/0x110 + ? __pfx_trace_graph_entry_args+0x10/0x10 + register_ftrace_graph+0x4d2/0x1020 + ? tracing_reset_online_cpus+0x14b/0x1e0 + ? __pfx_register_ftrace_graph+0x10/0x10 + ? ring_buffer_record_enable+0x16/0x20 + ? tracing_reset_online_cpus+0x153/0x1e0 + ? __pfx_tracing_reset_online_cpus+0x10/0x10 + ? __pfx_trace_graph_return+0x10/0x10 + graph_trace_init+0xfd/0x160 + tracing_set_tracer+0x500/0xa80 + ? __pfx_tracing_set_tracer+0x10/0x10 + ? lock_release+0x181/0x2d0 + ? _copy_from_user+0x26/0xa0 + tracing_set_trace_write+0x132/0x1e0 + ? __pfx_tracing_set_trace_write+0x10/0x10 + ? ftrace_graph_func+0xcc/0x140 + ? ftrace_stub_direct_tramp+0x10/0x10 + ? ftrace_stub_direct_tramp+0x10/0x10 + ? ftrace_stub_direct_tramp+0x10/0x10 + vfs_write+0x1d0/0xe90 + ? __pfx_vfs_write+0x10/0x10 + +Have the setting of the funcgraph-args check if function_graph tracer is +the current tracer of the instance, and if not, do nothing, as there's +nothing to do (the option is checked when function_graph tracing starts). 
+ +Cc: stable@vger.kernel.org +Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com> +Cc: Mark Rutland <mark.rutland@arm.com> +Link: https://lore.kernel.org/20250618073801.057ea636@gandalf.local.home +Fixes: c7a60a733c373 ("ftrace: Have funcgraph-args take affect during tracing") +Closes: https://lore.kernel.org/all/4ab1a7bdd0174ab09c7b0d68cdbff9a4@huawei.com/ +Reported-by: Changbin Du <changbin.du@huawei.com> +Tested-by: Changbin Du <changbin.du@huawei.com> +Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org> +Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/trace/trace_functions_graph.c | 6 ++++++ + 1 file changed, 6 insertions(+) + +--- a/kernel/trace/trace_functions_graph.c ++++ b/kernel/trace/trace_functions_graph.c +@@ -475,10 +475,16 @@ static int graph_trace_init(struct trace + return 0; + } + ++static struct tracer graph_trace; ++ + static int ftrace_graph_trace_args(struct trace_array *tr, int set) + { + trace_func_graph_ent_t entry; + ++ /* Do nothing if the current tracer is not this tracer */ ++ if (tr->current_trace != &graph_trace) ++ return 0; ++ + if (set) + entry = trace_graph_entry_args; + else diff --git a/queue-6.15/firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-ctl-cache.patch b/queue-6.15/firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-ctl-cache.patch new file mode 100644 index 00000000000..70c77afaa27 --- /dev/null +++ b/queue-6.15/firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-ctl-cache.patch @@ -0,0 +1,41 @@ +From f4ba2ea57da51d616b689c4b8826c517ff5a8523 Mon Sep 17 00:00:00 2001 +From: Jaroslav Kysela <perex@perex.cz> +Date: Fri, 23 May 2025 17:41:51 +0200 +Subject: firmware: cs_dsp: Fix OOB memory read access in KUnit test (ctl cache) + +From: Jaroslav Kysela <perex@perex.cz> + +commit f4ba2ea57da51d616b689c4b8826c517ff5a8523 upstream. + +KASAN reported out of bounds access - cs_dsp_ctl_cache_init_multiple_offsets(). +The code uses mock_coeff_template.length_bytes (4 bytes) for register value +allocations. But later, this length is set to 8 bytes which causes +test code failures. + +As a fix, just remove the length override, keeping the original value 4 +for all operations.
+ +Cc: Simon Trimmer <simont@opensource.cirrus.com> +Cc: Charles Keepax <ckeepax@opensource.cirrus.com> +Cc: Richard Fitzgerald <rf@opensource.cirrus.com> +Cc: patches@opensource.cirrus.com +Cc: stable@vger.kernel.org +Signed-off-by: Jaroslav Kysela <perex@perex.cz> +Reviewed-by: Richard Fitzgerald <rf@opensource.cirrus.com> +Link: https://patch.msgid.link/20250523154151.1252585-1-perex@perex.cz +Signed-off-by: Mark Brown <broonie@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c | 1 - + 1 file changed, 1 deletion(-) + +--- a/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c ++++ b/drivers/firmware/cirrus/test/cs_dsp_test_control_cache.c +@@ -776,7 +776,6 @@ static void cs_dsp_ctl_cache_init_multip + "dummyalg", NULL); + + /* Create controls identical except for offset */ +- def.length_bytes = 8; + def.offset_dsp_words = 0; + def.shortname = "CtlA"; + cs_dsp_mock_wmfw_add_coeff_desc(local->wmfw_builder, &def); diff --git a/queue-6.15/firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-wmfw-info.patch b/queue-6.15/firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-wmfw-info.patch new file mode 100644 index 00000000000..f57456a3125 --- /dev/null +++ b/queue-6.15/firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-wmfw-info.patch @@ -0,0 +1,41 @@ +From d979b783d61f7f1f95664031b71a33afc74627b2 Mon Sep 17 00:00:00 2001 +From: Jaroslav Kysela <perex@perex.cz> +Date: Fri, 23 May 2025 17:58:14 +0200 +Subject: firmware: cs_dsp: Fix OOB memory read access in KUnit test (wmfw info) + +From: Jaroslav Kysela <perex@perex.cz> + +commit d979b783d61f7f1f95664031b71a33afc74627b2 upstream. + +KASAN reported out of bounds access - cs_dsp_mock_wmfw_add_info(), +because the source string length was rounded up to the allocation size. 
+ +Cc: Simon Trimmer <simont@opensource.cirrus.com> +Cc: Charles Keepax <ckeepax@opensource.cirrus.com> +Cc: Richard Fitzgerald <rf@opensource.cirrus.com> +Cc: patches@opensource.cirrus.com +Cc: stable@vger.kernel.org +Signed-off-by: Jaroslav Kysela <perex@perex.cz> +Reviewed-by: Richard Fitzgerald <rf@opensource.cirrus.com> +Link: https://patch.msgid.link/20250523155814.1256762-1-perex@perex.cz +Signed-off-by: Mark Brown <broonie@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c ++++ b/drivers/firmware/cirrus/test/cs_dsp_mock_wmfw.c +@@ -133,10 +133,11 @@ void cs_dsp_mock_wmfw_add_info(struct cs + + if (info_len % 4) { + /* Create a padded string with length a multiple of 4 */ ++ size_t copy_len = info_len; + info_len = round_up(info_len, 4); + tmp = kunit_kzalloc(builder->test_priv->test, info_len, GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(builder->test_priv->test, tmp); +- memcpy(tmp, info, info_len); ++ memcpy(tmp, info, copy_len); + info = tmp; + } + diff --git a/queue-6.15/io_uring-fix-task-leak-issue-in-io_wq_create.patch b/queue-6.15/io_uring-fix-task-leak-issue-in-io_wq_create.patch new file mode 100644 index 00000000000..02cac362cde --- /dev/null +++ b/queue-6.15/io_uring-fix-task-leak-issue-in-io_wq_create.patch @@ -0,0 +1,35 @@ +From 89465d923bda180299e69ee2800aab84ad0ba689 Mon Sep 17 00:00:00 2001 +From: Penglei Jiang <superman.xpt@gmail.com> +Date: Sun, 15 Jun 2025 09:39:06 -0700 +Subject: io_uring: fix task leak issue in io_wq_create() + +From: Penglei Jiang <superman.xpt@gmail.com> + +commit 89465d923bda180299e69ee2800aab84ad0ba689 upstream. + +Add missing put_task_struct() in the error path + +Cc: stable@vger.kernel.org +Fixes: 0f8baa3c9802 ("io-wq: fully initialize wqe before calling cpuhp_state_add_instance_nocalls()") +Signed-off-by: Penglei Jiang <superman.xpt@gmail.com> +Link: https://lore.kernel.org/r/20250615163906.2367-1-superman.xpt@gmail.com +Signed-off-by: Jens Axboe <axboe@kernel.dk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + io_uring/io-wq.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +--- a/io_uring/io-wq.c ++++ b/io_uring/io-wq.c +@@ -1236,8 +1236,10 @@ struct io_wq *io_wq_create(unsigned boun + atomic_set(&wq->worker_refs, 1); + init_completion(&wq->worker_done); + ret = cpuhp_state_add_instance_nocalls(io_wq_online, &wq->cpuhp_node); +- if (ret) ++ if (ret) { ++ put_task_struct(wq->task); + goto err; ++ } + + return wq; + err: diff --git a/queue-6.15/io_uring-kbuf-don-t-truncate-end-buffer-for-multiple-buffer-peeks.patch b/queue-6.15/io_uring-kbuf-don-t-truncate-end-buffer-for-multiple-buffer-peeks.patch new file mode 100644 index 00000000000..29b4adc74c9 --- /dev/null +++ b/queue-6.15/io_uring-kbuf-don-t-truncate-end-buffer-for-multiple-buffer-peeks.patch @@ -0,0 +1,38 @@ +From 26ec15e4b0c1d7b25214d9c0be1d50492e2f006c Mon Sep 17 00:00:00 2001 +From: Jens Axboe <axboe@kernel.dk> +Date: Fri, 13 Jun 2025 11:01:49 -0600 +Subject: io_uring/kbuf: don't truncate end buffer for multiple buffer peeks + +From: Jens Axboe <axboe@kernel.dk> + +commit 26ec15e4b0c1d7b25214d9c0be1d50492e2f006c upstream. + +If peeking a bunch of buffers, normally io_ring_buffers_peek() will +truncate the end buffer. 
This isn't optimal as presumably more data will +be arriving later, and hence it's better to stop with the last full +buffer rather than truncate the end buffer. + +Cc: stable@vger.kernel.org +Fixes: 35c8711c8fc4 ("io_uring/kbuf: add helpers for getting/peeking multiple buffers") +Reported-by: Christian Mazakas <christian.mazakas@gmail.com> +Signed-off-by: Jens Axboe <axboe@kernel.dk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + io_uring/kbuf.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +--- a/io_uring/kbuf.c ++++ b/io_uring/kbuf.c +@@ -270,8 +270,11 @@ static int io_ring_buffers_peek(struct i + /* truncate end piece, if needed, for non partial buffers */ + if (len > arg->max_len) { + len = arg->max_len; +- if (!(bl->flags & IOBL_INC)) ++ if (!(bl->flags & IOBL_INC)) { ++ if (iov != arg->iovs) ++ break; + buf->len = len; ++ } + } + + iov->iov_base = u64_to_user_ptr(buf->addr); diff --git a/queue-6.15/io_uring-rsrc-validate-buffer-count-with-offset-for-cloning.patch b/queue-6.15/io_uring-rsrc-validate-buffer-count-with-offset-for-cloning.patch new file mode 100644 index 00000000000..1619c041c3f --- /dev/null +++ b/queue-6.15/io_uring-rsrc-validate-buffer-count-with-offset-for-cloning.patch @@ -0,0 +1,81 @@ +From 1d27f11bf02b38c431e49a17dee5c10a2b4c2e28 Mon Sep 17 00:00:00 2001 +From: Jens Axboe <axboe@kernel.dk> +Date: Sun, 15 Jun 2025 08:09:14 -0600 +Subject: io_uring/rsrc: validate buffer count with offset for cloning + +From: Jens Axboe <axboe@kernel.dk> + +commit 1d27f11bf02b38c431e49a17dee5c10a2b4c2e28 upstream. + +syzbot reports that it can trigger a WARN_ON() for kmalloc() attempt +that's too big: + +WARNING: CPU: 0 PID: 6488 at mm/slub.c:5024 __kvmalloc_node_noprof+0x520/0x640 mm/slub.c:5024 +Modules linked in: +CPU: 0 UID: 0 PID: 6488 Comm: syz-executor312 Not tainted 6.15.0-rc7-syzkaller-gd7fa1af5b33e #0 PREEMPT +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/07/2025 +pstate: 20400005 (nzCv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) +pc : __kvmalloc_node_noprof+0x520/0x640 mm/slub.c:5024 +lr : __do_kmalloc_node mm/slub.c:-1 [inline] +lr : __kvmalloc_node_noprof+0x3b4/0x640 mm/slub.c:5012 +sp : ffff80009cfd7a90 +x29: ffff80009cfd7ac0 x28: ffff0000dd52a120 x27: 0000000000412dc0 +x26: 0000000000000178 x25: ffff7000139faf70 x24: 0000000000000000 +x23: ffff800082f4cea8 x22: 00000000ffffffff x21: 000000010cd004a8 +x20: ffff0000d75816c0 x19: ffff0000dd52a000 x18: 00000000ffffffff +x17: ffff800092f39000 x16: ffff80008adbe9e4 x15: 0000000000000005 +x14: 1ffff000139faf1c x13: 0000000000000000 x12: 0000000000000000 +x11: ffff7000139faf21 x10: 0000000000000003 x9 : ffff80008f27b938 +x8 : 0000000000000002 x7 : 0000000000000000 x6 : 0000000000000000 +x5 : 00000000ffffffff x4 : 0000000000400dc0 x3 : 0000000200000000 +x2 : 000000010cd004a8 x1 : ffff80008b3ebc40 x0 : 0000000000000001 +Call trace: + __kvmalloc_node_noprof+0x520/0x640 mm/slub.c:5024 (P) + kvmalloc_array_node_noprof include/linux/slab.h:1065 [inline] + io_rsrc_data_alloc io_uring/rsrc.c:206 [inline] + io_clone_buffers io_uring/rsrc.c:1178 [inline] + io_register_clone_buffers+0x484/0xa14 io_uring/rsrc.c:1287 + __io_uring_register io_uring/register.c:815 [inline] + __do_sys_io_uring_register io_uring/register.c:926 [inline] + __se_sys_io_uring_register io_uring/register.c:903 [inline] + __arm64_sys_io_uring_register+0x42c/0xea8 io_uring/register.c:903 + __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] + invoke_syscall+0x98/0x2b8 
arch/arm64/kernel/syscall.c:49 + el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 + do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 + el0_svc+0x58/0x17c arch/arm64/kernel/entry-common.c:767 + el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:786 + el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 + +which is due to offset + buffer_count being too large. The registration +code checks only the total count of buffers, but given that the indexing +is an array, it should also check offset + count. That can't exceed +IORING_MAX_REG_BUFFERS either, as there's no way to reach buffers beyond +that limit. + +There's no issue with registering a table this large, outside of the +fact that it's pointless to register buffers that cannot be reached, and +that it can trigger this kmalloc() warning for attempting an allocation +that is too large. + +Cc: stable@vger.kernel.org +Fixes: b16e920a1909 ("io_uring/rsrc: allow cloning at an offset") +Reported-by: syzbot+cb4bf3cb653be0d25de8@syzkaller.appspotmail.com +Link: https://lore.kernel.org/io-uring/684e77bd.a00a0220.279073.0029.GAE@google.com/ +Signed-off-by: Jens Axboe <axboe@kernel.dk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + io_uring/rsrc.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/io_uring/rsrc.c ++++ b/io_uring/rsrc.c +@@ -1174,6 +1174,8 @@ static int io_clone_buffers(struct io_ri + return -EINVAL; + if (check_add_overflow(arg->nr, arg->dst_off, &nbufs)) + return -EOVERFLOW; ++ if (nbufs > IORING_MAX_REG_BUFFERS) ++ return -EINVAL; + + ret = io_rsrc_data_alloc(&data, max(nbufs, ctx->buf_table.nr)); + if (ret) diff --git a/queue-6.15/jffs2-check-jffs2_prealloc_raw_node_refs-result-in-few-other-places.patch b/queue-6.15/jffs2-check-jffs2_prealloc_raw_node_refs-result-in-few-other-places.patch new file mode 100644 index 00000000000..6f1aef7a000 --- /dev/null +++ b/queue-6.15/jffs2-check-jffs2_prealloc_raw_node_refs-result-in-few-other-places.patch @@ -0,0 +1,80 @@ +From 2b6d96503255a3ed676cd70f8368870c6d6a25c6 Mon Sep 17 00:00:00 2001 +From: Fedor Pchelkin <pchelkin@ispras.ru> +Date: Tue, 25 Mar 2025 19:32:13 +0300 +Subject: jffs2: check jffs2_prealloc_raw_node_refs() result in few other places + +From: Fedor Pchelkin <pchelkin@ispras.ru> + +commit 2b6d96503255a3ed676cd70f8368870c6d6a25c6 upstream. + +Fuzzing hit another invalid pointer dereference due to the lack of +checking whether jffs2_prealloc_raw_node_refs() completed successfully. +Subsequent logic implies that the node refs have been allocated. + +Handle that. The code is ready for propagating the error upwards.
+ +KASAN: null-ptr-deref in range [0x0000000000000008-0x000000000000000f] +CPU: 1 PID: 5835 Comm: syz-executor145 Not tainted 5.10.234-syzkaller #0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 +RIP: 0010:jffs2_link_node_ref+0xac/0x690 fs/jffs2/nodelist.c:600 +Call Trace: + jffs2_mark_erased_block fs/jffs2/erase.c:460 [inline] + jffs2_erase_pending_blocks+0x688/0x1860 fs/jffs2/erase.c:118 + jffs2_garbage_collect_pass+0x638/0x1a00 fs/jffs2/gc.c:253 + jffs2_reserve_space+0x3f4/0xad0 fs/jffs2/nodemgmt.c:167 + jffs2_write_inode_range+0x246/0xb50 fs/jffs2/write.c:362 + jffs2_write_end+0x712/0x1110 fs/jffs2/file.c:302 + generic_perform_write+0x2c2/0x500 mm/filemap.c:3347 + __generic_file_write_iter+0x252/0x610 mm/filemap.c:3465 + generic_file_write_iter+0xdb/0x230 mm/filemap.c:3497 + call_write_iter include/linux/fs.h:2039 [inline] + do_iter_readv_writev+0x46d/0x750 fs/read_write.c:740 + do_iter_write+0x18c/0x710 fs/read_write.c:866 + vfs_writev+0x1db/0x6a0 fs/read_write.c:939 + do_pwritev fs/read_write.c:1036 [inline] + __do_sys_pwritev fs/read_write.c:1083 [inline] + __se_sys_pwritev fs/read_write.c:1078 [inline] + __x64_sys_pwritev+0x235/0x310 fs/read_write.c:1078 + do_syscall_64+0x30/0x40 arch/x86/entry/common.c:46 + entry_SYSCALL_64_after_hwframe+0x67/0xd1 + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. + +Fixes: 2f785402f39b ("[JFFS2] Reduce visibility of raw_node_ref to upper layers of JFFS2 code.") +Fixes: f560928baa60 ("[JFFS2] Allocate node_ref for wasted space when skipping to page boundary") +Cc: stable@vger.kernel.org +Signed-off-by: Fedor Pchelkin <pchelkin@ispras.ru> +Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com> +Signed-off-by: Richard Weinberger <richard@nod.at> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/jffs2/erase.c | 4 +++- + fs/jffs2/scan.c | 4 +++- + 2 files changed, 6 insertions(+), 2 deletions(-) + +--- a/fs/jffs2/erase.c ++++ b/fs/jffs2/erase.c +@@ -425,7 +425,9 @@ static void jffs2_mark_erased_block(stru + .totlen = cpu_to_je32(c->cleanmarker_size) + }; + +- jffs2_prealloc_raw_node_refs(c, jeb, 1); ++ ret = jffs2_prealloc_raw_node_refs(c, jeb, 1); ++ if (ret) ++ goto filebad; + + marker.hdr_crc = cpu_to_je32(crc32(0, &marker, sizeof(struct jffs2_unknown_node)-4)); + +--- a/fs/jffs2/scan.c ++++ b/fs/jffs2/scan.c +@@ -256,7 +256,9 @@ int jffs2_scan_medium(struct jffs2_sb_in + + jffs2_dbg(1, "%s(): Skipping %d bytes in nextblock to ensure page alignment\n", + __func__, skip); +- jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); ++ ret = jffs2_prealloc_raw_node_refs(c, c->nextblock, 1); ++ if (ret) ++ goto out; + jffs2_scan_dirty_space(c, c->nextblock, skip); + } + #endif diff --git a/queue-6.15/jffs2-check-that-raw-node-were-preallocated-before-writing-summary.patch b/queue-6.15/jffs2-check-that-raw-node-were-preallocated-before-writing-summary.patch new file mode 100644 index 00000000000..ad59dd44def --- /dev/null +++ b/queue-6.15/jffs2-check-that-raw-node-were-preallocated-before-writing-summary.patch @@ -0,0 +1,87 @@ +From ec9e6f22bce433b260ea226de127ec68042849b0 Mon Sep 17 00:00:00 2001 +From: Artem Sadovnikov <a.sadovnikov@ispras.ru> +Date: Fri, 7 Mar 2025 16:34:09 +0000 +Subject: jffs2: check that raw node were preallocated before writing summary + +From: Artem Sadovnikov <a.sadovnikov@ispras.ru> + +commit ec9e6f22bce433b260ea226de127ec68042849b0 upstream. 
+ +Syzkaller detected a kernel bug in jffs2_link_node_ref, caused by fault +injection in jffs2_prealloc_raw_node_refs. jffs2_sum_write_sumnode doesn't +check return value of jffs2_prealloc_raw_node_refs and simply lets any +error propagate into jffs2_sum_write_data, which eventually calls +jffs2_link_node_ref in order to link the summary to an expectedly allocated +node. + +kernel BUG at fs/jffs2/nodelist.c:592! +invalid opcode: 0000 [#1] PREEMPT SMP KASAN NOPTI +CPU: 1 PID: 31277 Comm: syz-executor.7 Not tainted 6.1.128-syzkaller-00139-ge10f83ca10a1 #0 +Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.12.0-1 04/01/2014 +RIP: 0010:jffs2_link_node_ref+0x570/0x690 fs/jffs2/nodelist.c:592 +Call Trace: + <TASK> + jffs2_sum_write_data fs/jffs2/summary.c:841 [inline] + jffs2_sum_write_sumnode+0xd1a/0x1da0 fs/jffs2/summary.c:874 + jffs2_do_reserve_space+0xa18/0xd60 fs/jffs2/nodemgmt.c:388 + jffs2_reserve_space+0x55f/0xaa0 fs/jffs2/nodemgmt.c:197 + jffs2_write_inode_range+0x246/0xb50 fs/jffs2/write.c:362 + jffs2_write_end+0x726/0x15d0 fs/jffs2/file.c:301 + generic_perform_write+0x314/0x5d0 mm/filemap.c:3856 + __generic_file_write_iter+0x2ae/0x4d0 mm/filemap.c:3973 + generic_file_write_iter+0xe3/0x350 mm/filemap.c:4005 + call_write_iter include/linux/fs.h:2265 [inline] + do_iter_readv_writev+0x20f/0x3c0 fs/read_write.c:735 + do_iter_write+0x186/0x710 fs/read_write.c:861 + vfs_iter_write+0x70/0xa0 fs/read_write.c:902 + iter_file_splice_write+0x73b/0xc90 fs/splice.c:685 + do_splice_from fs/splice.c:763 [inline] + direct_splice_actor+0x10c/0x170 fs/splice.c:950 + splice_direct_to_actor+0x337/0xa10 fs/splice.c:896 + do_splice_direct+0x1a9/0x280 fs/splice.c:1002 + do_sendfile+0xb13/0x12c0 fs/read_write.c:1255 + __do_sys_sendfile64 fs/read_write.c:1323 [inline] + __se_sys_sendfile64 fs/read_write.c:1309 [inline] + __x64_sys_sendfile64+0x1cf/0x210 fs/read_write.c:1309 + do_syscall_x64 arch/x86/entry/common.c:51 [inline] + do_syscall_64+0x35/0x80 arch/x86/entry/common.c:81 + entry_SYSCALL_64_after_hwframe+0x6e/0xd8 + +Fix this issue by checking return value of jffs2_prealloc_raw_node_refs +before calling jffs2_sum_write_data. + +Found by Linux Verification Center (linuxtesting.org) with Syzkaller. 
+ +Cc: stable@vger.kernel.org +Fixes: 2f785402f39b ("[JFFS2] Reduce visibility of raw_node_ref to upper layers of JFFS2 code.") +Signed-off-by: Artem Sadovnikov <a.sadovnikov@ispras.ru> +Reviewed-by: Zhihao Cheng <chengzhihao1@huawei.com> +Signed-off-by: Richard Weinberger <richard@nod.at> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/jffs2/summary.c | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +--- a/fs/jffs2/summary.c ++++ b/fs/jffs2/summary.c +@@ -858,7 +858,10 @@ int jffs2_sum_write_sumnode(struct jffs2 + spin_unlock(&c->erase_completion_lock); + + jeb = c->nextblock; +- jffs2_prealloc_raw_node_refs(c, jeb, 1); ++ ret = jffs2_prealloc_raw_node_refs(c, jeb, 1); ++ ++ if (ret) ++ goto out; + + if (!c->summary->sum_num || !c->summary->sum_list_head) { + JFFS2_WARNING("Empty summary info!!!\n"); +@@ -872,6 +875,8 @@ int jffs2_sum_write_sumnode(struct jffs2 + datasize += padsize; + + ret = jffs2_sum_write_data(c, jeb, infosize, datasize, padsize); ++ ++out: + spin_lock(&c->erase_completion_lock); + return ret; + } diff --git a/queue-6.15/ksmbd-fix-null-pointer-dereference-in-destroy_previous_session.patch b/queue-6.15/ksmbd-fix-null-pointer-dereference-in-destroy_previous_session.patch new file mode 100644 index 00000000000..740cd6e5ce0 --- /dev/null +++ b/queue-6.15/ksmbd-fix-null-pointer-dereference-in-destroy_previous_session.patch @@ -0,0 +1,50 @@ +From 7ac5b66acafcc9292fb935d7e03790f2b8b2dc0e Mon Sep 17 00:00:00 2001 +From: Namjae Jeon <linkinjeon@kernel.org> +Date: Fri, 13 Jun 2025 10:12:43 +0900 +Subject: ksmbd: fix null pointer dereference in destroy_previous_session + +From: Namjae Jeon <linkinjeon@kernel.org> + +commit 7ac5b66acafcc9292fb935d7e03790f2b8b2dc0e upstream. + +If a client sets ->PreviousSessionId at the kerberos session setup stage, a +NULL pointer dereference error will happen. Since sess->user is not +set yet, it can pass the user argument as NULL to destroy_previous_session. +sess->user will be set in ksmbd_krb5_authenticate(). So this patch moves +the destroy_previous_session() call to after ksmbd_krb5_authenticate().
+ +Cc: stable@vger.kernel.org +Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-27391 +Signed-off-by: Namjae Jeon <linkinjeon@kernel.org> +Signed-off-by: Steve French <stfrench@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/smb/server/smb2pdu.c | 11 ++++++----- + 1 file changed, 6 insertions(+), 5 deletions(-) + +--- a/fs/smb/server/smb2pdu.c ++++ b/fs/smb/server/smb2pdu.c +@@ -1607,17 +1607,18 @@ static int krb5_authenticate(struct ksmb + out_len = work->response_sz - + (le16_to_cpu(rsp->SecurityBufferOffset) + 4); + +- /* Check previous session */ +- prev_sess_id = le64_to_cpu(req->PreviousSessionId); +- if (prev_sess_id && prev_sess_id != sess->id) +- destroy_previous_session(conn, sess->user, prev_sess_id); +- + retval = ksmbd_krb5_authenticate(sess, in_blob, in_len, + out_blob, &out_len); + if (retval) { + ksmbd_debug(SMB, "krb5 authentication failed\n"); + return -EINVAL; + } ++ ++ /* Check previous session */ ++ prev_sess_id = le64_to_cpu(req->PreviousSessionId); ++ if (prev_sess_id && prev_sess_id != sess->id) ++ destroy_previous_session(conn, sess->user, prev_sess_id); ++ + rsp->SecurityBufferLength = cpu_to_le16(out_len); + + if ((conn->sign || server_conf.enforced_signing) || diff --git a/queue-6.15/loongarch-avoid-using-r0-r1-as-mask-for-csrxchg.patch b/queue-6.15/loongarch-avoid-using-r0-r1-as-mask-for-csrxchg.patch new file mode 100644 index 00000000000..0551faf8ac2 --- /dev/null +++ b/queue-6.15/loongarch-avoid-using-r0-r1-as-mask-for-csrxchg.patch @@ -0,0 +1,90 @@ +From 52c22661c79a7b6af7fad9f77200738fc6c51878 Mon Sep 17 00:00:00 2001 +From: Huacai Chen <chenhuacai@loongson.cn> +Date: Fri, 30 May 2025 21:45:48 +0800 +Subject: LoongArch: Avoid using $r0/$r1 as "mask" for csrxchg + +From: Huacai Chen <chenhuacai@loongson.cn> + +commit 52c22661c79a7b6af7fad9f77200738fc6c51878 upstream. + +When building kernel with LLVM there are occasionally such errors: + +In file included from ./include/linux/spinlock.h:59: +In file included from ./include/linux/irqflags.h:17: +arch/loongarch/include/asm/irqflags.h:38:3: error: must not be $r0 or $r1 + 38 | "csrxchg %[val], %[mask], %[reg]\n\t" + | ^ +<inline asm>:1:16: note: instantiated into assembly here + 1 | csrxchg $a1, $ra, 0 + | ^ + +To prevent the compiler from allocating $r0 or $r1 for the "mask" of the +csrxchg instruction, the 'q' constraint must be used but Clang < 21 does +not support it. So force to use $t0 in the inline asm, in order to avoid +using $r0/$r1 while keeping the backward compatibility. 
+ +Cc: stable@vger.kernel.org +Link: https://github.com/llvm/llvm-project/pull/141037 +Reviewed-by: Yanteng Si <si.yanteng@linux.dev> +Suggested-by: WANG Rui <wangrui@loongson.cn> +Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/loongarch/include/asm/irqflags.h | 16 ++++++++++++---- + 1 file changed, 12 insertions(+), 4 deletions(-) + +--- a/arch/loongarch/include/asm/irqflags.h ++++ b/arch/loongarch/include/asm/irqflags.h +@@ -14,40 +14,48 @@ + static inline void arch_local_irq_enable(void) + { + u32 flags = CSR_CRMD_IE; ++ register u32 mask asm("t0") = CSR_CRMD_IE; ++ + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) +- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) ++ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + } + + static inline void arch_local_irq_disable(void) + { + u32 flags = 0; ++ register u32 mask asm("t0") = CSR_CRMD_IE; ++ + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) +- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) ++ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + } + + static inline unsigned long arch_local_irq_save(void) + { + u32 flags = 0; ++ register u32 mask asm("t0") = CSR_CRMD_IE; ++ + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) +- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) ++ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + return flags; + } + + static inline void arch_local_irq_restore(unsigned long flags) + { ++ register u32 mask asm("t0") = CSR_CRMD_IE; ++ + __asm__ __volatile__( + "csrxchg %[val], %[mask], %[reg]\n\t" + : [val] "+r" (flags) +- : [mask] "r" (CSR_CRMD_IE), [reg] "i" (LOONGARCH_CSR_CRMD) ++ : [mask] "r" (mask), [reg] "i" (LOONGARCH_CSR_CRMD) + : "memory"); + } + diff --git a/queue-6.15/loongarch-fix-panic-caused-by-null-pmd-in-huge_pte_offset.patch b/queue-6.15/loongarch-fix-panic-caused-by-null-pmd-in-huge_pte_offset.patch new file mode 100644 index 00000000000..137aebe935a --- /dev/null +++ b/queue-6.15/loongarch-fix-panic-caused-by-null-pmd-in-huge_pte_offset.patch @@ -0,0 +1,48 @@ +From ee084fa96123ede8b0563a1b5a9b23adc43cd50d Mon Sep 17 00:00:00 2001 +From: Tianyang Zhang <zhangtianyang@loongson.cn> +Date: Fri, 30 May 2025 21:45:57 +0800 +Subject: LoongArch: Fix panic caused by NULL-PMD in huge_pte_offset() + +From: Tianyang Zhang <zhangtianyang@loongson.cn> + +commit ee084fa96123ede8b0563a1b5a9b23adc43cd50d upstream. + +ERROR INFO: + +CPU 25 Unable to handle kernel paging request at virtual address 0x0 + ... + Call Trace: + [<900000000023c30c>] huge_pte_offset+0x3c/0x58 + [<900000000057fd4c>] hugetlb_follow_page_mask+0x74/0x438 + [<900000000051fee8>] __get_user_pages+0xe0/0x4c8 + [<9000000000522414>] faultin_page_range+0x84/0x380 + [<9000000000564e8c>] madvise_vma_behavior+0x534/0xa48 + [<900000000056689c>] do_madvise+0x1bc/0x3e8 + [<9000000000566df4>] sys_madvise+0x24/0x38 + [<90000000015b9e88>] do_syscall+0x78/0x98 + [<9000000000221f18>] handle_syscall+0xb8/0x158 + +In some cases, pmd may be NULL, and callers rely on NULL as the return +value, so it is necessary to check for this situation here.
+ +Cc: stable@vger.kernel.org +Fixes: bd51834d1cf6 ("LoongArch: Return NULL from huge_pte_offset() for invalid PMD") +Signed-off-by: Tianyang Zhang <zhangtianyang@loongson.cn> +Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/loongarch/mm/hugetlbpage.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +--- a/arch/loongarch/mm/hugetlbpage.c ++++ b/arch/loongarch/mm/hugetlbpage.c +@@ -47,7 +47,8 @@ pte_t *huge_pte_offset(struct mm_struct + pmd = pmd_offset(pud, addr); + } + } +- return pmd_none(pmdp_get(pmd)) ? NULL : (pte_t *) pmd; ++ ++ return (!pmd || pmd_none(pmdp_get(pmd))) ? NULL : (pte_t *) pmd; + } + + uint64_t pmd_to_entrylo(unsigned long pmd_val) diff --git a/queue-6.15/loongarch-vdso-correctly-use-asm-parameters-in-syscall-wrappers.patch b/queue-6.15/loongarch-vdso-correctly-use-asm-parameters-in-syscall-wrappers.patch new file mode 100644 index 00000000000..88f1298bbfa --- /dev/null +++ b/queue-6.15/loongarch-vdso-correctly-use-asm-parameters-in-syscall-wrappers.patch @@ -0,0 +1,82 @@ +From e242bbbb6d7ac7556aa1e358294dc7e3c82cc902 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= <thomas.weissschuh@linutronix.de> +Date: Thu, 5 Jun 2025 20:34:18 +0800 +Subject: LoongArch: vDSO: Correctly use asm parameters in syscall wrappers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Thomas Weißschuh <thomas.weissschuh@linutronix.de> + +commit e242bbbb6d7ac7556aa1e358294dc7e3c82cc902 upstream. + +The syscall wrappers use the "a0" register for two different register +variables, both the first argument and the return value. Here the "ret" +variable is used as both input and output while the argument register is +only used as input. Clang treats the conflicting input parameters as an +undefined behaviour and optimizes away the argument assignment. + +The code seems to work by chance for the most part today but that may +change in the future. Specifically clock_gettime_fallback() fails with +clockids from 16 to 23, as implemented by the upcoming auxiliary clocks. + +Switch the "ret" register variable to a pure output, similar to the +other architectures' vDSO code. This works in both clang and GCC. 
+ +Link: https://lore.kernel.org/lkml/20250602102825-42aa84f0-23f1-4d10-89fc-e8bbaffd291a@linutronix.de/ +Link: https://lore.kernel.org/lkml/20250519082042.742926976@linutronix.de/ +Fixes: c6b99bed6b8f ("LoongArch: Add VDSO and VSYSCALL support") +Fixes: 18efd0b10e0f ("LoongArch: vDSO: Wire up getrandom() vDSO implementation") +Cc: stable@vger.kernel.org +Reviewed-by: Nathan Chancellor <nathan@kernel.org> +Reviewed-by: Yanteng Si <si.yanteng@linux.dev> +Reviewed-by: WANG Xuerui <git@xen0n.name> +Reviewed-by: Xi Ruoyao <xry111@xry111.site> +Signed-off-by: Thomas Weißschuh <thomas.weissschuh@linutronix.de> +Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/loongarch/include/asm/vdso/getrandom.h | 2 +- + arch/loongarch/include/asm/vdso/gettimeofday.h | 6 +++--- + 2 files changed, 4 insertions(+), 4 deletions(-) + +--- a/arch/loongarch/include/asm/vdso/getrandom.h ++++ b/arch/loongarch/include/asm/vdso/getrandom.h +@@ -20,7 +20,7 @@ static __always_inline ssize_t getrandom + + asm volatile( + " syscall 0\n" +- : "+r" (ret) ++ : "=r" (ret) + : "r" (nr), "r" (buffer), "r" (len), "r" (flags) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t8", + "memory"); +--- a/arch/loongarch/include/asm/vdso/gettimeofday.h ++++ b/arch/loongarch/include/asm/vdso/gettimeofday.h +@@ -25,7 +25,7 @@ static __always_inline long gettimeofday + + asm volatile( + " syscall 0\n" +- : "+r" (ret) ++ : "=r" (ret) + : "r" (nr), "r" (tv), "r" (tz) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); +@@ -44,7 +44,7 @@ static __always_inline long clock_gettim + + asm volatile( + " syscall 0\n" +- : "+r" (ret) ++ : "=r" (ret) + : "r" (nr), "r" (clkid), "r" (ts) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); +@@ -63,7 +63,7 @@ static __always_inline int clock_getres_ + + asm volatile( + " syscall 0\n" +- : "+r" (ret) ++ : "=r" (ret) + : "r" (nr), "r" (clkid), "r" (ts) + : "$t0", "$t1", "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", + "$t8", "memory"); diff --git a/queue-6.15/mm-close-theoretical-race-where-stale-tlb-entries-could-linger.patch b/queue-6.15/mm-close-theoretical-race-where-stale-tlb-entries-could-linger.patch new file mode 100644 index 00000000000..5a891efd96d --- /dev/null +++ b/queue-6.15/mm-close-theoretical-race-where-stale-tlb-entries-could-linger.patch @@ -0,0 +1,95 @@ +From 383c4613c67c26e90e8eebb72e3083457d02033f Mon Sep 17 00:00:00 2001 +From: Ryan Roberts <ryan.roberts@arm.com> +Date: Fri, 6 Jun 2025 10:28:07 +0100 +Subject: mm: close theoretical race where stale TLB entries could linger + +From: Ryan Roberts <ryan.roberts@arm.com> + +commit 383c4613c67c26e90e8eebb72e3083457d02033f upstream. + +Commit 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a +parallel reclaim leaving stale TLB entries") described a theoretical race +as such: + + +""" +Nadav Amit identified a theoretical race between page reclaim and mprotect +due to TLB flushes being batched outside of the PTL being held. + +He described the race as follows: + + CPU0 CPU1 + ---- ---- + user accesses memory using RW PTE + [PTE now cached in TLB] + try_to_unmap_one() + ==> ptep_get_and_clear() + ==> set_tlb_ubc_flush_pending() + mprotect(addr, PROT_READ) + ==> change_pte_range() + ==> [ PTE non-present - no flush ] + + user writes using cached RW PTE + ... 
+ + try_to_unmap_flush() + +The same type of race exists for reads when protecting for PROT_NONE and +also exists for operations that can leave an old TLB entry behind such as +munmap, mremap and madvise. +""" + +The solution was to introduce flush_tlb_batched_pending() and call it +under the PTL from mprotect/madvise/munmap/mremap to complete any pending +tlb flushes. + +However, while madvise_free_pte_range() and +madvise_cold_or_pageout_pte_range() were both retro-fitted to call +flush_tlb_batched_pending() immediately after initially acquiring the PTL, +they both temporarily release the PTL to split a large folio if they +stumble upon one. In this case, when re-acquiring the PTL, +flush_tlb_batched_pending() must be called again, but it previously was +not. Let's fix that. + +There are 2 Fixes: tags here: the first is the commit that fixed +madvise_free_pte_range(). The second is the commit that added +madvise_cold_or_pageout_pte_range(), which looks like it copy/pasted the +faulty pattern from madvise_free_pte_range(). + +This is a theoretical bug discovered during code review. + +Link: https://lkml.kernel.org/r/20250606092809.4194056-1-ryan.roberts@arm.com +Fixes: 3ea277194daa ("mm, mprotect: flush TLB if potentially racing with a parallel reclaim leaving stale TLB entries") +Fixes: 9c276cc65a58 ("mm: introduce MADV_COLD") +Signed-off-by: Ryan Roberts <ryan.roberts@arm.com> +Reviewed-by: Jann Horn <jannh@google.com> +Acked-by: David Hildenbrand <david@redhat.com> +Cc: Liam Howlett <liam.howlett@oracle.com> +Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +Cc: Mel Gorman <mgorman@suse.de> +Cc: Vlastimil Babka <vbabka@suse.cz> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + mm/madvise.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/mm/madvise.c ++++ b/mm/madvise.c +@@ -503,6 +503,7 @@ restart: + pte_offset_map_lock(mm, pmd, addr, &ptl); + if (!start_pte) + break; ++ flush_tlb_batched_pending(mm); + arch_enter_lazy_mmu_mode(); + if (!err) + nr = 0; +@@ -736,6 +737,7 @@ static int madvise_free_pte_range(pmd_t + start_pte = pte; + if (!start_pte) + break; ++ flush_tlb_batched_pending(mm); + arch_enter_lazy_mmu_mode(); + if (!err) + nr = 0; diff --git a/queue-6.15/mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch b/queue-6.15/mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch new file mode 100644 index 00000000000..c433d8448ca --- /dev/null +++ b/queue-6.15/mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch @@ -0,0 +1,98 @@ +From 0cf4b1687a187ba9247c71721d8b064634eda1f7 Mon Sep 17 00:00:00 2001 +From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +Date: Fri, 6 Jun 2025 13:50:32 +0100 +Subject: mm/vma: reset VMA iterator on commit_merge() OOM failure + +From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> + +commit 0cf4b1687a187ba9247c71721d8b064634eda1f7 upstream. + +While an OOM failure in commit_merge() isn't really feasible due to the +allocation which might fail (a maple tree pre-allocation) being 'too small +to fail', we do need to handle this case correctly regardless. + +In vma_merge_existing_range(), we can theoretically encounter failures +which result in an OOM error in two ways - firstly dup_anon_vma() might +fail with an OOM error, and secondly commit_merge() failing, ultimately, +to pre-allocate a maple tree node.
+ +The abort logic for dup_anon_vma() resets the VMA iterator to the initial +range, ensuring that any logic looping on this iterator will correctly +proceed to the next VMA. + +However the commit_merge() abort logic does not do the same thing. This +resulted in a syzbot report occurring because mlockall() iterates through +VMAs, is tolerant of errors, but ended up with an incorrect previous VMA +being specified due to incorrect iterator state. + +While making this change, it became apparent we are duplicating logic - +the logic introduced in commit 41e6ddcaa0f1 ("mm/vma: add give_up_on_oom +option on modify/merge, use in uffd release") duplicates the +vmg->give_up_on_oom check in both abort branches. + +Additionally, we observe that we can perform the anon_dup check safely on +dup_anon_vma() failure, as this will not be modified should this call +fail. + +Finally, we need to reset the iterator in both cases, so now we can simply +use the exact same code to abort for both. + +We remove the VM_WARN_ON(err != -ENOMEM) as it would be silly for this to +be otherwise and it allows us to implement the abort check more neatly. + +Link: https://lkml.kernel.org/r/20250606125032.164249-1-lorenzo.stoakes@oracle.com +Fixes: 47b16d0462a4 ("mm: abort vma_modify() on merge out of memory failure") +Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> +Reported-by: syzbot+d16409ea9ecc16ed261a@syzkaller.appspotmail.com +Closes: https://lore.kernel.org/linux-mm/6842cc67.a00a0220.29ac89.003b.GAE@google.com/ +Reviewed-by: Pedro Falcato <pfalcato@suse.de> +Reviewed-by: Vlastimil Babka <vbabka@suse.cz> +Reviewed-by: Liam R. Howlett <Liam.Howlett@oracle.com> +Cc: Jann Horn <jannh@google.com> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + mm/vma.c | 22 ++++------------------ + 1 file changed, 4 insertions(+), 18 deletions(-) + +--- a/mm/vma.c ++++ b/mm/vma.c +@@ -927,26 +927,9 @@ static __must_check struct vm_area_struc + err = dup_anon_vma(next, middle, &anon_dup); + } + +- if (err) ++ if (err || commit_merge(vmg)) + goto abort; + +- err = commit_merge(vmg); +- if (err) { +- VM_WARN_ON(err != -ENOMEM); +- +- if (anon_dup) +- unlink_anon_vmas(anon_dup); +- +- /* +- * We've cleaned up any cloned anon_vma's, no VMAs have been +- * modified, no harm no foul if the user requests that we not +- * report this and just give up, leaving the VMAs unmerged. 
+- */ +- if (!vmg->give_up_on_oom) +- vmg->state = VMA_MERGE_ERROR_NOMEM; +- return NULL; +- } +- + khugepaged_enter_vma(vmg->target, vmg->flags); + vmg->state = VMA_MERGE_SUCCESS; + return vmg->target; +@@ -955,6 +938,9 @@ abort: + vma_iter_set(vmg->vmi, start); + vma_iter_load(vmg->vmi); + ++ if (anon_dup) ++ unlink_anon_vmas(anon_dup); ++ + /* + * This means we have failed to clone anon_vma's correctly, but no + * actual changes to VMAs have occurred, so no harm no foul - if the diff --git a/queue-6.15/net-clear-the-dst-when-changing-skb-protocol.patch b/queue-6.15/net-clear-the-dst-when-changing-skb-protocol.patch new file mode 100644 index 00000000000..a468f9d4a75 --- /dev/null +++ b/queue-6.15/net-clear-the-dst-when-changing-skb-protocol.patch @@ -0,0 +1,118 @@ +From ba9db6f907ac02215e30128770f85fbd7db2fcf9 Mon Sep 17 00:00:00 2001 +From: Jakub Kicinski <kuba@kernel.org> +Date: Mon, 9 Jun 2025 17:12:44 -0700 +Subject: net: clear the dst when changing skb protocol +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Jakub Kicinski <kuba@kernel.org> + +commit ba9db6f907ac02215e30128770f85fbd7db2fcf9 upstream. + +A not-so-careful NAT46 BPF program can crash the kernel +if it indiscriminately flips ingress packets from v4 to v6: + + BUG: kernel NULL pointer dereference, address: 0000000000000000 + ip6_rcv_core (net/ipv6/ip6_input.c:190:20) + ipv6_rcv (net/ipv6/ip6_input.c:306:8) + process_backlog (net/core/dev.c:6186:4) + napi_poll (net/core/dev.c:6906:9) + net_rx_action (net/core/dev.c:7028:13) + do_softirq (kernel/softirq.c:462:3) + netif_rx (net/core/dev.c:5326:3) + dev_loopback_xmit (net/core/dev.c:4015:2) + ip_mc_finish_output (net/ipv4/ip_output.c:363:8) + NF_HOOK (./include/linux/netfilter.h:314:9) + ip_mc_output (net/ipv4/ip_output.c:400:5) + dst_output (./include/net/dst.h:459:9) + ip_local_out (net/ipv4/ip_output.c:130:9) + ip_send_skb (net/ipv4/ip_output.c:1496:8) + udp_send_skb (net/ipv4/udp.c:1040:8) + udp_sendmsg (net/ipv4/udp.c:1328:10) + +The output interface has a 4->6 program attached at ingress. +We try to loop the multicast skb back to the sending socket. +Ingress BPF runs as part of netif_rx(), pushes a valid v6 hdr +and changes skb->protocol to v6. We enter ip6_rcv_core which +tries to use skb_dst(). But the dst is still an IPv4 one left +after IPv4 mcast output. + +Clear the dst in all BPF helpers which change the protocol. +Try to preserve metadata dsts, those may carry non-routing +metadata. 
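+
+The helper added below captures both rules in one place (quoted from the
+hunk in net/core/filter.c):
+
+	static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto)
+	{
+		skb->protocol = htons(proto);
+		if (skb_valid_dst(skb))		/* metadata dsts are left alone */
+			skb_dst_drop(skb);
+	}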
+ +Cc: stable@vger.kernel.org +Reviewed-by: Maciej Żenczykowski <maze@google.com> +Acked-by: Daniel Borkmann <daniel@iogearbox.net> +Fixes: d219df60a70e ("bpf: Add ipip6 and ip6ip decap support for bpf_skb_adjust_room()") +Fixes: 1b00e0dfe7d0 ("bpf: update skb->protocol in bpf_skb_net_grow") +Fixes: 6578171a7ff0 ("bpf: add bpf_skb_change_proto helper") +Reviewed-by: Willem de Bruijn <willemb@google.com> +Link: https://patch.msgid.link/20250610001245.1981782-1-kuba@kernel.org +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/core/filter.c | 19 +++++++++++++------ + 1 file changed, 13 insertions(+), 6 deletions(-) + +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -3233,6 +3233,13 @@ static const struct bpf_func_proto bpf_s + .arg1_type = ARG_PTR_TO_CTX, + }; + ++static void bpf_skb_change_protocol(struct sk_buff *skb, u16 proto) ++{ ++ skb->protocol = htons(proto); ++ if (skb_valid_dst(skb)) ++ skb_dst_drop(skb); ++} ++ + static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len) + { + /* Caller already did skb_cow() with len as headroom, +@@ -3329,7 +3336,7 @@ static int bpf_skb_proto_4_to_6(struct s + } + } + +- skb->protocol = htons(ETH_P_IPV6); ++ bpf_skb_change_protocol(skb, ETH_P_IPV6); + skb_clear_hash(skb); + + return 0; +@@ -3359,7 +3366,7 @@ static int bpf_skb_proto_6_to_4(struct s + } + } + +- skb->protocol = htons(ETH_P_IP); ++ bpf_skb_change_protocol(skb, ETH_P_IP); + skb_clear_hash(skb); + + return 0; +@@ -3550,10 +3557,10 @@ static int bpf_skb_net_grow(struct sk_bu + /* Match skb->protocol to new outer l3 protocol */ + if (skb->protocol == htons(ETH_P_IP) && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6) +- skb->protocol = htons(ETH_P_IPV6); ++ bpf_skb_change_protocol(skb, ETH_P_IPV6); + else if (skb->protocol == htons(ETH_P_IPV6) && + flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4) +- skb->protocol = htons(ETH_P_IP); ++ bpf_skb_change_protocol(skb, ETH_P_IP); + } + + if (skb_is_gso(skb)) { +@@ -3606,10 +3613,10 @@ static int bpf_skb_net_shrink(struct sk_ + /* Match skb->protocol to new outer l3 protocol */ + if (skb->protocol == htons(ETH_P_IP) && + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6) +- skb->protocol = htons(ETH_P_IPV6); ++ bpf_skb_change_protocol(skb, ETH_P_IPV6); + else if (skb->protocol == htons(ETH_P_IPV6) && + flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4) +- skb->protocol = htons(ETH_P_IP); ++ bpf_skb_change_protocol(skb, ETH_P_IP); + + if (skb_is_gso(skb)) { + struct skb_shared_info *shinfo = skb_shinfo(skb); diff --git a/queue-6.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch b/queue-6.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch new file mode 100644 index 00000000000..5c226d4e7bd --- /dev/null +++ b/queue-6.15/net_sched-sch_sfq-reject-invalid-perturb-period.patch @@ -0,0 +1,72 @@ +From 7ca52541c05c832d32b112274f81a985101f9ba8 Mon Sep 17 00:00:00 2001 +From: Eric Dumazet <edumazet@google.com> +Date: Wed, 11 Jun 2025 08:35:01 +0000 +Subject: net_sched: sch_sfq: reject invalid perturb period + +From: Eric Dumazet <edumazet@google.com> + +commit 7ca52541c05c832d32b112274f81a985101f9ba8 upstream. + +Gerrard Tai reported that SFQ perturb_period has no range check yet, +and this can be used to trigger a race condition fixed in a separate patch. + +We want to make sure ctl->perturb_period * HZ will not overflow +and is positive. + +Tested: + +tc qd add dev lo root sfq perturb -10 # negative value : error +Error: sch_sfq: invalid perturb period. 
+ +tc qd add dev lo root sfq perturb 1000000000 # too big : error +Error: sch_sfq: invalid perturb period. + +tc qd add dev lo root sfq perturb 2000000 # acceptable value +tc -s -d qd sh dev lo +qdisc sfq 8005: root refcnt 2 limit 127p quantum 64Kb depth 127 flows 128 divisor 1024 perturb 2000000sec + Sent 0 bytes 0 pkt (dropped 0, overlimits 0 requeues 0) + backlog 0b 0p requeues 0 + +Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") +Reported-by: Gerrard Tai <gerrard.tai@starlabs.sg> +Signed-off-by: Eric Dumazet <edumazet@google.com> +Cc: stable@vger.kernel.org +Link: https://patch.msgid.link/20250611083501.1810459-1-edumazet@google.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 10 ++++++++-- + 1 file changed, 8 insertions(+), 2 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -656,6 +656,14 @@ static int sfq_change(struct Qdisc *sch, + NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); + return -EINVAL; + } ++ ++ if (ctl->perturb_period < 0 || ++ ctl->perturb_period > INT_MAX / HZ) { ++ NL_SET_ERR_MSG_MOD(extack, "invalid perturb period"); ++ return -EINVAL; ++ } ++ perturb_period = ctl->perturb_period * HZ; ++ + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) + return -EINVAL; +@@ -672,14 +680,12 @@ static int sfq_change(struct Qdisc *sch, + headdrop = q->headdrop; + maxdepth = q->maxdepth; + maxflows = q->maxflows; +- perturb_period = q->perturb_period; + quantum = q->quantum; + flags = q->flags; + + /* update and validate configuration */ + if (ctl->quantum) + quantum = ctl->quantum; +- perturb_period = ctl->perturb_period * HZ; + if (ctl->flows) + maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { diff --git a/queue-6.15/nvme-always-punt-polled-uring_cmd-end_io-work-to-task_work.patch b/queue-6.15/nvme-always-punt-polled-uring_cmd-end_io-work-to-task_work.patch new file mode 100644 index 00000000000..38ed127c26f --- /dev/null +++ b/queue-6.15/nvme-always-punt-polled-uring_cmd-end_io-work-to-task_work.patch @@ -0,0 +1,59 @@ +From 9ce6c9875f3e995be5fd720b65835291f8a609b1 Mon Sep 17 00:00:00 2001 +From: Jens Axboe <axboe@kernel.dk> +Date: Fri, 13 Jun 2025 13:37:41 -0600 +Subject: nvme: always punt polled uring_cmd end_io work to task_work + +From: Jens Axboe <axboe@kernel.dk> + +commit 9ce6c9875f3e995be5fd720b65835291f8a609b1 upstream. + +Currently NVMe uring_cmd completions will complete locally, if they are +polled. This is done because those completions are always invoked from +task context. And while that is true, there's no guarantee that it's +invoked under the right ring context, or even task. If someone does +NVMe passthrough via multiple threads and with a limited number of +poll queues, then ringA may find completions from ringB. For that case, +completing the request may not be sound. + +Always just punt the passthrough completions via task_work, which will +redirect the completion, if needed. 
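+
+The polled branch goes away entirely; the end_io handler now finishes
+with a single deferral (condensed from the hunk below):
+
+	/* task_work redirects the completion to the ring that owns it */
+	io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+	return RQ_END_IO_FREE;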
+ +Cc: stable@vger.kernel.org +Fixes: 585079b6e425 ("nvme: wire up async polling for io passthrough commands") +Signed-off-by: Jens Axboe <axboe@kernel.dk> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/nvme/host/ioctl.c | 21 +++++++-------------- + 1 file changed, 7 insertions(+), 14 deletions(-) + +--- a/drivers/nvme/host/ioctl.c ++++ b/drivers/nvme/host/ioctl.c +@@ -429,21 +429,14 @@ static enum rq_end_io_ret nvme_uring_cmd + pdu->result = le64_to_cpu(nvme_req(req)->result.u64); + + /* +- * For iopoll, complete it directly. Note that using the uring_cmd +- * helper for this is safe only because we check blk_rq_is_poll(). +- * As that returns false if we're NOT on a polled queue, then it's +- * safe to use the polled completion helper. +- * +- * Otherwise, move the completion to task work. ++ * IOPOLL could potentially complete this request directly, but ++ * if multiple rings are polling on the same queue, then it's possible ++ * for one ring to find completions for another ring. Punting the ++ * completion via task_work will always direct it to the right ++ * location, rather than potentially complete requests for ringA ++ * under iopoll invocations from ringB. + */ +- if (blk_rq_is_poll(req)) { +- if (pdu->bio) +- blk_rq_unmap_user(pdu->bio); +- io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status); +- } else { +- io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); +- } +- ++ io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb); + return RQ_END_IO_FREE; + } + diff --git a/queue-6.15/platform-loongarch-laptop-add-backlight-power-control-support.patch b/queue-6.15/platform-loongarch-laptop-add-backlight-power-control-support.patch new file mode 100644 index 00000000000..885488d758c --- /dev/null +++ b/queue-6.15/platform-loongarch-laptop-add-backlight-power-control-support.patch @@ -0,0 +1,145 @@ +From 53c762b47f726e4079a1f06f684bce2fc0d56fba Mon Sep 17 00:00:00 2001 +From: Yao Zi <ziyao@disroot.org> +Date: Thu, 5 Jun 2025 20:34:46 +0800 +Subject: platform/loongarch: laptop: Add backlight power control support + +From: Yao Zi <ziyao@disroot.org> + +commit 53c762b47f726e4079a1f06f684bce2fc0d56fba upstream. + +loongson_laptop_turn_{on,off}_backlight() are designed for controlling +the power of the backlight, but they aren't really used in the driver +previously. + +Unify these two functions since they only differ in arguments passed to +ACPI method, and wire up loongson_laptop_backlight_update() to update +the power state of the backlight as well. Tested on the TongFang L860-T2 +Loongson-3A5000 laptop. + +Cc: stable@vger.kernel.org +Fixes: 6246ed09111f ("LoongArch: Add ACPI-based generic laptop driver") +Signed-off-by: Yao Zi <ziyao@disroot.org> +Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/platform/loongarch/loongson-laptop.c | 73 +++++++++++++-------------- + 1 file changed, 37 insertions(+), 36 deletions(-) + +--- a/drivers/platform/loongarch/loongson-laptop.c ++++ b/drivers/platform/loongarch/loongson-laptop.c +@@ -56,8 +56,7 @@ static struct input_dev *generic_inputde + static acpi_handle hotkey_handle; + static struct key_entry hotkey_keycode_map[GENERIC_HOTKEY_MAP_MAX]; + +-int loongson_laptop_turn_on_backlight(void); +-int loongson_laptop_turn_off_backlight(void); ++static bool bl_powered; + static int loongson_laptop_backlight_update(struct backlight_device *bd); + + /* 2. 
ACPI Helpers and device model */ +@@ -354,16 +353,42 @@ static int ec_backlight_level(u8 level) + return level; + } + ++static int ec_backlight_set_power(bool state) ++{ ++ int status; ++ union acpi_object arg0 = { ACPI_TYPE_INTEGER }; ++ struct acpi_object_list args = { 1, &arg0 }; ++ ++ arg0.integer.value = state; ++ status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); ++ if (ACPI_FAILURE(status)) { ++ pr_info("Loongson lvds error: 0x%x\n", status); ++ return -EIO; ++ } ++ ++ return 0; ++} ++ + static int loongson_laptop_backlight_update(struct backlight_device *bd) + { +- int lvl = ec_backlight_level(bd->props.brightness); ++ bool target_powered = !backlight_is_blank(bd); ++ int ret = 0, lvl = ec_backlight_level(bd->props.brightness); + + if (lvl < 0) + return -EIO; ++ + if (ec_set_brightness(lvl)) + return -EIO; + +- return 0; ++ if (target_powered != bl_powered) { ++ ret = ec_backlight_set_power(target_powered); ++ if (ret < 0) ++ return ret; ++ ++ bl_powered = target_powered; ++ } ++ ++ return ret; + } + + static int loongson_laptop_get_brightness(struct backlight_device *bd) +@@ -384,7 +409,7 @@ static const struct backlight_ops backli + + static int laptop_backlight_register(void) + { +- int status = 0; ++ int status = 0, ret; + struct backlight_properties props; + + memset(&props, 0, sizeof(props)); +@@ -392,44 +417,20 @@ static int laptop_backlight_register(voi + if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d")) + return -EIO; + ++ ret = ec_backlight_set_power(true); ++ if (ret) ++ return ret; ++ ++ bl_powered = true; ++ + props.max_brightness = status; + props.brightness = ec_get_brightness(); ++ props.power = BACKLIGHT_POWER_ON; + props.type = BACKLIGHT_PLATFORM; + + backlight_device_register("loongson_laptop", + NULL, NULL, &backlight_laptop_ops, &props); + +- return 0; +-} +- +-int loongson_laptop_turn_on_backlight(void) +-{ +- int status; +- union acpi_object arg0 = { ACPI_TYPE_INTEGER }; +- struct acpi_object_list args = { 1, &arg0 }; +- +- arg0.integer.value = 1; +- status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); +- if (ACPI_FAILURE(status)) { +- pr_info("Loongson lvds error: 0x%x\n", status); +- return -ENODEV; +- } +- +- return 0; +-} +- +-int loongson_laptop_turn_off_backlight(void) +-{ +- int status; +- union acpi_object arg0 = { ACPI_TYPE_INTEGER }; +- struct acpi_object_list args = { 1, &arg0 }; +- +- arg0.integer.value = 0; +- status = acpi_evaluate_object(NULL, "\\BLSW", &args, NULL); +- if (ACPI_FAILURE(status)) { +- pr_info("Loongson lvds error: 0x%x\n", status); +- return -ENODEV; +- } + + return 0; + } diff --git a/queue-6.15/platform-loongarch-laptop-get-brightness-setting-from-ec-on-probe.patch b/queue-6.15/platform-loongarch-laptop-get-brightness-setting-from-ec-on-probe.patch new file mode 100644 index 00000000000..39a8338274a --- /dev/null +++ b/queue-6.15/platform-loongarch-laptop-get-brightness-setting-from-ec-on-probe.patch @@ -0,0 +1,41 @@ +From 1205088fd0393bd9eae96b62bf1e4b9eb1b73edf Mon Sep 17 00:00:00 2001 +From: Yao Zi <ziyao@disroot.org> +Date: Thu, 5 Jun 2025 20:34:46 +0800 +Subject: platform/loongarch: laptop: Get brightness setting from EC on probe + +From: Yao Zi <ziyao@disroot.org> + +commit 1205088fd0393bd9eae96b62bf1e4b9eb1b73edf upstream. + +Previously during driver probe, 1 is unconditionally taken as current +brightness value and set to props.brightness, which will be considered +as the brightness before suspend and restored to EC on resume. 
Since a +brightness value of 1 almost never matches EC's state on coldboot (my +laptop's EC defaults to 80), this causes surprising changes of screen +brightness on the first time of resume after coldboot. + +Let's get brightness from EC and take it as the current brightness on +probe of the laptop driver to avoid the surprising behavior. Tested on +TongFang L860-T2 Loongson-3A5000 laptop. + +Cc: stable@vger.kernel.org +Fixes: 6246ed09111f ("LoongArch: Add ACPI-based generic laptop driver") +Signed-off-by: Yao Zi <ziyao@disroot.org> +Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/platform/loongarch/loongson-laptop.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/platform/loongarch/loongson-laptop.c ++++ b/drivers/platform/loongarch/loongson-laptop.c +@@ -392,8 +392,8 @@ static int laptop_backlight_register(voi + if (!acpi_evalf(hotkey_handle, &status, "ECLL", "d")) + return -EIO; + +- props.brightness = 1; + props.max_brightness = status; ++ props.brightness = ec_get_brightness(); + props.type = BACKLIGHT_PLATFORM; + + backlight_device_register("loongson_laptop", diff --git a/queue-6.15/platform-loongarch-laptop-unregister-generic_sub_drivers-on-exit.patch b/queue-6.15/platform-loongarch-laptop-unregister-generic_sub_drivers-on-exit.patch new file mode 100644 index 00000000000..2f401f3c99d --- /dev/null +++ b/queue-6.15/platform-loongarch-laptop-unregister-generic_sub_drivers-on-exit.patch @@ -0,0 +1,46 @@ +From f78fb2576f22b0ba5297412a9aa7691920666c41 Mon Sep 17 00:00:00 2001 +From: Yao Zi <ziyao@disroot.org> +Date: Thu, 5 Jun 2025 20:34:46 +0800 +Subject: platform/loongarch: laptop: Unregister generic_sub_drivers on exit + +From: Yao Zi <ziyao@disroot.org> + +commit f78fb2576f22b0ba5297412a9aa7691920666c41 upstream. + +Without correct unregisteration, ACPI notify handlers and the platform +drivers installed by generic_subdriver_init() will become dangling +references after removing the loongson_laptop module, triggering various +kernel faults when a hotkey is sent or at kernel shutdown. 
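+
+The exit path now unregisters the input device and then tears down each
+sub-driver (condensed from the hunk below):
+
+	input_unregister_device(generic_inputdev);
+
+	for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++)
+		generic_subdriver_exit(&generic_sub_drivers[i]);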
+ +Cc: stable@vger.kernel.org +Fixes: 6246ed09111f ("LoongArch: Add ACPI-based generic laptop driver") +Signed-off-by: Yao Zi <ziyao@disroot.org> +Signed-off-by: Huacai Chen <chenhuacai@loongson.cn> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/platform/loongarch/loongson-laptop.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/drivers/platform/loongarch/loongson-laptop.c ++++ b/drivers/platform/loongarch/loongson-laptop.c +@@ -611,11 +611,17 @@ static int __init generic_acpi_laptop_in + + static void __exit generic_acpi_laptop_exit(void) + { ++ int i; ++ + if (generic_inputdev) { +- if (input_device_registered) +- input_unregister_device(generic_inputdev); +- else ++ if (!input_device_registered) { + input_free_device(generic_inputdev); ++ } else { ++ input_unregister_device(generic_inputdev); ++ ++ for (i = 0; i < ARRAY_SIZE(generic_sub_drivers); i++) ++ generic_subdriver_exit(&generic_sub_drivers[i]); ++ } + } + } + diff --git a/queue-6.15/platform-x86-ideapad-laptop-use-usleep_range-for-ec-polling.patch b/queue-6.15/platform-x86-ideapad-laptop-use-usleep_range-for-ec-polling.patch new file mode 100644 index 00000000000..7389140bb22 --- /dev/null +++ b/queue-6.15/platform-x86-ideapad-laptop-use-usleep_range-for-ec-polling.patch @@ -0,0 +1,122 @@ +From 5808c34216954cd832bd4b8bc52dfa287049122b Mon Sep 17 00:00:00 2001 +From: Rong Zhang <i@rong.moe> +Date: Mon, 26 May 2025 04:18:07 +0800 +Subject: platform/x86: ideapad-laptop: use usleep_range() for EC polling +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Rong Zhang <i@rong.moe> + +commit 5808c34216954cd832bd4b8bc52dfa287049122b upstream. + +It was reported that ideapad-laptop sometimes causes some recent (since +2024) Lenovo ThinkBook models shut down when: + - suspending/resuming + - closing/opening the lid + - (dis)connecting a charger + - reading/writing some sysfs properties, e.g., fan_mode, touchpad + - pressing down some Fn keys, e.g., Brightness Up/Down (Fn+F5/F6) + - (seldom) loading the kmod + +The issue has existed since the launch day of such models, and there +have been some out-of-tree workarounds (see Link:) for the issue. One +disables some functionalities, while another one simply shortens +IDEAPAD_EC_TIMEOUT. The disabled functionalities have read_ec_data() in +their call chains, which calls schedule() between each poll. + +It turns out that these models suffer from the indeterminacy of +schedule() because of their low tolerance for being polled too +frequently. Sometimes schedule() returns too soon due to the lack of +ready tasks, causing the margin between two polls to be too short. +In this case, the command is somehow aborted, and too many subsequent +polls (they poll for "nothing!") may eventually break the state machine +in the EC, resulting in a hard shutdown. This explains why shortening +IDEAPAD_EC_TIMEOUT works around the issue - it reduces the total number +of polls sent to the EC. + +Even when it doesn't lead to a shutdown, frequent polls may also disturb +the ongoing operation and notably delay (+ 10-20ms) the availability of +EC response. This phenomenon is unlikely to be exclusive to the models +mentioned above, so dropping the schedule() manner should also slightly +improve the responsiveness of various models. + +Fix these issues by migrating to usleep_range(150, 300). 
The interval is +chosen to add some margin to the minimal 50us and considering EC +responses are usually available after 150-2500us based on my test. It +should be enough to fix these issues on all models subject to the EC bug +without introducing latency on other models. + +Tested on ThinkBook 14 G7+ ASP and solved both issues. No regression was +introduced in the test on a model without the EC bug (ThinkBook X IMH, +thanks Eric). + +Link: https://github.com/ty2/ideapad-laptop-tb2024g6plus/commit/6c5db18c9e8109873c2c90a7d2d7f552148f7ad4 +Link: https://github.com/ferstar/ideapad-laptop-tb/commit/42d1e68e5009529d31bd23f978f636f79c023e80 +Closes: https://bugzilla.kernel.org/show_bug.cgi?id=218771 +Fixes: 6a09f21dd1e2 ("ideapad: add ACPI helpers") +Cc: stable@vger.kernel.org +Tested-by: Felix Yan <felixonmars@archlinux.org> +Tested-by: Eric Long <i@hack3r.moe> +Tested-by: Jianfei Zhang <zhangjianfei3@gmail.com> +Tested-by: Mingcong Bai <jeffbai@aosc.io> +Tested-by: Minh Le <minhld139@gmail.com> +Tested-by: Sicheng Zhu <Emmet_Z@outlook.com> +Signed-off-by: Rong Zhang <i@rong.moe> +Link: https://lore.kernel.org/r/20250525201833.37939-1-i@rong.moe +Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> +Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/platform/x86/ideapad-laptop.c | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +--- a/drivers/platform/x86/ideapad-laptop.c ++++ b/drivers/platform/x86/ideapad-laptop.c +@@ -15,6 +15,7 @@ + #include <linux/bug.h> + #include <linux/cleanup.h> + #include <linux/debugfs.h> ++#include <linux/delay.h> + #include <linux/device.h> + #include <linux/dmi.h> + #include <linux/i8042.h> +@@ -267,6 +268,20 @@ static void ideapad_shared_exit(struct i + */ + #define IDEAPAD_EC_TIMEOUT 200 /* in ms */ + ++/* ++ * Some models (e.g., ThinkBook since 2024) have a low tolerance for being ++ * polled too frequently. Doing so may break the state machine in the EC, ++ * resulting in a hard shutdown. ++ * ++ * It is also observed that frequent polls may disturb the ongoing operation ++ * and notably delay the availability of EC response. ++ * ++ * These values are used as the delay before the first poll and the interval ++ * between subsequent polls to solve the above issues. 
++ */ ++#define IDEAPAD_EC_POLL_MIN_US 150 ++#define IDEAPAD_EC_POLL_MAX_US 300 ++ + static int eval_int(acpi_handle handle, const char *name, unsigned long *res) + { + unsigned long long result; +@@ -383,7 +398,7 @@ static int read_ec_data(acpi_handle hand + end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; + + while (time_before(jiffies, end_jiffies)) { +- schedule(); ++ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); + + err = eval_vpcr(handle, 1, &val); + if (err) +@@ -414,7 +429,7 @@ static int write_ec_cmd(acpi_handle hand + end_jiffies = jiffies + msecs_to_jiffies(IDEAPAD_EC_TIMEOUT) + 1; + + while (time_before(jiffies, end_jiffies)) { +- schedule(); ++ usleep_range(IDEAPAD_EC_POLL_MIN_US, IDEAPAD_EC_POLL_MAX_US); + + err = eval_vpcr(handle, 1, &val); + if (err) diff --git a/queue-6.15/platform-x86-intel-uncore-freq-fail-module-load-when-plat_info-is-null.patch b/queue-6.15/platform-x86-intel-uncore-freq-fail-module-load-when-plat_info-is-null.patch new file mode 100644 index 00000000000..8bcc26ac1b3 --- /dev/null +++ b/queue-6.15/platform-x86-intel-uncore-freq-fail-module-load-when-plat_info-is-null.patch @@ -0,0 +1,59 @@ +From 685f88c72a0c4d12d3bd2ff50286938f14486f85 Mon Sep 17 00:00:00 2001 +From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> +Date: Fri, 6 Jun 2025 13:53:00 -0700 +Subject: platform/x86/intel-uncore-freq: Fail module load when plat_info is NULL +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> + +commit 685f88c72a0c4d12d3bd2ff50286938f14486f85 upstream. + +Address a Smatch static checker warning regarding an unchecked +dereference in the function call: +set_cdie_id(i, cluster_info, plat_info) +when plat_info is NULL. + +Instead of addressing this one case, in general if plat_info is NULL +then it can cause other issues. For example in a two package system it +will give warning for duplicate sysfs entry as package ID will be always +zero for both packages when creating string for attribute group name. + +plat_info is derived from TPMI ID TPMI_BUS_INFO, which is integral to +the core TPMI design. Therefore, it should not be NULL on a production +platform. Consequently, the module should fail to load if plat_info is +NULL. 
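+
+Probe now bails out early instead of continuing with a zero package ID
+(quoted from the hunk below):
+
+	plat_info = tpmi_get_platform_data(auxdev);
+	if (unlikely(!plat_info)) {
+		dev_info(&auxdev->dev, "Platform information is NULL\n");
+		ret = -ENODEV;
+		goto err_rem_common;
+	}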
+ +Reported-by: Dan Carpenter <dan.carpenter@linaro.org> +Closes: https://lore.kernel.org/platform-driver-x86/aEKvGCLd1qmX04Tc@stanley.mountain/T/#u +Fixes: 8a54e2253e4c ("platform/x86/intel-uncore-freq: Uncore frequency control via TPMI") +Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250606205300.2384494-1-srinivas.pandruvada@linux.intel.com +Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> +Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c | 9 ++++++--- + 1 file changed, 6 insertions(+), 3 deletions(-) + +--- a/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c ++++ b/drivers/platform/x86/intel/uncore-frequency/uncore-frequency-tpmi.c +@@ -467,10 +467,13 @@ static int uncore_probe(struct auxiliary + + /* Get the package ID from the TPMI core */ + plat_info = tpmi_get_platform_data(auxdev); +- if (plat_info) +- pkg = plat_info->package_id; +- else ++ if (unlikely(!plat_info)) { + dev_info(&auxdev->dev, "Platform information is NULL\n"); ++ ret = -ENODEV; ++ goto err_rem_common; ++ } ++ ++ pkg = plat_info->package_id; + + for (i = 0; i < num_resources; ++i) { + struct tpmi_uncore_power_domain_info *pd_info; diff --git a/queue-6.15/revert-mm-execmem-unify-early-execmem_cache-behaviour.patch b/queue-6.15/revert-mm-execmem-unify-early-execmem_cache-behaviour.patch new file mode 100644 index 00000000000..f74c1bfcb64 --- /dev/null +++ b/queue-6.15/revert-mm-execmem-unify-early-execmem_cache-behaviour.patch @@ -0,0 +1,154 @@ +From 7cd9a11dd0c3d1dd225795ed1b5b53132888e7b5 Mon Sep 17 00:00:00 2001 +From: "Mike Rapoport (Microsoft)" <rppt@kernel.org> +Date: Tue, 3 Jun 2025 14:14:45 +0300 +Subject: Revert "mm/execmem: Unify early execmem_cache behaviour" + +From: Mike Rapoport (Microsoft) <rppt@kernel.org> + +commit 7cd9a11dd0c3d1dd225795ed1b5b53132888e7b5 upstream. + +The commit d6d1e3e6580c ("mm/execmem: Unify early execmem_cache +behaviour") changed early behaviour of execemem ROX cache to allow its +usage in early x86 code that allocates text pages when +CONFIG_MITGATION_ITS is enabled. + +The permission management of the pages allocated from execmem for ITS +mitigation is now completely contained in arch/x86/kernel/alternatives.c +and therefore there is no need to special case early allocations in +execmem. + +This reverts commit d6d1e3e6580ca35071ad474381f053cbf1fb6414. 
+ +Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20250603111446.2609381-6-rppt@kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/init_32.c | 3 --- + arch/x86/mm/init_64.c | 3 --- + include/linux/execmem.h | 8 +------- + mm/execmem.c | 40 +++------------------------------------- + 4 files changed, 4 insertions(+), 50 deletions(-) + +--- a/arch/x86/mm/init_32.c ++++ b/arch/x86/mm/init_32.c +@@ -30,7 +30,6 @@ + #include <linux/initrd.h> + #include <linux/cpumask.h> + #include <linux/gfp.h> +-#include <linux/execmem.h> + + #include <asm/asm.h> + #include <asm/bios_ebda.h> +@@ -756,8 +755,6 @@ void mark_rodata_ro(void) + pr_info("Write protecting kernel text and read-only data: %luk\n", + size >> 10); + +- execmem_cache_make_ro(); +- + kernel_set_to_readonly = 1; + + #ifdef CONFIG_CPA_DEBUG +--- a/arch/x86/mm/init_64.c ++++ b/arch/x86/mm/init_64.c +@@ -34,7 +34,6 @@ + #include <linux/gfp.h> + #include <linux/kcore.h> + #include <linux/bootmem_info.h> +-#include <linux/execmem.h> + + #include <asm/processor.h> + #include <asm/bios_ebda.h> +@@ -1392,8 +1391,6 @@ void mark_rodata_ro(void) + (end - start) >> 10); + set_memory_ro(start, (end - start) >> PAGE_SHIFT); + +- execmem_cache_make_ro(); +- + kernel_set_to_readonly = 1; + + /* +--- a/include/linux/execmem.h ++++ b/include/linux/execmem.h +@@ -54,7 +54,7 @@ enum execmem_range_flags { + EXECMEM_ROX_CACHE = (1 << 1), + }; + +-#if defined(CONFIG_ARCH_HAS_EXECMEM_ROX) && defined(CONFIG_EXECMEM) ++#ifdef CONFIG_ARCH_HAS_EXECMEM_ROX + /** + * execmem_fill_trapping_insns - set memory to contain instructions that + * will trap +@@ -94,15 +94,9 @@ int execmem_make_temp_rw(void *ptr, size + * Return: 0 on success or negative error code on failure. + */ + int execmem_restore_rox(void *ptr, size_t size); +- +-/* +- * Called from mark_readonly(), where the system transitions to ROX. 
+- */ +-void execmem_cache_make_ro(void); + #else + static inline int execmem_make_temp_rw(void *ptr, size_t size) { return 0; } + static inline int execmem_restore_rox(void *ptr, size_t size) { return 0; } +-static inline void execmem_cache_make_ro(void) { } + #endif + + /** +--- a/mm/execmem.c ++++ b/mm/execmem.c +@@ -254,34 +254,6 @@ out_unlock: + return ptr; + } + +-static bool execmem_cache_rox = false; +- +-void execmem_cache_make_ro(void) +-{ +- struct maple_tree *free_areas = &execmem_cache.free_areas; +- struct maple_tree *busy_areas = &execmem_cache.busy_areas; +- MA_STATE(mas_free, free_areas, 0, ULONG_MAX); +- MA_STATE(mas_busy, busy_areas, 0, ULONG_MAX); +- struct mutex *mutex = &execmem_cache.mutex; +- void *area; +- +- execmem_cache_rox = true; +- +- mutex_lock(mutex); +- +- mas_for_each(&mas_free, area, ULONG_MAX) { +- unsigned long pages = mas_range_len(&mas_free) >> PAGE_SHIFT; +- set_memory_ro(mas_free.index, pages); +- } +- +- mas_for_each(&mas_busy, area, ULONG_MAX) { +- unsigned long pages = mas_range_len(&mas_busy) >> PAGE_SHIFT; +- set_memory_ro(mas_busy.index, pages); +- } +- +- mutex_unlock(mutex); +-} +- + static int execmem_cache_populate(struct execmem_range *range, size_t size) + { + unsigned long vm_flags = VM_ALLOW_HUGE_VMAP; +@@ -302,15 +274,9 @@ static int execmem_cache_populate(struct + /* fill memory with instructions that will trap */ + execmem_fill_trapping_insns(p, alloc_size, /* writable = */ true); + +- if (execmem_cache_rox) { +- err = set_memory_rox((unsigned long)p, vm->nr_pages); +- if (err) +- goto err_free_mem; +- } else { +- err = set_memory_x((unsigned long)p, vm->nr_pages); +- if (err) +- goto err_free_mem; +- } ++ err = set_memory_rox((unsigned long)p, vm->nr_pages); ++ if (err) ++ goto err_free_mem; + + err = execmem_cache_add(p, alloc_size); + if (err) diff --git a/queue-6.15/revert-platform-x86-alienware-wmi-wmax-add-g-mode-support-to-alienware-m16-r1.patch b/queue-6.15/revert-platform-x86-alienware-wmi-wmax-add-g-mode-support-to-alienware-m16-r1.patch new file mode 100644 index 00000000000..8276052e549 --- /dev/null +++ b/queue-6.15/revert-platform-x86-alienware-wmi-wmax-add-g-mode-support-to-alienware-m16-r1.patch @@ -0,0 +1,40 @@ +From e2468dc700743683e1d1793bbd855e2536fd3de2 Mon Sep 17 00:00:00 2001 +From: Kurt Borja <kuurtb@gmail.com> +Date: Wed, 11 Jun 2025 18:30:40 -0300 +Subject: Revert "platform/x86: alienware-wmi-wmax: Add G-Mode support to Alienware m16 R1" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Kurt Borja <kuurtb@gmail.com> + +commit e2468dc700743683e1d1793bbd855e2536fd3de2 upstream. + +This reverts commit 5ff79cabb23a2f14d2ed29e9596aec908905a0e6. + +Although the Alienware m16 R1 AMD model supports G-Mode, it actually has +a lower power ceiling than plain "performance" profile, which results in +lower performance. 
+ +Reported-by: Cihan Ozakca <cozakca@outlook.com> +Cc: stable@vger.kernel.org # 6.15.x +Signed-off-by: Kurt Borja <kuurtb@gmail.com> +Link: https://lore.kernel.org/r/20250611-m16-rev-v1-1-72d13bad03c9@gmail.com +Reviewed-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> +Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/platform/x86/dell/alienware-wmi-wmax.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/platform/x86/dell/alienware-wmi-wmax.c ++++ b/drivers/platform/x86/dell/alienware-wmi-wmax.c +@@ -91,7 +91,7 @@ static const struct dmi_system_id awcc_d + DMI_MATCH(DMI_SYS_VENDOR, "Alienware"), + DMI_MATCH(DMI_PRODUCT_NAME, "Alienware m16 R1 AMD"), + }, +- .driver_data = &g_series_quirks, ++ .driver_data = &generic_quirks, + }, + { + .ident = "Alienware m16 R2", diff --git a/queue-6.15/sched_ext-sched-core-don-t-call-scx_group_set_weight-prematurely-from-sched_create_group.patch b/queue-6.15/sched_ext-sched-core-don-t-call-scx_group_set_weight-prematurely-from-sched_create_group.patch new file mode 100644 index 00000000000..69e7924217e --- /dev/null +++ b/queue-6.15/sched_ext-sched-core-don-t-call-scx_group_set_weight-prematurely-from-sched_create_group.patch @@ -0,0 +1,84 @@ +From 33796b91871ad4010c8188372dd1faf97cf0f1c0 Mon Sep 17 00:00:00 2001 +From: Tejun Heo <tj@kernel.org> +Date: Mon, 16 Jun 2025 10:13:25 -1000 +Subject: sched_ext, sched/core: Don't call scx_group_set_weight() prematurely from sched_create_group() + +From: Tejun Heo <tj@kernel.org> + +commit 33796b91871ad4010c8188372dd1faf97cf0f1c0 upstream. + +During task_group creation, sched_create_group() calls +scx_group_set_weight() with CGROUP_WEIGHT_DFL to initialize the sched_ext +portion. This is premature and ends up calling ops.cgroup_set_weight() with +an incorrect @cgrp before ops.cgroup_init() is called. + +sched_create_group() should just initialize SCX related fields in the new +task_group. Fix it by factoring out scx_tg_init() from sched_init() and +making sched_create_group() call that function instead of +scx_group_set_weight(). + +v2: Retain CONFIG_EXT_GROUP_SCHED ifdef in sched_init() as removing it leads + to build failures on !CONFIG_GROUP_SCHED configs. 
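+
+The factored-out helper is deliberately minimal, only setting the default
+weight (quoted from the hunk below):
+
+	void scx_tg_init(struct task_group *tg)
+	{
+		tg->scx_weight = CGROUP_WEIGHT_DFL;
+	}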
+ +Signed-off-by: Tejun Heo <tj@kernel.org> +Fixes: 819513666966 ("sched_ext: Add cgroup support") +Cc: stable@vger.kernel.org # v6.12+ +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/sched/core.c | 4 ++-- + kernel/sched/ext.c | 5 +++++ + kernel/sched/ext.h | 2 ++ + 3 files changed, 9 insertions(+), 2 deletions(-) + +--- a/kernel/sched/core.c ++++ b/kernel/sched/core.c +@@ -8571,7 +8571,7 @@ void __init sched_init(void) + init_cfs_bandwidth(&root_task_group.cfs_bandwidth, NULL); + #endif /* CONFIG_FAIR_GROUP_SCHED */ + #ifdef CONFIG_EXT_GROUP_SCHED +- root_task_group.scx_weight = CGROUP_WEIGHT_DFL; ++ scx_tg_init(&root_task_group); + #endif /* CONFIG_EXT_GROUP_SCHED */ + #ifdef CONFIG_RT_GROUP_SCHED + root_task_group.rt_se = (struct sched_rt_entity **)ptr; +@@ -9011,7 +9011,7 @@ struct task_group *sched_create_group(st + if (!alloc_rt_sched_group(tg, parent)) + goto err; + +- scx_group_set_weight(tg, CGROUP_WEIGHT_DFL); ++ scx_tg_init(tg); + alloc_uclamp_sched_group(tg, parent); + + return tg; +--- a/kernel/sched/ext.c ++++ b/kernel/sched/ext.c +@@ -3936,6 +3936,11 @@ bool scx_can_stop_tick(struct rq *rq) + DEFINE_STATIC_PERCPU_RWSEM(scx_cgroup_rwsem); + static bool scx_cgroup_enabled; + ++void scx_tg_init(struct task_group *tg) ++{ ++ tg->scx_weight = CGROUP_WEIGHT_DFL; ++} ++ + int scx_tg_online(struct task_group *tg) + { + int ret = 0; +--- a/kernel/sched/ext.h ++++ b/kernel/sched/ext.h +@@ -80,6 +80,7 @@ static inline void scx_update_idle(struc + + #ifdef CONFIG_CGROUP_SCHED + #ifdef CONFIG_EXT_GROUP_SCHED ++void scx_tg_init(struct task_group *tg); + int scx_tg_online(struct task_group *tg); + void scx_tg_offline(struct task_group *tg); + int scx_cgroup_can_attach(struct cgroup_taskset *tset); +@@ -89,6 +90,7 @@ void scx_cgroup_cancel_attach(struct cgr + void scx_group_set_weight(struct task_group *tg, unsigned long cgrp_weight); + void scx_group_set_idle(struct task_group *tg, bool idle); + #else /* CONFIG_EXT_GROUP_SCHED */ ++static inline void scx_tg_init(struct task_group *tg) {} + static inline int scx_tg_online(struct task_group *tg) { return 0; } + static inline void scx_tg_offline(struct task_group *tg) {} + static inline int scx_cgroup_can_attach(struct cgroup_taskset *tset) { return 0; } diff --git a/queue-6.15/scsi-s390-zfcp-ensure-synchronous-unit_add.patch b/queue-6.15/scsi-s390-zfcp-ensure-synchronous-unit_add.patch new file mode 100644 index 00000000000..1564576f188 --- /dev/null +++ b/queue-6.15/scsi-s390-zfcp-ensure-synchronous-unit_add.patch @@ -0,0 +1,46 @@ +From 9697ca0d53e3db357be26d2414276143c4a2cd49 Mon Sep 17 00:00:00 2001 +From: Peter Oberparleiter <oberpar@linux.ibm.com> +Date: Tue, 3 Jun 2025 20:21:56 +0200 +Subject: scsi: s390: zfcp: Ensure synchronous unit_add + +From: Peter Oberparleiter <oberpar@linux.ibm.com> + +commit 9697ca0d53e3db357be26d2414276143c4a2cd49 upstream. + +Improve the usability of the unit_add sysfs attribute by ensuring that +the associated FCP LUN scan processing is completed synchronously. This +enables configuration tooling to consistently determine the end of the +scan process to allow for serialization of follow-on actions. + +While the scan process associated with unit_add typically completes +synchronously, it is deferred to an asynchronous background process if +unit_add is used before initial remote port scanning has completed. This +occurs when unit_add is used immediately after setting the associated FCP +device online. 
+ +To ensure synchronous unit_add processing, wait for remote port scanning +to complete before initiating the FCP LUN scan. + +Cc: stable@vger.kernel.org +Reviewed-by: M Nikhil <nikh1092@linux.ibm.com> +Reviewed-by: Nihar Panda <niharp@linux.ibm.com> +Signed-off-by: Peter Oberparleiter <oberpar@linux.ibm.com> +Signed-off-by: Nihar Panda <niharp@linux.ibm.com> +Link: https://lore.kernel.org/r/20250603182252.2287285-2-niharp@linux.ibm.com +Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/s390/scsi/zfcp_sysfs.c | 2 ++ + 1 file changed, 2 insertions(+) + +--- a/drivers/s390/scsi/zfcp_sysfs.c ++++ b/drivers/s390/scsi/zfcp_sysfs.c +@@ -449,6 +449,8 @@ static ssize_t zfcp_sysfs_unit_add_store + if (kstrtoull(buf, 0, (unsigned long long *) &fcp_lun)) + return -EINVAL; + ++ flush_work(&port->rport_work); ++ + retval = zfcp_unit_add(port, fcp_lun); + if (retval) + return retval; diff --git a/queue-6.15/scsi-storvsc-increase-the-timeouts-to-storvsc_timeout.patch b/queue-6.15/scsi-storvsc-increase-the-timeouts-to-storvsc_timeout.patch new file mode 100644 index 00000000000..9eee1b05e68 --- /dev/null +++ b/queue-6.15/scsi-storvsc-increase-the-timeouts-to-storvsc_timeout.patch @@ -0,0 +1,76 @@ +From b2f966568faaad326de97481096d0f3dc0971c43 Mon Sep 17 00:00:00 2001 +From: Dexuan Cui <decui@microsoft.com> +Date: Fri, 6 Jun 2025 13:57:39 -0700 +Subject: scsi: storvsc: Increase the timeouts to storvsc_timeout + +From: Dexuan Cui <decui@microsoft.com> + +commit b2f966568faaad326de97481096d0f3dc0971c43 upstream. + +Currently storvsc_timeout is only used in storvsc_sdev_configure(), and +5s and 10s are used elsewhere. It turns out that rarely the 5s is not +enough on Azure, so let's use storvsc_timeout everywhere. + +In case a timeout happens and storvsc_channel_init() returns an error, +close the VMBus channel so that any host-to-guest messages in the +channel's ringbuffer, which might come late, can be safely ignored. + +Add a "const" to storvsc_timeout. + +Cc: stable@kernel.org +Signed-off-by: Dexuan Cui <decui@microsoft.com> +Link: https://lore.kernel.org/r/1749243459-10419-1-git-send-email-decui@microsoft.com +Reviewed-by: Long Li <longli@microsoft.com> +Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/scsi/storvsc_drv.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/drivers/scsi/storvsc_drv.c ++++ b/drivers/scsi/storvsc_drv.c +@@ -362,7 +362,7 @@ MODULE_PARM_DESC(ring_avail_percent_lowa + /* + * Timeout in seconds for all devices managed by this driver. 
+ */ +-static int storvsc_timeout = 180; ++static const int storvsc_timeout = 180; + + #if IS_ENABLED(CONFIG_SCSI_FC_ATTRS) + static struct scsi_transport_template *fc_transport_template; +@@ -768,7 +768,7 @@ static void handle_multichannel_storage + return; + } + +- t = wait_for_completion_timeout(&request->wait_event, 10*HZ); ++ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); + if (t == 0) { + dev_err(dev, "Failed to create sub-channel: timed out\n"); + return; +@@ -833,7 +833,7 @@ static int storvsc_execute_vstor_op(stru + if (ret != 0) + return ret; + +- t = wait_for_completion_timeout(&request->wait_event, 5*HZ); ++ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); + if (t == 0) + return -ETIMEDOUT; + +@@ -1350,6 +1350,8 @@ static int storvsc_connect_to_vsp(struct + return ret; + + ret = storvsc_channel_init(device, is_fc); ++ if (ret) ++ vmbus_close(device->channel); + + return ret; + } +@@ -1668,7 +1670,7 @@ static int storvsc_host_reset_handler(st + if (ret != 0) + return FAILED; + +- t = wait_for_completion_timeout(&request->wait_event, 5*HZ); ++ t = wait_for_completion_timeout(&request->wait_event, storvsc_timeout * HZ); + if (t == 0) + return TIMEOUT_ERROR; + diff --git a/queue-6.15/selftests-x86-add-a-test-to-detect-infinite-sigtrap-handler-loop.patch b/queue-6.15/selftests-x86-add-a-test-to-detect-infinite-sigtrap-handler-loop.patch new file mode 100644 index 00000000000..a67983a1ee6 --- /dev/null +++ b/queue-6.15/selftests-x86-add-a-test-to-detect-infinite-sigtrap-handler-loop.patch @@ -0,0 +1,152 @@ +From f287822688eeb44ae1cf6ac45701d965efc33218 Mon Sep 17 00:00:00 2001 +From: "Xin Li (Intel)" <xin@zytor.com> +Date: Mon, 9 Jun 2025 01:40:54 -0700 +Subject: selftests/x86: Add a test to detect infinite SIGTRAP handler loop +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Xin Li (Intel) <xin@zytor.com> + +commit f287822688eeb44ae1cf6ac45701d965efc33218 upstream. + +When FRED is enabled, if the Trap Flag (TF) is set without an external +debugger attached, it can lead to an infinite loop in the SIGTRAP +handler. To avoid this, the software event flag in the augmented SS +must be cleared, ensuring that no single-step trap remains pending when +ERETU completes. + +This test checks for that specific scenario—verifying whether the kernel +correctly prevents an infinite SIGTRAP loop in this edge case when FRED +is enabled. + +The test should _always_ pass with IDT event delivery, thus no need to +disable the test even when FRED is not enabled. 
+ +Signed-off-by: Xin Li (Intel) <xin@zytor.com> +Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> +Tested-by: Sohil Mehta <sohil.mehta@intel.com> +Cc:stable@vger.kernel.org +Link: https://lore.kernel.org/all/20250609084054.2083189-3-xin%40zytor.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + tools/testing/selftests/x86/Makefile | 2 + tools/testing/selftests/x86/sigtrap_loop.c | 101 +++++++++++++++++++++++++++++ + 2 files changed, 102 insertions(+), 1 deletion(-) + create mode 100644 tools/testing/selftests/x86/sigtrap_loop.c + +--- a/tools/testing/selftests/x86/Makefile ++++ b/tools/testing/selftests/x86/Makefile +@@ -12,7 +12,7 @@ CAN_BUILD_WITH_NOPIE := $(shell ./check_ + + TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ + check_initial_reg_state sigreturn iopl ioperm \ +- test_vsyscall mov_ss_trap \ ++ test_vsyscall mov_ss_trap sigtrap_loop \ + syscall_arg_fault fsgsbase_restore sigaltstack + TARGETS_C_BOTHBITS += nx_stack + TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \ +--- /dev/null ++++ b/tools/testing/selftests/x86/sigtrap_loop.c +@@ -0,0 +1,101 @@ ++// SPDX-License-Identifier: GPL-2.0-only ++/* ++ * Copyright (C) 2025 Intel Corporation ++ */ ++#define _GNU_SOURCE ++ ++#include <err.h> ++#include <signal.h> ++#include <stdio.h> ++#include <stdlib.h> ++#include <string.h> ++#include <sys/ucontext.h> ++ ++#ifdef __x86_64__ ++# define REG_IP REG_RIP ++#else ++# define REG_IP REG_EIP ++#endif ++ ++static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), int flags) ++{ ++ struct sigaction sa; ++ ++ memset(&sa, 0, sizeof(sa)); ++ sa.sa_sigaction = handler; ++ sa.sa_flags = SA_SIGINFO | flags; ++ sigemptyset(&sa.sa_mask); ++ ++ if (sigaction(sig, &sa, 0)) ++ err(1, "sigaction"); ++ ++ return; ++} ++ ++static void sigtrap(int sig, siginfo_t *info, void *ctx_void) ++{ ++ ucontext_t *ctx = (ucontext_t *)ctx_void; ++ static unsigned int loop_count_on_same_ip; ++ static unsigned long last_trap_ip; ++ ++ if (last_trap_ip == ctx->uc_mcontext.gregs[REG_IP]) { ++ printf("\tTrapped at %016lx\n", last_trap_ip); ++ ++ /* ++ * If the same IP is hit more than 10 times in a row, it is ++ * _considered_ an infinite loop. ++ */ ++ if (++loop_count_on_same_ip > 10) { ++ printf("[FAIL]\tDetected SIGTRAP infinite loop\n"); ++ exit(1); ++ } ++ ++ return; ++ } ++ ++ loop_count_on_same_ip = 0; ++ last_trap_ip = ctx->uc_mcontext.gregs[REG_IP]; ++ printf("\tTrapped at %016lx\n", last_trap_ip); ++} ++ ++int main(int argc, char *argv[]) ++{ ++ sethandler(SIGTRAP, sigtrap, 0); ++ ++ /* ++ * Set the Trap Flag (TF) to single-step the test code, therefore to ++ * trigger a SIGTRAP signal after each instruction until the TF is ++ * cleared. ++ * ++ * Because the arithmetic flags are not significant here, the TF is ++ * set by pushing 0x302 onto the stack and then popping it into the ++ * flags register. ++ * ++ * Four instructions in the following asm code are executed with the ++ * TF set, thus the SIGTRAP handler is expected to run four times. ++ */ ++ printf("[RUN]\tSIGTRAP infinite loop detection\n"); ++ asm volatile( ++#ifdef __x86_64__ ++ /* ++ * Avoid clobbering the redzone ++ * ++ * Equivalent to "sub $128, %rsp", however -128 can be encoded ++ * in a single byte immediate while 128 uses 4 bytes. 
++ */ ++ "add $-128, %rsp\n\t" ++#endif ++ "push $0x302\n\t" ++ "popf\n\t" ++ "nop\n\t" ++ "nop\n\t" ++ "push $0x202\n\t" ++ "popf\n\t" ++#ifdef __x86_64__ ++ "sub $-128, %rsp\n\t" ++#endif ++ ); ++ ++ printf("[OK]\tNo SIGTRAP infinite loop detected\n"); ++ return 0; ++} diff --git a/queue-6.15/selinux-fix-selinux_xfrm_alloc_user-to-set-correct-ctx_len.patch b/queue-6.15/selinux-fix-selinux_xfrm_alloc_user-to-set-correct-ctx_len.patch new file mode 100644 index 00000000000..fbd78895e30 --- /dev/null +++ b/queue-6.15/selinux-fix-selinux_xfrm_alloc_user-to-set-correct-ctx_len.patch @@ -0,0 +1,41 @@ +From 86c8db86af43f52f682e53a0f2f0828683be1e52 Mon Sep 17 00:00:00 2001 +From: Stephen Smalley <stephen.smalley.work@gmail.com> +Date: Fri, 13 Jun 2025 15:37:05 -0400 +Subject: selinux: fix selinux_xfrm_alloc_user() to set correct ctx_len +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Stephen Smalley <stephen.smalley.work@gmail.com> + +commit 86c8db86af43f52f682e53a0f2f0828683be1e52 upstream. + +We should count the terminating NUL byte as part of the ctx_len. +Otherwise, UBSAN logs a warning: + UBSAN: array-index-out-of-bounds in security/selinux/xfrm.c:99:14 + index 60 is out of range for type 'char [*]' + +The allocation itself is correct so there is no actual out of bounds +indexing, just a warning. + +Cc: stable@vger.kernel.org +Suggested-by: Christian Göttsche <cgzones@googlemail.com> +Link: https://lore.kernel.org/selinux/CAEjxPJ6tA5+LxsGfOJokzdPeRomBHjKLBVR6zbrg+_w3ZZbM3A@mail.gmail.com/ +Signed-off-by: Stephen Smalley <stephen.smalley.work@gmail.com> +Signed-off-by: Paul Moore <paul@paul-moore.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + security/selinux/xfrm.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/security/selinux/xfrm.c ++++ b/security/selinux/xfrm.c +@@ -94,7 +94,7 @@ static int selinux_xfrm_alloc_user(struc + + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; +- ctx->ctx_len = str_len; ++ ctx->ctx_len = str_len + 1; + memcpy(ctx->ctx_str, &uctx[1], str_len); + ctx->ctx_str[str_len] = '\0'; + rc = security_context_to_sid(ctx->ctx_str, str_len, diff --git a/queue-6.15/series b/queue-6.15/series index 819cae6fcf3..c928afa9311 100644 --- a/queue-6.15/series +++ b/queue-6.15/series @@ -445,3 +445,45 @@ ovl-fix-debug-print-in-case-of-mkdir-error.patch powerpc-vdso-fix-build-of-vdso32-with-pcrel.patch powerpc-eeh-fix-missing-pe-bridge-reconfiguration-du.patch fs-drop-assert-in-file_seek_cur_needs_f_lock.patch +io_uring-kbuf-don-t-truncate-end-buffer-for-multiple-buffer-peeks.patch +io_uring-rsrc-validate-buffer-count-with-offset-for-cloning.patch +io_uring-fix-task-leak-issue-in-io_wq_create.patch +drivers-rapidio-rio_cm.c-prevent-possible-heap-overwrite.patch +platform-loongarch-laptop-get-brightness-setting-from-ec-on-probe.patch +platform-loongarch-laptop-unregister-generic_sub_drivers-on-exit.patch +platform-loongarch-laptop-add-backlight-power-control-support.patch +loongarch-vdso-correctly-use-asm-parameters-in-syscall-wrappers.patch +loongarch-avoid-using-r0-r1-as-mask-for-csrxchg.patch +loongarch-fix-panic-caused-by-null-pmd-in-huge_pte_offset.patch +firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-ctl-cache.patch +firmware-cs_dsp-fix-oob-memory-read-access-in-kunit-test-wmfw-info.patch +jffs2-check-that-raw-node-were-preallocated-before-writing-summary.patch +jffs2-check-jffs2_prealloc_raw_node_refs-result-in-few-other-places.patch 
+cifs-deal-with-the-channel-loading-lag-while-picking-channels.patch +cifs-serialize-other-channels-when-query-server-interfaces-is-pending.patch +cifs-do-not-disable-interface-polling-on-failure.patch +tracing-fix-regression-of-filter-waiting-a-long-time-on-rcu-synchronization.patch +smb-improve-directory-cache-reuse-for-readdir-operations.patch +scsi-storvsc-increase-the-timeouts-to-storvsc_timeout.patch +scsi-s390-zfcp-ensure-synchronous-unit_add.patch +nvme-always-punt-polled-uring_cmd-end_io-work-to-task_work.patch +net_sched-sch_sfq-reject-invalid-perturb-period.patch +net-clear-the-dst-when-changing-skb-protocol.patch +mm-close-theoretical-race-where-stale-tlb-entries-could-linger.patch +udmabuf-use-sgtable-based-scatterlist-wrappers.patch +mm-vma-reset-vma-iterator-on-commit_merge-oom-failure.patch +x86-mm-pat-don-t-collapse-pages-without-pse-set.patch +x86-kconfig-only-enable-rox-cache-in-execmem-when-strict_module_rwx-is-set.patch +x86-its-move-its_pages-array-to-struct-mod_arch_specific.patch +x86-its-explicitly-manage-permissions-for-its-pages.patch +revert-mm-execmem-unify-early-execmem_cache-behaviour.patch +x86-virt-tdx-avoid-indirect-calls-to-tdx-assembly-functions.patch +selftests-x86-add-a-test-to-detect-infinite-sigtrap-handler-loop.patch +ksmbd-fix-null-pointer-dereference-in-destroy_previous_session.patch +fgraph-do-not-enable-function_graph-tracer-when-setting-funcgraph-args.patch +platform-x86-ideapad-laptop-use-usleep_range-for-ec-polling.patch +revert-platform-x86-alienware-wmi-wmax-add-g-mode-support-to-alienware-m16-r1.patch +selinux-fix-selinux_xfrm_alloc_user-to-set-correct-ctx_len.patch +platform-x86-intel-uncore-freq-fail-module-load-when-plat_info-is-null.patch +sched_ext-sched-core-don-t-call-scx_group_set_weight-prematurely-from-sched_create_group.patch +atm-revert-atm_account_tx-if-copy_from_iter_full-fails.patch diff --git a/queue-6.15/smb-improve-directory-cache-reuse-for-readdir-operations.patch b/queue-6.15/smb-improve-directory-cache-reuse-for-readdir-operations.patch new file mode 100644 index 00000000000..48b420b0f0e --- /dev/null +++ b/queue-6.15/smb-improve-directory-cache-reuse-for-readdir-operations.patch @@ -0,0 +1,153 @@ +From 72dd7961a4bb4fa1fc456169a61dd12e68e50645 Mon Sep 17 00:00:00 2001 +From: Bharath SM <bharathsm.hsk@gmail.com> +Date: Wed, 11 Jun 2025 16:59:02 +0530 +Subject: smb: improve directory cache reuse for readdir operations + +From: Bharath SM <bharathsm.hsk@gmail.com> + +commit 72dd7961a4bb4fa1fc456169a61dd12e68e50645 upstream. + +Currently, cached directory contents were not reused across subsequent +'ls' operations because the cache validity check relied on comparing +the ctx pointer, which changes with each readdir invocation. As a +result, the cached dir entries was not marked as valid and the cache was +not utilized for subsequent 'ls' operations. + +This change uses the file pointer, which remains consistent across all +readdir calls for a given directory instance, to associate and validate +the cache. As a result, cached directory contents can now be +correctly reused, improving performance for repeated directory listings. 
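+
+With this change the cache validity tests reduce to a pointer comparison
+against the open file instance (a sketch; the hunks below thread the file
+pointer through each helper):
+
+	if (cde->file != file)
+		return;		/* cache belongs to a different open of the dir */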
+ +Performance gains with local windows SMB server: + +Without the patch and default actimeo=1: + 1000 directory enumeration operations on dir with 10k files took 135.0s + +With this patch and actimeo=0: + 1000 directory enumeration operations on dir with 10k files took just 5.1s + +Signed-off-by: Bharath SM <bharathsm@microsoft.com> +Reviewed-by: Shyam Prasad N <sprasad@microsoft.com> +Cc: stable@vger.kernel.org +Signed-off-by: Steve French <stfrench@microsoft.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + fs/smb/client/cached_dir.h | 8 ++++---- + fs/smb/client/readdir.c | 28 +++++++++++++++------------- + 2 files changed, 19 insertions(+), 17 deletions(-) + +--- a/fs/smb/client/cached_dir.h ++++ b/fs/smb/client/cached_dir.h +@@ -21,10 +21,10 @@ struct cached_dirent { + struct cached_dirents { + bool is_valid:1; + bool is_failed:1; +- struct dir_context *ctx; /* +- * Only used to make sure we only take entries +- * from a single context. Never dereferenced. +- */ ++ struct file *file; /* ++ * Used to associate the cache with a single ++ * open file instance. ++ */ + struct mutex de_mutex; + int pos; /* Expected ctx->pos */ + struct list_head entries; +--- a/fs/smb/client/readdir.c ++++ b/fs/smb/client/readdir.c +@@ -850,9 +850,9 @@ static bool emit_cached_dirents(struct c + } + + static void update_cached_dirents_count(struct cached_dirents *cde, +- struct dir_context *ctx) ++ struct file *file) + { +- if (cde->ctx != ctx) ++ if (cde->file != file) + return; + if (cde->is_valid || cde->is_failed) + return; +@@ -861,9 +861,9 @@ static void update_cached_dirents_count( + } + + static void finished_cached_dirents_count(struct cached_dirents *cde, +- struct dir_context *ctx) ++ struct dir_context *ctx, struct file *file) + { +- if (cde->ctx != ctx) ++ if (cde->file != file) + return; + if (cde->is_valid || cde->is_failed) + return; +@@ -876,11 +876,12 @@ static void finished_cached_dirents_coun + static void add_cached_dirent(struct cached_dirents *cde, + struct dir_context *ctx, + const char *name, int namelen, +- struct cifs_fattr *fattr) ++ struct cifs_fattr *fattr, ++ struct file *file) + { + struct cached_dirent *de; + +- if (cde->ctx != ctx) ++ if (cde->file != file) + return; + if (cde->is_valid || cde->is_failed) + return; +@@ -910,7 +911,8 @@ static void add_cached_dirent(struct cac + static bool cifs_dir_emit(struct dir_context *ctx, + const char *name, int namelen, + struct cifs_fattr *fattr, +- struct cached_fid *cfid) ++ struct cached_fid *cfid, ++ struct file *file) + { + bool rc; + ino_t ino = cifs_uniqueid_to_ino_t(fattr->cf_uniqueid); +@@ -922,7 +924,7 @@ static bool cifs_dir_emit(struct dir_con + if (cfid) { + mutex_lock(&cfid->dirents.de_mutex); + add_cached_dirent(&cfid->dirents, ctx, name, namelen, +- fattr); ++ fattr, file); + mutex_unlock(&cfid->dirents.de_mutex); + } + +@@ -1022,7 +1024,7 @@ static int cifs_filldir(char *find_entry + cifs_prime_dcache(file_dentry(file), &name, &fattr); + + return !cifs_dir_emit(ctx, name.name, name.len, +- &fattr, cfid); ++ &fattr, cfid, file); + } + + +@@ -1073,8 +1075,8 @@ int cifs_readdir(struct file *file, stru + * we need to initialize scanning and storing the + * directory content. 
+ */
+- if (ctx->pos == 0 && cfid->dirents.ctx == NULL) {
+- cfid->dirents.ctx = ctx;
++ if (ctx->pos == 0 && cfid->dirents.file == NULL) {
++ cfid->dirents.file = file;
+ cfid->dirents.pos = 2;
+ }
+ /*
+@@ -1142,7 +1144,7 @@ int cifs_readdir(struct file *file, stru
+ } else {
+ if (cfid) {
+ mutex_lock(&cfid->dirents.de_mutex);
+- finished_cached_dirents_count(&cfid->dirents, ctx);
++ finished_cached_dirents_count(&cfid->dirents, ctx, file);
+ mutex_unlock(&cfid->dirents.de_mutex);
+ }
+ cifs_dbg(FYI, "Could not find entry\n");
+@@ -1183,7 +1185,7 @@ int cifs_readdir(struct file *file, stru
+ ctx->pos++;
+ if (cfid) {
+ mutex_lock(&cfid->dirents.de_mutex);
+- update_cached_dirents_count(&cfid->dirents, ctx);
++ update_cached_dirents_count(&cfid->dirents, file);
+ mutex_unlock(&cfid->dirents.de_mutex);
+ }
+
diff --git a/queue-6.15/tracing-fix-regression-of-filter-waiting-a-long-time-on-rcu-synchronization.patch b/queue-6.15/tracing-fix-regression-of-filter-waiting-a-long-time-on-rcu-synchronization.patch
new file mode 100644
index 00000000000..bc8d1df4b6c
--- /dev/null
+++ b/queue-6.15/tracing-fix-regression-of-filter-waiting-a-long-time-on-rcu-synchronization.patch
@@ -0,0 +1,405 @@
+From a9d0aab5eb33a44792a66b7af13ff50d7b3e7022 Mon Sep 17 00:00:00 2001
+From: Steven Rostedt <rostedt@goodmis.org>
+Date: Fri, 6 Jun 2025 20:20:20 -0400
+Subject: tracing: Fix regression of filter waiting a long time on RCU synchronization
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+commit a9d0aab5eb33a44792a66b7af13ff50d7b3e7022 upstream.
+
+When faultable trace events were added, a trace event may no longer use
+normal RCU to synchronize but instead use synchronize_rcu_tasks_trace().
+This synchronization takes a much longer time to complete.
+
+The filter logic would free the filters by calling
+tracepoint_synchronize_unregister() after it unhooked the filter strings
+and before freeing them. With this function now calling
+synchronize_rcu_tasks_trace() this increased the time to free a filter
+tremendously. On a PREEMPT_RT system, it was even more noticeable.
+
+ # time trace-cmd record -p function sleep 1
+ [..]
+ real 2m29.052s
+ user 0m0.244s
+ sys 0m20.136s
+
+As trace-cmd would clear out all the filters before recording, it could
+take up to 2 minutes to do a recording of "sleep 1".
+
+To find out where the issue was:
+
+ ~# trace-cmd sqlhist -e -n sched_stack select start.prev_state as state, end.next_comm as comm, TIMESTAMP_DELTA_USECS as delta, start.STACKTRACE as stack from sched_switch as start join sched_switch as end on start.prev_pid = end.next_pid
+
+Which will produce the following commands (and -e will also execute them):
+
+ echo 's:sched_stack s64 state; char comm[16]; u64 delta; unsigned long stack[];' >> /sys/kernel/tracing/dynamic_events
+ echo 'hist:keys=prev_pid:__arg_18057_2=prev_state,__arg_18057_4=common_timestamp.usecs,__arg_18057_7=common_stacktrace' >> /sys/kernel/tracing/events/sched/sched_switch/trigger
+ echo 'hist:keys=next_pid:__state_18057_1=$__arg_18057_2,__comm_18057_3=next_comm,__delta_18057_5=common_timestamp.usecs-$__arg_18057_4,__stack_18057_6=$__arg_18057_7:onmatch(sched.sched_switch).trace(sched_stack,$__state_18057_1,$__comm_18057_3,$__delta_18057_5,$__stack_18057_6)' >> /sys/kernel/tracing/events/sched/sched_switch/trigger
+
+The above creates a synthetic event that creates a stack trace when a task
+schedules out and records it with the time it scheduled back in. Basically
+the time a task is off the CPU. It also records the state of the task when
+it left the CPU (running, blocked, sleeping, etc). It also saves the comm
+of the task as "comm" (needed for the next command).
+
+~# echo 'hist:keys=state,stack.stacktrace:vals=delta:sort=state,delta if comm == "trace-cmd" && state & 3' > /sys/kernel/tracing/events/synthetic/sched_stack/trigger
+
+The above creates a histogram with buckets per state, per stack, and the
+value of the total time it was off the CPU for that stack trace. It filters
+on tasks with "comm == trace-cmd" and only the sleeping and blocked states
+(1 - sleeping, 2 - blocked).
+
+~# trace-cmd record -p function sleep 1
+
+~# cat /sys/kernel/tracing/events/synthetic/sched_stack/hist | tail -18
+{ state: 2, stack.stacktrace __schedule+0x1545/0x3700
+ schedule+0xe2/0x390
+ schedule_timeout+0x175/0x200
+ wait_for_completion_state+0x294/0x440
+ __wait_rcu_gp+0x247/0x4f0
+ synchronize_rcu_tasks_generic+0x151/0x230
+ apply_subsystem_event_filter+0xa2b/0x1300
+ subsystem_filter_write+0x67/0xc0
+ vfs_write+0x1e2/0xeb0
+ ksys_write+0xff/0x1d0
+ do_syscall_64+0x7b/0x420
+ entry_SYSCALL_64_after_hwframe+0x76/0x7e
+} hitcount: 237 delta: 99756288 <<--------------- Delta is 99 seconds!
+
+Totals:
+ Hits: 525
+ Entries: 21
+ Dropped: 0
+
+This shows that this particular trace waited for 99 seconds on
+synchronize_rcu_tasks() in apply_subsystem_event_filter().
+
+In fact, there are a lot of places in the filter code that spend a lot of
+time waiting for synchronize_rcu_tasks_trace() in order to free the
+filters.
+
+Add helper functions that will use call_rcu*() variants to asynchronously
+free the filters. This brings the timings back to normal:
+
+ # time trace-cmd record -p function sleep 1
+ [..]
+ real 0m14.681s
+ user 0m0.335s
+ sys 0m28.616s
+
+And the histogram also shows this:
+
+~# cat /sys/kernel/tracing/events/synthetic/sched_stack/hist | tail -21
+{ state: 2, stack.stacktrace __schedule+0x1545/0x3700
+ schedule+0xe2/0x390
+ schedule_timeout+0x175/0x200
+ wait_for_completion_state+0x294/0x440
+ __wait_rcu_gp+0x247/0x4f0
+ synchronize_rcu_normal+0x3db/0x5c0
+ tracing_reset_online_cpus+0x8f/0x1e0
+ tracing_open+0x335/0x440
+ do_dentry_open+0x4c6/0x17a0
+ vfs_open+0x82/0x360
+ path_openat+0x1a36/0x2990
+ do_filp_open+0x1c5/0x420
+ do_sys_openat2+0xed/0x180
+ __x64_sys_openat+0x108/0x1d0
+ do_syscall_64+0x7b/0x420
+} hitcount: 2 delta: 77044
+
+Totals:
+ Hits: 55
+ Entries: 28
+ Dropped: 0
+
+Where the total waiting time of synchronize_rcu_tasks_trace() is 77
+milliseconds.
+
+Cc: stable@vger.kernel.org
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: "Paul E. 
McKenney" <paulmck@kernel.org> +Cc: Jan Kiszka <jan.kiszka@siemens.com> +Cc: Andreas Ziegler <ziegler.andreas@siemens.com> +Cc: Felix MOESSBAUER <felix.moessbauer@siemens.com> +Link: https://lore.kernel.org/20250606201936.1e3d09a9@batman.local.home +Reported-by: "Flot, Julien" <julien.flot@siemens.com> +Tested-by: Julien Flot <julien.flot@siemens.com> +Fixes: a363d27cdbc2 ("tracing: Allow system call tracepoints to handle page faults") +Closes: https://lore.kernel.org/all/240017f656631c7dd4017aa93d91f41f653788ea.camel@siemens.com/ +Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/trace/trace_events_filter.c | 186 +++++++++++++++++++++++++++---------- + 1 file changed, 138 insertions(+), 48 deletions(-) + +--- a/kernel/trace/trace_events_filter.c ++++ b/kernel/trace/trace_events_filter.c +@@ -1335,22 +1335,139 @@ static void filter_free_subsystem_preds( + } + } + ++struct filter_list { ++ struct list_head list; ++ struct event_filter *filter; ++}; ++ ++struct filter_head { ++ struct list_head list; ++ struct rcu_head rcu; ++}; ++ ++ ++static void free_filter_list(struct rcu_head *rhp) ++{ ++ struct filter_head *filter_list = container_of(rhp, struct filter_head, rcu); ++ struct filter_list *filter_item, *tmp; ++ ++ list_for_each_entry_safe(filter_item, tmp, &filter_list->list, list) { ++ __free_filter(filter_item->filter); ++ list_del(&filter_item->list); ++ kfree(filter_item); ++ } ++ kfree(filter_list); ++} ++ ++static void free_filter_list_tasks(struct rcu_head *rhp) ++{ ++ call_rcu(rhp, free_filter_list); ++} ++ ++/* ++ * The tracepoint_synchronize_unregister() is a double rcu call. ++ * It calls synchronize_rcu_tasks_trace() followed by synchronize_rcu(). ++ * Instead of waiting for it, simply call these via the call_rcu*() ++ * variants. 
++ */ ++static void delay_free_filter(struct filter_head *head) ++{ ++ call_rcu_tasks_trace(&head->rcu, free_filter_list_tasks); ++} ++ ++static void try_delay_free_filter(struct event_filter *filter) ++{ ++ struct filter_head *head; ++ struct filter_list *item; ++ ++ head = kmalloc(sizeof(*head), GFP_KERNEL); ++ if (!head) ++ goto free_now; ++ ++ INIT_LIST_HEAD(&head->list); ++ ++ item = kmalloc(sizeof(*item), GFP_KERNEL); ++ if (!item) { ++ kfree(head); ++ goto free_now; ++ } ++ ++ item->filter = filter; ++ list_add_tail(&item->list, &head->list); ++ delay_free_filter(head); ++ return; ++ ++ free_now: ++ /* Make sure the filter is not being used */ ++ tracepoint_synchronize_unregister(); ++ __free_filter(filter); ++} ++ + static inline void __free_subsystem_filter(struct trace_event_file *file) + { + __free_filter(file->filter); + file->filter = NULL; + } + ++static inline void event_set_filter(struct trace_event_file *file, ++ struct event_filter *filter) ++{ ++ rcu_assign_pointer(file->filter, filter); ++} ++ ++static inline void event_clear_filter(struct trace_event_file *file) ++{ ++ RCU_INIT_POINTER(file->filter, NULL); ++} ++ + static void filter_free_subsystem_filters(struct trace_subsystem_dir *dir, +- struct trace_array *tr) ++ struct trace_array *tr, ++ struct event_filter *filter) + { + struct trace_event_file *file; ++ struct filter_head *head; ++ struct filter_list *item; ++ ++ head = kmalloc(sizeof(*head), GFP_KERNEL); ++ if (!head) ++ goto free_now; ++ ++ INIT_LIST_HEAD(&head->list); ++ ++ item = kmalloc(sizeof(*item), GFP_KERNEL); ++ if (!item) { ++ kfree(head); ++ goto free_now; ++ } ++ ++ item->filter = filter; ++ list_add_tail(&item->list, &head->list); + + list_for_each_entry(file, &tr->events, list) { + if (file->system != dir) + continue; +- __free_subsystem_filter(file); ++ item = kmalloc(sizeof(*item), GFP_KERNEL); ++ if (!item) ++ goto free_now; ++ item->filter = event_filter(file); ++ list_add_tail(&item->list, &head->list); ++ event_clear_filter(file); ++ } ++ ++ delay_free_filter(head); ++ return; ++ free_now: ++ tracepoint_synchronize_unregister(); ++ ++ if (head) ++ free_filter_list(&head->rcu); ++ ++ list_for_each_entry(file, &tr->events, list) { ++ if (file->system != dir || !file->filter) ++ continue; ++ __free_filter(file->filter); + } ++ __free_filter(filter); + } + + int filter_assign_type(const char *type) +@@ -2120,22 +2237,6 @@ static inline void event_set_filtered_fl + trace_buffered_event_enable(); + } + +-static inline void event_set_filter(struct trace_event_file *file, +- struct event_filter *filter) +-{ +- rcu_assign_pointer(file->filter, filter); +-} +- +-static inline void event_clear_filter(struct trace_event_file *file) +-{ +- RCU_INIT_POINTER(file->filter, NULL); +-} +- +-struct filter_list { +- struct list_head list; +- struct event_filter *filter; +-}; +- + static int process_system_preds(struct trace_subsystem_dir *dir, + struct trace_array *tr, + struct filter_parse_error *pe, +@@ -2144,11 +2245,16 @@ static int process_system_preds(struct t + struct trace_event_file *file; + struct filter_list *filter_item; + struct event_filter *filter = NULL; +- struct filter_list *tmp; +- LIST_HEAD(filter_list); ++ struct filter_head *filter_list; + bool fail = true; + int err; + ++ filter_list = kmalloc(sizeof(*filter_list), GFP_KERNEL); ++ if (!filter_list) ++ return -ENOMEM; ++ ++ INIT_LIST_HEAD(&filter_list->list); ++ + list_for_each_entry(file, &tr->events, list) { + + if (file->system != dir) +@@ -2175,7 +2281,7 @@ static int 
process_system_preds(struct t + if (!filter_item) + goto fail_mem; + +- list_add_tail(&filter_item->list, &filter_list); ++ list_add_tail(&filter_item->list, &filter_list->list); + /* + * Regardless of if this returned an error, we still + * replace the filter for the call. +@@ -2195,31 +2301,22 @@ static int process_system_preds(struct t + * Do a synchronize_rcu() and to ensure all calls are + * done with them before we free them. + */ +- tracepoint_synchronize_unregister(); +- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { +- __free_filter(filter_item->filter); +- list_del(&filter_item->list); +- kfree(filter_item); +- } ++ delay_free_filter(filter_list); + return 0; + fail: + /* No call succeeded */ +- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { +- list_del(&filter_item->list); +- kfree(filter_item); +- } ++ free_filter_list(&filter_list->rcu); + parse_error(pe, FILT_ERR_BAD_SUBSYS_FILTER, 0); + return -EINVAL; + fail_mem: + __free_filter(filter); ++ + /* If any call succeeded, we still need to sync */ + if (!fail) +- tracepoint_synchronize_unregister(); +- list_for_each_entry_safe(filter_item, tmp, &filter_list, list) { +- __free_filter(filter_item->filter); +- list_del(&filter_item->list); +- kfree(filter_item); +- } ++ delay_free_filter(filter_list); ++ else ++ free_filter_list(&filter_list->rcu); ++ + return -ENOMEM; + } + +@@ -2361,9 +2458,7 @@ int apply_event_filter(struct trace_even + + event_clear_filter(file); + +- /* Make sure the filter is not being used */ +- tracepoint_synchronize_unregister(); +- __free_filter(filter); ++ try_delay_free_filter(filter); + + return 0; + } +@@ -2387,11 +2482,8 @@ int apply_event_filter(struct trace_even + + event_set_filter(file, filter); + +- if (tmp) { +- /* Make sure the call is done with the filter */ +- tracepoint_synchronize_unregister(); +- __free_filter(tmp); +- } ++ if (tmp) ++ try_delay_free_filter(tmp); + } + + return err; +@@ -2417,9 +2509,7 @@ int apply_subsystem_event_filter(struct + filter = system->filter; + system->filter = NULL; + /* Ensure all filters are no longer used */ +- tracepoint_synchronize_unregister(); +- filter_free_subsystem_filters(dir, tr); +- __free_filter(filter); ++ filter_free_subsystem_filters(dir, tr, filter); + return 0; + } + diff --git a/queue-6.15/udmabuf-use-sgtable-based-scatterlist-wrappers.patch b/queue-6.15/udmabuf-use-sgtable-based-scatterlist-wrappers.patch new file mode 100644 index 00000000000..b0ccc81034d --- /dev/null +++ b/queue-6.15/udmabuf-use-sgtable-based-scatterlist-wrappers.patch @@ -0,0 +1,50 @@ +From afe382843717d44b24ef5014d57dcbaab75a4052 Mon Sep 17 00:00:00 2001 +From: Marek Szyprowski <m.szyprowski@samsung.com> +Date: Wed, 7 May 2025 18:09:12 +0200 +Subject: udmabuf: use sgtable-based scatterlist wrappers +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Marek Szyprowski <m.szyprowski@samsung.com> + +commit afe382843717d44b24ef5014d57dcbaab75a4052 upstream. + +Use common wrappers operating directly on the struct sg_table objects to +fix incorrect use of scatterlists sync calls. dma_sync_sg_for_*() +functions have to be called with the number of elements originally passed +to dma_map_sg_*() function, not the one returned in sgtable's nents. 
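To spell out what the wrapper buys: dma_sync_sgtable_for_cpu() passes sgt->orig_nents (the count originally handed to dma_map_sg()) rather than sgt->nents (the possibly smaller merged count dma_map_sg() returned). A hedged sketch of the distinction follows; my_sync_for_cpu() is an invented stand-in, not the kernel helper itself.

    #include <linux/dma-mapping.h>
    #include <linux/scatterlist.h>

    static inline void my_sync_for_cpu(struct device *dev, struct sg_table *sgt,
                                       enum dma_data_direction dir)
    {
            /*
             * sgt->nents is what dma_map_sg() returned (entries may have
             * been merged by the IOMMU); sgt->orig_nents is what was
             * passed in. The sync calls must be given orig_nents --
             * using nents is the bug this patch removes from udmabuf.
             */
            dma_sync_sg_for_cpu(dev, sgt->sgl, sgt->orig_nents, dir);
    }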
+ +Fixes: 1ffe09590121 ("udmabuf: fix dma-buf cpu access") +CC: stable@vger.kernel.org +Signed-off-by: Marek Szyprowski <m.szyprowski@samsung.com> +Acked-by: Vivek Kasireddy <vivek.kasireddy@intel.com> +Reviewed-by: Christian König <christian.koenig@amd.com> +Signed-off-by: Christian König <christian.koenig@amd.com> +Link: https://lore.kernel.org/r/20250507160913.2084079-3-m.szyprowski@samsung.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/dma-buf/udmabuf.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/dma-buf/udmabuf.c ++++ b/drivers/dma-buf/udmabuf.c +@@ -264,8 +264,7 @@ static int begin_cpu_udmabuf(struct dma_ + ubuf->sg = NULL; + } + } else { +- dma_sync_sg_for_cpu(dev, ubuf->sg->sgl, ubuf->sg->nents, +- direction); ++ dma_sync_sgtable_for_cpu(dev, ubuf->sg, direction); + } + + return ret; +@@ -280,7 +279,7 @@ static int end_cpu_udmabuf(struct dma_bu + if (!ubuf->sg) + return -EINVAL; + +- dma_sync_sg_for_device(dev, ubuf->sg->sgl, ubuf->sg->nents, direction); ++ dma_sync_sgtable_for_device(dev, ubuf->sg, direction); + return 0; + } + diff --git a/queue-6.15/x86-its-explicitly-manage-permissions-for-its-pages.patch b/queue-6.15/x86-its-explicitly-manage-permissions-for-its-pages.patch new file mode 100644 index 00000000000..3a55f8fcc0e --- /dev/null +++ b/queue-6.15/x86-its-explicitly-manage-permissions-for-its-pages.patch @@ -0,0 +1,153 @@ +From a82b26451de126a5ae130361081986bc459afe9b Mon Sep 17 00:00:00 2001 +From: "Peter Zijlstra (Intel)" <peterz@infradead.org> +Date: Tue, 3 Jun 2025 14:14:44 +0300 +Subject: x86/its: explicitly manage permissions for ITS pages + +From: Peter Zijlstra (Intel) <peterz@infradead.org> + +commit a82b26451de126a5ae130361081986bc459afe9b upstream. + +execmem_alloc() sets permissions differently depending on the kernel +configuration, CPU support for PSE and whether a page is allocated +before or after mark_rodata_ro(). + +Add tracking for pages allocated for ITS when patching the core kernel +and make sure the permissions for ITS pages are explicitly managed for +both kernel and module allocations. + +Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Co-developed-by: Mike Rapoport (Microsoft) <rppt@kernel.org> +Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Reviewed-by: Nikolay Borisov <nik.borisov@suse.com> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20250603111446.2609381-5-rppt@kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/kernel/alternative.c | 74 +++++++++++++++++++++++++++++------------- + 1 file changed, 52 insertions(+), 22 deletions(-) + +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -138,6 +138,24 @@ static struct module *its_mod; + #endif + static void *its_page; + static unsigned int its_offset; ++struct its_array its_pages; ++ ++static void *__its_alloc(struct its_array *pages) ++{ ++ void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE); ++ if (!page) ++ return NULL; ++ ++ void *tmp = krealloc(pages->pages, (pages->num+1) * sizeof(void *), ++ GFP_KERNEL); ++ if (!tmp) ++ return NULL; ++ ++ pages->pages = tmp; ++ pages->pages[pages->num++] = page; ++ ++ return no_free_ptr(page); ++} + + /* Initialize a thunk with the "jmp *reg; int3" instructions. 
*/
+ static void *its_init_thunk(void *thunk, int reg)
+@@ -173,6 +191,21 @@ static void *its_init_thunk(void *thunk,
+ return thunk + offset;
+ }
+
++static void its_pages_protect(struct its_array *pages)
++{
++ for (int i = 0; i < pages->num; i++) {
++ void *page = pages->pages[i];
++ execmem_restore_rox(page, PAGE_SIZE);
++ }
++}
++
++static void its_fini_core(void)
++{
++ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
++ its_pages_protect(&its_pages);
++ kfree(its_pages.pages);
++}
++
+ #ifdef CONFIG_MODULES
+ void its_init_mod(struct module *mod)
+ {
+@@ -195,10 +228,8 @@ void its_fini_mod(struct module *mod)
+ its_page = NULL;
+ mutex_unlock(&text_mutex);
+
+- for (int i = 0; i < mod->arch.its_pages.num; i++) {
+- void *page = mod->arch.its_pages.pages[i];
+- execmem_restore_rox(page, PAGE_SIZE);
+- }
++ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
++ its_pages_protect(&mod->arch.its_pages);
+ }
+
+ void its_free_mod(struct module *mod)
+@@ -216,28 +247,23 @@ void its_free_mod(struct module *mod)
+
+ static void *its_alloc(void)
+ {
+- void *page __free(execmem) = execmem_alloc(EXECMEM_MODULE_TEXT, PAGE_SIZE);
++ struct its_array *pages = &its_pages;
++ void *page;
+
++#ifdef CONFIG_MODULE
++ if (its_mod)
++ pages = &its_mod->arch.its_pages;
++#endif
++
++ page = __its_alloc(pages);
+ if (!page)
+ return NULL;
+
+-#ifdef CONFIG_MODULES
+- if (its_mod) {
+- struct its_array *pages = &its_mod->arch.its_pages;
+- void *tmp = krealloc(pages->pages,
+- (pages->num+1) * sizeof(void *),
+- GFP_KERNEL);
+- if (!tmp)
+- return NULL;
+-
+- pages->pages = tmp;
+- pages->pages[pages->num++] = page;
++ execmem_make_temp_rw(page, PAGE_SIZE);
++ if (pages == &its_pages)
++ set_memory_x((unsigned long)page, 1);
+
+- execmem_make_temp_rw(page, PAGE_SIZE);
+- }
+-#endif /* CONFIG_MODULES */
+-
+- return no_free_ptr(page);
++ return page;
+ }
+
+ static void *its_allocate_thunk(int reg)
+@@ -291,7 +317,9 @@ u8 *its_static_thunk(int reg)
+ return thunk;
+ }
+
+-#endif
++#else
++static inline void its_fini_core(void) {}
++#endif /* CONFIG_MITIGATION_ITS */
+
+ /*
+ * Nomenclature for variable names to simplify and clarify this code and ease
+@@ -2368,6 +2396,8 @@ void __init alternative_instructions(voi
+ apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+ apply_returns(__return_sites, __return_sites_end);
+
++ its_fini_core();
++
+ /*
+ * Adjust all CALL instructions to point to func()-10, including
+ * those in .altinstr_replacement.
diff --git a/queue-6.15/x86-its-move-its_pages-array-to-struct-mod_arch_specific.patch b/queue-6.15/x86-its-move-its_pages-array-to-struct-mod_arch_specific.patch
new file mode 100644
index 00000000000..55399e4068a
--- /dev/null
+++ b/queue-6.15/x86-its-move-its_pages-array-to-struct-mod_arch_specific.patch
@@ -0,0 +1,115 @@
+From 0b0cae7119a0ec9449d7261b5e672a5fed765068 Mon Sep 17 00:00:00 2001
+From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
+Date: Tue, 3 Jun 2025 14:14:43 +0300
+Subject: x86/its: move its_pages array to struct mod_arch_specific
+
+From: Mike Rapoport (Microsoft) <rppt@kernel.org>
+
+commit 0b0cae7119a0ec9449d7261b5e672a5fed765068 upstream.
+
+The pages with ITS thunks allocated for modules are tracked by an
+array in 'struct module'.
+
+Since this is a very architecture-specific data structure, move it to
+'struct mod_arch_specific'.
+
+No functional changes. 
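In case the pattern is unfamiliar: the tracking array that moves here is grown one slot at a time with krealloc(). A stand-alone model of that pattern follows, for illustration only; the names are invented, and realloc() stands in for krealloc(..., GFP_KERNEL).

    /*
     * Grow a NULL-initialized pointer array by one element. On
     * allocation failure the old array stays valid and untouched,
     * which is why the return value must be checked before the
     * stored pointer is overwritten.
     */
    #include <stdlib.h>

    struct its_array_model {
            void **pages;
            int num;
    };

    static int its_array_add(struct its_array_model *arr, void *page)
    {
            void **tmp = realloc(arr->pages, (arr->num + 1) * sizeof(void *));

            if (!tmp)
                    return -1;      /* caller frees 'page' or bails out */

            arr->pages = tmp;
            arr->pages[arr->num++] = page;
            return 0;
    }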
+ +Fixes: 872df34d7c51 ("x86/its: Use dynamic thunks for indirect branches") +Suggested-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: stable@vger.kernel.org +Link: https://lkml.kernel.org/r/20250603111446.2609381-4-rppt@kernel.org +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/module.h | 8 ++++++++ + arch/x86/kernel/alternative.c | 19 ++++++++++--------- + include/linux/module.h | 5 ----- + 3 files changed, 18 insertions(+), 14 deletions(-) + +--- a/arch/x86/include/asm/module.h ++++ b/arch/x86/include/asm/module.h +@@ -5,12 +5,20 @@ + #include <asm-generic/module.h> + #include <asm/orc_types.h> + ++struct its_array { ++#ifdef CONFIG_MITIGATION_ITS ++ void **pages; ++ int num; ++#endif ++}; ++ + struct mod_arch_specific { + #ifdef CONFIG_UNWINDER_ORC + unsigned int num_orcs; + int *orc_unwind_ip; + struct orc_entry *orc_unwind; + #endif ++ struct its_array its_pages; + }; + + #endif /* _ASM_X86_MODULE_H */ +--- a/arch/x86/kernel/alternative.c ++++ b/arch/x86/kernel/alternative.c +@@ -195,8 +195,8 @@ void its_fini_mod(struct module *mod) + its_page = NULL; + mutex_unlock(&text_mutex); + +- for (int i = 0; i < mod->its_num_pages; i++) { +- void *page = mod->its_page_array[i]; ++ for (int i = 0; i < mod->arch.its_pages.num; i++) { ++ void *page = mod->arch.its_pages.pages[i]; + execmem_restore_rox(page, PAGE_SIZE); + } + } +@@ -206,11 +206,11 @@ void its_free_mod(struct module *mod) + if (!cpu_feature_enabled(X86_FEATURE_INDIRECT_THUNK_ITS)) + return; + +- for (int i = 0; i < mod->its_num_pages; i++) { +- void *page = mod->its_page_array[i]; ++ for (int i = 0; i < mod->arch.its_pages.num; i++) { ++ void *page = mod->arch.its_pages.pages[i]; + execmem_free(page); + } +- kfree(mod->its_page_array); ++ kfree(mod->arch.its_pages.pages); + } + #endif /* CONFIG_MODULES */ + +@@ -223,14 +223,15 @@ static void *its_alloc(void) + + #ifdef CONFIG_MODULES + if (its_mod) { +- void *tmp = krealloc(its_mod->its_page_array, +- (its_mod->its_num_pages+1) * sizeof(void *), ++ struct its_array *pages = &its_mod->arch.its_pages; ++ void *tmp = krealloc(pages->pages, ++ (pages->num+1) * sizeof(void *), + GFP_KERNEL); + if (!tmp) + return NULL; + +- its_mod->its_page_array = tmp; +- its_mod->its_page_array[its_mod->its_num_pages++] = page; ++ pages->pages = tmp; ++ pages->pages[pages->num++] = page; + + execmem_make_temp_rw(page, PAGE_SIZE); + } +--- a/include/linux/module.h ++++ b/include/linux/module.h +@@ -586,11 +586,6 @@ struct module { + atomic_t refcnt; + #endif + +-#ifdef CONFIG_MITIGATION_ITS +- int its_num_pages; +- void **its_page_array; +-#endif +- + #ifdef CONFIG_CONSTRUCTORS + /* Constructor functions. 
*/
+ ctor_fn_t *ctors;
diff --git a/queue-6.15/x86-kconfig-only-enable-rox-cache-in-execmem-when-strict_module_rwx-is-set.patch b/queue-6.15/x86-kconfig-only-enable-rox-cache-in-execmem-when-strict_module_rwx-is-set.patch
new file mode 100644
index 00000000000..9264f0c0e45
--- /dev/null
+++ b/queue-6.15/x86-kconfig-only-enable-rox-cache-in-execmem-when-strict_module_rwx-is-set.patch
@@ -0,0 +1,38 @@
+From 47410d839fcda6890cb82828f874f97710982f24 Mon Sep 17 00:00:00 2001
+From: "Mike Rapoport (Microsoft)" <rppt@kernel.org>
+Date: Tue, 3 Jun 2025 14:14:42 +0300
+Subject: x86/Kconfig: only enable ROX cache in execmem when STRICT_MODULE_RWX is set
+
+From: Mike Rapoport (Microsoft) <rppt@kernel.org>
+
+commit 47410d839fcda6890cb82828f874f97710982f24 upstream.
+
+Currently ROX cache in execmem is enabled regardless of
+STRICT_MODULE_RWX setting. This breaks an assumption that module memory
+is writable when STRICT_MODULE_RWX is disabled, for instance for kernel
+debugging.
+
+Only enable ROX cache in execmem when STRICT_MODULE_RWX is set to
+restore the original behaviour of module text permissions.
+
+Fixes: 64f6a4e10c05 ("x86: re-enable EXECMEM_ROX support")
+Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org
+Link: https://lkml.kernel.org/r/20250603111446.2609381-3-rppt@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -88,7 +88,7 @@ config X86
+ select ARCH_HAS_DMA_OPS if GART_IOMMU || XEN
+ select ARCH_HAS_EARLY_DEBUG if KGDB
+ select ARCH_HAS_ELF_RANDOMIZE
+- select ARCH_HAS_EXECMEM_ROX if X86_64
++ select ARCH_HAS_EXECMEM_ROX if X86_64 && STRICT_MODULE_RWX
+ select ARCH_HAS_FAST_MULTIPLIER
+ select ARCH_HAS_FORTIFY_SOURCE
+ select ARCH_HAS_GCOV_PROFILE_ALL
diff --git a/queue-6.15/x86-mm-pat-don-t-collapse-pages-without-pse-set.patch b/queue-6.15/x86-mm-pat-don-t-collapse-pages-without-pse-set.patch
new file mode 100644
index 00000000000..2fafda8b1f5
--- /dev/null
+++ b/queue-6.15/x86-mm-pat-don-t-collapse-pages-without-pse-set.patch
@@ -0,0 +1,41 @@
+From 1dbf30fdb5e57fb2c39f17f35f2b544d5de34397 Mon Sep 17 00:00:00 2001
+From: Juergen Gross <jgross@suse.com>
+Date: Tue, 3 Jun 2025 14:14:41 +0300
+Subject: x86/mm/pat: don't collapse pages without PSE set
+
+From: Juergen Gross <jgross@suse.com>
+
+commit 1dbf30fdb5e57fb2c39f17f35f2b544d5de34397 upstream.
+
+Collapsing pages to a leaf PMD or PUD should be done only if
+X86_FEATURE_PSE is available, which is not the case when running e.g.
+as a Xen PV guest. 
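The guard itself is a single early return; its shape, lifted out of context, looks as below. try_collapse() is an invented name standing in for the collapse helpers in set_memory.c.

    /*
     * Sketch of the check the patch adds: without PSE there are no
     * large leaf PMD/PUD mappings to collapse into (Xen PV guests,
     * for example, have the feature filtered out), so report that
     * nothing was collapsed and keep the 4K mappings.
     */
    #include <asm/cpufeature.h>

    static int try_collapse(void)
    {
            if (!cpu_feature_enabled(X86_FEATURE_PSE))
                    return 0;

            /* ... walk the PTEs and install a leaf entry ... */
            return 1;
    }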
+ +Fixes: 41d88484c71c ("x86/mm/pat: restore large ROX pages after fragmentation") +Signed-off-by: Juergen Gross <jgross@suse.com> +Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org> +Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/r/20250528123557.12847-3-jgross@suse.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/mm/pat/set_memory.c | 3 +++ + 1 file changed, 3 insertions(+) + +diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c +index 46edc11726b7..8834c76f91c9 100644 +--- a/arch/x86/mm/pat/set_memory.c ++++ b/arch/x86/mm/pat/set_memory.c +@@ -1257,6 +1257,9 @@ static int collapse_pmd_page(pmd_t *pmd, unsigned long addr, + pgprot_t pgprot; + int i = 0; + ++ if (!cpu_feature_enabled(X86_FEATURE_PSE)) ++ return 0; ++ + addr &= PMD_MASK; + pte = pte_offset_kernel(pmd, addr); + first = *pte; +-- +2.50.0 + diff --git a/queue-6.15/x86-virt-tdx-avoid-indirect-calls-to-tdx-assembly-functions.patch b/queue-6.15/x86-virt-tdx-avoid-indirect-calls-to-tdx-assembly-functions.patch new file mode 100644 index 00000000000..f0ef1ab6bec --- /dev/null +++ b/queue-6.15/x86-virt-tdx-avoid-indirect-calls-to-tdx-assembly-functions.patch @@ -0,0 +1,68 @@ +From 0b3bc018e86afdc0cbfef61328c63d5c08f8b370 Mon Sep 17 00:00:00 2001 +From: Kai Huang <kai.huang@intel.com> +Date: Sat, 7 Jun 2025 01:07:37 +1200 +Subject: x86/virt/tdx: Avoid indirect calls to TDX assembly functions + +From: Kai Huang <kai.huang@intel.com> + +commit 0b3bc018e86afdc0cbfef61328c63d5c08f8b370 upstream. + +Two 'static inline' TDX helper functions (sc_retry() and +sc_retry_prerr()) take function pointer arguments which refer to +assembly functions. Normally, the compiler inlines the TDX helper, +realizes that the function pointer targets are completely static -- +thus can be resolved at compile time -- and generates direct call +instructions. + +But, other times (like when CONFIG_CC_OPTIMIZE_FOR_SIZE=y), the +compiler declines to inline the helpers and will instead generate +indirect call instructions. + +Indirect calls to assembly functions require special annotation (for +various Control Flow Integrity mechanisms). But TDX assembly +functions lack the special annotations and can only be called +directly. + +Annotate both the helpers as '__always_inline' to prod the compiler +into maintaining the direct calls. There is no guarantee here, but +Peter has volunteered to report the compiler bug if this assumption +ever breaks[1]. 
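For intuition, the devirtualization the commit relies on can be reproduced in a few lines of plain C. The model below is illustration only; every name is invented, and an ordinary C function simulates the assembly stub.

    typedef unsigned long long (*sc_func_t)(unsigned long long fn);

    /* Stands in for a TDX assembly function lacking CFI/IBT annotation. */
    static unsigned long long seamcall_stub(unsigned long long fn)
    {
            return fn;
    }

    /*
     * Once this helper is inlined into its caller, 'func' becomes a
     * compile-time constant and the compiler emits a direct
     * "call seamcall_stub" -- no indirect call, so the unannotated
     * target is never reached through a function pointer at runtime.
     */
    static inline __attribute__((always_inline))
    unsigned long long do_call(sc_func_t func, unsigned long long fn)
    {
            return func(fn);
    }

    unsigned long long run_seamcall(void)
    {
            return do_call(seamcall_stub, 0x1234);
    }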
+ +Fixes: 1e66a7e27539 ("x86/virt/tdx: Handle SEAMCALL no entropy error in common code") +Fixes: df01f5ae07dd ("x86/virt/tdx: Add SEAMCALL error printing for module initialization") +Signed-off-by: Kai Huang <kai.huang@intel.com> +Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com> +Cc: stable@vger.kernel.org +Link: https://lore.kernel.org/lkml/20250605145914.GW39944@noisy.programming.kicks-ass.net/ [1] +Link: https://lore.kernel.org/all/20250606130737.30713-1-kai.huang%40intel.com +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + arch/x86/include/asm/tdx.h | 2 +- + arch/x86/virt/vmx/tdx/tdx.c | 5 +++-- + 2 files changed, 4 insertions(+), 3 deletions(-) + +--- a/arch/x86/include/asm/tdx.h ++++ b/arch/x86/include/asm/tdx.h +@@ -100,7 +100,7 @@ void tdx_init(void); + + typedef u64 (*sc_func_t)(u64 fn, struct tdx_module_args *args); + +-static inline u64 sc_retry(sc_func_t func, u64 fn, ++static __always_inline u64 sc_retry(sc_func_t func, u64 fn, + struct tdx_module_args *args) + { + int retry = RDRAND_RETRY_LOOPS; +--- a/arch/x86/virt/vmx/tdx/tdx.c ++++ b/arch/x86/virt/vmx/tdx/tdx.c +@@ -69,8 +69,9 @@ static inline void seamcall_err_ret(u64 + args->r9, args->r10, args->r11); + } + +-static inline int sc_retry_prerr(sc_func_t func, sc_err_func_t err_func, +- u64 fn, struct tdx_module_args *args) ++static __always_inline int sc_retry_prerr(sc_func_t func, ++ sc_err_func_t err_func, ++ u64 fn, struct tdx_module_args *args) + { + u64 sret = sc_retry(func, fn, args); + |