17 files changed, 3337 insertions(+), 0 deletions(-)
diff --git a/queue-5.10/bpf-add-mem_rdonly-for-helper-args-that-are-pointers-to-rdonly-mem.patch b/queue-5.10/bpf-add-mem_rdonly-for-helper-args-that-are-pointers-to-rdonly-mem.patch new file mode 100644 index 00000000000..80cd32caf51 --- /dev/null +++ b/queue-5.10/bpf-add-mem_rdonly-for-helper-args-that-are-pointers-to-rdonly-mem.patch @@ -0,0 +1,513 @@ +From stable+bounces-152308-greg=kroah.com@vger.kernel.org Tue Jun 10 16:46:36 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:44:02 +0000 +Subject: bpf: Add MEM_RDONLY for helper args that are pointers to rdonly mem. +To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-8-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit 216e3cd2f28dbbf1fe86848e0e29e6693b9f0a20 upstream. + +Some helper functions may modify its arguments, for example, +bpf_d_path, bpf_get_stack etc. Previously, their argument types +were marked as ARG_PTR_TO_MEM, which is compatible with read-only +mem types, such as PTR_TO_RDONLY_BUF. Therefore it's legitimate, +but technically incorrect, to modify a read-only memory by passing +it into one of such helper functions. + +This patch tags the bpf_args compatible with immutable memory with +MEM_RDONLY flag. The arguments that don't have this flag will be +only compatible with mutable memory types, preventing the helper +from modifying a read-only memory. The bpf_args that have +MEM_RDONLY are compatible with both mutable memory and immutable +memory. + +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-9-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 4 ++- + kernel/bpf/cgroup.c | 2 - + kernel/bpf/helpers.c | 6 ++-- + kernel/bpf/ringbuf.c | 2 - + kernel/bpf/verifier.c | 20 ++++++++++++--- + kernel/trace/bpf_trace.c | 22 ++++++++-------- + net/core/filter.c | 62 +++++++++++++++++++++++------------------------ + 7 files changed, 67 insertions(+), 51 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -277,7 +277,9 @@ enum bpf_type_flag { + /* PTR may be NULL. */ + PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), + +- /* MEM is read-only. */ ++ /* MEM is read-only. When applied on bpf_arg, it indicates the arg is ++ * compatible with both mutable and immutable memory. 
++ */ + MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), + + __BPF_TYPE_LAST_FLAG = MEM_RDONLY, +--- a/kernel/bpf/cgroup.c ++++ b/kernel/bpf/cgroup.c +@@ -1738,7 +1738,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + }; + +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -516,7 +516,7 @@ const struct bpf_func_proto bpf_strtol_p + .func = bpf_strtol, + .gpl_only = false, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM, ++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +@@ -544,7 +544,7 @@ const struct bpf_func_proto bpf_strtoul_ + .func = bpf_strtoul, + .gpl_only = false, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM, ++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_LONG, +@@ -616,7 +616,7 @@ const struct bpf_func_proto bpf_event_ou + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +--- a/kernel/bpf/ringbuf.c ++++ b/kernel/bpf/ringbuf.c +@@ -483,7 +483,7 @@ const struct bpf_func_proto bpf_ringbuf_ + .func = bpf_ringbuf_output, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, + }; +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4602,7 +4602,6 @@ static const struct bpf_reg_types mem_ty + PTR_TO_MAP_VALUE, + PTR_TO_MEM, + PTR_TO_BUF, +- PTR_TO_BUF | MEM_RDONLY, + }, + }; + +@@ -4663,6 +4662,21 @@ static int check_reg_type(struct bpf_ver + return -EFAULT; + } + ++ /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY, ++ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY ++ * ++ * Same for MAYBE_NULL: ++ * ++ * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL, ++ * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL ++ * ++ * Therefore we fold these flags depending on the arg_type before comparison. 
++ */ ++ if (arg_type & MEM_RDONLY) ++ type &= ~MEM_RDONLY; ++ if (arg_type & PTR_MAYBE_NULL) ++ type &= ~PTR_MAYBE_NULL; ++ + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { + expected = compatible->types[i]; + if (expected == NOT_INIT) +@@ -4672,14 +4686,14 @@ static int check_reg_type(struct bpf_ver + goto found; + } + +- verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, type)); ++ verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type)); + for (j = 0; j + 1 < i; j++) + verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); + verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); + return -EACCES; + + found: +- if (type == PTR_TO_BTF_ID) { ++ if (reg->type == PTR_TO_BTF_ID) { + if (!arg_btf_id) { + if (!compatible->btf_id) { + verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); +--- a/kernel/trace/bpf_trace.c ++++ b/kernel/trace/bpf_trace.c +@@ -342,7 +342,7 @@ static const struct bpf_func_proto bpf_p + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + }; + +@@ -545,7 +545,7 @@ static const struct bpf_func_proto bpf_t + .func = bpf_trace_printk, + .gpl_only = true, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM, ++ .arg1_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE, + }; + +@@ -754,9 +754,9 @@ static const struct bpf_func_proto bpf_s + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_seq_file_ids[0], +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, +- .arg4_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg4_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -771,7 +771,7 @@ static const struct bpf_func_proto bpf_s + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_seq_file_ids[0], +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -795,7 +795,7 @@ static const struct bpf_func_proto bpf_s + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID, + .arg1_btf_id = &btf_seq_file_ids[0], +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, + }; +@@ -956,7 +956,7 @@ static const struct bpf_func_proto bpf_p + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -1247,7 +1247,7 @@ const struct bpf_func_proto bpf_snprintf + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_MEM, + .arg2_type = ARG_CONST_SIZE, +- .arg3_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, + }; +@@ -1422,7 +1422,7 @@ static const struct bpf_func_proto bpf_p + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -1640,7 +1640,7 @@ static const struct bpf_func_proto bpf_p + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type 
= ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -1694,7 +1694,7 @@ static const struct bpf_func_proto bpf_g + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, + }; +--- a/net/core/filter.c ++++ b/net/core/filter.c +@@ -1724,7 +1724,7 @@ static const struct bpf_func_proto bpf_s + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE, + .arg5_type = ARG_ANYTHING, + }; +@@ -2033,9 +2033,9 @@ static const struct bpf_func_proto bpf_c + .gpl_only = false, + .pkt_access = true, + .ret_type = RET_INTEGER, +- .arg1_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg1_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg2_type = ARG_CONST_SIZE_OR_ZERO, +- .arg3_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg3_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE_OR_ZERO, + .arg5_type = ARG_ANYTHING, + }; +@@ -2582,7 +2582,7 @@ static const struct bpf_func_proto bpf_r + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, +- .arg2_type = ARG_PTR_TO_MEM_OR_NULL, ++ .arg2_type = ARG_PTR_TO_MEM | PTR_MAYBE_NULL | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, + }; +@@ -4254,7 +4254,7 @@ static const struct bpf_func_proto bpf_s + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -4268,7 +4268,7 @@ const struct bpf_func_proto bpf_skb_outp + .arg1_btf_id = &bpf_skb_output_btf_ids[0], + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -4451,7 +4451,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + }; +@@ -4477,7 +4477,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + }; + +@@ -4647,7 +4647,7 @@ static const struct bpf_func_proto bpf_x + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -4661,7 +4661,7 @@ const struct bpf_func_proto bpf_xdp_outp + .arg1_btf_id = &bpf_xdp_output_btf_ids[0], + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, + }; + +@@ -5079,7 +5079,7 @@ static const struct bpf_func_proto bpf_s + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, + }; + +@@ -5113,7 +5113,7 @@ static const struct bpf_func_proto bpf_s + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +- 
.arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, + }; + +@@ -5288,7 +5288,7 @@ static const struct bpf_func_proto bpf_b + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + }; + +@@ -5749,7 +5749,7 @@ static const struct bpf_func_proto bpf_l + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE + }; + +@@ -5759,7 +5759,7 @@ static const struct bpf_func_proto bpf_l + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE + }; + +@@ -5802,7 +5802,7 @@ static const struct bpf_func_proto bpf_l + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE + }; + +@@ -5890,7 +5890,7 @@ static const struct bpf_func_proto bpf_l + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +- .arg3_type = ARG_PTR_TO_MEM, ++ .arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg4_type = ARG_CONST_SIZE + }; + +@@ -6137,7 +6137,7 @@ static const struct bpf_func_proto bpf_s + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6156,7 +6156,7 @@ static const struct bpf_func_proto bpf_s + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6175,7 +6175,7 @@ static const struct bpf_func_proto bpf_s + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6212,7 +6212,7 @@ static const struct bpf_func_proto bpf_x + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6235,7 +6235,7 @@ static const struct bpf_func_proto bpf_x + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6258,7 +6258,7 @@ static const struct bpf_func_proto bpf_x + .pkt_access = true, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6277,7 +6277,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCK_COMMON_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- 
.arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6296,7 +6296,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6315,7 +6315,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_PTR_TO_SOCKET_OR_NULL, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + .arg5_type = ARG_ANYTHING, +@@ -6637,9 +6637,9 @@ static const struct bpf_func_proto bpf_t + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, + }; + +@@ -6706,9 +6706,9 @@ static const struct bpf_func_proto bpf_t + .pkt_access = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, +- .arg4_type = ARG_PTR_TO_MEM, ++ .arg4_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg5_type = ARG_CONST_SIZE, + }; + +@@ -6939,7 +6939,7 @@ static const struct bpf_func_proto bpf_s + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, +- .arg2_type = ARG_PTR_TO_MEM, ++ .arg2_type = ARG_PTR_TO_MEM | MEM_RDONLY, + .arg3_type = ARG_CONST_SIZE, + .arg4_type = ARG_ANYTHING, + }; diff --git a/queue-5.10/bpf-introduce-composable-reg-ret-and-arg-types.patch b/queue-5.10/bpf-introduce-composable-reg-ret-and-arg-types.patch new file mode 100644 index 00000000000..acb735b4952 --- /dev/null +++ b/queue-5.10/bpf-introduce-composable-reg-ret-and-arg-types.patch @@ -0,0 +1,147 @@ +From puranjay@kernel.org Tue Jun 10 16:45:20 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:43:56 +0000 +Subject: bpf: Introduce composable reg, ret and arg types. +To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-2-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit d639b9d13a39cf15639cbe6e8b2c43eb60148a73 upstream. + +There are some common properties shared between bpf reg, ret and arg +values. For instance, a value may be a NULL pointer, or a pointer to +a read-only memory. Previously, to express these properties, enumeration +was used. For example, in order to test whether a reg value can be NULL, +reg_type_may_be_null() simply enumerates all types that are possibly +NULL. The problem of this approach is that it's not scalable and causes +a lot of duplication. These properties can be combined, for example, a +type could be either MAYBE_NULL or RDONLY, or both. + +This patch series rewrites the layout of reg_type, arg_type and +ret_type, so that common properties can be extracted and represented as +composable flag. 
For example, one can write + + ARG_PTR_TO_MEM | PTR_MAYBE_NULL + +which is equivalent to the previous + + ARG_PTR_TO_MEM_OR_NULL + +The type ARG_PTR_TO_MEM are called "base type" in this patch. Base +types can be extended with flags. A flag occupies the higher bits while +base types sits in the lower bits. + +This patch in particular sets up a set of macro for this purpose. The +following patches will rewrite arg_types, ret_types and reg_types +respectively. + +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-2-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++++++++++++ + include/linux/bpf_verifier.h | 14 ++++++++++++++ + 2 files changed, 57 insertions(+) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -263,6 +263,29 @@ bool bpf_map_meta_equal(const struct bpf + + extern const struct bpf_map_ops bpf_map_offload_ops; + ++/* bpf_type_flag contains a set of flags that are applicable to the values of ++ * arg_type, ret_type and reg_type. For example, a pointer value may be null, ++ * or a memory is read-only. We classify types into two categories: base types ++ * and extended types. Extended types are base types combined with a type flag. ++ * ++ * Currently there are no more than 32 base types in arg_type, ret_type and ++ * reg_types. ++ */ ++#define BPF_BASE_TYPE_BITS 8 ++ ++enum bpf_type_flag { ++ /* PTR may be NULL. */ ++ PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), ++ ++ __BPF_TYPE_LAST_FLAG = PTR_MAYBE_NULL, ++}; ++ ++/* Max number of base types. */ ++#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS) ++ ++/* Max number of all types. */ ++#define BPF_TYPE_LIMIT (__BPF_TYPE_LAST_FLAG | (__BPF_TYPE_LAST_FLAG - 1)) ++ + /* function argument constraints */ + enum bpf_arg_type { + ARG_DONTCARE = 0, /* unused argument in helper function */ +@@ -305,7 +328,13 @@ enum bpf_arg_type { + ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + __BPF_ARG_TYPE_MAX, ++ ++ /* This must be the last entry. Its purpose is to ensure the enum is ++ * wide enough to hold the higher bits reserved for bpf_type_flag. ++ */ ++ __BPF_ARG_TYPE_LIMIT = BPF_TYPE_LIMIT, + }; ++static_assert(__BPF_ARG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); + + /* type of values returned from helper functions */ + enum bpf_return_type { +@@ -320,7 +349,14 @@ enum bpf_return_type { + RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ + RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ ++ __BPF_RET_TYPE_MAX, ++ ++ /* This must be the last entry. Its purpose is to ensure the enum is ++ * wide enough to hold the higher bits reserved for bpf_type_flag. 
++ */ ++ __BPF_RET_TYPE_LIMIT = BPF_TYPE_LIMIT, + }; ++static_assert(__BPF_RET_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); + + /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs + * to in-kernel helper functions and for adjusting imm32 field in BPF_CALL +@@ -419,7 +455,14 @@ enum bpf_reg_type { + PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ + PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ ++ __BPF_REG_TYPE_MAX, ++ ++ /* This must be the last entry. Its purpose is to ensure the enum is ++ * wide enough to hold the higher bits reserved for bpf_type_flag. ++ */ ++ __BPF_REG_TYPE_LIMIT = BPF_TYPE_LIMIT, + }; ++static_assert(__BPF_REG_TYPE_MAX <= BPF_BASE_TYPE_LIMIT); + + /* The information passed from prog-specific *_is_valid_access + * back to the verifier. +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -509,4 +509,18 @@ int bpf_check_attach_target(struct bpf_v + u32 btf_id, + struct bpf_attach_target_info *tgt_info); + ++#define BPF_BASE_TYPE_MASK GENMASK(BPF_BASE_TYPE_BITS - 1, 0) ++ ++/* extract base type from bpf_{arg, return, reg}_type. */ ++static inline u32 base_type(u32 type) ++{ ++ return type & BPF_BASE_TYPE_MASK; ++} ++ ++/* extract flags from an extended type. See bpf_type_flag in bpf.h. */ ++static inline u32 type_flag(u32 type) ++{ ++ return type & ~BPF_BASE_TYPE_MASK; ++} ++ + #endif /* _LINUX_BPF_VERIFIER_H */ diff --git a/queue-5.10/bpf-introduce-mem_rdonly-flag.patch b/queue-5.10/bpf-introduce-mem_rdonly-flag.patch new file mode 100644 index 00000000000..7e5960f2723 --- /dev/null +++ b/queue-5.10/bpf-introduce-mem_rdonly-flag.patch @@ -0,0 +1,259 @@ +From puranjay@kernel.org Tue Jun 10 16:45:37 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:44:00 +0000 +Subject: bpf: Introduce MEM_RDONLY flag +To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-6-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit 20b2aff4bc15bda809f994761d5719827d66c0b4 upstream. + +This patch introduce a flag MEM_RDONLY to tag a reg value +pointing to read-only memory. It makes the following changes: + +1. PTR_TO_RDWR_BUF -> PTR_TO_BUF +2. PTR_TO_RDONLY_BUF -> PTR_TO_BUF | MEM_RDONLY + +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-6-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 8 ++-- + kernel/bpf/btf.c | 3 - + kernel/bpf/map_iter.c | 4 +- + kernel/bpf/verifier.c | 84 +++++++++++++++++++++++++++------------------- + net/core/bpf_sk_storage.c | 2 - + net/core/sock_map.c | 2 - + 6 files changed, 60 insertions(+), 43 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -277,7 +277,10 @@ enum bpf_type_flag { + /* PTR may be NULL. */ + PTR_MAYBE_NULL = BIT(0 + BPF_BASE_TYPE_BITS), + +- __BPF_TYPE_LAST_FLAG = PTR_MAYBE_NULL, ++ /* MEM is read-only. */ ++ MEM_RDONLY = BIT(1 + BPF_BASE_TYPE_BITS), ++ ++ __BPF_TYPE_LAST_FLAG = MEM_RDONLY, + }; + + /* Max number of base types. */ +@@ -452,8 +455,7 @@ enum bpf_reg_type { + * an explicit null check is required for this struct. 
+ */ + PTR_TO_MEM, /* reg points to valid memory region */ +- PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ +- PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ ++ PTR_TO_BUF, /* reg points to a read/write buffer */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + __BPF_REG_TYPE_MAX, + +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -4539,8 +4539,7 @@ bool btf_ctx_access(int off, int size, e + + type = base_type(ctx_arg_info->reg_type); + flag = type_flag(ctx_arg_info->reg_type); +- if (ctx_arg_info->offset == off && +- (type == PTR_TO_RDWR_BUF || type == PTR_TO_RDONLY_BUF) && ++ if (ctx_arg_info->offset == off && type == PTR_TO_BUF && + (flag & PTR_MAYBE_NULL)) { + info->reg_type = ctx_arg_info->reg_type; + return true; +--- a/kernel/bpf/map_iter.c ++++ b/kernel/bpf/map_iter.c +@@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_map_elem, key), +- PTR_TO_RDONLY_BUF | PTR_MAYBE_NULL }, ++ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY }, + { offsetof(struct bpf_iter__bpf_map_elem, value), +- PTR_TO_RDWR_BUF | PTR_MAYBE_NULL }, ++ PTR_TO_BUF | PTR_MAYBE_NULL }, + }, + }; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -417,6 +417,11 @@ static bool reg_type_may_be_refcounted_o + base_type(type) == PTR_TO_MEM; + } + ++static bool type_is_rdonly_mem(u32 type) ++{ ++ return type & MEM_RDONLY; ++} ++ + static bool arg_type_may_be_refcounted(enum bpf_arg_type type) + { + return type == ARG_PTR_TO_SOCK_COMMON; +@@ -485,7 +490,7 @@ static bool is_ptr_cast_function(enum bp + static const char *reg_type_str(struct bpf_verifier_env *env, + enum bpf_reg_type type) + { +- char postfix[16] = {0}; ++ char postfix[16] = {0}, prefix[16] = {0}; + static const char * const str[] = { + [NOT_INIT] = "?", + [SCALAR_VALUE] = "inv", +@@ -505,8 +510,7 @@ static const char *reg_type_str(struct b + [PTR_TO_BTF_ID] = "ptr_", + [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", + [PTR_TO_MEM] = "mem", +- [PTR_TO_RDONLY_BUF] = "rdonly_buf", +- [PTR_TO_RDWR_BUF] = "rdwr_buf", ++ [PTR_TO_BUF] = "buf", + }; + + if (type & PTR_MAYBE_NULL) { +@@ -517,8 +521,11 @@ static const char *reg_type_str(struct b + strncpy(postfix, "_or_null", 16); + } + +- snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s", +- str[base_type(type)], postfix); ++ if (type & MEM_RDONLY) ++ strncpy(prefix, "rdonly_", 16); ++ ++ snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", ++ prefix, str[base_type(type)], postfix); + return env->type_str_buf; + } + +@@ -2376,8 +2383,7 @@ static bool is_spillable_regtype(enum bp + case PTR_TO_TCP_SOCK: + case PTR_TO_XDP_SOCK: + case PTR_TO_BTF_ID: +- case PTR_TO_RDONLY_BUF: +- case PTR_TO_RDWR_BUF: ++ case PTR_TO_BUF: + case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_MEM: + return true; +@@ -4120,22 +4126,28 @@ static int check_mem_access(struct bpf_v + } else if (reg->type == CONST_PTR_TO_MAP) { + err = check_ptr_to_map_access(env, regs, regno, off, size, t, + value_regno); +- } else if (reg->type == PTR_TO_RDONLY_BUF) { +- if (t == BPF_WRITE) { +- verbose(env, "R%d cannot write into %s\n", +- regno, reg_type_str(env, reg->type)); +- return -EACCES; ++ } else if (base_type(reg->type) == PTR_TO_BUF) { ++ bool rdonly_mem = type_is_rdonly_mem(reg->type); ++ const char *buf_info; ++ u32 *max_access; ++ ++ if (rdonly_mem) { ++ if (t == BPF_WRITE) { ++ verbose(env, "R%d cannot write into %s\n", ++ regno, reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ buf_info = "rdonly"; ++ 
max_access = &env->prog->aux->max_rdonly_access; ++ } else { ++ buf_info = "rdwr"; ++ max_access = &env->prog->aux->max_rdwr_access; + } ++ + err = check_buffer_access(env, reg, regno, off, size, false, +- "rdonly", +- &env->prog->aux->max_rdonly_access); +- if (!err && value_regno >= 0) +- mark_reg_unknown(env, regs, value_regno); +- } else if (reg->type == PTR_TO_RDWR_BUF) { +- err = check_buffer_access(env, reg, regno, off, size, false, +- "rdwr", +- &env->prog->aux->max_rdwr_access); +- if (!err && t == BPF_READ && value_regno >= 0) ++ buf_info, max_access); ++ ++ if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) + mark_reg_unknown(env, regs, value_regno); + } else { + verbose(env, "R%d invalid mem access '%s'\n", regno, +@@ -4339,8 +4351,10 @@ static int check_helper_mem_access(struc + struct bpf_call_arg_meta *meta) + { + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; ++ const char *buf_info; ++ u32 *max_access; + +- switch (reg->type) { ++ switch (base_type(reg->type)) { + case PTR_TO_PACKET: + case PTR_TO_PACKET_META: + return check_packet_access(env, regno, reg->off, access_size, +@@ -4356,18 +4370,20 @@ static int check_helper_mem_access(struc + return check_mem_region_access(env, regno, reg->off, + access_size, reg->mem_size, + zero_size_allowed); +- case PTR_TO_RDONLY_BUF: +- if (meta && meta->raw_mode) +- return -EACCES; +- return check_buffer_access(env, reg, regno, reg->off, +- access_size, zero_size_allowed, +- "rdonly", +- &env->prog->aux->max_rdonly_access); +- case PTR_TO_RDWR_BUF: ++ case PTR_TO_BUF: ++ if (type_is_rdonly_mem(reg->type)) { ++ if (meta && meta->raw_mode) ++ return -EACCES; ++ ++ buf_info = "rdonly"; ++ max_access = &env->prog->aux->max_rdonly_access; ++ } else { ++ buf_info = "rdwr"; ++ max_access = &env->prog->aux->max_rdwr_access; ++ } + return check_buffer_access(env, reg, regno, reg->off, + access_size, zero_size_allowed, +- "rdwr", +- &env->prog->aux->max_rdwr_access); ++ buf_info, max_access); + case PTR_TO_STACK: + return check_stack_range_initialized( + env, +@@ -4570,8 +4586,8 @@ static const struct bpf_reg_types mem_ty + PTR_TO_PACKET_META, + PTR_TO_MAP_VALUE, + PTR_TO_MEM, +- PTR_TO_RDONLY_BUF, +- PTR_TO_RDWR_BUF, ++ PTR_TO_BUF, ++ PTR_TO_BUF | MEM_RDONLY, + }, + }; + +--- a/net/core/bpf_sk_storage.c ++++ b/net/core/bpf_sk_storage.c +@@ -867,7 +867,7 @@ static struct bpf_iter_reg bpf_sk_storag + { offsetof(struct bpf_iter__bpf_sk_storage_map, sk), + PTR_TO_BTF_ID_OR_NULL }, + { offsetof(struct bpf_iter__bpf_sk_storage_map, value), +- PTR_TO_RDWR_BUF | PTR_MAYBE_NULL }, ++ PTR_TO_BUF | PTR_MAYBE_NULL }, + }, + .seq_info = &iter_seq_info, + }; +--- a/net/core/sock_map.c ++++ b/net/core/sock_map.c +@@ -1657,7 +1657,7 @@ static struct bpf_iter_reg sock_map_iter + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__sockmap, key), +- PTR_TO_RDONLY_BUF | PTR_MAYBE_NULL }, ++ PTR_TO_BUF | PTR_MAYBE_NULL | MEM_RDONLY }, + { offsetof(struct bpf_iter__sockmap, sk), + PTR_TO_BTF_ID_OR_NULL }, + }, diff --git a/queue-5.10/bpf-make-per_cpu_ptr-return-rdonly-ptr_to_mem.patch b/queue-5.10/bpf-make-per_cpu_ptr-return-rdonly-ptr_to_mem.patch new file mode 100644 index 00000000000..df23df48ee5 --- /dev/null +++ b/queue-5.10/bpf-make-per_cpu_ptr-return-rdonly-ptr_to_mem.patch @@ -0,0 +1,120 @@ +From puranjay@kernel.org Tue Jun 10 16:45:50 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:44:01 +0000 +Subject: bpf: Make per_cpu_ptr return rdonly PTR_TO_MEM. 
+To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-7-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit 34d3a78c681e8e7844b43d1a2f4671a04249c821 upstream. + +Tag the return type of {per, this}_cpu_ptr with RDONLY_MEM. The +returned value of this pair of helpers is kernel object, which +can not be updated by bpf programs. Previously these two helpers +return PTR_OT_MEM for kernel objects of scalar type, which allows +one to directly modify the memory. Now with RDONLY_MEM tagging, +the verifier will reject programs that write into RDONLY_MEM. + +Fixes: 63d9b80dcf2c ("bpf: Introducte bpf_this_cpu_ptr()") +Fixes: eaa6bcb71ef6 ("bpf: Introduce bpf_per_cpu_ptr()") +Fixes: 4976b718c355 ("bpf: Introduce pseudo_btf_id") +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-8-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + kernel/bpf/helpers.c | 4 ++-- + kernel/bpf/verifier.c | 30 ++++++++++++++++++++++++++---- + 2 files changed, 28 insertions(+), 6 deletions(-) + +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -653,7 +653,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void * + const struct bpf_func_proto bpf_per_cpu_ptr_proto = { + .func = bpf_per_cpu_ptr, + .gpl_only = false, +- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL, ++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + .arg2_type = ARG_ANYTHING, + }; +@@ -666,7 +666,7 @@ BPF_CALL_1(bpf_this_cpu_ptr, const void + const struct bpf_func_proto bpf_this_cpu_ptr_proto = { + .func = bpf_this_cpu_ptr, + .gpl_only = false, +- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID, ++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | MEM_RDONLY, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + }; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -4014,15 +4014,30 @@ static int check_mem_access(struct bpf_v + mark_reg_unknown(env, regs, value_regno); + } + } +- } else if (reg->type == PTR_TO_MEM) { ++ } else if (base_type(reg->type) == PTR_TO_MEM) { ++ bool rdonly_mem = type_is_rdonly_mem(reg->type); ++ ++ if (type_may_be_null(reg->type)) { ++ verbose(env, "R%d invalid mem access '%s'\n", regno, ++ reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ ++ if (t == BPF_WRITE && rdonly_mem) { ++ verbose(env, "R%d cannot write into %s\n", ++ regno, reg_type_str(env, reg->type)); ++ return -EACCES; ++ } ++ + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose(env, "R%d leaks addr into mem\n", value_regno); + return -EACCES; + } ++ + err = check_mem_region_access(env, regno, off, size, + reg->mem_size, false); +- if (!err && t == BPF_READ && value_regno >= 0) ++ if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) + mark_reg_unknown(env, regs, value_regno); + } else if (reg->type == PTR_TO_CTX) { + enum bpf_reg_type reg_type = SCALAR_VALUE; +@@ -5730,6 +5745,13 @@ static int check_helper_call(struct bpf_ + regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].mem_size = tsize; + } else { ++ /* MEM_RDONLY may be carried from ret_flag, but it ++ * doesn't apply on PTR_TO_BTF_ID. 
Fold it, otherwise ++ * it will confuse the check of PTR_TO_BTF_ID in ++ * check_mem_access(). ++ */ ++ ret_flag &= ~MEM_RDONLY; ++ + regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } +@@ -8387,7 +8409,7 @@ static int check_ld_imm(struct bpf_verif + mark_reg_known_zero(env, regs, insn->dst_reg); + + dst_reg->type = aux->btf_var.reg_type; +- switch (dst_reg->type) { ++ switch (base_type(dst_reg->type)) { + case PTR_TO_MEM: + dst_reg->mem_size = aux->btf_var.mem_size; + break; +@@ -10401,7 +10423,7 @@ static int check_pseudo_btf_id(struct bp + tname, PTR_ERR(ret)); + return -EINVAL; + } +- aux->btf_var.reg_type = PTR_TO_MEM; ++ aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY; + aux->btf_var.mem_size = tsize; + } else { + aux->btf_var.reg_type = PTR_TO_BTF_ID; diff --git a/queue-5.10/bpf-replace-arg_xxx_or_null-with-arg_xxx-ptr_maybe_null.patch b/queue-5.10/bpf-replace-arg_xxx_or_null-with-arg_xxx-ptr_maybe_null.patch new file mode 100644 index 00000000000..02f6553f919 --- /dev/null +++ b/queue-5.10/bpf-replace-arg_xxx_or_null-with-arg_xxx-ptr_maybe_null.patch @@ -0,0 +1,189 @@ +From puranjay@kernel.org Tue Jun 10 16:45:24 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:43:57 +0000 +Subject: bpf: Replace ARG_XXX_OR_NULL with ARG_XXX | PTR_MAYBE_NULL +To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-3-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit 48946bd6a5d695c50b34546864b79c1f910a33c1 upstream. + +We have introduced a new type to make bpf_arg composable, by +reserving high bits of bpf_arg to represent flags of a type. + +One of the flags is PTR_MAYBE_NULL which indicates a pointer +may be NULL. When applying this flag to an arg_type, it means +the arg can take NULL pointer. This patch switches the +qualified arg_types to use this flag. The arg_types changed +in this patch include: + +1. ARG_PTR_TO_MAP_VALUE_OR_NULL +2. ARG_PTR_TO_MEM_OR_NULL +3. ARG_PTR_TO_CTX_OR_NULL +4. ARG_PTR_TO_SOCKET_OR_NULL +5. ARG_PTR_TO_ALLOC_MEM_OR_NULL +6. ARG_PTR_TO_STACK_OR_NULL + +This patch does not eliminate the use of these arg_types, instead +it makes them an alias to the 'ARG_XXX | PTR_MAYBE_NULL'. 
+ +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-3-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 12 +++++++----- + kernel/bpf/verifier.c | 36 +++++++++++++----------------------- + 2 files changed, 20 insertions(+), 28 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -297,13 +297,11 @@ enum bpf_arg_type { + ARG_PTR_TO_MAP_KEY, /* pointer to stack used as map key */ + ARG_PTR_TO_MAP_VALUE, /* pointer to stack used as map value */ + ARG_PTR_TO_UNINIT_MAP_VALUE, /* pointer to valid memory used to store a map value */ +- ARG_PTR_TO_MAP_VALUE_OR_NULL, /* pointer to stack used as map value or NULL */ + + /* the following constraints used to prototype bpf_memcmp() and other + * functions that access data on eBPF program stack + */ + ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ +- ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ + ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, + * helper function must fill all bytes or clear + * them in error case. +@@ -313,22 +311,26 @@ enum bpf_arg_type { + ARG_CONST_SIZE_OR_ZERO, /* number of bytes accessed from memory or 0 */ + + ARG_PTR_TO_CTX, /* pointer to context */ +- ARG_PTR_TO_CTX_OR_NULL, /* pointer to context or NULL */ + ARG_ANYTHING, /* any (initialized) argument is ok */ + ARG_PTR_TO_SPIN_LOCK, /* pointer to bpf_spin_lock */ + ARG_PTR_TO_SOCK_COMMON, /* pointer to sock_common */ + ARG_PTR_TO_INT, /* pointer to int */ + ARG_PTR_TO_LONG, /* pointer to long */ + ARG_PTR_TO_SOCKET, /* pointer to bpf_sock (fullsock) */ +- ARG_PTR_TO_SOCKET_OR_NULL, /* pointer to bpf_sock (fullsock) or NULL */ + ARG_PTR_TO_BTF_ID, /* pointer to in-kernel struct */ + ARG_PTR_TO_ALLOC_MEM, /* pointer to dynamically allocated memory */ +- ARG_PTR_TO_ALLOC_MEM_OR_NULL, /* pointer to dynamically allocated memory or NULL */ + ARG_CONST_ALLOC_SIZE_OR_ZERO, /* number of allocated bytes requested */ + ARG_PTR_TO_BTF_ID_SOCK_COMMON, /* pointer to in-kernel sock_common or bpf-mirrored bpf_sock */ + ARG_PTR_TO_PERCPU_BTF_ID, /* pointer to in-kernel percpu type */ + __BPF_ARG_TYPE_MAX, + ++ /* Extended arg_types. */ ++ ARG_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MAP_VALUE, ++ ARG_PTR_TO_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_MEM, ++ ARG_PTR_TO_CTX_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_CTX, ++ ARG_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_SOCKET, ++ ARG_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | ARG_PTR_TO_ALLOC_MEM, ++ + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. 
+ */ +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -437,13 +437,9 @@ static bool arg_type_may_be_refcounted(e + return type == ARG_PTR_TO_SOCK_COMMON; + } + +-static bool arg_type_may_be_null(enum bpf_arg_type type) ++static bool type_may_be_null(u32 type) + { +- return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || +- type == ARG_PTR_TO_MEM_OR_NULL || +- type == ARG_PTR_TO_CTX_OR_NULL || +- type == ARG_PTR_TO_SOCKET_OR_NULL || +- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; ++ return type & PTR_MAYBE_NULL; + } + + /* Determine whether the function releases some resources allocated by another +@@ -4486,9 +4482,8 @@ static int process_spin_lock(struct bpf_ + + static bool arg_type_is_mem_ptr(enum bpf_arg_type type) + { +- return type == ARG_PTR_TO_MEM || +- type == ARG_PTR_TO_MEM_OR_NULL || +- type == ARG_PTR_TO_UNINIT_MEM; ++ return base_type(type) == ARG_PTR_TO_MEM || ++ base_type(type) == ARG_PTR_TO_UNINIT_MEM; + } + + static bool arg_type_is_mem_size(enum bpf_arg_type type) +@@ -4615,26 +4610,21 @@ static const struct bpf_reg_types *compa + [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, + [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, + [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, +- [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, + [ARG_CONST_SIZE] = &scalar_types, + [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, + [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, + [ARG_CONST_MAP_PTR] = &const_map_ptr_types, + [ARG_PTR_TO_CTX] = &context_types, +- [ARG_PTR_TO_CTX_OR_NULL] = &context_types, + [ARG_PTR_TO_SOCK_COMMON] = &sock_types, + #ifdef CONFIG_NET + [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, + #endif + [ARG_PTR_TO_SOCKET] = &fullsock_types, +- [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, + [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, + [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, + [ARG_PTR_TO_MEM] = &mem_types, +- [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, + [ARG_PTR_TO_UNINIT_MEM] = &mem_types, + [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, +- [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, + [ARG_PTR_TO_INT] = &int_ptr_types, + [ARG_PTR_TO_LONG] = &int_ptr_types, + [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types, +@@ -4649,7 +4639,7 @@ static int check_reg_type(struct bpf_ver + const struct bpf_reg_types *compatible; + int i, j; + +- compatible = compatible_reg_types[arg_type]; ++ compatible = compatible_reg_types[base_type(arg_type)]; + if (!compatible) { + verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); + return -EFAULT; +@@ -4730,15 +4720,14 @@ static int check_func_arg(struct bpf_ver + return -EACCES; + } + +- if (arg_type == ARG_PTR_TO_MAP_VALUE || +- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || +- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { ++ if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || ++ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { + err = resolve_map_arg_type(env, meta, &arg_type); + if (err) + return err; + } + +- if (register_is_null(reg) && arg_type_may_be_null(arg_type)) ++ if (register_is_null(reg) && type_may_be_null(arg_type)) + /* A NULL register has a SCALAR_VALUE type, so skip + * type checking. 
+ */ +@@ -4785,10 +4774,11 @@ skip_type_check: + err = check_helper_mem_access(env, regno, + meta->map_ptr->key_size, false, + NULL); +- } else if (arg_type == ARG_PTR_TO_MAP_VALUE || +- (arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL && +- !register_is_null(reg)) || +- arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE) { ++ } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || ++ base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) { ++ if (type_may_be_null(arg_type) && register_is_null(reg)) ++ return 0; ++ + /* bpf_map_xxx(..., map_ptr, ..., value) call: + * check [value, value + map->value_size) validity + */ diff --git a/queue-5.10/bpf-replace-ptr_to_xxx_or_null-with-ptr_to_xxx-ptr_maybe_null.patch b/queue-5.10/bpf-replace-ptr_to_xxx_or_null-with-ptr_to_xxx-ptr_maybe_null.patch new file mode 100644 index 00000000000..4f016bd92b3 --- /dev/null +++ b/queue-5.10/bpf-replace-ptr_to_xxx_or_null-with-ptr_to_xxx-ptr_maybe_null.patch @@ -0,0 +1,799 @@ +From puranjay@kernel.org Tue Jun 10 16:45:31 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:43:59 +0000 +Subject: bpf: Replace PTR_TO_XXX_OR_NULL with PTR_TO_XXX | PTR_MAYBE_NULL +To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-5-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit c25b2ae136039ffa820c26138ed4a5e5f3ab3841 upstream. + +We have introduced a new type to make bpf_reg composable, by +allocating bits in the type to represent flags. + +One of the flags is PTR_MAYBE_NULL which indicates a pointer +may be NULL. This patch switches the qualified reg_types to +use this flag. The reg_types changed in this patch include: + +1. PTR_TO_MAP_VALUE_OR_NULL +2. PTR_TO_SOCKET_OR_NULL +3. PTR_TO_SOCK_COMMON_OR_NULL +4. PTR_TO_TCP_SOCK_OR_NULL +5. PTR_TO_BTF_ID_OR_NULL +6. PTR_TO_MEM_OR_NULL +7. PTR_TO_RDONLY_BUF_OR_NULL +8. PTR_TO_RDWR_BUF_OR_NULL + +[puranjay: backport notes + There was a reg_type_may_be_null() in adjust_ptr_min_max_vals() in + 5.10.x, but didn't exist in the upstream commit. This backport + converted that reg_type_may_be_null() to type_may_be_null() as well.] 
+ +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/r/20211217003152.48334-5-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 15 +- + include/linux/bpf_verifier.h | 4 + kernel/bpf/btf.c | 7 - + kernel/bpf/map_iter.c | 4 + kernel/bpf/verifier.c | 294 +++++++++++++++++++------------------------ + net/core/bpf_sk_storage.c | 2 + net/core/sock_map.c | 2 + 7 files changed, 152 insertions(+), 176 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -426,18 +426,14 @@ enum bpf_reg_type { + PTR_TO_CTX, /* reg points to bpf_context */ + CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ + PTR_TO_MAP_VALUE, /* reg points to map element value */ +- PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET_META, /* skb->data - meta_len */ + PTR_TO_PACKET, /* reg points to skb->data */ + PTR_TO_PACKET_END, /* skb->data + headlen */ + PTR_TO_FLOW_KEYS, /* reg points to bpf_flow_keys */ + PTR_TO_SOCKET, /* reg points to struct bpf_sock */ +- PTR_TO_SOCKET_OR_NULL, /* reg points to struct bpf_sock or NULL */ + PTR_TO_SOCK_COMMON, /* reg points to sock_common */ +- PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */ + PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */ +- PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */ + PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */ + PTR_TO_XDP_SOCK, /* reg points to struct xdp_sock */ + /* PTR_TO_BTF_ID points to a kernel struct that does not need +@@ -455,16 +451,19 @@ enum bpf_reg_type { + * been checked for null. Used primarily to inform the verifier + * an explicit null check is required for this struct. + */ +- PTR_TO_BTF_ID_OR_NULL, + PTR_TO_MEM, /* reg points to valid memory region */ +- PTR_TO_MEM_OR_NULL, /* reg points to valid memory region or NULL */ + PTR_TO_RDONLY_BUF, /* reg points to a readonly buffer */ +- PTR_TO_RDONLY_BUF_OR_NULL, /* reg points to a readonly buffer or NULL */ + PTR_TO_RDWR_BUF, /* reg points to a read/write buffer */ +- PTR_TO_RDWR_BUF_OR_NULL, /* reg points to a read/write buffer or NULL */ + PTR_TO_PERCPU_BTF_ID, /* reg points to a percpu kernel variable */ + __BPF_REG_TYPE_MAX, + ++ /* Extended reg_types. */ ++ PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | PTR_TO_MAP_VALUE, ++ PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCKET, ++ PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | PTR_TO_SOCK_COMMON, ++ PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | PTR_TO_TCP_SOCK, ++ PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | PTR_TO_BTF_ID, ++ + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. + */ +--- a/include/linux/bpf_verifier.h ++++ b/include/linux/bpf_verifier.h +@@ -17,6 +17,8 @@ + * that converting umax_value to int cannot overflow. + */ + #define BPF_MAX_VAR_SIZ (1 << 29) ++/* size of type_str_buf in bpf_verifier. */ ++#define TYPE_STR_BUF_LEN 64 + + /* Liveness marks, used for registers and spilled-regs (in stack slots). 
+ * Read marks propagate upwards until they find a write mark; they record that +@@ -462,6 +464,8 @@ struct bpf_verifier_env { + u32 peak_states; + /* longest register parentage chain walked for liveness marking */ + u32 longest_mark_read_walk; ++ /* buffer used in reg_type_str() to generate reg_type string */ ++ char type_str_buf[TYPE_STR_BUF_LEN]; + }; + + __printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log, +--- a/kernel/bpf/btf.c ++++ b/kernel/bpf/btf.c +@@ -4535,10 +4535,13 @@ bool btf_ctx_access(int off, int size, e + /* check for PTR_TO_RDONLY_BUF_OR_NULL or PTR_TO_RDWR_BUF_OR_NULL */ + for (i = 0; i < prog->aux->ctx_arg_info_size; i++) { + const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i]; ++ u32 type, flag; + ++ type = base_type(ctx_arg_info->reg_type); ++ flag = type_flag(ctx_arg_info->reg_type); + if (ctx_arg_info->offset == off && +- (ctx_arg_info->reg_type == PTR_TO_RDONLY_BUF_OR_NULL || +- ctx_arg_info->reg_type == PTR_TO_RDWR_BUF_OR_NULL)) { ++ (type == PTR_TO_RDWR_BUF || type == PTR_TO_RDONLY_BUF) && ++ (flag & PTR_MAYBE_NULL)) { + info->reg_type = ctx_arg_info->reg_type; + return true; + } +--- a/kernel/bpf/map_iter.c ++++ b/kernel/bpf/map_iter.c +@@ -174,9 +174,9 @@ static const struct bpf_iter_reg bpf_map + .ctx_arg_info_size = 2, + .ctx_arg_info = { + { offsetof(struct bpf_iter__bpf_map_elem, key), +- PTR_TO_RDONLY_BUF_OR_NULL }, ++ PTR_TO_RDONLY_BUF | PTR_MAYBE_NULL }, + { offsetof(struct bpf_iter__bpf_map_elem, value), +- PTR_TO_RDWR_BUF_OR_NULL }, ++ PTR_TO_RDWR_BUF | PTR_MAYBE_NULL }, + }, + }; + +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -404,18 +404,6 @@ static bool reg_type_not_null(enum bpf_r + type == PTR_TO_SOCK_COMMON; + } + +-static bool reg_type_may_be_null(enum bpf_reg_type type) +-{ +- return type == PTR_TO_MAP_VALUE_OR_NULL || +- type == PTR_TO_SOCKET_OR_NULL || +- type == PTR_TO_SOCK_COMMON_OR_NULL || +- type == PTR_TO_TCP_SOCK_OR_NULL || +- type == PTR_TO_BTF_ID_OR_NULL || +- type == PTR_TO_MEM_OR_NULL || +- type == PTR_TO_RDONLY_BUF_OR_NULL || +- type == PTR_TO_RDWR_BUF_OR_NULL; +-} +- + static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg) + { + return reg->type == PTR_TO_MAP_VALUE && +@@ -424,12 +412,9 @@ static bool reg_may_point_to_spin_lock(c + + static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type) + { +- return type == PTR_TO_SOCKET || +- type == PTR_TO_SOCKET_OR_NULL || +- type == PTR_TO_TCP_SOCK || +- type == PTR_TO_TCP_SOCK_OR_NULL || +- type == PTR_TO_MEM || +- type == PTR_TO_MEM_OR_NULL; ++ return base_type(type) == PTR_TO_SOCKET || ++ base_type(type) == PTR_TO_TCP_SOCK || ++ base_type(type) == PTR_TO_MEM; + } + + static bool arg_type_may_be_refcounted(enum bpf_arg_type type) +@@ -492,37 +477,50 @@ static bool is_ptr_cast_function(enum bp + func_id == BPF_FUNC_skc_to_tcp_request_sock; + } + +-/* string representation of 'enum bpf_reg_type' */ +-static const char * const reg_type_str[] = { +- [NOT_INIT] = "?", +- [SCALAR_VALUE] = "inv", +- [PTR_TO_CTX] = "ctx", +- [CONST_PTR_TO_MAP] = "map_ptr", +- [PTR_TO_MAP_VALUE] = "map_value", +- [PTR_TO_MAP_VALUE_OR_NULL] = "map_value_or_null", +- [PTR_TO_STACK] = "fp", +- [PTR_TO_PACKET] = "pkt", +- [PTR_TO_PACKET_META] = "pkt_meta", +- [PTR_TO_PACKET_END] = "pkt_end", +- [PTR_TO_FLOW_KEYS] = "flow_keys", +- [PTR_TO_SOCKET] = "sock", +- [PTR_TO_SOCKET_OR_NULL] = "sock_or_null", +- [PTR_TO_SOCK_COMMON] = "sock_common", +- [PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null", +- [PTR_TO_TCP_SOCK] = "tcp_sock", 
+- [PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null", +- [PTR_TO_TP_BUFFER] = "tp_buffer", +- [PTR_TO_XDP_SOCK] = "xdp_sock", +- [PTR_TO_BTF_ID] = "ptr_", +- [PTR_TO_BTF_ID_OR_NULL] = "ptr_or_null_", +- [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", +- [PTR_TO_MEM] = "mem", +- [PTR_TO_MEM_OR_NULL] = "mem_or_null", +- [PTR_TO_RDONLY_BUF] = "rdonly_buf", +- [PTR_TO_RDONLY_BUF_OR_NULL] = "rdonly_buf_or_null", +- [PTR_TO_RDWR_BUF] = "rdwr_buf", +- [PTR_TO_RDWR_BUF_OR_NULL] = "rdwr_buf_or_null", +-}; ++/* string representation of 'enum bpf_reg_type' ++ * ++ * Note that reg_type_str() can not appear more than once in a single verbose() ++ * statement. ++ */ ++static const char *reg_type_str(struct bpf_verifier_env *env, ++ enum bpf_reg_type type) ++{ ++ char postfix[16] = {0}; ++ static const char * const str[] = { ++ [NOT_INIT] = "?", ++ [SCALAR_VALUE] = "inv", ++ [PTR_TO_CTX] = "ctx", ++ [CONST_PTR_TO_MAP] = "map_ptr", ++ [PTR_TO_MAP_VALUE] = "map_value", ++ [PTR_TO_STACK] = "fp", ++ [PTR_TO_PACKET] = "pkt", ++ [PTR_TO_PACKET_META] = "pkt_meta", ++ [PTR_TO_PACKET_END] = "pkt_end", ++ [PTR_TO_FLOW_KEYS] = "flow_keys", ++ [PTR_TO_SOCKET] = "sock", ++ [PTR_TO_SOCK_COMMON] = "sock_common", ++ [PTR_TO_TCP_SOCK] = "tcp_sock", ++ [PTR_TO_TP_BUFFER] = "tp_buffer", ++ [PTR_TO_XDP_SOCK] = "xdp_sock", ++ [PTR_TO_BTF_ID] = "ptr_", ++ [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_", ++ [PTR_TO_MEM] = "mem", ++ [PTR_TO_RDONLY_BUF] = "rdonly_buf", ++ [PTR_TO_RDWR_BUF] = "rdwr_buf", ++ }; ++ ++ if (type & PTR_MAYBE_NULL) { ++ if (base_type(type) == PTR_TO_BTF_ID || ++ base_type(type) == PTR_TO_PERCPU_BTF_ID) ++ strncpy(postfix, "or_null_", 16); ++ else ++ strncpy(postfix, "_or_null", 16); ++ } ++ ++ snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s", ++ str[base_type(type)], postfix); ++ return env->type_str_buf; ++} + + static char slot_type_char[] = { + [STACK_INVALID] = '?', +@@ -588,7 +586,7 @@ static void print_verifier_state(struct + continue; + verbose(env, " R%d", i); + print_liveness(env, reg->live); +- verbose(env, "=%s", reg_type_str[t]); ++ verbose(env, "=%s", reg_type_str(env, t)); + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && +@@ -596,9 +594,8 @@ static void print_verifier_state(struct + /* reg->off should be 0 for SCALAR_VALUE */ + verbose(env, "%lld", reg->var_off.value + reg->off); + } else { +- if (t == PTR_TO_BTF_ID || +- t == PTR_TO_BTF_ID_OR_NULL || +- t == PTR_TO_PERCPU_BTF_ID) ++ if (base_type(t) == PTR_TO_BTF_ID || ++ base_type(t) == PTR_TO_PERCPU_BTF_ID) + verbose(env, "%s", kernel_type_name(reg->btf_id)); + verbose(env, "(id=%d", reg->id); + if (reg_type_may_be_refcounted_or_null(t)) +@@ -607,9 +604,8 @@ static void print_verifier_state(struct + verbose(env, ",off=%d", reg->off); + if (type_is_pkt_pointer(t)) + verbose(env, ",r=%d", reg->range); +- else if (t == CONST_PTR_TO_MAP || +- t == PTR_TO_MAP_VALUE || +- t == PTR_TO_MAP_VALUE_OR_NULL) ++ else if (base_type(t) == CONST_PTR_TO_MAP || ++ base_type(t) == PTR_TO_MAP_VALUE) + verbose(env, ",ks=%d,vs=%d", + reg->map_ptr->key_size, + reg->map_ptr->value_size); +@@ -679,7 +675,7 @@ static void print_verifier_state(struct + if (is_spilled_reg(&state->stack[i])) { + reg = &state->stack[i].spilled_ptr; + t = reg->type; +- verbose(env, "=%s", reg_type_str[t]); ++ verbose(env, "=%s", reg_type_str(env, t)); + if (t == SCALAR_VALUE && reg->precise) + verbose(env, "P"); + if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) +@@ -1577,7 +1573,7 @@ static int mark_reg_read(struct bpf_veri + 
break; + if (parent->live & REG_LIVE_DONE) { + verbose(env, "verifier BUG type %s var_off %lld off %d\n", +- reg_type_str[parent->type], ++ reg_type_str(env, parent->type), + parent->var_off.value, parent->off); + return -EFAULT; + } +@@ -2366,9 +2362,8 @@ static int mark_chain_precision_stack_fr + + static bool is_spillable_regtype(enum bpf_reg_type type) + { +- switch (type) { ++ switch (base_type(type)) { + case PTR_TO_MAP_VALUE: +- case PTR_TO_MAP_VALUE_OR_NULL: + case PTR_TO_STACK: + case PTR_TO_CTX: + case PTR_TO_PACKET: +@@ -2377,21 +2372,14 @@ static bool is_spillable_regtype(enum bp + case PTR_TO_FLOW_KEYS: + case CONST_PTR_TO_MAP: + case PTR_TO_SOCKET: +- case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: +- case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: +- case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: + case PTR_TO_BTF_ID: +- case PTR_TO_BTF_ID_OR_NULL: + case PTR_TO_RDONLY_BUF: +- case PTR_TO_RDONLY_BUF_OR_NULL: + case PTR_TO_RDWR_BUF: +- case PTR_TO_RDWR_BUF_OR_NULL: + case PTR_TO_PERCPU_BTF_ID: + case PTR_TO_MEM: +- case PTR_TO_MEM_OR_NULL: + return true; + default: + return false; +@@ -3252,7 +3240,7 @@ static int check_ctx_access(struct bpf_v + */ + *reg_type = info.reg_type; + +- if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL) ++ if (base_type(*reg_type) == PTR_TO_BTF_ID) + *btf_id = info.btf_id; + else + env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size; +@@ -3318,7 +3306,7 @@ static int check_sock_access(struct bpf_ + } + + verbose(env, "R%d invalid %s access off=%d size=%d\n", +- regno, reg_type_str[reg->type], off, size); ++ regno, reg_type_str(env, reg->type), off, size); + + return -EACCES; + } +@@ -4057,7 +4045,7 @@ static int check_mem_access(struct bpf_v + } else { + mark_reg_known_zero(env, regs, + value_regno); +- if (reg_type_may_be_null(reg_type)) ++ if (type_may_be_null(reg_type)) + regs[value_regno].id = ++env->id_gen; + /* A load of ctx field could have different + * actual load size with the one encoded in the +@@ -4065,8 +4053,7 @@ static int check_mem_access(struct bpf_v + * a sub-register. 
+ */ + regs[value_regno].subreg_def = DEF_NOT_SUBREG; +- if (reg_type == PTR_TO_BTF_ID || +- reg_type == PTR_TO_BTF_ID_OR_NULL) ++ if (base_type(reg_type) == PTR_TO_BTF_ID) + regs[value_regno].btf_id = btf_id; + } + regs[value_regno].type = reg_type; +@@ -4117,7 +4104,7 @@ static int check_mem_access(struct bpf_v + } else if (type_is_sk_pointer(reg->type)) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", +- regno, reg_type_str[reg->type]); ++ regno, reg_type_str(env, reg->type)); + return -EACCES; + } + err = check_sock_access(env, insn_idx, regno, off, size, t); +@@ -4136,7 +4123,7 @@ static int check_mem_access(struct bpf_v + } else if (reg->type == PTR_TO_RDONLY_BUF) { + if (t == BPF_WRITE) { + verbose(env, "R%d cannot write into %s\n", +- regno, reg_type_str[reg->type]); ++ regno, reg_type_str(env, reg->type)); + return -EACCES; + } + err = check_buffer_access(env, reg, regno, off, size, false, +@@ -4152,7 +4139,7 @@ static int check_mem_access(struct bpf_v + mark_reg_unknown(env, regs, value_regno); + } else { + verbose(env, "R%d invalid mem access '%s'\n", regno, +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EACCES; + } + +@@ -4195,7 +4182,7 @@ static int check_xadd(struct bpf_verifie + is_sk_reg(env, insn->dst_reg)) { + verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", + insn->dst_reg, +- reg_type_str[reg_state(env, insn->dst_reg)->type]); ++ reg_type_str(env, reg_state(env, insn->dst_reg)->type)); + return -EACCES; + } + +@@ -4392,9 +4379,9 @@ static int check_helper_mem_access(struc + register_is_null(reg)) + return 0; + +- verbose(env, "R%d type=%s expected=%s\n", regno, +- reg_type_str[reg->type], +- reg_type_str[PTR_TO_STACK]); ++ verbose(env, "R%d type=%s ", regno, ++ reg_type_str(env, reg->type)); ++ verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK)); + return -EACCES; + } + } +@@ -4654,10 +4641,10 @@ static int check_reg_type(struct bpf_ver + goto found; + } + +- verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); ++ verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, type)); + for (j = 0; j + 1 < i; j++) +- verbose(env, "%s, ", reg_type_str[compatible->types[j]]); +- verbose(env, "%s\n", reg_type_str[compatible->types[j]]); ++ verbose(env, "%s, ", reg_type_str(env, compatible->types[j])); ++ verbose(env, "%s\n", reg_type_str(env, compatible->types[j])); + return -EACCES; + + found: +@@ -5556,6 +5543,7 @@ static int check_helper_call(struct bpf_ + { + const struct bpf_func_proto *fn = NULL; + enum bpf_return_type ret_type; ++ enum bpf_type_flag ret_flag; + struct bpf_reg_state *regs; + struct bpf_call_arg_meta meta; + bool changes_data; +@@ -5668,6 +5656,7 @@ static int check_helper_call(struct bpf_ + + /* update return register (already marked as written above) */ + ret_type = fn->ret_type; ++ ret_flag = type_flag(fn->ret_type); + if (ret_type == RET_INTEGER) { + /* sets type to SCALAR_VALUE */ + mark_reg_unknown(env, regs, BPF_REG_0); +@@ -5686,25 +5675,23 @@ static int check_helper_call(struct bpf_ + return -EINVAL; + } + regs[BPF_REG_0].map_ptr = meta.map_ptr; +- if (type_may_be_null(ret_type)) { +- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; +- } else { +- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; +- if (map_value_has_spin_lock(meta.map_ptr)) +- regs[BPF_REG_0].id = ++env->id_gen; ++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag; ++ if (!type_may_be_null(ret_type) && ++ map_value_has_spin_lock(meta.map_ptr)) { ++ regs[BPF_REG_0].id = ++env->id_gen; + } + } else if 
(base_type(ret_type) == RET_PTR_TO_SOCKET) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag; + } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag; + } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag; + } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; ++ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].mem_size = meta.mem_size; + } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { + const struct btf_type *t; +@@ -5724,23 +5711,17 @@ static int check_helper_call(struct bpf_ + tname, PTR_ERR(ret)); + return -EINVAL; + } +- regs[BPF_REG_0].type = +- (ret_type & PTR_MAYBE_NULL) ? +- PTR_TO_MEM_OR_NULL : PTR_TO_MEM; ++ regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag; + regs[BPF_REG_0].mem_size = tsize; + } else { +- regs[BPF_REG_0].type = +- (ret_type & PTR_MAYBE_NULL) ? +- PTR_TO_BTF_ID_OR_NULL : PTR_TO_BTF_ID; ++ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } + } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + int ret_btf_id; + + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = (ret_type & PTR_MAYBE_NULL) ? +- PTR_TO_BTF_ID_OR_NULL : +- PTR_TO_BTF_ID; ++ regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag; + ret_btf_id = *fn->ret_btf_id; + if (ret_btf_id == 0) { + verbose(env, "invalid return type %u of func %s#%d\n", +@@ -5755,7 +5736,7 @@ static int check_helper_call(struct bpf_ + return -EINVAL; + } + +- if (reg_type_may_be_null(regs[BPF_REG_0].type)) ++ if (type_may_be_null(regs[BPF_REG_0].type)) + regs[BPF_REG_0].id = ++env->id_gen; + + if (is_ptr_cast_function(func_id)) { +@@ -5856,25 +5837,25 @@ static bool check_reg_sane_offset(struct + + if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) { + verbose(env, "math between %s pointer and %lld is not allowed\n", +- reg_type_str[type], val); ++ reg_type_str(env, type), val); + return false; + } + + if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) { + verbose(env, "%s pointer offset %d is not allowed\n", +- reg_type_str[type], reg->off); ++ reg_type_str(env, type), reg->off); + return false; + } + + if (smin == S64_MIN) { + verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n", +- reg_type_str[type]); ++ reg_type_str(env, type)); + return false; + } + + if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) { + verbose(env, "value %lld makes %s pointer be out of bounds\n", +- smin, reg_type_str[type]); ++ smin, reg_type_str(env, type)); + return false; + } + +@@ -6251,11 +6232,13 @@ static int adjust_ptr_min_max_vals(struc + return -EACCES; + } + +- switch (ptr_reg->type) { +- case PTR_TO_MAP_VALUE_OR_NULL: ++ if (ptr_reg->type & PTR_MAYBE_NULL) { + verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", +- dst, reg_type_str[ptr_reg->type]); ++ dst, reg_type_str(env, ptr_reg->type)); + return -EACCES; ++ } ++ ++ switch (base_type(ptr_reg->type)) { + case CONST_PTR_TO_MAP: + /* smin_val represents the known value */ + if (known && smin_val == 0 && 
opcode == BPF_ADD) +@@ -6268,10 +6251,10 @@ static int adjust_ptr_min_max_vals(struc + case PTR_TO_XDP_SOCK: + reject: + verbose(env, "R%d pointer arithmetic on %s prohibited\n", +- dst, reg_type_str[ptr_reg->type]); ++ dst, reg_type_str(env, ptr_reg->type)); + return -EACCES; + default: +- if (reg_type_may_be_null(ptr_reg->type)) ++ if (type_may_be_null(ptr_reg->type)) + goto reject; + break; + } +@@ -7964,7 +7947,7 @@ static void mark_ptr_or_null_reg(struct + struct bpf_reg_state *reg, u32 id, + bool is_null) + { +- if (reg_type_may_be_null(reg->type) && reg->id == id && ++ if (type_may_be_null(reg->type) && reg->id == id && + !WARN_ON_ONCE(!reg->id)) { + if (WARN_ON_ONCE(reg->smin_value || reg->smax_value || + !tnum_equals_const(reg->var_off, 0) || +@@ -7978,7 +7961,17 @@ static void mark_ptr_or_null_reg(struct + } + if (is_null) { + reg->type = SCALAR_VALUE; +- } else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) { ++ /* We don't need id and ref_obj_id from this point ++ * onwards anymore, thus we should better reset it, ++ * so that state pruning has chances to take effect. ++ */ ++ reg->id = 0; ++ reg->ref_obj_id = 0; ++ ++ return; ++ } ++ ++ if (base_type(reg->type) == PTR_TO_MAP_VALUE) { + const struct bpf_map *map = reg->map_ptr; + + if (map->inner_map_meta) { +@@ -7992,29 +7985,11 @@ static void mark_ptr_or_null_reg(struct + } else { + reg->type = PTR_TO_MAP_VALUE; + } +- } else if (reg->type == PTR_TO_SOCKET_OR_NULL) { +- reg->type = PTR_TO_SOCKET; +- } else if (reg->type == PTR_TO_SOCK_COMMON_OR_NULL) { +- reg->type = PTR_TO_SOCK_COMMON; +- } else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) { +- reg->type = PTR_TO_TCP_SOCK; +- } else if (reg->type == PTR_TO_BTF_ID_OR_NULL) { +- reg->type = PTR_TO_BTF_ID; +- } else if (reg->type == PTR_TO_MEM_OR_NULL) { +- reg->type = PTR_TO_MEM; +- } else if (reg->type == PTR_TO_RDONLY_BUF_OR_NULL) { +- reg->type = PTR_TO_RDONLY_BUF; +- } else if (reg->type == PTR_TO_RDWR_BUF_OR_NULL) { +- reg->type = PTR_TO_RDWR_BUF; ++ } else { ++ reg->type &= ~PTR_MAYBE_NULL; + } +- if (is_null) { +- /* We don't need id and ref_obj_id from this point +- * onwards anymore, thus we should better reset it, +- * so that state pruning has chances to take effect. +- */ +- reg->id = 0; +- reg->ref_obj_id = 0; +- } else if (!reg_may_point_to_spin_lock(reg)) { ++ ++ if (!reg_may_point_to_spin_lock(reg)) { + /* For not-NULL ptr, reg->ref_obj_id will be reset + * in release_reference(). + * +@@ -8341,7 +8316,7 @@ static int check_cond_jmp_op(struct bpf_ + */ + if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && + insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) && +- reg_type_may_be_null(dst_reg->type)) { ++ type_may_be_null(dst_reg->type)) { + /* Mark all identical registers in each branch as either + * safe or unknown depending R == 0 or R != 0 conditional. 
+ */ +@@ -8570,7 +8545,7 @@ static int check_return_code(struct bpf_ + if (is_subprog) { + if (reg->type != SCALAR_VALUE) { + verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n", +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EINVAL; + } + return 0; +@@ -8631,7 +8606,7 @@ static int check_return_code(struct bpf_ + + if (reg->type != SCALAR_VALUE) { + verbose(env, "At program exit the register R0 is not a known value (%s)\n", +- reg_type_str[reg->type]); ++ reg_type_str(env, reg->type)); + return -EINVAL; + } + +@@ -9379,7 +9354,7 @@ static bool regsafe(struct bpf_verifier_ + return true; + if (rcur->type == NOT_INIT) + return false; +- switch (rold->type) { ++ switch (base_type(rold->type)) { + case SCALAR_VALUE: + if (env->explore_alu_limits) + return false; +@@ -9400,6 +9375,22 @@ static bool regsafe(struct bpf_verifier_ + return false; + } + case PTR_TO_MAP_VALUE: ++ /* a PTR_TO_MAP_VALUE could be safe to use as a ++ * PTR_TO_MAP_VALUE_OR_NULL into the same map. ++ * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- ++ * checked, doing so could have affected others with the same ++ * id, and we can't check for that because we lost the id when ++ * we converted to a PTR_TO_MAP_VALUE. ++ */ ++ if (type_may_be_null(rold->type)) { ++ if (!type_may_be_null(rcur->type)) ++ return false; ++ if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) ++ return false; ++ /* Check our ids match any regs they're supposed to */ ++ return check_ids(rold->id, rcur->id, idmap); ++ } ++ + /* If the new min/max/var_off satisfy the old ones and + * everything else matches, we are OK. + * 'id' is not compared, since it's only used for maps with +@@ -9411,20 +9402,6 @@ static bool regsafe(struct bpf_verifier_ + return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && + range_within(rold, rcur) && + tnum_in(rold->var_off, rcur->var_off); +- case PTR_TO_MAP_VALUE_OR_NULL: +- /* a PTR_TO_MAP_VALUE could be safe to use as a +- * PTR_TO_MAP_VALUE_OR_NULL into the same map. +- * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL- +- * checked, doing so could have affected others with the same +- * id, and we can't check for that because we lost the id when +- * we converted to a PTR_TO_MAP_VALUE. +- */ +- if (rcur->type != PTR_TO_MAP_VALUE_OR_NULL) +- return false; +- if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) +- return false; +- /* Check our ids match any regs they're supposed to */ +- return check_ids(rold->id, rcur->id, idmap); + case PTR_TO_PACKET_META: + case PTR_TO_PACKET: + if (rcur->type != rold->type) +@@ -9453,11 +9430,8 @@ static bool regsafe(struct bpf_verifier_ + case PTR_TO_PACKET_END: + case PTR_TO_FLOW_KEYS: + case PTR_TO_SOCKET: +- case PTR_TO_SOCKET_OR_NULL: + case PTR_TO_SOCK_COMMON: +- case PTR_TO_SOCK_COMMON_OR_NULL: + case PTR_TO_TCP_SOCK: +- case PTR_TO_TCP_SOCK_OR_NULL: + case PTR_TO_XDP_SOCK: + /* Only valid matches are exact, which memcmp() above + * would have accepted +@@ -9979,17 +9953,13 @@ next: + /* Return true if it's OK to have the same insn return a different type. 
*/
+ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
+ {
+- switch (type) {
++ switch (base_type(type)) {
+ case PTR_TO_CTX:
+ case PTR_TO_SOCKET:
+- case PTR_TO_SOCKET_OR_NULL:
+ case PTR_TO_SOCK_COMMON:
+- case PTR_TO_SOCK_COMMON_OR_NULL:
+ case PTR_TO_TCP_SOCK:
+- case PTR_TO_TCP_SOCK_OR_NULL:
+ case PTR_TO_XDP_SOCK:
+ case PTR_TO_BTF_ID:
+- case PTR_TO_BTF_ID_OR_NULL:
+ return false;
+ default:
+ return true;
+@@ -10207,7 +10177,7 @@ static int do_check(struct bpf_verifier_
+ if (is_ctx_reg(env, insn->dst_reg)) {
+ verbose(env, "BPF_ST stores into R%d %s is not allowed\n",
+ insn->dst_reg,
+- reg_type_str[reg_state(env, insn->dst_reg)->type]);
++ reg_type_str(env, reg_state(env, insn->dst_reg)->type));
+ return -EACCES;
+ }
+
+--- a/net/core/bpf_sk_storage.c
++++ b/net/core/bpf_sk_storage.c
+@@ -867,7 +867,7 @@ static struct bpf_iter_reg bpf_sk_storag
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ { offsetof(struct bpf_iter__bpf_sk_storage_map, value),
+- PTR_TO_RDWR_BUF_OR_NULL },
++ PTR_TO_RDWR_BUF | PTR_MAYBE_NULL },
+ },
+ .seq_info = &iter_seq_info,
+ };
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -1657,7 +1657,7 @@ static struct bpf_iter_reg sock_map_iter
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__sockmap, key),
+- PTR_TO_RDONLY_BUF_OR_NULL },
++ PTR_TO_RDONLY_BUF | PTR_MAYBE_NULL },
+ { offsetof(struct bpf_iter__sockmap, sk),
+ PTR_TO_BTF_ID_OR_NULL },
+ },
diff --git a/queue-5.10/bpf-replace-ret_xxx_or_null-with-ret_xxx-ptr_maybe_null.patch b/queue-5.10/bpf-replace-ret_xxx_or_null-with-ret_xxx-ptr_maybe_null.patch
new file mode 100644
index 00000000000..75c461e2869
--- /dev/null
+++ b/queue-5.10/bpf-replace-ret_xxx_or_null-with-ret_xxx-ptr_maybe_null.patch
@@ -0,0 +1,196 @@
+From puranjay@kernel.org Tue Jun 10 16:45:37 2025
+From: Puranjay Mohan <puranjay@kernel.org>
+Date: Tue, 10 Jun 2025 14:43:58 +0000
+Subject: bpf: Replace RET_XXX_OR_NULL with RET_XXX | PTR_MAYBE_NULL
+To: Greg KH <gregkh@linuxfoundation.org>
+Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org
+Message-ID: <20250610144407.95865-4-puranjay@kernel.org>
+
+From: Hao Luo <haoluo@google.com>
+
+commit 3c4807322660d4290ac9062c034aed6b87243861 upstream.
+
+We have introduced a new type to make bpf_ret composable, by
+reserving high bits to represent flags.
+
+One of the flags is PTR_MAYBE_NULL, which indicates a pointer
+may be NULL. When applying this flag to ret_types, it means
+the returned value could be a NULL pointer. This patch
+switches the qualified ret_types to use this flag.
+The ret_types changed in this patch include:
+
+1. RET_PTR_TO_MAP_VALUE_OR_NULL
+2. RET_PTR_TO_SOCKET_OR_NULL
+3. RET_PTR_TO_TCP_SOCK_OR_NULL
+4. RET_PTR_TO_SOCK_COMMON_OR_NULL
+5. RET_PTR_TO_ALLOC_MEM_OR_NULL
+6. RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL
+7. RET_PTR_TO_BTF_ID_OR_NULL
+
+This patch doesn't eliminate the use of these names, instead
+it makes them aliases to 'RET_PTR_TO_XXX | PTR_MAYBE_NULL'. 
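+
+For illustration, a composed value then splits cleanly into its two
+parts. A minimal sketch, using the base_type()/type_flag()/
+type_may_be_null() helpers that earlier patches in this series
+introduce (not code added by this patch):
+
+  enum bpf_return_type ret = RET_PTR_TO_MAP_VALUE_OR_NULL;
+
+  /* RET_PTR_TO_MAP_VALUE_OR_NULL == RET_PTR_TO_MAP_VALUE | PTR_MAYBE_NULL */
+  base_type(ret);        /* low bits:  RET_PTR_TO_MAP_VALUE */
+  type_flag(ret);        /* high bits: PTR_MAYBE_NULL */
+  type_may_be_null(ret); /* true */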
+ +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-4-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + include/linux/bpf.h | 20 +++++++++++++------- + kernel/bpf/helpers.c | 2 +- + kernel/bpf/verifier.c | 49 ++++++++++++++++++++++++++----------------------- + 3 files changed, 40 insertions(+), 31 deletions(-) + +--- a/include/linux/bpf.h ++++ b/include/linux/bpf.h +@@ -343,16 +343,22 @@ enum bpf_return_type { + RET_INTEGER, /* function returns integer */ + RET_VOID, /* function doesn't return anything */ + RET_PTR_TO_MAP_VALUE, /* returns a pointer to map elem value */ +- RET_PTR_TO_MAP_VALUE_OR_NULL, /* returns a pointer to map elem value or NULL */ +- RET_PTR_TO_SOCKET_OR_NULL, /* returns a pointer to a socket or NULL */ +- RET_PTR_TO_TCP_SOCK_OR_NULL, /* returns a pointer to a tcp_sock or NULL */ +- RET_PTR_TO_SOCK_COMMON_OR_NULL, /* returns a pointer to a sock_common or NULL */ +- RET_PTR_TO_ALLOC_MEM_OR_NULL, /* returns a pointer to dynamically allocated memory or NULL */ +- RET_PTR_TO_BTF_ID_OR_NULL, /* returns a pointer to a btf_id or NULL */ +- RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, /* returns a pointer to a valid memory or a btf_id or NULL */ ++ RET_PTR_TO_SOCKET, /* returns a pointer to a socket */ ++ RET_PTR_TO_TCP_SOCK, /* returns a pointer to a tcp_sock */ ++ RET_PTR_TO_SOCK_COMMON, /* returns a pointer to a sock_common */ ++ RET_PTR_TO_ALLOC_MEM, /* returns a pointer to dynamically allocated memory */ + RET_PTR_TO_MEM_OR_BTF_ID, /* returns a pointer to a valid memory or a btf_id */ ++ RET_PTR_TO_BTF_ID, /* returns a pointer to a btf_id */ + __BPF_RET_TYPE_MAX, + ++ /* Extended ret_types. */ ++ RET_PTR_TO_MAP_VALUE_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_MAP_VALUE, ++ RET_PTR_TO_SOCKET_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCKET, ++ RET_PTR_TO_TCP_SOCK_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_TCP_SOCK, ++ RET_PTR_TO_SOCK_COMMON_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_SOCK_COMMON, ++ RET_PTR_TO_ALLOC_MEM_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_ALLOC_MEM, ++ RET_PTR_TO_BTF_ID_OR_NULL = PTR_MAYBE_NULL | RET_PTR_TO_BTF_ID, ++ + /* This must be the last entry. Its purpose is to ensure the enum is + * wide enough to hold the higher bits reserved for bpf_type_flag. 
+ */ +--- a/kernel/bpf/helpers.c ++++ b/kernel/bpf/helpers.c +@@ -653,7 +653,7 @@ BPF_CALL_2(bpf_per_cpu_ptr, const void * + const struct bpf_func_proto bpf_per_cpu_ptr_proto = { + .func = bpf_per_cpu_ptr, + .gpl_only = false, +- .ret_type = RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL, ++ .ret_type = RET_PTR_TO_MEM_OR_BTF_ID | PTR_MAYBE_NULL, + .arg1_type = ARG_PTR_TO_PERCPU_BTF_ID, + .arg2_type = ARG_ANYTHING, + }; +--- a/kernel/bpf/verifier.c ++++ b/kernel/bpf/verifier.c +@@ -5555,6 +5555,7 @@ static int check_reference_leak(struct b + static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx) + { + const struct bpf_func_proto *fn = NULL; ++ enum bpf_return_type ret_type; + struct bpf_reg_state *regs; + struct bpf_call_arg_meta meta; + bool changes_data; +@@ -5666,13 +5667,13 @@ static int check_helper_call(struct bpf_ + regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG; + + /* update return register (already marked as written above) */ +- if (fn->ret_type == RET_INTEGER) { ++ ret_type = fn->ret_type; ++ if (ret_type == RET_INTEGER) { + /* sets type to SCALAR_VALUE */ + mark_reg_unknown(env, regs, BPF_REG_0); +- } else if (fn->ret_type == RET_VOID) { ++ } else if (ret_type == RET_VOID) { + regs[BPF_REG_0].type = NOT_INIT; +- } else if (fn->ret_type == RET_PTR_TO_MAP_VALUE_OR_NULL || +- fn->ret_type == RET_PTR_TO_MAP_VALUE) { ++ } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) { + /* There is no offset yet applied, variable or fixed */ + mark_reg_known_zero(env, regs, BPF_REG_0); + /* remember map_ptr, so that check_map_access() +@@ -5685,28 +5686,27 @@ static int check_helper_call(struct bpf_ + return -EINVAL; + } + regs[BPF_REG_0].map_ptr = meta.map_ptr; +- if (fn->ret_type == RET_PTR_TO_MAP_VALUE) { ++ if (type_may_be_null(ret_type)) { ++ regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; ++ } else { + regs[BPF_REG_0].type = PTR_TO_MAP_VALUE; + if (map_value_has_spin_lock(meta.map_ptr)) + regs[BPF_REG_0].id = ++env->id_gen; +- } else { +- regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; + } +- } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { ++ } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; +- } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { ++ } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; +- } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { ++ } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; +- } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { ++ } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) { + mark_reg_known_zero(env, regs, BPF_REG_0); + regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; + regs[BPF_REG_0].mem_size = meta.mem_size; +- } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || +- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { ++ } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) { + const struct btf_type *t; + + mark_reg_known_zero(env, regs, BPF_REG_0); +@@ -5725,30 +5725,33 @@ static int check_helper_call(struct bpf_ + return -EINVAL; + } + regs[BPF_REG_0].type = +- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? +- PTR_TO_MEM : PTR_TO_MEM_OR_NULL; ++ (ret_type & PTR_MAYBE_NULL) ? 
++ PTR_TO_MEM_OR_NULL : PTR_TO_MEM; + regs[BPF_REG_0].mem_size = tsize; + } else { + regs[BPF_REG_0].type = +- fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID ? +- PTR_TO_BTF_ID : PTR_TO_BTF_ID_OR_NULL; ++ (ret_type & PTR_MAYBE_NULL) ? ++ PTR_TO_BTF_ID_OR_NULL : PTR_TO_BTF_ID; + regs[BPF_REG_0].btf_id = meta.ret_btf_id; + } +- } else if (fn->ret_type == RET_PTR_TO_BTF_ID_OR_NULL) { ++ } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) { + int ret_btf_id; + + mark_reg_known_zero(env, regs, BPF_REG_0); +- regs[BPF_REG_0].type = PTR_TO_BTF_ID_OR_NULL; ++ regs[BPF_REG_0].type = (ret_type & PTR_MAYBE_NULL) ? ++ PTR_TO_BTF_ID_OR_NULL : ++ PTR_TO_BTF_ID; + ret_btf_id = *fn->ret_btf_id; + if (ret_btf_id == 0) { +- verbose(env, "invalid return type %d of func %s#%d\n", +- fn->ret_type, func_id_name(func_id), func_id); ++ verbose(env, "invalid return type %u of func %s#%d\n", ++ base_type(ret_type), func_id_name(func_id), ++ func_id); + return -EINVAL; + } + regs[BPF_REG_0].btf_id = ret_btf_id; + } else { +- verbose(env, "unknown return type %d of func %s#%d\n", +- fn->ret_type, func_id_name(func_id), func_id); ++ verbose(env, "unknown return type %u of func %s#%d\n", ++ base_type(ret_type), func_id_name(func_id), func_id); + return -EINVAL; + } + diff --git a/queue-5.10/bpf-selftests-test-ptr_to_rdonly_mem.patch b/queue-5.10/bpf-selftests-test-ptr_to_rdonly_mem.patch new file mode 100644 index 00000000000..9143f7eda4b --- /dev/null +++ b/queue-5.10/bpf-selftests-test-ptr_to_rdonly_mem.patch @@ -0,0 +1,96 @@ +From stable+bounces-152309-greg=kroah.com@vger.kernel.org Tue Jun 10 16:47:07 2025 +From: Puranjay Mohan <puranjay@kernel.org> +Date: Tue, 10 Jun 2025 14:44:03 +0000 +Subject: bpf/selftests: Test PTR_TO_RDONLY_MEM +To: Greg KH <gregkh@linuxfoundation.org> +Cc: Hao Luo <haoluo@google.com>, Alexei Starovoitov <ast@kernel.org>, Andrii Nakryiko <andrii@kernel.org>, Puranjay Mohan <puranjay@kernel.org>, stable@vger.kernel.org +Message-ID: <20250610144407.95865-9-puranjay@kernel.org> + +From: Hao Luo <haoluo@google.com> + +commit 9497c458c10b049438ef6e6ddda898edbc3ec6a8 upstream. + +This test verifies that a ksym of non-struct can not be directly +updated. 
+ +Signed-off-by: Hao Luo <haoluo@google.com> +Signed-off-by: Alexei Starovoitov <ast@kernel.org> +Acked-by: Andrii Nakryiko <andrii@kernel.org> +[Changed ASSERT_ERR_PTR() to CHECK()] +Signed-off-by: Puranjay Mohan <puranjay@kernel.org> +Link: https://lore.kernel.org/bpf/20211217003152.48334-10-haoluo@google.com +Cc: stable@vger.kernel.org # 5.10.x +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + tools/testing/selftests/bpf/prog_tests/ksyms_btf.c | 14 ++++ + tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c | 29 ++++++++++ + 2 files changed, 43 insertions(+) + create mode 100644 tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c + +--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c ++++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +@@ -6,6 +6,7 @@ + #include <bpf/btf.h> + #include "test_ksyms_btf.skel.h" + #include "test_ksyms_btf_null_check.skel.h" ++#include "test_ksyms_btf_write_check.skel.h" + + static int duration; + +@@ -81,6 +82,16 @@ static void test_null_check(void) + test_ksyms_btf_null_check__destroy(skel); + } + ++static void test_write_check(void) ++{ ++ struct test_ksyms_btf_write_check *skel; ++ ++ skel = test_ksyms_btf_write_check__open_and_load(); ++ CHECK(skel, "skel_open", "unexpected load of a prog writing to ksym memory\n"); ++ ++ test_ksyms_btf_write_check__destroy(skel); ++} ++ + void test_ksyms_btf(void) + { + int percpu_datasec; +@@ -106,4 +117,7 @@ void test_ksyms_btf(void) + + if (test__start_subtest("null_check")) + test_null_check(); ++ ++ if (test__start_subtest("write_check")) ++ test_write_check(); + } +--- /dev/null ++++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c +@@ -0,0 +1,29 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* Copyright (c) 2021 Google */ ++ ++#include "vmlinux.h" ++ ++#include <bpf/bpf_helpers.h> ++ ++extern const int bpf_prog_active __ksym; /* int type global var. */ ++ ++SEC("raw_tp/sys_enter") ++int handler(const void *ctx) ++{ ++ int *active; ++ __u32 cpu; ++ ++ cpu = bpf_get_smp_processor_id(); ++ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); ++ if (active) { ++ /* Kernel memory obtained from bpf_{per,this}_cpu_ptr ++ * is read-only, should _not_ pass verification. ++ */ ++ /* WRITE_ONCE */ ++ *(volatile int *)active = -1; ++ } ++ ++ return 0; ++} ++ ++char _license[] SEC("license") = "GPL"; diff --git a/queue-5.10/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch b/queue-5.10/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch new file mode 100644 index 00000000000..9cbbf7a68aa --- /dev/null +++ b/queue-5.10/mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch @@ -0,0 +1,76 @@ +From be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 Mon Sep 17 00:00:00 2001 +From: Gavin Guo <gavinguo@igalia.com> +Date: Mon, 21 Apr 2025 19:35:36 +0800 +Subject: mm/huge_memory: fix dereferencing invalid pmd migration entry + +From: Gavin Guo <gavinguo@igalia.com> + +commit be6e843fc51a584672dfd9c4a6a24c8cb81d5fb7 upstream. + +When migrating a THP, concurrent access to the PMD migration entry during +a deferred split scan can lead to an invalid address access, as +illustrated below. To prevent this invalid access, it is necessary to +check the PMD migration entry and return early. In this context, there is +no need to use pmd_to_swp_entry and pfn_swap_entry_to_page to verify the +equality of the target folio. Since the PMD migration entry is locked, it +cannot be served as the target. 
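+
+To make the failure mode concrete: a migration PMD carries a swap
+entry rather than a present PFN, so pmd_page() on it computes a bogus
+struct page address (the faulting ffffea60001db008 below). A rough
+sketch of the check, equivalent to the one-line fix at the end of
+this patch:
+
+  if (page) {
+    /* A locked migration entry cannot be the target folio, and
+     * decoding it via pmd_to_swp_entry()/pfn_swap_entry_to_page()
+     * is therefore unnecessary: bail out early instead.
+     */
+    if (is_pmd_migration_entry(*pmd))
+      goto out;
+    /* Only a present PMD makes pmd_page() meaningful. */
+    if (page != pmd_page(*pmd))
+      goto out;
+  }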
+ +Mailing list discussion and explanation from Hugh Dickins: "An anon_vma +lookup points to a location which may contain the folio of interest, but +might instead contain another folio: and weeding out those other folios is +precisely what the "folio != pmd_folio((*pmd)" check (and the "risk of +replacing the wrong folio" comment a few lines above it) is for." + +BUG: unable to handle page fault for address: ffffea60001db008 +CPU: 0 UID: 0 PID: 2199114 Comm: tee Not tainted 6.14.0+ #4 NONE +Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.3-debian-1.16.3-2 04/01/2014 +RIP: 0010:split_huge_pmd_locked+0x3b5/0x2b60 +Call Trace: +<TASK> +try_to_migrate_one+0x28c/0x3730 +rmap_walk_anon+0x4f6/0x770 +unmap_folio+0x196/0x1f0 +split_huge_page_to_list_to_order+0x9f6/0x1560 +deferred_split_scan+0xac5/0x12a0 +shrinker_debugfs_scan_write+0x376/0x470 +full_proxy_write+0x15c/0x220 +vfs_write+0x2fc/0xcb0 +ksys_write+0x146/0x250 +do_syscall_64+0x6a/0x120 +entry_SYSCALL_64_after_hwframe+0x76/0x7e + +The bug is found by syzkaller on an internal kernel, then confirmed on +upstream. + +Link: https://lkml.kernel.org/r/20250421113536.3682201-1-gavinguo@igalia.com +Link: https://lore.kernel.org/all/20250414072737.1698513-1-gavinguo@igalia.com/ +Link: https://lore.kernel.org/all/20250418085802.2973519-1-gavinguo@igalia.com/ +Fixes: 84c3fc4e9c56 ("mm: thp: check pmd migration entry in common path") +Signed-off-by: Gavin Guo <gavinguo@igalia.com> +Acked-by: David Hildenbrand <david@redhat.com> +Acked-by: Hugh Dickins <hughd@google.com> +Acked-by: Zi Yan <ziy@nvidia.com> +Reviewed-by: Gavin Shan <gshan@redhat.com> +Cc: Florent Revest <revest@google.com> +Cc: Matthew Wilcox (Oracle) <willy@infradead.org> +Cc: Miaohe Lin <linmiaohe@huawei.com> +Cc: <stable@vger.kernel.org> +Signed-off-by: Andrew Morton <akpm@linux-foundation.org> +[gavin: backport the migration checking logic to __split_huge_pmd] +Signed-off-by: Gavin Guo <gavinguo@igalia.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + mm/huge_memory.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/mm/huge_memory.c ++++ b/mm/huge_memory.c +@@ -2227,7 +2227,7 @@ void __split_huge_pmd(struct vm_area_str + VM_BUG_ON(freeze && !page); + if (page) { + VM_WARN_ON_ONCE(!PageLocked(page)); +- if (page != pmd_page(*pmd)) ++ if (is_pmd_migration_entry(*pmd) || page != pmd_page(*pmd)) + goto out; + } + diff --git a/queue-5.10/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch b/queue-5.10/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch new file mode 100644 index 00000000000..11557765daf --- /dev/null +++ b/queue-5.10/net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch @@ -0,0 +1,71 @@ +From stable+bounces-152669-greg=kroah.com@vger.kernel.org Sun Jun 15 19:52:10 2025 +From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Date: Sun, 15 Jun 2025 10:51:49 -0700 +Subject: net_sched: sch_sfq: annotate data-races around q->perturb_period +To: stable@vger.kernel.org +Cc: tavip@google.com, edumazet@google.com, Simon Horman <horms@kernel.org>, Jakub Kicinski <kuba@kernel.org>, Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Message-ID: <20250615175153.1610731-2-harshit.m.mogalapalli@oracle.com> + +From: Eric Dumazet <edumazet@google.com> + +[ Upstream commit a17ef9e6c2c1cf0fc6cd6ca6a9ce525c67d1da7f ] + +sfq_perturbation() reads q->perturb_period locklessly. +Add annotations to fix potential issues. 
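+
+The idiom, in short: every lockless read is wrapped in READ_ONCE()
+and every update (done under the qdisc tree lock) in WRITE_ONCE(),
+so the compiler can neither tear nor re-load the value. Sketch, with
+new_period standing in for whichever value is being configured:
+
+  /* writers: sfq_change() and sfq_destroy() */
+  WRITE_ONCE(q->perturb_period, new_period);
+
+  /* lockless reader: the perturbation timer */
+  int period = READ_ONCE(q->perturb_period);
+
+  if (period)
+    mod_timer(&q->perturb_timer, jiffies + period);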
+ +Signed-off-by: Eric Dumazet <edumazet@google.com> +Reviewed-by: Simon Horman <horms@kernel.org> +Link: https://lore.kernel.org/r/20240430180015.3111398-1-edumazet@google.com +Signed-off-by: Jakub Kicinski <kuba@kernel.org> +[ Harshit: Backport to 5.10.y, conflicts resolved due to missing commit: + d636fc5dd692 ("net: sched: add rcu annotations around qdisc->qdisc_sleeping") + in 5.10.y ] +Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 13 +++++++++---- + 1 file changed, 9 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -611,6 +611,7 @@ static void sfq_perturbation(struct time + struct Qdisc *sch = q->sch; + spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); + siphash_key_t nkey; ++ int period; + + get_random_bytes(&nkey, sizeof(nkey)); + spin_lock(root_lock); +@@ -619,8 +620,12 @@ static void sfq_perturbation(struct time + sfq_rehash(sch); + spin_unlock(root_lock); + +- if (q->perturb_period) +- mod_timer(&q->perturb_timer, jiffies + q->perturb_period); ++ /* q->perturb_period can change under us from ++ * sfq_change() and sfq_destroy(). ++ */ ++ period = READ_ONCE(q->perturb_period); ++ if (period) ++ mod_timer(&q->perturb_timer, jiffies + period); + } + + static int sfq_change(struct Qdisc *sch, struct nlattr *opt) +@@ -662,7 +667,7 @@ static int sfq_change(struct Qdisc *sch, + q->quantum = ctl->quantum; + q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + } +- q->perturb_period = ctl->perturb_period * HZ; ++ WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + if (ctl->flows) + q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { +@@ -724,7 +729,7 @@ static void sfq_destroy(struct Qdisc *sc + struct sfq_sched_data *q = qdisc_priv(sch); + + tcf_block_put(q->block); +- q->perturb_period = 0; ++ WRITE_ONCE(q->perturb_period, 0); + del_timer_sync(&q->perturb_timer); + sfq_free(q->ht); + sfq_free(q->slots); diff --git a/queue-5.10/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch b/queue-5.10/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch new file mode 100644 index 00000000000..d27c32416f5 --- /dev/null +++ b/queue-5.10/net_sched-sch_sfq-don-t-allow-1-packet-limit.patch @@ -0,0 +1,115 @@ +From stable+bounces-152671-greg=kroah.com@vger.kernel.org Sun Jun 15 19:52:16 2025 +From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Date: Sun, 15 Jun 2025 10:51:51 -0700 +Subject: net_sched: sch_sfq: don't allow 1 packet limit +To: stable@vger.kernel.org +Cc: tavip@google.com, edumazet@google.com, syzbot <syzkaller@googlegroups.com>, Jakub Kicinski <kuba@kernel.org>, Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Message-ID: <20250615175153.1610731-4-harshit.m.mogalapalli@oracle.com> + +From: Octavian Purdila <tavip@google.com> + +[ Upstream commit 10685681bafce6febb39770f3387621bf5d67d0b ] + +The current implementation does not work correctly with a limit of +1. iproute2 actually checks for this and this patch adds the check in +kernel as well. 
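+
+The guard is the kernel-side twin of iproute2's validation; in the
+form added here (a later patch in this series relocates it so that
+indirectly computed limits are caught as well):
+
+  if (ctl->limit == 1) {
+    NL_SET_ERR_MSG_MOD(extack, "invalid limit");
+    return -EINVAL;
+  }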
+ +This fixes the following syzkaller reported crash: + +UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:210:6 +index 65535 is out of range for type 'struct sfq_head[128]' +CPU: 0 PID: 2569 Comm: syz-executor101 Not tainted 5.10.0-smp-DEV #1 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/13/2024 +Call Trace: + __dump_stack lib/dump_stack.c:79 [inline] + dump_stack+0x125/0x19f lib/dump_stack.c:120 + ubsan_epilogue lib/ubsan.c:148 [inline] + __ubsan_handle_out_of_bounds+0xed/0x120 lib/ubsan.c:347 + sfq_link net/sched/sch_sfq.c:210 [inline] + sfq_dec+0x528/0x600 net/sched/sch_sfq.c:238 + sfq_dequeue+0x39b/0x9d0 net/sched/sch_sfq.c:500 + sfq_reset+0x13/0x50 net/sched/sch_sfq.c:525 + qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026 + tbf_reset+0x3d/0x100 net/sched/sch_tbf.c:319 + qdisc_reset+0xfe/0x510 net/sched/sch_generic.c:1026 + dev_reset_queue+0x8c/0x140 net/sched/sch_generic.c:1296 + netdev_for_each_tx_queue include/linux/netdevice.h:2350 [inline] + dev_deactivate_many+0x6dc/0xc20 net/sched/sch_generic.c:1362 + __dev_close_many+0x214/0x350 net/core/dev.c:1468 + dev_close_many+0x207/0x510 net/core/dev.c:1506 + unregister_netdevice_many+0x40f/0x16b0 net/core/dev.c:10738 + unregister_netdevice_queue+0x2be/0x310 net/core/dev.c:10695 + unregister_netdevice include/linux/netdevice.h:2893 [inline] + __tun_detach+0x6b6/0x1600 drivers/net/tun.c:689 + tun_detach drivers/net/tun.c:705 [inline] + tun_chr_close+0x104/0x1b0 drivers/net/tun.c:3640 + __fput+0x203/0x840 fs/file_table.c:280 + task_work_run+0x129/0x1b0 kernel/task_work.c:185 + exit_task_work include/linux/task_work.h:33 [inline] + do_exit+0x5ce/0x2200 kernel/exit.c:931 + do_group_exit+0x144/0x310 kernel/exit.c:1046 + __do_sys_exit_group kernel/exit.c:1057 [inline] + __se_sys_exit_group kernel/exit.c:1055 [inline] + __x64_sys_exit_group+0x3b/0x40 kernel/exit.c:1055 + do_syscall_64+0x6c/0xd0 + entry_SYSCALL_64_after_hwframe+0x61/0xcb +RIP: 0033:0x7fe5e7b52479 +Code: Unable to access opcode bytes at RIP 0x7fe5e7b5244f. +RSP: 002b:00007ffd3c800398 EFLAGS: 00000246 ORIG_RAX: 00000000000000e7 +RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fe5e7b52479 +RDX: 000000000000003c RSI: 00000000000000e7 RDI: 0000000000000000 +RBP: 00007fe5e7bcd2d0 R08: ffffffffffffffb8 R09: 0000000000000014 +R10: 0000000000000000 R11: 0000000000000246 R12: 00007fe5e7bcd2d0 +R13: 0000000000000000 R14: 00007fe5e7bcdd20 R15: 00007fe5e7b24270 + +The crash can be also be reproduced with the following (with a tc +recompiled to allow for sfq limits of 1): + +tc qdisc add dev dummy0 handle 1: root tbf rate 1Kbit burst 100b lat 1s +../iproute2-6.9.0/tc/tc qdisc add dev dummy0 handle 2: parent 1:10 sfq limit 1 +ifconfig dummy0 up +ping -I dummy0 -f -c2 -W0.1 8.8.8.8 +sleep 1 + +Scenario that triggers the crash: + +* the first packet is sent and queued in TBF and SFQ; qdisc qlen is 1 + +* TBF dequeues: it peeks from SFQ which moves the packet to the + gso_skb list and keeps qdisc qlen set to 1. TBF is out of tokens so + it schedules itself for later. + +* the second packet is sent and TBF tries to queues it to SFQ. qdisc + qlen is now 2 and because the SFQ limit is 1 the packet is dropped + by SFQ. At this point qlen is 1, and all of the SFQ slots are empty, + however q->tail is not NULL. + +At this point, assuming no more packets are queued, when sch_dequeue +runs again it will decrement the qlen for the current empty slot +causing an underflow and the subsequent out of bounds access. 
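+
+The out-of-range index is plain 16-bit wraparound; schematically:
+
+  u16 qlen = 0; /* the slot is already empty */
+
+  qlen--;       /* wraps to 65535, i.e. SFQ_EMPTY_SLOT */
+  /* sfq_link() then uses the wrapped depth as an index into
+   * q->dep[], which is the "index 65535 is out of range for type
+   * 'struct sfq_head[128]'" splat quoted above.
+   */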
+
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Octavian Purdila <tavip@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/20241204030520.2084663-2-tavip@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -652,6 +652,10 @@ static int sfq_change(struct Qdisc *sch,
+ if (!p)
+ return -ENOMEM;
+ }
++ if (ctl->limit == 1) {
++ NL_SET_ERR_MSG_MOD(extack, "invalid limit");
++ return -EINVAL;
++ }
+ sch_tree_lock(sch);
+ if (ctl->quantum)
+ q->quantum = ctl->quantum;
diff --git a/queue-5.10/net_sched-sch_sfq-handle-bigger-packets.patch b/queue-5.10/net_sched-sch_sfq-handle-bigger-packets.patch
new file mode 100644
index 00000000000..a7a0af423b5
--- /dev/null
+++ b/queue-5.10/net_sched-sch_sfq-handle-bigger-packets.patch
@@ -0,0 +1,162 @@
+From stable+bounces-152670-greg=kroah.com@vger.kernel.org Sun Jun 15 19:52:12 2025
+From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+Date: Sun, 15 Jun 2025 10:51:50 -0700
+Subject: net_sched: sch_sfq: handle bigger packets
+To: stable@vger.kernel.org
+Cc: tavip@google.com, edumazet@google.com, "Toke Høiland-Jørgensen" <toke@redhat.com>, "Jakub Kicinski" <kuba@kernel.org>, "Harshit Mogalapalli" <harshit.m.mogalapalli@oracle.com>
+Message-ID: <20250615175153.1610731-3-harshit.m.mogalapalli@oracle.com>
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit e4650d7ae4252f67e997a632adfae0dd74d3a99a ]
+
+SFQ has an assumption on dealing with packets smaller than 64KB.
+
+Even before BIG TCP, TCA_STAB can provide arbitrary big values
+in qdisc_pkt_len(skb)
+
+It is time to switch (struct sfq_slot)->allot to a 32bit field.
+
+sizeof(struct sfq_slot) is now 64 bytes, giving better cache locality.
+
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Link: https://patch.msgid.link/20241008111603.653140-1-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/sched/sch_sfq.c | 39 +++++++++++++--------------------------
+ 1 file changed, 13 insertions(+), 26 deletions(-)
+
+--- a/net/sched/sch_sfq.c
++++ b/net/sched/sch_sfq.c
+@@ -77,12 +77,6 @@
+ #define SFQ_EMPTY_SLOT 0xffff
+ #define SFQ_DEFAULT_HASH_DIVISOR 1024
+
+-/* We use 16 bits to store allot, and want to handle packets up to 64K
+- * Scale allot by 8 (1<<3) so that no overflow occurs. 
+- */ +-#define SFQ_ALLOT_SHIFT 3 +-#define SFQ_ALLOT_SIZE(X) DIV_ROUND_UP(X, 1 << SFQ_ALLOT_SHIFT) +- + /* This type should contain at least SFQ_MAX_DEPTH + 1 + SFQ_MAX_FLOWS values */ + typedef u16 sfq_index; + +@@ -104,7 +98,7 @@ struct sfq_slot { + sfq_index next; /* next slot in sfq RR chain */ + struct sfq_head dep; /* anchor in dep[] chains */ + unsigned short hash; /* hash value (index in ht[]) */ +- short allot; /* credit for this slot */ ++ int allot; /* credit for this slot */ + + unsigned int backlog; + struct red_vars vars; +@@ -120,7 +114,6 @@ struct sfq_sched_data { + siphash_key_t perturbation; + u8 cur_depth; /* depth of longest slot */ + u8 flags; +- unsigned short scaled_quantum; /* SFQ_ALLOT_SIZE(quantum) */ + struct tcf_proto __rcu *filter_list; + struct tcf_block *block; + sfq_index *ht; /* Hash table ('divisor' slots) */ +@@ -459,7 +452,7 @@ enqueue: + */ + q->tail = slot; + /* We could use a bigger initial quantum for new flows */ +- slot->allot = q->scaled_quantum; ++ slot->allot = q->quantum; + } + if (++sch->q.qlen <= q->limit) + return NET_XMIT_SUCCESS; +@@ -496,7 +489,7 @@ next_slot: + slot = &q->slots[a]; + if (slot->allot <= 0) { + q->tail = slot; +- slot->allot += q->scaled_quantum; ++ slot->allot += q->quantum; + goto next_slot; + } + skb = slot_dequeue_head(slot); +@@ -515,7 +508,7 @@ next_slot: + } + q->tail->next = next_a; + } else { +- slot->allot -= SFQ_ALLOT_SIZE(qdisc_pkt_len(skb)); ++ slot->allot -= qdisc_pkt_len(skb); + } + return skb; + } +@@ -598,7 +591,7 @@ drop: + q->tail->next = x; + } + q->tail = slot; +- slot->allot = q->scaled_quantum; ++ slot->allot = q->quantum; + } + } + sch->q.qlen -= dropped; +@@ -628,7 +621,8 @@ static void sfq_perturbation(struct time + mod_timer(&q->perturb_timer, jiffies + period); + } + +-static int sfq_change(struct Qdisc *sch, struct nlattr *opt) ++static int sfq_change(struct Qdisc *sch, struct nlattr *opt, ++ struct netlink_ext_ack *extack) + { + struct sfq_sched_data *q = qdisc_priv(sch); + struct tc_sfq_qopt *ctl = nla_data(opt); +@@ -646,14 +640,10 @@ static int sfq_change(struct Qdisc *sch, + (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) + return -EINVAL; + +- /* slot->allot is a short, make sure quantum is not too big. 
*/ +- if (ctl->quantum) { +- unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); +- +- if (scaled <= 0 || scaled > SHRT_MAX) +- return -EINVAL; ++ if ((int)ctl->quantum < 0) { ++ NL_SET_ERR_MSG_MOD(extack, "invalid quantum"); ++ return -EINVAL; + } +- + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, ctl_v1->Scell_log, NULL)) + return -EINVAL; +@@ -663,10 +653,8 @@ static int sfq_change(struct Qdisc *sch, + return -ENOMEM; + } + sch_tree_lock(sch); +- if (ctl->quantum) { ++ if (ctl->quantum) + q->quantum = ctl->quantum; +- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); +- } + WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); + if (ctl->flows) + q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); +@@ -762,12 +750,11 @@ static int sfq_init(struct Qdisc *sch, s + q->divisor = SFQ_DEFAULT_HASH_DIVISOR; + q->maxflows = SFQ_DEFAULT_FLOWS; + q->quantum = psched_mtu(qdisc_dev(sch)); +- q->scaled_quantum = SFQ_ALLOT_SIZE(q->quantum); + q->perturb_period = 0; + get_random_bytes(&q->perturbation, sizeof(q->perturbation)); + + if (opt) { +- int err = sfq_change(sch, opt); ++ int err = sfq_change(sch, opt, extack); + if (err) + return err; + } +@@ -878,7 +865,7 @@ static int sfq_dump_class_stats(struct Q + if (idx != SFQ_EMPTY_SLOT) { + const struct sfq_slot *slot = &q->slots[idx]; + +- xstats.allot = slot->allot << SFQ_ALLOT_SHIFT; ++ xstats.allot = slot->allot; + qs.qlen = slot->qlen; + qs.backlog = slot->backlog; + } diff --git a/queue-5.10/net_sched-sch_sfq-move-the-limit-validation.patch b/queue-5.10/net_sched-sch_sfq-move-the-limit-validation.patch new file mode 100644 index 00000000000..81d6166f7dc --- /dev/null +++ b/queue-5.10/net_sched-sch_sfq-move-the-limit-validation.patch @@ -0,0 +1,84 @@ +From stable+bounces-152673-greg=kroah.com@vger.kernel.org Sun Jun 15 19:52:16 2025 +From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Date: Sun, 15 Jun 2025 10:51:53 -0700 +Subject: net_sched: sch_sfq: move the limit validation +To: stable@vger.kernel.org +Cc: tavip@google.com, edumazet@google.com, syzbot <syzkaller@googlegroups.com>, Cong Wang <xiyou.wangcong@gmail.com>, "David S. Miller" <davem@davemloft.net>, Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Message-ID: <20250615175153.1610731-6-harshit.m.mogalapalli@oracle.com> + +From: Octavian Purdila <tavip@google.com> + +[ Upstream commit b3bf8f63e6179076b57c9de660c9f80b5abefe70 ] + +It is not sufficient to directly validate the limit on the data that +the user passes as it can be updated based on how the other parameters +are changed. 
+ +Move the check at the end of the configuration update process to also +catch scenarios where the limit is indirectly updated, for example +with the following configurations: + +tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 depth 1 +tc qdisc add dev dummy0 handle 1: root sfq limit 2 flows 1 divisor 1 + +This fixes the following syzkaller reported crash: + +------------[ cut here ]------------ +UBSAN: array-index-out-of-bounds in net/sched/sch_sfq.c:203:6 +index 65535 is out of range for type 'struct sfq_head[128]' +CPU: 1 UID: 0 PID: 3037 Comm: syz.2.16 Not tainted 6.14.0-rc2-syzkaller #0 +Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 12/27/2024 +Call Trace: + <TASK> + __dump_stack lib/dump_stack.c:94 [inline] + dump_stack_lvl+0x201/0x300 lib/dump_stack.c:120 + ubsan_epilogue lib/ubsan.c:231 [inline] + __ubsan_handle_out_of_bounds+0xf5/0x120 lib/ubsan.c:429 + sfq_link net/sched/sch_sfq.c:203 [inline] + sfq_dec+0x53c/0x610 net/sched/sch_sfq.c:231 + sfq_dequeue+0x34e/0x8c0 net/sched/sch_sfq.c:493 + sfq_reset+0x17/0x60 net/sched/sch_sfq.c:518 + qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 + tbf_reset+0x41/0x110 net/sched/sch_tbf.c:339 + qdisc_reset+0x12e/0x600 net/sched/sch_generic.c:1035 + dev_reset_queue+0x100/0x1b0 net/sched/sch_generic.c:1311 + netdev_for_each_tx_queue include/linux/netdevice.h:2590 [inline] + dev_deactivate_many+0x7e5/0xe70 net/sched/sch_generic.c:1375 + +Reported-by: syzbot <syzkaller@googlegroups.com> +Fixes: 10685681bafc ("net_sched: sch_sfq: don't allow 1 packet limit") +Signed-off-by: Octavian Purdila <tavip@google.com> +Acked-by: Cong Wang <xiyou.wangcong@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 10 ++++++---- + 1 file changed, 6 insertions(+), 4 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -661,10 +661,6 @@ static int sfq_change(struct Qdisc *sch, + if (!p) + return -ENOMEM; + } +- if (ctl->limit == 1) { +- NL_SET_ERR_MSG_MOD(extack, "invalid limit"); +- return -EINVAL; +- } + + sch_tree_lock(sch); + +@@ -705,6 +701,12 @@ static int sfq_change(struct Qdisc *sch, + limit = min_t(u32, ctl->limit, maxdepth * maxflows); + maxflows = min_t(u32, maxflows, limit); + } ++ if (limit == 1) { ++ sch_tree_unlock(sch); ++ kfree(p); ++ NL_SET_ERR_MSG_MOD(extack, "invalid limit"); ++ return -EINVAL; ++ } + + /* commit configuration */ + q->limit = limit; diff --git a/queue-5.10/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch b/queue-5.10/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch new file mode 100644 index 00000000000..af978673dd6 --- /dev/null +++ b/queue-5.10/net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch @@ -0,0 +1,121 @@ +From stable+bounces-152672-greg=kroah.com@vger.kernel.org Sun Jun 15 19:52:15 2025 +From: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Date: Sun, 15 Jun 2025 10:51:52 -0700 +Subject: net_sched: sch_sfq: use a temporary work area for validating configuration +To: stable@vger.kernel.org +Cc: tavip@google.com, edumazet@google.com, Cong Wang <xiyou.wangcong@gmail.com>, "David S. 
Miller" <davem@davemloft.net>, Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Message-ID: <20250615175153.1610731-5-harshit.m.mogalapalli@oracle.com> + +From: Octavian Purdila <tavip@google.com> + +[ Upstream commit 8c0cea59d40cf6dd13c2950437631dd614fbade6 ] + +Many configuration parameters have influence on others (e.g. divisor +-> flows -> limit, depth -> limit) and so it is difficult to correctly +do all of the validation before applying the configuration. And if a +validation error is detected late it is difficult to roll back a +partially applied configuration. + +To avoid these issues use a temporary work area to update and validate +the configuration and only then apply the configuration to the +internal state. + +Signed-off-by: Octavian Purdila <tavip@google.com> +Acked-by: Cong Wang <xiyou.wangcong@gmail.com> +Signed-off-by: David S. Miller <davem@davemloft.net> +Signed-off-by: Harshit Mogalapalli <harshit.m.mogalapalli@oracle.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + net/sched/sch_sfq.c | 56 ++++++++++++++++++++++++++++++++++++++++------------ + 1 file changed, 44 insertions(+), 12 deletions(-) + +--- a/net/sched/sch_sfq.c ++++ b/net/sched/sch_sfq.c +@@ -631,6 +631,15 @@ static int sfq_change(struct Qdisc *sch, + struct red_parms *p = NULL; + struct sk_buff *to_free = NULL; + struct sk_buff *tail = NULL; ++ unsigned int maxflows; ++ unsigned int quantum; ++ unsigned int divisor; ++ int perturb_period; ++ u8 headdrop; ++ u8 maxdepth; ++ int limit; ++ u8 flags; ++ + + if (opt->nla_len < nla_attr_size(sizeof(*ctl))) + return -EINVAL; +@@ -656,36 +665,59 @@ static int sfq_change(struct Qdisc *sch, + NL_SET_ERR_MSG_MOD(extack, "invalid limit"); + return -EINVAL; + } ++ + sch_tree_lock(sch); ++ ++ limit = q->limit; ++ divisor = q->divisor; ++ headdrop = q->headdrop; ++ maxdepth = q->maxdepth; ++ maxflows = q->maxflows; ++ perturb_period = q->perturb_period; ++ quantum = q->quantum; ++ flags = q->flags; ++ ++ /* update and validate configuration */ + if (ctl->quantum) +- q->quantum = ctl->quantum; +- WRITE_ONCE(q->perturb_period, ctl->perturb_period * HZ); ++ quantum = ctl->quantum; ++ perturb_period = ctl->perturb_period * HZ; + if (ctl->flows) +- q->maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); ++ maxflows = min_t(u32, ctl->flows, SFQ_MAX_FLOWS); + if (ctl->divisor) { +- q->divisor = ctl->divisor; +- q->maxflows = min_t(u32, q->maxflows, q->divisor); ++ divisor = ctl->divisor; ++ maxflows = min_t(u32, maxflows, divisor); + } + if (ctl_v1) { + if (ctl_v1->depth) +- q->maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); ++ maxdepth = min_t(u32, ctl_v1->depth, SFQ_MAX_DEPTH); + if (p) { +- swap(q->red_parms, p); +- red_set_parms(q->red_parms, ++ red_set_parms(p, + ctl_v1->qth_min, ctl_v1->qth_max, + ctl_v1->Wlog, + ctl_v1->Plog, ctl_v1->Scell_log, + NULL, + ctl_v1->max_P); + } +- q->flags = ctl_v1->flags; +- q->headdrop = ctl_v1->headdrop; ++ flags = ctl_v1->flags; ++ headdrop = ctl_v1->headdrop; + } + if (ctl->limit) { +- q->limit = min_t(u32, ctl->limit, q->maxdepth * q->maxflows); +- q->maxflows = min_t(u32, q->maxflows, q->limit); ++ limit = min_t(u32, ctl->limit, maxdepth * maxflows); ++ maxflows = min_t(u32, maxflows, limit); + } + ++ /* commit configuration */ ++ q->limit = limit; ++ q->divisor = divisor; ++ q->headdrop = headdrop; ++ q->maxdepth = maxdepth; ++ q->maxflows = maxflows; ++ WRITE_ONCE(q->perturb_period, perturb_period); ++ q->quantum = quantum; ++ q->flags = flags; ++ if (p) ++ swap(q->red_parms, p); ++ + qlen = 
sch->q.qlen; + while (sch->q.qlen > q->limit) { + dropped += sfq_drop(sch, &to_free); diff --git a/queue-5.10/rtc-improve-performance-of-rtc_time64_to_tm-.-add-tests.patch b/queue-5.10/rtc-improve-performance-of-rtc_time64_to_tm-.-add-tests.patch new file mode 100644 index 00000000000..117d905f0bd --- /dev/null +++ b/queue-5.10/rtc-improve-performance-of-rtc_time64_to_tm-.-add-tests.patch @@ -0,0 +1,288 @@ +From 1d1bb12a8b1805ddeef9793ebeb920179fb0fa38 Mon Sep 17 00:00:00 2001 +From: Cassio Neri <cassio.neri@gmail.com> +Date: Thu, 24 Jun 2021 21:13:43 +0100 +Subject: rtc: Improve performance of rtc_time64_to_tm(). Add tests. + +From: Cassio Neri <cassio.neri@gmail.com> + +commit 1d1bb12a8b1805ddeef9793ebeb920179fb0fa38 upstream. + +The current implementation of rtc_time64_to_tm() contains unnecessary +loops, branches and look-up tables. The new one uses an arithmetic-based +algorithm appeared in [1] and is approximately 4.3 times faster (YMMV). + +The drawback is that the new code isn't intuitive and contains many 'magic +numbers' (not unusual for this type of algorithm). However, [1] justifies +all those numbers and, given this function's history, the code is unlikely +to need much maintenance, if any at all. + +Add a KUnit test case that checks every day in a 160,000 years interval +starting on 1970-01-01 against the expected result. Add a new config +RTC_LIB_KUNIT_TEST symbol to give the option to run this test suite. + +[1] Neri, Schneider, "Euclidean Affine Functions and Applications to +Calendar Algorithms". https://arxiv.org/abs/2102.06959 + +Signed-off-by: Cassio Neri <cassio.neri@gmail.com> +Reported-by: kernel test robot <lkp@intel.com> +Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> +Link: https://lore.kernel.org/r/20210624201343.85441-1-cassio.neri@gmail.com +Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/rtc/Kconfig | 10 ++++ + drivers/rtc/Makefile | 1 + drivers/rtc/lib.c | 107 ++++++++++++++++++++++++++++++++++++------------- + drivers/rtc/lib_test.c | 79 ++++++++++++++++++++++++++++++++++++ + 4 files changed, 170 insertions(+), 27 deletions(-) + create mode 100644 drivers/rtc/lib_test.c + +--- a/drivers/rtc/Kconfig ++++ b/drivers/rtc/Kconfig +@@ -10,6 +10,16 @@ config RTC_MC146818_LIB + bool + select RTC_LIB + ++config RTC_LIB_KUNIT_TEST ++ tristate "KUnit test for RTC lib functions" if !KUNIT_ALL_TESTS ++ depends on KUNIT ++ default KUNIT_ALL_TESTS ++ select RTC_LIB ++ help ++ Enable this option to test RTC library functions. ++ ++ If unsure, say N. ++ + menuconfig RTC_CLASS + bool "Real Time Clock" + default n +--- a/drivers/rtc/Makefile ++++ b/drivers/rtc/Makefile +@@ -183,3 +183,4 @@ obj-$(CONFIG_RTC_DRV_WM8350) += rtc-wm83 + obj-$(CONFIG_RTC_DRV_X1205) += rtc-x1205.o + obj-$(CONFIG_RTC_DRV_XGENE) += rtc-xgene.o + obj-$(CONFIG_RTC_DRV_ZYNQMP) += rtc-zynqmp.o ++obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o +--- a/drivers/rtc/lib.c ++++ b/drivers/rtc/lib.c +@@ -6,6 +6,8 @@ + * Author: Alessandro Zummo <a.zummo@towertech.it> + * + * based on arch/arm/common/rtctime.c and other bits ++ * ++ * Author: Cassio Neri <cassio.neri@gmail.com> (rtc_time64_to_tm) + */ + + #include <linux/export.h> +@@ -22,8 +24,6 @@ static const unsigned short rtc_ydays[2] + { 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 } + }; + +-#define LEAPS_THRU_END_OF(y) ((y) / 4 - (y) / 100 + (y) / 400) +- + /* + * The number of days in the month. 
+ */ +@@ -42,42 +42,95 @@ int rtc_year_days(unsigned int day, unsi + } + EXPORT_SYMBOL(rtc_year_days); + +-/* +- * rtc_time64_to_tm - Converts time64_t to rtc_time. +- * Convert seconds since 01-01-1970 00:00:00 to Gregorian date. ++/** ++ * rtc_time64_to_tm - converts time64_t to rtc_time. ++ * ++ * @time: The number of seconds since 01-01-1970 00:00:00. ++ * (Must be positive.) ++ * @tm: Pointer to the struct rtc_time. + */ + void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) + { +- unsigned int month, year, secs; ++ unsigned int secs; + int days; + ++ u64 u64tmp; ++ u32 u32tmp, udays, century, day_of_century, year_of_century, year, ++ day_of_year, month, day; ++ bool is_Jan_or_Feb, is_leap_year; ++ + /* time must be positive */ + days = div_s64_rem(time, 86400, &secs); + + /* day of the week, 1970-01-01 was a Thursday */ + tm->tm_wday = (days + 4) % 7; + +- year = 1970 + days / 365; +- days -= (year - 1970) * 365 +- + LEAPS_THRU_END_OF(year - 1) +- - LEAPS_THRU_END_OF(1970 - 1); +- while (days < 0) { +- year -= 1; +- days += 365 + is_leap_year(year); +- } +- tm->tm_year = year - 1900; +- tm->tm_yday = days + 1; +- +- for (month = 0; month < 11; month++) { +- int newdays; +- +- newdays = days - rtc_month_days(month, year); +- if (newdays < 0) +- break; +- days = newdays; +- } +- tm->tm_mon = month; +- tm->tm_mday = days + 1; ++ /* ++ * The following algorithm is, basically, Proposition 6.3 of Neri ++ * and Schneider [1]. In a few words: it works on the computational ++ * (fictitious) calendar where the year starts in March, month = 2 ++ * (*), and finishes in February, month = 13. This calendar is ++ * mathematically convenient because the day of the year does not ++ * depend on whether the year is leap or not. For instance: ++ * ++ * March 1st 0-th day of the year; ++ * ... ++ * April 1st 31-st day of the year; ++ * ... ++ * January 1st 306-th day of the year; (Important!) ++ * ... ++ * February 28th 364-th day of the year; ++ * February 29th 365-th day of the year (if it exists). ++ * ++ * After having worked out the date in the computational calendar ++ * (using just arithmetics) it's easy to convert it to the ++ * corresponding date in the Gregorian calendar. ++ * ++ * [1] "Euclidean Affine Functions and Applications to Calendar ++ * Algorithms". https://arxiv.org/abs/2102.06959 ++ * ++ * (*) The numbering of months follows rtc_time more closely and ++ * thus, is slightly different from [1]. ++ */ ++ ++ udays = ((u32) days) + 719468; ++ ++ u32tmp = 4 * udays + 3; ++ century = u32tmp / 146097; ++ day_of_century = u32tmp % 146097 / 4; ++ ++ u32tmp = 4 * day_of_century + 3; ++ u64tmp = 2939745ULL * u32tmp; ++ year_of_century = upper_32_bits(u64tmp); ++ day_of_year = lower_32_bits(u64tmp) / 2939745 / 4; ++ ++ year = 100 * century + year_of_century; ++ is_leap_year = year_of_century != 0 ? ++ year_of_century % 4 == 0 : century % 4 == 0; ++ ++ u32tmp = 2141 * day_of_year + 132377; ++ month = u32tmp >> 16; ++ day = ((u16) u32tmp) / 2141; ++ ++ /* ++ * Recall that January 01 is the 306-th day of the year in the ++ * computational (not Gregorian) calendar. ++ */ ++ is_Jan_or_Feb = day_of_year >= 306; ++ ++ /* Converts to the Gregorian calendar. */ ++ year = year + is_Jan_or_Feb; ++ month = is_Jan_or_Feb ? month - 12 : month; ++ day = day + 1; ++ ++ day_of_year = is_Jan_or_Feb ? ++ day_of_year - 306 : day_of_year + 31 + 28 + is_leap_year; ++ ++ /* Converts to rtc_time's format. 
*/ ++ tm->tm_year = (int) (year - 1900); ++ tm->tm_mon = (int) month; ++ tm->tm_mday = (int) day; ++ tm->tm_yday = (int) day_of_year + 1; + + tm->tm_hour = secs / 3600; + secs -= tm->tm_hour * 3600; +--- /dev/null ++++ b/drivers/rtc/lib_test.c +@@ -0,0 +1,79 @@ ++// SPDX-License-Identifier: LGPL-2.1+ ++ ++#include <kunit/test.h> ++#include <linux/rtc.h> ++ ++/* ++ * Advance a date by one day. ++ */ ++static void advance_date(int *year, int *month, int *mday, int *yday) ++{ ++ if (*mday != rtc_month_days(*month - 1, *year)) { ++ ++*mday; ++ ++*yday; ++ return; ++ } ++ ++ *mday = 1; ++ if (*month != 12) { ++ ++*month; ++ ++*yday; ++ return; ++ } ++ ++ *month = 1; ++ *yday = 1; ++ ++*year; ++} ++ ++/* ++ * Checks every day in a 160000 years interval starting on 1970-01-01 ++ * against the expected result. ++ */ ++static void rtc_time64_to_tm_test_date_range(struct kunit *test) ++{ ++ /* ++ * 160000 years = (160000 / 400) * 400 years ++ * = (160000 / 400) * 146097 days ++ * = (160000 / 400) * 146097 * 86400 seconds ++ */ ++ time64_t total_secs = ((time64_t) 160000) / 400 * 146097 * 86400; ++ ++ int year = 1970; ++ int month = 1; ++ int mday = 1; ++ int yday = 1; ++ ++ struct rtc_time result; ++ time64_t secs; ++ s64 days; ++ ++ for (secs = 0; secs <= total_secs; secs += 86400) { ++ ++ rtc_time64_to_tm(secs, &result); ++ ++ days = div_s64(secs, 86400); ++ ++ #define FAIL_MSG "%d/%02d/%02d (%2d) : %ld", \ ++ year, month, mday, yday, days ++ ++ KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG); ++ KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG); ++ KUNIT_ASSERT_EQ_MSG(test, mday, result.tm_mday, FAIL_MSG); ++ KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG); ++ ++ advance_date(&year, &month, &mday, &yday); ++ } ++} ++ ++static struct kunit_case rtc_lib_test_cases[] = { ++ KUNIT_CASE(rtc_time64_to_tm_test_date_range), ++ {} ++}; ++ ++static struct kunit_suite rtc_lib_test_suite = { ++ .name = "rtc_lib_test_cases", ++ .test_cases = rtc_lib_test_cases, ++}; ++ ++kunit_test_suite(rtc_lib_test_suite); diff --git a/queue-5.10/rtc-make-rtc_time64_to_tm-support-dates-before-1970.patch b/queue-5.10/rtc-make-rtc_time64_to_tm-support-dates-before-1970.patch new file mode 100644 index 00000000000..b4a42d19a94 --- /dev/null +++ b/queue-5.10/rtc-make-rtc_time64_to_tm-support-dates-before-1970.patch @@ -0,0 +1,85 @@ +From 7df4cfef8b351fec3156160bedfc7d6d29de4cce Mon Sep 17 00:00:00 2001 +From: Alexandre Mergnat <amergnat@baylibre.com> +Date: Mon, 28 Apr 2025 12:06:47 +0200 +Subject: rtc: Make rtc_time64_to_tm() support dates before 1970 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +From: Alexandre Mergnat <amergnat@baylibre.com> + +commit 7df4cfef8b351fec3156160bedfc7d6d29de4cce upstream. + +Conversion of dates before 1970 is still relevant today because these +dates are reused on some hardwares to store dates bigger than the +maximal date that is representable in the device's native format. +This prominently and very soon affects the hardware covered by the +rtc-mt6397 driver that can only natively store dates in the interval +1900-01-01 up to 2027-12-31. So to store the date 2028-01-01 00:00:00 +to such a device, rtc_time64_to_tm() must do the right thing for +time=-2208988800. 
+ +Signed-off-by: Alexandre Mergnat <amergnat@baylibre.com> +Reviewed-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com> +Link: https://lore.kernel.org/r/20250428-enable-rtc-v4-1-2b2f7e3f9349@baylibre.com +Signed-off-by: Alexandre Belloni <alexandre.belloni@bootlin.com> +Signed-off-by: Uwe Kleine-König <u.kleine-koenig@baylibre.com> +Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> +--- + drivers/rtc/lib.c | 24 +++++++++++++++++++----- + 1 file changed, 19 insertions(+), 5 deletions(-) + +--- a/drivers/rtc/lib.c ++++ b/drivers/rtc/lib.c +@@ -46,24 +46,38 @@ EXPORT_SYMBOL(rtc_year_days); + * rtc_time64_to_tm - converts time64_t to rtc_time. + * + * @time: The number of seconds since 01-01-1970 00:00:00. +- * (Must be positive.) ++ * Works for values since at least 1900 + * @tm: Pointer to the struct rtc_time. + */ + void rtc_time64_to_tm(time64_t time, struct rtc_time *tm) + { +- unsigned int secs; +- int days; ++ int days, secs; + + u64 u64tmp; + u32 u32tmp, udays, century, day_of_century, year_of_century, year, + day_of_year, month, day; + bool is_Jan_or_Feb, is_leap_year; + +- /* time must be positive */ ++ /* ++ * Get days and seconds while preserving the sign to ++ * handle negative time values (dates before 1970-01-01) ++ */ + days = div_s64_rem(time, 86400, &secs); + ++ /* ++ * We need 0 <= secs < 86400 which isn't given for negative ++ * values of time. Fixup accordingly. ++ */ ++ if (secs < 0) { ++ days -= 1; ++ secs += 86400; ++ } ++ + /* day of the week, 1970-01-01 was a Thursday */ + tm->tm_wday = (days + 4) % 7; ++ /* Ensure tm_wday is always positive */ ++ if (tm->tm_wday < 0) ++ tm->tm_wday += 7; + + /* + * The following algorithm is, basically, Proposition 6.3 of Neri +@@ -93,7 +107,7 @@ void rtc_time64_to_tm(time64_t time, str + * thus, is slightly different from [1]. + */ + +- udays = ((u32) days) + 719468; ++ udays = days + 719468; + + u32tmp = 4 * udays + 3; + century = u32tmp / 146097; diff --git a/queue-5.10/series b/queue-5.10/series index d1a56357ae6..39cf2a26fa9 100644 --- a/queue-5.10/series +++ b/queue-5.10/series @@ -337,3 +337,19 @@ arm64-proton-pack-add-new-cpus-k-values-for-branch-mitigation.patch net-ipv4-fix-type-mismatch-in-inet_ehash_locks_alloc-causing-build-failure.patch net-fix-checksum-update-for-ila-adj-transport.patch bpf-fix-l4-csum-update-on-ipv6-in-checksum_complete.patch +rtc-improve-performance-of-rtc_time64_to_tm-.-add-tests.patch +rtc-make-rtc_time64_to_tm-support-dates-before-1970.patch +net_sched-sch_sfq-annotate-data-races-around-q-perturb_period.patch +net_sched-sch_sfq-handle-bigger-packets.patch +net_sched-sch_sfq-don-t-allow-1-packet-limit.patch +net_sched-sch_sfq-use-a-temporary-work-area-for-validating-configuration.patch +net_sched-sch_sfq-move-the-limit-validation.patch +bpf-introduce-composable-reg-ret-and-arg-types.patch +bpf-replace-arg_xxx_or_null-with-arg_xxx-ptr_maybe_null.patch +bpf-replace-ret_xxx_or_null-with-ret_xxx-ptr_maybe_null.patch +bpf-replace-ptr_to_xxx_or_null-with-ptr_to_xxx-ptr_maybe_null.patch +bpf-introduce-mem_rdonly-flag.patch +bpf-make-per_cpu_ptr-return-rdonly-ptr_to_mem.patch +bpf-add-mem_rdonly-for-helper-args-that-are-pointers-to-rdonly-mem.patch +bpf-selftests-test-ptr_to_rdonly_mem.patch +mm-huge_memory-fix-dereferencing-invalid-pmd-migration-entry.patch |
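
A minimal userspace sketch of the pattern the sch_sfq change above adopts: seed a work area from the live values, apply and validate the request on the work area, and publish only once everything checks out. The names here (struct cfg, cfg_change) and the limit >= 2 rule are illustrative stand-ins, not kernel API:

#include <stdbool.h>

struct cfg {
        unsigned int divisor;
        unsigned int maxflows;
        unsigned int limit;
};

/*
 * Apply *req on top of *live. On a validation failure *live is left
 * exactly as it was, so there is no partially applied state to undo.
 */
static bool cfg_change(struct cfg *live, const struct cfg *req)
{
        struct cfg tmp = *live;                 /* temporary work area */

        if (req->divisor) {
                tmp.divisor = req->divisor;
                if (tmp.maxflows > tmp.divisor) /* divisor caps flows */
                        tmp.maxflows = tmp.divisor;
        }
        if (req->limit) {
                tmp.limit = req->limit;
                if (tmp.maxflows > tmp.limit)   /* limit caps flows too */
                        tmp.maxflows = tmp.limit;
        }
        if (tmp.limit < 2)      /* late validation is now harmless */
                return false;

        *live = tmp;            /* commit the fully validated config */
        return true;
}

A validation failure discovered after several fields have already been recomputed costs nothing, which is exactly why the patch moves q->limit, q->maxflows and friends into locals before touching the qdisc state.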
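
The arithmetic in the new rtc_time64_to_tm() is compact enough to try out in userspace. The sketch below lifts the constants verbatim from the patch (they are derived in the Neri/Schneider paper, arXiv:2102.06959); the function name days_to_ymd and the scaffolding around it are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

static void days_to_ymd(uint32_t days_since_epoch,
                        unsigned int *y, unsigned int *m, unsigned int *d)
{
        /*
         * Shift the epoch so day 0 falls in the computational calendar,
         * where March is month 2 and the year ends with February as
         * month 13.
         */
        uint32_t udays = days_since_epoch + 719468;
        uint32_t u32tmp, century, day_of_century, year_of_century;
        uint32_t day_of_year, year, month, day;
        uint64_t u64tmp;
        int is_jan_or_feb;

        u32tmp = 4 * udays + 3;
        century = u32tmp / 146097;              /* 146097 days per 400 years */
        day_of_century = u32tmp % 146097 / 4;

        u32tmp = 4 * day_of_century + 3;
        u64tmp = 2939745ULL * u32tmp;
        year_of_century = (uint32_t)(u64tmp >> 32);
        day_of_year = (uint32_t)u64tmp / 2939745 / 4;

        year = 100 * century + year_of_century;

        u32tmp = 2141 * day_of_year + 132377;
        month = u32tmp >> 16;                   /* 2 (March) .. 13 (February) */
        day = (uint16_t)u32tmp / 2141;          /* 0-based day of the month */

        /* January and February belong to the next Gregorian year. */
        is_jan_or_feb = day_of_year >= 306;

        *y = year + is_jan_or_feb;
        *m = is_jan_or_feb ? month - 12 : month;        /* 0 = January */
        *d = day + 1;
}

int main(void)
{
        unsigned int y, m, d;

        days_to_ymd(0, &y, &m, &d);             /* epoch day */
        printf("%u-%02u-%02u\n", y, m + 1, d);  /* 1970-01-01 */
        days_to_ymd(19723, &y, &m, &d);
        printf("%u-%02u-%02u\n", y, m + 1, d);  /* 2024-01-01 */
        return 0;
}

Compiled as-is, this prints 1970-01-01 and 2024-01-01: 19723 is 54 * 365 plus the 13 leap days between those two dates, and day_of_year comes out as 306 in both cases, the January 1st of the computational calendar noted in the patch's comment.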
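
The pre-1970 change hinges on one observation: C division, and therefore div_s64_rem(), truncates toward zero, so a negative time produces a negative remainder, while the calendar arithmetic needs floor-division semantics with 0 <= secs < 86400. A userspace sketch of the same fixup, with plain / and % standing in for div_s64_rem():

#include <stdint.h>
#include <stdio.h>

static void split_days_secs(int64_t time)
{
        int64_t days = time / 86400;    /* C division truncates toward 0 */
        int secs = (int)(time % 86400); /* so this can be negative */
        int wday;

        /* Fix up to floor-division semantics: 0 <= secs < 86400. */
        if (secs < 0) {
                days -= 1;
                secs += 86400;
        }

        wday = (int)((days + 4) % 7);   /* 1970-01-01 was a Thursday */
        if (wday < 0)                   /* % 7 preserves the sign as well */
                wday += 7;

        printf("time=%lld -> days=%lld secs=%d wday=%d\n",
               (long long)time, (long long)days, secs, wday);
}

int main(void)
{
        split_days_secs(-1);            /* 1969-12-31 23:59:59, Wednesday */
        split_days_secs(-2208988800LL); /* 1900-01-01 00:00:00, Monday */
        return 0;
}

For time=-1 the seconds fixup fires (days becomes -1, secs 86399, wday 3, a Wednesday); for the 1900-01-01 value from the commit message the remainder is already zero and only the weekday needs the second fixup, yielding wday 1, a Monday.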
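
The KUnit suite's day-walking technique also ports straight to userspace: advance a date by hand, one day at a time, and compare every step against an independent conversion. Below, the hand-rolled oracle is checked against the C library's gmtime_r() over roughly ten years of days; the kernel test performs the same walk over 160000 years against rtc_time64_to_tm(). month_days() and advance_date() are simplified stand-ins for rtc_month_days() and the test's helper, not kernel code:

#include <assert.h>
#include <stdio.h>
#include <time.h>

static const int mdays[12] = { 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 };

static int month_days(int month, int year)      /* month is 1..12 */
{
        int leap = (year % 4 == 0 && year % 100 != 0) || year % 400 == 0;

        return mdays[month - 1] + (month == 2 && leap);
}

static void advance_date(int *year, int *month, int *mday)
{
        if (*mday != month_days(*month, *year)) {
                ++*mday;
                return;
        }
        *mday = 1;
        if (*month != 12) {
                ++*month;
                return;
        }
        *month = 1;
        ++*year;
}

int main(void)
{
        int year = 1970, month = 1, mday = 1;
        time_t secs;

        for (secs = 0; secs <= (time_t)3650 * 86400; secs += 86400) {
                struct tm tm;

                gmtime_r(&secs, &tm);
                assert(tm.tm_year + 1900 == year);
                assert(tm.tm_mon + 1 == month);
                assert(tm.tm_mday == mday);
                advance_date(&year, &month, &mday);
        }
        puts("every day in ~10 years OK");
        return 0;
}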