diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 09:18:14 +0100 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2026-06-17 09:18:14 +0100 |
| commit | 9c87e61e3c5797277407ba5eae4eac8a52be3fa3 (patch) | |
| tree | e3f902cb5363b5b90ab74a4b7e26fafbc15aaeaf /net | |
| parent | b85966adbf5de0668a815c6e3527f87e0c387fb4 (diff) | |
| parent | e4287bf34f97a88c7d9322f5bde828724c073a6b (diff) | |
| download | ath-9c87e61e3c5797277407ba5eae4eac8a52be3fa3.tar.gz | |
Merge tag 'bpf-next-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov:
"Major changes:
- Recover from BPF arena page faults using a scratch page and add
ptep_try_set() for lockless empty-slot installs on x86 and arm64.
This allows BPF kfuncs to access arena pointers directly.
The 'arena_direct_access' stable branch was created for this work
and was pulled into sched-ext and bpf-next trees (Tejun Heo, Kumar
Kartikeya Dwivedi)
- Lift old restriction and support 6+ arguments in BPF programs and
kfuncs on x86 and arm64 (Yonghong Song, Puranjay Mohan)
Other features and fixes:
- Add 24-bit BTF vlen and reclaim unused bits in the BTF UAPI to ease
addition of new BTF kinds (Alan Maguire)
- Raise the maximum BPF call chain depth from 8 to 16 frames (Alexei
Starovoitov)
- Refactor object relationship tracking in the verifier and fix a
dynptr use-after-free bug (Amery Hung)
- Harden the signed program loader and reject exclusive maps as inner
maps (Daniel Borkmann)
- Replace the verifier min/max bounds fields with a circular number
(cnum) representation and improve 32->64 bit range refinements
(Eduard Zingerman)
- Introduce the arena library and runtime (libarena) with a buddy
allocator, rbtree and SPMC queue data structures, ASAN support and
a parallel test harness. Allow subprograms to return arena pointers
and switch to a BTF type-tag based __arena annotation (Emil
Tsalapatis)
- Cache build IDs in the sleepable stackmap path and avoid faultable
build ID reads under mm locks (Ihor Solodrai)
- Introduce the tracing_multi link to attach a single BPF program to
many kernel functions at once. Allow specifying the uprobe_multi
target via FD (Jiri Olsa)
- Extend the bpf_list family of kfuncs with bpf_list_add/del(), and
bpf_list_is_first/is_last/empty() (Kaitao Cheng)
- Extend the BPF syscall with common attributes support for
prog_load, btf_load and map_create (Leon Hwang)
- Wrap rhashtable as BPF map (Mykyta Yatsenko, Herbert Xu)
- Add sleepable support for tracepoint programs and fix deadlocks in
LRU map due to NMI reentry (Mykyta Yatsenko)
- Fix OOB access in bpf_flow_keys, fix nullness analysis of inner
arrays, enforce write checks for global subprograms (Nuoqi Gui)
- Report the maximum combined stack depth and print a breakdown of
instructions processed per subprogram (Paul Chaignon)
- Add an XDP load-balancer benchmark and arm64 JIT support for stack
arguments (Puranjay Mohan)
- Add kfuncs to traverse over wakeup_sources (Samuel Wu)
- Allow sleepable BPF programs to use LPM trie maps directly (Vlad
Poenaru)
- Many more fixes and cleanups across the verifier, BTF, sockmap,
devmap, bpffs, security hooks, s390/riscv/loongarch JITs,
rqspinlock, libbpf, bpftool, selftests"
* tag 'bpf-next-7.2' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (336 commits)
selftests/bpf: Work around llvm stack overflow in crypto progs
selftests/bpf: add test for bpf_msg_pop_data() overflow
bpf, sockmap: fix integer overflow in bpf_msg_pop_data() bounds check
sockmap: Fix use-after-free in udp_bpf_recvmsg()
bpf, sockmap: keep sk_msg copy state in sync
bpf, sockmap: Fix wrong rsge offset in bpf_msg_push_data()
bpf, sockmap: reject overflowing copy + len in bpf_msg_push_data()
selftests/bpf: Retry map update on helper_fill_hashmap()
selftests/bpf: Add test for sleepable lsm_cgroup rejection
selftests/bpf: Add test to verify the fix for bpf_setsockopt() helper
bpf: Fix bpf_get/setsockopt to tos for ipv4-mapped ipv6 socket
selftests/bpf: Avoid static LLVM linking for cross builds
selftests/bpf: Use common CFLAGS for urandom_read
selftests/bpf: Initialize operation name before use
tools/bpf: build: Append extra cflags
libbpf: Initialize CFLAGS before including Makefile.include
bpftool: Append extra host flags
bpftool: Avoid adding EXTRA_CFLAGS to HOST_CFLAGS
bpftool: Pass host flags to bootstrap libbpf
selftests/bpf: correct CONFIG_PPC64 macro name in comment
...
Diffstat (limited to 'net')
| -rw-r--r-- | net/bpf/bpf_dummy_struct_ops.c | 14 | ||||
| -rw-r--r-- | net/bpf/test_run.c | 68 | ||||
| -rw-r--r-- | net/core/filter.c | 117 | ||||
| -rw-r--r-- | net/core/lwt_bpf.c | 12 | ||||
| -rw-r--r-- | net/ipv4/udp_bpf.c | 9 |
5 files changed, 167 insertions, 53 deletions
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c index ae5a54c350b9e..191a6b3ee2541 100644 --- a/net/bpf/bpf_dummy_struct_ops.c +++ b/net/bpf/bpf_dummy_struct_ops.c @@ -132,7 +132,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, const struct bpf_struct_ops *st_ops = &bpf_bpf_dummy_ops; const struct btf_type *func_proto; struct bpf_dummy_ops_test_args *args; - struct bpf_tramp_links *tlinks = NULL; + struct bpf_tramp_nodes *tnodes = NULL; struct bpf_tramp_link *link = NULL; void *image = NULL; unsigned int op_idx; @@ -158,8 +158,8 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, if (err) goto out; - tlinks = kzalloc_objs(*tlinks, BPF_TRAMP_MAX); - if (!tlinks) { + tnodes = kzalloc_objs(*tnodes, BPF_TRAMP_MAX); + if (!tnodes) { err = -ENOMEM; goto out; } @@ -171,11 +171,11 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr, } /* prog doesn't take the ownership of the reference from caller */ bpf_prog_inc(prog); - bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, prog, - prog->expected_attach_type); + bpf_tramp_link_init(link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_link_lops, + prog, prog->expected_attach_type, 0); op_idx = prog->expected_attach_type; - err = bpf_struct_ops_prepare_trampoline(tlinks, link, + err = bpf_struct_ops_prepare_trampoline(tnodes, &link->node, &st_ops->func_models[op_idx], &dummy_ops_test_ret_function, &image, &image_off, @@ -198,7 +198,7 @@ out: bpf_struct_ops_image_free(image); if (link) bpf_link_put(&link->link); - kfree(tlinks); + kfree(tnodes); return err; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index dbf0d8eae8d89..7fdee8f52ee2b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -702,6 +702,9 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_TRACE_FSESSION: + case BPF_TRACE_FENTRY_MULTI: + case 
BPF_TRACE_FEXIT_MULTI: + case BPF_TRACE_FSESSION_MULTI: if (bpf_fentry_test1(1) != 2 || bpf_fentry_test2(2, 3) != 5 || bpf_fentry_test3(4, 5, 6) != 15 || @@ -747,14 +750,35 @@ static void __bpf_prog_test_run_raw_tp(void *data) { struct bpf_raw_tp_test_run_info *info = data; + struct srcu_ctr __percpu *scp = NULL; struct bpf_trace_run_ctx run_ctx = {}; struct bpf_run_ctx *old_run_ctx; old_run_ctx = bpf_set_run_ctx(&run_ctx.run_ctx); - rcu_read_lock(); + if (info->prog->sleepable) { + scp = rcu_read_lock_tasks_trace(); + migrate_disable(); + } else { + rcu_read_lock(); + } + + if (unlikely(!bpf_prog_get_recursion_context(info->prog))) { + bpf_prog_inc_misses_counter(info->prog); + goto out; + } + info->retval = bpf_prog_run(info->prog, info->ctx); - rcu_read_unlock(); + +out: + bpf_prog_put_recursion_context(info->prog); + + if (info->prog->sleepable) { + migrate_enable(); + rcu_read_unlock_tasks_trace(scp); + } else { + rcu_read_unlock(); + } bpf_reset_run_ctx(old_run_ctx); } @@ -782,6 +806,13 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0) return -EINVAL; + /* + * Sleepable programs cannot run with preemption disabled or in + * hardirq context (smp_call_function_single), reject the flag. + */ + if (prog->sleepable && (kattr->test.flags & BPF_F_TEST_RUN_ON_CPU)) + return -EINVAL; + if (ctx_size_in) { info.ctx = memdup_user(ctx_in, ctx_size_in); if (IS_ERR(info.ctx)) @@ -790,24 +821,31 @@ int bpf_prog_test_run_raw_tp(struct bpf_prog *prog, info.ctx = NULL; } + info.retval = 0; info.prog = prog; - current_cpu = get_cpu(); - if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 || - cpu == current_cpu) { + if (prog->sleepable) { __bpf_prog_test_run_raw_tp(&info); - } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { - /* smp_call_function_single() also checks cpu_online() - * after csd_lock(). 
However, since cpu is from user - * space, let's do an extra quick check to filter out - * invalid value before smp_call_function_single(). - */ - err = -ENXIO; } else { - err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp, - &info, 1); + current_cpu = get_cpu(); + if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 || + cpu == current_cpu) { + __bpf_prog_test_run_raw_tp(&info); + } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) { + /* + * smp_call_function_single() also checks cpu_online() + * after csd_lock(). However, since cpu is from user + * space, let's do an extra quick check to filter out + * invalid value before smp_call_function_single(). + */ + err = -ENXIO; + } else { + err = smp_call_function_single(cpu, + __bpf_prog_test_run_raw_tp, + &info, 1); + } + put_cpu(); } - put_cpu(); if (!err && copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32))) diff --git a/net/core/filter.c b/net/core/filter.c index 40037413dd4ec..2e96b4b847ce1 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2654,6 +2654,37 @@ static void sk_msg_reset_curr(struct sk_msg *msg) } } +static bool sk_msg_elem_is_copy(const struct sk_msg *msg, u32 i) +{ + return test_bit(i, msg->sg.copy); +} + +static void sk_msg_clear_elem_copy(struct sk_msg *msg, u32 i) +{ + __clear_bit(i, msg->sg.copy); +} + +static void sk_msg_set_elem_copy(struct sk_msg *msg, u32 i, bool sg_copy) +{ + __assign_bit(i, msg->sg.copy, sg_copy); +} + +static void sk_msg_clear_copy_range(struct sk_msg *msg, u32 start, u32 end) +{ + while (start != end) { + sk_msg_clear_elem_copy(msg, start); + sk_msg_iter_var_next(start); + } +} + +static void sk_msg_sg_move(struct sk_msg *msg, u32 dst, u32 src) +{ + msg->sg.data[dst] = msg->sg.data[src]; + + sk_msg_set_elem_copy(msg, dst, + sk_msg_elem_is_copy(msg, src)); +} + static const struct bpf_func_proto bpf_msg_cork_bytes_proto = { .func = bpf_msg_cork_bytes, .gpl_only = false, @@ -2692,7 +2723,7 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, 
u32, start, * account for the headroom. */ bytes_sg_total = start - offset + bytes; - if (!test_bit(i, msg->sg.copy) && bytes_sg_total <= len) + if (!sk_msg_elem_is_copy(msg, i) && bytes_sg_total <= len) goto out; /* At this point we need to linearize multiple scatterlist @@ -2733,13 +2764,13 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, poffset += len; sge->length = 0; put_page(sg_page(sge)); - __clear_bit(i, msg->sg.copy); + sk_msg_clear_elem_copy(msg, i); sk_msg_iter_var_next(i); } while (i != last_sge); sg_set_page(&msg->sg.data[first_sge], page, copy, 0); - __clear_bit(first_sge, msg->sg.copy); + sk_msg_clear_elem_copy(msg, first_sge); /* To repair sg ring we need to shift entries. If we only * had a single entry though we can just replace it and @@ -2749,8 +2780,14 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, shift = last_sge > first_sge ? last_sge - first_sge - 1 : NR_MSG_FRAG_IDS - first_sge + last_sge - 1; - if (!shift) + if (!shift) { + sk_msg_clear_elem_copy(msg, msg->sg.end); goto out; + } + + i = first_sge; + sk_msg_iter_var_next(i); + sk_msg_clear_copy_range(msg, i, last_sge); i = first_sge; sk_msg_iter_var_next(i); @@ -2764,18 +2801,18 @@ BPF_CALL_4(bpf_msg_pull_data, struct sk_msg *, msg, u32, start, if (move_from == msg->sg.end) break; - msg->sg.data[i] = msg->sg.data[move_from]; - sk_msg_sg_copy_assign(msg, i, msg, move_from); + sk_msg_sg_move(msg, i, move_from); msg->sg.data[move_from].length = 0; msg->sg.data[move_from].page_link = 0; msg->sg.data[move_from].offset = 0; - __clear_bit(move_from, msg->sg.copy); + sk_msg_clear_elem_copy(msg, move_from); sk_msg_iter_var_next(i); } while (1); msg->sg.end = msg->sg.end - shift > msg->sg.end ? 
msg->sg.end - shift + NR_MSG_FRAG_IDS : msg->sg.end - shift; + sk_msg_clear_elem_copy(msg, msg->sg.end); out: sk_msg_reset_curr(msg); msg->data = sg_virt(&msg->sg.data[first_sge]) + start - offset; @@ -2796,9 +2833,10 @@ static const struct bpf_func_proto bpf_msg_pull_data_proto = { BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { + bool sge_copy = false, nsge_copy = false, nnsge_copy = false; struct scatterlist sge, nsge, nnsge, rsge = {0}, *psge; u32 new, i = 0, l = 0, space, copy = 0, offset = 0; - bool sge_copy, nsge_copy, nnsge_copy, rsge_copy = false; + bool rsge_copy = false; u8 *raw, *to, *from; struct page *page; @@ -2834,6 +2872,9 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, if (!space || (space == 1 && start != offset)) copy = msg->sg.data[i].length; + if (unlikely(copy + len < copy)) + return -EINVAL; + page = alloc_pages(__GFP_NOWARN | GFP_ATOMIC | __GFP_COMP, get_order(copy + len)); if (unlikely(!page)) @@ -2871,7 +2912,7 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, sk_msg_iter_var_prev(i); psge = sk_msg_elem(msg, i); rsge = sk_msg_elem_cpy(msg, i); - rsge_copy = test_bit(i, msg->sg.copy); + rsge_copy = sk_msg_elem_is_copy(msg, i); psge->length = start - offset; rsge.length -= psge->length; @@ -2896,21 +2937,21 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, /* Shift one or two slots as needed */ sge = sk_msg_elem_cpy(msg, new); - sge_copy = test_bit(new, msg->sg.copy); sg_unmark_end(&sge); + sge_copy = sk_msg_elem_is_copy(msg, new); nsge = sk_msg_elem_cpy(msg, i); - nsge_copy = test_bit(i, msg->sg.copy); + nsge_copy = sk_msg_elem_is_copy(msg, i); if (rsge.length) { sk_msg_iter_var_next(i); nnsge = sk_msg_elem_cpy(msg, i); - nnsge_copy = test_bit(i, msg->sg.copy); + nnsge_copy = sk_msg_elem_is_copy(msg, i); sk_msg_iter_next(msg, end); } while (i != msg->sg.end) { msg->sg.data[i] = sge; - __assign_bit(i, msg->sg.copy, sge_copy); + 
sk_msg_set_elem_copy(msg, i, sge_copy); sge = nsge; sge_copy = nsge_copy; sk_msg_iter_var_next(i); @@ -2918,10 +2959,10 @@ BPF_CALL_4(bpf_msg_push_data, struct sk_msg *, msg, u32, start, nsge = nnsge; nsge_copy = nnsge_copy; nnsge = sk_msg_elem_cpy(msg, i); - nnsge_copy = test_bit(i, msg->sg.copy); + nnsge_copy = sk_msg_elem_is_copy(msg, i); } else { nsge = sk_msg_elem_cpy(msg, i); - nsge_copy = test_bit(i, msg->sg.copy); + nsge_copy = sk_msg_elem_is_copy(msg, i); } } @@ -2929,14 +2970,15 @@ place_new: /* Place newly allocated data buffer */ sk_mem_charge(msg->sk, len); msg->sg.size += len; - __clear_bit(new, msg->sg.copy); + sk_msg_clear_elem_copy(msg, new); sg_set_page(&msg->sg.data[new], page, len + copy, 0); if (rsge.length) { get_page(sg_page(&rsge)); sk_msg_iter_var_next(new); msg->sg.data[new] = rsge; - __assign_bit(new, msg->sg.copy, rsge_copy); + sk_msg_set_elem_copy(msg, new, rsge_copy); } + sk_msg_clear_elem_copy(msg, msg->sg.end); sk_msg_reset_curr(msg); sk_msg_compute_data_pointers(msg); @@ -2962,12 +3004,11 @@ static void sk_msg_shift_left(struct sk_msg *msg, int i) do { prev = i; sk_msg_iter_var_next(i); - msg->sg.data[prev] = msg->sg.data[i]; - sk_msg_sg_copy_assign(msg, prev, msg, i); + sk_msg_sg_move(msg, prev, i); } while (i != msg->sg.end); sk_msg_iter_prev(msg, end); - __clear_bit(msg->sg.end, msg->sg.copy); + sk_msg_clear_elem_copy(msg, msg->sg.end); } static void sk_msg_shift_right(struct sk_msg *msg, int i) @@ -2977,28 +3018,29 @@ static void sk_msg_shift_right(struct sk_msg *msg, int i) sk_msg_iter_next(msg, end); sge = sk_msg_elem_cpy(msg, i); - sge_copy = test_bit(i, msg->sg.copy); + sge_copy = sk_msg_elem_is_copy(msg, i); sk_msg_iter_var_next(i); tmp = sk_msg_elem_cpy(msg, i); - tmp_copy = test_bit(i, msg->sg.copy); + tmp_copy = sk_msg_elem_is_copy(msg, i); while (i != msg->sg.end) { msg->sg.data[i] = sge; - __assign_bit(i, msg->sg.copy, sge_copy); + sk_msg_set_elem_copy(msg, i, sge_copy); sk_msg_iter_var_next(i); sge = tmp; sge_copy = 
tmp_copy; tmp = sk_msg_elem_cpy(msg, i); - tmp_copy = test_bit(i, msg->sg.copy); + tmp_copy = sk_msg_elem_is_copy(msg, i); } + sk_msg_clear_elem_copy(msg, msg->sg.end); } BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, u32, len, u64, flags) { u32 i = 0, l = 0, space, offset = 0; - u64 last = start + len; - int pop; + u64 last = (u64)start + len; + u32 pop; if (unlikely(flags)) return -EINVAL; @@ -3047,10 +3089,10 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, */ if (start != offset) { struct scatterlist *nsge, *sge = sk_msg_elem(msg, i); + bool sge_copy = sk_msg_elem_is_copy(msg, i); int a = start - offset; int b = sge->length - pop - a; - u32 sge_i = i; - bool sge_copy = test_bit(i, msg->sg.copy); + u32 sge_idx = i; sk_msg_iter_var_next(i); @@ -3063,7 +3105,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, sg_set_page(nsge, sg_page(sge), b, sge->offset + pop + a); - __assign_bit(i, msg->sg.copy, sge_copy); + sk_msg_set_elem_copy(msg, i, sge_copy); } else { struct page *page, *orig; u8 *to, *from; @@ -3080,7 +3122,7 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, memcpy(to, from, a); memcpy(to + a, from + a + pop, b); sg_set_page(sge, page, a + b, 0); - __clear_bit(sge_i, msg->sg.copy); + sk_msg_clear_elem_copy(msg, sge_idx); put_page(orig); } pop = 0; @@ -5571,11 +5613,24 @@ static int sol_tcp_sockopt(struct sock *sk, int optname, KERNEL_SOCKPTR(optval), *optlen); } +static bool sk_allows_sol_ip_sockopt(struct sock *sk) +{ + switch (sk->sk_family) { + case AF_INET: + return true; + case AF_INET6: + /* Allow getting/setting sockopt for possible ipv4-mapped ipv6 socket. 
*/ + return sk->sk_type != SOCK_RAW && !ipv6_only_sock(sk); + default: + return false; + } +} + static int sol_ip_sockopt(struct sock *sk, int optname, char *optval, int *optlen, bool getopt) { - if (sk->sk_family != AF_INET) + if (!sk_allows_sol_ip_sockopt(sk)) return -EINVAL; switch (optname) { diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index f71ef82a5f3d3..bf588f508b79e 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -599,6 +599,7 @@ static int handle_gso_encap(struct sk_buff *skb, bool ipv4, int encap_len) int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) { + bool is_udp_tunnel; struct iphdr *iph; bool ipv4; int err; @@ -612,10 +613,16 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) ipv4 = true; if (unlikely(len < iph->ihl * 4)) return -EINVAL; + is_udp_tunnel = iph->protocol == IPPROTO_UDP; + if (unlikely(is_udp_tunnel && len < iph->ihl * 4 + sizeof(struct udphdr))) + return -EINVAL; } else if (iph->version == 6) { ipv4 = false; if (unlikely(len < sizeof(struct ipv6hdr))) return -EINVAL; + is_udp_tunnel = ((struct ipv6hdr *)iph)->nexthdr == NEXTHDR_UDP; + if (unlikely(is_udp_tunnel && len < sizeof(struct ipv6hdr) + sizeof(struct udphdr))) + return -EINVAL; } else { return -EINVAL; } @@ -637,6 +644,11 @@ int bpf_lwt_push_ip_encap(struct sk_buff *skb, void *hdr, u32 len, bool ingress) if (ingress) skb_postpush_rcsum(skb, iph, len); skb_reset_network_header(skb); + if (is_udp_tunnel) { + size_t iph_sz = ipv4 ? 
iph->ihl * 4 : sizeof(struct ipv6hdr); + + skb_set_transport_header(skb, skb_network_offset(skb) + iph_sz); + } memcpy(skb_network_header(skb), hdr, len); bpf_compute_data_pointers(skb); skb_clear_hash(skb); diff --git a/net/ipv4/udp_bpf.c b/net/ipv4/udp_bpf.c index 9f33b07b14813..ad57c4c9eaab6 100644 --- a/net/ipv4/udp_bpf.c +++ b/net/ipv4/udp_bpf.c @@ -50,7 +50,9 @@ static int udp_msg_wait_data(struct sock *sk, struct sk_psock *psock, sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); ret = udp_msg_has_data(sk, psock); if (!ret) { + release_sock(sk); wait_woken(&wait, TASK_INTERRUPTIBLE, timeo); + lock_sock(sk); ret = udp_msg_has_data(sk, psock); } sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); @@ -79,6 +81,7 @@ static int udp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, goto out; } + lock_sock(sk); msg_bytes_ready: copied = sk_msg_recvmsg(sk, psock, msg, len, flags); if (!copied) { @@ -90,11 +93,17 @@ msg_bytes_ready: if (data) { if (psock_has_data(psock)) goto msg_bytes_ready; + + release_sock(sk); + ret = sk_udp_recvmsg(sk, msg, len, flags); goto out; } copied = -EAGAIN; } + + release_sock(sk); + ret = copied; out: sk_psock_put(sk, psock); |
