aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
authorJakub Kicinski <kuba@kernel.org>2026-06-22 10:33:38 -0700
committerJakub Kicinski <kuba@kernel.org>2026-06-22 10:33:38 -0700
commit56abdaebbf0da304b860bed1f2b5a85f5a6a16a0 (patch)
tree2797ae9b4a4d4142fb51f271929f67932cadc07f /net
parentd07d80b6a129a44538cda1549b7acf95154fb197 (diff)
parent27dd2997746d54ebc079bb13161cc1bdd401d4a6 (diff)
downloadath-56abdaebbf0da304b860bed1f2b5a85f5a6a16a0.tar.gz
Merge tag 'nf-26-06-21' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says: ==================== Netfilter fixes for net The following patchset contains Netfilter fixes for net. This batches fixes for real crashes with trivial/correctness fixes. There is also a rework of the conntrack expectation timeout strategy to deal with a possible race when removing an expectation. 1) Fix the incorrect flowtable timeout extension for entries in hw offload, from Adrian Bente. This is correcting a defect in the functionality, no crash. 2) Hold reference to device under the fake dst in br_netfilter, from Haoze Xie. This is fixing a possible UaF if the device is removed while packet is sitting in nfqueue. 3) Reject template conntrack in xt_cluster, otherwise access to uninitialized conntrack fields is possible, leading to WARN_ON due to unset layer 3 protocol. From Wyatt Feng. 4) Make sure the IPv6 tunnel header is in the linear skb data area before pulling. While at it remove incomplete NEXTHDR_DEST support. From Lorenzo Bianconi. This possibly leads to a crash if the IPv4 header is not in the linear area. 5) Use test_bit_acquire in ipset hash set to avoid reordering of subsequent memory access. This is addressing an LLM-related report, no crash has been observed. From Jozsef Kadlecsik. 6) Use test_bit_acquire in ipset bitmap set too, for the same reason as in the previous patch, from Jozsef Kadlecsik. 7) Call kfree_rcu() after rcu_assign_pointer() to address a possible UaF if kfree_rcu() runs immediately, which to my understanding never happens. Never observed in practice, reported by LLM. Also from Jozsef Kadlecsik. 8) Use disable_delayed_work_sync() instead of cancel_delayed_work_sync() to prevent the ipset GC handler from re-queueing work, as reported by LLM. From Jozsef Kadlecsik. This is for correctness. 9) Restore the check in nft_payload for payload offsets exceeding 2^16. From Florian Westphal. This fixes a silent truncation, not a big deal, but better be assertive and reject it. 
10) Validate NFT_META_BRI_IIFHWADDR can only run from bridge prerouting. From Florian Westphal. Harmless but it could allow reading bytes from skb->cb. 11) Zero out destination hardware address during the flowtable path setup, also from Florian. This is a correctness fix; an LLM points out that a possible infoleak can happen, but the topology to achieve it is not clear. 12) Skip IPv4 options if present when building the IPv4 reject reply. Otherwise bytes in the IPv4 options header can be sent back to origin where the ICMP header is expected. Again from Florian Westphal. 13) Replace timer API for expectation by GC worker approach. This is implicitly fixing a race in nf_ct_remove_expectations(), which might fail to remove the expectation due to timer_delete() returning false because the timer has expired and its callback is being run concurrently. This fix is addressing a crash that has been already reported with a reproducer. 14) Check if br_vlan_get_pvid_rcu() fails, otherwise a 4-byte stack infoleak is possible. From Florian Westphal. 
* tag 'nf-26-06-21' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf: netfilter: nft_meta_bridge: fix NFT_META_BRI_IIFPVID stack leak netfilter: nf_conntrack_expect: use conntrack GC to reap expectations netfilter: nf_reject: skip iphdr options when looking for icmp header netfilter: nft_flow_offload: zero device address for non-ether case netfilter: nft_meta_bridge: add validate callback for get operations netfilter: nft_payload: reject offsets exceeding 65535 bytes netfilter: ipset: make sure gc is properly stopped netfilter: ipset: fix order of kfree_rcu() and rcu_assign_pointer() netfilter: ipset: Don't use test_bit() in lockless RCU readers in bitmap types netfilter: ipset: Don't use test_bit() in lockless RCU readers in hash types netfilter: flowtable: fix and simplify IP6IP6 tunnel handling netfilter: xt_cluster: reject template conntracks in hash match netfilter: nf_queue: pin bridge device while NFQUEUE holds fake dst netfilter: flowtable: fix offloaded ct timeout never being extended ==================== Link: https://patch.msgid.link/20260620222738.112506-1-pablo@netfilter.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/bridge/netfilter/nft_meta_bridge.c23
-rw-r--r--net/ipv4/netfilter/nf_reject_ipv4.c2
-rw-r--r--net/ipv6/ip6_tunnel.c7
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h4
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ip.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_ipmac.c2
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_port.c2
-rw-r--r--net/netfilter/ipset/ip_set_core.c4
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h12
-rw-r--r--net/netfilter/nf_conntrack_core.c33
-rw-r--r--net/netfilter/nf_conntrack_expect.c145
-rw-r--r--net/netfilter/nf_conntrack_h323_main.c4
-rw-r--r--net/netfilter/nf_conntrack_helper.c10
-rw-r--r--net/netfilter/nf_conntrack_netlink.c22
-rw-r--r--net/netfilter/nf_conntrack_sip.c13
-rw-r--r--net/netfilter/nf_flow_table_core.c13
-rw-r--r--net/netfilter/nf_flow_table_ip.c80
-rw-r--r--net/netfilter/nf_flow_table_path.c4
-rw-r--r--net/netfilter/nf_queue.c14
-rw-r--r--net/netfilter/nfnetlink_queue.c3
-rw-r--r--net/netfilter/nft_ct.c3
-rw-r--r--net/netfilter/nft_meta.c5
-rw-r--r--net/netfilter/nft_payload.c16
-rw-r--r--net/netfilter/xt_cluster.c2
24 files changed, 233 insertions, 192 deletions
diff --git a/net/bridge/netfilter/nft_meta_bridge.c b/net/bridge/netfilter/nft_meta_bridge.c
index 219c406802602..e4c9aa1f64e25 100644
--- a/net/bridge/netfilter/nft_meta_bridge.c
+++ b/net/bridge/netfilter/nft_meta_bridge.c
@@ -44,7 +44,9 @@ static void nft_meta_bridge_get_eval(const struct nft_expr *expr,
if (!br_dev || !br_vlan_enabled(br_dev))
goto err;
- br_vlan_get_pvid_rcu(in, &p_pvid);
+ if (br_vlan_get_pvid_rcu(in, &p_pvid))
+ goto err;
+
nft_reg_store16(dest, p_pvid);
return;
}
@@ -107,12 +109,30 @@ static int nft_meta_bridge_get_init(const struct nft_ctx *ctx,
NULL, NFT_DATA_VALUE, len);
}
+static int nft_meta_bridge_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct nft_meta *priv = nft_expr_priv(expr);
+ unsigned int hooks;
+
+ switch (priv->key) {
+ case NFT_META_BRI_IIFHWADDR:
+ hooks = 1 << NF_BR_PRE_ROUTING;
+ break;
+ default:
+ return nft_meta_get_validate(ctx, expr);
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, hooks);
+}
+
static struct nft_expr_type nft_meta_bridge_type;
static const struct nft_expr_ops nft_meta_bridge_get_ops = {
.type = &nft_meta_bridge_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_meta)),
.eval = nft_meta_bridge_get_eval,
.init = nft_meta_bridge_get_init,
+ .validate = nft_meta_bridge_get_validate,
.dump = nft_meta_get_dump,
};
@@ -168,7 +188,6 @@ static int nft_meta_bridge_set_validate(const struct nft_ctx *ctx,
switch (priv->key) {
case NFT_META_BRI_BROUTE:
- case NFT_META_BRI_IIFHWADDR:
hooks = 1 << NF_BR_PRE_ROUTING;
break;
default:
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index fecf6621f679f..4626dc46808f3 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -89,7 +89,7 @@ static bool nf_skb_is_icmp_unreach(const struct sk_buff *skb)
if (iph->protocol != IPPROTO_ICMP)
return false;
- thoff = skb_network_offset(skb) + sizeof(*iph);
+ thoff = skb_network_offset(skb) + ip_hdrlen(skb);
tp = skb_header_pointer(skb,
thoff + offsetof(struct icmphdr, type),
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index d7c90a8533ec4..bf8e40af60b08 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1851,6 +1851,13 @@ static int ip6_tnl_fill_forward_path(struct net_device_path_ctx *ctx,
struct dst_entry *dst;
int err;
+ if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
+ /* encaplimit option is currently not supported in the
+ * sw-acceleration path.
+ */
+ return -EOPNOTSUPP;
+ }
+
dst = ip6_route_output(dev_net(ctx->dev), NULL, &fl6);
if (!dst->error) {
path->type = DEV_PATH_TUN;
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 798c7993635e6..bb9b5bed10e19 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -165,6 +165,7 @@ mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
ip_set_init_skbinfo(ext_skbinfo(x, set), ext);
/* Activate element */
+ smp_mb__before_atomic();
set_bit(e->id, map->members);
set->elements++;
@@ -219,7 +220,7 @@ mtype_list(const struct ip_set *set,
cond_resched_rcu();
id = cb->args[IPSET_CB_ARG0];
x = get_ext(set, map, id);
- if (!test_bit(id, map->members) ||
+ if (!test_bit_acquire(id, map->members) ||
(SET_WITH_TIMEOUT(set) &&
#ifdef IP_SET_BITMAP_STORED_TIMEOUT
mtype_is_filled(x) &&
@@ -278,6 +279,7 @@ mtype_gc(struct timer_list *t)
x = get_ext(set, map, id);
if (ip_set_timeout_expired(ext_timeout(x, set))) {
clear_bit(id, map->members);
+ smp_mb__after_atomic();
ip_set_ext_destroy(set, x);
set->elements--;
}
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index 5988b9bb9029d..ac7febce074f1 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -67,7 +67,7 @@ static int
bitmap_ip_do_test(const struct bitmap_ip_adt_elem *e,
struct bitmap_ip *map, size_t dsize)
{
- return !!test_bit(e->id, map->members);
+ return !!test_bit_acquire(e->id, map->members);
}
static int
diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
index 752f59ef87442..5921fd9d2dca0 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c
@@ -86,7 +86,7 @@ bitmap_ipmac_do_test(const struct bitmap_ipmac_adt_elem *e,
{
const struct bitmap_ipmac_elem *elem;
- if (!test_bit(e->id, map->members))
+ if (!test_bit_acquire(e->id, map->members))
return 0;
elem = get_const_elem(map->extensions, e->id, dsize);
if (e->add_mac && elem->filled == MAC_FILLED)
diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c
index 7138e080def4c..ca875c9824245 100644
--- a/net/netfilter/ipset/ip_set_bitmap_port.c
+++ b/net/netfilter/ipset/ip_set_bitmap_port.c
@@ -58,7 +58,7 @@ static int
bitmap_port_do_test(const struct bitmap_port_adt_elem *e,
const struct bitmap_port *map, size_t dsize)
{
- return !!test_bit(e->id, map->members);
+ return !!test_bit_acquire(e->id, map->members);
}
static int
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 3706b4a85a0f1..a531b654b8d96 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -351,8 +351,8 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment,
if (unlikely(c)) {
set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
- kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
+ kfree_rcu(c, rcu);
}
if (!len)
return;
@@ -393,8 +393,8 @@ ip_set_comment_free(struct ip_set *set, void *ptr)
if (unlikely(!c))
return;
set->ext_size -= sizeof(*c) + strlen(c->str) + 1;
- kfree_rcu(c, rcu);
rcu_assign_pointer(comment->c, NULL);
+ kfree_rcu(c, rcu);
}
typedef void (*destroyer)(struct ip_set *, void *);
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index 04e4627ddfc10..dedf59b661ddf 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -606,7 +606,7 @@ mtype_cancel_gc(struct ip_set *set)
struct htype *h = set->data;
if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
+ disable_delayed_work_sync(&h->gc.dwork);
}
static int
@@ -689,7 +689,7 @@ retry:
continue;
pos = smp_load_acquire(&n->pos);
for (j = 0; j < pos; j++) {
- if (!test_bit(j, n->used))
+ if (!test_bit_acquire(j, n->used))
continue;
data = ahash_data(n, j, dsize);
if (SET_ELEM_EXPIRED(set, data))
@@ -826,7 +826,7 @@ mtype_ext_size(struct ip_set *set, u32 *elements, size_t *ext_size)
continue;
pos = smp_load_acquire(&n->pos);
for (j = 0; j < pos; j++) {
- if (!test_bit(j, n->used))
+ if (!test_bit_acquire(j, n->used))
continue;
data = ahash_data(n, j, set->dsize);
if (!SET_ELEM_EXPIRED(set, data))
@@ -1201,7 +1201,7 @@ mtype_test_cidrs(struct ip_set *set, struct mtype_elem *d,
continue;
pos = smp_load_acquire(&n->pos);
for (i = 0; i < pos; i++) {
- if (!test_bit(i, n->used))
+ if (!test_bit_acquire(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
@@ -1259,7 +1259,7 @@ mtype_test(struct ip_set *set, void *value, const struct ip_set_ext *ext,
}
pos = smp_load_acquire(&n->pos);
for (i = 0; i < pos; i++) {
- if (!test_bit(i, n->used))
+ if (!test_bit_acquire(i, n->used))
continue;
data = ahash_data(n, i, set->dsize);
if (!mtype_data_equal(data, d, &multi))
@@ -1396,7 +1396,7 @@ mtype_list(const struct ip_set *set,
continue;
pos = smp_load_acquire(&n->pos);
for (i = 0; i < pos; i++) {
- if (!test_bit(i, n->used))
+ if (!test_bit_acquire(i, n->used))
continue;
e = ahash_data(n, i, set->dsize);
if (SET_ELEM_EXPIRED(set, e))
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 4fb3a2d186319..784bd1d7a9bfa 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1471,6 +1471,31 @@ static bool gc_worker_can_early_drop(const struct nf_conn *ct)
return false;
}
+static void nf_ct_help_gc(struct nf_conn *ct)
+{
+ struct nf_conn_help *help;
+
+ if (!refcount_inc_not_zero(&ct->ct_general.use))
+ return;
+
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
+ if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct)) {
+ nf_ct_put(ct);
+ return;
+ }
+
+ /* re-check helper due to SLAB_TYPESAFE_BY_RCU */
+ if (test_bit(IPS_HELPER_BIT, &ct->status)) {
+ help = nfct_help(ct);
+ if (help)
+ nf_ct_expectation_gc(help);
+ }
+
+ nf_ct_put(ct);
+}
+
static void gc_worker(struct work_struct *work)
{
unsigned int i, hashsz, nf_conntrack_max95 = 0;
@@ -1543,7 +1568,13 @@ static void gc_worker(struct work_struct *work)
expires = (expires - (long)next_run) / ++count;
next_run += expires;
- if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp))
+ if (gc_worker_skip_ct(tmp))
+ continue;
+
+ if (test_bit(IPS_HELPER_BIT, &tmp->status))
+ nf_ct_help_gc(tmp);
+
+ if (nf_conntrack_max95 == 0)
continue;
net = nf_ct_net(tmp);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 5c9b17835c280..49e18eda037ef 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -43,6 +43,24 @@ unsigned int nf_ct_expect_max __read_mostly;
static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
static siphash_aligned_key_t nf_ct_expect_hashrnd;
+void nf_ct_expectation_gc(struct nf_conn_help *master_help)
+{
+ struct nf_conntrack_expect *exp;
+ struct hlist_node *next;
+
+ if (hlist_empty(&master_help->expectations))
+ return;
+
+ spin_lock_bh(&nf_conntrack_expect_lock);
+ hlist_for_each_entry_safe(exp, next, &master_help->expectations, lnode) {
+ if (!nf_ct_exp_is_expired(exp))
+ continue;
+
+ nf_ct_unlink_expect(exp);
+ }
+ spin_unlock_bh(&nf_conntrack_expect_lock);
+}
+
/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report)
@@ -52,7 +70,6 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
struct nf_conntrack_net *cnet;
lockdep_nfct_expect_lock_held();
- WARN_ON_ONCE(timer_pending(&exp->timeout));
hlist_del_rcu(&exp->hnode);
@@ -70,16 +87,6 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
-static void nf_ct_expectation_timed_out(struct timer_list *t)
-{
- struct nf_conntrack_expect *exp = timer_container_of(exp, t, timeout);
-
- spin_lock_bh(&nf_conntrack_expect_lock);
- nf_ct_unlink_expect(exp);
- spin_unlock_bh(&nf_conntrack_expect_lock);
- nf_ct_expect_put(exp);
-}
-
static unsigned int nf_ct_expect_dst_hash(const struct net *n, const struct nf_conntrack_tuple *tuple)
{
struct {
@@ -117,19 +124,6 @@ nf_ct_exp_equal(const struct nf_conntrack_tuple *tuple,
nf_ct_exp_zone_equal_any(i, zone);
}
-bool nf_ct_remove_expect(struct nf_conntrack_expect *exp)
-{
- lockdep_nfct_expect_lock_held();
-
- if (timer_delete(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- return true;
- }
- return false;
-}
-EXPORT_SYMBOL_GPL(nf_ct_remove_expect);
-
struct nf_conntrack_expect *
__nf_ct_expect_find(struct net *net,
const struct nf_conntrack_zone *zone,
@@ -144,6 +138,8 @@ __nf_ct_expect_find(struct net *net,
h = nf_ct_expect_dst_hash(net, tuple);
hlist_for_each_entry_rcu(i, &nf_ct_expect_hash[h], hnode) {
+ if (nf_ct_exp_is_expired(i))
+ continue;
if (nf_ct_exp_equal(tuple, i, zone, net))
return i;
}
@@ -178,6 +174,7 @@ nf_ct_find_expectation(struct net *net,
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
+ struct hlist_node *next;
unsigned int h;
lockdep_nfct_expect_lock_held();
@@ -186,7 +183,11 @@ nf_ct_find_expectation(struct net *net,
return NULL;
h = nf_ct_expect_dst_hash(net, tuple);
- hlist_for_each_entry(i, &nf_ct_expect_hash[h], hnode) {
+ hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
+ if (nf_ct_exp_is_expired(i)) {
+ nf_ct_unlink_expect(i);
+ continue;
+ }
if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
nf_ct_exp_equal(tuple, i, zone, net)) {
exp = i;
@@ -196,13 +197,16 @@ nf_ct_find_expectation(struct net *net,
if (!exp)
return NULL;
+ if (!refcount_inc_not_zero(&exp->use))
+ return NULL;
+
/* If master is not in hash table yet (ie. packet hasn't left
this machine yet), how can other end know about expected?
Hence these are not the droids you are looking for (if
master ct never got confirmed, we'd hold a reference to it
and weird things would happen to future packets). */
if (!nf_ct_is_confirmed(exp->master))
- return NULL;
+ goto err_release_exp;
/* Avoid race with other CPUs, that for exp->master ct, is
* about to invoke ->destroy(), or nf_ct_delete() via timeout
@@ -214,18 +218,17 @@ nf_ct_find_expectation(struct net *net,
*/
if (unlikely(nf_ct_is_dying(exp->master) ||
!refcount_inc_not_zero(&exp->master->ct_general.use)))
- return NULL;
+ goto err_release_exp;
- if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
- refcount_inc(&exp->use);
- return exp;
- } else if (timer_delete(&exp->timeout)) {
- nf_ct_unlink_expect(exp);
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink)
return exp;
- }
- /* Undo exp->master refcnt increase, if timer_delete() failed */
- nf_ct_put(exp->master);
+ nf_ct_unlink_expect(exp);
+
+ return exp;
+
+err_release_exp:
+ nf_ct_expect_put(exp);
return NULL;
}
@@ -241,9 +244,8 @@ void nf_ct_remove_expectations(struct nf_conn *ct)
return;
spin_lock_bh(&nf_conntrack_expect_lock);
- hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
- nf_ct_remove_expect(exp);
- }
+ hlist_for_each_entry_safe(exp, next, &help->expectations, lnode)
+ nf_ct_unlink_expect(exp);
spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
@@ -292,7 +294,7 @@ static bool master_matches(const struct nf_conntrack_expect *a,
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
spin_lock_bh(&nf_conntrack_expect_lock);
- nf_ct_remove_expect(exp);
+ WRITE_ONCE(exp->flags, exp->flags | NF_CT_EXPECT_DEAD);
spin_unlock_bh(&nf_conntrack_expect_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
@@ -308,6 +310,7 @@ struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
if (!new)
return NULL;
+ new->timeout = nfct_time_stamp;
new->master = me;
refcount_set(&new->use, 1);
return new;
@@ -413,17 +416,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp,
struct net *net = nf_ct_exp_net(exp);
unsigned int h = nf_ct_expect_dst_hash(net, &exp->tuple);
- /* two references : one for hash insert, one for the timer */
- refcount_add(2, &exp->use);
+ refcount_inc(&exp->use);
- timer_setup(&exp->timeout, nf_ct_expectation_timed_out, 0);
helper = rcu_dereference_protected(master_help->helper,
lockdep_is_held(&nf_conntrack_expect_lock));
- if (helper) {
- exp->timeout.expires = jiffies +
- helper->expect_policy[exp->class].timeout * HZ;
- }
- add_timer(&exp->timeout);
+ if (helper)
+ exp->timeout += helper->expect_policy[exp->class].timeout * HZ;
hlist_add_head_rcu(&exp->lnode, &master_help->expectations);
master_help->expecting[exp->class]++;
@@ -435,19 +433,26 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp,
NF_CT_STAT_INC(net, expect_create);
}
-/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn_help *master_help,
- struct nf_conntrack_expect *new)
+ struct nf_conntrack_expect *new,
+ const struct nf_conntrack_expect_policy *p)
{
struct nf_conntrack_expect *exp, *last = NULL;
+ struct hlist_node *next;
- hlist_for_each_entry(exp, &master_help->expectations, lnode) {
+ hlist_for_each_entry_safe(exp, next, &master_help->expectations, lnode) {
+ if (nf_ct_exp_is_expired(exp)) {
+ nf_ct_unlink_expect(exp);
+ continue;
+ }
if (exp->class == new->class)
last = exp;
}
- if (last)
- nf_ct_remove_expect(last);
+ /* Still worth to evict oldest expectation after garbage collection? */
+ if (last &&
+ master_help->expecting[last->class] >= p->max_expected)
+ nf_ct_unlink_expect(last);
}
static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
@@ -467,14 +472,18 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
+ if (nf_ct_exp_is_expired(i)) {
+ nf_ct_unlink_expect(i);
+ continue;
+ }
if (master_matches(i, expect, flags) &&
expect_matches(i, expect)) {
if (i->class != expect->class ||
i->master != expect->master)
return -EALREADY;
- if (nf_ct_remove_expect(i))
- break;
+ nf_ct_unlink_expect(i);
+ break;
} else if (expect_clash(i, expect)) {
ret = -EBUSY;
goto out;
@@ -486,14 +495,8 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
if (helper) {
p = &helper->expect_policy[expect->class];
if (p->max_expected &&
- master_help->expecting[expect->class] >= p->max_expected) {
- evict_oldest_expect(master_help, expect);
- if (master_help->expecting[expect->class]
- >= p->max_expected) {
- ret = -EMFILE;
- goto out;
- }
- }
+ master_help->expecting[expect->class] >= p->max_expected)
+ evict_oldest_expect(master_help, expect, p);
}
cnet = nf_ct_pernet(net);
@@ -547,10 +550,8 @@ void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, vo
hlist_for_each_entry_safe(exp, next,
&nf_ct_expect_hash[i],
hnode) {
- if (iter(exp, data) && timer_delete(&exp->timeout)) {
+ if (iter(exp, data))
nf_ct_unlink_expect(exp);
- nf_ct_expect_put(exp);
- }
}
}
@@ -577,10 +578,8 @@ void nf_ct_expect_iterate_net(struct net *net,
if (!net_eq(nf_ct_exp_net(exp), net))
continue;
- if (iter(exp, data) && timer_delete(&exp->timeout)) {
+ if (iter(exp, data))
nf_ct_unlink_expect_report(exp, portid, report);
- nf_ct_expect_put(exp);
- }
}
}
@@ -657,17 +656,17 @@ static int exp_seq_show(struct seq_file *s, void *v)
struct net *net = seq_file_net(s);
struct hlist_node *n = v;
char *delim = "";
+ __s32 timeout;
expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
if (!net_eq(nf_ct_exp_net(expect), net))
return 0;
+ if (nf_ct_exp_is_expired(expect))
+ return 0;
- if (expect->timeout.function)
- seq_printf(s, "%ld ", timer_pending(&expect->timeout)
- ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
- else
- seq_puts(s, "- ");
+ timeout = (__s32)(READ_ONCE(expect->timeout) - nfct_time_stamp) / HZ;
+ seq_printf(s, "%d ", timeout > 0 ? timeout : 0);
seq_printf(s, "l3proto = %u proto=%u ",
expect->tuple.src.l3num,
expect->tuple.dst.protonum);
diff --git a/net/netfilter/nf_conntrack_h323_main.c b/net/netfilter/nf_conntrack_h323_main.c
index 7f189dceb3c4d..24931e379985b 100644
--- a/net/netfilter/nf_conntrack_h323_main.c
+++ b/net/netfilter/nf_conntrack_h323_main.c
@@ -1388,8 +1388,8 @@ static int process_rcf(struct sk_buff *skb, struct nf_conn *ct,
"timeout to %u seconds for",
info->timeout);
nf_ct_dump_tuple(&exp->tuple);
- mod_timer_pending(&exp->timeout,
- jiffies + info->timeout * HZ);
+ WRITE_ONCE(exp->timeout,
+ nfct_time_stamp + (info->timeout * HZ));
}
spin_unlock_bh(&nf_conntrack_expect_lock);
}
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 2f35bdd0d7d74..8b94001c24306 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -181,10 +181,10 @@ nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp)
struct nf_conn_help *help;
help = nf_ct_ext_add(ct, NF_CT_EXT_HELPER, gfp);
- if (help)
+ if (help) {
+ __set_bit(IPS_HELPER_BIT, &ct->status);
INIT_HLIST_HEAD(&help->expectations);
- else
- pr_debug("failed to add helper extension area");
+ }
return help;
}
EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add);
@@ -203,10 +203,8 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
return 0;
help = nfct_help(tmpl);
- if (help != NULL) {
+ if (help)
helper = rcu_dereference(help->helper);
- set_bit(IPS_HELPER_BIT, &ct->status);
- }
help = nfct_help(ct);
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index b429e648f06c5..4e78d24829895 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -3014,8 +3014,8 @@ static int
ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
+ __s32 timeout = (__s32)(READ_ONCE(exp->timeout) - nfct_time_stamp) / HZ;
struct nf_conn *master = exp->master;
- long timeout = ((long)exp->timeout.expires - (long)jiffies) / HZ;
struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
@@ -3178,6 +3178,9 @@ ctnetlink_exp_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
restart:
hlist_for_each_entry_rcu(exp, &nf_ct_expect_hash[cb->args[0]],
hnode) {
+ if (nf_ct_exp_is_expired(exp))
+ continue;
+
if (l3proto && exp->tuple.src.l3num != l3proto)
continue;
@@ -3456,11 +3459,8 @@ static int ctnetlink_del_expect(struct sk_buff *skb,
}
/* after list removal, usage count == 1 */
- if (timer_delete(&exp->timeout)) {
- nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
- nlmsg_report(info->nlh));
- nf_ct_expect_put(exp);
- }
+ nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid,
+ nlmsg_report(info->nlh));
spin_unlock_bh(&nf_conntrack_expect_lock);
/* have to put what we 'get' above.
* after this line usage count == 0 */
@@ -3484,14 +3484,10 @@ static int
ctnetlink_change_expect(struct nf_conntrack_expect *x,
const struct nlattr * const cda[])
{
- if (cda[CTA_EXPECT_TIMEOUT]) {
- if (!timer_delete(&x->timeout))
- return -ETIME;
+ if (cda[CTA_EXPECT_TIMEOUT])
+ WRITE_ONCE(x->timeout, nfct_time_stamp +
+ ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ);
- x->timeout.expires = jiffies +
- ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
- add_timer(&x->timeout);
- }
return 0;
}
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index c606d1f60b589..5ec3a4a4bbd70 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -897,11 +897,10 @@ static int refresh_signalling_expectation(struct nf_conn *ct,
exp->tuple.dst.protonum != proto ||
exp->tuple.dst.u.udp.port != port)
continue;
- if (mod_timer_pending(&exp->timeout, jiffies + expires * HZ)) {
- exp->flags &= ~NF_CT_EXPECT_INACTIVE;
- found = 1;
- break;
- }
+ WRITE_ONCE(exp->timeout, nfct_time_stamp + (expires * HZ));
+ WRITE_ONCE(exp->flags, exp->flags & ~NF_CT_EXPECT_INACTIVE);
+ found = 1;
+ break;
}
spin_unlock_bh(&nf_conntrack_expect_lock);
return found;
@@ -920,8 +919,7 @@ static void flush_expectations(struct nf_conn *ct, bool media)
hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
if ((exp->class != SIP_EXPECT_SIGNALLING) ^ media)
continue;
- if (!nf_ct_remove_expect(exp))
- continue;
+ nf_ct_unlink_expect(exp);
if (!media)
break;
}
@@ -1413,7 +1411,6 @@ static int process_register_request(struct sk_buff *skb, unsigned int protoff,
nf_ct_expect_init(exp, SIP_EXPECT_SIGNALLING, nf_ct_l3num(ct),
saddr, &daddr, proto, NULL, &port);
- exp->timeout.expires = sip_timeout * HZ;
rcu_assign_pointer(exp->assign_helper, helper);
exp->flags = NF_CT_EXPECT_PERMANENT | NF_CT_EXPECT_INACTIVE;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 785d8c244a771..99c5b9d671a0c 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -505,8 +505,13 @@ static u32 nf_flow_table_tcp_timeout(const struct nf_conn *ct)
*/
static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
{
- static const u32 min_timeout = 5 * 60 * HZ;
- u32 expires = nf_ct_expires(ct);
+ static const s32 min_timeout = 5 * 60 * HZ;
+ u32 ct_timeout = READ_ONCE(ct->timeout);
+ s32 expires;
+
+ expires = ct_timeout - nfct_time_stamp;
+ if (expires <= 0) /* already expired */
+ return;
/* normal case: large enough timeout, nothing to do. */
if (likely(expires >= min_timeout))
@@ -524,7 +529,7 @@ static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
if (nf_ct_is_confirmed(ct) &&
test_bit(IPS_OFFLOAD_BIT, &ct->status)) {
u8 l4proto = nf_ct_protonum(ct);
- u32 new_timeout = true;
+ u32 new_timeout = 1;
switch (l4proto) {
case IPPROTO_UDP:
@@ -549,7 +554,7 @@ static void nf_flow_table_extend_ct_timeout(struct nf_conn *ct)
*/
if (new_timeout) {
new_timeout += nfct_time_stamp;
- cmpxchg(&ct->timeout, expires, new_timeout);
+ cmpxchg(&ct->timeout, ct_timeout, new_timeout);
}
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 9c05a50d60138..e7a3fb2b2d946 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -347,29 +347,23 @@ static bool nf_flow_ip6_tunnel_proto(struct nf_flowtable_ctx *ctx,
struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_IPV6)
- struct ipv6hdr *ip6h, _ip6h;
- __be16 frag_off;
- u8 nexthdr;
- int hdrlen;
+ struct ipv6hdr *ip6h;
- ip6h = skb_header_pointer(skb, ctx->offset, sizeof(*ip6h), &_ip6h);
- if (!ip6h)
+ if (!pskb_may_pull(skb, sizeof(*ip6h) + ctx->offset))
return false;
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
if (ip6h->hop_limit <= 1)
return false;
- nexthdr = ip6h->nexthdr;
- hdrlen = ipv6_skip_exthdr(skb, sizeof(*ip6h) + ctx->offset, &nexthdr,
- &frag_off);
- if (hdrlen < 0)
+ if (ipv6_ext_hdr(ip6h->nexthdr))
return false;
- if (nexthdr == IPPROTO_IPV6) {
- ctx->tun.hdr_size = hdrlen;
- ctx->tun.proto = IPPROTO_IPV6;
+ if (ip6h->nexthdr == IPPROTO_IPV6) {
+ ctx->tun.proto = ip6h->nexthdr;
+ ctx->tun.hdr_size = sizeof(*ip6h);
+ ctx->offset += ctx->tun.hdr_size;
}
- ctx->offset += ctx->tun.hdr_size;
return true;
#else
@@ -648,25 +642,19 @@ static int nf_flow_tunnel_v4_push(struct net *net, struct sk_buff *skb,
return 0;
}
-struct ipv6_tel_txoption {
- struct ipv6_txoptions ops;
- __u8 dst_opt[8];
-};
-
static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
struct flow_offload_tuple *tuple,
- struct in6_addr **ip6_daddr,
- int encap_limit)
+ struct in6_addr **ip6_daddr)
{
struct ipv6hdr *ip6h = (struct ipv6hdr *)skb_network_header(skb);
- u8 hop_limit = ip6h->hop_limit, proto = IPPROTO_IPV6;
struct rtable *rt = dst_rtable(tuple->dst_cache);
__u8 dsfield = ipv6_get_dsfield(ip6h);
struct flowi6 fl6 = {
.daddr = tuple->tun.src_v6,
.saddr = tuple->tun.dst_v6,
- .flowi6_proto = proto,
+ .flowi6_proto = IPPROTO_IPV6,
};
+ u8 hop_limit = ip6h->hop_limit;
int err, mtu;
u32 headroom;
@@ -674,41 +662,18 @@ static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
if (err)
return err;
- skb_set_inner_ipproto(skb, proto);
+ skb_set_inner_ipproto(skb, IPPROTO_IPV6);
headroom = sizeof(*ip6h) + LL_RESERVED_SPACE(rt->dst.dev) +
rt->dst.header_len;
- if (encap_limit)
- headroom += 8;
err = skb_cow_head(skb, headroom);
if (err)
return err;
skb_scrub_packet(skb, true);
mtu = dst_mtu(&rt->dst) - sizeof(*ip6h);
- if (encap_limit)
- mtu -= 8;
mtu = max(mtu, IPV6_MIN_MTU);
skb_dst_update_pmtu_no_confirm(skb, mtu);
- if (encap_limit > 0) {
- struct ipv6_tel_txoption opt = {
- .dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT,
- .dst_opt[3] = 1,
- .dst_opt[4] = encap_limit,
- .dst_opt[5] = IPV6_TLV_PADN,
- .dst_opt[6] = 1,
- };
- struct ipv6_opt_hdr *hopt;
-
- opt.ops.dst1opt = (struct ipv6_opt_hdr *)opt.dst_opt;
- opt.ops.opt_nflen = 8;
-
- hopt = skb_push(skb, ipv6_optlen(opt.ops.dst1opt));
- memcpy(hopt, opt.ops.dst1opt, ipv6_optlen(opt.ops.dst1opt));
- hopt->nexthdr = IPPROTO_IPV6;
- proto = NEXTHDR_DEST;
- }
-
skb_push(skb, sizeof(*ip6h));
skb_reset_network_header(skb);
@@ -716,7 +681,7 @@ static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
ip6_flow_hdr(ip6h, dsfield,
ip6_make_flowlabel(net, skb, fl6.flowlabel, true, &fl6));
ip6h->hop_limit = hop_limit;
- ip6h->nexthdr = proto;
+ ip6h->nexthdr = IPPROTO_IPV6;
ip6h->daddr = tuple->tun.src_v6;
ip6h->saddr = tuple->tun.dst_v6;
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(*ip6h));
@@ -729,12 +694,10 @@ static int nf_flow_tunnel_ip6ip6_push(struct net *net, struct sk_buff *skb,
static int nf_flow_tunnel_v6_push(struct net *net, struct sk_buff *skb,
struct flow_offload_tuple *tuple,
- struct in6_addr **ip6_daddr,
- int encap_limit)
+ struct in6_addr **ip6_daddr)
{
if (tuple->tun_num)
- return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr,
- encap_limit);
+ return nf_flow_tunnel_ip6ip6_push(net, skb, tuple, ip6_daddr);
return 0;
}
@@ -1089,7 +1052,7 @@ static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
struct nf_flowtable *flow_table,
struct flow_offload_tuple_rhash *tuplehash,
- struct sk_buff *skb, int encap_limit)
+ struct sk_buff *skb)
{
enum flow_offload_tuple_dir dir;
struct flow_offload *flow;
@@ -1100,11 +1063,8 @@ static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
- if (flow->tuplehash[!dir].tuple.tun_num) {
+ if (flow->tuplehash[!dir].tuple.tun_num)
mtu -= sizeof(*ip6h);
- if (encap_limit > 0)
- mtu -= 8; /* encap limit option */
- }
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
return 0;
@@ -1158,7 +1118,6 @@ unsigned int
nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
const struct nf_hook_state *state)
{
- int encap_limit = IPV6_DEFAULT_TNL_ENCAP_LIMIT;
struct flow_offload_tuple_rhash *tuplehash;
struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple *other_tuple;
@@ -1177,8 +1136,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (tuplehash == NULL)
return NF_ACCEPT;
- ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb,
- encap_limit);
+ ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
if (ret < 0)
return NF_DROP;
else if (ret == 0)
@@ -1198,7 +1156,7 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
ip6_daddr = &other_tuple->src_v6;
if (nf_flow_tunnel_v6_push(state->net, skb, other_tuple,
- &ip6_daddr, encap_limit) < 0)
+ &ip6_daddr) < 0)
return NF_DROP;
switch (tuplehash->tuple.xmit_type) {
diff --git a/net/netfilter/nf_flow_table_path.c b/net/netfilter/nf_flow_table_path.c
index 1e7e216b9f894..98c03b487f521 100644
--- a/net/netfilter/nf_flow_table_path.c
+++ b/net/netfilter/nf_flow_table_path.c
@@ -53,8 +53,10 @@ static int nft_dev_fill_forward_path(const struct nf_flow_route *route,
struct neighbour *n;
u8 nud_state;
- if (!nft_is_valid_ether_device(dev))
+ if (!nft_is_valid_ether_device(dev)) {
+ eth_zero_addr(ha);
goto out;
+ }
n = dst_neigh_lookup(dst_cache, daddr);
if (!n)
diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c
index 57b450024a99e..73363ceedebe3 100644
--- a/net/netfilter/nf_queue.c
+++ b/net/netfilter/nf_queue.c
@@ -68,6 +68,7 @@ static void nf_queue_entry_release_refs(struct nf_queue_entry *entry)
nf_queue_sock_put(state->sk);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ dev_put(entry->bridge_dev);
dev_put(entry->physin);
dev_put(entry->physout);
#endif
@@ -84,6 +85,8 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
{
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
const struct sk_buff *skb = entry->skb;
+ struct dst_entry *dst = skb_dst(skb);
+ struct net_device *dev = NULL;
if (nf_bridge_info_exists(skb)) {
entry->physin = nf_bridge_get_physindev(skb, entry->state.net);
@@ -92,6 +95,16 @@ static void __nf_queue_entry_init_physdevs(struct nf_queue_entry *entry)
entry->physin = NULL;
entry->physout = NULL;
}
+
+ if (entry->state.pf == NFPROTO_BRIDGE &&
+ dst && (dst->flags & DST_FAKE_RTABLE))
+ dev = dst_dev_rcu(dst);
+
+ /* Must hold a reference on the bridge device: dst_hold() protects
+ * the dst itself, but the fake rtable is embedded in bridge-private
+ * storage that netdevice teardown can free independently.
+ */
+ entry->bridge_dev = dev;
#endif
}
@@ -108,6 +121,7 @@ bool nf_queue_entry_get_refs(struct nf_queue_entry *entry)
dev_hold(state->out);
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
+ dev_hold(entry->bridge_dev);
dev_hold(entry->physin);
dev_hold(entry->physout);
#endif
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index c5e29fec419bb..80ca077b81bd5 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -1262,6 +1262,9 @@ dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
if (physinif == ifindex || physoutif == ifindex)
return 1;
+
+ if (entry->bridge_dev && entry->bridge_dev->ifindex == ifindex)
+ return 1;
#endif
if (entry->skb_dev && entry->skb_dev->ifindex == ifindex)
return 1;
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 25934c6f01fbc..958054dd2e2ec 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1145,7 +1145,6 @@ static void nft_ct_helper_obj_eval(struct nft_object *obj,
help = nf_ct_helper_ext_add(ct, GFP_ATOMIC);
if (help && refcount_inc_not_zero(&to_assign->ct_refcnt)) {
rcu_assign_pointer(help->helper, to_assign);
- set_bit(IPS_HELPER_BIT, &ct->status);
if ((ct->status & IPS_NAT_MASK) && !nfct_seqadj(ct))
if (!nfct_seqadj_ext_add(ct))
@@ -1326,7 +1325,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
priv->l4proto, NULL, &priv->dport);
- exp->timeout.expires = jiffies + priv->timeout * HZ;
+ exp->timeout += priv->timeout * HZ;
if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index 9b5821c644426..0a43e0787a688 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -635,8 +635,8 @@ static int nft_meta_get_validate_xfrm(const struct nft_ctx *ctx)
#endif
}
-static int nft_meta_get_validate(const struct nft_ctx *ctx,
- const struct nft_expr *expr)
+int nft_meta_get_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
{
const struct nft_meta *priv = nft_expr_priv(expr);
@@ -652,6 +652,7 @@ static int nft_meta_get_validate(const struct nft_ctx *ctx,
return 0;
}
+EXPORT_SYMBOL_GPL(nft_meta_get_validate);
int nft_meta_set_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index ef2a80dfc68f9..345eff140d56f 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -224,11 +224,17 @@ static int nft_payload_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload *priv = nft_expr_priv(expr);
+ u32 offset;
+ int err;
priv->base = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_BASE]));
- priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U16_MAX, &offset);
+ if (err < 0)
+ return err;
+ priv->offset = offset;
+
return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
&priv->dreg, NULL, NFT_DATA_VALUE,
priv->len);
@@ -621,7 +627,8 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
{
struct nft_payload *priv = nft_expr_priv(expr);
- u32 base;
+ u32 base, offset;
+ int err;
if (!tb[NFTA_PAYLOAD_BASE] || !tb[NFTA_PAYLOAD_OFFSET] ||
!tb[NFTA_PAYLOAD_LEN] || !tb[NFTA_PAYLOAD_DREG])
@@ -639,8 +646,11 @@ static int nft_payload_inner_init(const struct nft_ctx *ctx,
}
priv->base = base;
- priv->offset = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_OFFSET]));
priv->len = ntohl(nla_get_be32(tb[NFTA_PAYLOAD_LEN]));
+ err = nft_parse_u32_check(tb[NFTA_PAYLOAD_OFFSET], U16_MAX, &offset);
+ if (err < 0)
+ return err;
+ priv->offset = offset;
return nft_parse_register_store(ctx, tb[NFTA_PAYLOAD_DREG],
&priv->dreg, NULL, NFT_DATA_VALUE,
diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c
index 908fd5f2c3c84..eaf2511d63f0e 100644
--- a/net/netfilter/xt_cluster.c
+++ b/net/netfilter/xt_cluster.c
@@ -107,7 +107,7 @@ xt_cluster_mt(const struct sk_buff *skb, struct xt_action_param *par)
}
ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
+ if (!ct || nf_ct_is_template(ct))
return false;
if (ct->master)