aboutsummaryrefslogtreecommitdiffstats
path: root/net
diff options
authorJakub Kicinski <kuba@kernel.org>2026-06-24 19:56:58 -0700
committerJakub Kicinski <kuba@kernel.org>2026-06-24 19:56:59 -0700
commit02f144fbb4c86c360495d33debe307cb46a57f95 (patch)
tree3f0c348379dbbf6d0247906e0e23a56dab07ee16 /net
parent620839b699aa7b1aaba925547eec6d2b976aa763 (diff)
parent397c8300972f6e1486fd1afd99a044648a401cd5 (diff)
downloadath-02f144fbb4c86c360495d33debe307cb46a57f95.tar.gz
Merge tag 'nf-26-06-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf
Pablo Neira Ayuso says: ==================== Netfilter fixes for net The following patchset contains Netfilter fixes for net: 1) Add a workaround to avoid a possible crash if nf_nat and nft_chain_nat are compiled built-in and nf_nat fails to register, allowing nft_chain_nat to access the incorrect pernetns area. This crash is specific to all-built-in compilation. From Matias Krause. 2) Revisit conncount GC optimization for confirmed conntracks, skip GC round if IPS_ASSURED is set. This addresses an issue in a corner-case scenario involving locally generated traffic. No crash, just a functionality fix. From Fernando F. Mancera. 3) Validate iph->ihl in flowtable IPIP tunnel support, from Lorenzo Bianconi. This is a sanity check that bounces malformed IPIP packets back to the classic forwarding path. 4) Kdoc fixes for x_tables.h, from Randy Dunlap. 5) Use info->options so nft_synproxy_tcp_options() stays on the same local snapshot, otherwise the eval path can observe an inconsistent mix of mss and timestamps. From Runyu Xiao. 6) Add conntrack_sctp_collision.sh to cover SCTP INIT collisions. From Yi Chen. 7) Do not allow NFPROTO_UNSPEC targets if family is NFPROTO_BRIDGE in nft_compat. Otherwise, nonsensical targets such as xt_nat can be used, leading to a crash. From Florian Westphal. 8) Add a selftest for queueing from the bridge family. From Florian Westphal. 9) Do not allow resetting a conntrack helper via ctnetlink. This feature predates the creation of the conntrack-tools and it is not used; I don't have a use case for it, so I prefer removing it to fixing it. 10) Add deprecation warning for IPv4-only conntrack helpers for PPTP and IRC. From Florian Westphal. 11) Store the master tuple in the expectation object and use it, otherwise SLAB_TYPESAFE_RCU rules allow incorrect master tuple information to be displayed through ctnetlink. 12) Run expectation eviction when inserting an expectation with no helper; this is a fix for the nft_ct custom expectation support. 
13) Fix nft_ct custom expectation timeouts: userspace provides a timeout in milliseconds but the kernel assumes it comes in seconds. From Florian Westphal. 14) Cap the maximum number of expectations per class to 255 expectations per master conntrack at helper registration. This is a fix to restrict the maximum number of expectations per master conntrack, which can be an issue for the new lazy GC expectation approach. * tag 'nf-26-06-23' of git://git.kernel.org/pub/scm/linux/kernel/git/netfilter/nf: netfilter: nf_conntrack_helper: cap maximum number of expectation at helper registration netfilter: nft_ct: expectation timeouts are passed in milliseconds netfilter: nf_conntrack_expect: run expectation eviction with no helper netfilter: nf_conntrack_expect: store master_tuple in expectation netfilter: conntrack: add deprecation warnings for irc and pptp trackers netfilter: ctnetlink: do not allow to reset helper on existing conntrack selftests: nft_queue.sh: add a bridge queue test netfilter: nft_compat: ebtables emulation must reject non-bridge targets selftests: netfilter: conntrack_sctp_collision.sh: Introduce SCTP INIT collision test netfilter: nft_synproxy: stop bypassing the priv->info snapshot netfilter: x_tables.h: fix all kernel-doc warnings netfilter: flowtable: Validate iph->ihl in nf_flow_ip4_tunnel_proto() netfilter: nf_conncount: prevent connlimit drops for early confirmed ct netfilter: nf_nat: avoid invalid nat_net pointer use on failed nf_nat_init() ==================== Link: https://patch.msgid.link/20260623221548.701545-1-pablo@netfilter.org Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net')
-rw-r--r--net/netfilter/Kconfig11
-rw-r--r--net/netfilter/nf_conncount.c11
-rw-r--r--net/netfilter/nf_conntrack_broadcast.c1
-rw-r--r--net/netfilter/nf_conntrack_expect.c12
-rw-r--r--net/netfilter/nf_conntrack_helper.c9
-rw-r--r--net/netfilter/nf_conntrack_irc.c2
-rw-r--r--net/netfilter/nf_conntrack_netlink.c23
-rw-r--r--net/netfilter/nf_conntrack_pptp.c2
-rw-r--r--net/netfilter/nf_flow_table_ip.c8
-rw-r--r--net/netfilter/nf_nat_core.c10
-rw-r--r--net/netfilter/nft_compat.c24
-rw-r--r--net/netfilter/nft_ct.c21
-rw-r--r--net/netfilter/nft_synproxy.c9
13 files changed, 95 insertions, 48 deletions
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 665f8008cc4b2..4c04cd8d40a22 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -256,8 +256,7 @@ config NF_CONNTRACK_H323
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_IRC
- tristate "IRC protocol support"
- default m if NETFILTER_ADVANCED=n
+ tristate "IRC DCC protocol support (obsolete)"
help
There is a commonly-used extension to IRC called
Direct Client-to-Client Protocol (DCC). This enables users to send
@@ -267,6 +266,8 @@ config NF_CONNTRACK_IRC
using NAT, this extension will enable you to send files and initiate
chats. Note that you do NOT need this extension to get files or
have others initiate chats, or everything else in IRC.
+ DCC tracking behind NAT requires plaintext (unencrypted) IRC, so
+ this helper is of limited use these days.
To compile it as a module, choose M here. If unsure, say N.
@@ -308,17 +309,17 @@ config NF_CONNTRACK_SNMP
To compile it as a module, choose M here. If unsure, say N.
config NF_CONNTRACK_PPTP
- tristate "PPtP protocol support"
+ tristate "PPtP protocol support (deprecated)"
depends on NETFILTER_ADVANCED
select NF_CT_PROTO_GRE
help
This module adds support for PPTP (Point to Point Tunnelling
Protocol, RFC2637) connection tracking and NAT.
- If you are running PPTP sessions over a stateful firewall or NAT
+ If you are still running PPTP sessions over a stateful firewall or NAT
box, you may want to enable this feature.
- Please note that not all PPTP modes of operation are supported yet.
+ Please note that not all PPTP modes of operation are supported.
Specifically these limitations exist:
- Blindly assumes that control connections are always established
in PNS->PAC direction. This is a violation of RFC2637.
diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c
index dd67004a5cc09..91582069f6d2e 100644
--- a/net/netfilter/nf_conncount.c
+++ b/net/netfilter/nf_conncount.c
@@ -183,17 +183,16 @@ static int __nf_conncount_add(struct net *net,
return -ENOENT;
if (ct && nf_ct_is_confirmed(ct)) {
- /* local connections are confirmed in postrouting so confirmation
- * might have happened before hitting connlimit
+ /* Connection is confirmed but might still be in the setup phase.
+ * Only skip the tracking if it is fully assured. This guarantees
+ * that setup packets or retransmissions are properly counted and
+ * deduplicated.
*/
- if (skb->skb_iif != LOOPBACK_IFINDEX) {
+ if (test_bit(IPS_ASSURED_BIT, &ct->status)) {
err = -EEXIST;
goto out_put;
}
- /* this is likely a local connection, skip optimization to avoid
- * adding duplicates from a 'packet train'
- */
goto check_connections;
}
diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c
index 400119b6320e7..bf78828c7549d 100644
--- a/net/netfilter/nf_conntrack_broadcast.c
+++ b/net/netfilter/nf_conntrack_broadcast.c
@@ -62,6 +62,7 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb,
if (exp == NULL)
goto out;
+ exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
exp->tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple;
helper = rcu_dereference(help->helper);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 49e18eda037ef..38630c5e006f0 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -355,6 +355,8 @@ void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
exp->tuple.src.l3num = family;
exp->tuple.dst.protonum = proto;
+ exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
+
if (saddr) {
memcpy(&exp->tuple.src.u3, saddr, len);
if (sizeof(exp->tuple.src.u3) > len)
@@ -494,9 +496,15 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
lockdep_is_held(&nf_conntrack_expect_lock));
if (helper) {
p = &helper->expect_policy[expect->class];
- if (p->max_expected &&
- master_help->expecting[expect->class] >= p->max_expected)
+ if (master_help->expecting[expect->class] >= p->max_expected)
evict_oldest_expect(master_help, expect, p);
+ } else {
+ const struct nf_conntrack_expect_policy default_exp_policy = {
+ .max_expected = NF_CT_EXPECT_MAX_CNT,
+ };
+
+ if (master_help->expecting[expect->class] >= default_exp_policy.max_expected)
+ evict_oldest_expect(master_help, expect, &default_exp_policy);
}
cnet = nf_ct_pernet(net);
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 8b94001c24306..500509b17663e 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -374,8 +374,13 @@ int __nf_conntrack_helper_register(struct nf_conntrack_helper *me)
if (!nf_ct_helper_hash)
return -ENOENT;
- if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
- return -EINVAL;
+ for (i = 0; i <= me->expect_class_max; i++) {
+ if (!me->expect_policy[i].max_expected)
+ me->expect_policy[i].max_expected = NF_CT_EXPECT_MAX_CNT;
+
+ if (me->expect_policy[i].max_expected > NF_CT_EXPECT_MAX_CNT)
+ return -EINVAL;
+ }
mutex_lock(&nf_ct_helper_mutex);
for (i = 0; i < nf_ct_helper_hsize; i++) {
diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c
index 0c117b8492e99..193ab34db795f 100644
--- a/net/netfilter/nf_conntrack_irc.c
+++ b/net/netfilter/nf_conntrack_irc.c
@@ -262,6 +262,8 @@ static int __init nf_conntrack_irc_init(void)
{
int i, ret;
+ nf_conntrack_helper_deprecated(HELPER_NAME);
+
if (max_dcc_channels < 1) {
pr_err("max_dcc_channels must not be zero\n");
return -EINVAL;
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 4e78d24829895..4217715d42dc2 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -1953,19 +1953,6 @@ static int ctnetlink_change_helper(struct nf_conn *ct,
return err;
}
- if (!strcmp(helpname, "") && help) {
- helper = rcu_dereference(help->helper);
- if (helper) {
- /* we had a helper before ... */
- nf_ct_remove_expectations(ct);
- RCU_INIT_POINTER(help->helper, NULL);
- if (refcount_dec_and_test(&helper->ct_refcnt))
- kfree_rcu(helper, rcu);
- }
- rcu_read_unlock();
- return 0;
- }
-
helper = __nf_conntrack_helper_find(helpname, nf_ct_l3num(ct),
nf_ct_protonum(ct));
if (helper == NULL) {
@@ -3015,7 +3002,6 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
const struct nf_conntrack_expect *exp)
{
__s32 timeout = (__s32)(READ_ONCE(exp->timeout) - nfct_time_stamp) / HZ;
- struct nf_conn *master = exp->master;
struct nf_conntrack_helper *helper;
#if IS_ENABLED(CONFIG_NF_NAT)
struct nlattr *nest_parms;
@@ -3030,9 +3016,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
goto nla_put_failure;
if (ctnetlink_exp_dump_mask(skb, &exp->tuple, &exp->mask) < 0)
goto nla_put_failure;
- if (ctnetlink_exp_dump_tuple(skb,
- &master->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
- CTA_EXPECT_MASTER) < 0)
+ if (ctnetlink_exp_dump_tuple(skb, &exp->master_tuple, CTA_EXPECT_MASTER) < 0)
goto nla_put_failure;
#if IS_ENABLED(CONFIG_NF_NAT)
@@ -3045,9 +3029,9 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb,
if (nla_put_be32(skb, CTA_EXPECT_NAT_DIR, htonl(exp->dir)))
goto nla_put_failure;
- nat_tuple.src.l3num = nf_ct_l3num(master);
+ nat_tuple.src.l3num = exp->master_tuple.src.l3num;
nat_tuple.src.u3 = exp->saved_addr;
- nat_tuple.dst.protonum = nf_ct_protonum(master);
+ nat_tuple.dst.protonum = exp->master_tuple.dst.protonum;
nat_tuple.src.u = exp->saved_proto;
if (ctnetlink_exp_dump_tuple(skb, &nat_tuple,
@@ -3589,6 +3573,7 @@ ctnetlink_alloc_expect(const struct nlattr * const cda[], struct nf_conn *ct,
#endif
rcu_assign_pointer(exp->helper, helper);
rcu_assign_pointer(exp->assign_helper, assign_helper);
+ exp->master_tuple = ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
exp->tuple = *tuple;
exp->mask.src.u3 = mask->src.u3;
exp->mask.src.u.all = mask->src.u.all;
diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c
index 776505a78e645..80fc14c87ddc4 100644
--- a/net/netfilter/nf_conntrack_pptp.c
+++ b/net/netfilter/nf_conntrack_pptp.c
@@ -545,6 +545,8 @@ static int __init nf_conntrack_pptp_init(void)
pptp.destroy = gre_pptp_destroy_siblings;
+ nf_conntrack_helper_deprecated(pptp.name);
+
return nf_conntrack_helper_register(&pptp, &pptp_ptr);
}
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index e7a3fb2b2d946..29e93ac1e2e40 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -326,8 +326,10 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
return false;
iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
- size = iph->ihl << 2;
+ if (iph->ihl < 5)
+ return false;
+ size = iph->ihl << 2;
if (ip_is_fragment(iph) || unlikely(ip_has_options(size)))
return false;
@@ -335,9 +337,9 @@ static bool nf_flow_ip4_tunnel_proto(struct nf_flowtable_ctx *ctx,
return false;
if (iph->protocol == IPPROTO_IPIP) {
- ctx->tun.proto = IPPROTO_IPIP;
+ ctx->tun.proto = iph->protocol;
ctx->tun.hdr_size = size;
- ctx->offset += size;
+ ctx->offset += ctx->tun.hdr_size;
}
return true;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index 2bbf5163c0e27..63ff6b4d5d214 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -1181,6 +1181,16 @@ int nf_nat_register_fn(struct net *net, u8 pf, const struct nf_hook_ops *ops,
struct nf_hook_ops *nat_ops;
int i, ret;
+#ifndef MODULE
+ /* If nf_nat_core is built-in and nf_nat_init() fails, dependent
+ * modules like nft_chain_nat.ko may still call this function.
+ * However, nat_net would be invalid, likely pointing to some other
+ * per-net structure.
+ */
+ if (WARN_ON_ONCE(!nf_nat_hook))
+ return -EOPNOTSUPP;
+#endif
+
if (WARN_ON_ONCE(pf >= ARRAY_SIZE(nat_net->nat_proto_net)))
return -EINVAL;
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 0caa9304d2d03..63864b9282590 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -397,6 +397,22 @@ static int nft_target_validate(const struct nft_ctx *ctx,
return 0;
}
+static int nft_target_bridge_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr)
+{
+ struct xt_target *target = expr->ops->data;
+
+ /* Do not allow UNSPEC to stand-in for NFPROTO_BRIDGE
+ * targets: they are incompatible. ebtables targets return
+ * EBT_ACCEPT, DROP and so on which are not compatible with
+ * NF_ACCEPT, NF_DROP and so on.
+ */
+ if (target->family != NFPROTO_BRIDGE)
+ return -ENOENT;
+
+ return nft_target_validate(ctx, expr);
+}
+
static void __nft_match_eval(const struct nft_expr *expr,
struct nft_regs *regs,
const struct nft_pktinfo *pkt,
@@ -932,13 +948,15 @@ nft_target_select_ops(const struct nft_ctx *ctx,
ops->init = nft_target_init;
ops->destroy = nft_target_destroy;
ops->dump = nft_target_dump;
- ops->validate = nft_target_validate;
ops->data = target;
- if (family == NFPROTO_BRIDGE)
+ if (family == NFPROTO_BRIDGE) {
ops->eval = nft_target_eval_bridge;
- else
+ ops->validate = nft_target_bridge_validate;
+ } else {
ops->eval = nft_target_eval_xt;
+ ops->validate = nft_target_validate;
+ }
return ops;
err:
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 958054dd2e2ec..03a88c77e0f02 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -1215,11 +1215,23 @@ struct nft_ct_expect_obj {
u32 timeout;
};
+static int nft_ct_expect_timeout_get(const struct nlattr *attr, u32 *val)
+{
+ unsigned long jiffies_val = msecs_to_jiffies(nla_get_u32(attr));
+
+ if (jiffies_val > UINT_MAX)
+ return -ERANGE;
+
+ *val = jiffies_val;
+ return 0;
+}
+
static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
const struct nlattr * const tb[],
struct nft_object *obj)
{
struct nft_ct_expect_obj *priv = nft_obj_data(obj);
+ int err;
if (!tb[NFTA_CT_EXPECT_L4PROTO] ||
!tb[NFTA_CT_EXPECT_DPORT] ||
@@ -1254,8 +1266,11 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
return -EOPNOTSUPP;
}
+ err = nft_ct_expect_timeout_get(tb[NFTA_CT_EXPECT_TIMEOUT], &priv->timeout);
+ if (err)
+ return err;
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
- priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
return nf_ct_netns_get(ctx->net, ctx->family);
@@ -1275,7 +1290,7 @@ static int nft_ct_expect_obj_dump(struct sk_buff *skb,
if (nla_put_be16(skb, NFTA_CT_EXPECT_L3PROTO, htons(priv->l3num)) ||
nla_put_u8(skb, NFTA_CT_EXPECT_L4PROTO, priv->l4proto) ||
nla_put_be16(skb, NFTA_CT_EXPECT_DPORT, priv->dport) ||
- nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, priv->timeout) ||
+ nla_put_u32(skb, NFTA_CT_EXPECT_TIMEOUT, jiffies_to_msecs(priv->timeout)) ||
nla_put_u8(skb, NFTA_CT_EXPECT_SIZE, priv->size))
return -1;
@@ -1325,7 +1340,7 @@ static void nft_ct_expect_obj_eval(struct nft_object *obj,
&ct->tuplehash[!dir].tuple.src.u3,
&ct->tuplehash[!dir].tuple.dst.u3,
priv->l4proto, NULL, &priv->dport);
- exp->timeout += priv->timeout * HZ;
+ exp->timeout += priv->timeout;
if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 7641f249614c9..9ed288c9d1688 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -24,14 +24,13 @@ static const struct nla_policy nft_synproxy_policy[NFTA_SYNPROXY_MAX + 1] = {
static void nft_synproxy_tcp_options(struct synproxy_options *opts,
const struct tcphdr *tcp,
struct synproxy_net *snet,
- struct nf_synproxy_info *info,
- const struct nft_synproxy *priv)
+ struct nf_synproxy_info *info)
{
this_cpu_inc(snet->stats->syn_received);
if (tcp->ece && tcp->cwr)
opts->options |= NF_SYNPROXY_OPT_ECN;
- opts->options &= priv->info.options;
+ opts->options &= info->options;
opts->mss_encode = opts->mss_option;
opts->mss_option = info->mss;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
@@ -56,7 +55,7 @@ static void nft_synproxy_eval_v4(const struct nft_synproxy *priv,
if (tcp->syn) {
/* Initial SYN from client */
- nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+ nft_synproxy_tcp_options(opts, tcp, snet, &info);
synproxy_send_client_synack(net, skb, tcp, opts);
consume_skb(skb);
regs->verdict.code = NF_STOLEN;
@@ -87,7 +86,7 @@ static void nft_synproxy_eval_v6(const struct nft_synproxy *priv,
if (tcp->syn) {
/* Initial SYN from client */
- nft_synproxy_tcp_options(opts, tcp, snet, &info, priv);
+ nft_synproxy_tcp_options(opts, tcp, snet, &info);
synproxy_send_client_synack_ipv6(net, skb, tcp, opts);
consume_skb(skb);
regs->verdict.code = NF_STOLEN;