From: Willem de Bruijn <willemdebruijn.kernel@gmail.com>
To: netdev@vger.kernel.org
Cc: davem@davemloft.net, kuba@kernel.org, edumazet@google.com,
pabeni@redhat.com, horms@kernel.org,
Willem de Bruijn <willemb@google.com>
Subject: [PATCH net-next] net_sched: sch_fq: update flow delivery time on earlier EDT packet
Date: Tue, 26 May 2026 09:40:37 -0400 [thread overview]
Message-ID: <20260526134109.2624493-1-willemdebruijn.kernel@gmail.com> (raw)
From: Willem de Bruijn <willemb@google.com>
When inserting an EDT packet with a delivery time before
flow->time_next_packet, update the flow's, and possibly the queue's,
next delivery time.
Reinsert the flow into the q->delayed rb-tree to position it correctly
and to have fq_check_throttled set the wake-up at the right next time.
Factor RB tree insertion out of fq_flow_set_throttled to avoid open
coding it twice.
EDT packets do not take precedence over the queue rate limit. Skip this
new step if a queue rate limit is set. EDT packets do take precedence over
per-socket rate limits, as can be seen from fq_dequeue reading
sk_pacing_rate if !skb->tstamp.
With this change the so_txtime selftest sends packets in the expected
order.
Fixes: eeb84aa0d0af ("net_sched: sch_fq: do not assume EDT packets are ordered")
Assisted-by: Gemini:gemini-3
Signed-off-by: Willem de Bruijn <willemb@google.com>
---
On net this has a conflict on so_txtime.py. Treat this as an
improvement and send to net-next only (not stable material).
---
net/sched/sch_fq.c | 34 ++++++++++++++++---
.../selftests/drivers/net/so_txtime.py | 2 +-
2 files changed, 31 insertions(+), 5 deletions(-)
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 796cb8046a90..33783c9f8e16 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -217,7 +217,7 @@ static void fq_flow_unset_throttled(struct fq_sched_data *q, struct fq_flow *f)
fq_flow_add_tail(q, f, OLD_FLOW);
}
-static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
+static void fq_flow_rb_insert(struct fq_sched_data *q, struct fq_flow *f)
{
struct rb_node **p = &q->delayed.rb_node, *parent = NULL;
@@ -233,14 +233,18 @@ static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
}
rb_link_node(&f->rate_node, parent, p);
rb_insert_color(&f->rate_node, &q->delayed);
- q->throttled_flows++;
- q->stat_throttled++;
- f->next = &throttled;
if (q->time_next_delayed_flow > f->time_next_packet)
q->time_next_delayed_flow = f->time_next_packet;
}
+static void fq_flow_set_throttled(struct fq_sched_data *q, struct fq_flow *f)
+{
+ fq_flow_rb_insert(q, f);
+ q->throttled_flows++;
+ q->stat_throttled++;
+ f->next = &throttled;
+}
static struct kmem_cache *fq_flow_cachep __read_mostly;
@@ -539,6 +543,24 @@ static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
return unlikely((s64)skb->tstamp > (s64)(now + q->horizon));
}
+static void fq_flow_adjust_timer(struct fq_sched_data *q, struct fq_flow *flow,
+ u64 time_to_send, u64 now)
+{
+ if (time_to_send <= now) {
+ fq_flow_unset_throttled(q, flow);
+ if (q->time_next_delayed_flow == flow->time_next_packet) {
+ struct rb_node *p = rb_first(&q->delayed);
+
+ q->time_next_delayed_flow = p ? rb_entry(p, struct fq_flow, rate_node)->time_next_packet : ~0ULL;
+ }
+ flow->time_next_packet = time_to_send;
+ } else {
+ rb_erase(&flow->rate_node, &q->delayed);
+ flow->time_next_packet = time_to_send;
+ fq_flow_rb_insert(q, flow);
+ }
+}
+
static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
struct sk_buff **to_free)
{
@@ -596,6 +618,10 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
/* Note: this overwrites f->age */
flow_queue_add(f, skb);
+ if (fq_skb_cb(skb)->time_to_send < f->time_next_packet && skb->tstamp &&
+ fq_flow_is_throttled(f) && q->flow_max_rate == ~0UL)
+ fq_flow_adjust_timer(q, f, fq_skb_cb(skb)->time_to_send, now);
+
qdisc_qstats_backlog_inc(sch, skb);
qdisc_qlen_inc(sch);
diff --git a/tools/testing/selftests/drivers/net/so_txtime.py b/tools/testing/selftests/drivers/net/so_txtime.py
index e7de8fe22c1e..5d4388bfc6dd 100755
--- a/tools/testing/selftests/drivers/net/so_txtime.py
+++ b/tools/testing/selftests/drivers/net/so_txtime.py
@@ -53,7 +53,7 @@ def _test_variants_mono():
["zero_delay", "a,0", "a,0"],
["one_pkt", "a,10", "a,10"],
["in_order", "a,10,b,20", "a,10,b,20"],
- ["reverse_order", "a,20,b,10", "b,20,a,20"],
+ ["reverse_order", "a,20,b,10", "b,10,a,20"],
]:
name = f"v{ipver}_{testcase[0]}"
yield KsftNamedVariant(name, ipver, testcase[1], testcase[2])
--
2.54.0.746.g67dd491aae-goog
next reply other threads:[~2026-05-26 13:41 UTC|newest]
Thread overview: 2+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-05-26 13:40 Willem de Bruijn [this message]
2026-05-28 1:50 ` [PATCH net-next] net_sched: sch_fq: update flow delivery time on earlier EDT packet patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260526134109.2624493-1-willemdebruijn.kernel@gmail.com \
--to=willemdebruijn.kernel@gmail.com \
--cc=davem@davemloft.net \
--cc=edumazet@google.com \
--cc=horms@kernel.org \
--cc=kuba@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pabeni@redhat.com \
--cc=willemb@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.