diff options
| author | Mark Brown <broonie@kernel.org> | 2026-05-29 22:22:18 +0100 |
|---|---|---|
| committer | Mark Brown <broonie@kernel.org> | 2026-05-29 22:22:24 +0100 |
| commit | 091bddd6f3606ee19980772e41e0d5c8cb34dce7 (patch) | |
| tree | f6f9c17ea6c1aa02dc2ca7549ef44311221ab779 /tools | |
| parent | d020f44bb364ba5456cdb834ebe68773ce9ff02b (diff) | |
| parent | 8415598365503ced2e3d019491b0a2756c85c494 (diff) | |
| download | linux-next-history-091bddd6f3606ee19980772e41e0d5c8cb34dce7.tar.gz | |
Merge branch 'main' of https://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
Diffstat (limited to 'tools')
59 files changed, 3489 insertions, 682 deletions
diff --git a/tools/net/ynl/pyynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py index 010aac0c6c67a..092d132edec12 100644 --- a/tools/net/ynl/pyynl/lib/ynl.py +++ b/tools/net/ynl/pyynl/lib/ynl.py @@ -682,6 +682,7 @@ class YnlFamily(SpecFamily): ynl.ntf_subscribe(mcast_name) -- join a multicast group ynl.ntf_listen_all_nsid() -- listen on all netns + ynl.ntf_bind(addr=(0, 0)) -- bind socket for unicast notifications ynl.check_ntf() -- drain pending notifications ynl.poll_ntf(duration=None) -- yield notifications @@ -767,6 +768,10 @@ class YnlFamily(SpecFamily): return None return None + def ntf_bind(self, addr=(0, 0)): + """Bind socket for receiving unicast notifications.""" + self.sock.bind(addr) + def set_recv_dbg(self, enabled): self._recv_dbg = enabled diff --git a/tools/net/ynl/ynltool/page-pool.c b/tools/net/ynl/ynltool/page-pool.c index 4b24492abab78..9487eda6b3aab 100644 --- a/tools/net/ynl/ynltool/page-pool.c +++ b/tools/net/ynl/ynltool/page-pool.c @@ -327,7 +327,9 @@ static void aggregate_device_stats(struct pp_stats_array *a, static int do_stats(int argc, char **argv) { + struct netdev_page_pool_stats_get_req_dump pp_stat_req = {}; struct netdev_page_pool_stats_get_list *pp_stats; + struct netdev_page_pool_get_req_dump pp_req = {}; struct netdev_page_pool_get_list *pools; enum { GROUP_BY_DEVICE, @@ -374,14 +376,14 @@ static int do_stats(int argc, char **argv) return -1; } - pools = netdev_page_pool_get_dump(ys); + pools = netdev_page_pool_get_dump(ys, &pp_req); if (!pools) { p_err("failed to get page pools: %s", ys->err.msg); ret = -1; goto exit_close; } - pp_stats = netdev_page_pool_stats_get_dump(ys); + pp_stats = netdev_page_pool_stats_get_dump(ys, &pp_stat_req); if (!pp_stats) { p_err("failed to get page pool stats: %s", ys->err.msg); ret = -1; diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore index 585ecb4d5dc42..e5314ce4bb2de 100644 --- a/tools/testing/selftests/drivers/net/.gitignore +++ 
b/tools/testing/selftests/drivers/net/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only napi_id_helper psp_responder +so_txtime diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index b72080c6d06b0..d5bf4cb638a8c 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -7,6 +7,7 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \ TEST_GEN_FILES := \ napi_id_helper \ + so_txtime \ # end of TEST_GEN_FILES TEST_PROGS := \ @@ -21,6 +22,7 @@ TEST_PROGS := \ queues.py \ ring_reconfig.py \ shaper.py \ + so_txtime.py \ stats.py \ xdp.py \ # end of TEST_PROGS diff --git a/tools/testing/selftests/drivers/net/config b/tools/testing/selftests/drivers/net/config index fd16994366f4b..617de8aaf5510 100644 --- a/tools/testing/selftests/drivers/net/config +++ b/tools/testing/selftests/drivers/net/config @@ -8,5 +8,9 @@ CONFIG_NETCONSOLE=m CONFIG_NETCONSOLE_DYNAMIC=y CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m +CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_FQ=m +CONFIG_PPP=y +CONFIG_PPPOE=y CONFIG_VLAN_8021Q=m CONFIG_XDP_SOCKETS=y diff --git a/tools/testing/selftests/drivers/net/gro.py b/tools/testing/selftests/drivers/net/gro.py index 221f27e571476..fd158c775b1c2 100755 --- a/tools/testing/selftests/drivers/net/gro.py +++ b/tools/testing/selftests/drivers/net/gro.py @@ -132,11 +132,21 @@ def _get_queue_stats(cfg, queue_id): return {} +def _require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + if features["ntuple-filters"]["fixed"]: + raise KsftXfailEx("Device does not support ntuple-filters") + ethtool(f"-K {cfg.ifname} ntuple-filters on") + defer(ethtool, f"-K {cfg.ifname} ntuple-filters off") + + def _setup_isolated_queue(cfg): """Set up an isolated queue for testing using ntuple filter. Remove queue 1 from the default RSS context and steer test traffic to it. 
""" + _require_ntuple(cfg) test_queue = 1 qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) @@ -313,6 +323,12 @@ def _gro_variants(): "ip_frag6", "ip_v6ext_same", "ip_v6ext_diff", ] + # Tests specific to PPPoE + pppoe_tests = [ + "data_same", "data_lrg_sml", "data_sml_lrg", "data_lrg_1byte", + "data_burst", "pppoe_sid", + ] + for mode in ["sw", "hw", "lro"]: for protocol in ["ipv4", "ipv6", "ipip", "ip6ip6"]: for test_name in common_tests: @@ -325,6 +341,11 @@ def _gro_variants(): for test_name in ipv6_tests: yield mode, protocol, test_name + for mode in ["sw"]: + for protocol in ["pppoev4", "pppoev6"]: + for test_name in pppoe_tests: + yield mode, protocol, test_name + @ksft_variants(_gro_variants()) def test(cfg, mode, protocol, test_name): diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 82809d5b24780..c7a1206880ea7 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -35,6 +35,7 @@ TEST_PROGS = \ irq.py \ loopback.sh \ nic_timestamp.py \ + nk_devmem.py \ nk_netns.py \ nk_qlease.py \ ntuple.py \ @@ -52,6 +53,7 @@ TEST_PROGS = \ # TEST_FILES := \ + devmem_lib.py \ ethtool_lib.sh \ # diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index ee863e90d1e0e..031cf9905f651 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -2,91 +2,40 @@ # SPDX-License-Identifier: GPL-2.0 from os import path -from lib.py import ksft_run, ksft_exit -from lib.py import ksft_eq, KsftSkipEx +from devmem_lib import setup_test, run_rx, run_tx, run_tx_chunks, run_rx_hds +from lib.py import ksft_run, ksft_exit, ksft_disruptive from lib.py import NetDrvEpEnv -from lib.py import bkg, cmd, rand_port, wait_port_listen -from lib.py import ksft_disruptive - - -def require_devmem(cfg): - if not hasattr(cfg, "_devmem_probed"): 
- probe_command = f"{cfg.bin_local} -f {cfg.ifname}" - cfg._devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 - cfg._devmem_probed = True - - if not cfg._devmem_supported: - raise KsftSkipEx("Test requires devmem support") @ksft_disruptive def check_rx(cfg) -> None: - require_devmem(cfg) - - port = rand_port() - socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" - listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" - - with bkg(listen_cmd, exit_wait=True) as ncdevmem: - wait_port_listen(port) - cmd(f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | \ - head -c 1K | {socat}", host=cfg.remote, shell=True) - - ksft_eq(ncdevmem.ret, 0) + """Run the devmem RX test.""" + run_rx(cfg) @ksft_disruptive def check_tx(cfg) -> None: - require_devmem(cfg) - - port = rand_port() - listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - - with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: - wait_port_listen(port, host=cfg.remote) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) - - ksft_eq(socat.stdout.strip(), "hello\nworld") + """Run the devmem TX test.""" + run_tx(cfg) @ksft_disruptive def check_tx_chunks(cfg) -> None: - require_devmem(cfg) - - port = rand_port() - listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - - with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: - wait_port_listen(port, host=cfg.remote) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) - - ksft_eq(socat.stdout.strip(), "hello\nworld") + """Run the devmem TX chunking test.""" + run_tx_chunks(cfg) def check_rx_hds(cfg) -> None: - """Test HDS splitting across payload sizes.""" - require_devmem(cfg) - - for size in [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]: - port = rand_port() - listen_cmd = f"{cfg.bin_local} -L -l -f 
{cfg.ifname} -s {cfg.addr} -p {port}" - - with bkg(listen_cmd, exit_wait=True) as ncdevmem: - wait_port_listen(port) - cmd(f"dd if=/dev/zero bs={size} count=1 2>/dev/null | " + - f"socat -b {size} -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},nodelay", - host=cfg.remote, shell=True) - - ksft_eq(ncdevmem.ret, 0, f"HDS failed for payload size {size}") + """Run the HDS test.""" + run_rx_hds(cfg) def main() -> None: + """Run the devmem test cases.""" with NetDrvEpEnv(__file__) as cfg: - cfg.bin_local = path.abspath(path.dirname(__file__) + "/ncdevmem") - cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) - + setup_test(cfg, path.abspath(path.dirname(__file__) + "/ncdevmem")) ksft_run([check_rx, check_tx, check_tx_chunks, check_rx_hds], - args=(cfg, )) + args=(cfg,)) ksft_exit() diff --git a/tools/testing/selftests/drivers/net/hw/devmem_lib.py b/tools/testing/selftests/drivers/net/hw/devmem_lib.py new file mode 100644 index 0000000000000..0921ff03eb81c --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/devmem_lib.py @@ -0,0 +1,222 @@ +# SPDX-License-Identifier: GPL-2.0 +"""Shared helpers for devmem TCP selftests.""" + +import re + +from lib.py import (bkg, cmd, defer, ethtool, rand_port, wait_port_listen, + ksft_eq, KsftSkipEx, NetNSEnter, EthtoolFamily, + NetdevFamily) + + +def require_devmem(cfg): + """Probe ncdevmem on cfg.ifname and SKIP the test if devmem isn't supported.""" + if not hasattr(cfg, "devmem_probed"): + probe_command = f"{cfg.bin_local} -f {cfg.ifname}" + cfg.devmem_supported = cmd(probe_command, fail=False, shell=True).ret == 0 + cfg.devmem_probed = True + + if not cfg.devmem_supported: + raise KsftSkipEx("Test requires devmem support") + + +def configure_nic(cfg): + """Channels, rings, RSS, queue lease for netkit devmem.""" + if not hasattr(cfg, 'netns'): + return + + cfg.require_ipver('6') + ethnl = EthtoolFamily() + + channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}}) + channels = channels['combined-count'] + if channels < 
2: + raise KsftSkipEx( + 'Test requires NETIF with at least 2 combined channels' + ) + + rings = ethnl.rings_get({'header': {'dev-index': cfg.ifindex}}) + orig_rx_rings = rings['rx'] + orig_hds_thresh = rings.get('hds-thresh', 0) + orig_data_split = rings.get('tcp-data-split', 'unknown') + + ethnl.rings_set({'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': 'enabled', + 'hds-thresh': 0, + 'rx': min(64, orig_rx_rings)}) + defer(ethnl.rings_set, {'header': {'dev-index': cfg.ifindex}, + 'tcp-data-split': orig_data_split, + 'hds-thresh': orig_hds_thresh, + 'rx': orig_rx_rings}) + + cfg.src_queue = channels - 1 + ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") + defer(ethtool, f"-X {cfg.ifname} default") + + if not hasattr(cfg, 'nk_queue'): + with NetNSEnter(str(cfg.netns)): + netdevnl = NetdevFamily() + lease_result = netdevnl.queue_create({ + "ifindex": cfg.nk_guest_ifindex, + "type": "rx", + "lease": { + "ifindex": cfg.ifindex, + "queue": {"id": cfg.src_queue, "type": "rx"}, + "netns-id": 0, + }, + }) + cfg.nk_queue = lease_result['id'] + + +def set_flow_rule(cfg, port): + """Install a flow rule steering to src_queue and return the flow rule ID.""" + output = ethtool( + f"-N {cfg.ifname} flow-type tcp6 dst-port {port}" + f" action {cfg.src_queue}" + ).stdout + return int(re.search(r'ID (\d+)', output).group(1)) + + +def ncdevmem_rx(cfg, port, verify=True, fail_on_linear=False, flow_steer=False): + """Build the ncdevmem RX listener command.""" + if hasattr(cfg, 'netns'): + flow_rule_id = set_flow_rule(cfg, port) + defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") + + ifname = cfg.nk_guest_ifname + addr = cfg.nk_guest_ipv6 + extras = [f"-t {cfg.nk_queue}", "-q 1", "-n"] + else: + ifname = cfg.ifname + addr = cfg.addr + extras = [] + if flow_steer: + extras.append(f"-c {cfg.remote_addr}") + + if verify: + extras.append("-v 7") + if fail_on_linear: + extras.append("-L") + + parts = [cfg.bin_local, "-l", f"-f {ifname}", f"-s {addr}", + f"-p {port}", 
*extras] + return " ".join(parts) + + +def ncdevmem_tx(cfg, port, chunk_size=0): + """Build the ncdevmem TX send command.""" + if hasattr(cfg, 'netns'): + ifname = cfg.nk_guest_ifname + addr = cfg.remote_addr_v['6'] + extras = ["-t 0", "-q 1", "-n"] + else: + ifname = cfg.ifname + addr = cfg.remote_addr + extras = [] + + if chunk_size: + extras.append(f"-z {chunk_size}") + + parts = [cfg.bin_local, f"-f {ifname}", f"-s {addr}", + f"-p {port}", *extras] + return " ".join(parts) + + +def socat_send(cfg, port, buf_size=0): + """Socat command for sending to the devmem listener. + + When buf_size > 0, force one TCP segment per write of exactly that size by + setting socat's buffer (-b) and disabling Nagle (TCP_NODELAY). + """ + proto = f"TCP{cfg.addr_ipver}" + + if hasattr(cfg, 'netns'): + addr = f"[{cfg.nk_guest_ipv6}]" + else: + addr = cfg.baddr + + suffix = f",bind={cfg.remote_baddr}:{port}" + + buf = "" + if buf_size: + buf = f"-b {buf_size}" + suffix += ",nodelay" + + return f"socat {buf} -u - {proto}:{addr}:{port}{suffix}" + + +def socat_listen(cfg, port): + """Socat listen command for TX tests.""" + return f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" + + +def setup_test(cfg, bin_local): + """Stash the local ncdevmem path on cfg and deploy it to the remote.""" + cfg.bin_local = bin_local + cfg.bin_remote = cfg.remote.deploy(cfg.bin_local) + + +def run_rx(cfg): + """Run the devmem RX test.""" + require_devmem(cfg) + configure_nic(cfg) + port = rand_port() + socat = socat_send(cfg, port) + data_pipe = (f"yes $(echo -e \x01\x02\x03\x04\x05\x06) | head -c 1K" + f" | {socat}") + netns = getattr(cfg, "netns", None) + + listen_cmd = ncdevmem_rx(cfg, port, flow_steer=not hasattr(cfg, 'netns')) + with bkg(listen_cmd, exit_wait=True, ns=netns) as ncdevmem: + wait_port_listen(port, proto="tcp", ns=netns) + cmd(data_pipe, host=cfg.remote, shell=True) + ksft_eq(ncdevmem.ret, 0) + + +def run_tx(cfg): + """Run the devmem TX test.""" + require_devmem(cfg) + configure_nic(cfg) 
+ netns = getattr(cfg, "netns", None) + port = rand_port() + tx_cmd = ncdevmem_tx(cfg, port) + listen_cmd = socat_listen(cfg, port) + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"bash -c 'echo -e \"hello\\nworld\" | {tx_cmd}'", ns=netns, shell=True) + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def run_tx_chunks(cfg): + """Run the devmem TX chunking test.""" + require_devmem(cfg) + configure_nic(cfg) + netns = getattr(cfg, "netns", None) + port = rand_port() + tx_cmd = ncdevmem_tx(cfg, port, chunk_size=3) + listen_cmd = socat_listen(cfg, port) + + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"bash -c 'echo -e \"hello\\nworld\" | {tx_cmd}'", ns=netns, shell=True) + ksft_eq(socat.stdout.strip(), "hello\nworld") + + +def run_rx_hds(cfg): + """Run the HDS test by running devmem RX across a segment size sweep.""" + require_devmem(cfg) + configure_nic(cfg) + netns = getattr(cfg, "netns", None) + + for size in [1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]: + port = rand_port() + + listen_cmd = ncdevmem_rx(cfg, port, verify=False, + fail_on_linear=True) + socat = socat_send(cfg, port, buf_size=size) + + with bkg(listen_cmd, exit_wait=True, ns=netns) as ncdevmem: + wait_port_listen(port, proto="tcp", ns=netns) + cmd(f"dd if=/dev/zero bs={size} count=1 2>/dev/null | " + f"{socat}", host=cfg.remote, shell=True) + ksft_eq(ncdevmem.ret, 0, f"HDS failed for payload size {size}") diff --git a/tools/testing/selftests/drivers/net/hw/gro_hw.py b/tools/testing/selftests/drivers/net/hw/gro_hw.py index 10e08b22ee0ed..70e76e3888bdb 100755 --- a/tools/testing/selftests/drivers/net/hw/gro_hw.py +++ b/tools/testing/selftests/drivers/net/hw/gro_hw.py @@ -51,11 +51,21 @@ def _resolve_dmac(cfg, ipver): return getattr(cfg, attr) +def _require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not 
features["ntuple-filters"]["active"]: + if features["ntuple-filters"]["fixed"]: + raise KsftSkipEx("Device does not support ntuple-filters") + ethtool(f"-K {cfg.ifname} ntuple-filters on") + defer(ethtool, f"-K {cfg.ifname} ntuple-filters off") + + def _setup_isolated_queue(cfg): """Set up an isolated queue for testing using ntuple filter. Remove queue 1 from the default RSS context and steer test traffic to it. """ + _require_ntuple(cfg) test_queue = 1 qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py index e81724cb5542a..d72b76ba08356 100755 --- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py +++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py @@ -100,12 +100,22 @@ def rss(cfg): defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") +def _require_ntuple(cfg): + features = ethtool(f"-k {cfg.ifname}", json=True)[0] + if not features["ntuple-filters"]["active"]: + if features["ntuple-filters"]["fixed"]: + raise KsftSkipEx("Device does not support ntuple-filters") + ethtool(f"-K {cfg.ifname} ntuple-filters on") + defer(ethtool, f"-K {cfg.ifname} ntuple-filters off") + + @ksft_variants([ KsftNamedVariant("single", single), KsftNamedVariant("rss", rss), ]) def test_zcrx(cfg, setup) -> None: cfg.require_ipver('6') + _require_ntuple(cfg) setup(cfg) rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target}" @@ -121,6 +131,7 @@ def test_zcrx(cfg, setup) -> None: ]) def test_zcrx_oneshot(cfg, setup) -> None: cfg.require_ipver('6') + _require_ntuple(cfg) setup(cfg) rx_cmd = f"{cfg.bin_local} -s -p {cfg.port} -i {cfg.ifname} -q {cfg.target} -o 4" @@ -134,6 +145,7 @@ def test_zcrx_large_chunks(cfg) -> None: """Test zcrx with large buffer chunks.""" cfg.require_ipver('6') + _require_ntuple(cfg) hp_file = "/proc/sys/vm/nr_hugepages" with open(hp_file, 'r+', encoding='utf-8') as f: diff --git 
a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index e098d6534c3c8..d96e8a3b5a652 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -93,6 +93,7 @@ static char *port; static size_t do_validation; static int start_queue = -1; static int num_queues = -1; +static int skip_config; static char *ifname; static unsigned int ifindex; static unsigned int dmabuf_id; @@ -828,7 +829,7 @@ static struct netdev_queue_id *create_queues(void) static int do_server(struct memory_buffer *mem) { - struct ethtool_rings_get_rsp *ring_config; + struct ethtool_rings_get_rsp *ring_config = NULL; char ctrl_data[sizeof(int) * 20000]; size_t non_page_aligned_frags = 0; struct sockaddr_in6 client_addr; @@ -851,27 +852,29 @@ static int do_server(struct memory_buffer *mem) return -1; } - ring_config = get_ring_config(); - if (!ring_config) { - pr_err("Failed to get current ring configuration"); - return -1; - } + if (!skip_config) { + ring_config = get_ring_config(); + if (!ring_config) { + pr_err("Failed to get current ring configuration"); + return -1; + } - if (configure_headersplit(ring_config, 1)) { - pr_err("Failed to enable TCP header split"); - goto err_free_ring_config; - } + if (configure_headersplit(ring_config, 1)) { + pr_err("Failed to enable TCP header split"); + goto err_free_ring_config; + } - /* Configure RSS to divert all traffic from our devmem queues */ - if (configure_rss()) { - pr_err("Failed to configure rss"); - goto err_reset_headersplit; - } + /* Configure RSS to divert all traffic from our devmem queues */ + if (configure_rss()) { + pr_err("Failed to configure rss"); + goto err_reset_headersplit; + } - /* Flow steer our devmem flows to start_queue */ - if (configure_flow_steering(&server_sin)) { - pr_err("Failed to configure flow steering"); - goto err_reset_rss; + /* Flow steer our devmem flows to start_queue */ + if 
(configure_flow_steering(&server_sin)) { + pr_err("Failed to configure flow steering"); + goto err_reset_rss; + } } if (bind_rx_queue(ifindex, mem->fd, create_queues(), num_queues, &ys)) { @@ -1052,13 +1055,17 @@ err_free_tmp: err_unbind: ynl_sock_destroy(ys); err_reset_flow_steering: - reset_flow_steering(); + if (!skip_config) + reset_flow_steering(); err_reset_rss: - reset_rss(); + if (!skip_config) + reset_rss(); err_reset_headersplit: - restore_ring_config(ring_config); + if (!skip_config) + restore_ring_config(ring_config); err_free_ring_config: - ethtool_rings_get_rsp_free(ring_config); + if (!skip_config) + ethtool_rings_get_rsp_free(ring_config); return err; } @@ -1404,7 +1411,7 @@ int main(int argc, char *argv[]) int is_server = 0, opt; int ret, err = 1; - while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:")) != -1) { + while ((opt = getopt(argc, argv, "Lls:c:p:v:q:t:f:z:n")) != -1) { switch (opt) { case 'L': fail_on_linear = true; @@ -1436,6 +1443,9 @@ int main(int argc, char *argv[]) case 'z': max_chunk = atoi(optarg); break; + case 'n': + skip_config = 1; + break; case '?': fprintf(stderr, "unknown option: %c\n", optopt); break; diff --git a/tools/testing/selftests/drivers/net/hw/nk_devmem.py b/tools/testing/selftests/drivers/net/hw/nk_devmem.py new file mode 100755 index 0000000000000..300ed2a70ab44 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_devmem.py @@ -0,0 +1,46 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 +"""Test devmem TCP with netkit.""" + +import os +from devmem_lib import setup_test, run_rx, run_tx, run_tx_chunks, run_rx_hds +from lib.py import ksft_run, ksft_exit, ksft_disruptive +from lib.py import NetDrvContEnv + + +@ksft_disruptive +def check_nk_rx(cfg) -> None: + """Run the devmem RX test through netkit.""" + run_rx(cfg) + + +@ksft_disruptive +def check_nk_tx(cfg) -> None: + """Run the devmem TX test through netkit.""" + run_tx(cfg) + + +@ksft_disruptive +def check_nk_tx_chunks(cfg) -> None: + 
"""Run the devmem TX chunking test through netkit.""" + run_tx_chunks(cfg) + + +def check_nk_rx_hds(cfg) -> None: + """Run the HDS test through netkit.""" + run_rx_hds(cfg) + + +def main() -> None: + """Run the netkit devmem test cases.""" + with NetDrvContEnv(__file__, rxqueues=2, primary_rx_redirect=True) as cfg: + setup_test(cfg, + os.path.join(os.path.dirname(os.path.abspath(__file__)), + "ncdevmem")) + ksft_run([check_nk_rx, check_nk_tx, check_nk_tx_chunks, + check_nk_rx_hds], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/drivers/net/hw/nk_primary_rx_redirect.bpf.c b/tools/testing/selftests/drivers/net/hw/nk_primary_rx_redirect.bpf.c new file mode 100644 index 0000000000000..46ff494b23ded --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/nk_primary_rx_redirect.bpf.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ipv6.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#define ctx_ptr(field) ((void *)(long)(field)) + +volatile __u32 phys_ifindex; + +SEC("tc/ingress") +int nk_primary_rx_redirect(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ethhdr *eth; + struct ipv6hdr *ip6h; + + eth = data; + if ((void *)(eth + 1) > data_end) + return TC_ACT_OK; + + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + ip6h = data + sizeof(struct ethhdr); + if ((void *)(ip6h + 1) > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr == IPPROTO_ICMPV6) + return TC_ACT_OK; + + return bpf_redirect_neigh(phys_ifindex, NULL, 0, 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/drivers/net/hw/nk_qlease.py b/tools/testing/selftests/drivers/net/hw/nk_qlease.py index aa83dc321328a..139a91ebd229b 100755 --- a/tools/testing/selftests/drivers/net/hw/nk_qlease.py +++ 
b/tools/testing/selftests/drivers/net/hw/nk_qlease.py @@ -71,7 +71,7 @@ def test_iou_zcrx(cfg) -> None: flow_rule_id = set_flow_rule(cfg) defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}") - rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg.nk_guest_ifname} -q {cfg.nk_queue}" tx_cmd = f"{cfg.bin_remote} -c -h {cfg.nk_guest_ipv6} -p {cfg.port} -l 12840" with bkg(rx_cmd, exit_wait=True): wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) @@ -128,7 +128,7 @@ def test_attach_xdp_with_mp(cfg) -> None: netdevnl = NetdevFamily() - rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg.nk_guest_ifname} -q {cfg.nk_queue}" with bkg(rx_cmd): wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) @@ -178,7 +178,7 @@ def test_destroy(cfg) -> None: ethtool(f"-X {cfg.ifname} equal {cfg.src_queue}") defer(ethtool, f"-X {cfg.ifname} default") - rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg._nk_guest_ifname} -q {cfg.nk_queue}" + rx_cmd = f"ip netns exec {cfg.netns.name} {cfg.bin_local} -s -p {cfg.port} -i {cfg.nk_guest_ifname} -q {cfg.nk_queue}" rx_proc = cmd(rx_cmd, background=True) wait_port_listen(cfg.port, proto="tcp", ns=cfg.netns) @@ -196,7 +196,7 @@ def test_destroy(cfg) -> None: ip(f"link del dev {cfg._nk_host_ifname}") kill_timer.join() cfg._nk_host_ifname = None - cfg._nk_guest_ifname = None + cfg.nk_guest_ifname = None queue_info = netdevnl.queue_get( {"ifindex": cfg.ifindex, "id": cfg.src_queue, "type": "rx"} diff --git a/tools/testing/selftests/drivers/net/hw/ntuple.py b/tools/testing/selftests/drivers/net/hw/ntuple.py index 232733142c026..ef4604bfa8ef4 100755 --- a/tools/testing/selftests/drivers/net/hw/ntuple.py +++ 
b/tools/testing/selftests/drivers/net/hw/ntuple.py @@ -22,7 +22,10 @@ class NtupleField(Enum): def _require_ntuple(cfg): features = ethtool(f"-k {cfg.ifname}", json=True)[0] if not features["ntuple-filters"]["active"]: - raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + if features["ntuple-filters"]["fixed"]: + raise KsftSkipEx("Device does not support ntuple-filters") + ethtool(f"-K {cfg.ifname} ntuple-filters on") + defer(ethtool, f"-K {cfg.ifname} ntuple-filters off") def _get_rx_cnts(cfg, prev=None): diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 51f4e7bc3e5d8..f36f76d6ca592 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -9,7 +9,7 @@ from lib.py import ksft_disruptive from lib.py import ksft_run, ksft_pr, ksft_exit from lib.py import ksft_eq, ksft_ne, ksft_ge, ksft_in, ksft_lt, ksft_true, ksft_raises from lib.py import NetDrvEpEnv -from lib.py import EthtoolFamily, NetdevFamily +from lib.py import EthtoolFamily, NetdevFamily, NlError from lib.py import KsftSkipEx, KsftFailEx from lib.py import rand_port, rand_ports from lib.py import cmd, ethtool, ip, defer, CmdExitFailure, wait_file @@ -57,9 +57,10 @@ def ethtool_create(cfg, act, opts): def require_ntuple(cfg): features = ethtool(f"-k {cfg.ifname}", json=True)[0] if not features["ntuple-filters"]["active"]: - # ntuple is more of a capability than a config knob, don't bother - # trying to enable it (until some driver actually needs it). 
- raise KsftSkipEx("Ntuple filters not enabled on the device: " + str(features["ntuple-filters"])) + if features["ntuple-filters"]["fixed"]: + raise KsftSkipEx("Device does not support ntuple-filters") + ethtool(f"-K {cfg.ifname} ntuple-filters on") + defer(ethtool, f"-K {cfg.ifname} ntuple-filters off") def require_context_cnt(cfg, need_cnt): @@ -828,6 +829,94 @@ def test_rss_default_context_rule(cfg): 'noise' : (0, 1) }) +def _set_flow_hash(cfg, fl_type, fields, context=0): + req = {"header": {"dev-index": cfg.ifindex}, + "flow-hash": {fl_type: fields}} + if context: + req["context"] = context + cfg.ethnl.rss_set(req) + + +def _get_flow_hash(cfg, fl_type, context=0): + req = {"header": {"dev-index": cfg.ifindex}} + if context: + req["context"] = context + rss = cfg.ethnl.rss_get(req) + return rss.get("flow-hash", {}).get(fl_type, set()) + + +def test_rss_context_flow_hash(cfg): + """ + Validate, with traffic, that an additional RSS context honors the + flow-hash field selection. If the driver lacks per-context field + configuration ("ops->rxfh_per_ctx_fields") fall back to setting the + fields on the main context, which the kernel applies device-wide. 
+ """ + + require_ntuple(cfg) + + queue_cnt = len(_get_rx_cnts(cfg)) + if queue_cnt < 6: + try: + ksft_pr(f"Increasing queue count {queue_cnt} -> 6") + ethtool(f"-L {cfg.ifname} combined 6") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") + except CmdExitFailure as exc: + raise KsftSkipEx("Not enough queues for the test") from exc + + fl_type = f"tcp{cfg.addr_ipver}" + if not _get_flow_hash(cfg, fl_type): + raise KsftSkipEx(f"Device does not report flow-hash for {fl_type}") + + # Reserve queues 0/1 for main, build a new context spanning 2..5 + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") + ctx_id = ethtool_create(cfg, "-X", "context new start 2 equal 4") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + + port = rand_port() + flow = f"flow-type {fl_type} dst-ip {cfg.addr} dst-port {port} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + ip_only = {"ip-src", "ip-dst"} + ip_l4 = ip_only | {"l4-b-0-1", "l4-b-2-3"} + + # Try per-context flow-hash; fall back to main context if unsupported. + cfg_ctx = ctx_id + try: + orig = _get_flow_hash(cfg, fl_type, context=ctx_id) + _set_flow_hash(cfg, fl_type, ip_only, context=ctx_id) + except NlError: + ksft_pr("Per-context flow-hash not supported, using device-wide") + cfg_ctx = 0 + orig = _get_flow_hash(cfg, fl_type) + _set_flow_hash(cfg, fl_type, ip_only) + defer(_set_flow_hash, cfg, fl_type, orig, context=cfg_ctx) + + def measure(): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + ctx_cnts = cnts[2:6] + directed = sum(ctx_cnts) + used = sum(1 for c in ctx_cnts if c > directed / 200) + return cnts, directed, used + + # IP-only hash: iperf3 streams share src/dst IP, all should land on the + # same queue inside the context's range. 
+ cnts, directed, used = measure() + ksft_ge(directed, 20000, f"traffic on context {ctx_id} (IP-only): {cnts}") + ksft_eq(used, 1, f"IP-only hash should use one queue in context {ctx_id}, got: {cnts}") + + # IP+L4 hash: streams have distinct src ports, traffic should spread. + _set_flow_hash(cfg, fl_type, ip_l4, context=cfg_ctx) + + cnts, directed, used = measure() + ksft_ge(directed, 20000, f"traffic on context {ctx_id} (IP+L4): {cnts}") + ksft_ge(used, 2, f"IP+L4 hash should spread across context {ctx_id} queues, got: {cnts}") + + @ksft_disruptive def test_rss_context_persist_ifupdown(cfg, pre_down=False): """ @@ -935,6 +1024,7 @@ def main() -> None: test_flow_add_context_missing, test_delete_rss_context_busy, test_rss_ntuple_addition, test_rss_default_context_rule, + test_rss_context_flow_hash, test_rss_context_persist_create_and_ifdown, test_rss_context_persist_ifdown_and_create], args=(cfg, )) diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 2b5ec0505672c..09aac4ce67bcf 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -23,7 +23,8 @@ try: NlError, RtnlFamily, DevlinkFamily, PSPFamily, Netlink from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, rand_ports, wait_port_listen, wait_file + fd_read_timeout, ip, rand_port, rand_ports, tc, wait_port_listen, \ + wait_file from net.lib.py import bpf_map_set, bpf_map_dump, bpf_prog_map_ids from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ @@ -36,7 +37,7 @@ try: "NlError", "RtnlFamily", "DevlinkFamily", "PSPFamily", "Netlink", "CmdExitFailure", "bkg", "cmd", "bpftool", "bpftrace", "defer", "ethtool", - "fd_read_timeout", "ip", "rand_port", "rand_ports", + "fd_read_timeout", "ip", "rand_port", 
"rand_ports", "tc", "wait_port_listen", "wait_file", "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", "KsftSkipEx", "KsftFailEx", "KsftXfailEx", diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 24ce122abd9cb..ef317aef3a0ad 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -2,6 +2,7 @@ import ipaddress import os +import sys import time import json from pathlib import Path @@ -336,15 +337,18 @@ class NetDrvContEnv(NetDrvEpEnv): +---------------+ """ - def __init__(self, src_path, rxqueues=1, **kwargs): + def __init__(self, src_path, rxqueues=1, primary_rx_redirect=False, **kwargs): self.netns = None self._nk_host_ifname = None - self._nk_guest_ifname = None + self.nk_guest_ifname = None self._tc_clsact_added = False self._tc_attached = False + self._primary_rx_redirect_attached = False + self._primary_rx_redirect_clsact_added = False self._bpf_prog_pref = None self._bpf_prog_id = None self._init_ns_attached = False + self._remote_route_added = False self._old_fwd = None self._old_accept_ra = None @@ -390,14 +394,24 @@ class NetDrvContEnv(NetDrvEpEnv): netkit_links.sort(key=lambda x: x['ifindex']) self._nk_host_ifname = netkit_links[1]['ifname'] - self._nk_guest_ifname = netkit_links[0]['ifname'] + self.nk_guest_ifname = netkit_links[0]['ifname'] self.nk_host_ifindex = netkit_links[1]['ifindex'] self.nk_guest_ifindex = netkit_links[0]['ifindex'] self._setup_ns() self._attach_bpf() + if primary_rx_redirect: + self._attach_primary_rx_redirect_bpf() def __del__(self): + if self._primary_rx_redirect_attached: + cmd(f"tc filter del dev {self._nk_host_ifname} ingress", fail=False) + self._primary_rx_redirect_attached = False + + if self._primary_rx_redirect_clsact_added: + cmd(f"tc qdisc del dev {self._nk_host_ifname} clsact", fail=False) + self._primary_rx_redirect_clsact_added = False + if self._tc_attached: cmd(f"tc filter del 
dev {self.ifname} ingress pref {self._bpf_prog_pref}") self._tc_attached = False @@ -406,10 +420,15 @@ class NetDrvContEnv(NetDrvEpEnv): cmd(f"tc qdisc del dev {self.ifname} clsact") self._tc_clsact_added = False + if self._remote_route_added: + cmd(f"ip -6 route del {self.nk_guest_ipv6}/128", + host=self.remote, fail=False) + self._remote_route_added = False + if self._nk_host_ifname: cmd(f"ip link del dev {self._nk_host_ifname}") self._nk_host_ifname = None - self._nk_guest_ifname = None + self.nk_guest_ifname = None if self._init_ns_attached: cmd("ip netns del init", fail=False) @@ -448,24 +467,30 @@ class NetDrvContEnv(NetDrvEpEnv): cmd("ip netns attach init 1") self._init_ns_attached = True ip("netns set init 0", ns=self.netns) - ip(f"link set dev {self._nk_guest_ifname} netns {self.netns.name}") + ip(f"link set dev {self.nk_guest_ifname} netns {self.netns.name}") ip(f"link set dev {self._nk_host_ifname} up") ip(f"-6 addr add fe80::1/64 dev {self._nk_host_ifname} nodad") ip(f"-6 route add {self.nk_guest_ipv6}/128 via fe80::2 dev {self._nk_host_ifname}") ip("link set lo up", ns=self.netns) - ip(f"link set dev {self._nk_guest_ifname} up", ns=self.netns) - ip(f"-6 addr add fe80::2/64 dev {self._nk_guest_ifname}", ns=self.netns) - ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self._nk_guest_ifname} nodad", ns=self.netns) - ip(f"-6 route add default via fe80::1 dev {self._nk_guest_ifname}", ns=self.netns) + ip(f"link set dev {self.nk_guest_ifname} up", ns=self.netns) + ip(f"-6 addr add fe80::2/64 dev {self.nk_guest_ifname}", ns=self.netns) + ip(f"-6 addr add {self.nk_guest_ipv6}/64 dev {self.nk_guest_ifname} nodad", ns=self.netns) + ip(f"-6 route add default via fe80::1 dev {self.nk_guest_ifname}", ns=self.netns) - def _tc_ensure_clsact(self): - qdisc = json.loads(cmd(f"tc -j qdisc show dev {self.ifname}").stdout) + def _tc_ensure_clsact(self, ifname=None): + """Ensure a clsact qdisc exists on @ifname. 
+ + Returns True if this call added the qdisc, otherwise returns False. + """ + if ifname is None: + ifname = self.ifname + qdisc = json.loads(cmd(f"tc -j qdisc show dev {ifname}").stdout) for q in qdisc: if q['kind'] == 'clsact': - return - cmd(f"tc qdisc add dev {self.ifname} clsact") - self._tc_clsact_added = True + return False + cmd(f"tc qdisc add dev {ifname} clsact") + return True def _get_bpf_prog_ids(self): filters = json.loads(cmd(f"tc -j filter show dev {self.ifname} ingress").stdout) @@ -476,28 +501,28 @@ class NetDrvContEnv(NetDrvEpEnv): return (bpf['pref'], bpf['options']['prog']['id']) raise Exception("Failed to get BPF prog ID") + def _find_bss_map_id(self, prog_id): + """Find the .bss map ID for a loaded BPF program.""" + prog_info = bpftool(f"prog show id {prog_id}", json=True) + for map_id in prog_info.get("map_ids", []): + map_info = bpftool(f"map show id {map_id}", json=True) + if map_info.get("name", "").endswith("bss"): + return map_id + raise Exception(f"Failed to find .bss map for prog {prog_id}") + def _attach_bpf(self): bpf_obj = self.test_dir / "nk_forward.bpf.o" if not bpf_obj.exists(): raise KsftSkipEx("BPF prog not found") - self._tc_ensure_clsact() + if self._tc_ensure_clsact(): + self._tc_clsact_added = True cmd(f"tc filter add dev {self.ifname} ingress bpf obj {bpf_obj}" " sec tc/ingress direct-action") self._tc_attached = True (self._bpf_prog_pref, self._bpf_prog_id) = self._get_bpf_prog_ids() - prog_info = bpftool(f"prog show id {self._bpf_prog_id}", json=True) - map_ids = prog_info.get("map_ids", []) - - bss_map_id = None - for map_id in map_ids: - map_info = bpftool(f"map show id {map_id}", json=True) - if map_info.get("name").endswith("bss"): - bss_map_id = map_id - - if bss_map_id is None: - raise Exception("Failed to find .bss map") + bss_map_id = self._find_bss_map_id(self._bpf_prog_id) ipv6_addr = ipaddress.IPv6Address(self.ipv6_prefix) ipv6_bytes = ipv6_addr.packed @@ -505,3 +530,36 @@ class NetDrvContEnv(NetDrvEpEnv): 
value = ipv6_bytes + ifindex_bytes value_hex = ' '.join(f'{b:02x}' for b in value) bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}") + + def _attach_primary_rx_redirect_bpf(self): + """Attach BPF redirect program on the primary netkit ingress.""" + bpf_obj = self.test_dir / "nk_primary_rx_redirect.bpf.o" + if not bpf_obj.exists(): + raise KsftSkipEx("Primary RX redirect BPF prog not found") + + if self._tc_ensure_clsact(self._nk_host_ifname): + self._primary_rx_redirect_clsact_added = True + cmd(f"tc filter add dev {self._nk_host_ifname} ingress" + f" bpf obj {bpf_obj} sec tc/ingress direct-action") + self._primary_rx_redirect_attached = True + + ip(f"-6 route add {self.nk_guest_ipv6}/128 via {self.addr_v['6']}", + host=self.remote) + self._remote_route_added = True + + filters = json.loads( + cmd(f"tc -j filter show dev {self._nk_host_ifname} ingress").stdout) + redirect_prog_id = None + for bpf in filters: + if 'options' not in bpf: + continue + if bpf['options']['bpf_name'].startswith('nk_primary_rx_redirect'): + redirect_prog_id = bpf['options']['prog']['id'] + break + if redirect_prog_id is None: + raise Exception("Failed to get primary RX redirect BPF prog ID") + + bss_map_id = self._find_bss_map_id(redirect_prog_id) + phys_ifindex_bytes = self.ifindex.to_bytes(4, byteorder=sys.byteorder) + value_hex = ' '.join(f'{b:02x}' for b in phys_ifindex_bytes) + bpftool(f"map update id {bss_map_id} key hex 00 00 00 00 value hex {value_hex}") diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/drivers/net/so_txtime.c index b76df1efc2efc..75f3beef13d9e 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/drivers/net/so_txtime.c @@ -33,9 +33,12 @@ #include <unistd.h> #include <poll.h> +#include "kselftest.h" + static int cfg_clockid = CLOCK_TAI; static uint16_t cfg_port = 8000; static int cfg_variance_us = 4000; +static bool cfg_machine_slow; static uint64_t cfg_start_time_ns; static 
int cfg_mark; static bool cfg_rx; @@ -43,6 +46,8 @@ static bool cfg_rx; static uint64_t glob_tstart; static uint64_t tdeliver_max; +static int errors; + /* encode one timed transmission (of a 1B payload) */ struct timed_send { char data; @@ -131,13 +136,15 @@ static void do_recv_one(int fdr, struct timed_send *ts) fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n", rbuf[0], (long long)tstop, (long long)texpect); - if (rbuf[0] != ts->data) - error(1, 0, "payload mismatch. expected %c", ts->data); + if (rbuf[0] != ts->data) { + fprintf(stderr, "payload mismatch. expected %c\n", ts->data); + errors++; + } if (llabs(tstop - texpect) > cfg_variance_us) { fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us); - if (!getenv("KSFT_MACHINE_SLOW")) - exit(1); + if (!cfg_machine_slow) + errors++; } } @@ -255,8 +262,12 @@ static void start_time_wait(void) return; now = gettime_ns(CLOCK_REALTIME); - if (cfg_start_time_ns < now) + if (cfg_start_time_ns < now) { + fprintf(stderr, "FAIL: start time already passed\n"); + if (!cfg_machine_slow) + errors++; return; + } err = usleep((cfg_start_time_ns - now) / 1000); if (err) @@ -316,6 +327,9 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen) if (bind(fd, addr, alen)) error(1, errno, "bind"); + if (cfg_machine_slow) + tv.tv_sec = 2; + if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) error(1, errno, "setsockopt rcv timeout"); @@ -502,6 +516,8 @@ static void parse_opts(int argc, char **argv) setup_sockaddr(domain, saddr, &cfg_src_addr); cfg_num_pkt = parse_io(argv[optind], cfg_buf); + + cfg_machine_slow = getenv("KSFT_MACHINE_SLOW"); } int main(int argc, char **argv) @@ -513,5 +529,10 @@ int main(int argc, char **argv) else do_test_tx((void *)&cfg_src_addr, cfg_alen); - return 0; + if (errors) { + fprintf(stderr, "FAIL: %d errors\n", errors); + return KSFT_FAIL; + } + + return KSFT_PASS; } diff --git a/tools/testing/selftests/drivers/net/so_txtime.py 
b/tools/testing/selftests/drivers/net/so_txtime.py new file mode 100755 index 0000000000000..5d4388bfc6dd9 --- /dev/null +++ b/tools/testing/selftests/drivers/net/so_txtime.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +"""Regression tests for the SO_TXTIME interface. + +Test delivery time in FQ and ETF qdiscs. +""" + +import os +import time + +from lib.py import ksft_exit, ksft_run, ksft_variants +from lib.py import KsftNamedVariant, KsftSkipEx +from lib.py import NetDrvEpEnv, bkg, cmd, defer, tc + + +def test_so_txtime(cfg, clockid, ipver, args_tx, args_rx, expect_success): + """Main function. Run so_txtime as sender and receiver.""" + slow_machine = os.environ.get('KSFT_MACHINE_SLOW') + + bin_path = cfg.test_dir / "so_txtime" + + tstart = time.time_ns() + (2000_000_000 if slow_machine else 200_000_000) + + cmd_addr = f"-S {cfg.addr_v[ipver]} -D {cfg.remote_addr_v[ipver]}" + cmd_base = f"{bin_path} -{ipver} -c {clockid} -t {tstart} {cmd_addr}" + cmd_rx = f"{cmd_base} {args_rx} -r" + cmd_tx = f"{cmd_base} {args_tx}" + + expect_fail = not expect_success + if slow_machine: + expect_success = False + + with bkg(cmd_rx, host=cfg.remote, fail=expect_success, + expect_fail=expect_fail, exit_wait=True): + cmd(cmd_tx) + + +def _qdisc_setup(ifname, qdisc, optargs=""): + """Replace root qdisc. Restore the original after the test. + + If the original is mq, children will be of type default_qdisc. 
+ """ + orig = tc(f"qdisc show dev {ifname} root", json=True)[0].get("kind", None) + defer(tc, f"qdisc replace dev {ifname} root {orig}") + tc(f"qdisc replace dev {ifname} root {qdisc} {optargs}") + + +def _test_variants_mono(): + for ipver in ["4", "6"]: + for testcase in [ + ["no_delay", "a,-1", "a,-1"], + ["zero_delay", "a,0", "a,0"], + ["one_pkt", "a,10", "a,10"], + ["in_order", "a,10,b,20", "a,10,b,20"], + ["reverse_order", "a,20,b,10", "b,10,a,20"], + ]: + name = f"v{ipver}_{testcase[0]}" + yield KsftNamedVariant(name, ipver, testcase[1], testcase[2]) + + +@ksft_variants(_test_variants_mono()) +def test_so_txtime_mono(cfg, ipver, args_tx, args_rx): + """Run all variants of monotonic (fq) tests.""" + _qdisc_setup(cfg.ifname, "fq") + test_so_txtime(cfg, "mono", ipver, args_tx, args_rx, True) + + +def _test_variants_etf(): + for ipver in ["4", "6"]: + for testcase in [ + ["no_delay", "a,-1", "a,-1", False], + ["zero_delay", "a,0", "a,0", False], + ["one_pkt", "a,10", "a,10", True], + ["in_order", "a,10,b,20", "a,10,b,20", True], + ["reverse_order", "a,20,b,10", "b,10,a,20", True], + ]: + name = f"v{ipver}_{testcase[0]}" + yield KsftNamedVariant( + name, ipver, testcase[1], testcase[2], testcase[3] + ) + + +@ksft_variants(_test_variants_etf()) +def test_so_txtime_etf(cfg, ipver, args_tx, args_rx, expect_fail): + """Run all variants of etf tests.""" + try: + _qdisc_setup(cfg.ifname, "etf", "clockid CLOCK_TAI delta 400000") + except Exception as e: + raise KsftSkipEx("tc does not support qdisc etf. 
skipping") from e + + test_so_txtime(cfg, "tai", ipver, args_tx, args_rx, expect_fail) + + +def main() -> None: + """Boilerplate ksft main.""" + with NetDrvEpEnv(__file__) as cfg: + ksft_run([test_so_txtime_mono, test_so_txtime_etf], args=(cfg,)) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index 97ad4d551d44d..02ad4c99a2b49 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -40,7 +40,6 @@ skf_net_off socket so_incoming_cpu so_netns_cookie -so_txtime so_rcv_listener stress_reuseport_listen tap diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index f3da38c54d276..5ca6c557fc3fa 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -69,6 +69,7 @@ TEST_PROGS := \ nl_netdev.py \ nl_nlctrl.py \ pmtu.sh \ + protodown.sh \ psock_snd.sh \ reuseaddr_ports_exhausted.sh \ reuseport_addr_any.sh \ @@ -83,7 +84,6 @@ TEST_PROGS := \ rxtimestamp.sh \ sctp_vrf.sh \ skf_net_off.sh \ - so_txtime.sh \ srv6_end_dt46_l3vpn_test.sh \ srv6_end_dt4_l3vpn_test.sh \ srv6_end_dt6_l3vpn_test.sh \ @@ -158,7 +158,6 @@ TEST_GEN_FILES := \ skf_net_off \ so_netns_cookie \ so_rcv_listener \ - so_txtime \ socket \ stress_reuseport_listen \ tcp_fastopen_backup_key \ @@ -177,6 +176,7 @@ TEST_GEN_PROGS := \ bind_timewait \ bind_wildcard \ epoll_busy_poll \ + getsockopt_iter \ icmp_rfc4884 \ ipv6_fragmentation \ proc_net_pktgen \ diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 94d722770420d..d07c5ac5cab7b 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -117,6 +117,7 @@ CONFIG_OPENVSWITCH=m CONFIG_OPENVSWITCH_GENEVE=m CONFIG_OPENVSWITCH_GRE=m CONFIG_OPENVSWITCH_VXLAN=m +CONFIG_PAGE_POOL_STATS=y CONFIG_PROC_SYSCTL=y CONFIG_PSAMPLE=m CONFIG_RPS=y diff --git 
a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index af64f93bb2e1f..8f10de0eb9853 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -12,7 +12,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance \ - ipv4_mpath_balance_preferred fib6_ra_to_static" + ipv4_mpath_balance_preferred fib6_ra_to_static fib6_temp_addr_renewal" VERBOSE=0 PAUSE_ON_FAIL=no @@ -1611,6 +1611,62 @@ fib6_ra_to_static() cleanup &> /dev/null } +fib6_temp_addr_renewal() { + setup + + echo + echo "Fib6 temporary address renewal test" + set -e + + # ra6 is required for the test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return + fi + + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.use_tempaddr=2 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.temp_prefered_lft=15 + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.max_desync_factor=0 + + # Send a RA message with a prefix from veth2. 
+ $NS_EXEC ra6 -i veth2 -s fe80::1 -d ff02::1 -P 2001:12::/64\#LA\#3600\#3600 -e + sleep 3 + + # Deprecate it + $NS_EXEC ra6 -i veth2 -s fe80::1 -d ff02::1 -P 2001:12::/64\#LA\#3600\#0 -e + sleep 3 + + # Restore it + $NS_EXEC ra6 -i veth2 -s fe80::1 -d ff02::1 -P 2001:12::/64\#LA\#3600\#3600 -e + + ret=1 + for i in $(seq 1 25); do + sleep 1 + num_dep="$($IP -6 addr | grep -c "temporary deprecated" || true)" + num_tot="$($IP -6 addr | grep -c "temporary" || true)" + + if [ "$num_dep" -eq 1 ] && [ "$num_tot" -ge 2 ]; then + ret=0 + break + fi + done + log_test "$ret" 0 "IPv6 temporary address cleanly deprecated and regenerated" + + set +e + + cleanup &> /dev/null +} + # add route for a prefix, flushing any existing routes first # expected to be the first step of a test add_route() @@ -3002,6 +3058,7 @@ do ipv6_mpath_balance) ipv6_mpath_balance_test;; ipv4_mpath_balance_preferred) ipv4_mpath_balance_preferred_test;; fib6_ra_to_static) fib6_ra_to_static;; + fib6_temp_addr_renewal) fib6_temp_addr_renewal;; help) echo "Test names: $TESTS"; exit 0;; esac diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh index ebdb4c790a5d7..c4cd2078a8db7 100755 --- a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh @@ -162,14 +162,27 @@ vlmc_query_cnt_setup() { local type=$1 local dev=$2 + local match=($3) if [[ $type == "igmp" ]]; then - tc filter add dev $dev egress pref 10 prot 802.1Q \ + # This matches: IP Protocol 2 (IGMP) + tc filter add dev "$dev" egress pref 10 prot 802.1Q \ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 224.0.0.1 ip_proto 2 \ + action goto chain 1 + # AND Type 0x11 (Query) at offset 0 of IGMP header + # 20 bytes IPv4 header + 4 bytes Router Alert option + IGMP[offset 0] + tc filter add dev "$dev" egress pref 20 chain 1 prot 802.1Q u32 \ + match u8 0x11 0xff at 24 "${match[@]}" \ action pass else - tc filter 
add dev $dev egress pref 10 prot 802.1Q \ + # This matches: ICMPv6 + tc filter add dev "$dev" egress pref 10 prot 802.1Q \ flower vlan_id 10 vlan_ethtype ipv6 dst_ip ff02::1 ip_proto icmpv6 \ + action goto chain 1 + # AND Type 0x82 (Query) at offset 0 of MLD header + # 40 bytes IPv6 header + 8 bytes Hop-by-hop option + MLD[offset 0] + tc filter add dev "$dev" egress pref 20 chain 1 prot 802.1Q u32 \ + match u8 0x82 0xff at 48 "${match[@]}" \ action pass fi @@ -181,7 +194,39 @@ vlmc_query_cnt_cleanup() local dev=$1 ip link set dev br0 type bridge mcast_stats_enabled 0 - tc filter del dev $dev egress pref 10 + tc filter del dev "$dev" egress pref 20 chain 1 + tc filter del dev "$dev" egress pref 10 +} + +vlmc_query_get_intvl_match() +{ + local type=$1 + local version=$2 + local test=$3 + local enc_val=$4 + + if [ "$test" = "qqic" ]; then + # QQIC is 8-bit floating point encoding for IGMPv3 and MLDv2 + if [ "${type}v${version}" = "igmpv3" ]; then + # QQIC is at offset 9 of IGMP header + # 20 bytes IPv4 header + 4 bytes Router Alert option + IGMP[offset 9] + echo "match u8 $enc_val 0xff at 33" + elif [ "${type}v${version}" = "mldv2" ]; then + # QQIC is at offset 25 of MLD header + # 40 bytes IPv6 header + 8 bytes Hop-by-hop option + MLD[offset 25] + echo "match u8 $enc_val 0xff at 73" + fi + elif [ "$test" = "mrc" ]; then + if [ "${type}v${version}" = "igmpv3" ]; then + # MRC is 8-bit floating point encoding at offset 1 of IGMP header + # 20 bytes IPv4 header + 4 bytes Router Alert option + IGMP[offset 1] + echo "match u8 $enc_val 0xff at 25" + elif [ "${type}v${version}" = "mldv2" ]; then + # MRC is 16-bit floating point encoding at offset 4 of MLD header + # 40 bytes IPv6 header + 8 bytes Hop-by-hop option + MLD[offset 4] + echo "match u16 $enc_val 0xffff at 52" + fi + fi } vlmc_check_query() @@ -191,9 +236,13 @@ vlmc_check_query() local dev=$3 local expect=$4 local time=$5 + local test=$6 + local enc_val=$7 + local intvl_match="" local ret=0 - vlmc_query_cnt_setup 
$type $dev + intvl_match="$(vlmc_query_get_intvl_match "$type" "$version" "$test" "$enc_val")" + vlmc_query_cnt_setup "$type" "$dev" "$intvl_match" local pre_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev) bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 @@ -201,7 +250,7 @@ vlmc_check_query() if [[ $ret -eq 0 ]]; then sleep $time - local tcstats=$(tc_rule_stats_get $dev 10 egress) + local tcstats=$(tc_rule_stats_get "$dev" 20 egress) local post_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev) if [[ $tcstats != $expect || \ @@ -448,8 +497,46 @@ vlmc_query_intvl_test() # 1 is sent immediately, then 2 more in the next 5 seconds vlmc_check_query igmp 2 $swp1 3 5 check_err $? "Wrong number of tagged IGMPv2 general queries sent" - log_test "Vlan 10 mcast_query_interval option changed to 200" + log_test "Number of tagged IGMPv2 general query" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3 + check_err $? "Could not set mcast_igmp_version in vlan 10" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2 + check_err $? "Could not set mcast_mld_version in vlan 10" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 6000 + check_err $? "Could not set mcast_query_interval in vlan 10" + # 1 is sent immediately, IGMPv3 QQIC should match with linear value 60 (0x3c) + # which is 8-bit encoded value of 60 [units of seconds] + vlmc_check_query igmp 3 $swp1 1 1 qqic 0x3c + check_err $? "Wrong QQIC in generated IGMPv3 general queries" + log_test "IGMPv3 QQIC linear value 60(s)" + + RET=0 + # 1 is sent immediately, MLDv2 QQIC should match with linear value 60 (0x3c) + # which is 8-bit encoded value of 60 [units of seconds] + vlmc_check_query mld 2 $swp1 1 1 qqic 0x3c + check_err $? 
"Wrong QQIC in generated MLDv2 general queries" + log_test "MLDv2 QQIC linear value 60(s)" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 16000 + check_err $? "Could not set mcast_query_interval in vlan 10" + # 1 is sent immediately, IGMPv3 QQIC should match with non linear value 132 (0x84) + # which is 8-bit encoded value of 160 [units of seconds] + vlmc_check_query igmp 3 $swp1 1 1 qqic 0x84 + check_err $? "Wrong QQIC in generated IGMPv3 general queries" + log_test "IGMPv3 QQIC non linear value 160(s)" + RET=0 + # 1 is sent immediately, MLDv2 QQIC should match with non linear value 132 (0x84) + # which is 8-bit encoded value of 160 [units of seconds] + vlmc_check_query mld 2 $swp1 1 1 qqic 0x84 + check_err $? "Wrong QQIC in generated MLDv2 general queries" + log_test "MLDv2 QQIC non linear value 160(s)" + + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1 bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2 bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 12500 } @@ -469,10 +556,47 @@ vlmc_query_response_intvl_test() log_test "Vlan mcast_query_response_interval global option default value" RET=0 - bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 200 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3 + check_err $? "Could not set mcast_igmp_version in vlan 10" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2 + check_err $? "Could not set mcast_mld_version in vlan 10" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 600 + check_err $? 
"Could not set mcast_query_response_interval in vlan 10" + # 1 is sent immediately, IGMPv3 MRC should match with linear value 60 (0x3c) + # which is 8-bit encoded value of 60 [units of 0.1s = 6 seconds] + vlmc_check_query igmp 3 $swp1 1 1 mrc 0x3c + check_err $? "Wrong MRC in generated IGMPv3 general queries" + log_test "IGMPv3 MRC linear value of 60(x0.1s)" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 2400 + check_err $? "Could not set mcast_query_response_interval in vlan 10" + # 1 is sent immediately, MLDv2 MRC should match with linear value 0x5dc0 (24000) + # which is 16-bit encoded value of 24000 [units of ms / 24 seconds] + vlmc_check_query mld 2 $swp1 1 1 mrc 0x5dc0 + check_err $? "Wrong MRC in generated MLDv2 general queries" + log_test "MLDv2 MRC linear value of 24000(ms)" + + RET=0 + # 1 is sent immediately, IGMPv3 MRC should match with non linear value 142 (0x8e) + # which is 8-bit encoded value of 240 [units of 0.1s = 24 seconds] + vlmc_check_query igmp 3 $swp1 1 1 mrc 0x8e + check_err $? "Wrong MRC in generated IGMPv3 general queries" + log_test "IGMPv3 MRC non linear value of 240(x0.1s)" + + RET=0 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 4800 check_err $? "Could not set mcast_query_response_interval in vlan 10" - log_test "Vlan 10 mcast_query_response_interval option changed to 200" + # 1 is sent immediately, MLDv2 MRC should match with non linear value 0x8770 (34672) + # which is 16-bit encoded value of 48000 [units of ms / 48 seconds] + vlmc_check_query mld 2 $swp1 1 1 mrc 0x8770 + check_err $? 
"Wrong MRC in generated MLDv2 general queries" + log_test "MLDv2 MRC non linear value of 48000(ms)" + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1 + bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2 bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000 } diff --git a/tools/testing/selftests/net/getsockopt_iter.c b/tools/testing/selftests/net/getsockopt_iter.c new file mode 100644 index 0000000000000..209569354d0e3 --- /dev/null +++ b/tools/testing/selftests/net/getsockopt_iter.c @@ -0,0 +1,300 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Quick test for getsockopt{_iter} tests. + * + * Each fixture targets one converted protocol and pins down the + * returned-length / errno semantics across buffer-size variations, + * an unknown optname and a bogus level. + * + * - netlink: NETLINK_PKTINFO covers the flag-style int path; the + * NETLINK_LIST_MEMBERSHIPS cases cover the size-discovery path + * that always reports the required buffer length back via optlen, + * even when the user buffer is too small to receive any group bits. + * - vsock: SO_VM_SOCKETS_BUFFER_SIZE covers the u64 path. 
+ * + * Author: Breno Leitao <leitao@debian.org> + */ + +#include <errno.h> +#include <stdint.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <linux/time_types.h> +#include <linux/vm_sockets.h> +#include <sys/socket.h> +#include "kselftest_harness.h" + +#ifndef AF_VSOCK +#define AF_VSOCK 40 +#endif + +/* ---------- netlink ---------- */ + +FIXTURE(netlink) +{ + int fd; +}; + +FIXTURE_SETUP(netlink) +{ + int group = RTNLGRP_LINK; + + self->fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (self->fd < 0) + SKIP(return, "AF_NETLINK socket: %s", strerror(errno)); + + /* Joining a multicast group grows nlk->ngroups so the + * NETLINK_LIST_MEMBERSHIPS path has a non-zero size to report. + */ + if (setsockopt(self->fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, + &group, sizeof(group)) < 0) + SKIP(return, "NETLINK_ADD_MEMBERSHIP: %s", strerror(errno)); +} + +FIXTURE_TEARDOWN(netlink) +{ + if (self->fd >= 0) + close(self->fd); +} + +TEST_F(netlink, pktinfo_exact) +{ + socklen_t optlen; + int val = -1; + + optlen = sizeof(val); + + ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK, NETLINK_PKTINFO, + &val, &optlen)); + ASSERT_EQ(sizeof(int), optlen); + ASSERT_TRUE(val == 0 || val == 1); +} + +TEST_F(netlink, pktinfo_oversize_clamped) +{ + char buf[16] = {}; + socklen_t optlen; + + optlen = sizeof(buf); + + ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK, NETLINK_PKTINFO, + buf, &optlen)); + ASSERT_EQ(sizeof(int), optlen); +} + +TEST_F(netlink, pktinfo_undersize) +{ + char buf[2] = {}; + socklen_t optlen; + + optlen = sizeof(buf); + + ASSERT_EQ(-1, getsockopt(self->fd, SOL_NETLINK, NETLINK_PKTINFO, + buf, &optlen)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(sizeof(buf), optlen); +} + +TEST_F(netlink, list_memberships_size_discovery) +{ + socklen_t optlen = 0; + char dummy; + + ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK, + NETLINK_LIST_MEMBERSHIPS, + &dummy, &optlen)); + ASSERT_GT(optlen, 0); + 
ASSERT_EQ(0, optlen % sizeof(__u32)); +} + +TEST_F(netlink, list_memberships_full_read) +{ + __u32 buf[64] = {}; + socklen_t optlen; + + optlen = sizeof(buf); + + ASSERT_EQ(0, getsockopt(self->fd, SOL_NETLINK, + NETLINK_LIST_MEMBERSHIPS, + buf, &optlen)); + ASSERT_GT(optlen, 0); + ASSERT_LE(optlen, sizeof(buf)); + ASSERT_EQ(0, optlen % sizeof(__u32)); +} + +TEST_F(netlink, bad_level) +{ + socklen_t optlen; + int val; + + optlen = sizeof(val); + + ASSERT_EQ(-1, getsockopt(self->fd, SOL_SOCKET + 1, NETLINK_PKTINFO, + &val, &optlen)); + ASSERT_EQ(ENOPROTOOPT, errno); + ASSERT_EQ(sizeof(val), optlen); +} + +TEST_F(netlink, bad_optname) +{ + socklen_t optlen; + int val; + + optlen = sizeof(val); + + ASSERT_EQ(-1, getsockopt(self->fd, SOL_NETLINK, 0x7fff, + &val, &optlen)); + ASSERT_EQ(ENOPROTOOPT, errno); + ASSERT_EQ(sizeof(val), optlen); +} + +/* ---------- vsock ---------- */ + +FIXTURE(vsock) +{ + int fd; +}; + +FIXTURE_SETUP(vsock) +{ + self->fd = socket(AF_VSOCK, SOCK_STREAM, 0); + if (self->fd < 0) + SKIP(return, "AF_VSOCK socket: %s", strerror(errno)); +} + +FIXTURE_TEARDOWN(vsock) +{ + if (self->fd >= 0) + close(self->fd); +} + +TEST_F(vsock, buffer_size_exact) +{ + socklen_t optlen; + uint64_t val = 0; + + optlen = sizeof(val); + + ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_BUFFER_SIZE, + &val, &optlen)); + ASSERT_EQ(sizeof(uint64_t), optlen); + ASSERT_GT(val, 0); +} + +TEST_F(vsock, buffer_size_oversize_clamped) +{ + char buf[16] = {}; + socklen_t optlen; + + optlen = sizeof(buf); + + ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_BUFFER_SIZE, + buf, &optlen)); + ASSERT_EQ(sizeof(uint64_t), optlen); +} + +TEST_F(vsock, buffer_size_undersize) +{ + char buf[4] = {}; + socklen_t optlen; + + optlen = sizeof(buf); + + ASSERT_EQ(-1, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_BUFFER_SIZE, + buf, &optlen)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(sizeof(buf), optlen); +} + +TEST_F(vsock, bad_level) +{ + socklen_t optlen; + uint64_t 
val; + + optlen = sizeof(val); + + ASSERT_EQ(-1, getsockopt(self->fd, SOL_SOCKET + 1, + SO_VM_SOCKETS_BUFFER_SIZE, + &val, &optlen)); + ASSERT_EQ(ENOPROTOOPT, errno); + ASSERT_EQ(sizeof(val), optlen); +} + +TEST_F(vsock, bad_optname) +{ + socklen_t optlen; + uint64_t val; + + optlen = sizeof(val); + + ASSERT_EQ(-1, getsockopt(self->fd, AF_VSOCK, 0x7fff, + &val, &optlen)); + ASSERT_EQ(ENOPROTOOPT, errno); + ASSERT_EQ(sizeof(val), optlen); +} + +/* SO_VM_SOCKETS_CONNECT_TIMEOUT_{NEW,OLD} return a sock_timeval-shaped + * payload, which is wider than u64 on 64-bit. They exercise the path + * where the protocol's reported lv (16 bytes) is larger than the + * common 8-byte u64 case covered above. + */ +TEST_F(vsock, connect_timeout_new_exact) +{ + struct __kernel_sock_timeval tv = {}; + socklen_t optlen; + + optlen = sizeof(tv); + + ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW, + &tv, &optlen)); + ASSERT_EQ(sizeof(tv), optlen); +} + +TEST_F(vsock, connect_timeout_new_oversize_clamped) +{ + char buf[sizeof(struct __kernel_sock_timeval) * 2] = {}; + socklen_t optlen; + + optlen = sizeof(buf); + + ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW, + buf, &optlen)); + ASSERT_EQ(sizeof(struct __kernel_sock_timeval), optlen); +} + +TEST_F(vsock, connect_timeout_new_undersize) +{ + socklen_t optlen; + uint64_t val; + + optlen = sizeof(val); + + ASSERT_EQ(-1, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_CONNECT_TIMEOUT_NEW, + &val, &optlen)); + ASSERT_EQ(EINVAL, errno); + ASSERT_EQ(sizeof(val), optlen); +} + +TEST_F(vsock, connect_timeout_old_exact) +{ + struct __kernel_old_timeval tv = {}; + socklen_t optlen; + + optlen = sizeof(tv); + + ASSERT_EQ(0, getsockopt(self->fd, AF_VSOCK, + SO_VM_SOCKETS_CONNECT_TIMEOUT_OLD, + &tv, &optlen)); + ASSERT_EQ(sizeof(tv), optlen); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/lib/gro.c b/tools/testing/selftests/net/lib/gro.c index 
11b16ae5f0e83..fa35dfc8e790f 100644 --- a/tools/testing/selftests/net/lib/gro.c +++ b/tools/testing/selftests/net/lib/gro.c @@ -67,12 +67,14 @@ #include <errno.h> #include <error.h> #include <getopt.h> +#include <net/ethernet.h> +#include <net/if.h> #include <linux/filter.h> #include <linux/if_packet.h> +#include <linux/if_pppox.h> #include <linux/ipv6.h> #include <linux/net_tstamp.h> -#include <net/ethernet.h> -#include <net/if.h> +#include <linux/ppp_defs.h> #include <netinet/in.h> #include <netinet/ip.h> #include <netinet/ip6.h> @@ -102,6 +104,7 @@ #define MAX_LARGE_PKT_CNT ((IP_MAXPACKET - (MAX_HDR_LEN - ETH_HLEN)) / \ (ASSUMED_MTU - (MAX_HDR_LEN - ETH_HLEN))) #define MIN_EXTHDR_SIZE 8 +#define L2_HLEN_MAX (ETH_HLEN + PPPOE_SES_HLEN) #define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00" #define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11" @@ -134,6 +137,7 @@ static int total_hdr_len = -1; static int ethhdr_proto = -1; static bool ipip; static bool ip6ip6; +static bool pppoe; static uint64_t txtime_ns; static int num_flows = 4; static bool order_check; @@ -171,6 +175,22 @@ static void vlog(const char *fmt, ...) } } +static void fill_pppoelayer(void *buf, int payload_len, uint16_t sid) +{ + struct pppoe_ppp_hdr { + struct pppoe_hdr eh; + __be16 proto; + } *ph = buf; + + payload_len += sizeof(struct tcphdr); + ph->eh.type = 1; + ph->eh.ver = 1; + ph->eh.code = 0; + ph->eh.sid = htons(sid); + ph->eh.length = htons(payload_len + sizeof(ph->proto)); + ph->proto = htons(proto == PF_INET ? PPP_IP : PPP_IPV6); +} + static void setup_sock_filter(int fd) { const int dport_off = tcp_offset + offsetof(struct tcphdr, dest); @@ -412,11 +432,15 @@ static void create_packet(void *buf, int seq_offset, int ack_offset, fill_networklayer(buf + inner_ip_off, payload_len, IPPROTO_TCP); if (inner_ip_off > ETH_HLEN) { - int encap_proto = (proto == PF_INET) ? 
- IPPROTO_IPIP : IPPROTO_IPV6; + if (pppoe) { + fill_pppoelayer(buf + ETH_HLEN, payload_len + ip_hdr_len, 0x1234); + } else { + int encap_proto = (proto == PF_INET) ? + IPPROTO_IPIP : IPPROTO_IPV6; - fill_networklayer(buf + ETH_HLEN, - payload_len + ip_hdr_len, encap_proto); + fill_networklayer(buf + ETH_HLEN, + payload_len + ip_hdr_len, encap_proto); + } } fill_datalinklayer(buf); @@ -526,7 +550,7 @@ static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn, static void send_data_pkts(int fd, struct sockaddr_ll *daddr, int payload_len1, int payload_len2) { - static char buf[ETH_HLEN + IP_MAXPACKET]; + static char buf[L2_HLEN_MAX + IP_MAXPACKET]; create_packet(buf, 0, 0, payload_len1, 0); write_packet(fd, buf, total_hdr_len + payload_len1, daddr); @@ -1071,6 +1095,20 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr) write_packet(fd, buf, bufpkt_len, daddr); } +static void send_changed_pppoe_sid(int fd, struct sockaddr_ll *daddr) +{ + static char buf[MAX_HDR_LEN + PAYLOAD_LEN]; + int pkt_size = total_hdr_len + PAYLOAD_LEN; + struct pppoe_hdr *hdr = (struct pppoe_hdr *)(buf + ETH_HLEN); + + create_packet(buf, 0, 0, PAYLOAD_LEN, 0); + write_packet(fd, buf, pkt_size, daddr); + + create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0); + hdr->sid = htons(0x4321); + write_packet(fd, buf, pkt_size, daddr); +} + static void bind_packetsocket(int fd) { struct sockaddr_ll daddr = {}; @@ -1121,9 +1159,10 @@ static void recv_error(int fd, int rcv_errno) static void check_recv_pkts(int fd, int *correct_payload, int correct_num_pkts) { - static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; - struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + static char buffer[IP_MAXPACKET + L2_HLEN_MAX + 1]; + int nhoff = ETH_HLEN + (pppoe ? 
PPPOE_SES_HLEN : 0); + struct iphdr *iph = (struct iphdr *)(buffer + nhoff); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + nhoff); struct tcphdr *tcph; bool bad_packet = false; int tcp_ext_len = 0; @@ -1140,7 +1179,7 @@ static void check_recv_pkts(int fd, int *correct_payload, while (1) { ip_ext_len = 0; - pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + pkt_size = recv(fd, buffer, sizeof(buffer), 0); if (pkt_size < 0) recv_error(fd, errno); @@ -1183,9 +1222,10 @@ static void check_recv_pkts(int fd, int *correct_payload, static void check_capacity_pkts(int fd) { - static char buffer[IP_MAXPACKET + ETH_HLEN + 1]; - struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN); - struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN); + static char buffer[IP_MAXPACKET + L2_HLEN_MAX + 1]; + int nhoff = ETH_HLEN + (pppoe ? PPPOE_SES_HLEN : 0); + struct iphdr *iph = (struct iphdr *)(buffer + nhoff); + struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + nhoff); int num_pkt = 0, num_coal = 0, pkt_idx; const char *fail_reason = NULL; int flow_order[num_flows * 2]; @@ -1203,7 +1243,7 @@ static void check_capacity_pkts(int fd) while (1) { ip_ext_len = 0; - pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0); + pkt_size = recv(fd, buffer, sizeof(buffer), 0); if (pkt_size < 0) recv_error(fd, errno); @@ -1499,6 +1539,12 @@ static void gro_sender(void) usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + /* PPPoE sub-tests */ + } else if (strcmp(testname, "pppoe_sid") == 0) { + send_changed_pppoe_sid(txfd, &daddr); + usleep(fin_delay_us); + write_packet(txfd, fin_pkt, total_hdr_len, &daddr); + } else { error(1, 0, "Unknown testcase: %s", testname); } @@ -1716,6 +1762,12 @@ static void gro_receiver(void) } else if (strcmp(testname, "capacity") == 0) { check_capacity_pkts(rxfd); + } else if (strcmp(testname, "pppoe_sid") == 0) { + correct_payload[0] = PAYLOAD_LEN; + correct_payload[1] = PAYLOAD_LEN; + printf("different PPPoE 
session ID doesn't coalesce: "); + check_recv_pkts(rxfd, correct_payload, 2); + } else { error(1, 0, "Test case error: unknown testname %s", testname); } @@ -1734,6 +1786,8 @@ static void parse_args(int argc, char **argv) { "ipv6", no_argument, NULL, '6' }, { "ipip", no_argument, NULL, 'e' }, { "ip6ip6", no_argument, NULL, 'E' }, + { "pppoev4", no_argument, NULL, 'p' }, + { "pppoev6", no_argument, NULL, 'P' }, { "num-flows", required_argument, NULL, 'n' }, { "rx", no_argument, NULL, 'r' }, { "saddr", required_argument, NULL, 's' }, @@ -1745,7 +1799,7 @@ static void parse_args(int argc, char **argv) }; int c; - while ((c = getopt_long(argc, argv, "46d:D:eEi:n:rs:S:t:ov", opts, NULL)) != -1) { + while ((c = getopt_long(argc, argv, "46d:D:eEi:n:pPrs:S:t:ov", opts, NULL)) != -1) { switch (c) { case '4': proto = PF_INET; @@ -1765,6 +1819,16 @@ static void parse_args(int argc, char **argv) proto = PF_INET6; ethhdr_proto = htons(ETH_P_IPV6); break; + case 'p': + pppoe = true; + proto = PF_INET; + ethhdr_proto = htons(ETH_P_PPP_SES); + break; + case 'P': + pppoe = true; + proto = PF_INET6; + ethhdr_proto = htons(ETH_P_PPP_SES); + break; case 'd': addr4_dst = addr6_dst = optarg; break; @@ -1812,6 +1876,10 @@ int main(int argc, char **argv) } else if (ip6ip6) { tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr) * 2; total_hdr_len = tcp_offset + sizeof(struct tcphdr); + } else if (pppoe) { + tcp_offset = ETH_HLEN + PPPOE_SES_HLEN + + (proto == PF_INET ? 
sizeof(struct iphdr) : sizeof(struct ipv6hdr)); + total_hdr_len = tcp_offset + sizeof(struct tcphdr); } else if (proto == PF_INET) { tcp_offset = ETH_HLEN + sizeof(struct iphdr); total_hdr_len = tcp_offset + sizeof(struct tcphdr); diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py index 7c81d86a7e972..64a8c1ed49503 100644 --- a/tools/testing/selftests/net/lib/py/__init__.py +++ b/tools/testing/selftests/net/lib/py/__init__.py @@ -14,7 +14,7 @@ from .netns import NetNS, NetNSEnter from .nsim import NetdevSim, NetdevSimDev from .utils import CmdExitFailure, fd_read_timeout, cmd, bkg, defer, \ bpftool, ip, ethtool, bpftrace, rand_port, rand_ports, wait_port_listen, \ - wait_file, tool + wait_file, tool, tc from .bpf import bpf_map_set, bpf_map_dump, bpf_prog_map_ids from .ynl import NlError, NlctrlFamily, YnlFamily, \ EthtoolFamily, NetdevFamily, RtnlFamily, RtnlAddrFamily @@ -29,7 +29,7 @@ __all__ = ["KSRC", "NetNS", "NetNSEnter", "CmdExitFailure", "fd_read_timeout", "cmd", "bkg", "defer", "bpftool", "ip", "ethtool", "bpftrace", "rand_port", "rand_ports", - "wait_port_listen", "wait_file", "tool", + "wait_port_listen", "wait_file", "tool", "tc", "bpf_map_set", "bpf_map_dump", "bpf_prog_map_ids", "NetdevSim", "NetdevSimDev", "NetshaperFamily", "DevlinkFamily", "PSPFamily", "NlError", diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 6c44a3d2bbf77..be9408a771681 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -23,6 +23,10 @@ class CmdExitFailure(Exception): self.cmd = cmd_obj +class CmdExitZeroFailure(CmdExitFailure): + """ Command succeeded (returned zero exit code), but expected failure. """ + + def fd_read_timeout(fd, timeout): rlist, _, _ = select.select([fd], [], [], timeout) if rlist: @@ -39,8 +43,9 @@ class cmd: Use bkg() instead to run a command in the background. 
""" - def __init__(self, comm, shell=None, fail=True, ns=None, background=False, - host=None, timeout=5, ksft_ready=None, ksft_wait=None): + def __init__(self, comm, shell=None, fail=True, expect_fail=False, ns=None, + background=False, host=None, timeout=5, ksft_ready=None, + ksft_wait=None): if ns: comm = f'ip netns exec {ns} ' + comm @@ -88,7 +93,8 @@ class cmd: self._process_terminate(terminate=terminate, timeout=1) raise CmdInitFailure("Did not receive ready message", self) if not background: - self.process(terminate=False, fail=fail, timeout=timeout) + self.process(terminate=False, fail=fail, expect_fail=expect_fail, + timeout=timeout) def _process_terminate(self, terminate, timeout): if terminate: @@ -102,7 +108,7 @@ class cmd: return stdout, stderr - def process(self, terminate=True, fail=None, timeout=5): + def process(self, terminate=True, fail=None, expect_fail=False, timeout=5): if fail is None: fail = not terminate @@ -111,10 +117,19 @@ class cmd: stdout, stderr = self._process_terminate(terminate=terminate, timeout=timeout) - if self.proc.returncode != 0 and fail: + + # Fail on unexpected test failure if fail. + # Fail on unexpected test success if expect_fail. + # Fail on negative returncode if either: + # Set by subprocess on crash or signal, this is never expected failure. 
+ if (self.proc.returncode != 0 and fail or + (self.proc.returncode < 0 and expect_fail)): if len(stderr) > 0 and stderr[-1] == "\n": stderr = stderr[:-1] raise CmdExitFailure("Command failed", self) + elif self.proc.returncode == 0 and expect_fail: + raise CmdExitZeroFailure("Command succeeded (expected fail)", self) + def __repr__(self): def str_fmt(name, s): @@ -157,14 +172,17 @@ class bkg(cmd): with bkg("my_binary", ksft_wait=5): """ - def __init__(self, comm, shell=None, fail=None, ns=None, host=None, - exit_wait=False, ksft_ready=None, ksft_wait=None): + def __init__(self, comm, shell=None, fail=None, expect_fail=None, + ns=None, host=None, exit_wait=False, ksft_ready=None, + ksft_wait=None): super().__init__(comm, background=True, - shell=shell, fail=fail, ns=ns, host=host, - ksft_ready=ksft_ready, ksft_wait=ksft_wait) + shell=shell, fail=fail, expect_fail=expect_fail, + ns=ns, host=host, ksft_ready=ksft_ready, + ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait self._exit_wait = exit_wait self.check_fail = fail + self.expect_fail = expect_fail if shell and self.terminate: print("# Warning: combining shell and terminate is risky!") @@ -179,7 +197,8 @@ class bkg(cmd): # since forcing termination silences failures with fail=None if self.proc.poll() is None: terminate = terminate or (self._exit_wait and ex_type is not None) - return self.process(terminate=terminate, fail=self.check_fail) + return self.process(terminate=terminate, fail=self.check_fail, + expect_fail=self.expect_fail) GLOBAL_DEFER_QUEUE = [] @@ -220,7 +239,10 @@ class defer: def tool(name, args, json=None, ns=None, host=None): cmd_str = name + ' ' if json: - cmd_str += '--json ' + if name == 'tc': + cmd_str += '-json ' + else: + cmd_str += '--json ' cmd_str += args cmd_obj = cmd(cmd_str, ns=ns, host=host) if json: @@ -238,6 +260,13 @@ def ip(args, json=None, ns=None, host=None): return tool('ip', args, json=json, host=host) +def tc(args, json=None, ns=None, host=None): + """ 
Helper to call tc with standard set of optional args. """ + if ns: + args = f'-netns {ns} ' + args + return tool('tc', args, json=json, host=host) + + def ethtool(args, json=None, ns=None, host=None): return tool('ethtool', args, json=json, ns=ns, host=host) diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 5acd12021e6e8..5d4d0f127f795 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -63,6 +63,7 @@ unset fastclose unset fullmesh unset speed unset bind_addr +unset ifaces_nr unset join_syn_rej unset join_csum_ns1 unset join_csum_ns2 @@ -146,7 +147,7 @@ init_partial() # ns1eth4 ns2eth4 local i - for i in $(seq 1 4); do + for i in $(seq 1 "${ifaces_nr:-4}"); do ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad @@ -165,7 +166,7 @@ init_partial() init_shapers() { local i - for i in $(seq 1 4); do + for i in $(seq 1 "${ifaces_nr:-4}"); do tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms done @@ -512,6 +513,19 @@ reset_with_tcp_filter() fi } +# For kernel supporting limits above 8 +# $1: title ; $2,4: addrs limit ns1,2 ; $3,5: subflows limit ns1,2 +reset_with_high_limits() +{ + reset "${1}" || return 1 + + if ! pm_nl_set_limits "${ns1}" "${2}" "${3}" 2>/dev/null || + ! 
pm_nl_set_limits "${ns2}" "${4}" "${5}" 2>/dev/null; then + mark_as_skipped "unable to set the limits to ${*:2}" + return 1 + fi +} + # $1: err msg fail_test() { @@ -3700,6 +3714,21 @@ fullmesh_tests() chk_prio_nr 0 1 1 0 chk_rm_nr 0 1 fi + + # fullmesh in 8x8 to create 63 additional subflows + if ifaces_nr=8 reset_with_high_limits "fullmesh 8x8" 64 64 64 64; then + # higher chance to lose ADD_ADDR: allow retransmissions + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1 + local i + for i in $(seq 1 8); do + pm_nl_add_endpoint $ns2 10.0.$i.2 flags subflow,fullmesh + pm_nl_add_endpoint $ns1 10.0.$i.1 flags signal + done + speed=slow \ + run_tests $ns1 $ns2 10.0.1.1 + chk_join_nr 63 63 63 + fi + } fastclose_tests() diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh index ab8bce06b2626..e850a87429b61 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh @@ -355,10 +355,10 @@ sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) cout=$(mktemp) +trap cleanup EXIT init make_file "$cin" "client" 1 make_file "$sin" "server" 1 -trap cleanup EXIT mptcp_lib_subtests_last_ts_reset run_tests $ns1 $ns2 10.0.1.1 diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 04594dfc22b13..21bfe1311f118 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -66,6 +66,15 @@ get_limits() { fi } +get_limits_nb() { + if mptcp_lib_is_ip_mptcp; then + ip -n "${ns1}" mptcp limits | awk '{ print $2" "$4 }' + else + ip netns exec "${ns1}" ./pm_nl_ctl limits | \ + awk '{ printf "%s ", $2 }' + fi +} + format_endpoints() { mptcp_lib_pm_nl_format_endpoints "${@}" } @@ -164,6 +173,7 @@ check "get_endpoint 2" "" "simple del addr" 1 check "show_endpoints" \ "$(format_endpoints "1,10.0.1.1" \ "3,10.0.1.3,signal backup")" "dump addrs after del" 
+add_endpoint 10.0.1.2 id 2 add_endpoint 10.0.1.3 2>/dev/null check "get_endpoint 4" "" "duplicate addr" 1 @@ -171,25 +181,29 @@ check "get_endpoint 4" "" "duplicate addr" 1 add_endpoint 10.0.1.4 flags signal check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment" -for i in $(seq 5 9); do - add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1 -done -check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit" -check "get_endpoint 10" "" "above hard addr limit" 1 +read -r -a default_limits_nb <<< "$(get_limits_nb)" +# limits have been increased: from 8 to 64 for subflows/add_addr & 255 for endp +if mptcp_lib_expect_all_features || set_limits 9 9 2>/dev/null; then + max_endp=255 + max_limits=64 +else + max_endp=8 + max_limits=8 +fi +set_limits "${default_limits_nb[@]}" -del_endpoint 9 -for i in $(seq 10 255); do - add_endpoint 10.0.0.9 id "${i}" - del_endpoint "${i}" +for i in $(seq 5 ${max_endp}); do + add_endpoint "10.0.0.${i}" id "${i}" done -check "show_endpoints" \ - "$(format_endpoints "1,10.0.1.1" \ - "3,10.0.1.3,signal backup" \ - "4,10.0.1.4,signal" \ - "5,10.0.1.5,signal" \ - "6,10.0.1.6,signal" \ - "7,10.0.1.7,signal" \ - "8,10.0.1.8,signal")" "id limit" +check "get_endpoint ${max_endp}" \ + "$(format_endpoints "${max_endp},10.0.0.${max_endp}")" "id limit" + +if add_endpoint '10.0.0.1' &>/dev/null; then + hardlimit="no error" +else + hardlimit="error" +fi +check "echo ${hardlimit}" "error" "above hard addr limit" flush_endpoint check "show_endpoints" "" "flush addrs" @@ -202,15 +216,15 @@ if ! 
mptcp_lib_is_ip_mptcp; then flush_endpoint fi -set_limits 9 1 2>/dev/null +set_limits $((max_limits + 1)) 1 2>/dev/null check "get_limits" "${default_limits}" "rcv addrs above hard limit" -set_limits 1 9 2>/dev/null +set_limits 1 $((max_limits + 1)) 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" -set_limits 8 8 +set_limits ${max_limits} ${max_limits} flush_endpoint ## to make sure it doesn't affect the limits -check "get_limits" "$(format_limits 8 8)" "set limits" +check "get_limits" "$(format_limits ${max_limits} ${max_limits})" "set limits" flush_endpoint add_endpoint 10.0.1.1 diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c index 99eecccbf0c87..78180da1efcc5 100644 --- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -217,8 +217,6 @@ static int capture_events(int fd, int event_group) /* do a netlink command and, if max > 0, fetch the reply ; nh's size >1024B */ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) { - struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; - socklen_t addr_len; void *data = nh; int rem, ret; int err = 0; @@ -230,15 +228,15 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) } nh->nlmsg_len = len; - ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr)); + ret = send(fd, data, len, 0); if (ret != len) error(1, errno, "send netlink: %uB != %uB\n", ret, len); - addr_len = sizeof(nladdr); - rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len); + ret = recv(fd, data, max, 0); if (ret < 0) error(1, errno, "recv netlink: %uB\n", ret); + rem = ret; /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */ for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) { if (nh->nlmsg_type == NLMSG_DONE) diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 
d11a8b949aab5..7b9aabe101704 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -76,13 +76,13 @@ setup() ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1 ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad - ip -net "$ns1" link set ns1eth1 up mtu 1500 + ip -net "$ns1" link set ns1eth1 up mtu 1500 gso_max_segs 0 ip -net "$ns1" route add default via 10.0.1.2 ip -net "$ns1" route add default via dead:beef:1::2 ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2 ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad - ip -net "$ns1" link set ns1eth2 up mtu 1500 + ip -net "$ns1" link set ns1eth2 up mtu 1500 gso_max_segs 0 ip -net "$ns1" route add default via 10.0.2.2 metric 101 ip -net "$ns1" route add default via dead:beef:2::2 metric 101 @@ -91,21 +91,21 @@ setup() ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad - ip -net "$ns2" link set ns2eth1 up mtu 1500 + ip -net "$ns2" link set ns2eth1 up mtu 1500 gso_max_segs 0 ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2 ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad - ip -net "$ns2" link set ns2eth2 up mtu 1500 + ip -net "$ns2" link set ns2eth2 up mtu 1500 gso_max_segs 0 ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3 ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad - ip -net "$ns2" link set ns2eth3 up mtu 1500 + ip -net "$ns2" link set ns2eth3 up mtu 1500 gso_max_segs 0 ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1 ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1 ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1 ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad - ip -net "$ns3" link set ns3eth1 up mtu 1500 + ip -net "$ns3" link set ns3eth1 up mtu 1500 gso_max_segs 0 ip -net "$ns3" route add default via 10.0.3.2 ip -net "$ns3" route add default via dead:beef:3::2 @@ -223,9 +223,11 @@ run_test() local rate2=$2 local delay1=$3 local 
delay2=$4 + local limit1=$5 + local limit2=$6 local lret local dev - shift 4 + shift 6 local msg=$* [ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1="" @@ -240,10 +242,10 @@ run_test() # keep the queued pkts number low, or the RTT estimator will see # increasing latency over time. - tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit 50 - tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit 50 - tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit 50 - tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit 50 + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 limit ${limit1} + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 limit ${limit2} + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 limit ${limit1} + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 limit ${limit2} # time is measured in ms, account for transfer size, aggregated link speed # and header overhead (10%) @@ -301,13 +303,13 @@ done setup mptcp_lib_subtests_last_ts_reset -run_test 10 10 0 0 "balanced bwidth" -run_test 10 10 1 25 "balanced bwidth with unbalanced delay" +run_test 10 10 0 0 20 20 "balanced bwidth" +run_test 10 10 1 25 20 50 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 "unbalanced bwidth" -run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay" -run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay" +MPTCP_LIB_SUBTEST_FLAKY=1 run_test 10 3 0 0 30 20 "unbalanced bwidth" +run_test 10 3 1 25 40 30 "unbalanced bwidth with unbalanced delay" +run_test 10 3 25 1 50 30 "unbalanced bwidth with opposed, unbalanced delay" mptcp_lib_result_print_all_tap exit $ret diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py index 
eff55c64a0125..ceb44c8e1fec5 100755 --- a/tools/testing/selftests/net/nl_netdev.py +++ b/tools/testing/selftests/net/nl_netdev.py @@ -9,7 +9,7 @@ import errno from os import system from lib.py import ksft_run, ksft_exit from lib.py import ksft_eq, ksft_ge, ksft_ne, ksft_raises, ksft_busy_wait -from lib.py import NetdevFamily, NetdevSimDev, NlError, ip +from lib.py import NetdevFamily, NetdevSimDev, NlError, defer, ip def empty_check(nf) -> None: @@ -255,6 +255,117 @@ def page_pool_check(nf) -> None: nsim.dfs_write("pp_hold", "y") +def page_pool_dump_ifindex(nf) -> None: + """Test page pool dump filtering by ifindex.""" + nsimdev1 = NetdevSimDev(queue_count=3) + rm_nsim1 = defer(nsimdev1.remove) + nsimdev2 = NetdevSimDev(queue_count=5) + defer(nsimdev2.remove) + + nsim1 = nsimdev1.nsims[0] + nsim2 = nsimdev2.nsims[0] + + ip(f"link set dev {nsim1.ifname} up") + ip(f"link set dev {nsim2.ifname} up") + + # Unfiltered dump should have pools from both devices + all_pp = nf.page_pool_get({}, dump=True) + pp1_all = [pp for pp in all_pp + if pp.get("ifindex") == nsim1.ifindex] + pp2_all = [pp for pp in all_pp + if pp.get("ifindex") == nsim2.ifindex] + ksft_ge(len(pp1_all), 1) + ksft_ge(len(pp2_all), 1) + + # Filtered dump should only return pools for that device + pp1_flt = nf.page_pool_get({'ifindex': nsim1.ifindex}, dump=True) + ksft_eq(pp1_flt, pp1_all) + + pp2_flt = nf.page_pool_get({'ifindex': nsim2.ifindex}, dump=True) + ksft_eq(pp2_flt, pp2_all) + + # Non-existent ifindex should return empty dump + pp_none = nf.page_pool_get({'ifindex': 12345678}, dump=True) + ksft_eq(len(pp_none), 0) + + # Device down - no pools for that ifindex + ip(f"link set dev {nsim1.ifname} down") + pp1_down = nf.page_pool_get({'ifindex': nsim1.ifindex}, dump=True) + ksft_eq(len(pp1_down), 0) + + # Remove device, dump by its old ifindex should return empty + old_ifindex = nsim1.ifindex + rm_nsim1.exec() + pp1_gone = nf.page_pool_get({'ifindex': old_ifindex}, dump=True) + ksft_eq(len(pp1_gone), 
0) + + +def page_pool_ifindex_leak_check(nf) -> None: + """Test that zombie page pools don't show up under the original ifindex.""" + nsimdev = NetdevSimDev() + rm_nsim = defer(nsimdev.remove) + nsim = nsimdev.nsims[0] + + ip(f"link set dev {nsim.ifname} up") + nsim.dfs_write("pp_hold", "y") + + pp_up = nf.page_pool_get({'ifindex': nsim.ifindex}, dump=True) + ksft_ge(len(pp_up), 1) + + # Remove device with leaked page - pool becomes zombie (orphaned to lo) + old_ifindex = nsim.ifindex + rm_nsim.exec() + + # Zombie pool should NOT appear under the original device + pp_down = nf.page_pool_get({'ifindex': old_ifindex}, dump=True) + ksft_eq(len(pp_down), 0) + + # But it should appear in an unfiltered dump (under loopback) + pp_all = nf.page_pool_get({}, dump=True) + orphans = [pp for pp in pp_all + if "detach-time" in pp and "ifindex" not in pp] + ksft_ge(len(orphans), 1) + + +def page_pool_stats_ifindex_check(nf) -> None: + """Test page pool stats dump filtering by ifindex.""" + nsimdev1 = NetdevSimDev(queue_count=3) + defer(nsimdev1.remove) + nsimdev2 = NetdevSimDev(queue_count=5) + defer(nsimdev2.remove) + + nsim1 = nsimdev1.nsims[0] + nsim2 = nsimdev2.nsims[0] + + ip(f"link set dev {nsim1.ifname} up") + ip(f"link set dev {nsim2.ifname} up") + + # Unfiltered stats dump + all_stats = nf.page_pool_stats_get({}, dump=True) + s1_all = [s for s in all_stats + if s.get("info", {}).get("ifindex") == nsim1.ifindex] + s2_all = [s for s in all_stats + if s.get("info", {}).get("ifindex") == nsim2.ifindex] + ksft_ge(len(s1_all), 1) + ksft_ge(len(s2_all), 1) + + # Filtered stats dump + s1_flt = nf.page_pool_stats_get({'info': {'ifindex': nsim1.ifindex}}, + dump=True) + ksft_eq(s1_flt, s1_all) + + # Non-existent ifindex should return empty + s_none = nf.page_pool_stats_get({'info': {'ifindex': 12345678}}, dump=True) + ksft_eq(len(s_none), 0) + + # info.id should be rejected for stats dump + with ksft_raises(NlError) as cm: + nf.page_pool_stats_get({'info': {'id': 
s1_all[0]['info']['id']}}, + dump=True) + ksft_eq(cm.exception.nl_msg.error, -errno.EINVAL) + ksft_eq(cm.exception.nl_msg.extack['bad-attr'], '.info.id') + + def main() -> None: """ Ksft boiler plate main """ nf = NetdevFamily() @@ -265,7 +376,11 @@ def main() -> None: napi_set_threaded, dev_set_threaded, nsim_rxq_reset_down, - page_pool_check], + page_pool_check, + page_pool_dump_ifindex, + page_pool_ifindex_leak_check, + page_pool_stats_ifindex_check + ], args=(nf, )) ksft_exit() diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 3cdd953f68132..8cd5b3d894abd 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -28,6 +28,7 @@ tests=" tunnel_metadata ovs: test extraction of tunnel metadata tunnel_refcount ovs: test tunnel vport reference cleanup drop_reason drop: test drop reasons are emitted + pop_vlan vlan: POP_VLAN action strips tag psample psample: Sampling packets with psample" info() { @@ -864,6 +865,83 @@ test_tunnel_refcount() { ovs_wait dev_removed dp-${tun_type} || return 1 ovs_wait dev_removed ovs-${tun_type}0 || return 1 done + + return 0 +} + +test_pop_vlan() { + local sbx="test_pop_vlan" + sbx_add "$sbx" || return $? 
+ ovs_add_dp "$sbx" vlandp || return 1 + + ovs_add_netns_and_veths "$sbx" vlandp \ + ns1 veth1 ns1veth 192.0.2.1/24 || return 1 + ovs_add_netns_and_veths "$sbx" vlandp \ + ns2 veth2 ns2veth 192.0.2.2/24 || return 1 + + # Baseline: untagged bidirectional forwarding + ovs_add_flow "$sbx" vlandp \ + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(1),eth(),eth_type(0x0800),ipv4()' '2' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(2),eth(),eth_type(0x0800),ipv4()' '1' || return 1 + ovs_sbx "$sbx" ip netns exec ns1 ping -c 3 -W 2 \ + 192.0.2.2 || return 1 + + # VLAN topology: ns1 uses VLAN sub-interface, ns2 is plain + ip -n ns1 link add link ns1veth name ns1veth.10 \ + type vlan id 10 || return 1 + on_exit "ip -n ns1 link del ns1veth.10 2>/dev/null" + ip -n ns1 addr add 198.51.100.1/24 dev ns1veth.10 || return 1 + ip -n ns1 link set ns1veth.10 up || return 1 + ip -n ns2 addr add 198.51.100.2/24 dev ns2veth || return 1 + + ovs_del_flows "$sbx" vlandp + + # Static ARP: avoids VLAN-tagged ARP complexity + local ns1veth10mac ns2mac + ns1veth10mac=$(ip -n ns1 link show ns1veth.10 \ + | awk '/link\/ether/ {print $2}') + [ -z "$ns1veth10mac" ] && \ + { info "failed to get ns1veth10mac"; return 1; } + ns2mac=$(ip -n ns2 link show ns2veth \ + | awk '/link\/ether/ {print $2}') + [ -z "$ns2mac" ] && \ + { info "failed to get ns2mac"; return 1; } + ip -n ns1 neigh replace 198.51.100.2 lladdr "$ns2mac" \ + dev ns1veth.10 nud permanent || return 1 + ip -n ns2 neigh replace 198.51.100.1 \ + lladdr "$ns1veth10mac" \ + dev ns2veth nud permanent || return 1 + + local vlan_match='in_port(1),eth(),eth_type(0x8100),' + vlan_match+='vlan(vid=10),' + vlan_match+='encap(eth_type(0x0800),' + vlan_match+='ipv4(src=198.51.100.1,proto=1),icmp())' + + # Negative: forward without pop_vlan -- tagged frame + # is invisible to ns2 (no VLAN 
sub-interface), ping fails + ovs_add_flow "$sbx" vlandp "$vlan_match" '2' || return 1 + ovs_sbx "$sbx" ip netns exec ns1 ping -I ns1veth.10 \ + -c 3 -W 1 198.51.100.2 >/dev/null 2>&1 \ + && { info "FAIL: ping should fail without pop_vlan" + return 1; } + + ovs_del_flows "$sbx" vlandp + + # Positive: pop_vlan strips tag on forward path, + # push_vlan restores tag on return path -- ping succeeds + ovs_add_flow "$sbx" vlandp \ + "$vlan_match" 'pop_vlan,2' || return 1 + ovs_add_flow "$sbx" vlandp \ + 'in_port(2),eth(),eth_type(0x0800),ipv4()' \ + 'push_vlan(vid=10,pcp=0,tpid=0x8100),1' || return 1 + ovs_sbx "$sbx" ip netns exec ns1 ping -I ns1veth.10 \ + -c 3 -W 2 198.51.100.2 || return 1 + return 0 } diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py index bbe35e2718d26..3b6a26e265a4a 100644 --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py @@ -369,7 +369,7 @@ class ovsactions(nla): ("OVS_ACTION_ATTR_OUTPUT", "uint32"), ("OVS_ACTION_ATTR_USERSPACE", "userspace"), ("OVS_ACTION_ATTR_SET", "ovskey"), - ("OVS_ACTION_ATTR_PUSH_VLAN", "none"), + ("OVS_ACTION_ATTR_PUSH_VLAN", "push_vlan"), ("OVS_ACTION_ATTR_POP_VLAN", "flag"), ("OVS_ACTION_ATTR_SAMPLE", "sample"), ("OVS_ACTION_ATTR_RECIRC", "uint32"), @@ -426,6 +426,9 @@ class ovsactions(nla): return actstr + class push_vlan(nla): + fields = (("vlan_tpid", "!H"), ("vlan_tci", "!H")) + class sample(nla): nla_flags = NLA_F_NESTED @@ -632,6 +635,14 @@ class ovsactions(nla): print_str += "ct_clear" elif field[0] == "OVS_ACTION_ATTR_POP_VLAN": print_str += "pop_vlan" + elif field[0] == "OVS_ACTION_ATTR_PUSH_VLAN": + datum = self.get_attr(field[0]) + tpid = datum["vlan_tpid"] + tci = datum["vlan_tci"] + vid = tci & 0x0FFF + pcp = (tci >> 13) & 0x7 + print_str += "push_vlan(vid=%d,pcp=%d" \ + ",tpid=0x%04x)" % (vid, pcp, tpid) elif field[0] == "OVS_ACTION_ATTR_POP_ETH": print_str += "pop_eth" 
elif field[0] == "OVS_ACTION_ATTR_POP_NSH": @@ -725,7 +736,57 @@ class ovsactions(nla): actstr = actstr[strspn(actstr, ", ") :] parsed = True - if parse_starts_block(actstr, "clone(", False): + if parse_starts_block(actstr, "push_vlan(", False): + actstr = actstr[len("push_vlan("):] + vid = 0 + pcp = 0 + tpid = 0x8100 + if ")" not in actstr: + raise ValueError( + "push_vlan(): missing ')'") + paren = actstr.index(")") + if not actstr[:paren].strip(): + raise ValueError("push_vlan(): no fields") + for kv in actstr[:paren].split(","): + if "=" not in kv: + raise ValueError( + "push_vlan(): bad field '%s'" + % kv.strip()) + k = kv[:kv.index("=")].strip() + v = kv[kv.index("=") + 1:].strip() + if k == "vid": + vid = int(v, 0) + if vid < 0 or vid > 0xFFF: + raise ValueError( + "push_vlan(): vid=%d out of " + "range (0-4095)" % vid) + elif k == "pcp": + pcp = int(v, 0) + if pcp < 0 or pcp > 7: + raise ValueError( + "push_vlan(): pcp=%d out of " + "range (0-7)" % pcp) + elif k == "tpid": + tpid = int(v, 0) + if tpid < 0 or tpid > 0xFFFF: + raise ValueError( + "push_vlan(): tpid=0x%x out " + "of range (0-0xffff)" % tpid) + else: + raise ValueError( + "push_vlan(): unknown key '%s'" + % k) + tci = (vid & 0x0FFF) | ((pcp & 0x7) << 13) \ + | 0x1000 + pvact = self.push_vlan() + pvact["vlan_tpid"] = tpid + pvact["vlan_tci"] = tci + self["attrs"].append( + ["OVS_ACTION_ATTR_PUSH_VLAN", pvact]) + actstr = actstr[paren + 1:] + parsed = True + + elif parse_starts_block(actstr, "clone(", False): parencount += 1 subacts = ovsactions() actstr = actstr[len("clone("):] @@ -900,11 +961,11 @@ class ovskey(nla): nla_flags = NLA_F_NESTED nla_map = ( ("OVS_KEY_ATTR_UNSPEC", "none"), - ("OVS_KEY_ATTR_ENCAP", "none"), + ("OVS_KEY_ATTR_ENCAP", "encap_ovskey"), ("OVS_KEY_ATTR_PRIORITY", "uint32"), ("OVS_KEY_ATTR_IN_PORT", "uint32"), ("OVS_KEY_ATTR_ETHERNET", "ethaddr"), - ("OVS_KEY_ATTR_VLAN", "uint16"), + ("OVS_KEY_ATTR_VLAN", "be16"), ("OVS_KEY_ATTR_ETHERTYPE", "be16"), ("OVS_KEY_ATTR_IPV4", 
"ovs_key_ipv4"), ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"), @@ -1635,6 +1696,194 @@ class ovskey(nla): class ovs_key_mpls(nla): fields = (("lse", ">I"),) + # 802.1Q CFI (Canonical Format Indicator) bit, always set for Ethernet + _VLAN_CFI_MASK = 0x1000 + + @staticmethod + def _vlan_dpstr(tci): + """Format VLAN TCI as vid=X,pcp=Y,cfi=Z or tci=0xNNNN. + + When cfi=1 (standard Ethernet VLAN), outputs decomposed + vid/pcp/cfi fields. When cfi=0 (truncated VLAN header), + falls back to raw tci=0x%04x to ensure round-trip + correctness: the parser auto-adds cfi=1 for vid/pcp + format, so cfi=0 would be lost on re-parse.""" + vid = tci & 0x0FFF + pcp = (tci >> 13) & 0x7 + cfi = (tci >> 12) & 0x1 + if cfi: + return "vid=%d,pcp=%d,cfi=%d" % (vid, pcp, cfi) + return "tci=0x%04x" % tci + + @staticmethod + def _parse_vlan_from_flowstr(flowstr): + """Parse vlan(tci=X) or vlan(vid=X[,pcp=Y,cfi=Z]) from flowstr. + + Returns (remaining_flowstr, key_tci, mask_tci). + TCI values use standard bit layout (VID bits 0-11, + CFI bit 12, PCP bits 13-15); byte order conversion to + big-endian happens in pyroute2 be16 NLA serialization. + The mask covers only the fields the caller specified: + vid -> 0x0FFF, pcp -> 0xE000, cfi -> 0x1000, tci -> 0xFFFF. + + The tci= key sets the raw TCI bitfield (no CFI validation) to allow + non-Ethernet use cases. Use cfi=1 for standard Ethernet VLAN matching. 
+ """ + tci = 0 + mask = 0 + has_tci = False + has_vid = has_pcp = has_cfi = False + _tci_mix_err = "vlan(): 'tci' cannot be mixed " \ + "with 'vid'/'pcp'/'cfi'" + first = True + while True: + flowstr = flowstr.lstrip() + if not flowstr: + raise ValueError("vlan(): missing ')'") + if flowstr[0] == ')': + break + if not first: + flowstr = flowstr[1:] # skip ',' + if not flowstr: + raise ValueError("vlan(): missing ')' after trailing comma") + flowstr = flowstr.lstrip() + if flowstr and flowstr[0] == ')': + break + if flowstr and flowstr[0] == ',': + raise ValueError( + "vlan(): empty or extra comma in field list") + first = False + + eq = flowstr.find('=') + if eq == -1: + raise ValueError( + "vlan(): expected key=value, got '%s'" % flowstr) + key = flowstr[:eq].strip() + flowstr = flowstr[eq + 1:] + + end = flowstr.find(',') + end2 = flowstr.find(')') + if end == -1 and end2 == -1: + raise ValueError("vlan(): missing ')'") + if end == -1 or (end2 != -1 and end2 < end): + end = end2 + val = flowstr[:end].strip() + flowstr = flowstr[end:] + + if not val: + raise ValueError("vlan(): empty value for key '%s'" % key) + try: + v = int(val, 0) + except ValueError as exc: + raise ValueError( + "vlan(): invalid value '%s' for key '%s'" + % (val, key)) from exc + + if key == 'tci': + if has_tci: + raise ValueError("vlan(): duplicate 'tci'") + if has_vid or has_pcp or has_cfi: + raise ValueError(_tci_mix_err) + if v > 0xFFFF or v < 0: + raise ValueError("vlan(): tci=0x%x out of range" % v) + tci = v + mask = 0xFFFF + has_tci = True + elif key == 'vid': + if has_tci: + raise ValueError(_tci_mix_err) + if has_vid: + raise ValueError("vlan(): duplicate 'vid'") + if v < 0 or v > 0xFFF: + raise ValueError("vlan(): vid=%d out of range (0-4095)" % v) + tci |= v + mask |= 0x0FFF + has_vid = True + elif key == 'pcp': + if has_tci: + raise ValueError(_tci_mix_err) + if has_pcp: + raise ValueError("vlan(): duplicate 'pcp'") + if v < 0 or v > 7: + raise ValueError("vlan(): pcp=%d out of 
range (0-7)" % v) + tci |= (v & 0x7) << 13 + mask |= 0xE000 + has_pcp = True + elif key == 'cfi': + if has_tci: + raise ValueError(_tci_mix_err) + if has_cfi: + raise ValueError("vlan(): duplicate 'cfi'") + if v != 1: + raise ValueError("vlan(): cfi must be 1 for Ethernet") + tci |= ovskey._VLAN_CFI_MASK + mask |= ovskey._VLAN_CFI_MASK + has_cfi = True + else: + raise ValueError("vlan(): unknown key '%s'" % key) + + flowstr = flowstr[1:] # skip ')' + # Catch immediate '))' (user error). A ')' after ',' is consumed + # by parse()'s strspn(flowstr, "), ") inter-field separator stripping. + if flowstr.lstrip().startswith(')'): + raise ValueError("vlan(): unmatched ')'") + # parse() strips trailing ',', ')', ' ' as inter-field separators, + # so we do not need to call strspn here. + + if mask == 0: + raise ValueError("vlan(): no fields specified, " + "use vlan(vid=X[,pcp=Y,cfi=Z]) or vlan(tci=X)") + if not has_tci: + tci |= ovskey._VLAN_CFI_MASK + mask |= ovskey._VLAN_CFI_MASK + return flowstr, tci, mask + + @staticmethod + def _parse_encap_from_flowstr(flowstr): + """Parse encap(inner_flow) from flowstr. + + Returns (remaining_flowstr, inner_key_dict, inner_mask_dict) + where each dict has an 'attrs' key for recursive NLA encoding. + Parenthesis-depth tracking handles nested encap() calls but not + quoted strings containing literal parentheses. 
+ """ + depth = 1 + end = -1 + for i, c in enumerate(flowstr): + if c == '(': + depth += 1 + elif c == ')': + depth -= 1 + if depth < 0: + raise ValueError( + "encap(): unmatched ')' at position %d" % i) + if depth == 0: + end = i + break + + if end == -1: + if depth > 1: + raise ValueError("encap(): missing ')' in nested encap") + raise ValueError("encap(): missing ')'") + + inner_str = flowstr[:end].strip() + if not inner_str: + raise ValueError("encap(): empty inner flow") + + flowstr = flowstr[end + 1:] + if flowstr.lstrip().startswith(')'): + raise ValueError("encap(): unmatched ')' after encap()") + + inner_key = encap_ovskey() + inner_mask = encap_ovskey() + remaining = inner_key.parse(inner_str, inner_mask) + if remaining and re.search(r'[^\s,)]', remaining): + raise ValueError( + "encap(): unrecognized trailing " + "content '%s'" % remaining.strip()) + + return flowstr, inner_key, inner_mask + def parse(self, flowstr, mask=None): for field in ( ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse), @@ -1657,6 +1906,16 @@ class ovskey(nla): lambda x: intparse(x, "0xffff"), ), ( + "OVS_KEY_ATTR_VLAN", + "vlan", + ovskey._parse_vlan_from_flowstr, + ), + ( + "OVS_KEY_ATTR_ENCAP", + "encap", + ovskey._parse_encap_from_flowstr, + ), + ( "OVS_KEY_ATTR_IPV4", "ipv4", ovskey.ovs_key_ipv4, @@ -1793,6 +2052,9 @@ class ovskey(nla): True, ), ("OVS_KEY_ATTR_ETHERNET", None, None, False, False), + ("OVS_KEY_ATTR_VLAN", "vlan", ovskey._vlan_dpstr, + lambda x: False, True), + ("OVS_KEY_ATTR_ENCAP", None, None, False, False), ( "OVS_KEY_ATTR_ETHERTYPE", "eth_type", @@ -1820,22 +2082,63 @@ class ovskey(nla): v = self.get_attr(field[0]) if v is not None: m = None if mask is None else mask.get_attr(field[0]) + fmt = field[2] # str format or callable if field[4] is False: print_str += v.dpstr(m, more) print_str += "," else: if m is None or field[3](m): - print_str += field[1] + "(" - print_str += field[2] % v - print_str += ")," + val = fmt(v) if callable(fmt) else fmt % v + 
print_str += field[1] + "(" + val + ")," elif more or m != 0: - print_str += field[1] + "(" - print_str += (field[2] % v) + "/" + (field[2] % m) - print_str += ")," + if field[0] == "OVS_KEY_ATTR_VLAN": + val = "tci=0x%04x/0x%04x" % (v, m) + elif callable(fmt): + val = fmt(v) + "/" + fmt(m) + else: + val = (fmt % v) + "/" + (fmt % m) + print_str += field[1] + "(" + val + ")," return print_str +class encap_ovskey(ovskey): + """Inner flow key attributes valid inside 802.1Q ENCAP. + + Only L2-L4 key attributes (slots 0-21) appear inside ENCAP. + Metadata-only attributes (SKB_MARK, DP_HASH, RECIRC_ID, etc.) + are set to "none" -- they never appear inside ENCAP per + ovs_nla_put_vlan() in net/openvswitch/flow_netlink.c. + + nla_map indexes must match OVS_KEY_ATTR_* enum values in + include/uapi/linux/openvswitch.h. + """ + nla_map = ( + ("OVS_KEY_ATTR_UNSPEC", "none"), + ("OVS_KEY_ATTR_ENCAP", "none"), # placeholder, parsed by ovskey + ("OVS_KEY_ATTR_PRIORITY", "none"), # skb metadata, not in ENCAP + ("OVS_KEY_ATTR_IN_PORT", "none"), # skb metadata, not in ENCAP + ("OVS_KEY_ATTR_ETHERNET", "ethaddr"), + ("OVS_KEY_ATTR_VLAN", "be16"), + ("OVS_KEY_ATTR_ETHERTYPE", "be16"), + ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"), + ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"), + ("OVS_KEY_ATTR_TCP", "ovs_key_tcp"), + ("OVS_KEY_ATTR_UDP", "ovs_key_udp"), + ("OVS_KEY_ATTR_ICMP", "ovs_key_icmp"), + ("OVS_KEY_ATTR_ICMPV6", "ovs_key_icmpv6"), + ("OVS_KEY_ATTR_ARP", "ovs_key_arp"), + ("OVS_KEY_ATTR_ND", "ovs_key_nd"), + ("OVS_KEY_ATTR_SKB_MARK", "none"), # metadata, not in ENCAP + ("OVS_KEY_ATTR_TUNNEL", "none"), # tunnel metadata, not in ENCAP + ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"), + ("OVS_KEY_ATTR_TCP_FLAGS", "be16"), + ("OVS_KEY_ATTR_DP_HASH", "none"), # metadata, not in ENCAP + ("OVS_KEY_ATTR_RECIRC_ID", "none"), # metadata, not in ENCAP + ("OVS_KEY_ATTR_MPLS", "array(ovs_key_mpls)"), + ) + + class OvsPacket(GenericNetlinkSocket): OVS_PACKET_CMD_MISS = 1 # Flow table miss OVS_PACKET_CMD_ACTION = 
2 # USERSPACE action @@ -2583,6 +2886,7 @@ def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()): def main(argv): + nlmsg_atoms.encap_ovskey = encap_ovskey nlmsg_atoms.ovskey = ovskey nlmsg_atoms.ovsactions = ovsactions diff --git a/tools/testing/selftests/net/packetdrill/tcp_syncookies_ip4_9k.pkt b/tools/testing/selftests/net/packetdrill/tcp_syncookies_ip4_9k.pkt new file mode 100644 index 0000000000000..60910069b3d7a --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syncookies_ip4_9k.pkt @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Check syncookies. +// +// Check we are able to rebuild client sack, wscale, ecn and mss options. +// IPv4 msstab[4] = { 536, 1300, 1440, 1460 } + +--ip_version=ipv4 + +`./defaults.sh +sysctl -q net.ipv4.tcp_syncookies=2 +ip link set dev tun0 mtu 9000 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 10) = 0 + + +0 < S 0:0(0) win 32792 <mss 8960,sackOK,TS val 100 ecr 0,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 8960,sackOK,TS val 4000 ecr 100,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 1024 <nop,nop,TS val 110 ecr 4000> + + +0 accept(3, ..., ...) = 4 + +// Check we properly infer from the final packet the other peer wanted mss >= 1460, wscale 10, sackOK and no ECN. +// Note that mss is limited to 1460 - 12 because of IPv4 msstab[] +// This is only possible because TCP TS option was used. +// Linux uses the SYNACK TS.val 6 low order bits to encode the options. 
+ + +0 %{ assert tcpi_snd_mss == 1460 - 12, tcpi_snd_mss; \ + assert tcpi_snd_wscale == 10, tcpi_snd_wscale; \ + assert (tcpi_options & TCPI_OPT_SACK) != 0, tcpi_options; \ + assert (tcpi_options & TCPI_OPT_TIMESTAMPS) != 0, tcpi_options; \ + assert (tcpi_options & TCPI_OPT_WSCALE) != 0, tcpi_options; \ + assert (tcpi_options & TCPI_OPT_ECN) == 0, tcpi_options +}% diff --git a/tools/testing/selftests/net/packetdrill/tcp_syncookies_ip6_9k.pkt b/tools/testing/selftests/net/packetdrill/tcp_syncookies_ip6_9k.pkt new file mode 100644 index 0000000000000..f333c61044bc6 --- /dev/null +++ b/tools/testing/selftests/net/packetdrill/tcp_syncookies_ip6_9k.pkt @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 +// +// Check syncookies. +// +// Check we are able to rebuild client sack, wscale, ecn and mss options. +// IPv6 msstab[4] = { 1280 - 60, 1480 - 60, 1500 - 60, 9000 - 60 } + +--ip_version=ipv6 + +`./defaults.sh +sysctl -q net.ipv4.tcp_syncookies=2 +ip link set dev tun0 mtu 9000 +` + + 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 + +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 + +0 bind(3, ..., ...) = 0 + +0 listen(3, 10) = 0 + + +0 < S 0:0(0) win 32792 <mss 8940,sackOK,TS val 100 ecr 0,nop,wscale 10> + +0 > S. 0:0(0) ack 1 <mss 8940,sackOK,TS val 4000 ecr 100,nop,wscale 8> + +.01 < . 1:1(0) ack 1 win 1024 <nop,nop,TS val 110 ecr 4000> + + +0 accept(3, ..., ...) = 4 + +// Check we properly infer from the final packet the other peer wanted mss >= 8940, wscale 10, sackOK and no ECN. +// This is only possible because TCP TS option was used. +// Linux uses the SYNACK TS.val 6 low order bits to encode the options. 
+ + +0 %{ assert tcpi_snd_mss == 8940 - 12, tcpi_snd_mss; \ + assert tcpi_snd_wscale == 10, tcpi_snd_wscale; \ + assert (tcpi_options & TCPI_OPT_SACK) != 0, tcpi_options; \ + assert (tcpi_options & TCPI_OPT_TIMESTAMPS) != 0, tcpi_options; \ + assert (tcpi_options & TCPI_OPT_WSCALE) != 0, tcpi_options; \ + assert (tcpi_options & TCPI_OPT_ECN) == 0, tcpi_options +}% diff --git a/tools/testing/selftests/net/ppp/Makefile b/tools/testing/selftests/net/ppp/Makefile index b39b0abadde66..6036fa134351f 100644 --- a/tools/testing/selftests/net/ppp/Makefile +++ b/tools/testing/selftests/net/ppp/Makefile @@ -5,6 +5,7 @@ top_srcdir = ../../../../.. TEST_PROGS := \ ppp_async.sh \ pppoe.sh \ + pppol2tp.sh \ # end of TEST_PROGS TEST_FILES := \ diff --git a/tools/testing/selftests/net/ppp/config b/tools/testing/selftests/net/ppp/config index b45d25c5b9708..843545df8f033 100644 --- a/tools/testing/selftests/net/ppp/config +++ b/tools/testing/selftests/net/ppp/config @@ -1,4 +1,5 @@ CONFIG_IPV6=y +CONFIG_L2TP=m CONFIG_PACKET=y CONFIG_PPP=m CONFIG_PPP_ASYNC=m @@ -6,4 +7,5 @@ CONFIG_PPP_BSDCOMP=m CONFIG_PPP_DEFLATE=m CONFIG_PPPOE=m CONFIG_PPPOE_HASH_BITS_4=y +CONFIG_PPPOL2TP=m CONFIG_VETH=y diff --git a/tools/testing/selftests/net/ppp/pppol2tp.sh b/tools/testing/selftests/net/ppp/pppol2tp.sh new file mode 100755 index 0000000000000..5b592785f1f9b --- /dev/null +++ b/tools/testing/selftests/net/ppp/pppol2tp.sh @@ -0,0 +1,95 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +source ppp_common.sh + +VETH_SERVER="veth-server" +VETH_CLIENT="veth-client" +OUTER_IP_SERVER="172.16.1.1" +OUTER_IP_CLIENT="172.16.1.2" + +PPPOL2TP_DIR=$(mktemp -d /tmp/pppol2tp.XXXXXX) + +# shellcheck disable=SC2329 +cleanup() { + cleanup_all_ns + rm -rf "$PPPOL2TP_DIR" +} + +trap cleanup EXIT + +require_command xl2tpd +ppp_common_init +modprobe -q l2tp_ppp + +# Create the veth pair +ip link add "$VETH_SERVER" type veth peer name "$VETH_CLIENT" +ip link set "$VETH_SERVER" netns "$NS_SERVER" +ip link set 
"$VETH_CLIENT" netns "$NS_CLIENT" +ip -netns "$NS_SERVER" link set "$VETH_SERVER" up +ip -netns "$NS_CLIENT" link set "$VETH_CLIENT" up +ip -netns "$NS_SERVER" address add dev "$VETH_SERVER" "$OUTER_IP_SERVER" peer "$OUTER_IP_CLIENT" +ip -netns "$NS_CLIENT" address add dev "$VETH_CLIENT" "$OUTER_IP_CLIENT" peer "$OUTER_IP_SERVER" + +# Generate configuration files +cat > "$PPPOL2TP_DIR/l2tp-server.conf" <<EOF +[global] +listen-addr = $OUTER_IP_SERVER +access control = no + +[lns default] +ip range = $IP_CLIENT +local ip = $IP_SERVER +require authentication = no +require chap = no +require pap = no +ppp debug = yes +pppoptfile = $(pwd)/pppoe-server-options +EOF + +cat > "$PPPOL2TP_DIR/l2tp-client.conf" <<EOF +[global] +listen-addr = $OUTER_IP_CLIENT +access control = no + +[lac server] +lns = $OUTER_IP_SERVER +require authentication = no +require chap = no +require pap = no +ppp debug = yes +pppoptfile = $(pwd)/pppoe-server-options +EOF + +# Start the L2TP Server +ip netns exec "$NS_SERVER" xl2tpd -D -c "$PPPOL2TP_DIR/l2tp-server.conf" \ + -p "$PPPOL2TP_DIR/l2tp-server.pid" -C "$PPPOL2TP_DIR/l2tp-server.control" & + +# Start the L2TP Client +ip netns exec "$NS_CLIENT" xl2tpd -D -c "$PPPOL2TP_DIR/l2tp-client.conf" \ + -p "$PPPOL2TP_DIR/l2tp-client.pid" -C "$PPPOL2TP_DIR/l2tp-client.control" & + +# Wait for xl2tpd to start and open their control pipes +slowwait 2 [ -p "$PPPOL2TP_DIR/l2tp-server.control" ] +slowwait 2 [ -p "$PPPOL2TP_DIR/l2tp-client.control" ] + +# Connect LAC to LNS +echo "c server" > "$PPPOL2TP_DIR/l2tp-client.control" + +ppp_test_connectivity + +log_test "PPPoL2TP" + +# Recursion test +RET=0 +# Delete route to LNS IP +ip -netns "$NS_CLIENT" route del "$OUTER_IP_SERVER" +# Add default route through ppp0 +ip -netns "$NS_CLIENT" route add default dev ppp0 +# ping (we expect the ping to fail but not deadlock the system) +ip netns exec "$NS_CLIENT" ping -c 1 "$IP_SERVER" -w 1 +check_fail $? 
+ +log_test "PPPoL2TP Recursion" + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/protodown.sh b/tools/testing/selftests/net/protodown.sh new file mode 100755 index 0000000000000..0a7b78c63c37a --- /dev/null +++ b/tools/testing/selftests/net/protodown.sh @@ -0,0 +1,182 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Test the protodown mechanism. Verify basic protodown toggling, protodown +# reasons, operational state when the lower device carrier changes, and correct +# operational state when the lower device has no carrier. + +# shellcheck disable=SC1091,SC2034,SC2154,SC2317 +source lib.sh + +require_command jq + +ALL_TESTS=" + protodown_basic_macvlan + protodown_basic_vxlan + protodown_reasons + protodown_lower_toggle + protodown_lower_down +" + +operstate_get() +{ + local ns=$1; shift + local dev=$1; shift + + ip -n "$ns" -j link show dev "$dev" | jq -r '.[].operstate' +} + +operstate_check() +{ + local ns=$1; shift + local dev=$1; shift + local expected=$1; shift + + local current + current=$(operstate_get "$ns" "$dev") + + [ "$current" = "$expected" ] +} + +setup_prepare() +{ + setup_ns NS + defer cleanup_all_ns + + ip -n "$NS" link add name dummy0 up type dummy + + ip -n "$NS" link add name macvlan0 link dummy0 up type macvlan mode bridge + + ip -n "$NS" link add name vxlan0 up type vxlan id 10010 dstport 4789 +} + +protodown_basic() +{ + local dev=$1; shift + + ip -n "$NS" link set dev "$dev" protodown on + check_err $? "Failed to set protodown on" + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" "$dev" DOWN + check_err $? "Operational state is not DOWN after setting protodown" + + ip -n "$NS" link set dev "$dev" protodown off + check_err $? "Failed to set protodown off" + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" "$dev" UP + check_err $? 
"Operational state is not UP after clearing protodown" +} + +protodown_basic_macvlan() +{ + RET=0 + + protodown_basic macvlan0 + + log_test "Basic protodown on/off with macvlan" +} + +protodown_basic_vxlan() +{ + RET=0 + + protodown_basic vxlan0 + + log_test "Basic protodown on/off with vxlan" +} + +protodown_reasons() +{ + RET=0 + + ip -n "$NS" link set dev macvlan0 protodown on + + ip -n "$NS" link set dev macvlan0 protodown_reason 0 on + check_err $? "Failed to set protodown reason bit 0" + + # Cannot clear protodown while reasons are active. + ip -n "$NS" link set dev macvlan0 protodown off 2>/dev/null + check_fail $? "Clearing protodown succeeded with active reasons" + + ip -n "$NS" link set dev macvlan0 protodown_reason 0 off + check_err $? "Failed to clear protodown reason bit 0" + + # Can clear protodown when no reasons are active. + ip -n "$NS" link set dev macvlan0 protodown off + check_err $? "Failed to clear protodown with no active reasons" + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 UP + check_err $? "Operational state is not UP after clearing protodown" + + log_test "Protodown reasons" +} + +protodown_lower_toggle() +{ + RET=0 + + ip -n "$NS" link set dev macvlan0 protodown on + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 DOWN + check_err $? "Operational state is not DOWN after setting protodown" + + # Toggle carrier on the lower device. The macvlan should stay DOWN + # because protodown is on. + ip -n "$NS" link set dev dummy0 carrier off + ip -n "$NS" link set dev dummy0 carrier on + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" dummy0 UP + check_err $? "Lower device is not UP after carrier on" + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 DOWN + check_err $? "Macvlan operational state is not DOWN despite protodown" + + # Clear protodown and verify the macvlan comes back up. 
+ ip -n "$NS" link set dev macvlan0 protodown off + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 UP + check_err $? "Operational state is not UP after clearing protodown" + + log_test "Protodown with lower device toggled" +} + +protodown_lower_down() +{ + RET=0 + + # Bring the lower device carrier down first. + ip -n "$NS" link set dev dummy0 carrier off + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 LOWERLAYERDOWN + check_err $? "Macvlan is not LOWERLAYERDOWN with lower carrier off" + + # Toggle protodown on and off while lower has no carrier. The macvlan + # should not transition to UP. + ip -n "$NS" link set dev macvlan0 protodown on + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 LOWERLAYERDOWN + check_err $? "Macvlan is not LOWERLAYERDOWN after setting protodown" + + ip -n "$NS" link set dev macvlan0 protodown off + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 LOWERLAYERDOWN + check_err $? "Macvlan is not LOWERLAYERDOWN after clearing protodown" + + # Bring the lower device carrier up. The macvlan should transition to + # UP. + ip -n "$NS" link set dev dummy0 carrier on + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" dummy0 UP + check_err $? "Lower device is not UP after carrier on" + + busywait "$BUSYWAIT_TIMEOUT" operstate_check "$NS" macvlan0 UP + check_err $? "Macvlan is not UP after lower device is UP" + + log_test "Protodown with lower device down" +} + +trap defer_scopes_cleanup EXIT +setup_prepare +tests_run + +exit "$EXIT_STATUS" diff --git a/tools/testing/selftests/net/rds/README.txt b/tools/testing/selftests/net/rds/README.txt index c6fe003d503bf..bac6f15a80d52 100644 --- a/tools/testing/selftests/net/rds/README.txt +++ b/tools/testing/selftests/net/rds/README.txt @@ -1,21 +1,27 @@ RDS self-tests ============== -These scripts provide a coverage test for RDS-TCP by creating two -network namespaces and running rds packets between them. 
A loopback -network is provisioned with optional probability of packet loss or -corruption. A workload of 50000 hashes, each 64 characters in size, -are passed over an RDS socket on this test network. A passing test means -the RDS-TCP stack was able to recover properly. The provided config.sh -can be used to compile the kernel with the necessary gcov options. The -kernel may optionally be configured to omit the coverage report as well. +These scripts provide a coverage test for RDS-TCP and RDS-RDMA (over +RoCE/RXE) by setting up two endpoints and running RDS packets between +them. The TCP path creates two network namespaces; the RDMA path uses +an RXE (soft RoCE) device backed by a veth pair. A workload of 50000 +hashes, each 64 characters in size, is passed over an RDS socket on +this test network with an optional probability of packet loss or +corruption. A passing test means the RDS stack was able to recover +properly. The provided config.sh can be used to compile the kernel +with the necessary gcov options; pass -r to also enable the kernel +configs required for the RDMA transport. The kernel may optionally be +configured to omit the coverage report as well. USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption] - [-u packet_duplcate] + [-u packet_duplicate] [-t timeout] + [-T tcp|rdma|tcp,rdma] OPTIONS: - -d Log directory. Defaults to tools/testing/selftests/net/rds/rds_logs + -d Log directory. If set, logs will be stored in the + given dir, or skipped if unset. Log dir can also be + set through the RDS_LOG_DIR env variable -l Simulates a percentage of packet loss @@ -23,11 +29,36 @@ OPTIONS: -u Simulates a percentage of packet duplication. + -t Test timeout. Defaults to tools/testing/selftests/net/rds/settings + + -T Comma-separated list of transports to test. Accepts + "tcp", "rdma", or "tcp,rdma". Defaults to "tcp". Use + config.sh -r to enable required RDMA configs + +ENV VARIABLES: + RDS_LOG_DIR Log directory. 
If set, logs will be stored in + the given dir, or skipped if unset. Log dir + can also be set with the -d flag. + + Use with --rwdir on the CI path to retain logs after + test completion. Log dir end point must be within + the specified --rwdir path for logs to persist on + the host. + + SUDO_USER The user name that should be used for tcpdump + --relinquish-privileges. Set this to a user + belonging to the sudoers group to avoid drop + privilege errors with the vng 9p filesystem + which may result in empty pcaps + EXAMPLE: # Create a suitable gcov enabled .config tools/testing/selftests/net/rds/config.sh -g + # Optionally add RDMA configs (CONFIG_RDS_RDMA, CONFIG_RDMA_RXE) + tools/testing/selftests/net/rds/config.sh -r + # Alternately create a gcov disabled .config tools/testing/selftests/net/rds/config.sh @@ -39,6 +70,8 @@ EXAMPLE: # launch the tests in a VM vng -v --rwdir ./ --run . --user root --cpus 4 -- \ - "export PYTHONPATH=tools/testing/selftests/net/; tools/testing/selftests/net/rds/run.sh" + "export PYTHONPATH=tools/testing/selftests/net/; \ + export SUDO_USER=example_user; \ + export RDS_LOG_DIR=tools/testing/selftests/net/rds/rds_logs; \ + tools/testing/selftests/net/rds/run.sh -T tcp,rdma" -An HTML coverage report will be output in tools/testing/selftests/net/rds/rds_logs/coverage/. diff --git a/tools/testing/selftests/net/rds/config.sh b/tools/testing/selftests/net/rds/config.sh index 29a79314dd60f..be0668359a070 100755 --- a/tools/testing/selftests/net/rds/config.sh +++ b/tools/testing/selftests/net/rds/config.sh @@ -10,7 +10,8 @@ CONF_FILE="" FLAGS=() GENERATE_GCOV_REPORT=0 -while getopts "gc:" opt; do +ENABLE_RDMA=0 +while getopts "gc:r" opt; do case ${opt} in g) GENERATE_GCOV_REPORT=1 @@ -18,8 +19,11 @@ while getopts "gc:" opt; do c) CONF_FILE=$OPTARG ;; + r) + ENABLE_RDMA=1 + ;; :) - echo "USAGE: config.sh [-g] [-c config]" + echo "USAGE: config.sh [-g] [-c config] [-r]" exit 1 ;; ?)
@@ -58,3 +62,10 @@ scripts/config "${FLAGS[@]}" --enable CONFIG_VETH # simulate packet loss scripts/config "${FLAGS[@]}" --enable CONFIG_NET_SCH_NETEM +if [ "$ENABLE_RDMA" -eq 1 ]; then + # enable RDS over InfiniBand / RDMA (rds_rdma test) + scripts/config "${FLAGS[@]}" --enable CONFIG_INFINIBAND + scripts/config "${FLAGS[@]}" --enable CONFIG_INFINIBAND_ADDR_TRANS + scripts/config "${FLAGS[@]}" --enable CONFIG_RDMA_RXE + scripts/config "${FLAGS[@]}" --enable CONFIG_RDS_RDMA +fi diff --git a/tools/testing/selftests/net/rds/run.sh b/tools/testing/selftests/net/rds/run.sh index 897d17d1b8dbf..07af2f927a2a7 100755 --- a/tools/testing/selftests/net/rds/run.sh +++ b/tools/testing/selftests/net/rds/run.sh @@ -35,7 +35,7 @@ GCOV_CMD=gcov check_gcov_env() { if ! which "$GCOV_CMD" > /dev/null 2>&1; then - echo "Warning: Could not find gcov. " + echo "# Warning: Could not find gcov. " GENERATE_GCOV_REPORT=0 return fi @@ -48,7 +48,7 @@ check_gcov_env() GCOV_CMD=gcov-$(gcc -dumpversion) if ! which "$GCOV_CMD" > /dev/null 2>&1; then - echo "Warning: Could not find an appropriate gcov installation. \ + echo "# Warning: Could not find an appropriate gcov installation. \ gcov version must match gcc version" GENERATE_GCOV_REPORT=0 return @@ -58,11 +58,11 @@ check_gcov_env() GCOV_VER=$($GCOV_CMD -v | grep gcov | awk '{print $3}'| \ awk 'BEGIN {FS="-"}{print $1}') if [ "$GCOV_VER" != "$GCC_VER" ]; then - echo "Warning: Could not find an appropriate gcov installation. \ + echo "# Warning: Could not find an appropriate gcov installation. \ gcov version must match gcc version" GENERATE_GCOV_REPORT=0 else - echo "Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD" + echo "# Warning: Mismatched gcc and gcov detected. Using $GCOV_CMD" fi fi } @@ -71,20 +71,20 @@ check_gcov_env() check_gcov_conf() { if ! 
grep -x "CONFIG_GCOV_PROFILE_RDS=y" "$kconfig" > /dev/null 2>&1; then - echo "INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" + echo "# INFO: CONFIG_GCOV_PROFILE_RDS should be enabled for coverage reports" GENERATE_GCOV_REPORT=0 fi if ! grep -x "CONFIG_GCOV_KERNEL=y" "$kconfig" > /dev/null 2>&1; then - echo "INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" + echo "# INFO: CONFIG_GCOV_KERNEL should be enabled for coverage reports" GENERATE_GCOV_REPORT=0 fi if grep -x "CONFIG_GCOV_PROFILE_ALL=y" "$kconfig" > /dev/null 2>&1; then - echo "INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" + echo "# INFO: CONFIG_GCOV_PROFILE_ALL should be disabled for coverage reports" GENERATE_GCOV_REPORT=0 fi if [ "$GENERATE_GCOV_REPORT" -eq 0 ]; then - echo "To enable gcov reports, please run "\ + echo "# To enable gcov reports, please run "\ "\"tools/testing/selftests/net/rds/config.sh -g\" and rebuild the kernel" else # if we have the required kernel configs, proceed to check the environment to @@ -101,6 +101,16 @@ check_conf_enabled() { exit 4 fi } + +check_rdma_conf_enabled() { + if ! grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then + echo "selftests: [SKIP] rdma transport requires $1 enabled" + echo "To enable, run " \ + "tools/testing/selftests/net/rds/config.sh -r and rebuild" + exit 4 + fi +} + check_conf_disabled() { if grep -x "$1=y" "$kconfig" > /dev/null 2>&1; then echo "selftests: [SKIP] This test requires $1 disabled" @@ -117,6 +127,28 @@ check_conf() { check_conf_disabled CONFIG_MODULES } +# Check kernel config and host environment for RDS-RDMA support. +# Exits with SKIP (4) if the user requested rdma but prerequisites +# are not met. +check_rdma_conf() +{ + case "$TRANSPORT" in + *rdma*) ;; + *) return ;; + esac + + # Kconfig will enforce CONFIG_INFINIBAND_* as dependencies + # of CONFIG_RDMA_RXE + check_rdma_conf_enabled CONFIG_RDMA_RXE + check_rdma_conf_enabled CONFIG_RDS_RDMA + + if ! 
which rdma > /dev/null 2>&1; then + echo "selftests: [SKIP] rdma transport requires the 'rdma'" \ + " tool (iproute2)" + exit 4 + fi +} + check_env() { if ! test -d "$obj_dir"; then @@ -150,28 +182,35 @@ check_env() fi } -LOG_DIR="$current_dir"/rds_logs -PLOSS=0 -PCORRUPT=0 -PDUP=0 +LOG_DIR="${RDS_LOG_DIR:-}" +TIMEOUT=$timeout GENERATE_GCOV_REPORT=1 -while getopts "d:l:c:u:" opt; do +TRANSPORT=tcp +FLAGS=() + +while getopts "d:l:c:u:t:T:" opt; do case ${opt} in d) LOG_DIR=${OPTARG} ;; l) - PLOSS=${OPTARG} + FLAGS+=("-l" "${OPTARG}") ;; c) - PCORRUPT=${OPTARG} + FLAGS+=("-c" "${OPTARG}") + ;; + t) + TIMEOUT=${OPTARG} ;; u) - PDUP=${OPTARG} + FLAGS+=("-u" "${OPTARG}") + ;; + T) + TRANSPORT=${OPTARG} ;; :) echo "USAGE: run.sh [-d logdir] [-l packet_loss] [-c packet_corruption]" \ - "[-u packet_duplcate] [-g]" + "[-u packet_duplicate] [-t timeout] [-T tcp|rdma|tcp,rdma]" exit 1 ;; ?) @@ -181,47 +220,79 @@ while getopts "d:l:c:u:" opt; do esac done +# Validate transport tokens +IFS=',' read -ra transports <<< "$TRANSPORT" +for t in "${transports[@]}"; do + if [ "$t" != "tcp" ] && [ "$t" != "rdma" ]; then + echo "run.sh: unknown transport '$t' (expected tcp or rdma)" + exit 1 + fi +done + +FLAGS+=("--transport" "${TRANSPORT}") check_env check_conf check_gcov_conf +check_rdma_conf + +TRACE_CMD=() +if [[ -n "$LOG_DIR" ]]; then + FLAGS+=("-d" "$LOG_DIR") + + TRACE_FILE="${LOG_DIR}/rds-strace.txt" + COVR_DIR="${LOG_DIR}/coverage/" + DMESG_FILE="${LOG_DIR}/rds-dmesg.out" + mkdir -p "$LOG_DIR" + mkdir -p "$COVR_DIR" -rm -fr "$LOG_DIR" -TRACE_FILE="${LOG_DIR}/rds-strace.txt" -COVR_DIR="${LOG_DIR}/coverage/" -mkdir -p "$LOG_DIR" -mkdir -p "$COVR_DIR" + rm -f "$TRACE_FILE" + rm -f "$DMESG_FILE" + rm -f "$LOG_DIR"/rds-*.pcap + rm -f "$COVR_DIR"/gcovr* + + echo "# Traces will be logged to ${TRACE_FILE}" + TRACE_CMD=(strace -T -tt -o "${TRACE_FILE}") +fi set +e -echo running RDS tests... 
-echo Traces will be logged to "$TRACE_FILE" -rm -f "$TRACE_FILE" -strace -T -tt -o "$TRACE_FILE" python3 "$(dirname "$0")/test.py" \ - --timeout "$timeout" -d "$LOG_DIR" -l "$PLOSS" -c "$PCORRUPT" -u "$PDUP" +echo "# running RDS tests..." +"${TRACE_CMD[@]}" python3 "$(dirname "$0")/test.py" "${FLAGS[@]}" -t "$TIMEOUT" test_rc=$? -dmesg > "${LOG_DIR}/dmesg.out" -if [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then - echo saving coverage data... +if [[ -n "$LOG_DIR" ]]; then + dmesg > "${DMESG_FILE}" +fi + +if [[ -n "$LOG_DIR" ]] && [ "$GENERATE_GCOV_REPORT" -eq 1 ]; then + echo "# saving coverage data..." + + # Ensure debugfs is mounted before reading gcov data. + if ! mountpoint -q /sys/kernel/debug 2>/dev/null; then + mount -t debugfs debugfs /sys/kernel/debug 2>/dev/null || true + fi + (set +x; cd /sys/kernel/debug/gcov; find ./* -name '*.gcda' | \ while read -r f do cat < "/sys/kernel/debug/gcov/$f" > "/$f" done) - echo running gcovr... + echo "# running gcovr..." gcovr -s --html-details --gcov-executable "$GCOV_CMD" --gcov-ignore-parse-errors \ - -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" + --root "${ksrc_dir}" -o "${COVR_DIR}/gcovr" "${ksrc_dir}/net/rds/" \ + > "${LOG_DIR}/gcovr.log" 2>&1 + echo "# gcovr log: ${LOG_DIR}/gcovr.log" else - echo "Coverage report will be skipped" + echo "# Coverage report will be skipped" fi if [ "$test_rc" -eq 0 ]; then - echo "PASS: Test completed successfully" + echo "# PASS: Test completed successfully" else - echo "FAIL: Test failed" + echo "# FAIL: Test failed" fi exit "$test_rc" diff --git a/tools/testing/selftests/net/rds/settings b/tools/testing/selftests/net/rds/settings index d2009a64589cc..8cb41e6a83cca 100644 --- a/tools/testing/selftests/net/rds/settings +++ b/tools/testing/selftests/net/rds/settings @@ -1 +1 @@ -timeout=400 +timeout=800 diff --git a/tools/testing/selftests/net/rds/test.py b/tools/testing/selftests/net/rds/test.py index 93e23e8b256ce..08f2a846a8ab5 100755 --- a/tools/testing/selftests/net/rds/test.py +++ 
b/tools/testing/selftests/net/rds/test.py @@ -1,23 +1,30 @@ #! /usr/bin/env python3 # SPDX-License-Identifier: GPL-2.0 +""" +This module provides functional testing for the net/rds component. +""" import argparse +import atexit import ctypes import errno import hashlib import os import select +import re import signal import socket import subprocess import sys -import tempfile -import shutil +import time # Allow utils module to be imported from different directory this_dir = os.path.dirname(os.path.realpath(__file__)) sys.path.append(os.path.join(this_dir, "../")) -from lib.py.utils import ip +# pylint: disable-next=wrong-import-position,import-error,no-name-in-module +from lib.py.utils import ip, cmd # noqa: E402 +# pylint: disable-next=wrong-import-position,import-error,no-name-in-module +from lib.py.ksft import ksft_pr # noqa: E402 libc = ctypes.cdll.LoadLibrary('libc.so.6') setns = libc.setns @@ -28,6 +35,36 @@ NET1 = 'net1' VETH0 = 'veth0' VETH1 = 'veth1' +tcpdump_procs = [] +tcp_addrs = [ + # we technically don't need different port numbers, but this will + # help identify traffic in the network analyzer + ('10.0.0.1', 10000), + ('10.0.0.2', 20000), +] + +# RDMA network configs +RXE_DEV0 = 'rxe0' +RXE_DEV1 = 'rxe1' + +VETH_RDMA0 = 'veth_rdma0' +VETH_RDMA1 = 'veth_rdma1' + +rdma_addrs = [ + ('10.0.0.3', 30000), + ('10.0.0.4', 30000), +] + +# send_packets flag space +OP_FLAG_TCP = 0x1 +OP_FLAG_RDMA = 0x2 + +signal_handler_label = "" + +tap_idx = 0 +nr_pass = 0 +nr_fail = 0 + # Helper function for creating a socket inside a network namespace. 
# We need this because otherwise RDS will detect that the two TCP # sockets are on the same interface and use the loop transport instead @@ -43,233 +80,459 @@ def netns_socket(netns, *sock_args): child = os.fork() if child == 0: - # change network namespace - with open(f'/var/run/netns/{netns}', encoding='utf-8') as f: - try: + try: + # change network namespace + with open(f'/var/run/netns/{netns}', encoding='utf-8') as f: setns(f.fileno(), 0) - except IOError as e: - print(e.errno) - print(e) + # create socket in target namespace + sock = socket.socket(*sock_args) - # create socket in target namespace - sock = socket.socket(*sock_args) + # send resulting socket to parent + socket.send_fds(u0, [], [sock.fileno()]) - # send resulting socket to parent - socket.send_fds(u0, [], [sock.fileno()]) - - sys.exit(0) + os._exit(0) + except BaseException: + os._exit(1) # receive socket from child _, fds, _, _ = socket.recv_fds(u1, 0, 1) - os.waitpid(child, 0) + _, status = os.waitpid(child, 0) u0.close() u1.close() + if not os.WIFEXITED(status) or os.WEXITSTATUS(status) != 0: + raise RuntimeError( + f"netns_socket child failed in netns {netns} (status={status})") return socket.fromfd(fds[0], *sock_args) +def send_burst(socks, ip_addrs, snd_hashes, nr_sent, nr_total): + """Send until blocked or nr_total reached. 
Return updated nr_sent.""" + + while nr_sent < nr_total: + data = hashlib.sha256( + f'packet {nr_sent}'.encode('utf-8')).hexdigest().encode('utf-8') + # pseudo-random send/receive pattern + snd_idx = nr_sent % 2 + rcv_idx = 1 - (nr_sent % 3) % 2 + + snd = socks[snd_idx] + rcv = socks[rcv_idx] + try: + snd.sendto(data, ip_addrs[rcv_idx]) + except BlockingIOError: + return nr_sent + except OSError as e: + if e.errno in (errno.ENOBUFS, errno.ECONNRESET, errno.EPIPE): + return nr_sent + raise + snd_hashes.setdefault((snd.fileno(), rcv.fileno()), + hashlib.sha256()).update(f'<{data}>'.encode('utf-8')) + nr_sent += 1 + return nr_sent + +def recv_burst(epoll, socks, ip_addrs, rcv_hashes, nr_rcv): + """Drain whatever's readable from epoll. Return updated nr_recv.""" + for filen, evntmask in epoll.poll(): + if not evntmask & select.EPOLLRDNORM: + continue + rcv = next(s for s in socks if s.fileno() == filen) + while True: + try: + data, adr = rcv.recvfrom(1024) + except BlockingIOError: + break + snd_idx = ip_addrs.index(adr) + snd = socks[snd_idx] + rcv_hashes.setdefault((snd.fileno(), rcv.fileno()), + hashlib.sha256()).update(f'<{data}>'.encode('utf-8')) + nr_rcv += 1 + return nr_rcv + +def check_info(socks): + """ + Check all rds info pages for errors + + :param socks: list of sockets to check + """ + + # the Python socket module doesn't know these + rds_info_first = 10000 + rds_info_last = 10017 + + nr_success = 0 + nr_error = 0 + + for sock in socks: + for optname in range(rds_info_first, rds_info_last + 1): + # Sigh, the Python socket module doesn't allow us to pass + # buffer lengths greater than 1024 for some reason. RDS + # wants multiple pages. 
+ try: + sock.getsockopt(socket.SOL_RDS, optname, 1024) + nr_success = nr_success + 1 + except OSError as e: + nr_error = nr_error + 1 + if e.errno == errno.ENOSPC: + # ignore + pass + + ksft_pr(f"getsockopt(): {nr_success}/{nr_error}") + +def verify_hashes(snd_hashes, rcv_hashes): + """Compare send/recv hashes per (sender, receiver) pair.""" + for key, snd_hash in snd_hashes.items(): + rcv_hash = rcv_hashes.get(key) + if rcv_hash is None: + ksft_pr("FAIL: No data received") + return 1 + if snd_hash.hexdigest() != rcv_hash.hexdigest(): + ksft_pr("FAIL: Send/recv mismatch") + ksft_pr("hash expected:", snd_hash.hexdigest()) + ksft_pr("hash received:", rcv_hash.hexdigest()) + return 1 + ksft_pr(f"{key[0]}/{key[1]}: ok") + return 0 + +def snd_rcv_packets(env): + """ + Send packets on the given network interfaces + + :param env: transport-environment dict for setup_tcp() / setup_rdma(). + "addrs": list of (ip, port) tuples matching the sockets + "netns": list of netns names for TCP or None for RDMA + "flags": OP_FLAG_TCP or OP_FLAG_RDMA, selects sockets + """ + + addrs = env["addrs"] + netns_list = env["netns"] + flags = env.get("flags", 0) + + if (flags & OP_FLAG_TCP) and (flags & OP_FLAG_RDMA): + raise RuntimeError(f"Invalid transport flag sets multiple transports: {flags}") + + if flags & OP_FLAG_TCP: + sockets = [ + netns_socket(netns_list[0], socket.AF_RDS, socket.SOCK_SEQPACKET), + netns_socket(netns_list[1], socket.AF_RDS, socket.SOCK_SEQPACKET), + ] + elif flags & OP_FLAG_RDMA: + sockets = [ + socket.socket(socket.AF_RDS, socket.SOCK_SEQPACKET), + socket.socket(socket.AF_RDS, socket.SOCK_SEQPACKET), + ] + else: + raise RuntimeError(f"Invalid transport flag sets no transports: {flags}") + + for s, addr in zip(sockets, addrs): + s.bind(addr) + s.setblocking(0) + + send_hashes = {} + recv_hashes = {} + + ep = select.epoll() + + for s in sockets: + ep.register(s, select.EPOLLRDNORM) + + num_packets = 50000 + nr_send = 0 + nr_recv = 0 + + while nr_send < num_packets: 
+ + # Send as much as we can without blocking + ksft_pr("sending...", nr_send, nr_recv) + nr_send = send_burst(sockets, addrs, send_hashes, nr_send, num_packets) + + # Receive as much as we can without blocking + ksft_pr("receiving...", nr_send, nr_recv) + while nr_recv < nr_send: + nr_recv = recv_burst(ep, sockets, addrs, recv_hashes, nr_recv) + + # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() + if netns_list: + for net in netns_list: + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") + ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + + ksft_pr("done", nr_send, nr_recv) + + check_info(sockets) + + # We're done sending and receiving stuff, now let's check if what + # we received is what we sent. + rc = verify_hashes(send_hashes, recv_hashes) + + ep.close() + for s in sockets: + s.close() + + return rc + +def stop_pcaps(): + """Stop tcpdump processes. + + We use pop() here to drain the list in the event that the test + completes after the signal handler is fired. List will be empty + if logdir is not set + """ + + if not tcpdump_procs: + return + + ksft_pr("Stopping network packet captures") + while tcpdump_procs: + proc = tcpdump_procs.pop() + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + proc.wait() + def signal_handler(_sig, _frame): """ Test timed out signal handler """ - print('Test timed out') + ksft_pr(f"Test timed out: {signal_handler_label}") + print(f"not ok {tap_idx} rds selftest {signal_handler_label}") sys.exit(1) -#Parse out command line arguments. 
We take an optional -# timeout parameter and an optional log output folder -parser = argparse.ArgumentParser(description="init script args", - formatter_class=argparse.ArgumentDefaultsHelpFormatter) -parser.add_argument("-d", "--logdir", action="store", - help="directory to store logs", default="/tmp") -parser.add_argument('--timeout', help="timeout to terminate hung test", - type=int, default=0) -parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", - type=int, default=0) -parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", - type=int, default=0) -parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", - type=int, default=0) -args = parser.parse_args() -logdir=args.logdir -PACKET_LOSS=str(args.loss)+'%' -PACKET_CORRUPTION=str(args.corruption)+'%' -PACKET_DUPLICATE=str(args.duplicate)+'%' +def setup_tcp(): + """ + Configure tcp network + """ -ip(f"netns add {NET0}") -ip(f"netns add {NET1}") -ip("link add type veth") + # clean up any leftovers from a previously interrupted run + teardown_tcp() -addrs = [ - # we technically don't need different port numbers, but this will - # help identify traffic in the network analyzer - ('10.0.0.1', 10000), - ('10.0.0.2', 20000), -] + ip(f"netns add {NET0}") + ip(f"netns add {NET1}") + ip("link add type veth") -# move interfaces to separate namespaces so they can no longer be -# bound directly; this prevents rds from switching over from the tcp -# transport to the loop transport. -ip(f"link set {VETH0} netns {NET0} up") -ip(f"link set {VETH1} netns {NET1} up") + # Move TCP interfaces into separate namespaces so they can no longer be + # bound directly; this prevents rds from switching over from the tcp + # transport to the loop transport. 
+ ip(f"link set {VETH0} netns {NET0} up") + ip(f"link set {VETH1} netns {NET1} up") + # add addresses + ip(f"-n {NET0} addr add {tcp_addrs[0][0]}/32 dev {VETH0}") + ip(f"-n {NET1} addr add {tcp_addrs[1][0]}/32 dev {VETH1}") + # add routes + ip(f"-n {NET0} route add {tcp_addrs[1][0]}/32 dev {VETH0}") + ip(f"-n {NET1} route add {tcp_addrs[0][0]}/32 dev {VETH1}") -# add addresses -ip(f"-n {NET0} addr add {addrs[0][0]}/32 dev {VETH0}") -ip(f"-n {NET1} addr add {addrs[1][0]}/32 dev {VETH1}") + # sanity check that our two interfaces/addresses are correctly set up + # and communicating by doing a single ping + ip(f"netns exec {NET0} ping -c 1 {tcp_addrs[1][0]}") -# add routes -ip(f"-n {NET0} route add {addrs[1][0]}/32 dev {VETH0}") -ip(f"-n {NET1} route add {addrs[0][0]}/32 dev {VETH1}") + # Start a packet capture on each network + if logdir is not None: + for netn in [NET0, NET1]: + pcap = logdir+'/rds-'+netn+'.pcap' -# sanity check that our two interfaces/addresses are correctly set up -# and communicating by doing a single ping -ip(f"netns exec {NET0} ping -c 1 {addrs[1][0]}") + tcpdump_cmd = ['ip', 'netns', 'exec', netn, '/usr/sbin/tcpdump'] + sudo_user = os.environ.get('SUDO_USER') + if sudo_user: + tcpdump_cmd.extend(['-Z', sudo_user]) + tcpdump_cmd.extend(['-i', 'any', '-w', pcap]) -# Start a packet capture on each network -tcpdump_procs = [] -for net in [NET0, NET1]: - pcap = logdir+'/'+net+'.pcap' - fd, pcap_tmp = tempfile.mkstemp(suffix=".pcap", prefix=f"{net}-", dir="/tmp") - p = subprocess.Popen( - ['ip', 'netns', 'exec', net, - '/usr/sbin/tcpdump', '-i', 'any', '-w', pcap_tmp]) - tcpdump_procs.append((p, pcap_tmp, pcap, fd)) + # pylint: disable-next=consider-using-with + p = subprocess.Popen(tcpdump_cmd, + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + tcpdump_procs.append(p) -# simulate packet loss, duplication and corruption -for net, iface in [(NET0, VETH0), (NET1, VETH1)]: - ip(f"netns exec {net} /usr/sbin/tc qdisc add dev {iface} root netem \ - 
corrupt {PACKET_CORRUPTION} loss {PACKET_LOSS} duplicate \ - {PACKET_DUPLICATE}") + # simulate packet loss, duplication and corruption + for netn, iface in [(NET0, VETH0), (NET1, VETH1)]: + ip(f"netns exec {netn} /usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {PACKET_CORRUPTION} loss {PACKET_LOSS} duplicate \ + {PACKET_DUPLICATE}") -# add a timeout -if args.timeout > 0: - signal.alarm(args.timeout) - signal.signal(signal.SIGALRM, signal_handler) +def teardown_tcp(): + """ + Tear down the tcp network configured by setup_tcp(). -sockets = [ - netns_socket(NET0, socket.AF_RDS, socket.SOCK_SEQPACKET), - netns_socket(NET1, socket.AF_RDS, socket.SOCK_SEQPACKET), -] + Removing the namespaces also removes the veth pair, addresses, + routes, and netem qdisc that live inside them. fail=False so + this is safe to call in error paths after a partial or complete setup. + """ + cmd(f"ip netns del {NET0}", fail=False) + cmd(f"ip netns del {NET1}", fail=False) -for s, addr in zip(sockets, addrs): - s.bind(addr) - s.setblocking(0) +def get_iface_mac(iface): + """Return the MAC address of a local network interface.""" + out = subprocess.check_output(['ip', 'link', 'show', iface], text=True) + mac = re.search(r'link/ether\s+([0-9a-f:]+)', out) + if not mac: + raise RuntimeError(f"Cannot determine MAC address of {iface}") + return mac.group(1) -fileno_to_socket = { - s.fileno(): s for s in sockets -} +def setup_rdma(): + """ + Configure rdma network + """ -addr_to_socket = dict(zip(addrs, sockets)) + # remove links left over by previously interrupted run. 
+ teardown_rdma() -socket_to_addr = { - s: addr for addr, s in zip(addrs, sockets) -} + # use call here since modprobe may fail if the rdma_rxe + # module is built-in + subprocess.call(['modprobe', 'rdma_rxe'], + stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) -send_hashes = {} -recv_hashes = {} + ip(f"link add {VETH_RDMA0} type veth peer name {VETH_RDMA1}") -ep = select.epoll() + ip(f"link set {VETH_RDMA0} up") + ip(f"link set {VETH_RDMA1} up") -for s in sockets: - ep.register(s, select.EPOLLRDNORM) + # Since both addresses are in the same namespace, the source address + # is always local, so enable accept_local + cmd(f"/usr/sbin/sysctl -q net.ipv4.conf.{VETH_RDMA0}.accept_local=1") + cmd(f"/usr/sbin/sysctl -q net.ipv4.conf.{VETH_RDMA1}.accept_local=1") -NUM_PACKETS = 50000 -nr_send = 0 -nr_recv = 0 + # Reverse path filters must be disabled so that the local routes don't + # cause RPF failures. + cmd(f"/usr/sbin/sysctl -q net.ipv4.conf.{VETH_RDMA0}.rp_filter=0") + cmd(f"/usr/sbin/sysctl -q net.ipv4.conf.{VETH_RDMA1}.rp_filter=0") -while nr_send < NUM_PACKETS: - # Send as much as we can without blocking - print("sending...", nr_send, nr_recv) - while nr_send < NUM_PACKETS: - send_data = hashlib.sha256( - f'packet {nr_send}'.encode('utf-8')).hexdigest().encode('utf-8') + # add addresses + ip(f"addr add {rdma_addrs[0][0]}/32 dev {VETH_RDMA0}") + ip(f"addr add {rdma_addrs[1][0]}/32 dev {VETH_RDMA1}") - # pseudo-random send/receive pattern - sender = sockets[nr_send % 2] - receiver = sockets[1 - (nr_send % 3) % 2] + # add routes + ip(f"route add {rdma_addrs[1][0]}/32 dev {VETH_RDMA0}") + ip(f"route add {rdma_addrs[0][0]}/32 dev {VETH_RDMA1}") - try: - sender.sendto(send_data, socket_to_addr[receiver]) - send_hashes.setdefault((sender.fileno(), receiver.fileno()), - hashlib.sha256()).update(f'<{send_data}>'.encode('utf-8')) - nr_send = nr_send + 1 - except BlockingIOError as e: - break - except OSError as e: - if e.errno in [errno.ENOBUFS, errno.ECONNRESET, 
errno.EPIPE]: - break - raise + # ARP will not resolve neighbor IPs on /32 routes without a subnet. + # Avoid this by adding neighbors directly so RDMA CM can populate path + # records with correct mac addrs without waiting for the ARP. + mac0 = get_iface_mac(VETH_RDMA0) + mac1 = get_iface_mac(VETH_RDMA1) + ip(f"neigh add {rdma_addrs[1][0]} lladdr {mac1} dev {VETH_RDMA0} nud permanent") + ip(f"neigh add {rdma_addrs[0][0]} lladdr {mac0} dev {VETH_RDMA1} nud permanent") - # Receive as much as we can without blocking - print("receiving...", nr_send, nr_recv) - while nr_recv < nr_send: - for fileno, eventmask in ep.poll(): - receiver = fileno_to_socket[fileno] + cmd(f'rdma link add {RXE_DEV0} type rxe netdev {VETH_RDMA0}') + cmd(f'rdma link add {RXE_DEV1} type rxe netdev {VETH_RDMA1}') - if eventmask & select.EPOLLRDNORM: - while True: - try: - recv_data, address = receiver.recvfrom(1024) - sender = addr_to_socket[address] - recv_hashes.setdefault((sender.fileno(), - receiver.fileno()), hashlib.sha256()).update( - f'<{recv_data}>'.encode('utf-8')) - nr_recv = nr_recv + 1 - except BlockingIOError as e: - break + time.sleep(1) # allow RXE devices to initialise - # exercise net/rds/tcp.c:rds_tcp_sysctl_reset() - for net in [NET0, NET1]: - ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_rcvbuf=10000") - ip(f"netns exec {net} /usr/sbin/sysctl net.rds.tcp.rds_tcp_sndbuf=10000") + # Start a packet capture on each network + if logdir is not None: + for iface in [VETH_RDMA0, VETH_RDMA1]: + pcap = logdir+'/rds-roce-'+iface+'.pcap' -print("done", nr_send, nr_recv) + tcpdump_cmd = ['/usr/sbin/tcpdump'] + sudo_user = os.environ.get('SUDO_USER') + if sudo_user: + tcpdump_cmd.extend(['-Z', sudo_user]) + tcpdump_cmd.extend(['-i', iface, '-w', pcap]) -# the Python socket module doesn't know these -RDS_INFO_FIRST = 10000 -RDS_INFO_LAST = 10017 + # pylint: disable-next=consider-using-with + p = subprocess.Popen(tcpdump_cmd, + stdout=subprocess.DEVNULL, 
stderr=subprocess.DEVNULL) + tcpdump_procs.append(p) -nr_success = 0 -nr_error = 0 + # simulate packet loss, duplication and corruption + for iface in [VETH_RDMA0, VETH_RDMA1]: + cmd(f"/usr/sbin/tc qdisc add dev {iface} root netem \ + corrupt {PACKET_CORRUPTION} loss {PACKET_LOSS} duplicate \ + {PACKET_DUPLICATE}") -for s in sockets: - for optname in range(RDS_INFO_FIRST, RDS_INFO_LAST + 1): - # Sigh, the Python socket module doesn't allow us to pass - # buffer lengths greater than 1024 for some reason. RDS - # wants multiple pages. - try: - s.getsockopt(socket.SOL_RDS, optname, 1024) - nr_success = nr_success + 1 - except OSError as e: - nr_error = nr_error + 1 - if e.errno == errno.ENOSPC: - # ignore - pass +def teardown_rdma(): + """ + Tear down the rdma network configured by setup_rdma(). + """ + + # remove links left over by previously interrupted run. + cmd(f'rdma link del {RXE_DEV0}', fail=False) + cmd(f'rdma link del {RXE_DEV1}', fail=False) + cmd(f'ip link del {VETH_RDMA0}', fail=False) + + +#Parse out command line arguments. We take an optional +# timeout parameter and an optional log output folder +parser = argparse.ArgumentParser(description="init script args", + formatter_class=argparse.ArgumentDefaultsHelpFormatter) +parser.add_argument("-d", "--logdir", action="store", + help="directory to store logs", default=None) +parser.add_argument("-T", "--transport", default="tcp", + help="Comma-separated list of transports to test: " + "tcp, rdma, or tcp,rdma. Each matching test " + "is run once per transport. 
" + "'rdma' requires CONFIG_RDS_RDMA and rdma_rxe.") +parser.add_argument('-t', '--timeout', help="timeout to terminate hung test", + type=int, default=0) +parser.add_argument('-l', '--loss', help="Simulate tcp packet loss", + type=int, default=0) +parser.add_argument('-c', '--corruption', help="Simulate tcp packet corruption", + type=int, default=0) +parser.add_argument('-u', '--duplicate', help="Simulate tcp packet duplication", + type=int, default=0) +args = parser.parse_args() +logdir=args.logdir +PACKET_LOSS=str(args.loss)+'%' +PACKET_CORRUPTION=str(args.corruption)+'%' +PACKET_DUPLICATE=str(args.duplicate)+'%' + +# check transport is either tcp or rdma +transports = [t.strip() for t in args.transport.split(',')] +for t in transports: + if t not in ('tcp', 'rdma'): + raise SystemExit(f"test.py: unknown transport: {t!r}") + +# Register stop_pcaps before any network setups so that any partially setup +# tcpdumps are still cleaned up on error +atexit.register(stop_pcaps) + +# Set up all requested transports upfront so network plumbing is +# ready before any test runs. +transport_envs = {} +FLAGS = 0 +if 'tcp' in transports: + # Register cleanups before setups to handle partial setups that error'd out + atexit.register(teardown_tcp) + setup_tcp() + transport_envs['tcp'] = { + 'addrs': tcp_addrs, + 'netns': [NET0, NET1], + 'flags': FLAGS | OP_FLAG_TCP, + } + +if 'rdma' in transports: + atexit.register(teardown_rdma) + setup_rdma() + transport_envs['rdma'] = { + 'addrs': rdma_addrs, + 'netns': None, + 'flags': FLAGS | OP_FLAG_RDMA, + } -print(f"getsockopt(): {nr_success}/{nr_error}") +print("TAP version 13") +print(f"1..{len(transport_envs)}") -print("Stopping network packet captures") -for p, pcap_tmp, pcap, fd in tcpdump_procs: - p.terminate() - p.wait() - os.close(fd) - shutil.move(pcap_tmp, pcap) +for transport, tenv in transport_envs.items(): + tap_idx += 1 -# We're done sending and receiving stuff, now let's check if what -# we received is what we sent. 
-for (sender, receiver), send_hash in send_hashes.items(): - recv_hash = recv_hashes.get((sender, receiver)) + # add a timeout + if args.timeout > 0: + signal_handler_label = transport + signal.alarm(args.timeout) + signal.signal(signal.SIGALRM, signal_handler) - if recv_hash is None: - print("FAIL: No data received") - sys.exit(1) + ret = snd_rcv_packets(tenv) - if send_hash.hexdigest() != recv_hash.hexdigest(): - print("FAIL: Send/recv mismatch") - print("hash expected:", send_hash.hexdigest()) - print("hash received:", recv_hash.hexdigest()) - sys.exit(1) + # cancel timeout + signal.alarm(0) - print(f"{sender}/{receiver}: ok") + if ret == 0: + ksft_pr("Success") + print(f"ok {tap_idx} rds selftest {transport}") + nr_pass += 1 + else: + print(f"not ok {tap_idx} rds selftest {transport}") + nr_fail += 1 -print("Success") -sys.exit(0) +ksft_pr(f"Totals: pass:{nr_pass} fail:{nr_fail} skip:0") +sys.exit(1 if nr_fail else 0) diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh deleted file mode 100755 index 5e861ad32a42e..0000000000000 --- a/tools/testing/selftests/net/so_txtime.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/bin/bash -# SPDX-License-Identifier: GPL-2.0 -# -# Regression tests for the SO_TXTIME interface - -set -e - -readonly ksft_skip=4 -readonly DEV="veth0" -readonly BIN="./so_txtime" - -readonly RAND="$(mktemp -u XXXXXX)" -readonly NSPREFIX="ns-${RAND}" -readonly NS1="${NSPREFIX}1" -readonly NS2="${NSPREFIX}2" - -readonly SADDR4='192.168.1.1' -readonly DADDR4='192.168.1.2' -readonly SADDR6='fd::1' -readonly DADDR6='fd::2' - -cleanup() { - ip netns del "${NS2}" - ip netns del "${NS1}" -} - -trap cleanup EXIT - -# Create virtual ethernet pair between network namespaces -ip netns add "${NS1}" -ip netns add "${NS2}" - -ip link add "${DEV}" netns "${NS1}" type veth \ - peer name "${DEV}" netns "${NS2}" - -# Bring the devices up -ip -netns "${NS1}" link set "${DEV}" up -ip -netns "${NS2}" link set "${DEV}" up - -# Set 
fixed MAC addresses on the devices -ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02 -ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06 - -# Add fixed IP addresses to the devices -ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}" -ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}" -ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad -ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad - -run_test() { - local readonly IP="$1" - local readonly CLOCK="$2" - local readonly TXARGS="$3" - local readonly RXARGS="$4" - - if [[ "${IP}" == "4" ]]; then - local readonly SADDR="${SADDR4}" - local readonly DADDR="${DADDR4}" - elif [[ "${IP}" == "6" ]]; then - local readonly SADDR="${SADDR6}" - local readonly DADDR="${DADDR6}" - else - echo "Invalid IP version ${IP}" - exit 1 - fi - - local readonly START="$(date +%s%N --date="+ 0.1 seconds")" - - ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r & - ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}" - wait "$!" -} - -do_test() { - run_test $@ - [ $? -ne 0 ] && ret=1 -} - -do_fail_test() { - run_test $@ - [ $? -eq 0 ] && ret=1 -} - -ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq -set +e -ret=0 -do_test 4 mono a,-1 a,-1 -do_test 6 mono a,0 a,0 -do_test 6 mono a,10 a,10 -do_test 4 mono a,10,b,20 a,10,b,20 -do_test 6 mono a,20,b,10 b,20,a,20 - -if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then - do_fail_test 4 tai a,-1 a,-1 - do_fail_test 6 tai a,0 a,0 - do_test 6 tai a,10 a,10 - do_test 4 tai a,10,b,20 a,10,b,20 - do_test 6 tai a,20,b,10 b,10,a,20 -else - echo "tc ($(tc -V)) does not support qdisc etf. skipping" - [ $ret -eq 0 ] && ret=$ksft_skip -fi - -if [ $ret -eq 0 ]; then - echo OK. 
All tests passed -elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then - echo "Ignoring errors due to slow environment" 1>&2 - ret=0 -fi -exit $ret diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config index f221485123654..1b120bfd89c40 100644 --- a/tools/testing/selftests/net/tcp_ao/config +++ b/tools/testing/selftests/net/tcp_ao/config @@ -1,7 +1,3 @@ -CONFIG_CRYPTO_CMAC=y -CONFIG_CRYPTO_HMAC=y -CONFIG_CRYPTO_RMD160=y -CONFIG_CRYPTO_SHA1=y CONFIG_IPV6=y CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_L3_MASTER_DEV=y diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c index 69d9a7a05d5c1..d86bb380b79f7 100644 --- a/tools/testing/selftests/net/tcp_ao/key-management.c +++ b/tools/testing/selftests/net/tcp_ao/key-management.c @@ -380,31 +380,6 @@ static void check_listen_socket(void) close(sk); } -static const char *fips_fpath = "/proc/sys/crypto/fips_enabled"; -static bool is_fips_enabled(void) -{ - static int fips_checked = -1; - FILE *fenabled; - int enabled; - - if (fips_checked >= 0) - return !!fips_checked; - if (access(fips_fpath, R_OK)) { - if (errno != ENOENT) - test_error("Can't open %s", fips_fpath); - fips_checked = 0; - return false; - } - fenabled = fopen(fips_fpath, "r"); - if (!fenabled) - test_error("Can't open %s", fips_fpath); - if (fscanf(fenabled, "%d", &enabled) != 1) - test_error("Can't read from %s", fips_fpath); - fclose(fenabled); - fips_checked = !!enabled; - return !!fips_checked; -} - struct test_key { char password[TCP_AO_MAXKEYLEN]; const char *alg; @@ -430,14 +405,7 @@ struct key_collection { static struct key_collection collection; #define TEST_MAX_MACLEN 16 -const char *test_algos[] = { - "cmac(aes128)", - "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)", - "hmac(sha224)", "hmac(sha3-512)", - /* only if !CONFIG_FIPS */ -#define TEST_NON_FIPS_ALGOS 2 - "hmac(rmd160)", "hmac(md5)" -}; +const char *test_algos[] 
= { "cmac(aes128)", "hmac(sha1)", "hmac(sha256)" }; const unsigned int test_maclens[] = { 1, 4, 12, 16 }; #define MACLEN_SHIFT 2 #define ALGOS_SHIFT 4 @@ -452,7 +420,7 @@ static unsigned int make_mask(unsigned int shift, unsigned int prev_shift) static void init_key_in_collection(unsigned int index, bool randomized) { struct test_key *key = &collection.keys[index]; - unsigned int algos_nr, algos_index; + unsigned int algos_index; /* Same for randomized and non-randomized test flows */ key->client_keyid = index; @@ -474,10 +442,7 @@ static void init_key_in_collection(unsigned int index, bool randomized) key->maclen = test_maclens[index & make_mask(shift, 0)]; algos_index = index & make_mask(ALGOS_SHIFT, shift); } - algos_nr = ARRAY_SIZE(test_algos); - if (is_fips_enabled()) - algos_nr -= TEST_NON_FIPS_ALGOS; - key->alg = test_algos[algos_index % algos_nr]; + key->alg = test_algos[algos_index % ARRAY_SIZE(test_algos)]; } static int init_default_key_collection(unsigned int nr_keys, bool randomized) diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh index 9067197c90550..e9ed0d7509962 100755 --- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh +++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh @@ -56,6 +56,12 @@ TESTS=" neigh_suppress_uc_ns neigh_vlan_suppress_arp neigh_vlan_suppress_ns + neigh_suppress_arp_probe + neigh_suppress_dad_ns + neigh_forward_grat_arp + neigh_forward_grat_na + neigh_vlan_forward_grat_arp + neigh_vlan_forward_grat_na " VERBOSE=0 PAUSE_ON_FAIL=no @@ -74,7 +80,8 @@ log_test() printf "TEST: %-60s [ OK ]\n" "${msg}" nsuccess=$((nsuccess+1)) else - ret=1 + # shellcheck disable=SC2154 + ret=$(ksft_exit_status_merge "$ret" "$ksft_fail") nfail=$((nfail+1)) printf "TEST: %-60s [FAIL]\n" "${msg}" if [ "$VERBOSE" = "1" ]; then @@ -97,6 +104,7 @@ log_test() fi [ "$VERBOSE" = "1" ] && echo + return 0 } run_cmd() @@ -134,6 +142,15 @@ tc_check_packets() [[ 
$pkts == $count ]] } +neigh_forward_grat_check() +{ + if ! bridge link help 2>&1 | grep -q "neigh_forward_grat"; then + echo "SKIP: iproute2 bridge too old, missing gratuitous ARP/unsolicited NA forwarding control support" + # shellcheck disable=SC2154 + return "$ksft_skip" + fi +} + ################################################################################ # Setup @@ -561,6 +578,17 @@ icmpv6_header_get() echo $p } +icmpv6_na_header_get() +{ + local csum=$1; shift + local tip=$1; shift + + # Type 136 (Neighbor Advertisement), hex format, Override flag set, + # Solicited flag clear (unsolicited NA). + # ICMPv6.type : ICMPv6.code : ICMPv6.checksum : Flags : Target Address + echo "88:00:$csum:20:00:00:00:$tip:" +} + neigh_suppress_uc_ns_common() { local vid=$1; shift @@ -875,6 +903,439 @@ neigh_vlan_suppress_ns() log_test $? 0 "NS suppression (VLAN $vid2)" } +neigh_suppress_arp_probe() +{ + local vid=10 + local tip=192.0.2.2 + local h2_mac + + echo + echo "Per-port ARP probe suppression" + echo "------------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip 0.0.0.0 arp_op request action pass" + + # Initial state - check that ARP probes are not suppressed. + run_cmd "ip netns exec $h1 arping -D -q -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "ARP probe suppression" + + # Enable neighbor suppression and check that nothing changes. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 arping -D -q -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 2 + log_test $? 0 "ARP probe suppression" + + # Install FDB and a neighbor and check that ARP probes are suppressed. 
+ h2_mac=$(ip -n "$h2" -j -p link show eth0."$vid" | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" + log_test $? 0 "FDB and neighbor entry installation" + + run_cmd "ip netns exec $h1 arping -D -q -c 1 -w 5 -I eth0.$vid $tip" + log_test $? 1 "arping" + tc_check_packets "$sw1" "dev vx0 egress" 101 2 + log_test $? 0 "ARP probe suppression" + + # Remove the neighbor entry and check that ARP probes are not suppressed. + run_cmd "ip -n $sw1 neigh del $tip dev br0.$vid" + log_test $? 0 "neighbor removal" + + run_cmd "ip netns exec $h1 arping -D -q -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 3 + log_test $? 0 "ARP probe suppression" + + # Disable neighbor suppression. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off" + + run_cmd "ip netns exec $h1 arping -D -q -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 4 + log_test $? 0 "ARP probe suppression" +} + +neigh_suppress_dad_ns() +{ + local vid=10 + local tip=2001:db8:1::99 + local mcast=ff02::1:ff00:99 + local dmac=33:33:ff:00:00:99 + local full_tip=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:99 + local csum="4b:bc" + local smac + local tmac + + echo + echo "Per-port DAD NS suppression" + echo "---------------------------" + + smac=$(ip -n "$h1" -j -p link show eth0."$vid" | jq -r '.[]["address"]') + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $mcast src_ip :: type 135 code 0 action pass" + + # Initial state - check that DAD NS are not suppressed. 
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A :: -B $mcast -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum" "$full_tip") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "DAD NS suppression" + + # Enable neighbor suppression and check that nothing changes. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A :: -B $mcast -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum" "$full_tip") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 2 + log_test $? 0 "DAD NS suppression" + + # Install FDB and a neighbor and check that DAD NS are suppressed + # and that a proxy NA is sent back to h1. + tmac=$(ip -n "$h2" -j -p link show eth0."$vid" | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 -6 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid" + log_test $? 0 "FDB and neighbor entry installation" + + run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact" + run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip ff02::1 src_ip $tip type 136 code 0 action pass" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A :: -B $mcast -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum" "$full_tip") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 2 + log_test $? 0 "DAD NS suppression" + tc_check_packets "$h1" "dev eth0.$vid ingress" 101 1 + log_test $? 0 "DAD NS proxy NA reply" + + # Remove the neighbor entry and check that DAD NS are not suppressed. + run_cmd "ip -n $sw1 -6 neigh del $tip dev br0.$vid" + log_test $? 
0 "neighbor removal" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A :: -B $mcast -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum" "$full_tip") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 3 + log_test $? 0 "DAD NS suppression" + + # Disable neighbor suppression. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\"" + log_test $? 0 "\"neigh_suppress\" is off" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A :: -B $mcast -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum" "$full_tip") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 4 + log_test $? 0 "DAD NS suppression" +} + +neigh_forward_grat_arp() +{ + local vid=10 + local sip=192.0.2.1 + local tip=$sip + local h2_ip=192.0.2.2 + local h2_mac + + neigh_forward_grat_check || return $? + + echo + echo "Gratuitous ARP forwarding" + echo "-------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $h2_ip arp_sip $sip arp_op request action pass" + + h2_mac=$(ip -n "$h2" -j -p link show eth0."$vid" | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $h2_ip lladdr $h2_mac nud permanent dev br0.$vid" + + # Enable neighbor suppression. Gratuitous ARP should be suppressed by + # default (neigh_forward_grat defaults to off). + run_cmd "ip -n $sw1 link set dev vx0 type bridge_slave neigh_suppress on" + run_cmd "ip -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 
0 "\"neigh_suppress\" is on" + + # Send gratuitous ARP (sip == tip) and check it's suppressed. + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 0 + log_test $? 0 "Gratuitous ARP suppression" + + # Explicitly enable neigh_forward_grat and verify gratuitous ARP is + # now forwarded. + run_cmd "ip -n $sw1 link set dev vx0 type bridge_slave neigh_forward_grat on" + run_cmd "ip -n $sw1 -d link show dev vx0 | grep \"neigh_forward_grat on\"" + log_test $? 0 "\"neigh_forward_grat\" is on" + + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "Gratuitous ARP forwarding" + + # Verify that regular (non-gratuitous) ARP requests are still + # suppressed when neigh_forward_grat is enabled. + run_cmd "ip netns exec $h1 arping -c 1 -w 5 -I eth0.$vid $h2_ip" + tc_check_packets "$sw1" "dev vx0 egress" 102 0 + log_test $? 0 "Regular ARP suppression with \"neigh_forward_grat\" on" + + # Disable neigh_forward_grat and verify suppression resumes. + run_cmd "ip -n $sw1 link set dev vx0 type bridge_slave neigh_forward_grat off" + run_cmd "ip -n $sw1 -d link show dev vx0 | grep \"neigh_forward_grat off\"" + log_test $? 0 "\"neigh_forward_grat\" is off" + + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid $tip" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "Gratuitous ARP suppression" +} + +# neigh_forward_grat_arp() uses 'ip link' interface, and neigh_forward_grat_na() +# uses 'bridge link' interface to exercise both paths. 
+neigh_forward_grat_na() +{ + local vid=10 + local saddr=2001:db8:1::1 + local daddr=ff02::1 + local h2_addr=2001:db8:1::2 + local h2_maddr=ff02::1:ff00:2 + local full_addr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01 + local h2_full_addr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum="fd:32" + local csum_ns="1f:2f" + local dmac=33:33:00:00:00:01 + local h2_dmac=33:33:ff:00:00:02 + local h2_mac + local smac + + neigh_forward_grat_check || return $? + + echo + echo "Unsolicited NA forwarding" + echo "-------------------------" + + smac=$(ip -n "$h1" -j -p link show eth0."$vid" | jq -r '.[]["address"]') + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $daddr src_ip $saddr type 136 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $h2_maddr src_ip $saddr type 135 code 0 action pass" + + h2_mac=$(ip -n "$h2" -j -p link show eth0."$vid" | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid" + run_cmd "ip -n $sw1 neigh replace $saddr lladdr $h2_mac nud permanent dev br0.$vid" + run_cmd "ip -n $sw1 neigh replace $h2_addr lladdr $h2_mac nud permanent dev br0.$vid" + + # Enable neighbor suppression. Unsolicited NA should be suppressed by + # default (neigh_forward_grat defaults to off). + run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on" + + # Send unsolicited NA and check it's suppressed. + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A $saddr -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum" "$full_addr") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 0 + log_test $? 
0 "Unsolicited NA suppression" + + # Explicitly enable neigh_forward_grat and verify unsolicited NA is + # now forwarded. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_forward_grat on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_forward_grat on\"" + log_test $? 0 "\"neigh_forward_grat\" is on" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A $saddr -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum" "$full_addr") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "Unsolicited NA forwarding" + + # Verify that solicited NS messages are still suppressed when + # neigh_forward_grat is enabled. + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $h2_dmac -A $saddr -B $h2_maddr -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum_ns" "$h2_full_addr") -q" + tc_check_packets "$sw1" "dev vx0 egress" 102 0 + log_test $? 0 "Solicited NS suppression with \"neigh_forward_grat\" on" + + # Disable neigh_forward_grat and verify suppression resumes. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_forward_grat off" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_forward_grat off\"" + log_test $? 0 "\"neigh_forward_grat\" is off" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a $smac -b $dmac -A $saddr -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum" "$full_addr") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "Unsolicited NA suppression" +} + +neigh_vlan_forward_grat_arp() +{ + local vid1=10 + local vid2=20 + local sip1=192.0.2.1 + local sip2=192.0.2.17 + local h2_ip1=192.0.2.2 + local h2_mac1 + local h2_mac2 + + neigh_forward_grat_check || return $? 
+ + echo + echo "Per-VLAN gratuitous ARP forwarding" + echo "----------------------------------" + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $sip1 arp_sip $sip1 arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $sip2 arp_sip $sip2 arp_op request action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 103 proto 0x0806 flower indev swp1 arp_tip $h2_ip1 arp_sip $sip1 arp_op request action pass" + + h2_mac1=$(ip -n "$h2" -j -p link show eth0."$vid1" | jq -r '.[]["address"]') + h2_mac2=$(ip -n "$h2" -j -p link show eth0."$vid2" | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $sip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $sip2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + run_cmd "ip -n $sw1 neigh replace $h2_ip1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + + # Enable per-{Port, VLAN} neighbor suppression. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + log_test $? 0 "\"neigh_vlan_suppress\" is on" + + # Enable neighbor suppression on VLAN 10. Gratuitous ARP should be + # suppressed by default on VLAN 10 (neigh_forward_grat defaults to off) + # but not on VLAN 20. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" + + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid1 $sip1" + tc_check_packets "$sw1" "dev vx0 egress" 101 0 + log_test $? 
0 "Gratuitous ARP suppression (VLAN $vid1)" + + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid2 $sip2" + tc_check_packets "$sw1" "dev vx0 egress" 102 1 + log_test $? 0 "Gratuitous ARP forwarding (VLAN $vid2)" + + # Enable neigh_forward_grat on VLAN 10 and verify gratuitous ARP is + # now forwarded. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_forward_grat on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_forward_grat on\"" + log_test $? 0 "\"neigh_forward_grat\" is on (VLAN $vid1)" + + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid1 $sip1" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "Gratuitous ARP forwarding (VLAN $vid1)" + + # Verify that regular (non-gratuitous) ARP requests on VLAN $vid1 are + # still suppressed when neigh_forward_grat is enabled. + run_cmd "ip netns exec $h1 arping -c 1 -w 5 -I eth0.$vid1 $h2_ip1" + tc_check_packets "$sw1" "dev vx0 egress" 103 0 + log_test $? 0 "Regular ARP suppression with \"neigh_forward_grat\" on (VLAN $vid1)" + + # Enable neighbor suppression on VLAN 20 (neigh_forward_grat defaults to + # off), and verify gratuitous ARP is suppressed on VLAN 20. + run_cmd "bridge -n $sw1 vlan set vid $vid2 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid2)" + + # VLAN 10 should still forward (neigh_forward_grat is on). + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid1 $sip1" + tc_check_packets "$sw1" "dev vx0 egress" 101 2 + log_test $? 0 "Gratuitous ARP forwarding (VLAN $vid1)" + + # VLAN 20 should suppress (neigh_forward_grat defaults to off). + run_cmd "ip netns exec $h1 arping -U -c 1 -w 5 -I eth0.$vid2 $sip2" + tc_check_packets "$sw1" "dev vx0 egress" 102 1 + log_test $? 
0 "Gratuitous ARP suppression (VLAN $vid2)" +} + +neigh_vlan_forward_grat_na() +{ + local vid1=10 + local vid2=20 + local saddr1=2001:db8:1::1 + local daddr=ff02::1 + local h2_addr1=2001:db8:1::2 + local h2_maddr1=ff02::1:ff00:2 + local full_addr1=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01 + local h2_full_addr1=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02 + local csum1="fd:32" + local csum_ns1="1f:2f" + local saddr2=2001:db8:2::1 + local full_addr2=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01 + local csum2="fd:30" + local dmac=33:33:00:00:00:01 + local h2_dmac1=33:33:ff:00:00:02 + local h2_mac1 + local h2_mac2 + local smac + + neigh_forward_grat_check || return $? + + echo + echo "Per-VLAN unsolicited NA forwarding" + echo "----------------------------------" + + smac=$(ip -n "$h1" -j -p link show eth0."$vid1" | jq -r '.[]["address"]') + + run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $daddr src_ip $saddr1 type 136 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $daddr src_ip $saddr2 type 136 code 0 action pass" + run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 103 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $h2_maddr1 src_ip $saddr1 type 135 code 0 action pass" + + h2_mac1=$(ip -n "$h2" -j -p link show eth0."$vid1" | jq -r '.[]["address"]') + h2_mac2=$(ip -n "$h2" -j -p link show eth0."$vid2" | jq -r '.[]["address"]') + run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1" + run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2" + run_cmd "ip -n $sw1 neigh replace $saddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1" + run_cmd "ip -n $sw1 neigh replace $saddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2" + run_cmd "ip -n $sw1 neigh replace $h2_addr1 lladdr $h2_mac1 
nud permanent dev br0.$vid1" + + # Enable per-{Port, VLAN} neighbor suppression. + run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on" + run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\"" + log_test $? 0 "\"neigh_vlan_suppress\" is on" + + # Enable neighbor suppression on VLAN 10. Unsolicited NA should be + # suppressed by default on VLAN 10 (neigh_forward_grat defaults to off) + # but not on VLAN 20. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid1 -c 1 -a $smac -b $dmac -A $saddr1 -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum1" "$full_addr1") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 0 + log_test $? 0 "Unsolicited NA suppression (VLAN $vid1)" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid2 -c 1 -a $smac -b $dmac -A $saddr2 -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum2" "$full_addr2") -q" + tc_check_packets "$sw1" "dev vx0 egress" 102 1 + log_test $? 0 "Unsolicited NA forwarding (VLAN $vid2)" + + # Enable neigh_forward_grat on VLAN 10 and verify unsolicited NA is + # now forwarded. + run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_forward_grat on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_forward_grat on\"" + log_test $? 0 "\"neigh_forward_grat\" is on (VLAN $vid1)" + + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid1 -c 1 -a $smac -b $dmac -A $saddr1 -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum1" "$full_addr1") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 1 + log_test $? 0 "Unsolicited NA forwarding (VLAN $vid1)" + + # Verify that solicited NS messages on VLAN $vid1 are still suppressed + # when neigh_forward_grat is enabled. 
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid1 -c 1 -a $smac -b $h2_dmac1 -A $saddr1 -B $h2_maddr1 -t ip hop=255,next=58,payload=$(icmpv6_header_get "$csum_ns1" "$h2_full_addr1") -q" + tc_check_packets "$sw1" "dev vx0 egress" 103 0 + log_test $? 0 "Solicited NS suppression with \"neigh_forward_grat\" on (VLAN $vid1)" + + # Enable neighbor suppression on VLAN 20 (neigh_forward_grat defaults to + # off), and verify unsolicited NA is suppressed on VLAN 20. + run_cmd "bridge -n $sw1 vlan set vid $vid2 dev vx0 neigh_suppress on" + run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress on\"" + log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid2)" + + # VLAN 10 should still forward (neigh_forward_grat is on). + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid1 -c 1 -a $smac -b $dmac -A $saddr1 -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum1" "$full_addr1") -q" + tc_check_packets "$sw1" "dev vx0 egress" 101 2 + log_test $? 0 "Unsolicited NA forwarding (VLAN $vid1)" + + # VLAN 20 should suppress (neigh_forward_grat defaults to off). + run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid2 -c 1 -a $smac -b $dmac -A $saddr2 -B $daddr -t ip hop=255,next=58,payload=$(icmpv6_na_header_get "$csum2" "$full_addr2") -q" + tc_check_packets "$sw1" "dev vx0 egress" 102 1 + log_test $? 0 "Unsolicited NA suppression (VLAN $vid2)" +} + ################################################################################ # Usage @@ -961,7 +1422,10 @@ cleanup for t in $TESTS do - setup; $t; cleanup; + setup + $t + ret=$(ksft_exit_status_merge "$ret" $?) + cleanup done if [ "$TESTS" != "none" ]; then diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index 30a236b8e9f73..9b9a3cb2700d1 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -1549,7 +1549,7 @@ test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self, res = recv(self->cfd, rb, left > sizeof(rb) ? 
sizeof(rb) : left, 0); - EXPECT_GE(res, 0); + ASSERT_GE(res, 0); left -= res; } } else { @@ -1566,7 +1566,7 @@ test_mutliproc(struct __test_metadata *_metadata, struct _test_data_tls *self, res = send(self->fd, buf, left > file_sz ? file_sz : left, 0); - EXPECT_GE(res, 0); + ASSERT_GE(res, 0); left -= res; } } diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c20aa16b1d633..0e5618be03359 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -4,6 +4,7 @@ CONFIG_DUMMY=y CONFIG_VETH=y +CONFIG_IFB=y # # Core Netfilter Configuration diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py index bb19b8b76d3bc..0bece7c74f07c 100644 --- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py +++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py @@ -120,6 +120,7 @@ class SubPlugin(TdcPlugin): dev0 = self.args.NAMES["DEV0"]; dev1 = self.args.NAMES["DEV1"]; dummy = self.args.NAMES["DUMMY"]; + ifb = self.args.NAMES['IFB'] if self.args.verbose: print('{}._nl_ns_create'.format(self.sub_class)) @@ -129,6 +130,7 @@ class SubPlugin(TdcPlugin): with IPRoute() as ip: ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'}) ip.link('add', ifname=dummy, kind='dummy') + ip.link('add', ifname=ifb, kind='ifb') ticks = 20 while True: if ticks == 0: @@ -136,8 +138,10 @@ class SubPlugin(TdcPlugin): try: dev1_idx = ip.link_lookup(ifname=dev1)[0] dummy_idx = ip.link_lookup(ifname=dummy)[0] + ifb_idx = ip.link_lookup(ifname=ifb)[0] ip.link('set', index=dev1_idx, state='up') ip.link('set', index=dummy_idx, state='up') + ip.link('set', index=ifb_idx, state='up') break except: time.sleep(0.1) @@ -169,8 +173,11 @@ class SubPlugin(TdcPlugin): cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns))) cmds.append(self._replace_keywords('link add $DUMMY type 
dummy'.format(ns))) cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns))) + cmds.append(self._replace_keywords('link add $IFB type ifb')) + cmds.append(self._replace_keywords('link set $IFB netns {}'.format(ns))) cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns))) cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns))) + cmds.append(self._replace_keywords('netns exec {} $IP link set $IFB up'.format(ns))) cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns))) if self.args.device: diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 82c38a13dfbf8..1f4783724e5e6 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -392,26 +392,32 @@ "htb" ], "plugins": { - "requires": "nsPlugin" + "requires": [ + "nsPlugin", + "scapyPlugin" + ] }, "setup": [ - "$IP link set dev $DUMMY up || true", - "$IP addr add 10.10.10.10/24 dev $DUMMY || true", - "$TC qdisc add dev $DUMMY handle 1: root htb default 10", - "$TC class add dev $DUMMY parent 1: classid 1:10 htb rate 1kbit", - "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", - "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", - "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", - "sleep 0.1" + "$TC qdisc add dev $IFB handle 1: root htb default 10", + "$TC class add dev $IFB parent 1: classid 1:10 htb rate 1kbit", + "$TC qdisc add dev $IFB parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $IFB parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip protocol 1 0xff action 
mirred egress mirror dev $IFB" ], - "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $IFB", "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "verifyCmd": "$TC -s qdisc show dev $IFB | grep -A 5 'qdisc fq_codel'", "matchPattern": "dropped [1-9][0-9]*", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + "$TC qdisc del dev $IFB root", + "$TC qdisc del dev $DEV1 ingress" ] }, { @@ -423,26 +429,32 @@ "qfq" ], "plugins": { - "requires": "nsPlugin" + "requires": [ + "nsPlugin", + "scapyPlugin" + ] }, "setup": [ - "$IP link set dev $DUMMY up || true", - "$IP addr add 10.10.10.10/24 dev $DUMMY || true", - "$TC qdisc add dev $DUMMY handle 1: root qfq", - "$TC class add dev $DUMMY parent 1: classid 1:10 qfq weight 1 maxpkt 1000", - "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", - "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", - "ping -c 10 -s 1000 -f -I $DUMMY 10.10.10.1 > /dev/null || true", - "sleep 0.1" + "$TC qdisc add dev $IFB handle 1: root qfq", + "$TC class add dev $IFB parent 1: classid 1:10 qfq weight 1 maxpkt 1000", + "$TC qdisc add dev $IFB parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $IFB parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip protocol 1 0xff action mirred egress mirror dev $IFB" ], - "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "scapy": { + "iface": "$DEV0", + "count": 10, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": 
"$TC -s qdisc show dev $IFB", "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "verifyCmd": "$TC -s qdisc show dev $IFB | grep -A 5 'qdisc fq_codel'", "matchPattern": "dropped [1-9][0-9]*", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + "$TC qdisc del dev $IFB root", + "$TC qdisc del dev $DEV1 ingress" ] }, { @@ -454,26 +466,32 @@ "hfsc" ], "plugins": { - "requires": "nsPlugin" + "requires": [ + "nsPlugin", + "scapyPlugin" + ] }, "setup": [ - "$IP link set dev $DUMMY up || true", - "$IP addr add 10.10.10.10/24 dev $DUMMY || true", - "$TC qdisc add dev $DUMMY handle 1: root hfsc default 10", - "$TC class add dev $DUMMY parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit", - "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", - "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", - "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", - "sleep 0.1" + "$TC qdisc add dev $IFB handle 1: root hfsc default 10", + "$TC class add dev $IFB parent 1: classid 1:10 hfsc sc rate 1kbit ul rate 1kbit", + "$TC qdisc add dev $IFB parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $IFB parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip protocol 1 0xff action mirred egress mirror dev $IFB" ], - "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $IFB", "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "verifyCmd": "$TC -s qdisc show dev $IFB | grep -A 5 'qdisc 
fq_codel'", "matchPattern": "dropped [1-9][0-9]*", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + "$TC qdisc del dev $IFB root", + "$TC qdisc del dev $DEV1 ingress" ] }, { @@ -485,26 +503,32 @@ "drr" ], "plugins": { - "requires": "nsPlugin" + "requires": [ + "nsPlugin", + "scapyPlugin" + ] }, "setup": [ - "$IP link set dev $DUMMY up || true", - "$IP addr add 10.10.10.10/24 dev $DUMMY || true", - "$TC qdisc add dev $DUMMY handle 1: root drr", - "$TC class add dev $DUMMY parent 1: classid 1:10 drr quantum 1500", - "$TC qdisc add dev $DUMMY parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", - "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", - "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", - "sleep 0.1" + "$TC qdisc add dev $IFB handle 1: root drr", + "$TC class add dev $IFB parent 1: classid 1:10 drr quantum 1500", + "$TC qdisc add dev $IFB parent 1:10 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $IFB parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:10", + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip protocol 1 0xff action mirred egress mirror dev $IFB" ], - "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $IFB", "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "verifyCmd": "$TC -s qdisc show dev $IFB | grep -A 5 'qdisc fq_codel'", "matchPattern": "dropped [1-9][0-9]*", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + "$TC qdisc del dev $IFB root", + "$TC qdisc del dev $DEV1 ingress" ] 
}, { @@ -516,26 +540,32 @@ "ets" ], "plugins": { - "requires": "nsPlugin" + "requires": [ + "nsPlugin", + "scapyPlugin" + ] }, "setup": [ - "$IP link set dev $DUMMY up || true", - "$IP addr add 10.10.10.10/24 dev $DUMMY || true", - "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 1", - "$TC class change dev $DUMMY parent 1: classid 1:1 ets", - "$TC qdisc add dev $DUMMY parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", - "$TC filter add dev $DUMMY parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", - "ping -c 5 -f -I $DUMMY 10.10.10.1 > /dev/null || true", - "sleep 0.1" + "$TC qdisc add dev $IFB handle 1: root ets bands 2 strict 1", + "$TC class change dev $IFB parent 1: classid 1:1 ets", + "$TC qdisc add dev $IFB parent 1:1 handle 10: fq_codel memory_limit 1 flows 1 target 0.1ms interval 1ms", + "$TC filter add dev $IFB parent 1: protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1", + "$TC qdisc add dev $DEV1 ingress", + "$TC filter add dev $DEV1 ingress protocol ip prio 1 u32 match ip protocol 1 0xff action mirred egress mirror dev $IFB" ], - "cmdUnderTest": "$TC -s qdisc show dev $DUMMY", + "scapy": { + "iface": "$DEV0", + "count": 5, + "packet": "Ether()/IP(dst='10.10.10.1', src='10.10.10.10')/ICMP()" + }, + "cmdUnderTest": "$TC -s qdisc show dev $IFB", "expExitCode": "0", - "verifyCmd": "$TC -s qdisc show dev $DUMMY | grep -A 5 'qdisc fq_codel'", + "verifyCmd": "$TC -s qdisc show dev $IFB | grep -A 5 'qdisc fq_codel'", "matchPattern": "dropped [1-9][0-9]*", "matchCount": "1", "teardown": [ - "$TC qdisc del dev $DUMMY handle 1: root", - "$IP addr del 10.10.10.10/24 dev $DUMMY || true" + "$TC qdisc del dev $IFB root", + "$TC qdisc del dev $DEV1 ingress" ] }, { diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index 81b4ac3f050c3..511d66c36a2a0 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ 
-378,6 +378,7 @@ def run_one_test(pm, args, index, tidx): dev0 = NAMES['DEV0'] dev1 = NAMES['DEV1'] dummy = NAMES['DUMMY'] + ifb = NAMES['IFB'] result = True tresult = "" tap = "" @@ -414,6 +415,7 @@ def run_one_test(pm, args, index, tidx): NAMES['DEV0'] = '{}id{}'.format(NAMES['DEV0'], tidx['id']) NAMES['DEV1'] = '{}id{}'.format(NAMES['DEV1'], tidx['id']) NAMES['DUMMY'] = '{}id{}'.format(NAMES['DUMMY'], tidx['id']) + NAMES['IFB'] = '{}id{}'.format(NAMES['IFB'], tidx['id']) pm.call_pre_case(tidx) prepare_env(tidx, args, pm, 'setup', "-----> prepare stage", tidx["setup"]) @@ -474,6 +476,7 @@ def run_one_test(pm, args, index, tidx): NAMES['DEV0'] = dev0 NAMES['DEV1'] = dev1 NAMES['DUMMY'] = dummy + NAMES['IFB'] = ifb return res diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py index 9488b03cbc2c8..cd0bd42f05a54 100644 --- a/tools/testing/selftests/tc-testing/tdc_config.py +++ b/tools/testing/selftests/tc-testing/tdc_config.py @@ -17,6 +17,7 @@ NAMES = { 'DEV1': 'v0p1', 'DEV2': '', 'DUMMY': 'dummy1', + 'IFB': 'ifbtdc0', 'ETHTOOL': '/usr/sbin/ethtool', 'ETH': 'eth0', 'BATCH_FILE': './batch.txt', |
