Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions doc/user/zebra.rst
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,15 @@ the default route.
before removing it from the system if the nexthop group is no longer
being used. The default time is 180 seconds.

.. clicmd:: zebra nexthop-group resilience buckets (1-256) idle-timer (1-4294967295) unbalanced-timer (1-4294967295)

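Make every multipath nexthop group that zebra itself creates a resilient
nexthop group, using the specified number of buckets, idle timer and
unbalanced timer. Both timers are specified in seconds. This applies the
same resilient hashing parameters that can be configured per group under
``nexthop-group NAME`` to all zebra-owned groups. Nexthop groups with a
single nexthop are not made resilient. A nexthop group that an upper level
protocol has already requested to be resilient keeps its own parameters and
is not overridden. Removing the configuration with the ``no`` form only
affects nexthop groups created afterwards; groups that are already
resilient keep their parameters.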

.. clicmd:: ip nht resolve-via-default

Allow IPv4 nexthop tracking to resolve via the default route. This parameter
Expand Down
16 changes: 16 additions & 0 deletions tests/topotests/zebra_nhg_resilience/r1/frr.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
!
hostname r1
!
zebra nexthop-group resilience buckets 8 idle-timer 100 unbalanced-timer 200
!
interface r1-eth0
ip address 192.168.1.1/24
!
interface r1-eth1
ip address 192.168.2.1/24
!
ip route 10.0.0.0/24 192.168.1.2 r1-eth0
ip route 10.0.0.0/24 192.168.2.2 r1-eth1
!
ip route 10.1.1.0/24 192.168.1.2 r1-eth0
!
186 changes: 186 additions & 0 deletions tests/topotests/zebra_nhg_resilience/test_zebra_nhg_resilience.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env python
# SPDX-License-Identifier: ISC

#
# Copyright (c) 2026 by Nvidia Corporation
# Donald Sharp
#

"""
Test that 'zebra nexthop-group resilience ...' causes every zebra-created
Comment on lines +1 to +10

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Missing __init__.py in test directory

Every other zebra_* topotest directory ships an __init__.py (e.g. zebra_netlink/, zebra_opaque/, zebra_reserved_ranges/, etc.). Without it, pytest running in default prepend import mode may fail to collect this test or, if two identically-named test files ever appear in different directories, produce an ImportError for a naming collision. The file is simply empty (\n), but its absence is inconsistent with the rest of the test suite and will cause problems in some CI environments.

Prompt To Fix With AI
This is a comment left during a code review.
Path: tests/topotests/zebra_nhg_resilience/test_zebra_nhg_resilience.py
Line: 1-10

Comment:
**Missing `__init__.py` in test directory**

Every other `zebra_*` topotest directory ships an `__init__.py` (e.g. `zebra_netlink/`, `zebra_opaque/`, `zebra_reserved_ranges/`, etc.). Without it, pytest running in default `prepend` import mode may fail to collect this test or, if two identically-named test files ever appear in different directories, produce an `ImportError` for a naming collision. The file is simply empty (`\n`), but its absence is inconsistent with the rest of the test suite and will cause problems in some CI environments.

How can I resolve this? If you propose a fix, please make it concise.

multipath nexthop group to be installed as a resilient nexthop group using
the configured parameters, while singleton groups are left alone and the
configuration can be removed again.
"""

import os
import sys
import json
import functools
import pytest

CWD = os.path.dirname(os.path.realpath(__file__))
sys.path.append(os.path.join(CWD, "../"))

# pylint: disable=C0413
from lib import topotest
from lib.topogen import Topogen, TopoRouter, get_topogen
from lib.common_config import step

pytestmark = [pytest.mark.mgmtd, pytest.mark.staticd]


def build_topo(tgen):
    """Build the test topology.

    Adds a single router, r1, and connects it to two switches, s1 and s2,
    so that r1 ends up with two links.
    """
    tgen.add_router("r1")

    # Each switch gets exactly one link, to r1, in the order s1 then s2.
    for switch_name in ("s1", "s2"):
        tgen.add_switch(switch_name).add_link(tgen.gears["r1"])


def setup_module(mod):
    """Module-level setup: start the topology and every router.

    Each router loads ``<router name>/frr.conf`` from this test's directory
    with the mgmtd, zebra and staticd daemons enabled.
    """
    tgen = Topogen(build_topo, mod.__name__)
    tgen.start_topology()

    daemons = (TopoRouter.RD_MGMTD, TopoRouter.RD_ZEBRA, TopoRouter.RD_STATIC)

    for name, rtr in tgen.routers().items():
        config_path = os.path.join(CWD, "{}/frr.conf".format(name))
        # A fresh list per router, each daemon paired with None as its
        # second tuple element.
        rtr.load_frr_config(config_path, [(daemon, None) for daemon in daemons])

    tgen.start_router()


def teardown_module(mod):
    """Module-level teardown: stop the topology started by setup_module."""
    get_topogen().stop_topology()


def _route_nhg_id(router, prefix):
"""Return the nexthop group id zebra assigned to a route, or None."""
output = json.loads(router.vtysh_cmd("show ip route {} json".format(prefix)))

entries = output.get(prefix)
if not entries:
return None

return entries[0].get("nexthopGroupId")


def _check_nhg_resilience(router, prefix, buckets, idle, unbalanced, count):
    """Verify the route's nexthop group is resilient with the given params.

    Looks up the nexthop group id used by ``prefix``, then compares the
    ``show nexthop-group rib <id> json`` output against the expected
    ``nexthopCount``, ``buckets``, ``idleTimer`` and ``unbalancedTimer``.

    Returns an error string while the route has no nexthop group id yet;
    otherwise returns the result of ``topotest.json_cmp``. Callers in this
    file treat ``None`` as success.
    """
    nhgid = _route_nhg_id(router, prefix)
    if nhgid is None:
        return "{}: no nexthop group id installed yet".format(prefix)

    # Decode through vtysh_cmd() so output that cannot be parsed becomes a
    # comparison mismatch, which is retried, instead of an exception that
    # aborts the run_and_expect() polling loop.
    output = router.vtysh_cmd(
        "show nexthop-group rib {} json".format(nhgid), isjson=True
    )

    # The show output is keyed by the group id as a string.
    expected = {
        str(nhgid): {
            "nexthopCount": count,
            "buckets": buckets,
            "idleTimer": idle,
            "unbalancedTimer": unbalanced,
        }
    }

    return topotest.json_cmp(output, expected)


def _check_nhg_not_resilient(router, prefix):
    """Verify the route's nexthop group has no resilience configured.

    A group counts as resilient when its ``show nexthop-group rib <id>
    json`` entry contains a ``buckets`` key.

    Returns None when the group exists and is not resilient. Otherwise
    returns an error string: no nexthop group id yet, group missing from
    the output, or group unexpectedly resilient.
    """
    nhgid = _route_nhg_id(router, prefix)
    if nhgid is None:
        return "{}: no nexthop group id installed yet".format(prefix)

    # Decode through vtysh_cmd() so output that cannot be parsed is
    # reported as "not found" and retried, instead of raising and aborting
    # the run_and_expect() polling loop.
    output = router.vtysh_cmd(
        "show nexthop-group rib {} json".format(nhgid), isjson=True
    )

    # The show output is keyed by the group id as a string.
    group = output.get(str(nhgid))
    if group is None:
        return "{}: nexthop group {} not found".format(prefix, nhgid)

    if "buckets" in group:
        return "{}: nexthop group {} unexpectedly resilient: {}".format(
            prefix, nhgid, group
        )

    return None


def test_multipath_route_is_resilient():
    """The multipath route 10.0.0.0/24 must use a resilient nexthop group.

    Expects 2 nexthops, 8 buckets, idle timer 100 and unbalanced timer 200.
    """
    tgen = get_topogen()
    if tgen.routers_have_failure():
        pytest.skip(tgen.errors)

    r1 = tgen.gears["r1"]

    step("Multipath route inherits the configured resilience parameters")

    check = functools.partial(
        _check_nhg_resilience,
        r1,
        "10.0.0.0/24",
        buckets=8,
        idle=100,
        unbalanced=200,
        count=2,
    )
    # run_and_expect() returns a pair; only its second item is asserted on.
    outcome = topotest.run_and_expect(check, None, count=60, wait=0.5)[1]
    assert outcome is None, "Multipath nexthop group was not made resilient"


def test_singleton_route_is_not_resilient():
    """The single-nexthop route 10.1.1.0/24 must not be made resilient."""
    tgen = get_topogen()
    if tgen.routers_have_failure():
        pytest.skip(tgen.errors)

    r1 = tgen.gears["r1"]

    step("Singleton route is not made resilient")

    check = functools.partial(_check_nhg_not_resilient, r1, "10.1.1.0/24")
    # run_and_expect() returns a pair; only its second item is asserted on.
    outcome = topotest.run_and_expect(check, None, count=60, wait=0.5)[1]
    assert outcome is None, "Singleton nexthop group should not be resilient"


def test_resilience_removal():
    """Removing the global setting affects only groups created afterwards.

    Removes the resilience configuration, adds a new two-nexthop route
    (10.2.2.0/24) and checks that its group is not resilient, then checks
    that the group of 10.0.0.0/24 still has the original parameters.
    """
    tgen = get_topogen()
    if tgen.routers_have_failure():
        pytest.skip(tgen.errors)

    r1 = tgen.gears["r1"]

    step("Removing the configuration stops new groups from being resilient")

    r1.vtysh_cmd("configure terminal\nno zebra nexthop-group resilience")

    # Add the new multipath route only after the setting has been removed.
    new_routes = "\n".join(
        [
            "configure terminal",
            "ip route 10.2.2.0/24 192.168.1.2 r1-eth0",
            "ip route 10.2.2.0/24 192.168.2.2 r1-eth1",
        ]
    )
    r1.vtysh_cmd(new_routes)

    check_new = functools.partial(_check_nhg_not_resilient, r1, "10.2.2.0/24")
    outcome = topotest.run_and_expect(check_new, None, count=60, wait=0.5)[1]
    assert (
        outcome is None
    ), "Multipath group created after removal should not be resilient"

    step("Existing resilient group is untouched by the removal")

    check_old = functools.partial(
        _check_nhg_resilience,
        r1,
        "10.0.0.0/24",
        buckets=8,
        idle=100,
        unbalanced=200,
        count=2,
    )
    outcome = topotest.run_and_expect(check_old, None, count=30, wait=0.5)[1]
    assert outcome is None, "Existing resilient nexthop group should be unchanged"


if __name__ == "__main__":
    # Run this file directly through pytest, putting "-s" ahead of any
    # arguments given on the command line.
    sys.exit(pytest.main(["-s", *sys.argv[1:]]))
41 changes: 41 additions & 0 deletions yang/frr-zebra.yang
Original file line number Diff line number Diff line change
Expand Up @@ -3173,6 +3173,47 @@ module frr-zebra {
description
"Limit on the number of updates queued to the dataplane subsystem.";
}
container nexthop-group {
description
"Global nexthop group configuration.";
// Presence container: the feature is enabled by the existence of this
// node. All three leaves below are mandatory, so whenever the container
// exists it carries a complete parameter set.
container resilience {
presence
"Create every zebra-owned multipath nexthop group as a
resilient nexthop group using the parameters below. A
nexthop group that an upper level protocol has already
configured as resilient is left unchanged.";
description
"Resilient hashing parameters applied to every zebra-owned
multipath nexthop group.";
leaf buckets {
type uint16 {
range "1..256";
}
mandatory true;
description
"Number of buckets in the resilient hash for each group.";
}
leaf idle-timer {
type uint32 {
range "1..4294967295";
}
units "seconds";
mandatory true;
description
"The idle timer for each resilient nexthop group, in seconds.";
}
leaf unbalanced-timer {
type uint32 {
range "1..4294967295";
}
units "seconds";
mandatory true;
description
"The length of time that the nexthop group can be
unbalanced, in seconds.";
}
}
}
leaf ptm-enable {
if-feature ptm-bfd;
type boolean;
Expand Down
49 changes: 49 additions & 0 deletions zebra/zebra_cli.c
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,17 @@ static void zebra_workqueue_hold_timer_cli_write(struct vty *vty, const struct l
vty_out(vty, "zebra work-queue %u\n", timer);
}

/*
 * Write the global nexthop-group resilience settings as one
 * "zebra nexthop-group resilience ..." configuration line.
 *
 * vty           - output to write the line to
 * dnode         - node whose "buckets", "idle-timer" and "unbalanced-timer"
 *                 children supply the three values
 * show_defaults - not used by this writer
 */
static void zebra_nexthop_group_resilience_cli_write(struct vty *vty,
						     const struct lyd_node *dnode,
						     bool show_defaults)
{
	uint16_t buckets = yang_dnode_get_uint16(dnode, "buckets");
	uint32_t idle_timer = yang_dnode_get_uint32(dnode, "idle-timer");
	uint32_t unbalanced_timer = yang_dnode_get_uint32(dnode, "unbalanced-timer");

	vty_out(vty,
		"zebra nexthop-group resilience buckets %u idle-timer %u unbalanced-timer %u\n",
		buckets, idle_timer, unbalanced_timer);
}

static void zebra_import_kernel_table_xpath(char *xpath, size_t xpath_len, afi_t afi, safi_t safi,
const char *table_id)
{
Expand Down Expand Up @@ -260,6 +271,39 @@ DEFPY_YANG_HIDDEN (zebra_workqueue_timer,
return nb_cli_apply_changes(vty, NULL);
}

/*
 * Global "zebra nexthop-group resilience" command.
 *
 * The positive form enqueues a modify for each of the three leaves under
 * /frr-zebra:zebra/nexthop-group/resilience; the "no" form enqueues a
 * destroy of that node. Either way the queued changes are then applied.
 */
DEFPY_YANG (zebra_nexthop_group_resilience,
	    zebra_nexthop_group_resilience_cmd,
	    "[no] zebra nexthop-group resilience ![buckets (1-256)$buckets idle-timer (1-4294967295)$idle_timer unbalanced-timer (1-4294967295)$unbalanced_timer]",
	    NO_STR
	    ZEBRA_STR
	    "Nexthop-group configuration\n"
	    "Create every zebra nexthop group as resilient\n"
	    "Buckets in the hash for each group\n"
	    "Number of buckets\n"
	    "The idle timer for each resilient nexthop group in seconds\n"
	    "Number of seconds of idle time\n"
	    "The length of time that the nexthop group can be unbalanced\n"
	    "Number of seconds of unbalanced time\n")
{
	/* "no" form: remove the whole resilience node and finish. */
	if (no) {
		nb_cli_enqueue_change(vty,
				      "/frr-zebra:zebra/nexthop-group/resilience",
				      NB_OP_DESTROY, NULL);
		return nb_cli_apply_changes(vty, NULL);
	}

	/* Positive form: set all three parameters in a single transaction. */
	nb_cli_enqueue_change(vty,
			      "/frr-zebra:zebra/nexthop-group/resilience/buckets",
			      NB_OP_MODIFY, buckets_str);
	nb_cli_enqueue_change(vty,
			      "/frr-zebra:zebra/nexthop-group/resilience/idle-timer",
			      NB_OP_MODIFY, idle_timer_str);
	nb_cli_enqueue_change(vty,
			      "/frr-zebra:zebra/nexthop-group/resilience/unbalanced-timer",
			      NB_OP_MODIFY, unbalanced_timer_str);

	return nb_cli_apply_changes(vty, NULL);
}

DEFPY_YANG (ip_zebra_import_table_distance,
ip_zebra_import_table_distance_cmd,
"ip import-table (1-252)$table_id [mrib]$mrib [distance (1-255)$distance] [route-map RMAP_NAME$rmap]",
Expand Down Expand Up @@ -3039,6 +3083,10 @@ const struct frr_yang_module_info frr_zebra_cli_info = {
.xpath = "/frr-zebra:zebra/dplane-queue-limit",
.cbs.cli_show = zebra_dplane_queue_limit_cli_write,
},
{
.xpath = "/frr-zebra:zebra/nexthop-group/resilience",
.cbs.cli_show = zebra_nexthop_group_resilience_cli_write,
},
{
.xpath = "/frr-zebra:zebra/zapi-packets",
.cbs.cli_show = zebra_zapi_packets_cli_write,
Expand Down Expand Up @@ -3387,6 +3435,7 @@ void zebra_cli_init(void)
install_element(CONFIG_NODE, &zebra_route_map_timer_cmd);
install_element(CONFIG_NODE, &allow_external_route_update_cmd);
install_element(CONFIG_NODE, &zebra_dplane_queue_limit_cmd);
install_element(CONFIG_NODE, &zebra_nexthop_group_resilience_cmd);
install_element(CONFIG_NODE, &zebra_zapi_packets_cmd);
install_element(CONFIG_NODE, &zebra_workqueue_timer_cmd);
install_element(CONFIG_NODE, &ip_zebra_import_table_distance_cmd);
Expand Down
26 changes: 26 additions & 0 deletions zebra/zebra_nb.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,32 @@ const struct frr_yang_module_info frr_zebra_info = {
.modify = zebra_dplane_queue_limit_modify,
}
},
{
.xpath = "/frr-zebra:zebra/nexthop-group/resilience",
.cbs = {
.create = zebra_nexthop_group_resilience_create,
.destroy = zebra_nexthop_group_resilience_destroy,
.apply_finish = zebra_nexthop_group_resilience_apply_finish,
}
},
{
.xpath = "/frr-zebra:zebra/nexthop-group/resilience/buckets",
.cbs = {
.modify = zebra_nexthop_group_resilience_buckets_modify,
}
},
{
.xpath = "/frr-zebra:zebra/nexthop-group/resilience/idle-timer",
.cbs = {
.modify = zebra_nexthop_group_resilience_idle_timer_modify,
}
},
{
.xpath = "/frr-zebra:zebra/nexthop-group/resilience/unbalanced-timer",
.cbs = {
.modify = zebra_nexthop_group_resilience_unbalanced_timer_modify,
}
},
#if HAVE_BFDD == 0
{
.xpath = "/frr-zebra:zebra/ptm-enable",
Expand Down
10 changes: 10 additions & 0 deletions zebra/zebra_nb.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,16 @@ int zebra_import_kernel_table_route_map_destroy(
int zebra_allow_external_route_update_create(struct nb_cb_create_args *args);
int zebra_allow_external_route_update_destroy(struct nb_cb_destroy_args *args);
int zebra_dplane_queue_limit_modify(struct nb_cb_modify_args *args);
int zebra_nexthop_group_resilience_create(struct nb_cb_create_args *args);
int zebra_nexthop_group_resilience_destroy(struct nb_cb_destroy_args *args);
void zebra_nexthop_group_resilience_apply_finish(
struct nb_cb_apply_finish_args *args);
int zebra_nexthop_group_resilience_buckets_modify(
struct nb_cb_modify_args *args);
int zebra_nexthop_group_resilience_idle_timer_modify(
struct nb_cb_modify_args *args);
int zebra_nexthop_group_resilience_unbalanced_timer_modify(
struct nb_cb_modify_args *args);
#if HAVE_BFDD == 0
int zebra_ptm_enable_modify(struct nb_cb_modify_args *args);
#endif
Expand Down
Loading
Loading