Skip to content
Merged
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
33d3070
Create sub CMIS FSM for DP decomission
AnoopKamath Apr 23, 2025
64ebc0d
Update xcvrd.py
AnoopKamath Apr 30, 2025
f8f3468
Add mock test
AnoopKamath Apr 30, 2025
0166248
Update xcvrd.py
AnoopKamath Apr 30, 2025
1553174
Update xcvrd.py
AnoopKamath Apr 30, 2025
8f4b514
Update test_xcvrd.py
AnoopKamath Apr 30, 2025
1066c3b
Fix: Merge conflict
AnoopKamath Apr 30, 2025
8ebefcb
Add more coverage
AnoopKamath Apr 30, 2025
254b6da
Fix return type
AnoopKamath Apr 30, 2025
0cd225f
Update xcvrd.py
AnoopKamath May 1, 2025
dcc6014
Add logic to decommission in the main state machine itself
AnoopKamath May 12, 2025
5cd4106
Update xcvrd.py
AnoopKamath May 12, 2025
0734cbe
Merge branch 'sonic-net:master' into master
AnoopKamath Jun 25, 2025
6bcfdd1
New proposal to decommission DP
AnoopKamath Jun 25, 2025
a76605d
Update xcvrd.py
AnoopKamath Jun 25, 2025
99607fb
Revert mock changes
AnoopKamath Jun 25, 2025
e495798
Fix indentations
AnoopKamath Jun 25, 2025
5159eb3
Address review comments
AnoopKamath Jul 3, 2025
a6188ca
Update xcvrd.py
AnoopKamath Jul 3, 2025
53a034a
Fix build failure
AnoopKamath Jul 9, 2025
bfa884a
Update test_xcvrd.py
AnoopKamath Jul 9, 2025
8f09f2e
fix indentation
AnoopKamath Jul 9, 2025
bd9cce2
Update xcvrd.py
AnoopKamath Jul 10, 2025
075741c
Add more coverage
AnoopKamath Jul 10, 2025
1502d3c
Add decomm_pending_dict
longhuan-cisco Jul 14, 2025
c747a00
Increase code coverage
longhuan-cisco Jul 15, 2025
ae99e61
Address comments
longhuan-cisco Jul 15, 2025
13519f2
Add back empty line
longhuan-cisco Jul 16, 2025
c204261
Update function name to is_decomm_failed
longhuan-cisco Jul 16, 2025
6b42695
Log more detail for ConfigSuccess timeout case, increase code cov and…
longhuan-cisco Jul 18, 2025
02cf806
Move DPInitPending check before ConfigSuccess check to be inline with…
longhuan-cisco Jul 18, 2025
1b880bc
Revert "Move DPInitPending check before ConfigSuccess check to be inl…
longhuan-cisco Jul 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
176 changes: 164 additions & 12 deletions sonic-xcvrd/tests/test_xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,21 @@

media_settings_empty = {}

def gen_cmis_lanes_dict(key_format_str, value, one_based=True):
    """Build a per-lane dict in which every generated key maps to ``value``.

    Args:
        key_format_str: ``str.format`` template with one positional
            placeholder that receives the lane index, e.g. ``'DP{}State'``.
        value: Object stored under every generated key (the same object is
            shared by all keys, it is not copied).
        one_based: When True the lane indices start at 1, otherwise at 0.

    Returns:
        A dict with one key per lane index, covering
        ``CmisManagerTask.CMIS_MAX_HOST_LANES`` consecutive indices.
    """
    first_lane = 1 if one_based else 0
    end_lane = first_lane + CmisManagerTask.CMIS_MAX_HOST_LANES
    return {
        key_format_str.format(lane): value
        for lane in range(first_lane, end_lane)
    }

def gen_cmis_dp_state_dict(value):
    """Return a dict keyed 'DP1State', 'DP2State', ... with every entry set to ``value``."""
    dp_state_key_fmt = 'DP{}State'
    return gen_cmis_lanes_dict(dp_state_key_fmt, value)

def gen_cmis_config_status_dict(value):
    """Return a dict keyed 'ConfigStatusLane1', 'ConfigStatusLane2', ... with every entry set to ``value``."""
    config_status_key_fmt = 'ConfigStatusLane{}'
    return gen_cmis_lanes_dict(config_status_key_fmt, value)

def gen_cmis_dpinit_pending_dict(value):
    """Return a dict keyed 'DPInitPending1', 'DPInitPending2', ... with every entry set to ``value``."""
    dpinit_pending_key_fmt = 'DPInitPending{}'
    return gen_cmis_lanes_dict(dpinit_pending_key_fmt, value)

class TestXcvrdThreadException(object):

Expand Down Expand Up @@ -2303,15 +2318,15 @@ def test_CmisManagerTask_task_run_stop(self, mock_chassis):
(0, {0 : 1, 1 : 1, 2 : 1, 3 : 1}, True),
(1, {0 : 0, 1 : 0, 2 : 0, 3 : 0, 4 : 0, 5 : 0, 6 : 0, 7 : 0}, False)
])
def test_CmisManagerTask_is_appl_reconfigure_required(self, app_new, lane_appl_code, expected):
def test_CmisManagerTask_is_decommission_required(self, app_new, lane_appl_code, expected):
mock_xcvr_api = MagicMock()
def get_application(lane):
return lane_appl_code.get(lane, 0)
mock_xcvr_api.get_application = MagicMock(side_effect=get_application)
port_mapping = PortMapping()
stop_event = threading.Event()
task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event)
assert task.is_appl_reconfigure_required(mock_xcvr_api, app_new) == expected
assert task.is_decommission_required(mock_xcvr_api, app_new) == expected

DEFAULT_DP_STATE = {
'DP1State': 'DataPathActivated',
Expand Down Expand Up @@ -2621,7 +2636,10 @@ def test_CmisManagerTask_test_is_timer_expired(self, expired_time, current_time,
@patch('xcvrd.xcvrd.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock()))
@patch('xcvrd.xcvrd._wrapper_get_sfp_type', MagicMock(return_value='QSFP_DD'))
@patch('xcvrd.xcvrd.CmisManagerTask.wait_for_port_config_done', MagicMock())
@patch('xcvrd.xcvrd.CmisManagerTask.is_decommission_required', MagicMock(return_value=False))
@patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True))
@patch('xcvrd.xcvrd_utilities.optics_si_parser.optics_si_present', MagicMock(return_value=(True)))
@patch('xcvrd.xcvrd_utilities.optics_si_parser.fetch_optics_si_setting', MagicMock())
def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_sw_tbl):
mock_get_status_sw_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_SW_TABLE)
mock_xcvr_api = MagicMock()
Expand Down Expand Up @@ -2830,8 +2848,6 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_sw_tbl)
task.configure_laser_frequency = MagicMock(return_value=1)

# Case 1: CMIS_STATE_DP_PRE_INIT_CHECK --> DP_DEINIT
task.is_appl_reconfigure_required = MagicMock(return_value=True)
mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=True)
task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True])
task.task_worker()
assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT
Expand Down Expand Up @@ -2895,19 +2911,13 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_sw_tbl)
task.configure_tx_output_power = MagicMock(return_value=1)
task.configure_laser_frequency = MagicMock(return_value=1)

# Shouldn't proceed to DP_DEINIT on error
task.is_appl_reconfigure_required = MagicMock(return_value=True)
mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=False)
task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True])
task.task_worker()
assert not get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_DP_DEINIT

@patch('xcvrd.xcvrd.XcvrTableHelper.get_status_sw_tbl')
@patch('xcvrd.xcvrd.platform_chassis')
@patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(True)))
@patch('xcvrd.xcvrd.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock()))
@patch('xcvrd.xcvrd._wrapper_get_sfp_type', MagicMock(return_value='QSFP_DD'))
@patch('xcvrd.xcvrd.CmisManagerTask.wait_for_port_config_done', MagicMock())
@patch('xcvrd.xcvrd.CmisManagerTask.is_decommission_required', MagicMock(return_value=False))
@patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True))
def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_status_sw_tbl):
mock_get_status_sw_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_SW_TABLE)
Expand Down Expand Up @@ -3046,6 +3056,7 @@ def test_CmisManagerTask_task_worker_fastboot(self, mock_chassis, mock_get_statu
@patch('xcvrd.xcvrd.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock()))
@patch('xcvrd.xcvrd._wrapper_get_sfp_type', MagicMock(return_value='QSFP_DD'))
@patch('xcvrd.xcvrd.CmisManagerTask.wait_for_port_config_done', MagicMock())
@patch('xcvrd.xcvrd.CmisManagerTask.is_decommission_required', MagicMock(return_value=False))
@patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True))
def test_CmisManagerTask_task_worker_host_tx_ready_false_to_true(self, mock_chassis, mock_get_status_sw_tbl):
mock_get_status_sw_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)
Expand Down Expand Up @@ -3237,14 +3248,155 @@ def test_CmisManagerTask_task_worker_host_tx_ready_false_to_true(self, mock_chas
mock_sfp = MagicMock()
mock_sfp.get_xcvr_api = MagicMock(return_value=mock_xcvr_api)
mock_xcvr_api.is_coherent_module = MagicMock(return_value=False)
task.is_appl_reconfigure_required = MagicMock(return_value=False)
task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True])
task.task_worker()

assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT
assert task.port_dict['Ethernet0']['forced_tx_disabled'] == False
assert task.port_dict['Ethernet0']['cmis_retries'] == 1

@patch('xcvrd.xcvrd.XcvrTableHelper.get_status_sw_tbl')
@patch('xcvrd.xcvrd.platform_chassis')
@patch('xcvrd.xcvrd.is_fast_reboot_enabled', MagicMock(return_value=(False)))
@patch('xcvrd.xcvrd.PortChangeObserver', MagicMock(handle_port_update_event=MagicMock()))
@patch('xcvrd.xcvrd._wrapper_get_sfp_type', MagicMock(return_value='QSFP_DD'))
@patch('xcvrd.xcvrd.CmisManagerTask.wait_for_port_config_done', MagicMock())
@patch('xcvrd.xcvrd.is_cmis_api', MagicMock(return_value=True))
@patch('xcvrd.xcvrd.get_cmis_application_desired', MagicMock(return_value=1))
def test_CmisManagerTask_task_worker_decommission(self, mock_chassis, mock_get_status_sw_tbl):
    """Exercise the CMIS datapath decommission flow for two subports of one physical port.

    The test walks through three scenarios, checking the CMIS state stored in
    STATE_DB and the decommission bookkeeping helpers after each step:

    1. Successful decommission: Ethernet0 becomes the decommission lead while
       Ethernet2 stays in CMIS_STATE_INSERTED until the lead is done.
    2. Failed decommission: the lead is forced to CMIS_STATE_FAILED and the
       waiting subport is expected to end up in CMIS_STATE_FAILED as well.
    3. Cleanup: deleting the lead subport removes the physical port from
       decomm_pending_dict.
    """
    # The patched table getter argument is replaced by a Table object that is
    # then installed as the helper's return value further below.
    mock_get_status_sw_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)
    mock_xcvr_api = MagicMock()
    mock_xcvr_api.set_datapath_deinit = MagicMock(return_value=True)
    mock_xcvr_api.set_datapath_init = MagicMock(return_value=True)
    mock_xcvr_api.tx_disable_channel = MagicMock(return_value=True)
    mock_xcvr_api.set_lpmode = MagicMock(return_value=True)
    mock_xcvr_api.set_application = MagicMock(return_value=True)
    mock_xcvr_api.is_flat_memory = MagicMock(return_value=False)
    mock_xcvr_api.is_coherent_module = MagicMock(return_value=True)
    mock_xcvr_api.get_tx_config_power = MagicMock(return_value=0)
    mock_xcvr_api.get_laser_config_freq = MagicMock(return_value=0)
    mock_xcvr_api.get_module_type_abbreviation = MagicMock(return_value='QSFP-DD')
    mock_xcvr_api.get_datapath_init_duration = MagicMock(return_value=60000.0)
    mock_xcvr_api.get_module_pwr_up_duration = MagicMock(return_value=70000.0)
    mock_xcvr_api.get_datapath_deinit_duration = MagicMock(return_value=600000.0)
    mock_xcvr_api.get_cmis_rev = MagicMock(return_value='5.0')
    mock_xcvr_api.get_supported_freq_config = MagicMock(return_value=(0xA0,0,0,191300,196100))
    # Per-lane module status: every lane reports the same value.
    mock_xcvr_api.get_dpinit_pending = MagicMock(return_value=gen_cmis_dpinit_pending_dict(True))
    mock_xcvr_api.get_module_state = MagicMock(return_value='ModuleReady')
    mock_xcvr_api.get_config_datapath_hostlane_status = MagicMock(return_value=gen_cmis_config_status_dict('ConfigSuccess'))
    mock_xcvr_api.get_datapath_state = MagicMock(return_value=gen_cmis_dp_state_dict('DataPathDeactivated'))

    stop_event = threading.Event()
    mock_sfp = MagicMock()
    mock_sfp.get_presence = MagicMock(return_value=True)
    mock_sfp.get_xcvr_api = MagicMock(return_value=mock_xcvr_api)
    mock_chassis.get_all_sfps = MagicMock(return_value=[mock_sfp])
    mock_chassis.get_sfp = MagicMock(return_value=mock_sfp)

    port_mapping = PortMapping()

    task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event)
    # The first two calls report that decommission is required, the next ten
    # report that it is not.
    task.is_decommission_required = MagicMock(side_effect=[True]*2 + [False]*10)
    task.xcvr_table_helper.get_status_sw_tbl.return_value = mock_get_status_sw_tbl
    task.get_host_tx_status = MagicMock(return_value='true')
    task.get_port_admin_status = MagicMock(return_value='up')
    task.get_configured_tx_power_from_db = MagicMock(return_value=-13)
    task.get_configured_laser_freq_from_db = MagicMock(return_value=193100)
    task.configure_tx_output_power = MagicMock(return_value=1)
    task.configure_laser_frequency = MagicMock(return_value=1)
    task.get_cmis_host_lanes_mask = MagicMock(return_value=1)
    task.get_cmis_media_lanes_mask = MagicMock(return_value=1)

    # Both subports below are created on this same physical port index.
    physical_port_idx = 0

    # Insert 1st subport event
    port_change_event = PortChangeEvent('Ethernet0', physical_port_idx, 0, PortChangeEvent.PORT_SET, {'speed':'100000', 'lanes':'1,2', 'subport': '1'})
    task.on_port_update_event(port_change_event)
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED

    # 1st subport (as the lead) starting decommission state machine
    # is_set() answers False twice and then True; presumably this bounds how
    # long task_worker() keeps running -- the loop itself is not visible here.
    task.task_stopping_event.is_set = MagicMock(side_effect=[False]*2 + [True])
    task.task_worker()
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT
    assert task.is_decomm_lead_lport('Ethernet0')

    # Insert 2nd subport event
    port_change_event = PortChangeEvent('Ethernet2', physical_port_idx, 0, PortChangeEvent.PORT_SET, {'speed':'100000', 'lanes':'3,4', 'subport': '2'})
    task.on_port_update_event(port_change_event)
    task.task_stopping_event.is_set = MagicMock(side_effect=[False]*3 + [True])
    task.task_worker()
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_AP_CONF
    # 2nd subport should not start decommission state machine as 1st subport already started decommission for the entire physical port
    assert get_cmis_state_from_state_db('Ethernet2', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet2'))) == CMIS_STATE_INSERTED
    assert task.is_decomm_pending('Ethernet0')
    assert not task.is_decomm_failed('Ethernet0')

    task.task_stopping_event.is_set = MagicMock(side_effect=[False]*3 + [True])
    task.task_worker()
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_INIT
    # 2nd subport is waiting for decommission to complete
    assert get_cmis_state_from_state_db('Ethernet2', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet2'))) == CMIS_STATE_INSERTED
    assert task.is_decomm_lead_lport('Ethernet0')

    task.task_stopping_event.is_set = MagicMock(side_effect=[False]*3 + [True])
    task.task_worker()
    # 1st subport completed decommission state machine and proceed to normal state machine, entire physical port is done on decommission
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_INSERTED
    # 2nd subport is unblocked from decommission and continue on normal state machine
    assert get_cmis_state_from_state_db('Ethernet2', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet2'))) == CMIS_STATE_DP_PRE_INIT_CHECK
    assert not task.is_decomm_lead_lport('Ethernet0')
    assert not task.is_decomm_failed('Ethernet0')
    assert not task.is_decomm_pending('Ethernet0')

    # Delete the config for all subports
    port_change_event = PortChangeEvent('Ethernet0', physical_port_idx, 0, PortChangeEvent.PORT_DEL, {}, db_name='CONFIG_DB', table_name='PORT')
    task.on_port_update_event(port_change_event)
    port_change_event = PortChangeEvent('Ethernet2', physical_port_idx, 0, PortChangeEvent.PORT_DEL, {}, db_name='CONFIG_DB', table_name='PORT')
    task.on_port_update_event(port_change_event)
    assert not task.port_dict

    # Reset is_decommission_required() to start decommission from scratch
    task.is_decommission_required = MagicMock(side_effect=[True]*2 + [False]*10)

    # Test failed decommission case:

    # Insert 1st subport event
    port_change_event = PortChangeEvent('Ethernet0', physical_port_idx, 0, PortChangeEvent.PORT_SET, {'speed':'100000', 'lanes':'1,2', 'subport': '1'})
    task.on_port_update_event(port_change_event)
    task.task_stopping_event.is_set = MagicMock(side_effect=[False]*2 + [True])
    task.task_worker()
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT
    # 1st subport (as the lead) starting decommission state machine
    assert task.is_decomm_lead_lport('Ethernet0')
    # Set CMIS_STATE_FAILED to 1st subport to force decommission fail on the entire physical port
    task.update_port_transceiver_status_table_sw_cmis_state('Ethernet0', CMIS_STATE_FAILED)
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED
    assert task.is_decomm_failed('Ethernet0')

    # Insert 2nd subport event
    port_change_event = PortChangeEvent('Ethernet2', physical_port_idx, 0, PortChangeEvent.PORT_SET, {'speed':'100000', 'lanes':'3,4', 'subport': '2'})
    task.on_port_update_event(port_change_event)
    task.task_stopping_event.is_set = MagicMock(side_effect=[False]*3 + [True])
    task.task_worker()
    # 1st subport should stay in failed state
    assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_FAILED
    assert task.is_decomm_failed('Ethernet0')
    assert task.is_decomm_lead_lport('Ethernet0')
    # 2nd subport is waiting for decommission to complete, and should also fall into failed state
    assert get_cmis_state_from_state_db('Ethernet2', task.xcvr_table_helper.get_status_sw_tbl(task.get_asic_id('Ethernet2'))) == CMIS_STATE_FAILED
    assert task.is_decomm_pending('Ethernet2')
    assert task.is_decomm_failed('Ethernet2')

    # Delete the config for 1st subport
    port_change_event = PortChangeEvent('Ethernet0', physical_port_idx, 0, PortChangeEvent.PORT_DEL, {}, db_name='CONFIG_DB', table_name='PORT')
    task.on_port_update_event(port_change_event)
    # 1st subport is removed from port_dict
    assert 'Ethernet0' not in task.port_dict
    assert len(task.port_dict) == 1
    # physical port should also be removed from decomm_pending_dict
    assert physical_port_idx not in task.decomm_pending_dict
    assert not task.is_decomm_pending('Ethernet2')

@pytest.mark.parametrize("lport, expected_dom_polling", [
('Ethernet0', 'disabled'),
('Ethernet4', 'disabled'),
Expand Down
Loading
Loading