Skip to content
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
33d3070
Create sub CMIS FSM for DP decomission
AnoopKamath Apr 23, 2025
64ebc0d
Update xcvrd.py
AnoopKamath Apr 30, 2025
f8f3468
Add mock test
AnoopKamath Apr 30, 2025
0166248
Update xcvrd.py
AnoopKamath Apr 30, 2025
1553174
Update xcvrd.py
AnoopKamath Apr 30, 2025
8f4b514
Update test_xcvrd.py
AnoopKamath Apr 30, 2025
1066c3b
Fix: Merge conflict
AnoopKamath Apr 30, 2025
8ebefcb
Add more coverage
AnoopKamath Apr 30, 2025
254b6da
Fix return type
AnoopKamath Apr 30, 2025
0cd225f
Update xcvrd.py
AnoopKamath May 1, 2025
dcc6014
Add logic to decommission in the main state machine itself
AnoopKamath May 12, 2025
5cd4106
Update xcvrd.py
AnoopKamath May 12, 2025
0734cbe
Merge branch 'sonic-net:master' into master
AnoopKamath Jun 25, 2025
6bcfdd1
New proposal to decommission DP
AnoopKamath Jun 25, 2025
a76605d
Update xcvrd.py
AnoopKamath Jun 25, 2025
99607fb
Revert mock changes
AnoopKamath Jun 25, 2025
e495798
Fix indentations
AnoopKamath Jun 25, 2025
5159eb3
Address review comments
AnoopKamath Jul 3, 2025
a6188ca
Update xcvrd.py
AnoopKamath Jul 3, 2025
53a034a
Fix build failure
AnoopKamath Jul 9, 2025
bfa884a
Update test_xcvrd.py
AnoopKamath Jul 9, 2025
8f09f2e
fix indentation
AnoopKamath Jul 9, 2025
bd9cce2
Update xcvrd.py
AnoopKamath Jul 10, 2025
075741c
Add more coverage
AnoopKamath Jul 10, 2025
1502d3c
Add decomm_pending_dict
longhuan-cisco Jul 14, 2025
c747a00
Increase code coverage
longhuan-cisco Jul 15, 2025
ae99e61
Address comments
longhuan-cisco Jul 15, 2025
13519f2
Add back empty line
longhuan-cisco Jul 16, 2025
c204261
Update function name to is_decomm_failed
longhuan-cisco Jul 16, 2025
6b42695
Log more detail for ConfigSuccess timeout case, increase code cov and…
longhuan-cisco Jul 18, 2025
02cf806
Move DPInitPending check before ConfigSuccess check to be inline with…
longhuan-cisco Jul 18, 2025
1b880bc
Revert "Move DPInitPending check before ConfigSuccess check to be inl…
longhuan-cisco Jul 19, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 78 additions & 6 deletions sonic-xcvrd/tests/test_xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2035,20 +2035,95 @@ def test_CmisManagerTask_task_run_stop(self, mock_chassis):
cmis_manager.join()
assert not cmis_manager.is_alive()

@patch('xcvrd.xcvrd.get_decommission_state_from_state_db')
@pytest.mark.parametrize("app_new, lane_appl_code, expected", [
    (2, {0 : 1, 1 : 1, 2 : 1, 3 : 1, 4 : 2, 5 : 2, 6 : 2, 7 : 2}, True),
    (0, {0 : 1, 1 : 1, 2 : 1, 3 : 1}, True),
    (1, {0 : 0, 1 : 0, 2 : 0, 3 : 0, 4 : 0, 5 : 0, 6 : 0, 7 : 0}, False)
])
def test_CmisManagerTask_is_appl_reconfigure_required(self, mock_get_decomm, app_new, lane_appl_code, expected):
    """Check is_appl_reconfigure_required() against mocked per-lane application codes.

    `lane_appl_code` maps a host lane to the value the mocked
    `get_application` returns for it; lanes that are not listed return 0.
    `mock_get_decomm` is the mock injected by the `@patch` decorator for
    `get_decommission_state_from_state_db`; it is set to return False.
    """
    # NOTE(review): this binding is replaced by a MagicMock a few lines below;
    # confirm whether constructing the Table here is still needed.
    mock_get_status_tbl = Table("STATE_DB", TRANSCEIVER_STATUS_TABLE)
    mock_xcvr_api = MagicMock()
    port_mapping = PortMapping()
    stop_event = threading.Event()
    task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event)
    task.xcvr_table_helper = XcvrTableHelper(DEFAULT_NAMESPACE)
    # Replace the status-table accessor with a mock.
    mock_get_status_tbl = MagicMock()
    mock_get_status_tbl.set = MagicMock()
    task.xcvr_table_helper.get_status_tbl = mock_get_status_tbl

    def get_application(lane):
        return lane_appl_code.get(lane, 0)

    mock_xcvr_api.get_application = MagicMock(side_effect=get_application)
    mock_get_decomm.return_value = False
    assert task.is_appl_reconfigure_required(mock_xcvr_api, app_new, "Ethernet0") == expected

def test_CmisManagerTask_decomission_all_datapaths(self):
    """Drive decomission_all_datapaths() through a sequence of mocked conditions.

    Each step changes one mocked input (timer expiry, datapath state, the
    result of scs_apply_datapath_init, the config status) or the port's
    'cmis_decom_state' entry, then checks the code returned for "Ethernet0".
    """
    # Local names for the integer codes the assertions compare against.
    SUCCESS = 0
    RETRY = 1
    CONTINUE = 2
    mock_xcvr_api = MagicMock()
    port_mapping = PortMapping()
    stop_event = threading.Event()
    task = CmisManagerTask(DEFAULT_NAMESPACE, port_mapping, stop_event)
    task.port_dict["Ethernet0"] = {}
    mock_xcvr_api.get_datapath_deinit_duration = MagicMock(return_value=600000.0)
    # The port entry has no 'cmis_decom_state' key yet.
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == CONTINUE

    task.port_dict['Ethernet0']['cmis_decom_state'] = CMIS_DECOM_APCONFIG
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == CONTINUE

    # Timer reported as expired while in the APCONFIG sub-state.
    task.is_timer_expired = MagicMock(return_value=True)
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == RETRY

    # Datapath state check passes and scs_apply_datapath_init reports True.
    task.check_datapath_state = MagicMock(return_value=True)
    mock_xcvr_api.scs_apply_datapath_init = MagicMock(return_value=True)
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == CONTINUE

    # Same conditions, but scs_apply_datapath_init reports False.
    mock_xcvr_api.scs_apply_datapath_init = MagicMock(return_value=False)
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == RETRY

    # DPINIT sub-state: config check fails and the timer has expired.
    task.port_dict['Ethernet0']['cmis_decom_state'] = CMIS_DECOM_DPINIT
    task.is_timer_expired = MagicMock(return_value=True)
    task.check_config_error = MagicMock(return_value=False)
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == RETRY

    # Config check still fails, but the timer has not expired.
    task.is_timer_expired = MagicMock(return_value=False)
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == CONTINUE

    # Config check passes.
    task.check_config_error = MagicMock(return_value=True)
    assert task.decomission_all_datapaths("Ethernet0", mock_xcvr_api) == SUCCESS

def test_update_port_xcvr_status_tbl_decommission_state(self):
    """Call update_port_xcvr_status_tbl_decommission_state() with mocked lookups.

    The port-mapping lookups, the ASIC id lookup and the status-table
    accessor are all replaced by mocks. The test makes no assertion of its
    own after the call.
    """
    mapping = PortMapping()
    halt_event = threading.Event()
    task = CmisManagerTask(DEFAULT_NAMESPACE, mapping, halt_event)
    task.xcvr_table_helper = XcvrTableHelper(DEFAULT_NAMESPACE)

    # Status-table accessor: a mock whose call result is another mock table.
    status_tbl_getter = MagicMock()
    status_tbl_getter.set = MagicMock()
    task.xcvr_table_helper.get_status_tbl = status_tbl_getter
    mapping.logical_port_list.count('Ethernet0')

    # Mocked lookups: one physical port shared by two logical ports.
    mapping.get_logical_to_physical = MagicMock(return_value=[0])
    mapping.get_physical_to_logical = MagicMock(return_value=['Ethernet0', 'Ethernet4'])
    task.get_asic_id = MagicMock(return_value='asic0')
    status_tbl_getter.return_value = MagicMock()

    task.update_port_xcvr_status_tbl_decommission_state(mapping, 'Ethernet0', "True")

@pytest.mark.parametrize("mock_found, mock_status_dict, expected_decom_state", [
    (True, {'decommission_state': True}, True),
    (False, {}, False),
    (True, {'other_key': 'some_value'}, False)
])
def test_get_decommission_state_from_state_db(self, mock_found, mock_status_dict, expected_decom_state):
    """Check get_decommission_state_from_state_db() against a mocked status table.

    The table's `get` returns the `(mock_found, mock_status_dict)` pair
    supplied by the parametrization.
    """
    table_lookup = (mock_found, mock_status_dict)
    status_tbl = MagicMock()
    status_tbl.get.return_value = table_lookup

    observed = get_decommission_state_from_state_db("Ethernet0", status_tbl)
    assert observed == expected_decom_state

DEFAULT_DP_STATE = {
'DP1State': 'DataPathActivated',
Expand Down Expand Up @@ -2549,8 +2624,7 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl):
task.configure_laser_frequency = MagicMock(return_value=1)

# Case 1: CMIS_STATE_DP_PRE_INIT_CHECK --> DP_DEINIT
task.is_appl_reconfigure_required = MagicMock(return_value=True)
mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=True)
task.is_appl_reconfigure_required = MagicMock(return_value=False)
task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True])
task.task_worker()
assert get_cmis_state_from_state_db('Ethernet0', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet0'))) == CMIS_STATE_DP_DEINIT
Expand Down Expand Up @@ -2615,8 +2689,6 @@ def test_CmisManagerTask_task_worker(self, mock_chassis, mock_get_status_tbl):
task.configure_laser_frequency = MagicMock(return_value=1)

# Shouldn't proceed to DP_DEINIT on error
task.is_appl_reconfigure_required = MagicMock(return_value=True)
mock_xcvr_api.decommission_all_datapaths = MagicMock(return_value=False)
task.task_stopping_event.is_set = MagicMock(side_effect=[False, False, True])
task.task_worker()
assert not get_cmis_state_from_state_db('Ethernet1', task.xcvr_table_helper.get_status_tbl(task.get_asic_id('Ethernet1'))) == CMIS_STATE_DP_DEINIT
Expand Down
46 changes: 37 additions & 9 deletions sonic-xcvrd/xcvrd/xcvrd.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
CMIS_STATE_UNKNOWN = 'UNKNOWN'
CMIS_STATE_INSERTED = 'INSERTED'
CMIS_STATE_DP_PRE_INIT_CHECK = 'DP_PRE_INIT_CHECK'
CMIS_STATE_DP_DECOM_INIT_CHECK = 'DP_DECOM_INIT_CHECK'
CMIS_STATE_DP_DEINIT = 'DP_DEINIT'
CMIS_STATE_AP_CONF = 'AP_CONFIGURED'
CMIS_STATE_DP_ACTIVATE = 'DP_ACTIVATION'
Expand Down Expand Up @@ -815,13 +816,26 @@ def get_cmis_media_lanes_mask(self, api, appl, lport, subport):

return media_lanes_mask

def is_appl_reconfigure_required(self, api, app_new):
def is_decommission_required(self, lport, api):
Comment thread
prgeor marked this conversation as resolved.
Outdated
Comment thread
AnoopKamath marked this conversation as resolved.
Outdated
"""
Reset app code if non default app code needs to configured
Reset app code if non default app code needs to configured
"""
if self.port_dict[lport].get('is_decomm_required', False):
return True

for port in self.port_dict:
Comment thread
prgeor marked this conversation as resolved.
Outdated
if self.port_dict[port]['index'] == self.port_dict[lport]['index'] and self.port_dict[port]['is_decomm_required']:
return False

reference_app = None

for lane in range(self.CMIS_MAX_HOST_LANES):
app_cur = api.get_application(lane)
if app_cur != 0 and app_cur != app_new:
app = api.get_application(lane) & 0xF
if app == 0:
continue
else:
#decommission all lanes/datapaths to default AppSel=0
self.log_notice(f"{lport}: Decommissioning: DP_DEINIT and set all lanes/datapaths to default AppSel=0")
return True
return False

Expand Down Expand Up @@ -885,6 +899,7 @@ def force_cmis_reinit(self, lport, retries=0):
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_INSERTED)
self.port_dict[lport]['cmis_retries'] = retries
self.port_dict[lport]['cmis_expired'] = None # No expiration
self.port_dict[lport]['is_decomm_required'] = False

def check_module_state(self, api, states):
"""
Expand Down Expand Up @@ -1324,6 +1339,11 @@ def task_worker(self):
self.post_port_active_apsel_to_db(api, lport, host_lanes_mask, reset_apsel=True)
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_READY)
continue
elif self.is_decommission_required(lport, api):
self.port_dict[lport]['is_decomm_required'] = True
api.decommission_all_datapaths(False)
Comment thread
prgeor marked this conversation as resolved.
Outdated
dpDeinitDuration = self.get_cmis_dp_deinit_duration_secs(api)
self.update_cmis_state_expiration_time(lport, dpDeinitDuration)
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_PRE_INIT_CHECK)
if state == CMIS_STATE_DP_PRE_INIT_CHECK:
if self.port_dict[lport].get('forced_tx_disabled', False):
Expand All @@ -1349,12 +1369,20 @@ def task_worker(self):
self.log_notice("{} Successfully configured Tx power = {}".format(lport, tx_power))

# Set all the DP lanes AppSel to unused(0) when non default app code needs to be configured
if True == self.is_appl_reconfigure_required(api, appl):
self.log_notice("{}: Decommissioning all lanes/datapaths to default AppSel=0".format(lport))
if True != api.decommission_all_datapaths():
self.log_notice("{}: Failed to default to AppSel=0".format(lport))
self.force_cmis_reinit(lport, retries + 1)
if self.port_dict[lport]['is_decomm_required']:
self.log_notice(f"{lport}: Decommissioning: DPInit all lanes/datapaths")
api.decommission_all_datapaths(True)
Comment thread
AnoopKamath marked this conversation as resolved.
Outdated
Comment thread
AnoopKamath marked this conversation as resolved.
Outdated
dpInitDuration = self.get_cmis_dp_init_duration_secs(api)
self.update_cmis_state_expiration_time(lport, dpInitDuration)
self.update_port_transceiver_status_table_sw_cmis_state(lport, CMIS_STATE_DP_DECOM_INIT_CHECK)
if state == CMIS_STATE_DP_DECOM_INIT_CHECK:
if self.port_dict[lport]['is_decomm_required']:
if not self.check_config_error(api, host_lanes_mask, ['ConfigSuccess']):
if self.is_timer_expired(expired):
self.log_notice("{}: Decommissioning: timeout for 'Config Success'".format(lport))
self.force_cmis_reinit(lport, retries + 1)
continue
self.log_notice(f"{lport}: Decommissioned physical port {self.port_mapping.get_logical_to_physical(lport)}")

need_update = self.is_cmis_application_update_required(api, appl, host_lanes_mask)

Expand Down
Loading