|
1 | 1 | """ |
2 | 2 | (C) Copyright 2020-2024 Intel Corporation. |
| 3 | + (C) Copyright 2026 Hewlett Packard Enterprise Development LP |
3 | 4 |
|
4 | 5 | SPDX-License-Identifier: BSD-2-Clause-Patent |
5 | 6 | """ |
6 | 7 |
|
7 | 8 | from avocado import fail_on |
8 | 9 | from daos_core_base import DaosCoreBase |
9 | | -from dmg_utils import get_dmg_smd_info |
10 | 10 | from exception_utils import CommandFailure |
11 | | -from general_utils import get_log_file |
| 11 | +from general_utils import get_host_data, get_journalctl_command, journalctl_time |
12 | 12 |
|
13 | 13 |
|
class CsumErrorLog(DaosCoreBase):
    """Verify that injected checksum errors are reported in the server system journal.

    :avocado: recursive
    """

    @fail_on(CommandFailure)
    def get_checksum_error_value(self, t_start, t_end):
        """Count the checksum error messages journaled between two timestamps.

        Builds a journalctl command for the daos_server unit, gathers its output for
        self.hostlist_servers through get_host_data(), and totals the occurrences of the
        "CSUM error" marker in the returned data.

        Args:
            t_start (str): The start time for the journalctl query.
            t_end (str): The end time for the journalctl query.

        Returns:
            int: total number of "CSUM error" occurrences across all returned results
        """
        marker = "CSUM error"
        journal_cmd = get_journalctl_command(t_start, t_end, system=True, units="daos_server")
        host_entries = get_host_data(
            self.hostlist_servers, journal_cmd, text="journalctl",
            error="Error gathering system log events")
        self.log.debug(host_entries)
        # NOTE(review): assumes each entry's "data" value is the journal text (str) - confirm
        # against get_host_data(); str.count() tallies non-overlapping matches of the marker.
        return sum(entry["data"].count(marker) for entry in host_entries)

    @fail_on(CommandFailure)
    def test_csum_error_logging(self):
        """Jira ID: DAOS-3927, DAOS-18881.

        Test Description: Run daos_test -z to inject checksum errors, then confirm that at least
        one checksum error message was found in the system journal for the duration of the run.

        :avocado: tags=all,daily_regression
        :avocado: tags=hw,medium
        :avocado: tags=checksum,faults,daos_test
        :avocado: tags=CsumErrorLog,test_csum_error_logging
        """
        # Timestamps taken immediately before and after the subtest bound the journal query.
        window_start = journalctl_time()
        self.log_step('Run the test (daos_test -z)')
        self.run_subtest()
        window_end = journalctl_time()

        self.log_step('Check checksum error logs')
        error_count = self.get_checksum_error_value(window_start, window_end)
        self.log.info('Checksum Errors reported: %d', error_count)
        self.assertGreater(error_count, 0, 'Checksum Errors not detected')
0 commit comments