Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 22 additions & 20 deletions checkov/secrets/plugins/custom_regex_detector.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,17 +168,31 @@ def _find_potential_secret(
multiline_regex = self.pattern_by_prerun_compiled.get(regex.pattern)
if multiline_regex is None:
continue
multiline_matches = multiline_regex.findall(file_content)
for mm in multiline_matches:
mm = self._extract_real_regex_match(mm)
if isinstance(mm, tuple):
mm = mm[0]
line_num = find_line_number(file_content, mm, line_number)
quoted_mm = f"'{mm}'"
for regex_match in multiline_regex.finditer(file_content):
secret_value = self._extract_real_regex_match(cast(Tuple[str], regex_match.groups()) or regex_match.group(0))
if isinstance(secret_value, tuple):
secret_value = secret_value[0]
# Line number strategy:
# - If secret_value is single-line (no \n), locate it within the match
# and compute its line directly — this gives the exact secret line.
# - If secret_value spans multiple lines (e.g. a full JSON block or PGP key body),
# no single line contains it, so fall back to the prerun pattern's line,
# which is the most meaningful trigger line (e.g. "BEGIN PRIVATE KEY").
if '\n' not in secret_value:
inner_offset = regex_match.group(0).find(secret_value)
if inner_offset < 0:
continue
secret_offset = regex_match.start() + inner_offset
line_num = file_content[:secret_offset].count('\n') + 1
else:
prerun_search = regex.search(file_content, regex_match.start(), regex_match.end())
secret_offset = prerun_search.start() if prerun_search else regex_match.start()
line_num = file_content[:secret_offset].count('\n') + 1
quoted_secret = f"'{secret_value}'"
ps = PotentialSecret(
type=regex_data["Name"],
filename=filename,
secret=quoted_mm,
secret=quoted_secret,
line_number=line_num,
is_verified=is_verified,
is_added=is_added,
Expand Down Expand Up @@ -228,15 +242,3 @@ def _extract_real_regex_match(self, regex_matches: Union[str, Tuple[str]]) -> Un
return match

return regex_matches


def find_line_number(file_string: str, substring: str, default_line_number: int) -> int:
    """Return the 1-based number of the line on which ``substring`` first starts.

    The search runs over the whole text rather than line by line, so a
    ``substring`` that itself spans several lines is located and reported at
    the line where it begins (a per-line ``in`` test can never match it).

    Args:
        file_string: Full text to search.
        substring: Text to locate; may contain line breaks.
        default_line_number: Value returned when ``substring`` does not occur
            in ``file_string`` or when the arguments are not searchable text.

    Returns:
        The line number of the first occurrence, or ``default_line_number``.
    """
    try:
        offset = file_string.find(substring)
    except (AttributeError, TypeError):
        # Best-effort helper: non-string input yields the caller's default
        # instead of raising.
        return default_line_number

    if offset < 0:
        return default_line_number

    # Keep the line terminators so the running character count stays aligned
    # with offsets into file_string, using the same line-break rules as
    # str.splitlines().
    line_end = 0
    for line_number, line in enumerate(file_string.splitlines(keepends=True), start=1):
        line_end += len(line)
        if offset < line_end:
            return line_number

    # Only reachable for an empty file_string (no lines to attribute the match to).
    return default_line_number
11 changes: 11 additions & 0 deletions tests/secrets/multiline_finding/Dockerfile.two_secrets
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
BEGIN_SECRET
line1_of_secret_aaaaaaaaaaaaaaaaaa
line2_of_secret_aaaaaaaaaaaaaaaaaa
END_SECRET

some other content

BEGIN_SECRET
line1_of_secret_bbbbbbbbbbbbbbbbbb
line2_of_secret_bbbbbbbbbbbbbbbbbb
END_SECRET
5 changes: 5 additions & 0 deletions tests/secrets/single_line_finding/secret.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
const ADMIN_KEY = 'b782df1739614041699a45f8079a3623';

some other content

const SECOND_KEY = 'c891eg2840725152800b56f9180b4734';
105 changes: 104 additions & 1 deletion tests/secrets/test_multiline_finding_line_number.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,114 @@ def test_multiline_finding(self):
}
]}
runner = Runner()
report = runner.run(root_folder=valid_dir_path,
report = runner.run(root_folder=None,
files=[valid_dir_path + "/Dockerfile.mine"],
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's /Dockerfile.mine?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the original file name of the test. I explicitly specified it here because I added another file, and before the change it scanned the entire folder.

runner_filter=RunnerFilter(framework=['secrets'],
enable_secret_scan_all_files=True))
interesting_failed_checks = _filter_reports_for_incident_ids(report.failed_checks, ["test1"])
self.assertEqual(len(interesting_failed_checks), 1)
self.assertEqual(len(interesting_failed_checks[0].code_block), 1)
self.assertEqual(len(interesting_failed_checks[0].code_block[0]), 2)
self.assertEqual(interesting_failed_checks[0].code_block[0][0], 2)

def test_multiline_two_secrets_same_check_id(self):
    """
    Regression test: a file holding two secrets that match the same multiline rule,
    where the captured group spans several real lines, must yield two findings with
    distinct line numbers.

    A multiline captured value is not contained in any single line, so the reported
    line is the one of the prerun match (BEGIN_SECRET) that belongs to each finding.

    multiline_finding/Dockerfile.two_secrets layout:
        line 1:  BEGIN_SECRET            <- expected line of the first finding
        line 2:  line1_of_secret_aaa...
        line 3:  line2_of_secret_aaa...
        line 4:  END_SECRET
        ...
        line 8:  BEGIN_SECRET            <- expected line of the second finding
        line 9:  line1_of_secret_bbb...
        line 10: line2_of_secret_bbb...
        line 11: END_SECRET
    """
    incident_id = "test_multiline_two"
    tests_dir = os.path.dirname(os.path.realpath(__file__))
    policy = {
        "incidentId": incident_id,
        "category": "Secrets",
        "severity": "MEDIUM",
        "incidentType": "Violation",
        "title": incident_id,
        "guideline": "test",
        "laceworkViolationId": None,
        "prowlerCheckId": None,
        "checkovCheckId": None,
        "resourceTypes": [],
        "provider": "OTHER",
        "remediationIds": [],
        "customerName": "test",
        "isCustom": True,
        "code": "definition:\n cond_type: secrets\n multiline: true\n prerun:\n - BEGIN_SECRET\n value:\n - BEGIN_SECRET\\n((?:.*\\n)+?)END_SECRET\n",
        "descriptiveTitle": None,
        "constructiveTitle": None,
        "pcPolicyId": None,
        "additionalPcPolicyIds": None,
        "pcSeverity": None,
        "sourceIncidentId": None
    }
    bc_integration.customer_run_config_response = {"secretsPolicies": [policy]}

    secrets_runner = Runner()
    # Scan only the dedicated fixture so other files in the folder cannot add findings.
    report = secrets_runner.run(
        root_folder=None,
        files=[tests_dir + "/multiline_finding/Dockerfile.two_secrets"],
        runner_filter=RunnerFilter(framework=['secrets'],
                                   enable_secret_scan_all_files=True),
    )
    findings = _filter_reports_for_incident_ids(report.failed_checks, [incident_id])

    # Both secrets must be found.
    self.assertEqual(len(findings), 2)

    # Each finding is reported on the BEGIN_SECRET line of its own block (1 and 8);
    # the regression being guarded against reported line 1 for both.
    first_line, second_line = sorted(check.file_line_range[0] for check in findings)
    self.assertEqual(first_line, 1)
    self.assertEqual(second_line, 8)

def test_single_line_two_secrets_same_check_id(self):
    """
    Two single-line secrets matched by the same custom rule must both be reported,
    each on its own line.

    single_line_finding/secret.txt is scanned with a non-multiline regex policy and
    the test expects exactly two findings, starting on lines 1 and 5.
    """
    incident_id = "test_single_two"
    fixture_dir = os.path.dirname(os.path.realpath(__file__)) + "/single_line_finding"
    policy = {
        "incidentId": incident_id,
        "category": "Secrets",
        "severity": "MEDIUM",
        "incidentType": "Violation",
        "title": incident_id,
        "guideline": "test",
        "laceworkViolationId": None,
        "prowlerCheckId": None,
        "checkovCheckId": None,
        "resourceTypes": ["aws_instance"],
        "provider": "AWS",
        "remediationIds": [],
        "customerName": "test",
        "isCustom": True,
        "code": "definition:\n cond_type: secrets\n value:\n - (?i)(?:KEY)\\s*=\\s*'([A-Za-z0-9]{32})'\n",
        "descriptiveTitle": None,
        "constructiveTitle": None,
        "pcPolicyId": None,
        "additionalPcPolicyIds": None,
        "pcSeverity": None,
        "sourceIncidentId": None
    }
    bc_integration.customer_run_config_response = {"secretsPolicies": [policy]}

    secrets_runner = Runner()
    report = secrets_runner.run(
        root_folder=None,
        files=[fixture_dir + "/secret.txt"],
        runner_filter=RunnerFilter(framework=['secrets'],
                                   enable_secret_scan_all_files=True),
    )
    findings = _filter_reports_for_incident_ids(report.failed_checks, [incident_id])

    # Both secrets must be found.
    self.assertEqual(len(findings), 2)

    # The first secret sits on line 1 of the fixture, the second on line 5.
    first_line, second_line = sorted(check.file_line_range[0] for check in findings)
    self.assertEqual(first_line, 1)
    self.assertEqual(second_line, 5)
Loading