Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,25 @@
## 1.2.0 - unreleased
## 1.2.1

### Fixed
* Discovery against appliances with a large number of application domains
(200+) where the orchestrator's API user lacks read access to many of them
no longer produces a SQL truncation failure in Keyfactor Command. Two
related fixes:
* `FlowLogger.GetSummary()` output is now capped at 3500 characters with
a `[truncated, N chars omitted; check orchestrator log for full
breadcrumb]` marker. Defense-in-depth against Command's
`AgentJobStatus.Message` column overflow (NVARCHAR(4000)).
* Discovery aggregates per-domain failures by error signature instead of
emitting one `FAIL` and one `SKIP` line per failed domain. Identical
HTTP errors across N domains collapse into one summary line with a
count and a 5-domain sample. Successful domains still emit one
`Discovered-<path>` step each.

Net effect on a 235-domain appliance with 114 inaccessible domains: the
breadcrumb summary drops from ~50 KB and 235+ step lines to ~1 KB and
~10 step lines, well under Command's column cap.

## 1.2.0

### Added
* Discovery job: automatically enumerates all application domains on a DataPower
Expand Down
15 changes: 14 additions & 1 deletion DataPower/FlowLogger.cs
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,12 @@ public void EndBranch()

public bool HasFailures => _steps.Any(s => s.Status == StepStatus.Failed);

// Command's AgentJobStatus.Message column has a hard length cap (typically
// NVARCHAR(4000)). Truncate aggressively so a job result update never fails
// with "String or binary data would be truncated". Leaves headroom for the
// FailureMessage prefix the caller adds.
private const int MaxSummaryChars = 3500;

public string GetSummary()
{
var hasFailures = HasFailures;
Expand Down Expand Up @@ -202,7 +208,14 @@ public string GetSummary()
}
sb.Append("----------------------------------------");

return sb.ToString();
var summary = sb.ToString();
if (summary.Length > MaxSummaryChars)
{
var omitted = summary.Length - MaxSummaryChars;
summary = summary.Substring(0, MaxSummaryChars)
+ $"\n... [truncated, {omitted} chars omitted; check orchestrator log for full breadcrumb]";
}
return summary;
}

public void Dispose()
Expand Down
116 changes: 84 additions & 32 deletions DataPower/Jobs/Discovery.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text.RegularExpressions;
using Keyfactor.Extensions.Orchestrator.DataPower.Client;
using Keyfactor.Logging;
using Keyfactor.Orchestrators.Common.Enums;
Expand Down Expand Up @@ -53,6 +54,29 @@ public class Discovery : JobBase, IDiscoveryJobExtension
// exact casing has shifted across Command versions.
private static readonly string[] DirsToSearchKeys = { "dirs", "Dirs", "directories", "Directories", "DirsToSearch" };

// Extracts a stable group key for an exception thrown by a per-domain
// ListFileStoreDirectories call. Strips domain-specific bits (the /_links/self/href
// URL changes per domain) so identical RBAC failures across hundreds of domains
// collapse into a single error group.
private static readonly Regex ErrorMessageRegex = new Regex(
"\"error\"\\s*:\\s*\\[\\s*\"([^\"]+)\"",
RegexOptions.Compiled | RegexOptions.Singleline);

private static string ErrorSignatureOf(Exception ex)
{
var apiEx = DataPowerApiException.Find(ex);
if (apiEx != null)
{
var m = ErrorMessageRegex.Match(apiEx.ResponseBody ?? string.Empty);
if (m.Success)
return $"HTTP {(int)apiEx.StatusCode} {apiEx.StatusCode}: {m.Groups[1].Value}";
return $"HTTP {(int)apiEx.StatusCode} {apiEx.StatusCode}";
}

var msg = ex?.Message ?? "(no message)";
return msg.Length > 80 ? msg.Substring(0, 80) + "..." : msg;
}

private static (HashSet<string> Dirs, string Source) ResolveDirsToSearch(DiscoveryJobConfiguration config)
{
if (config?.JobProperties != null)
Expand Down Expand Up @@ -175,53 +199,81 @@ private JobResult PerformDiscovery(DiscoveryJobConfiguration config, SubmitDisco
flow.Branch($"PerDomain (count={domainCount})");
try
{
var listedOk = 0;
var emptyNameSkipped = 0;

// Group per-domain failures by error signature instead of emitting a
// FAIL+SKIP line per failed domain. On appliances with 200+ domains and
// a non-trivial RBAC story, that easily produces a 50 KB summary which
// overflows Command's AgentJobStatus.Message column. One aggregated SKIP
// line per error class scales fine and is far more readable.
var errorGroups = new Dictionary<string, List<string>>(StringComparer.Ordinal);

foreach (var domain in domains)
{
if (string.IsNullOrWhiteSpace(domain?.Name))
{
flow.Skip("Domain", "empty domain name");
emptyNameSkipped++;
continue;
}

List<string> directories;
try
{
List<string> directories = null;
flow.Step($"ListFileStore-{domain.Name}", () =>
{
directories = apiClient.ListFileStoreDirectories(domain.Name);
});

// DataPower's filestore location names carry a trailing colon
// (e.g. "cert:" / "pubcert:" / "sharedcert:"). Strip it before
// matching and before composing the store path.
var certDirectories = directories
.Select(d => d?.TrimEnd(':'))
.Where(d => !string.IsNullOrEmpty(d) && certStoreDirectories.Contains(d))
.ToList();

var isDefault = string.Equals(domain.Name, DefaultDomainName, StringComparison.OrdinalIgnoreCase);
foreach (var dir in certDirectories)
{
if (ApplianceWideDirectories.Contains(dir) && !isDefault)
{
flow.Skip($"{domain.Name}\\{dir}", "appliance-wide; emitted only under default");
continue;
}

var storePath = $"{domain.Name}\\{dir}";
discoveredLocations.Add(storePath);
flow.Step($"Discovered-{storePath}");
}
directories = apiClient.ListFileStoreDirectories(domain.Name);
}
catch (Exception ex)
{
// Resilient by design: one inaccessible domain should not abort discovery
var inner = DescribeException(ex);
flow.Skip($"Domain-{domain.Name}", $"unable to list directories: {inner}");
Logger.LogWarning(ex, "Unable to list filestore directories for domain {DomainName}: {ErrorMessage}",
domain.Name, inner);
var signature = ErrorSignatureOf(ex);
if (!errorGroups.TryGetValue(signature, out var list))
{
list = new List<string>();
errorGroups[signature] = list;
}
list.Add(domain.Name);
Logger.LogWarning(ex,
"Unable to list filestore directories for domain {DomainName}: {ErrorMessage}",
domain.Name, DescribeException(ex));
continue;
}

listedOk++;

// DataPower's filestore location names carry a trailing colon
// (e.g. "cert:" / "pubcert:" / "sharedcert:"). Strip it before
// matching and before composing the store path.
var certDirectories = directories
.Select(d => d?.TrimEnd(':'))
.Where(d => !string.IsNullOrEmpty(d) && certStoreDirectories.Contains(d))
.ToList();

var isDefault = string.Equals(domain.Name, DefaultDomainName, StringComparison.OrdinalIgnoreCase);
foreach (var dir in certDirectories)
{
if (ApplianceWideDirectories.Contains(dir) && !isDefault)
continue; // appliance-wide; emitted only under default

var storePath = $"{domain.Name}\\{dir}";
discoveredLocations.Add(storePath);
flow.Step($"Discovered-{storePath}");
}
}

var listedDetail = $"listed={listedOk}/{domainCount}";
if (emptyNameSkipped > 0)
listedDetail += $", emptyName={emptyNameSkipped}";
if (errorGroups.Count > 0)
listedDetail += $", failed={errorGroups.Values.Sum(v => v.Count)}";
flow.Step("ListFileStore", listedDetail);

foreach (var kvp in errorGroups.OrderByDescending(g => g.Value.Count))
{
var sample = kvp.Value.Take(5).ToList();
var more = kvp.Value.Count - sample.Count;
var sampleStr = string.Join(", ", sample) + (more > 0 ? $" (+{more} more)" : "");
flow.Skip($"DomainsFailed[{kvp.Key}]", $"{kvp.Value.Count} domain(s): {sampleStr}");
}
}
finally
{
Expand Down
Loading