Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmd/gpuaudit/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@
func init() {
scanCmd.Flags().StringVar(&scanProfile, "profile", "", "AWS profile to use")
scanCmd.Flags().StringSliceVar(&scanRegions, "region", nil, "AWS regions to scan (default: common GPU regions)")
scanCmd.Flags().StringVar(&scanFormat, "format", "table", "Output format: table, json, markdown, slack")
scanCmd.Flags().StringVar(&scanFormat, "format", "table", "Output format: table, json, markdown, slack, csv")
scanCmd.Flags().StringVarP(&scanOutput, "output", "o", "", "Write output to file instead of stdout")
scanCmd.Flags().BoolVar(&scanSkipMetrics, "skip-metrics", false, "Skip CloudWatch metrics collection (faster but less accurate)")
scanCmd.Flags().BoolVar(&scanSkipSageMaker, "skip-sagemaker", false, "Skip SageMaker endpoint scanning")
Expand Down Expand Up @@ -191,6 +191,8 @@
output.FormatMarkdown(w, result)
case "slack":
return output.FormatSlack(w, result)
case "csv":

Check failure on line 194 in cmd/gpuaudit/main.go

View workflow job for this annotation

GitHub Actions / test

syntax error: unexpected :, expected expression
return output.FormatCSV(w, result)

Check failure on line 195 in cmd/gpuaudit/main.go

View workflow job for this annotation

GitHub Actions / test

syntax error: unexpected keyword return, expected :
default:
output.FormatTable(w, result)
}
Expand Down
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ require (
github.com/aws/aws-sdk-go-v2/service/organizations v1.51.2
github.com/aws/aws-sdk-go-v2/service/sagemaker v1.238.0
github.com/aws/aws-sdk-go-v2/service/sts v1.41.10
github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1
github.com/prometheus/client_model v0.6.2
github.com/prometheus/common v0.67.5
github.com/spf13/cobra v1.10.2
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr
github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ=
github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1vB6EwHI=
github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1 h1:FWNFq4fM1wPfcK40yHE5UO3RUdSNPaBC+j3PokzA6OQ=
github.com/gocarina/gocsv v0.0.0-20240520201108-78e41c74b4b1/go.mod h1:5YoVOkjYAQumqlV356Hj3xeYh4BdZuLE0/nRkf2NKkI=
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
Expand Down
138 changes: 69 additions & 69 deletions internal/models/models.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,116 +50,116 @@ const (
// GPUInstance represents a discovered GPU resource with its metrics and cost data.
type GPUInstance struct {
// Identity
InstanceID string `json:"instance_id"`
Source Source `json:"source"`
AccountID string `json:"account_id"`
Region string `json:"region"`
Name string `json:"name"` // from Name tag or endpoint name
InstanceID string `json:"instance_id" csv:"instance_id"`
Source Source `json:"source" csv:"source"`
AccountID string `json:"account_id" csv:"account_id"`
Region string `json:"region" csv:"region"`
Name string `json:"name" csv:"name"` // from Name tag or endpoint name
Tags map[string]string `json:"tags,omitempty"`

// Network (populated for EC2)
PrivateDnsName string `json:"private_dns_name,omitempty"`
PrivateDnsName string `json:"private_dns_name,omitempty" csv:"private_dns_name,omitempty"`

// GPU hardware
InstanceType string `json:"instance_type"`
GPUModel string `json:"gpu_model"`
GPUCount int `json:"gpu_count"`
GPUVRAMGiB float64 `json:"gpu_vram_gib"`
TotalVRAMGiB float64 `json:"total_vram_gib"`
InstanceType string `json:"instance_type" csv:"instance_type"`
GPUModel string `json:"gpu_model" csv:"gpu_model"`
GPUCount int `json:"gpu_count" csv:"gpu_count"`
GPUVRAMGiB float64 `json:"gpu_vram_gib" csv:"gpu_vram_gib"`
TotalVRAMGiB float64 `json:"total_vram_gib" csv:"total_vram_gib"`

// Kubernetes (populated for k8s-node source)
ClusterName string `json:"cluster_name,omitempty"`
K8sNodeName string `json:"k8s_node_name,omitempty"`
GPUAllocated int `json:"gpu_allocated,omitempty"`
ClusterName string `json:"cluster_name,omitempty" csv:"cluster_name,omitempty"`
K8sNodeName string `json:"k8s_node_name,omitempty" csv:"k8s_node_name,omitempty"`
GPUAllocated int `json:"gpu_allocated,omitempty" csv:"gpu_allocated,omitempty"`

// State
State string `json:"state"`
LaunchTime time.Time `json:"launch_time"`
UptimeHours float64 `json:"uptime_hours"`
State string `json:"state" csv:"state"`
LaunchTime time.Time `json:"launch_time" csv:"launch_time"`
UptimeHours float64 `json:"uptime_hours" csv:"uptime_hours"`

// Metrics (nil means unavailable)
AvgCPUPercent *float64 `json:"avg_cpu_percent,omitempty"`
MaxCPUPercent *float64 `json:"max_cpu_percent,omitempty"`
AvgNetworkInBytes *float64 `json:"avg_network_in_bytes,omitempty"`
AvgNetworkOutBytes *float64 `json:"avg_network_out_bytes,omitempty"`
AvgDiskReadOps *float64 `json:"avg_disk_read_ops,omitempty"`
AvgDiskWriteOps *float64 `json:"avg_disk_write_ops,omitempty"`
AvgGPUUtilization *float64 `json:"avg_gpu_utilization,omitempty"`
AvgGPUMemUtilization *float64 `json:"avg_gpu_mem_utilization,omitempty"`
InvocationCount *int64 `json:"invocation_count,omitempty"`
AvgCPUPercent *float64 `json:"avg_cpu_percent,omitempty" csv:"avg_cpu_percent,omitempty"`
MaxCPUPercent *float64 `json:"max_cpu_percent,omitempty" csv:"max_cpu_percent,omitempty"`
AvgNetworkInBytes *float64 `json:"avg_network_in_bytes,omitempty" csv:"avg_network_in_bytes,omitempty"`
AvgNetworkOutBytes *float64 `json:"avg_network_out_bytes,omitempty" csv:"avg_network_out_bytes,omitempty"`
AvgDiskReadOps *float64 `json:"avg_disk_read_ops,omitempty" csv:"avg_disk_read_ops,omitempty"`
AvgDiskWriteOps *float64 `json:"avg_disk_write_ops,omitempty" csv:"avg_disk_write_ops,omitempty"`
AvgGPUUtilization *float64 `json:"avg_gpu_utilization,omitempty" csv:"avg_gpu_utilization,omitempty"`
AvgGPUMemUtilization *float64 `json:"avg_gpu_mem_utilization,omitempty" csv:"avg_gpu_mem_utilization,omitempty"`
InvocationCount *int64 `json:"invocation_count,omitempty" csv:"invocation_count,omitempty"`

// Cost
PricingModel string `json:"pricing_model"` // on-demand, spot, reserved, savings-plan
HourlyCost float64 `json:"hourly_cost"`
MonthlyCost float64 `json:"monthly_cost"`
SpotHourlyCost *float64 `json:"spot_hourly_cost,omitempty"`
MTDCost *float64 `json:"mtd_cost,omitempty"`
PricingModel string `json:"pricing_model" csv:"pricing_model"` // on-demand, spot, reserved, savings-plan
HourlyCost float64 `json:"hourly_cost" csv:"hourly_cost"`
MonthlyCost float64 `json:"monthly_cost" csv:"monthly_cost"`
SpotHourlyCost *float64 `json:"spot_hourly_cost,omitempty" csv:"spot_hourly_cost,omitempty"`
MTDCost *float64 `json:"mtd_cost,omitempty" csv:"mtd_cost,omitempty"`

// Analysis results (populated by analysis engine)
WasteSignals []WasteSignal `json:"waste_signals,omitempty"`
Recommendations []Recommendation `json:"recommendations,omitempty"`
EstimatedSavings float64 `json:"estimated_savings"`
EstimatedSavings float64 `json:"estimated_savings" csv:"estimated_savings"`
}

// WasteSignal represents a detected waste indicator on a GPU instance.
type WasteSignal struct {
Type string `json:"type"` // idle, low_utilization, oversized_gpu, pricing_mismatch, stale, low_invocations, spot_eligible
Severity Severity `json:"severity"`
Confidence float64 `json:"confidence"` // 0.0 - 1.0
Evidence string `json:"evidence"`
Type string `json:"type" csv:"type"` // idle, low_utilization, oversized_gpu, pricing_mismatch, stale, low_invocations, spot_eligible
Severity Severity `json:"severity" csv:"severity"`
Confidence float64 `json:"confidence" csv:"confidence"` // 0.0 - 1.0
Evidence string `json:"evidence" csv:"evidence"`
}

// Recommendation is a specific action the user can take to reduce cost.
type Recommendation struct {
Action Action `json:"action"`
Description string `json:"description"`
CurrentMonthlyCost float64 `json:"current_monthly_cost"`
RecommendedMonthlyCost float64 `json:"recommended_monthly_cost"`
MonthlySavings float64 `json:"monthly_savings"`
SavingsPercent float64 `json:"savings_percent"`
Risk Risk `json:"risk"`
Action Action `json:"action" csv:"action"`
Description string `json:"description" csv:"description"`
CurrentMonthlyCost float64 `json:"current_monthly_cost" csv:"current_monthly_cost"`
RecommendedMonthlyCost float64 `json:"recommended_monthly_cost" csv:"recommended_monthly_cost"`
MonthlySavings float64 `json:"monthly_savings" csv:"monthly_savings"`
SavingsPercent float64 `json:"savings_percent" csv:"savings_percent"`
Risk Risk `json:"risk" csv:"risk"`
}

// ScanResult holds the complete output of a gpuaudit scan.
type ScanResult struct {
Timestamp time.Time `json:"timestamp"`
AccountID string `json:"account_id"`
Targets []string `json:"targets,omitempty"`
Regions []string `json:"regions"`
ScanDuration string `json:"scan_duration"`
Instances []GPUInstance `json:"instances"`
Summary ScanSummary `json:"summary"`
TargetSummaries []TargetSummary `json:"target_summaries,omitempty"`
TargetErrors []TargetErrorInfo `json:"target_errors,omitempty"`
Timestamp time.Time `json:"timestamp" csv:"timestamp"`
AccountID string `json:"account_id" csv:"account_id"`
Targets []string `json:"targets,omitempty" csv:"targets,omitempty"`
Regions []string `json:"regions" csv:"regions"`
ScanDuration string `json:"scan_duration" csv:"scan_duration"`
Instances []GPUInstance `json:"instances" csv:"instances"`
Summary ScanSummary `json:"summary" csv:"summary"`
TargetSummaries []TargetSummary `json:"target_summaries,omitempty" csv:"target_summaries,omitempty"`
TargetErrors []TargetErrorInfo `json:"target_errors,omitempty" csv:"target_errors,omitempty"`
}

// ScanSummary provides aggregate statistics for a scan.
type ScanSummary struct {
TotalInstances int `json:"total_instances"`
TotalMonthlyCost float64 `json:"total_monthly_cost"`
TotalEstimatedWaste float64 `json:"total_estimated_waste"`
WastePercent float64 `json:"waste_percent"`
CriticalCount int `json:"critical_count"`
WarningCount int `json:"warning_count"`
InfoCount int `json:"info_count"`
HealthyCount int `json:"healthy_count"`
TotalInstances int `json:"total_instances" csv:"total_instances"`
TotalMonthlyCost float64 `json:"total_monthly_cost" csv:"total_monthly_cost"`
TotalEstimatedWaste float64 `json:"total_estimated_waste" csv:"total_estimated_waste"`
WastePercent float64 `json:"waste_percent" csv:"waste_percent"`
CriticalCount int `json:"critical_count" csv:"critical_count"`
WarningCount int `json:"warning_count" csv:"warning_count"`
InfoCount int `json:"info_count" csv:"info_count"`
HealthyCount int `json:"healthy_count" csv:"healthy_count"`
}

// TargetSummary provides per-target aggregate statistics.
type TargetSummary struct {
Target string `json:"target"`
TotalInstances int `json:"total_instances"`
TotalMonthlyCost float64 `json:"total_monthly_cost"`
TotalEstimatedWaste float64 `json:"total_estimated_waste"`
WastePercent float64 `json:"waste_percent"`
CriticalCount int `json:"critical_count"`
WarningCount int `json:"warning_count"`
Target string `json:"target" csv:"target"`
TotalInstances int `json:"total_instances" csv:"total_instances"`
TotalMonthlyCost float64 `json:"total_monthly_cost" csv:"total_monthly_cost"`
TotalEstimatedWaste float64 `json:"total_estimated_waste" csv:"total_estimated_waste"`
WastePercent float64 `json:"waste_percent" csv:"waste_percent"`
CriticalCount int `json:"critical_count" csv:"critical_count"`
WarningCount int `json:"warning_count" csv:"warning_count"`
}

// TargetErrorInfo describes a target that failed to scan.
type TargetErrorInfo struct {
Target string `json:"target"`
Error string `json:"error"`
Target string `json:"target" csv:"target"`
Error string `json:"error" csv:"error"`
}

// MaxSeverity returns the highest severity among the given waste signals.
Expand Down
20 changes: 20 additions & 0 deletions internal/output/csv.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
// Copyright 2026 the gpuaudit authors. All rights reserved.
// SPDX-License-Identifier: Apache-2.0

package output

import (
"fmt"
"io"

"github.com/gocarina/gocsv"
"github.com/gpuaudit/cli/internal/models"
)

// FormatCSV encodes result.Instances as CSV and writes the output to w.
//
// Encoding is delegated to gocsv.Marshal. A non-nil error from the encoder is
// wrapped with the "encoding CSV" context and returned; otherwise FormatCSV
// returns nil. result must be non-nil, since it is dereferenced unconditionally.
func FormatCSV(w io.Writer, result *models.ScanResult) error {
	err := gocsv.Marshal(result.Instances, w)
	if err == nil {
		return nil
	}
	return fmt.Errorf("encoding CSV: %w", err)
}
Loading
Loading