-
Notifications
You must be signed in to change notification settings - Fork 21
fix: gojq module review — permissions, collision, rename, upgrade #3451
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -43,7 +43,7 @@ type PayloadMetadata struct { | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // jqSchemaFilter is the jq filter that transforms JSON to schema | ||||||||||||||||||
| // This filter leverages gojq v0.12.18 features including: | ||||||||||||||||||
| // This filter leverages gojq v0.12.19 features including: | ||||||||||||||||||
| // - Enhanced array handling (supports up to 536,870,912 elements / 2^29) | ||||||||||||||||||
| // - Improved concurrent execution performance | ||||||||||||||||||
| // - Better error messages for type errors | ||||||||||||||||||
|
|
@@ -56,25 +56,26 @@ type PayloadMetadata struct { | |||||||||||||||||
| // For arrays, only the first element's schema is retained to represent the array structure. | ||||||||||||||||||
| // Empty arrays are preserved as []. | ||||||||||||||||||
| // | ||||||||||||||||||
| // NOTE: This defines a custom walk function rather than using gojq's built-in walk(f). | ||||||||||||||||||
| // NOTE: This defines a custom walk_schema function rather than using gojq's built-in walk(f). | ||||||||||||||||||
| // The built-in walk(f) applies f to every node but preserves the original structure. | ||||||||||||||||||
| // Our custom walk does two things the built-in cannot: | ||||||||||||||||||
| // Our custom walk_schema does two things the built-in cannot: | ||||||||||||||||||
| // 1. Replaces leaf values with their type name (e.g., "test" → "string") | ||||||||||||||||||
| // 2. Collapses arrays to only the first element for schema inference | ||||||||||||||||||
| // | ||||||||||||||||||
| // These behaviors are incompatible with standard walk(f) semantics, which would | ||||||||||||||||||
| // apply f post-recursion without structural changes to arrays. | ||||||||||||||||||
| // Using a distinct name avoids shadowing gojq's built-in walk/1. | ||||||||||||||||||
| const jqSchemaFilter = ` | ||||||||||||||||||
| def walk(f): | ||||||||||||||||||
| def walk_schema: | ||||||||||||||||||
| . as $in | | ||||||||||||||||||
| if type == "object" then | ||||||||||||||||||
| reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk(f))}) | ||||||||||||||||||
| reduce keys[] as $k ({}; . + {($k): ($in[$k] | walk_schema)}) | ||||||||||||||||||
| elif type == "array" then | ||||||||||||||||||
| if length == 0 then [] else [.[0] | walk(f)] end | ||||||||||||||||||
| if length == 0 then [] else [.[0] | walk_schema] end | ||||||||||||||||||
| else | ||||||||||||||||||
| type | ||||||||||||||||||
| end; | ||||||||||||||||||
| walk(.) | ||||||||||||||||||
| walk_schema | ||||||||||||||||||
| ` | ||||||||||||||||||
|
|
||||||||||||||||||
| // Pre-compiled jq query code for performance | ||||||||||||||||||
|
|
@@ -108,7 +109,7 @@ func init() { | |||||||||||||||||
| return | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| logMiddleware.Printf("Successfully compiled jq schema filter at init (gojq v0.12.18)") | ||||||||||||||||||
| logMiddleware.Printf("Successfully compiled jq schema filter at init (gojq v0.12.19)") | ||||||||||||||||||
| logger.LogInfo("startup", "jq schema filter compiled successfully - array limit: 2^29 elements, timeout: %v", DefaultJqTimeout) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
@@ -117,7 +118,7 @@ func generateRandomID() string { | |||||||||||||||||
| bytes := make([]byte, 16) | ||||||||||||||||||
| if _, err := rand.Read(bytes); err != nil { | ||||||||||||||||||
| // Fallback to timestamp-based ID if random fails | ||||||||||||||||||
| return fmt.Sprintf("fallback-%d", os.Getpid()) | ||||||||||||||||||
| return fmt.Sprintf("fallback-%d-%d", os.Getpid(), time.Now().UnixNano()) | ||||||||||||||||||
| } | ||||||||||||||||||
| return hex.EncodeToString(bytes) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
@@ -136,7 +137,7 @@ func generateRandomID() string { | |||||||||||||||||
| // Error handling: | ||||||||||||||||||
| // - Returns compilation errors if init() failed | ||||||||||||||||||
| // - Returns context.DeadlineExceeded if query times out | ||||||||||||||||||
| // - Returns enhanced error messages for type errors (gojq v0.12.18+) | ||||||||||||||||||
| // - Returns enhanced error messages for type errors (gojq v0.12.19+) | ||||||||||||||||||
| // - Properly handles gojq.HaltError for clean halt conditions | ||||||||||||||||||
| func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, error) { | ||||||||||||||||||
| // Check if compilation succeeded at init time | ||||||||||||||||||
|
|
@@ -153,7 +154,7 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro | |||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Run the pre-compiled query with context support (much faster than Parse+Run) | ||||||||||||||||||
| // The iterator is consumed only once because the walk(.) filter produces exactly | ||||||||||||||||||
| // The iterator is consumed only once because the walk_schema filter produces exactly | ||||||||||||||||||
| // one output value (the fully-transformed schema). There is no need to drain it. | ||||||||||||||||||
| iter := jqSchemaCode.RunWithContext(ctx, jsonData) | ||||||||||||||||||
| v, ok := iter.Next() | ||||||||||||||||||
|
|
@@ -178,7 +179,7 @@ func applyJqSchema(ctx context.Context, jsonData interface{}) (interface{}, erro | |||||||||||||||||
| return nil, fmt.Errorf("jq schema filter halted with error (exit code %d): %w", haltErr.ExitCode(), err) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| // Generic error case (includes enhanced v0.12.18+ type error messages) | ||||||||||||||||||
| // Generic error case (includes enhanced v0.12.19+ type error messages) | ||||||||||||||||||
| return nil, fmt.Errorf("jq schema filter error: %w", err) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
|
|
@@ -210,13 +211,13 @@ func savePayload(baseDir, pathPrefix, sessionID, queryID string, payload []byte) | |||||||||||||||||
| logger.LogInfo("payload", "Writing large payload to filesystem: path=%s, size=%d bytes (%.2f KB, %.2f MB)", | ||||||||||||||||||
| filePath, payloadSize, float64(payloadSize)/1024, float64(payloadSize)/(1024*1024)) | ||||||||||||||||||
|
|
||||||||||||||||||
| if err := os.WriteFile(filePath, payload, 0644); err != nil { | ||||||||||||||||||
| if err := os.WriteFile(filePath, payload, 0600); err != nil { | ||||||||||||||||||
| logger.LogError("payload", "Failed to write payload file: path=%s, size=%d bytes, error=%v", | ||||||||||||||||||
| filePath, payloadSize, err) | ||||||||||||||||||
| return "", fmt.Errorf("failed to write payload file: %w", err) | ||||||||||||||||||
| } | ||||||||||||||||||
|
|
||||||||||||||||||
| logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, permissions=0644", | ||||||||||||||||||
| logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, permissions=0600", | ||||||||||||||||||
|
||||||||||||||||||
| logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, permissions=0600", | |
| if err := os.Chmod(filePath, 0600); err != nil { | |
| logger.LogError("payload", "Failed to enforce payload file permissions: path=%s, size=%d bytes, error=%v", | |
| filePath, payloadSize, err) | |
| return "", fmt.Errorf("failed to set payload file permissions: %w", err) | |
| } | |
| logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, enforcedPermissions=0600", |
Outdated
Copilot
AI
Apr 9, 2026
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The success log hardcodes permissions=0600, but the resulting mode can still differ (e.g., if the file already existed, or if the OS applies different semantics). Consider logging the actual stat.Mode().Perm() after the write so logs match reality.
See below for a potential fix:
// Verify file was written correctly and log the actual resulting mode
if stat, err := os.Stat(filePath); err != nil {
logger.LogWarn("payload", "Could not verify payload file after write: path=%s, error=%v", filePath, err)
logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes",
filePath, payloadSize)
} else {
logger.LogInfo("payload", "Successfully saved large payload to filesystem: path=%s, size=%d bytes, permissions=%#o",
filePath, payloadSize, stat.Mode().Perm())
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -108,6 +108,33 @@ func TestApplyJqSchema(t *testing.T) { | |
| } | ||
| } | ||
|
|
||
| // TestApplyJqSchema_SingleOutputContract verifies that the walk_schema filter produces | ||
| // exactly one output value. This documents the invariant that the iterator yields a single | ||
| // result, catching any future filter changes that accidentally produce multiple outputs. | ||
| func TestApplyJqSchema_SingleOutputContract(t *testing.T) { | ||
| require := require.New(t) | ||
|
|
||
| inputs := []interface{}{ | ||
| map[string]interface{}{"name": "test", "count": 42}, | ||
| []interface{}{map[string]interface{}{"id": 1}}, | ||
| map[string]interface{}{"nested": map[string]interface{}{"a": []interface{}{1, 2, 3}}}, | ||
| } | ||
|
|
||
| for _, input := range inputs { | ||
| iter := jqSchemaCode.RunWithContext(context.Background(), input) | ||
|
|
||
|
Comment on lines
+126
to
+128
|
||
| // First call must return a value | ||
| v, ok := iter.Next() | ||
| require.True(ok, "walk_schema should produce at least one output") | ||
| _, isErr := v.(error) | ||
| require.False(isErr, "walk_schema should not produce an error: %v", v) | ||
|
|
||
| // Second call must signal exhaustion (no more values) | ||
| v2, ok2 := iter.Next() | ||
| require.False(ok2, "walk_schema should produce exactly one output, got second value: %v", v2) | ||
| } | ||
| } | ||
|
|
||
| func TestSavePayload(t *testing.T) { | ||
| // Create temporary directory for test | ||
| baseDir := filepath.Join(os.TempDir(), "test-jq-payloads") | ||
|
|
@@ -517,7 +544,7 @@ func TestPayloadStorage_FilePermissions(t *testing.T) { | |
| // Check file permissions | ||
| fileInfo, err := os.Stat(filePath) | ||
| require.NoError(t, err) | ||
| assert.Equal(t, os.FileMode(0644), fileInfo.Mode().Perm(), "File should have 0644 permissions") | ||
| assert.Equal(t, os.FileMode(0600), fileInfo.Mode().Perm(), "File should have 0600 permissions") | ||
| } | ||
|
|
||
| // TestPayloadStorage_DefaultSessionID verifies behavior when session ID is empty | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hardcoding the gojq version in logs tends to become stale as dependencies are upgraded. Consider removing the version from this message (or sourcing it from build/module metadata) so the log stays accurate without needing manual updates.