Skip to content

Commit d471fcf

Browse files
committed
fix(googlephotos): properly transition discovered assets to discarded state
Assets discovered during Google Photos takeout processing were being left in PENDING state when they should have been transitioned to DISCARDED. Two scenarios were affected: 1. Duplicate files in the same directory: When a file with the same base name was found in a directory, RecordAssetDiscardedImmediately was being called. However, the asset was already discovered and in PENDING state, so RecordAssetDiscarded should be used instead to properly transition it. 2. Files without JSON metadata: When --include-unmatched is false, files without matching JSON metadata were not being transitioned to DISCARDED state, leaving them stuck in PENDING. This resulted in assets being reported as "pending" at the end of upload, even though they were correctly identified as duplicates or filtered.
1 parent 2ae142c commit d471fcf

2 files changed

Lines changed: 124 additions & 2 deletions

File tree

adapters/googlePhotos/googlephotos.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -229,7 +229,8 @@ func (toc *TakeoutCmd) passOneFsWalk(ctx context.Context, w fs.FS) error {
229229
toc.fileTracker.Store(key, tracking) // to.fileTracker[key] = tracking
230230

231231
if _, ok := dirCatalog.unMatchedFiles[base]; ok {
232-
toc.processor.RecordAssetDiscardedImmediately(ctx, fshelper.FSName(w, name), finfo.Size(), fileevent.DiscardedLocalDuplicate, "duplicated in the directory")
232+
// Asset was already discovered above, so transition from pending to discarded
233+
toc.processor.RecordAssetDiscarded(ctx, fshelper.FSName(w, name), finfo.Size(), fileevent.DiscardedLocalDuplicate, "duplicated in the directory")
233234
return nil
234235
}
235236

@@ -323,8 +324,11 @@ func (toc *TakeoutCmd) solvePuzzle(ctx context.Context) error {
323324
if toc.KeepJSONLess {
324325
a := toc.makeAsset(ctx, dir, i, nil)
325326
cat.matchedFiles[f] = a
326-
delete(cat.unMatchedFiles, f)
327+
} else {
328+
// Asset was discovered but has no JSON metadata and --include-unmatched is not set
329+
toc.processor.RecordAssetDiscarded(ctx, fshelper.FSName(i.fsys, path.Join(dir, i.base)), int64(i.length), fileevent.DiscardedFiltered, "no matching JSON metadata (use --include-unmatched to import)")
327330
}
331+
delete(cat.unMatchedFiles, f)
328332
}
329333
}
330334
}

internal/fileprocessor/processor_test.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -435,6 +435,124 @@ func TestSummary(t *testing.T) {
435435
}
436436
}
437437

438+
// TestDiscoverThenDiscardTransition verifies that assets discovered and later
439+
// determined to be discardable (e.g., duplicates found during processing, files
440+
// without required metadata) are properly transitioned from PENDING to DISCARDED.
441+
//
442+
// This test documents a bug fix where assets were being discovered but not
443+
// properly transitioned to a final state when discarded during later processing
444+
// stages, leaving them stuck in PENDING state.
445+
//
446+
// The key insight is:
447+
// - RecordAssetDiscardedImmediately: For assets discarded AT discovery time
448+
// (before being added to pending)
449+
// - RecordAssetDiscarded: For assets already discovered that are discarded
450+
// during later processing (must transition from PENDING to DISCARDED)
451+
func TestDiscoverThenDiscardTransition(t *testing.T) {
452+
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
453+
tracker := assettracker.New()
454+
recorder := fileevent.NewRecorder(logger)
455+
fp := New(tracker, recorder)
456+
457+
ctx := context.Background()
458+
459+
// Scenario 1: Duplicate file found during processing
460+
// In Google Photos takeout, the same image can appear in multiple zip parts
461+
// with different paths. Both are discovered, then one is detected as duplicate.
462+
file1 := newTestFile("/photos/album/image.jpg")
463+
fp.RecordAssetDiscovered(ctx, file1, 1024, fileevent.DiscoveredImage)
464+
465+
// Same image in a different location (e.g., year folder) - different full path
466+
file1InYear := newTestFile("/photos/2023/image.jpg")
467+
fp.RecordAssetDiscovered(ctx, file1InYear, 1024, fileevent.DiscoveredImage)
468+
469+
// The year folder copy is detected as duplicate during processing
470+
// Must use RecordAssetDiscarded (NOT RecordAssetDiscardedImmediately)
471+
// because the asset was already discovered and is in PENDING state
472+
fp.RecordAssetDiscarded(ctx, file1InYear, 1024, fileevent.DiscardedLocalDuplicate, "duplicate in directory")
473+
474+
// Scenario 2: File discovered but later filtered (e.g., no JSON metadata)
475+
fileNoMeta := newTestFile("/photos/year/orphan.jpg")
476+
fp.RecordAssetDiscovered(ctx, fileNoMeta, 2048, fileevent.DiscoveredImage)
477+
478+
// During puzzle solving, file has no matching JSON and --include-unmatched is false
479+
// Must use RecordAssetDiscarded to properly transition from PENDING
480+
fp.RecordAssetDiscarded(ctx, fileNoMeta, 2048, fileevent.DiscardedFiltered, "no matching JSON metadata")
481+
482+
// Process the first (non-duplicate) file normally
483+
fp.RecordAssetProcessed(ctx, file1, 1024, fileevent.ProcessedUploadSuccess)
484+
485+
// Verify final state - NO assets should be pending
486+
counters := fp.GetAssetCounters()
487+
if counters.Pending != 0 {
488+
t.Errorf("Expected 0 pending assets, got %d - assets not properly transitioned", counters.Pending)
489+
}
490+
if counters.Processed != 1 {
491+
t.Errorf("Expected 1 processed asset, got %d", counters.Processed)
492+
}
493+
if counters.Discarded != 2 {
494+
t.Errorf("Expected 2 discarded assets, got %d", counters.Discarded)
495+
}
496+
497+
// Finalize should succeed with no pending assets
498+
err := fp.Finalize(ctx)
499+
if err != nil {
500+
t.Errorf("Finalize should succeed when all assets reach final state, got error: %v", err)
501+
}
502+
}
503+
504+
// TestDiscardedImmediatelyVsDiscarded clarifies the difference between
505+
// RecordAssetDiscardedImmediately and RecordAssetDiscarded.
506+
func TestDiscardedImmediatelyVsDiscarded(t *testing.T) {
507+
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
508+
tracker := assettracker.New()
509+
recorder := fileevent.NewRecorder(logger)
510+
fp := New(tracker, recorder)
511+
512+
ctx := context.Background()
513+
514+
// RecordAssetDiscardedImmediately: Asset is rejected at discovery time
515+
// (e.g., banned filename pattern, excluded extension)
516+
// This creates the asset directly in DISCARDED state
517+
bannedFile := newTestFile("/photos/.DS_Store.jpg")
518+
fp.RecordAssetDiscardedImmediately(ctx, bannedFile, 100, fileevent.DiscardedBanned, "banned filename")
519+
520+
// Verify it went directly to discarded, never pending
521+
counters := fp.GetAssetCounters()
522+
if counters.Pending != 0 {
523+
t.Errorf("Immediately discarded asset should not be pending, got %d pending", counters.Pending)
524+
}
525+
if counters.Discarded != 1 {
526+
t.Errorf("Expected 1 discarded, got %d", counters.Discarded)
527+
}
528+
529+
// RecordAssetDiscarded: Asset was discovered, then later discarded during processing
530+
// First it must be discovered (goes to PENDING)
531+
laterDiscarded := newTestFile("/photos/image.jpg")
532+
fp.RecordAssetDiscovered(ctx, laterDiscarded, 512, fileevent.DiscoveredImage)
533+
534+
counters = fp.GetAssetCounters()
535+
if counters.Pending != 1 {
536+
t.Errorf("Discovered asset should be pending, got %d", counters.Pending)
537+
}
538+
539+
// Then it's discarded (transitions PENDING -> DISCARDED)
540+
fp.RecordAssetDiscarded(ctx, laterDiscarded, 512, fileevent.DiscardedLocalDuplicate, "duplicate")
541+
542+
counters = fp.GetAssetCounters()
543+
if counters.Pending != 0 {
544+
t.Errorf("After RecordAssetDiscarded, asset should not be pending, got %d", counters.Pending)
545+
}
546+
if counters.Discarded != 2 {
547+
t.Errorf("Expected 2 discarded (immediate + later), got %d", counters.Discarded)
548+
}
549+
550+
// Verify finalization succeeds
551+
if err := fp.Finalize(ctx); err != nil {
552+
t.Errorf("Finalize should succeed: %v", err)
553+
}
554+
}
555+
438556
func TestCompleteWorkflow(t *testing.T) {
439557
logger := slog.New(slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{Level: slog.LevelError}))
440558
tracker := assettracker.New()

0 commit comments

Comments
 (0)