Mirra/main.py at main · Sparshr04/Mirra · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
#!/usr/bin/env python3
"""
Main orchestration script for the Mirra Semantic 3D Reconstruction Pipeline.
Architecture: DUSt3R + SAM 2 + TSDF
"""

import gc
import os
import sys
import time
import traceback

import torch
import hydra
from omegaconf import DictConfig

# Add src to path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), "src"))

from src.video_utils import get_input_data, extract_frames, ingest_photos
from src.config_presets import apply_preset
from src.geometry_engine_v2 import GeometryEngineV2
from src.semantic_engine import SemanticEngine
from src.fusion_engine import FusionEngine


# ─── Accelerator (MPS / CUDA) Memory Management ─────────────────────


def _flush_mps_memory():
    """Aggressively reclaim MPS unified memory on Apple Silicon."""
    gc.collect()
    if hasattr(torch, "mps") and hasattr(torch.mps, "empty_cache"):
        torch.mps.empty_cache()
    elif torch.cuda.is_available():
        torch.cuda.empty_cache()


# ─── Main Orchestrator ──────────────────────────────────────────────


@hydra.main(version_base=None, config_path="./", config_name="config")
def main(cfg: DictConfig):
    """Run the full pipeline: frame preparation, geometry, semantics, fusion.

    The stages run strictly in sequence.  After the geometry and semantic
    stages the engine is unloaded and accelerator memory is flushed before
    the next stage starts.  If any stage raises, the error and its traceback
    are printed and the process exits with status 1.

    Args:
        cfg: Hydra configuration; passed through ``apply_preset`` before use.
    """
    started_at = time.time()
    rule = "=" * 70

    # ─── Preset profile ──────────────────────────────────────────────
    cfg = apply_preset(cfg)

    print(rule)
    print("MIRRA — SEMANTIC 3D RECONSTRUCTION PIPELINE")
    preset = cfg.get("preset", "default")
    print(f"Mode: {preset.upper()} (DUSt3R + SAM 2 + TSDF)")
    print(rule)

    # Use Hydra's original working directory when the helper exists,
    # otherwise fall back to the current working directory.
    resolve_root = getattr(hydra.utils, "get_original_cwd", os.getcwd)
    project_root = resolve_root()

    # ─── Input routing (photo folder vs. video) ──────────────────────
    mode, data_path = get_input_data(cfg, project_root)
    photo_mode = mode == "photos"

    if not photo_mode:
        print(f"\n🎥 Video mode detected: '{data_path}'")
    else:
        print(f"\n📷 Photo-folder mode detected: '{data_path}'")
        print("   Skipping video extraction. Routing directly to DUSt3R.")

    # ─── Stage 0: frame preparation ──────────────────────────────────
    print("\n[0/3] Preparing frames...")
    try:
        # Both loaders take the same arguments and return (frames, frames_dir).
        prepare_frames = ingest_photos if photo_mode else extract_frames
        frames, frames_dir = prepare_frames(data_path, cfg, project_root)

        if not frames:
            print("❌ No frames prepared. Aborting.")
            # SystemExit is not an Exception subclass, so it is not caught below.
            sys.exit(1)

        print(f"✅ Prepared {len(frames)} frames in {frames_dir}")
    except Exception as exc:
        print(f"❌ Frame preparation failed: {exc}")
        traceback.print_exc()
        sys.exit(1)

    # ─── Stage 1: geometry (DUSt3R) ──────────────────────────────────
    print("\n[1/3] Running Geometry Engine (DUSt3R)...")
    try:
        geometry = GeometryEngineV2(cfg)

        reconstruction = geometry.run_inference(frames)
        geometry.save_outputs(reconstruction, frames)

        # Drop the model and its outputs before the next model is loaded.
        geometry.unload_model()
        del geometry
        del reconstruction
        _flush_mps_memory()
        print("  🧹 DUSt3R fully unloaded, memory freed for SAM 2")

    except Exception as exc:
        print(f"❌ Geometry stage failed: {exc}")
        traceback.print_exc()
        sys.exit(1)

    # ─── Stage 2: semantics (SAM 2) ──────────────────────────────────
    # NOTE(review): process_input is given (mode, data_path), not the
    # frames_dir from Stage 0; whether it reuses those frames internally is
    # not visible from this file — confirm against SemanticEngine.
    print("\n[2/3] Running Semantic Engine (SAM 2)...")
    try:
        semantics = SemanticEngine(cfg)
        output_masks, sem_frames_dir = semantics.process_input(mode, data_path)
        semantics.save_outputs(output_masks, sem_frames_dir)

        # Drop the model and its masks before fusion starts.
        semantics.unload_all_models()
        del semantics
        del output_masks
        _flush_mps_memory()
        print("  🧹 SAM 2 fully unloaded, memory freed for TSDF")

    except Exception as exc:
        print(f"❌ Semantics stage failed: {exc}")
        traceback.print_exc()
        sys.exit(1)

    # ─── Stage 3: TSDF fusion ────────────────────────────────────────
    print("\n[3/3] Running Fusion Engine (TSDF + Semantic Vote)...")
    try:
        fusion = FusionEngine(cfg)
        fusion.run()
        print("✅ Fusion stage complete.")

        del fusion
        _flush_mps_memory()

    except Exception as exc:
        print(f"❌ Fusion stage failed: {exc}")
        traceback.print_exc()
        sys.exit(1)

    # ─── Summary ─────────────────────────────────────────────────────
    elapsed = time.time() - started_at
    final_dir = os.path.join(project_root, "outputs", "final")

    print("\n" + rule)
    print(f"PIPELINE COMPLETE — {elapsed:.1f}s total ({preset} mode)")
    print(rule)
    print("\nOutputs:")
    print(f"  • Semantic Point Cloud: {final_dir}/semantic_world_clean.ply")
    print(f"  • Label Map:            {final_dir}/label_map.json")
    print(f"  • Poses Archive:        outputs/geometry/poses.npz")
    print("\nTo explore your world, run:")
    print(
        f"  uv run python tools/interactive_viewer.py {final_dir}/semantic_world_clean.ply"
    )
    print()

# Script entry point. main() is called without arguments because it is
# wrapped by @hydra.main, which supplies the cfg argument.
if __name__ == "__main__":
    main()