-
Notifications
You must be signed in to change notification settings - Fork 12
Expand file tree
/
Copy pathworldexplorer.py
More file actions
345 lines (289 loc) · 15.5 KB
/
worldexplorer.py
File metadata and controls
345 lines (289 loc) · 15.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
#!/usr/bin/env python
import typer
from typing import Optional
from pathlib import Path
import datetime
import os
import subprocess
# Import the refactored functions from existing modules
from model.scaffold_generation import GenerationMode, run_scaffold_generation
from model.scene_expansion import run_scene_expansion
# Root Typer application; the command functions in this file register
# themselves on it through the @app.command() decorator.
app = typer.Typer(
    name="WorldExplorer",
    help="🌍 WorldExplorer - Towards Fully-Navigable 3D Scenes",
    add_completion=False,
)
def _download_checkpoint(label: str, destination: Path, url: str) -> None:
    """Fetch one checkpoint with ``wget``; exit the CLI with status 1 on failure.

    The data is written to a ``.part`` sibling of *destination* and only moved
    into place after wget reports success, so a failed or interrupted download
    is never mistaken for a valid checkpoint by a later ``exists()`` check.

    Args:
        label: Human-readable checkpoint name used in the console messages.
        destination: Final location of the checkpoint file.
        url: Download URL handed to wget.

    Raises:
        typer.Exit: With exit code 1 when the download could not be completed.
    """
    print(f"\n📥 {label} checkpoint not found. Downloading...")
    partial = destination.with_name(destination.name + ".part")
    failure = None
    try:
        # wget does not create missing directories for -O targets.
        destination.parent.mkdir(parents=True, exist_ok=True)
        # Argument list instead of a shell string: no quoting pitfalls.
        result = subprocess.run(
            ["wget", "-O", str(partial), url],
            capture_output=True,
            text=True,
        )
        if result.returncode == 0:
            partial.replace(destination)
        else:
            failure = result.stderr
    except (OSError, subprocess.SubprocessError) as exc:
        # Covers a missing wget binary and filesystem errors.
        failure = exc

    if failure is None:
        print(f"✅ Successfully downloaded {label} checkpoint to {destination}")
        return

    try:
        partial.unlink()
    except OSError:
        pass  # no partial file was left behind (or it cannot be removed)

    print(f"❌ Error downloading {label} checkpoint: {failure}")
    print("\nPlease download manually from:")
    print(f" {url}")
    print(f"And save to: {destination}")
    # Raised outside the try block on purpose: typer.Exit derives from
    # RuntimeError, so raising it inside would be caught by the handler
    # above and the failure would be reported twice.
    raise typer.Exit(1)


def check_and_download_checkpoints():
    """Check for required checkpoints and download them if missing.

    Looks for the Video-Depth-Anything and Depth_Anything_V2 checkpoint files
    at fixed paths relative to the current working directory. A checkpoint
    that is already present is only reported; a missing one is downloaded
    with wget.

    Raises:
        typer.Exit: With exit code 1 when a missing checkpoint cannot be
            downloaded.
    """
    required_checkpoints = (
        (
            "Video-Depth-Anything",
            Path("model/Video-Depth-Anything/checkpoints/video_depth_anything_vits.pth"),
            "https://huggingface.co/depth-anything/Video-Depth-Anything-Small/resolve/main/video_depth_anything_vits.pth?download=true",
        ),
        (
            "Depth_Anything_V2",
            Path("model/Depth_Anything_V2/checkpoints/depth_anything_v2_metric_hypersim_vitl.pth"),
            "https://huggingface.co/depth-anything/Depth-Anything-V2-Metric-Hypersim-Large/resolve/main/depth_anything_v2_metric_hypersim_vitl.pth?download=true",
        ),
    )
    for label, checkpoint_path, download_url in required_checkpoints:
        if checkpoint_path.exists():
            print(f"✅ {label} checkpoint found: {checkpoint_path}")
        else:
            _download_checkpoint(label, checkpoint_path, download_url)
@app.command()
def generate(
    theme: Optional[str] = typer.Argument(None, help="The theme for generation (e.g., 'Rustic Farmhouse (Wood, Leather, Wool)'). Not used with --custom"),
    mode: GenerationMode = typer.Option(GenerationMode.manual, "--mode", "-m", help="Generation mode: fast (single output), automatic (CLIP-based selection), or manual (user selection)"),
    translation_scaling_factor: float = typer.Option(3.0, "--translation-scaling", "-t", help="Translation scaling factor for scene expansion"),
    skip_expansion: bool = typer.Option(False, "--skip-expansion", help="Skip scene expansion step"),
    root_dir: Optional[str] = typer.Option(None, "--root-dir", help="Directory containing original trajectories (uses default if not specified)"),
    custom: bool = typer.Option(False, "--custom", "-c", help="Enable custom mode for outdoor/custom scene generation with user-provided prompts"),
    num_images_for_vggt: int = typer.Option(40, "--num-images-for-vggt", help="Number of images to be sampled from the global scene memory as input to VGGT in addition to the scaffold images. The higher the number of images, the better the initial pointcloud used for gaussian-splatting initialization. We recommend to set the parameter according to the available GPU memory."),
):
    """Generate scaffold and optionally expand to 3D scene."""
    # NOTE: Typer uses the docstring above as the command's --help text, so
    # maintainer notes are kept in comments.
    #
    # Flow: ensure checkpoints -> scaffold generation (custom prompts or a
    # theme) -> either run scene expansion right away (fast/automatic) or
    # print the follow-up steps (manual). Exits with code 1 when no theme is
    # given outside custom mode.
    check_and_download_checkpoints()

    print(f"\n{'='*80}")
    print("SCAFFOLD GENERATION")
    print(f"{'='*80}")

    if custom:
        # Custom mode always runs with manual selection.
        if mode != GenerationMode.manual:
            print("\n⚠️ Note: Custom mode requires manual selection for best results.")
            print(" Switching to manual mode...")
            mode = GenerationMode.manual

        print("\n🎨 Custom Scene Generation Mode")
        print("Please provide 4 prompts for generating a panoramic scene.")
        print("Each prompt will generate one of the 4 cardinal directions.")
        print("Note: The eye-level camera angle will be automatically added.\n")

        # One interactive prompt per view, asked in this fixed order.
        custom_prompts = [
            typer.prompt(f"Prompt for {direction} view")
            for direction in ("North", "West", "South", "East")
        ]

        # Ask for the translation scaling factor unless expansion is skipped.
        if not skip_expansion:
            print("\n📏 Translation Scaling Factor")
            print("This controls the scale of movement in the 3D scene:")
            print(" • Indoor scenes: typically 3")
            print(" • Outdoor scenes: typically 10")
            translation_scaling_factor = typer.prompt(
                "Enter translation scaling factor",
                type=float,
                default=10.0,
                show_default=True,
            )

        print("\nMode: manual (required for custom scenes)")

        _parent_folder, output_folder, final_folder = run_scaffold_generation(
            theme="custom",
            mode=mode,
            custom=True,
            custom_prompts=custom_prompts,
        )
    else:
        # Standard mode: a theme is mandatory.
        if theme is None:
            print("❌ Error: Theme is required when not using --custom mode")
            raise typer.Exit(1)

        print(f"Theme: {theme}")
        print(f"Mode: {mode.value}")

        _parent_folder, output_folder, final_folder = run_scaffold_generation(theme, mode)

    print("\n✅ Scaffold generation completed!")
    print(f"Images saved in: {final_folder}")

    if skip_expansion:
        return

    if mode in (GenerationMode.fast, GenerationMode.automatic):
        # No human selection needed: continue straight into scene expansion.
        print(f"\n{'='*80}")
        print("SCENE EXPANSION")
        print(f"{'='*80}")

        print("\n🚀 Starting scene expansion...")
        print("⏱️ This process will take several hours to complete.")
        print(" The system will continue running until finished.\n")

        work_dir = run_scene_expansion(
            final_folder,
            translation_scaling_factor,
            root_dir=root_dir,
            num_images_for_vggt=num_images_for_vggt,
        )

        print("\n✅ Scene expansion completed!")
        print(f"Results saved in: {work_dir}")
    elif mode == GenerationMode.manual:
        # Expansion is not run in this invocation. Carry the expansion
        # options chosen here (including a factor the user was just prompted
        # for in custom mode) into the suggested follow-up command; otherwise
        # they would be silently discarded and asked for again by `expand`.
        expand_command = (
            f"python worldexplorer.py expand '{final_folder}'"
            f" --translation-scaling {translation_scaling_factor}"
            f" --num-images-for-vggt {num_images_for_vggt}"
        )
        if root_dir is not None:
            expand_command += f" --root-dir '{root_dir}'"

        print(f"\n{'='*80}")
        print("MANUAL MODE - Scene Expansion Paused")
        print(f"{'='*80}")
        print("\n⏸️ Manual selection required before continuing to 3D expansion.")
        print("\n📋 Next Steps:")
        print(f"1. Review the generated variations in: {output_folder}")
        print(f"2. Copy your selected images (001, 003, 005, 007) to: {final_folder}")
        print("3. Run scene expansion with:")
        print(f" {expand_command}")
@app.command()
def expand(
    input_folder: str = typer.Argument(..., help="Path to folder containing 8 images (000.png to 007.png)"),
    translation_scaling_factor: Optional[float] = typer.Option(None, "--translation-scaling", "-t", help="Translation scaling factor for scene expansion"),
    root_dir: Optional[str] = typer.Option(None, "--root-dir", help="Directory containing original trajectories (uses default if not specified)"),
    num_images_for_vggt: int = typer.Option(40, "--num-images-for-vggt", help="Number of images to be sampled from the global scene memory as input to VGGT in addition to the scaffold images. The higher the number of images, the better the initial pointcloud used for gaussian-splatting initialization. We recommend to set the parameter according to the available GPU memory."),
):
    """Expand an existing scaffold to 3D scene."""
    # NOTE: Typer uses the docstring above as the command's --help text, so
    # maintainer notes are kept in comments.
    #
    # Validates the scaffold folder (it must exist and hold 000.png-007.png),
    # ensures the checkpoints are present, asks for the translation scaling
    # factor when it was not given on the command line, then runs scene
    # expansion. Exits with code 1 on a missing folder or missing images.
    scaffold_dir = Path(input_folder)
    if not scaffold_dir.exists():
        print(f"❌ Error: Input folder '{input_folder}' does not exist!")
        raise typer.Exit(1)

    # Collect every required image name that is absent from the folder.
    required_names = [f"00{index}.png" for index in range(8)]
    missing_images = [
        name for name in required_names if not (scaffold_dir / name).exists()
    ]
    if missing_images:
        print(f"❌ Error: Missing required images in {input_folder}:")
        for name in missing_images:
            print(f" - {name}")
        print("\nPlease ensure all 8 images (000.png to 007.png) are present.")
        raise typer.Exit(1)

    print(f"\n✅ Found all 8 required images in: {input_folder}")

    check_and_download_checkpoints()

    # Without an explicit --translation-scaling, ask interactively.
    if translation_scaling_factor is None:
        print("\n📏 Translation Scaling Factor")
        print("This controls the scale of movement in the 3D scene:")
        print(" • Indoor scenes: typically 3")
        print(" • Outdoor scenes: typically 10")
        translation_scaling_factor = typer.prompt(
            "Enter translation scaling factor",
            type=float,
            default=3.0,
            show_default=True,
        )

    print(f"Translation scaling factor: {translation_scaling_factor}")
    print("\n🚀 Starting scene expansion...")
    print("⏱️ This process will take several hours to complete.")
    print(" The system will continue running until finished.\n")

    result_dir = run_scene_expansion(
        str(scaffold_dir),
        translation_scaling_factor,
        root_dir=root_dir,
        num_images_for_vggt=num_images_for_vggt,
    )

    print("\n✅ Scene expansion completed!")
    print(f"Results saved in: {result_dir}")
@app.command()
def scaffold(
    theme: Optional[str] = typer.Argument(None, help="The theme for generation (e.g., 'Rustic Farmhouse (Wood, Leather, Wool)'). Not used with --custom"),
    mode: GenerationMode = typer.Option(GenerationMode.manual, "--mode", "-m", help="Generation mode: fast (single output), automatic (CLIP-based selection), or manual (user selection)"),
    custom: bool = typer.Option(False, "--custom", "-c", help="Enable custom mode for outdoor/custom scene generation with user-provided prompts"),
):
    """Generate only the scaffold (no scene expansion)."""
    # NOTE: Typer uses the docstring above as the command's --help text, so
    # maintainer notes are kept in comments.
    #
    # Ensures the checkpoints are present, runs scaffold generation (from a
    # theme, or from four interactively entered prompts in custom mode) and
    # prints where the results are plus the command to expand them later.
    # Exits with code 1 when no theme is given outside custom mode.
    check_and_download_checkpoints()

    separator = "=" * 80
    print(f"\n{separator}")
    print("SCAFFOLD GENERATION ONLY")
    print(separator)

    if not custom:
        if theme is None:
            print("❌ Error: Theme is required when not using --custom mode")
            raise typer.Exit(1)
        _unused_parent, output_folder, final_folder = run_scaffold_generation(theme, mode)
    else:
        # Custom mode always runs with manual selection.
        if mode != GenerationMode.manual:
            print("\n⚠️ Note: Custom mode requires manual selection for best results.")
            print(" Switching to manual mode...")
            mode = GenerationMode.manual

        print("\n🎨 Custom Scene Generation Mode")
        print("Please provide 4 prompts for generating a panoramic scene.")
        print("Each prompt will generate one of the 4 cardinal directions.")
        print("Note: The eye-level camera angle will be automatically added.\n")

        # One interactive prompt per view, asked in this fixed order.
        custom_prompts = []
        for direction in ("North", "West", "South", "East"):
            custom_prompts.append(typer.prompt(f"Prompt for {direction} view"))

        _unused_parent, output_folder, final_folder = run_scaffold_generation(
            theme="custom",
            mode=mode,
            custom=True,
            custom_prompts=custom_prompts,
        )

    print("\n✅ Scaffold generation completed!")
    print(f"Output folder: {output_folder}")
    print(f"Final images: {final_folder}")
    print("\nTo expand this scaffold later, run:")
    print(f" python worldexplorer.py expand '{final_folder}'")
@app.command()
def info():
    """Display information about WorldExplorer."""
    # NOTE: Typer uses the docstring above as the command's --help text.
    # The whole overview is kept as a table of lines, printed one per call
    # to print().
    rule = "=" * 80
    overview_lines = (
        f"\n{rule}",
        "🌍 WORLDEXPLORER - AI-Powered 3D Scene Generation",
        rule,
        "\n📚 Overview:",
        "WorldExplorer transforms text descriptions into immersive 3D environments",
        "through a two-stage process:",
        " 1. Scaffold Generation - Creates panoramic images from text",
        " 2. Scene Expansion - Converts panoramas into full 3D scenes",
        "\n📋 Generation Modes:",
        " • fast - Quick generation with single output per view",
        " • automatic - Multiple variations with AI-based selection",
        " • manual - Generate variations for human curation",
        "\n🎯 Commands:",
        "\n1. Full Pipeline (indoor scenes):",
        " python worldexplorer.py generate 'Modern Office' --mode fast",
        "\n2. Custom/Outdoor Scenes (manual mode only):",
        " python worldexplorer.py generate --custom",
        " # You'll be prompted for 4 custom prompts (N, W, S, E)",
        " # and translation scaling factor (3 for indoor, 10 for outdoor)",
        "\n3. Scaffold Only (no 3D expansion):",
        " python worldexplorer.py scaffold 'Rustic Farmhouse' --mode manual",
        " python worldexplorer.py scaffold --custom",
        "\n4. Expand Existing Panorama:",
        " python worldexplorer.py expand './panoramas/[name]/[timestamp]/final'",
        "\n5. Manual Curation Workflow:",
        " python worldexplorer.py scaffold 'Beach House' --mode manual",
        " # Manually select best inpainted images",
        " python worldexplorer.py expand './panoramas/[name]/[timestamp]/final'",
        "\n⏱️ Time Estimates:",
        " • Scaffold generation: 5 minutes",
        " • Scene expansion: 6-7 hours",
        "\n💡 Tips:",
        " • Manual mode often produces better results",
        f"{rule}\n",
    )
    for line in overview_lines:
        print(line)
# Script entry point: hand control to the Typer application only when the
# file is executed directly, not when it is imported.
if __name__ == "__main__":
    app()