Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 11 additions & 1 deletion env.example
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,19 @@ OLLAMA_EMULATING_MODEL_TAG=latest
# ENABLE_TABLE_PROCESSING=true
# ENABLE_EQUATION_PROCESSING=true

### Audio Processing Configuration (requires: pip install raganything[audio])
# WHISPER_MODEL=base # tiny/base/small/medium/large-v3
# WHISPER_LANGUAGE= # Auto-detect if empty. Set to "zh", "en", etc.

### Video Processing Configuration (requires: pip install raganything[video])
### Uses PySceneDetect for scene boundaries, a VLM for visual descriptions, and Whisper for audio transcription
# VIDEO_SCENE_THRESHOLD=27.0 # SceneDetect sensitivity (lower = more scenes)
# VIDEO_MIN_SCENE_DURATION=5.0 # Minimum scene duration in seconds
# VIDEO_MAX_SCENES=50 # Maximum scenes to process per video

### Batch Processing Configuration
# MAX_CONCURRENT_FILES=1
# SUPPORTED_FILE_EXTENSIONS=.pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md
# SUPPORTED_FILE_EXTENSIONS=.pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md,.mp3,.wav,.flac,.m4a,.ogg,.wma,.aac,.opus,.mp4,.mov,.webm,.avi,.mkv,.flv,.wmv,.m4v
# RECURSIVE_FOLDER_PROCESSING=true

### Context Extraction Configuration
Expand Down
6 changes: 6 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ markdown = [
"weasyprint>=60.0",
"pygments>=2.10.0",
]
audio = ["faster-whisper>=1.0.0"]
video = ["scenedetect[opencv]>=0.6.0", "moviepy>=2.0.0", "faster-whisper>=1.0.0", "opencv-python>=4.8.0"]
all = [
"Pillow>=10.0.0",
"reportlab>=4.0.0",
Expand All @@ -48,6 +50,10 @@ all = [
"markdown>=3.4.0",
"weasyprint>=60.0",
"pygments>=2.10.0",
"faster-whisper>=1.0.0",
"scenedetect[opencv]>=0.6.0",
"moviepy>=2.0.0",
"opencv-python>=4.8.0",
]

[project.urls]
Expand Down
36 changes: 36 additions & 0 deletions raganything/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,26 @@
except ImportError:
pass

# Optional: audio modal processor (requires faster-whisper).
try:
from .modalprocessors_audio import (
AudioModalProcessor as AudioModalProcessor,
is_audio_file as is_audio_file,
)
except ImportError:
# faster-whisper not installed; audio processing unavailable.
pass

# Optional: video modal processor (requires scenedetect + moviepy + faster-whisper + opencv).
try:
from .modalprocessors_video import (
VideoModalProcessor as VideoModalProcessor,
is_video_file as is_video_file,
)
except ImportError:
# Video dependencies not installed; video processing unavailable.
pass

# Optional: multilingual prompt manager.
try:
from .prompt_manager import (
Expand Down Expand Up @@ -97,6 +117,22 @@
]
)

if "AudioModalProcessor" in globals():
__all__.extend(
[
"AudioModalProcessor",
"is_audio_file",
]
)

if "VideoModalProcessor" in globals():
__all__.extend(
[
"VideoModalProcessor",
"is_video_file",
]
)

if "set_prompt_language" in globals():
__all__.extend(
[
Expand Down
2 changes: 1 addition & 1 deletion raganything/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ class RAGAnythingConfig:
x.strip()
for x in get_env_value(
"SUPPORTED_FILE_EXTENSIONS",
".pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md",
".pdf,.jpg,.jpeg,.png,.bmp,.tiff,.tif,.gif,.webp,.doc,.docx,.ppt,.pptx,.xls,.xlsx,.txt,.md,.mp3,.wav,.flac,.m4a,.ogg,.wma,.aac,.opus,.mp4,.mov,.webm,.avi,.mkv,.flv,.wmv,.m4v",
str,
).split(",")
]
Expand Down
Loading