Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ Morphik is an AI-native toolset for visually rich documents and multimodal data.
- **Services**: Business logic in `core/services/`
- **Routes**: API endpoints in `core/routes/`
- **Vector Store**: Multiple providers (pgvector, TurboPuffer) in `core/vector_store/`
- **Embedding**: Support for multiple providers (OpenAI, Ollama, Azure) in `core/embedding/`
- **Embedding**: Support for multiple providers (OpenAI, Ollama, Azure, MiniMax) in `core/embedding/`
- **Parser**: Document processing and chunking in `core/parser/`

### Frontend (TypeScript/Next.js)
Expand Down Expand Up @@ -95,7 +95,7 @@ docker compose down -v # Reset all data
- Connection pooling and retry mechanisms

### AI Model Integration
- Abstracted model interface supporting OpenAI, Anthropic, Google, Ollama, Azure
- Abstracted model interface supporting OpenAI, Anthropic, Google, Ollama, Azure, MiniMax
- Vision-capable models for multimodal processing
- Embedding models for vector similarity search
- Completion models for chat and generation
Expand Down
1 change: 1 addition & 0 deletions DOCKER.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ Create a `.env` file to customize these settings:
```bash
JWT_SECRET_KEY=your-secure-key-here # Important: Change in production
OPENAI_API_KEY=sk-... # Only if using OpenAI
MINIMAX_API_KEY=... # Only if using MiniMax (embo-01 embedding)
HOST=0.0.0.0 # Leave as is for Docker
PORT=8000 # Change if needed
```
Expand Down
3 changes: 2 additions & 1 deletion core/embedding/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from core.embedding.base_embedding_model import BaseEmbeddingModel
from core.embedding.colpali_embedding_model import ColpaliEmbeddingModel
from core.embedding.litellm_embedding import LiteLLMEmbeddingModel
from core.embedding.minimax_embedding import MiniMaxEmbeddingModel

__all__ = ["BaseEmbeddingModel", "LiteLLMEmbeddingModel", "ColpaliEmbeddingModel"]
__all__ = ["BaseEmbeddingModel", "LiteLLMEmbeddingModel", "ColpaliEmbeddingModel", "MiniMaxEmbeddingModel"]
123 changes: 123 additions & 0 deletions core/embedding/minimax_embedding.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
"""MiniMax embedding model using the native MiniMax Embeddings API.

MiniMax's embo-01 model uses a different request/response format from OpenAI:
- Request: {"model": "embo-01", "texts": [...], "type": "db"|"query"}
- Response: {"vectors": [[...]], "total_tokens": N}

The ``type`` parameter distinguishes between embedding documents for storage ("db")
and embedding queries for search ("query"), which improves retrieval quality.
"""

import logging
import os
from typing import List, Union

import httpx

from core.config import get_settings
from core.embedding.base_embedding_model import BaseEmbeddingModel
from core.models.chunk import Chunk

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Default base URL for the MiniMax API; used when the registered model
# configuration does not supply its own "api_base".
MINIMAX_API_BASE = "https://api.minimax.io/v1"


class MiniMaxEmbeddingModel(BaseEmbeddingModel):
    """Embedding model using the native MiniMax embedding API (embo-01).

    Chunks are embedded with ``type="db"`` and search queries with
    ``type="query"``. Model settings are read from the ``REGISTERED_MODELS``
    entry for ``model_key``; the API key is read from the
    ``MINIMAX_API_KEY`` environment variable.
    """

    # Maximum number of texts sent in a single request during ingestion.
    BATCH_SIZE = 50
    # Timeout, in seconds, applied to each HTTP request.
    REQUEST_TIMEOUT = 60.0

    def __init__(self, model_key: str):
        """Load the configuration for ``model_key`` and validate credentials.

        Args:
            model_key: Key of the model entry in ``settings.REGISTERED_MODELS``.

        Raises:
            ValueError: If ``model_key`` is not registered, or if the
                ``MINIMAX_API_KEY`` environment variable is unset or empty.
        """
        settings = get_settings()
        self.model_key = model_key

        if not hasattr(settings, "REGISTERED_MODELS") or model_key not in settings.REGISTERED_MODELS:
            raise ValueError(f"Model '{model_key}' not found in registered_models configuration")

        self.model_config = settings.REGISTERED_MODELS[model_key]
        self.model_name = self.model_config.get("model_name", "embo-01")
        # ``or`` also covers an "api_base" key that is present but null/empty,
        # which would otherwise crash on ``.rstrip``.
        self.api_base = (self.model_config.get("api_base") or MINIMAX_API_BASE).rstrip("/")
        self.api_key = os.environ.get("MINIMAX_API_KEY", "")
        # NOTE(review): the 2000 cap is not explained in this file --
        # presumably a vector-store limit; confirm.
        self.dimensions = min(settings.VECTOR_DIMENSIONS, 2000)

        if not self.api_key:
            raise ValueError("MINIMAX_API_KEY environment variable is not set")

        logger.info(
            "Initialized MiniMax embedding model: model_key=%s, model=%s, dimensions=%d",
            model_key,
            self.model_name,
            self.dimensions,
        )

    async def _embed(self, texts: List[str], embed_type: str) -> List[List[float]]:
        """Call the MiniMax embedding API.

        Args:
            texts: Texts to embed.
            embed_type: "db" for document storage, "query" for search queries.

        Returns:
            List of embedding vectors.

        Raises:
            httpx.HTTPStatusError: If the HTTP response has an error status.
            ValueError: If the response body carries no vectors (the
                ``vectors`` field is missing or null).
        """
        url = f"{self.api_base}/embeddings"
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        payload = {
            "model": self.model_name,
            "texts": texts,
            "type": embed_type,
        }

        async with httpx.AsyncClient(timeout=self.REQUEST_TIMEOUT) as client:
            response = await client.post(url, json=payload, headers=headers)
            response.raise_for_status()
            data = response.json()

        # A response may carry ``"vectors": null`` together with a
        # ``base_resp`` error description, so a missing key and a null value
        # are both treated as failures.
        vectors = data.get("vectors")
        if vectors is None:
            base_resp = data.get("base_resp") or {}
            raise ValueError(
                f"MiniMax embedding API error: "
                f"status_code={base_resp.get('status_code')}, "
                f"status_msg={base_resp.get('status_msg')}"
            )

        # Only the first vector is inspected; a mismatch is logged, not raised.
        if vectors and len(vectors[0]) != self.dimensions:
            logger.warning(
                "Embedding dimension mismatch: got %d, expected %d. "
                "Update VECTOR_DIMENSIONS in morphik.toml to match.",
                len(vectors[0]),
                self.dimensions,
            )

        return vectors

    async def embed_for_ingestion(self, chunks: Union[Chunk, List[Chunk]]) -> List[List[float]]:
        """Embed chunks for storage using type='db'.

        Args:
            chunks: A single chunk or a list of chunks; each chunk's
                ``content`` is embedded.

        Returns:
            One embedding vector per chunk, in input order. An empty list
            when no chunks are given.

        Raises:
            ValueError: If the API returns a different number of vectors than
                texts sent, since that would misalign chunks and embeddings.
        """
        if isinstance(chunks, Chunk):
            chunks = [chunks]

        texts = [chunk.content for chunk in chunks]
        if not texts:
            return []

        # Batch to respect rate limits
        embeddings: List[List[float]] = []
        for start in range(0, len(texts), self.BATCH_SIZE):
            batch = texts[start : start + self.BATCH_SIZE]
            batch_embeddings = await self._embed(batch, embed_type="db")
            if len(batch_embeddings) != len(batch):
                raise ValueError(
                    f"MiniMax embedding API returned {len(batch_embeddings)} vectors "
                    f"for {len(batch)} texts"
                )
            embeddings.extend(batch_embeddings)

        return embeddings

    async def embed_for_query(self, text: str) -> List[float]:
        """Embed a single query for search using type='query'.

        Args:
            text: The query text.

        Returns:
            The query embedding, or a zero vector of length
            ``self.dimensions`` when the API returns an empty vector list.
        """
        vectors = await self._embed([text], embed_type="query")
        if not vectors:
            return [0.0] * self.dimensions
        return vectors[0]
11 changes: 9 additions & 2 deletions core/services_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from core.database.postgres_database import PostgresDatabase
from core.embedding.colpali_api_embedding_model import ColpaliApiEmbeddingModel
from core.embedding.litellm_embedding import LiteLLMEmbeddingModel
from core.embedding.minimax_embedding import MiniMaxEmbeddingModel
from core.parser.morphik_parser import MorphikParser
from core.reranker.flag_reranker import FlagReranker
from core.services.document_service import DocumentService
Expand Down Expand Up @@ -91,8 +92,14 @@
use_contextual_chunking=settings.USE_CONTEXTUAL_CHUNKING,
)

embedding_model = LiteLLMEmbeddingModel(model_key=settings.EMBEDDING_MODEL)
logger.info("Initialized LiteLLM embedding model with model key: %s", settings.EMBEDDING_MODEL)
# Use MiniMax embedding model if the registered model has provider="minimax"
_emb_cfg = settings.REGISTERED_MODELS.get(settings.EMBEDDING_MODEL, {})
if _emb_cfg.get("provider") == "minimax":
embedding_model = MiniMaxEmbeddingModel(model_key=settings.EMBEDDING_MODEL)
logger.info("Initialized MiniMax embedding model with model key: %s", settings.EMBEDDING_MODEL)
else:
embedding_model = LiteLLMEmbeddingModel(model_key=settings.EMBEDDING_MODEL)
logger.info("Initialized LiteLLM embedding model with model key: %s", settings.EMBEDDING_MODEL)

completion_model = LiteLLMCompletionModel(model_key=settings.COMPLETION_MODEL)
logger.info("Initialized LiteLLM completion model with model key: %s", settings.COMPLETION_MODEL)
Expand Down
109 changes: 109 additions & 0 deletions core/tests/integration/test_minimax_embedding_integration.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""Integration tests for MiniMaxEmbeddingModel against the live API.

These tests require a valid MINIMAX_API_KEY environment variable.
They are skipped automatically when the key is absent or the API is
unreachable.
"""

import os
import sys
from unittest.mock import MagicMock

# colpali_engine may not be installed; register placeholder modules so that
# importing it does not raise ImportError. Already-registered modules are kept.
for _mod in ("colpali_engine", "colpali_engine.models"):
    if _mod not in sys.modules:
        sys.modules[_mod] = MagicMock()

import pytest

from core.models.chunk import Chunk

# API key read once at import time; an empty string means "not configured".
MINIMAX_API_KEY = os.environ.get("MINIMAX_API_KEY", "")
# Message passed to pytest.skip when no key is available.
SKIP_REASON = "MINIMAX_API_KEY not set"


def _have_key():
    """Return True when a non-empty MINIMAX_API_KEY was found at import time."""
    return MINIMAX_API_KEY != ""


def _create_mock_settings():
"""Create a mock settings object for integration tests."""
from unittest.mock import MagicMock

s = MagicMock()
s.REGISTERED_MODELS = {
"minimax_embedding": {
"model_name": "embo-01",
"provider": "minimax",
}
}
s.VECTOR_DIMENSIONS = 1536
return s


@pytest.fixture
def embedding_model():
    """Provide a MiniMaxEmbeddingModel built against mocked settings.

    The test using this fixture is skipped when no MINIMAX_API_KEY is set.
    """
    if not _have_key():
        pytest.skip(SKIP_REASON)

    from unittest.mock import patch

    fake_settings = _create_mock_settings()
    settings_patch = patch(
        "core.embedding.minimax_embedding.get_settings",
        return_value=fake_settings,
    )
    # Settings are only read inside the constructor, so the patch needs to be
    # active just while the model is being built.
    with settings_patch:
        from core.embedding.minimax_embedding import MiniMaxEmbeddingModel

        model = MiniMaxEmbeddingModel(model_key="minimax_embedding")
        return model


@pytest.mark.integration
@pytest.mark.asyncio
async def test_embed_query_live(embedding_model):
    """Check that embed_for_query yields a 1536-element list from the live API."""
    question = "What is retrieval augmented generation?"
    try:
        vector = await embedding_model.embed_for_query(question)
    except Exception as exc:
        # Any failure while calling the API is reported as a skip.
        pytest.skip(f"MiniMax API unreachable: {exc}")

    assert isinstance(vector, list)
    assert len(vector) == 1536
    # Spot-check the element type on the leading entries only.
    for value in vector[:10]:
        assert isinstance(value, float)


@pytest.mark.integration
@pytest.mark.asyncio
async def test_embed_ingestion_live(embedding_model):
    """Check that embed_for_ingestion yields one 1536-d vector per chunk."""
    contents = (
        "Morphik is an AI-native document processing system.",
        "MiniMax provides large language models and embedding APIs.",
    )
    chunks = [Chunk(content=text, metadata={}) for text in contents]

    try:
        vectors = await embedding_model.embed_for_ingestion(chunks)
    except Exception as exc:
        # Any failure while calling the API is reported as a skip.
        pytest.skip(f"MiniMax API unreachable: {exc}")

    assert isinstance(vectors, list)
    assert len(vectors) == 2
    for vector in vectors:
        assert len(vector) == 1536


@pytest.mark.integration
@pytest.mark.asyncio
async def test_query_vs_db_embeddings_differ(embedding_model):
    """The same text embedded as a query and as a document should give
    different vectors, since the two calls use type=query and type=db."""
    text = "artificial intelligence research paper"

    try:
        as_query = await embedding_model.embed_for_query(text)
        as_document = await embedding_model.embed_for_ingestion(Chunk(content=text, metadata={}))
    except Exception as exc:
        # Any failure while calling the API is reported as a skip.
        pytest.skip(f"MiniMax API unreachable: {exc}")

    stored = as_document[0]
    assert len(as_query) == len(stored) == 1536
    # Identical vectors would mean the type parameter had no effect.
    assert as_query != stored
Loading