Skip to content

Fix Docker postgres build #45

Fix Docker postgres build

Fix Docker postgres build #45

Workflow file for this run

# Builds the project's Docker image for every pull request and smoke-tests it
# by starting a container and polling its /ping endpoint. Nothing is pushed.
name: Build Docker Image

on: pull_request

permissions:
  contents: read

jobs:
  docker-build:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Delete preinstalled toolchains and stale Docker data before building;
      # `df -h` prints the resulting free space to the job log.
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          sudo docker builder prune -a --force
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/morphik-core
          # NOTE(review): {{branch}} is presumably empty on pull_request
          # events, which would make the sha tag "pr--<sha>" - confirm against
          # the metadata-action documentation.
          tags: |
            type=ref,event=pr
            type=sha,prefix=pr-{{branch}}-

      # The image is loaded into the local Docker daemon (load: true) and not
      # pushed; the extra "morphik-core:test" tag gives the test step a stable
      # local name to run.
      - name: Build Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./dockerfile
          push: false
          load: true
          tags: |
            ${{ steps.meta.outputs.tags }}
            morphik-core:test
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          # cache-to is intentionally omitted for PR builds to save disk space

      - name: Test Docker container
        run: |
          # Use the local test tag instead of the registry tag
          IMAGE_TAG="morphik-core:test"
          echo "Testing image: $IMAGE_TAG"

          # Create a minimal config file for testing.
          # The quoted 'EOF' delimiter disables shell expansion inside the
          # heredoc, so the TOML below is written verbatim.
          cat > morphik.toml.test << 'EOF'
          [api]
          host = "0.0.0.0"
          port = 8000
          reload = true
          [auth]
          jwt_algorithm = "HS256"
          dev_mode = true # Enabled by default for easier local development
          dev_entity_id = "dev_user" # Default dev user ID
          dev_entity_type = "developer" # Default dev entity type
          dev_permissions = ["read", "write", "admin"] # Default dev permissions
          #### Registered models
          [registered_models]
          # OpenAI models
          openai_gpt4 = { model_name = "gpt-4" }
          openai_gpt4-1 = { model_name = "gpt-4.1" }
          openai_gpt4o = { model_name = "gpt-4o" }
          # Embedding models
          openai_embedding = { model_name = "text-embedding-3-small" }
          openai_embedding_large = { model_name = "text-embedding-3-large" }
          #### Component configurations ####
          [agent]
          model = "openai_gpt4-1"
          [completion]
          model = "openai_gpt4o"
          default_max_tokens = "1000"
          default_temperature = 0.5
          [document_analysis]
          model = "openai_gpt4-1"
          [database]
          provider = "postgres"
          # Connection pool settings
          pool_size = 10 # Maximum number of connections in the pool
          max_overflow = 15 # Maximum number of connections that can be created beyond pool_size
          pool_recycle = 3600 # Time in seconds after which a connection is recycled (1 hour)
          pool_timeout = 10 # Seconds to wait for a connection from the pool
          pool_pre_ping = true # Check connection viability before using it from the pool
          max_retries = 3 # Number of retries for database operations
          retry_delay = 1.0 # Initial delay between retries in seconds
          [embedding]
          model = "openai_embedding" # Reference to registered model
          dimensions = 1536
          similarity_metric = "cosine"
          [parser]
          chunk_size = 6000
          chunk_overlap = 300
          use_unstructured_api = false
          use_contextual_chunking = false
          contextual_chunking_model = "openai_gpt4-1" # Reference to a key in registered_models
          [parser.vision]
          model = "openai_gpt4-1" # Reference to a key in registered_models
          frame_sample_rate = -1 # Set to -1 to disable frame captioning
          [reranker]
          use_reranker = true
          provider = "flag"
          model_name = "BAAI/bge-reranker-large"
          query_max_length = 256
          passage_max_length = 512
          use_fp16 = true
          device = "cpu" # use "cpu" if on docker and using a mac, "cuda" if cuda enabled device
          [storage]
          provider = "local"
          storage_path = "./storage"
          # [storage]
          # provider = "aws-s3"
          # region = "us-east-2"
          # bucket_name = "morphik-s3-storage"
          [vector_store]
          provider = "pgvector"
          [rules]
          model = "openai_gpt4-1"
          batch_size = 4096
          [morphik]
          enable_colpali = true
          mode = "self_hosted" # "cloud" or "self_hosted"
          api_domain = "api.morphik.ai" # API domain for cloud URIs
          [redis]
          host = "redis" # use "redis" for docker
          port = 6379
          [graph]
          model = "openai_gpt4-1"
          enable_entity_resolution = true
          EOF

          # Stop and remove the container on every exit path (success, failed
          # check, or an unexpected error under `bash -e`).
          CONTAINER_ID=""
          cleanup() {
            if [ -n "$CONTAINER_ID" ]; then
              echo "🧹 Cleaning up container"
              docker stop "$CONTAINER_ID" > /dev/null 2>&1 || true
              docker rm "$CONTAINER_ID" > /dev/null 2>&1 || true
            fi
          }
          trap cleanup EXIT

          # Print a failure message plus the container logs, then abort.
          fail() {
            echo "$1"
            echo "Container logs:"
            docker logs "$CONTAINER_ID" || true
            exit 1
          }

          # Start container in detached mode with config mounted
          CONTAINER_ID=$(docker run -d -p 8000:8000 \
            -e POSTGRES_URI="postgresql://morphik:morphik@localhost:5432/morphik" \
            -v "$(pwd)/morphik.toml.test:/app/morphik.toml" \
            "$IMAGE_TAG")
          echo "Started container: $CONTAINER_ID"

          # Wait for server to be ready with 60 second timeout
          timeout=60
          interval=2
          elapsed=0
          ready=false
          echo "Waiting for server to be ready..."
          while [ "$elapsed" -lt "$timeout" ]; do
            if curl -f -s http://localhost:8000/ping > /dev/null 2>&1; then
              echo "✅ Server is responding to /ping endpoint"
              ready=true
              break
            fi
            # Fail fast instead of polling a container that has already exited
            if [ "$(docker inspect -f '{{.State.Running}}' "$CONTAINER_ID" 2> /dev/null)" != "true" ]; then
              fail "❌ Container exited before the server became ready"
            fi
            echo "⏳ Waiting for server... (${elapsed}s/${timeout}s)"
            sleep "$interval"
            elapsed=$((elapsed + interval))
          done

          # Check if we timed out
          if [ "$ready" != "true" ]; then
            fail "❌ Server failed to respond within ${timeout} seconds"
          fi

          # Verify the response is actually 200. `|| true` keeps a connection
          # failure from aborting the script before the logs are printed.
          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ping || true)
          if [ "$HTTP_CODE" = "200" ]; then
            echo "✅ Health check passed - /ping returned HTTP $HTTP_CODE"
          else
            fail "❌ Health check failed - /ping returned HTTP $HTTP_CODE"
          fi

          echo "✅ Test completed successfully"