# CI workflow: builds the project's Docker image on every pull request and
# smoke-tests it by starting a container and polling its /ping endpoint.
# Nothing is pushed to a registry; the image is only loaded into the local
# Docker daemon of the runner.
name: Build Docker Image

on: pull_request

permissions:
  contents: read

jobs:
  docker-build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Delete preinstalled toolchains and stale Docker data before the build;
      # `df -h` logs the remaining space for troubleshooting.
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          sudo docker builder prune -a --force
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/morphik-core
          # The sha tag uses a fixed "pr-" prefix: the metadata-action branch
          # placeholder is empty on pull_request events and would otherwise
          # yield tags of the form "pr--<sha>".
          tags: |
            type=ref,event=pr
            type=sha,prefix=pr-

      - name: Build Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./dockerfile
          push: false
          # Load the result into the local daemon so the test step can run it.
          load: true
          tags: |
            ${{ steps.meta.outputs.tags }}
            morphik-core:test
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          # Remove cache-to for PR builds to save disk space

      # Smoke test: write a minimal config, start the container with it
      # mounted, and require HTTP 200 from /ping within the timeout.
      - name: Test Docker container
        run: |
          # Use the local test tag instead of the registry tag
          IMAGE_TAG="morphik-core:test"
          echo "Testing image: $IMAGE_TAG"

          # Create a minimal config file for testing.
          # The heredoc delimiter is quoted, so nothing inside is expanded by
          # the shell; it must be closed by a bare EOF line.
          cat > morphik.toml.test << 'EOF'
          [api]
          host = "0.0.0.0"
          port = 8000
          reload = true

          [auth]
          jwt_algorithm = "HS256"
          dev_mode = true  # Enabled by default for easier local development
          dev_entity_id = "dev_user"  # Default dev user ID
          dev_entity_type = "developer"  # Default dev entity type
          dev_permissions = ["read", "write", "admin"]  # Default dev permissions

          # Registered models
          [registered_models]

          # OpenAI models
          openai_gpt4 = { model_name = "gpt-4" }
          openai_gpt4-1 = { model_name = "gpt-4.1" }
          openai_gpt4o = { model_name = "gpt-4o" }

          # Embedding models
          openai_embedding = { model_name = "text-embedding-3-small" }
          openai_embedding_large = { model_name = "text-embedding-3-large" }

          # Component configurations

          [agent]
          model = "openai_gpt4-1"

          [completion]
          model = "openai_gpt4o"
          default_max_tokens = "1000"
          default_temperature = 0.5

          [document_analysis]
          model = "openai_gpt4-1"

          [database]
          provider = "postgres"
          # Connection pool settings
          pool_size = 10  # Maximum number of connections in the pool
          max_overflow = 15  # Maximum number of connections that can be created beyond pool_size
          pool_recycle = 3600  # Time in seconds after which a connection is recycled (1 hour)
          pool_timeout = 10  # Seconds to wait for a connection from the pool
          pool_pre_ping = true  # Check connection viability before using it from the pool
          max_retries = 3  # Number of retries for database operations
          retry_delay = 1.0  # Initial delay between retries in seconds

          [embedding]
          model = "openai_embedding"  # Reference to registered model
          dimensions = 1536
          similarity_metric = "cosine"

          [parser]
          chunk_size = 6000
          chunk_overlap = 300
          use_unstructured_api = false
          use_contextual_chunking = false
          contextual_chunking_model = "openai_gpt4-1"  # Reference to a key in registered_models

          [parser.vision]
          model = "openai_gpt4-1"  # Reference to a key in registered_models
          frame_sample_rate = -1  # Set to -1 to disable frame captioning

          [reranker]
          use_reranker = true
          provider = "flag"
          model_name = "BAAI/bge-reranker-large"
          query_max_length = 256
          passage_max_length = 512
          use_fp16 = true
          device = "cpu"  # use "cpu" if on docker and using a mac, "cuda" if cuda enabled device

          [storage]
          provider = "local"
          storage_path = "./storage"

          # [storage]
          # provider = "aws-s3"
          # region = "us-east-2"
          # bucket_name = "morphik-s3-storage"

          [vector_store]
          provider = "pgvector"

          [rules]
          model = "openai_gpt4-1"
          batch_size = 4096

          [morphik]
          enable_colpali = true
          mode = "self_hosted"  # "cloud" or "self_hosted"
          api_domain = "api.morphik.ai"  # API domain for cloud URIs

          [redis]
          host = "redis"  # use "redis" for docker
          port = 6379

          [graph]
          model = "openai_gpt4-1"
          enable_entity_resolution = true
          EOF

          # Start container in detached mode with config mounted
          CONTAINER_ID=$(docker run -d -p 8000:8000 \
            -e POSTGRES_URI="postgresql://morphik:morphik@localhost:5432/morphik" \
            -v "$(pwd)/morphik.toml.test:/app/morphik.toml" \
            "$IMAGE_TAG")

          echo "Started container: $CONTAINER_ID"

          # Stop and remove the container on every exit path (success, failed
          # check, or a command aborting the script), so it is never leaked.
          cleanup() {
            echo "🧹 Cleaning up container"
            docker stop "$CONTAINER_ID" > /dev/null 2>&1 || true
            docker rm "$CONTAINER_ID" > /dev/null 2>&1 || true
          }
          trap cleanup EXIT

          # Wait for server to be ready with 60 second timeout
          timeout=60
          interval=2
          elapsed=0
          ready=false

          echo "Waiting for server to be ready..."
          while [ "$elapsed" -lt "$timeout" ]; do
            if curl -f -s http://localhost:8000/ping > /dev/null 2>&1; then
              echo "✅ Server is responding to /ping endpoint"
              ready=true
              break
            fi

            # Fail fast when the container has already exited instead of
            # polling a dead server until the timeout expires.
            if [ "$(docker inspect -f '{{.State.Running}}' "$CONTAINER_ID" 2> /dev/null)" != "true" ]; then
              echo "❌ Container exited before the server became ready"
              echo "Container logs:"
              docker logs "$CONTAINER_ID"
              exit 1
            fi

            echo "⏳ Waiting for server... (${elapsed}s/${timeout}s)"
            sleep "$interval"
            elapsed=$((elapsed + interval))
          done

          # Check if we timed out
          if [ "$ready" != "true" ]; then
            echo "❌ Server failed to respond within ${timeout} seconds"
            echo "Container logs:"
            docker logs "$CONTAINER_ID"
            exit 1
          fi

          # Verify the response is actually 200. A failed curl must not abort
          # the script here, so the failure branch can report code and logs.
          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ping || true)
          if [ "$HTTP_CODE" = "200" ]; then
            echo "✅ Health check passed - /ping returned HTTP $HTTP_CODE"
          else
            echo "❌ Health check failed - /ping returned HTTP $HTTP_CODE"
            docker logs "$CONTAINER_ID"
            exit 1
          fi

          echo "✅ Test completed successfully"