Skip to content

Commit c0a223e

Browse files
authored
Build + Publish Docker image (#169)
1 parent 4b9ea82 commit c0a223e

2 files changed

Lines changed: 280 additions & 0 deletions

File tree

.github/workflows/docker-build.yml

Lines changed: 214 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,214 @@
1+
# CI workflow: on every pull request, build the Docker image locally
# (nothing is pushed) and smoke-test the resulting container by polling
# its /ping endpoint.
name: Build Docker Image

on: pull_request

permissions:
  contents: read

jobs:
  docker-build:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Delete preinstalled toolchains and cached Docker data, then print
      # the remaining disk usage.
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          sudo docker builder prune -a --force
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Compute image tags and labels; consumed by the build step below
      # through steps.meta.outputs.
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/morphik-core
          # NOTE(review): {{branch}} is presumably empty on pull_request
          # events (the ref is not a branch), which would yield "pr--<sha>".
          # Confirm against the metadata-action docs.
          tags: |
            type=ref,event=pr
            type=sha,prefix=pr-{{branch}}-

      # Build without pushing; `load: true` makes the image available to the
      # local Docker daemon so the test step can run it.
      - name: Build Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./dockerfile
          push: false
          load: true
          tags: |
            ${{ steps.meta.outputs.tags }}
            morphik-core:test
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          # cache-to is intentionally omitted for PR builds to save disk space

      # Smoke test: write a minimal config, start the container, wait up to
      # 60 seconds for /ping to answer, then require an HTTP 200.
      # NOTE(review): this job starts no Postgres or Redis service, although
      # the config and POSTGRES_URI reference them; confirm the app can
      # answer /ping without those dependencies.
      - name: Test Docker container
        run: |
          # Use the local test tag instead of the registry tag
          IMAGE_TAG="morphik-core:test"
          echo "Testing image: $IMAGE_TAG"

          # Create a minimal config file for testing
          cat > morphik.toml.test << 'EOF'
          [api]
          host = "0.0.0.0"
          port = 8000
          reload = true

          [auth]
          jwt_algorithm = "HS256"
          dev_mode = true # Enabled by default for easier local development
          dev_entity_id = "dev_user" # Default dev user ID
          dev_entity_type = "developer" # Default dev entity type
          dev_permissions = ["read", "write", "admin"] # Default dev permissions

          #### Registered models
          [registered_models]

          # OpenAI models
          openai_gpt4 = { model_name = "gpt-4" }
          openai_gpt4-1 = { model_name = "gpt-4.1" }
          openai_gpt4o = { model_name = "gpt-4o" }

          # Embedding models
          openai_embedding = { model_name = "text-embedding-3-small" }
          openai_embedding_large = { model_name = "text-embedding-3-large" }

          #### Component configurations ####

          [agent]
          model = "openai_gpt4-1"

          [completion]
          model = "openai_gpt4o"
          default_max_tokens = "1000"
          default_temperature = 0.5

          [document_analysis]
          model = "openai_gpt4-1"

          [database]
          provider = "postgres"
          # Connection pool settings
          pool_size = 10 # Maximum number of connections in the pool
          max_overflow = 15 # Maximum number of connections that can be created beyond pool_size
          pool_recycle = 3600 # Time in seconds after which a connection is recycled (1 hour)
          pool_timeout = 10 # Seconds to wait for a connection from the pool
          pool_pre_ping = true # Check connection viability before using it from the pool
          max_retries = 3 # Number of retries for database operations
          retry_delay = 1.0 # Initial delay between retries in seconds

          [embedding]
          model = "openai_embedding" # Reference to registered model
          dimensions = 1536
          similarity_metric = "cosine"

          [parser]
          chunk_size = 6000
          chunk_overlap = 300
          use_unstructured_api = false
          use_contextual_chunking = false
          contextual_chunking_model = "openai_gpt4-1" # Reference to a key in registered_models

          [parser.vision]
          model = "openai_gpt4-1" # Reference to a key in registered_models
          frame_sample_rate = -1 # Set to -1 to disable frame captioning

          [reranker]
          use_reranker = true
          provider = "flag"
          model_name = "BAAI/bge-reranker-large"
          query_max_length = 256
          passage_max_length = 512
          use_fp16 = true
          device = "cpu" # use "cpu" if on docker and using a mac, "cuda" if cuda enabled device

          [storage]
          provider = "local"
          storage_path = "./storage"

          # [storage]
          # provider = "aws-s3"
          # region = "us-east-2"
          # bucket_name = "morphik-s3-storage"

          [vector_store]
          provider = "pgvector"

          [rules]
          model = "openai_gpt4-1"
          batch_size = 4096

          [morphik]
          enable_colpali = true
          mode = "self_hosted" # "cloud" or "self_hosted"
          api_domain = "api.morphik.ai" # API domain for cloud URIs

          [redis]
          host = "redis" # use "redis" for docker
          port = 6379

          [graph]
          model = "openai_gpt4-1"
          enable_entity_resolution = true
          EOF
          # The EOF line above terminates the heredoc. Without it, everything
          # below would be written into morphik.toml.test instead of being
          # executed, and the step would pass without testing anything.

          # Start container in detached mode with config mounted
          CONTAINER_ID=$(docker run -d -p 8000:8000 \
            -e POSTGRES_URI="postgresql://morphik:morphik@localhost:5432/morphik" \
            -v "$(pwd)/morphik.toml.test:/app/morphik.toml" \
            "$IMAGE_TAG")

          echo "Started container: $CONTAINER_ID"

          # Wait for server to be ready with 60 second timeout
          timeout=60
          interval=2
          elapsed=0

          echo "Waiting for server to be ready..."
          while [ $elapsed -lt $timeout ]; do
            if curl -f -s http://localhost:8000/ping > /dev/null 2>&1; then
              echo "✅ Server is responding to /ping endpoint"
              break
            fi

            echo "⏳ Waiting for server... (${elapsed}s/${timeout}s)"
            sleep $interval
            elapsed=$((elapsed + interval))
          done

          # Check if we timed out: a successful ping breaks out of the loop
          # before elapsed is incremented, so elapsed only reaches the
          # timeout when every attempt failed.
          if [ $elapsed -ge $timeout ]; then
            echo "❌ Server failed to respond within ${timeout} seconds"
            echo "Container logs:"
            docker logs "$CONTAINER_ID"
            docker stop "$CONTAINER_ID"
            docker rm "$CONTAINER_ID"
            exit 1
          fi

          # Verify the response is actually 200
          HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" http://localhost:8000/ping)
          if [ "$HTTP_CODE" = "200" ]; then
            echo "✅ Health check passed - /ping returned HTTP $HTTP_CODE"
          else
            echo "❌ Health check failed - /ping returned HTTP $HTTP_CODE"
            docker logs "$CONTAINER_ID"
            docker stop "$CONTAINER_ID"
            docker rm "$CONTAINER_ID"
            exit 1
          fi

          # Clean up
          echo "🧹 Cleaning up container"
          docker stop "$CONTAINER_ID"
          docker rm "$CONTAINER_ID"
          echo "✅ Test completed successfully"
Lines changed: 66 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,66 @@
1+
# CD workflow: on every push to main, build the Docker image and push it to
# the GitHub Container Registry (ghcr.io).
name: Publish Docker Image

on:
  push:
    branches:
      - main

permissions:
  contents: read
  # Needed so GITHUB_TOKEN can push the image to ghcr.io.
  packages: write

jobs:
  docker-publish:
    runs-on: ubuntu-latest

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # Print the context values used for the registry login and image name.
      # They are passed through env rather than interpolated into the script
      # text, so the shell never parses their contents as code.
      - name: Debug GitHub context
        env:
          REPOSITORY: ${{ github.repository }}
          REPOSITORY_OWNER: ${{ github.repository_owner }}
          ACTOR: ${{ github.actor }}
          REF: ${{ github.ref }}
        run: |
          echo "Repository: $REPOSITORY"
          echo "Repository Owner: $REPOSITORY_OWNER"
          echo "Actor: $ACTOR"
          echo "Ref: $REF"

      # Delete preinstalled toolchains and cached Docker data, then print
      # the remaining disk usage.
      - name: Free up disk space
        run: |
          sudo rm -rf /usr/share/dotnet
          sudo rm -rf /usr/local/lib/android
          sudo rm -rf /opt/ghc
          sudo rm -rf /opt/hostedtoolcache/CodeQL
          sudo docker image prune --all --force
          sudo docker builder prune -a --force
          df -h

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to GitHub Container Registry
        uses: docker/login-action@v3
        with:
          registry: ghcr.io
          username: ${{ github.actor }}
          password: ${{ secrets.GITHUB_TOKEN }}

      # Compute image tags and labels: the branch name, a branch-prefixed
      # commit SHA, and `latest` when building the default branch.
      - name: Extract metadata
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ghcr.io/${{ github.repository_owner }}/morphik-core
          tags: |
            type=ref,event=branch
            type=sha,prefix={{branch}}-
            type=raw,value=latest,enable={{is_default_branch}}

      # Build and push, reading from and writing to the GitHub Actions cache.
      - name: Build and push Docker image
        uses: docker/build-push-action@v5
        with:
          context: .
          file: ./dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          cache-from: type=gha
          cache-to: type=gha,mode=min

0 commit comments

Comments
 (0)