# docker-compose.pg.yml
# Compose project name.
name: "anycrawl-pg"

# Settings shared by the api and scrape-* services, pulled in with
# `<<: *common-service`: the project network and the shared storage volume.
x-common-service: &common-service
  networks:
    - "anycrawl-network"
  volumes:
    - "anycrawl-storage:/usr/src/app/storage"
# Environment shared by the api and scrape-* services, pulled in with
# `<<: *common-env`. Values written as ${VAR:-default} are read from the
# environment Compose runs in and fall back to the default shown.
x-common-env: &common-env
  NODE_ENV: "${NODE_ENV:-production}"
  ANYCRAWL_NAME: "${ANYCRAWL_NAME:-AnyCrawl}"
  ANYCRAWL_DOMAIN: "${ANYCRAWL_DOMAIN:-http://localhost:8080}"
  ANYCRAWL_HEADLESS: "${ANYCRAWL_HEADLESS:-true}"

  # Both proxy settings default to an empty string.
  ANYCRAWL_PROXY_URL: "${ANYCRAWL_PROXY_URL:-}"
  ANYCRAWL_PROXY_CONFIG: "${ANYCRAWL_PROXY_CONFIG:-}"
  ANYCRAWL_IGNORE_SSL_ERROR: "${ANYCRAWL_IGNORE_SSL_ERROR:-true}"

  # Fixed value: points at the `redis` service defined in this file.
  ANYCRAWL_REDIS_URL: "redis://redis:6379"

  ANYCRAWL_API_PORT: "${ANYCRAWL_API_PORT:-8080}"
  ANYCRAWL_API_AUTH_ENABLED: "${ANYCRAWL_API_AUTH_ENABLED:-true}"
  ANYCRAWL_API_CREDITS_ENABLED: "${ANYCRAWL_API_CREDITS_ENABLED:-true}"

  # Connection string is assembled from the same POSTGRES_* variables (and the
  # same `anycrawl` defaults) that configure the `postgres` service.
  # NOTE(review): the password is inserted verbatim into the URI; presumably
  # it must be URL-safe - confirm before using special characters.
  ANYCRAWL_API_DB_TYPE: "postgresql"
  ANYCRAWL_API_DB_CONNECTION: "postgres://${POSTGRES_USER:-anycrawl}:${POSTGRES_PASSWORD:-anycrawl}@postgres:5432/${POSTGRES_DB:-anycrawl}"

  # Same path the anycrawl-storage volume is mounted at in common-service.
  ANYCRAWL_LOCAL_STORAGE_DIR: "/usr/src/app/storage"

  ANYCRAWL_KEEP_ALIVE: "${ANYCRAWL_KEEP_ALIVE:-true}"
  ANYCRAWL_BROWSER_IDLE_RETIRE_SECS: "${ANYCRAWL_BROWSER_IDLE_RETIRE_SECS:-3600}"
  ANYCRAWL_BROWSER_MAX_PAGES_PER_BROWSER: "${ANYCRAWL_BROWSER_MAX_PAGES_PER_BROWSER:-500}"
  ANYCRAWL_BROWSER_MAX_OPEN_PAGES_PER_BROWSER: "${ANYCRAWL_BROWSER_MAX_OPEN_PAGES_PER_BROWSER:-20}"
  ANYCRAWL_BROWSER_ISOLATE_CONTEXTS: "${ANYCRAWL_BROWSER_ISOLATE_CONTEXTS:-true}"

  MIGRATE_DATABASE: "${MIGRATE_DATABASE:-true}"

  ANYCRAWL_SCHEDULER_ENABLED: "${ANYCRAWL_SCHEDULER_ENABLED:-true}"
  ANYCRAWL_WEBHOOKS_ENABLED: "${ANYCRAWL_WEBHOOKS_ENABLED:-true}"
  ANYCRAWL_WEBHOOKS_QUEUE_CONCURRENCY: "${ANYCRAWL_WEBHOOKS_QUEUE_CONCURRENCY:-10}"
  ALLOW_LOCAL_WEBHOOKS: "${ALLOW_LOCAL_WEBHOOKS:-false}"
# Application services (api, scrape-*) and their backing stores (redis,
# postgres). Each application service starts only after both stores report
# healthy, and every service restarts unless explicitly stopped.
services:
  # API service; the only one that publishes a port on the host.
  api:
    <<: *common-service
    image: ghcr.io/any4ai/anycrawl-api:latest
    environment:
      <<: *common-env
    ports:
      # Host port is configurable; the container side is fixed at 8080.
      # NOTE(review): ANYCRAWL_API_PORT is also passed into the container via
      # common-env - confirm the API still listens on 8080 when it is
      # overridden.
      - "${ANYCRAWL_API_PORT:-8080}:8080"
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped

  # Playwright scrape service; adds its own Crawlee storage directory on top
  # of the shared environment.
  scrape-playwright:
    <<: *common-service
    image: ghcr.io/any4ai/anycrawl-scrape-playwright:latest
    environment:
      <<: *common-env
      ANYCRAWL_CRAWLEE_STORAGE_DIR: /tmp/anycrawl-crawlee-storage/playwright
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped

  # Cheerio scrape service; same layout as scrape-playwright with a separate
  # Crawlee storage directory.
  scrape-cheerio:
    <<: *common-service
    image: ghcr.io/any4ai/anycrawl-scrape-cheerio:latest
    environment:
      <<: *common-env
      ANYCRAWL_CRAWLEE_STORAGE_DIR: /tmp/anycrawl-crawlee-storage/cheerio
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped

  # Puppeteer scrape service; started only when the `puppeteer` profile is
  # enabled. The image platform defaults to linux/amd64 and can be overridden
  # with ANYCRAWL_PUPPETEER_PLATFORM.
  scrape-puppeteer:
    <<: *common-service
    profiles: ["puppeteer"]
    image: ghcr.io/any4ai/anycrawl-scrape-puppeteer:latest
    platform: ${ANYCRAWL_PUPPETEER_PLATFORM:-linux/amd64}
    environment:
      <<: *common-env
      ANYCRAWL_CRAWLEE_STORAGE_DIR: /tmp/anycrawl-crawlee-storage/puppeteer
    depends_on:
      postgres:
        condition: service_healthy
      redis:
        condition: service_healthy
    restart: unless-stopped

  # Redis with append-only persistence, stored in the redis-data volume.
  redis:
    image: redis:7-alpine
    command: redis-server --appendonly yes
    volumes:
      - redis-data:/data
    networks:
      - anycrawl-network
    healthcheck:
      test: ["CMD", "redis-cli", "ping"]
      interval: 10s
      timeout: 3s
      retries: 5
    restart: unless-stopped

  # PostgreSQL; database name, user and password all default to `anycrawl`.
  postgres:
    image: postgres:16-alpine
    environment:
      POSTGRES_DB: ${POSTGRES_DB:-anycrawl}
      POSTGRES_USER: ${POSTGRES_USER:-anycrawl}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-anycrawl}
    volumes:
      - postgres-data:/var/lib/postgresql/data
    networks:
      - anycrawl-network
    healthcheck:
      # Compose interpolates these variables itself, so they need the same
      # defaults as the environment above; without them an unset
      # POSTGRES_USER/POSTGRES_DB becomes an empty string and the probe runs
      # with the wrong arguments.
      test:
        - "CMD-SHELL"
        - "pg_isready -U ${POSTGRES_USER:-anycrawl} -d ${POSTGRES_DB:-anycrawl}"
      interval: 10s
      timeout: 5s
      retries: 10
    restart: unless-stopped
# Named volumes: shared application storage plus the Redis and PostgreSQL data
# directories. None of them sets any driver options.
volumes:
  anycrawl-storage: {}
  redis-data: {}
  postgres-data: {}
# Single bridge network that every service in this file is attached to.
networks:
  anycrawl-network:
    driver: "bridge"