Skip to content

Commit 677299e

Browse files
authored
Merge pull request #214 from maykinmedia/feature/oaf-188-csv-data-dump
add csv option to data dump script
2 parents 6b293b5 + b8214e2 commit 677299e

File tree

8 files changed

+90
-46
lines changed

8 files changed

+90
-46
lines changed

.github/workflows/bin-check.yml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ jobs:
3939
python src/manage.py migrate
4040
src/manage.py loaddata demodata
4141
SCRIPTPATH=bin DUMP_FILE=dump.sql bin/dump_data.sh --combined
42+
SCRIPTPATH=bin TAR_FILE=dump.tar bin/dump_data.sh --csv
4243
env:
4344
DB_PASSWORD: ""
4445
DB_USER: postgres
@@ -56,3 +57,10 @@ jobs:
5657
run: |
5758
createdb -h localhost -U postgres test
5859
psql -v ON_ERROR_STOP=1 -h localhost -U postgres -d test -f dump.sql
60+
61+
- name: validate csv dump
62+
run: |
63+
tar -xf dump.tar
64+
test -f core_objecttype.csv || exit 1
65+
! test -f auth_group.csv || exit 1
66+
grep "id,uuid,name,name_plural,contact_email,description,maintainer_department,maintainer_organization,data_classification,contact_person,documentation_url,labels,provider_organization,source,update_frequency,created_at,modified_at,allow_geometry" core_objecttype.csv

Dockerfile

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,13 @@
11
# Stage 1 - Compile needed python dependencies
2-
FROM python:3.12-slim-bookworm AS build
2+
FROM python:3.12-slim-trixie AS build
33

44
RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-recommends \
55
pkg-config \
66
build-essential \
77
libpq-dev \
88
# required for (log) routing support in uwsgi
9-
libpcre3 \
10-
libpcre3-dev \
9+
libpcre2-8-0 \
10+
libpcre2-dev \
1111
git \
1212
&& rm -rf /var/lib/apt/lists/*
1313

@@ -19,7 +19,7 @@ RUN pip install -r requirements/production.txt
1919

2020

2121
# Stage 2 - build frontend
22-
FROM node:24-alpine AS frontend-build
22+
FROM node:24-trixie-slim AS frontend-build
2323

2424
WORKDIR /app
2525

@@ -35,7 +35,7 @@ RUN npm run build
3535

3636

3737
# Stage 3 - Build docker image suitable for execution and deployment
38-
FROM python:3.12-slim-bookworm AS production
38+
FROM python:3.12-slim-trixie AS production
3939

4040
# Stage 3.1 - Set up the needed production dependencies
4141
# install all the dependencies for GeoDjango
@@ -44,11 +44,11 @@ RUN apt-get update && apt-get upgrade -y && apt-get install -y --no-install-reco
4444
procps \
4545
vim \
4646
# serve correct Content-Type headers
47-
mime-support \
47+
media-types \
4848
# (geo) django dependencies
4949
postgresql-client \
5050
gettext \
51-
libpcre3 \
51+
libpcre2-8-0 \
5252
binutils \
5353
libproj-dev \
5454
gdal-bin \

bin/dump_data.sh

Lines changed: 63 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -10,49 +10,58 @@
1010
# or --combined which appends the data dump to the schema dump.
1111
# The schema dump could not use -t to filter tables because this excludes extensions like postgis in the dump.
1212
# pg_dump also does not add related tables automatically, so `dump_data.sh` does not add related data from accounts to the dump.
13+
#
14+
# with --csv a csv dump can be created for all tables in the given components. The csv files will be generated in the temporary directory csv_dumps
15+
# and combined into a single TAR archive csv_dumps.
1316

14-
15-
set -e
17+
set -euo pipefail
1618

1719
DEFAULT_APPS=(core)
1820

1921
export PGHOST=${DB_HOST:-db}
2022
export PGPORT=${DB_PORT:-5432}
21-
export PGUSER=${DB_USER:-objectypes}
22-
export PGDATABASE=${DB_NAME:-objectypes}
23+
export PGUSER=${DB_USER:-objecttypes}
24+
export PGDATABASE=${DB_NAME:-objecttypes}
2325
export PGPASSWORD=${DB_PASSWORD:-""}
2426

2527
SCRIPT=$(readlink -f "$0")
2628
SCRIPTPATH=$(dirname "$SCRIPT")
2729

2830
${SCRIPTPATH}/wait_for_db.sh
2931

30-
DUMP_FILE=${DUMP_FILE:-"dump_$(date +'%Y-%m-%d_%H-%M-%S').sql"}
32+
DEFAULT_FILE_NAME="dump_$(date +'%Y-%m-%d_%H-%M-%S')"
33+
DUMP_FILE=${DUMP_FILE:-"$DEFAULT_FILE_NAME.sql"}
34+
TAR_FILE=${TAR_FILE:-"$DEFAULT_FILE_NAME.tar"}
35+
CSV_OUTPUT_DIR="csv_dumps"
3136

37+
CSV=false
3238
SCHEMA=true
3339
DATA=true
3440
COMBINED=false
41+
APPS=()
3542

3643
for arg in "$@"; do
37-
case "$arg" in
44+
case "$arg" in
45+
--csv) CSV=true ;;
3846
--schema-only) DATA=false ;;
39-
--data-only) SCHEMA=false ;;
40-
--combined) COMBINED=true ;;
47+
--data-only) SCHEMA=false ;;
48+
--combined) COMBINED=true ;;
4149
--*)
42-
echo "Unknown flag: $arg"
43-
exit 1
44-
;;
50+
echo "Unknown flag: $arg"
51+
exit 1
52+
;;
4553
*)
46-
APPS+=("$arg") ;;
47-
esac
54+
APPS+=("$arg")
55+
;;
56+
esac
4857
done
4958

5059
# export given apps or export DEFAULT_APPS
5160
if [ "${#APPS[@]}" -eq 0 ]; then
52-
APPS=("${DEFAULT_APPS[@]}")
61+
APPS=("${DEFAULT_APPS[@]}")
5362
fi
5463

55-
>&2 echo "exporting: ${APPS[*]}"
64+
echo >&2 "exporting: ${APPS[*]}"
5665

5766
# create -t flags for each app
5867
INCLUDES=()
@@ -61,32 +70,59 @@ for app in "${APPS[@]}"; do
6170
done
6271

6372
dump_schema() {
64-
echo "Dumping schema to $1..."
65-
pg_dump --schema-only -f "$1"
73+
echo "Dumping schema to $1..."
74+
pg_dump --schema-only -f "$1"
6675
}
6776

6877
dump_data() {
69-
echo "Dumping data to $1..."
70-
pg_dump "${INCLUDES[@]}" --disable-triggers --data-only > "$1"
78+
echo "Dumping data to $1..."
79+
pg_dump "${INCLUDES[@]}" --disable-triggers --data-only >"$1"
7180
}
7281

7382
append_data() {
74-
echo "Appending data to $1..."
75-
pg_dump "${INCLUDES[@]}" --disable-triggers --data-only \
76-
| sed '/^SET\|^SELECT pg_catalog.set_config/d' >> "$1"
83+
echo "Appending data to $1..."
84+
pg_dump "${INCLUDES[@]}" --disable-triggers --data-only |
85+
sed '/^SET\|^SELECT pg_catalog.set_config/d' >>"$1"
7786
}
7887

88+
dump_csv() {
89+
mkdir -p $CSV_OUTPUT_DIR
90+
echo "Dumping data to csv..."
91+
92+
WHERE_CLAUSE=""
93+
for app in "${APPS[@]}"; do
94+
if [ -n "$WHERE_CLAUSE" ]; then
95+
WHERE_CLAUSE+=" OR "
96+
fi
97+
WHERE_CLAUSE+="tablename LIKE '${app}_%'"
98+
done
99+
100+
TABLES=$(psql -Atc "SELECT tablename FROM pg_tables WHERE schemaname='public' AND ($WHERE_CLAUSE);")
101+
102+
for table in $TABLES; do
103+
echo "dumping $table..."
104+
psql -c "\copy $table TO '$CSV_OUTPUT_DIR/$table.csv' WITH CSV HEADER"
105+
done
106+
107+
tar -cf "$TAR_FILE" -C "$CSV_OUTPUT_DIR" .
108+
rm -rf "$CSV_OUTPUT_DIR"
109+
}
110+
111+
if $CSV; then
112+
dump_csv
113+
exit 0
114+
fi
79115

80116
if $COMBINED; then
81-
dump_schema "$DUMP_FILE"
82-
append_data "$DUMP_FILE"
83-
exit 0
117+
dump_schema "$DUMP_FILE"
118+
append_data "$DUMP_FILE"
119+
exit 0
84120
fi
85121

86122
if $SCHEMA; then
87-
dump_schema "schema__$DUMP_FILE"
123+
dump_schema "schema__$DUMP_FILE"
88124
fi
89125

90126
if $DATA; then
91-
dump_data "data__$DUMP_FILE"
127+
dump_data "data__$DUMP_FILE"
92128
fi

docker-compose.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,12 +6,12 @@ services:
66
POSTGRES_PASSWORD: objecttypes
77
command: postgres -c max_connections=300 -c log_min_messages=LOG
88
networks:
9-
- objecttypes-api-dev
9+
- objecttypes-api-dev
1010

1111
redis:
1212
image: redis
1313
networks:
14-
- objecttypes-api-dev
14+
- objecttypes-api-dev
1515

1616
web:
1717
image: maykinmedia/objecttypes-api:latest
@@ -39,8 +39,8 @@ services:
3939
- app=objecttypes-api
4040
- service=api
4141
networks:
42-
- objecttypes-api-dev
43-
42+
- objecttypes-api-dev
43+
4444

4545
web-init:
4646
image: maykinmedia/objecttypes-api:latest
@@ -57,12 +57,12 @@ services:
5757
depends_on:
5858
- db
5959
networks:
60-
- objecttypes-api-dev
60+
- objecttypes-api-dev
6161

6262
volumes:
6363
db:
6464
log:
6565

6666
networks:
6767
objecttypes-api-dev:
68-
name: objecttypes-api-dev
68+
name: objecttypes-api-dev

requirements/base.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ urllib3==2.5.0
353353
# elastic-apm
354354
# requests
355355
# sentry-sdk
356-
uwsgi==2.0.29
356+
uwsgi==2.0.31
357357
# via open-api-framework
358358
vine==5.1.0
359359
# via

requirements/ci.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,7 @@ urllib3==2.5.0
663663
# requests
664664
# sentry-sdk
665665
# vcrpy
666-
uwsgi==2.0.29
666+
uwsgi==2.0.31
667667
# via
668668
# -c requirements/base.txt
669669
# -r requirements/base.txt

requirements/dev.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,7 @@ urllib3==2.5.0
814814
# requests
815815
# sentry-sdk
816816
# vcrpy
817-
uwsgi==2.0.29
817+
uwsgi==2.0.31
818818
# via
819819
# -c requirements/ci.txt
820820
# -r requirements/ci.txt

src/objecttypes/conf/docker.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
import os
22

33
os.environ.setdefault("DB_HOST", "db")
4-
os.environ.setdefault("DB_NAME", "postgres")
5-
os.environ.setdefault("DB_USER", "postgres")
6-
os.environ.setdefault("DB_PASSWORD", "")
4+
os.environ.setdefault("DB_NAME", "objecttypes")
5+
os.environ.setdefault("DB_USER", "objecttypes")
6+
os.environ.setdefault("DB_PASSWORD", "objecttypes")
77
os.environ.setdefault("DB_CONN_MAX_AGE", "60")
88

99
os.environ.setdefault("ENVIRONMENT", "docker")

0 commit comments

Comments (0)