Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
c8b5e18
set web to edge, katsu to subject-optional pr, public to map pr
davidlougheed Oct 20, 2025
870b7c2
set reference to edge
davidlougheed Oct 20, 2025
a5cae53
set aggregation to edge
davidlougheed Oct 20, 2025
4f10c12
Merge remote-tracking branch 'origin/main' into releases/v20
davidlougheed Oct 21, 2025
3a431db
set drop box to edge
davidlougheed Oct 21, 2025
2d0bca8
set katsu to edge
davidlougheed Oct 21, 2025
0d8dcba
ci: update actions
davidlougheed Oct 28, 2025
a8cc852
set drs and wes to edge
davidlougheed Oct 28, 2025
9e0e79c
set authz to edge
davidlougheed Oct 29, 2025
5a76193
chore: init and todos
v-rocheleau Jun 4, 2025
fe4b068
wip: start replace todos
BarrothTheDragon Jun 5, 2025
9b3119c
configure bento env with etl image
BarrothTheDragon Jun 9, 2025
521a76b
add etl feature flag
BarrothTheDragon Jun 12, 2025
d4c0963
setup alias and repo source
BarrothTheDragon Jun 12, 2025
dde6d54
remove todo
BarrothTheDragon Jun 12, 2025
97a3918
move etl service name to maintain alphabetical ordering
BarrothTheDragon Jun 12, 2025
394b646
create etl gateway config
BarrothTheDragon Jun 12, 2025
f984a74
correct indentation
BarrothTheDragon Jun 12, 2025
9b88afc
set unique ports for etl
BarrothTheDragon Jun 12, 2025
3ac0db1
add etl service
BarrothTheDragon Jun 12, 2025
1d53942
add service_url_base_path to etl docker compose
BarrothTheDragon Jun 16, 2025
5d62bd7
deactivate ssl validation when on dev
BarrothTheDragon Jun 16, 2025
4aef595
add authz environment variables
BarrothTheDragon Jun 16, 2025
ac72ce9
add client id to etl env
BarrothTheDragon Jun 17, 2025
f7c7132
create etl installation steps file
BarrothTheDragon Jun 30, 2025
1666a59
document etl ports
BarrothTheDragon Jun 30, 2025
c2f25a9
setup etl database
BarrothTheDragon Jun 30, 2025
74084ab
setup docker compose for etl and db communication
BarrothTheDragon Jul 3, 2025
5b602b6
set etl-db port in documentation
BarrothTheDragon Jul 3, 2025
d949243
remove etl-db
BarrothTheDragon Jul 14, 2025
49b59ba
increaed pr count
BarrothTheDragon Aug 27, 2025
89c7d91
update etl description in docs
BarrothTheDragon Jul 25, 2025
9d8ba9e
remove todos
BarrothTheDragon Jul 25, 2025
395af5d
set correct test point
BarrothTheDragon Jul 25, 2025
99b15d2
lint
BarrothTheDragon Jul 25, 2025
182c6ac
chore: pin stable versions for ingestion without s3
noctillion Aug 14, 2025
8df4a0f
chore: pin edge versions
noctillion Sep 3, 2025
711b414
Added volume for etl
SanjeevLakhwani Sep 25, 2025
4e8eb36
added more env vars for db
SanjeevLakhwani Sep 29, 2025
8406c97
fix linting and type errors
SanjeevLakhwani Nov 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/lint-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
lint:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v5

- uses: actions/setup-python@v4
- uses: actions/setup-python@v6
name: Set up Python
with:
python-version: "3.10"
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/test-actions.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ jobs:
test_dev_init:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- uses: actions/checkout@v5

- uses: actions/setup-python@v4
- uses: actions/setup-python@v6
name: Set up Python
with:
python-version: "3.10"
Expand Down
15 changes: 15 additions & 0 deletions docker-compose.dev.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,11 @@ services:
- ${BENTOV2_PORTAL_DOMAIN}
- ${BENTOV2_AUTH_DOMAIN}
- ${BENTO_MINIO_DOMAIN}
# Network aliases on the ETL network: containers attached to etl-net resolve
# these domains to this service (enclosing service not visible in this hunk).
# NOTE(review): unlike the preceding network entry, BENTO_MINIO_DOMAIN is not
# aliased here - presumably ETL does not talk to MinIO; confirm.
etl-net:
aliases:
- ${BENTOV2_DOMAIN}
- ${BENTOV2_PORTAL_DOMAIN}
- ${BENTOV2_AUTH_DOMAIN}
event-relay-net:
aliases:
- ${BENTOV2_DOMAIN}
Expand Down Expand Up @@ -172,6 +177,16 @@ services:
- "${BENTOV2_WES_EXTERNAL_PORT}:${BENTOV2_WES_INTERNAL_PORT}"
- "${BENTOV2_WES_DEBUGGER_EXTERNAL_PORT}:${BENTOV2_WES_DEBUGGER_INTERNAL_PORT}"

# Development overrides for the ETL service: enables debug mode, disables SSL
# validation, and publishes the service and debugger ports on the host.
etl:
  environment:
    - BENTO_DEBUG=True
    # SSL validation is turned off in this development override only.
    - BENTO_VALIDATE_SSL=False
    - PORT=${BENTO_ETL_INTERNAL_PORT}
    # DEBUGGER_PORT is read inside the container, so it must be the
    # container-side port of the debugger mapping below (not the host port).
    - DEBUGGER_PORT=${BENTO_ETL_DEBUGGER_INTERNAL_PORT}
  ports:
    # host:container
    - "${BENTO_ETL_EXTERNAL_PORT}:${BENTO_ETL_INTERNAL_PORT}"
    - "${BENTO_ETL_DEBUGGER_EXTERNAL_PORT}:${BENTO_ETL_DEBUGGER_INTERNAL_PORT}"

drs:
environment:
- FLASK_DEBUG=True
Expand Down
9 changes: 9 additions & 0 deletions docker-compose.local.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,15 @@ services:
volumes:
- ./repos/wes:/wes

# Local-development override for the ETL service: runs the development image
# with the local source checkout mounted into the container.
etl:
# Uses the development version tag instead of the regular version tag.
image: ${BENTO_ETL_IMAGE}:${BENTO_ETL_VERSION_DEV}
environment:
# No value given: Compose passes these through from the invoking environment.
- BENTO_GIT_NAME
- BENTO_GIT_EMAIL
# Container-side repository path; matches the bind-mount target below.
- BENTO_GIT_REPOSITORY_DIR=/etl
volumes:
# Bind-mount the local checkout at /etl inside the container.
- ./repos/etl:/etl

drs:
image: ${BENTOV2_DRS_IMAGE}:${BENTOV2_DRS_VERSION_DEV}
environment:
Expand Down
1 change: 1 addition & 0 deletions docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ include:
- lib/cbioportal/docker-compose.cbioportal.yaml # Optional feature; controlled by a compose profile
- lib/drop-box/docker-compose.drop-box.yaml
- lib/drs/docker-compose.drs.yaml
- lib/etl/docker-compose.etl.yaml
- lib/event-relay/docker-compose.event-relay.yaml
- lib/gohan/docker-compose.gohan.yaml # Optional feature; controlled by a compose profile
- lib/katsu/docker-compose.katsu.yaml
Expand Down
2 changes: 1 addition & 1 deletion docs/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,7 @@ The following is a list of all host port allocations for Bento services in devel
| Reference DB | 9512 | `N/A` |
| Service Registry | 5010 | Unimplemented |
| WES | 9250 | 5680 |

| ETL | 6400 | 6456 |

## Local NPM package development

Expand Down
69 changes: 69 additions & 0 deletions docs/etl.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Bento ETL (Extract, Transform, Load)

Bento ETL is a service to automate the ingestion of external health data into the Bento data services.
With customizable transformers, ETL can collect and convert a variety of data types from any data source.

## Configuration

Please follow the instructions below to deploy the ETL service in a Bento stack.

### Environment variables

Enable Bento ETL by setting the feature flag in `local.env`.

```bash
BENTO_ETL_ENABLED='true'
```

### Initialize networking and directories and generate client secret

```bash
./bentoctl.bash init-docker # Creates the Docker network for Bento ETL
./bentoctl.bash init-dirs # Creates Bento ETL's data directory

# Generates the client secret
./bentoctl.bash run auth
./bentoctl.bash run gateway
./bentoctl.bash init-auth
```
After running `init-auth`, set the following in your `local.env` file, replacing the placeholder with the newly generated client secret:

```bash
BENTO_ETL_CLIENT_ID=etl
BENTO_ETL_CLIENT_SECRET={your-newly-generated-secret-here!}
```

### Create additional grants

As Bento ETL is responsible for the upload of data to other Bento data services, it needs some additional grants to be able to do so.

First, run and shell into the authorization service:

```bash
./bentoctl.bash run authz
./bentoctl.bash shell authz
```

Then, substituting the `<ISSUER_HERE>` field with your issuer (`iss`), run the following:

```bash
bento_authz create grant \
'{"iss": "<ISSUER_HERE>", "client": "etl"}' \
'{"everything": true}' \
'view:private_portal'

bento_authz create grant \
'{"iss": "<ISSUER_HERE>", "client": "etl"}' \
'{"everything": true}' \
'query:data' 'ingest:data' 'ingest:reference_material' 'delete:reference_material'
```

## Run Bento ETL

Now that everything's set up, you can start the ETL service container using:

```bash
./bentoctl.bash run etl # Runs just the etl container
# or
./bentoctl.bash run # Runs everything, including etl
```
33 changes: 24 additions & 9 deletions etc/bento.env
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ BENTO_AUTH_DB_NETWORK="${BENTOV2_PREFIX}-auth-db-net"

# - Authz service
BENTO_AUTHZ_IMAGE=ghcr.io/bento-platform/bento_authorization_service
BENTO_AUTHZ_VERSION=0.11.0
BENTO_AUTHZ_VERSION=edge
BENTO_AUTHZ_VERSION_DEV=${BENTO_AUTHZ_VERSION}-dev
BENTO_AUTHZ_CONTAINER_NAME=${BENTOV2_PREFIX}-authz
BENTO_AUTHZ_NETWORK=${BENTOV2_PREFIX}-authz-net
Expand All @@ -100,7 +100,7 @@ BENTO_AUTHZ_DB_MEM_LIM=1G
# Web
BENTO_WEB_CUSTOM_HEADER=
BENTOV2_WEB_IMAGE=ghcr.io/bento-platform/bento_web
BENTOV2_WEB_VERSION=7.1.0
BENTOV2_WEB_VERSION=edge
BENTOV2_WEB_VERSION_DEV=${BENTOV2_WEB_VERSION}-dev
BENTOV2_WEB_CONTAINER_NAME=${BENTOV2_PREFIX}-web
BENTO_WEB_NETWORK=${BENTOV2_PREFIX}-web-net
Expand All @@ -112,7 +112,7 @@ BENTOV2_WEB_CPUS=2

# Drop-Box
BENTOV2_DROP_BOX_IMAGE=ghcr.io/bento-platform/bento_drop_box_service
BENTOV2_DROP_BOX_VERSION=2.0.1
BENTOV2_DROP_BOX_VERSION=edge
BENTOV2_DROP_BOX_VERSION_DEV=${BENTOV2_DROP_BOX_VERSION}-dev
BENTOV2_DROP_BOX_CONTAINER_NAME=${BENTOV2_PREFIX}-drop-box
BENTO_DROP_BOX_NETWORK=${BENTOV2_PREFIX}-drop-box-net
Expand Down Expand Up @@ -158,7 +158,7 @@ BENTOV2_NOTIFICATION_CPUS=2

# Aggregation
BENTOV2_AGGREGATION_IMAGE=ghcr.io/bento-platform/bento_aggregation_service
BENTOV2_AGGREGATION_VERSION=0.20.3
BENTOV2_AGGREGATION_VERSION=edge
BENTOV2_AGGREGATION_VERSION_DEV=${BENTOV2_AGGREGATION_VERSION}-dev
BENTOV2_AGGREGATION_CONTAINER_NAME=${BENTOV2_PREFIX}-aggregation
BENTO_AGGREGATION_NETWORK=${BENTOV2_PREFIX}-aggregation-net
Expand Down Expand Up @@ -187,7 +187,7 @@ BENTOV2_EVENT_RELAY_CPUS=1
# Reference
# - Service
BENTO_REFERENCE_IMAGE=ghcr.io/bento-platform/bento_reference_service
BENTO_REFERENCE_VERSION=0.5.1
BENTO_REFERENCE_VERSION=edge
BENTO_REFERENCE_VERSION_DEV=${BENTO_REFERENCE_VERSION}-dev
BENTO_REFERENCE_CONTAINER_NAME=${BENTOV2_PREFIX}-reference
BENTO_REFERENCE_NETWORK=${BENTOV2_PREFIX}-reference-net
Expand All @@ -209,7 +209,7 @@ BENTO_REFERENCE_DB_USER="reference_user"

# WES
BENTOV2_WES_IMAGE=ghcr.io/bento-platform/bento_wes
BENTOV2_WES_VERSION=0.15.1
BENTOV2_WES_VERSION=edge
BENTOV2_WES_VERSION_DEV=${BENTOV2_WES_VERSION}-dev
BENTOV2_WES_CONTAINER_NAME=${BENTOV2_PREFIX}-wes
BENTO_WES_NETWORK=${BENTOV2_PREFIX}-wes-net
Expand All @@ -233,7 +233,7 @@ BENTOV2_WES_WORKFLOW_TIMEOUT=172800

# DRS
BENTOV2_DRS_IMAGE=ghcr.io/bento-platform/bento_drs
BENTOV2_DRS_VERSION=0.20.0
BENTOV2_DRS_VERSION=edge
BENTOV2_DRS_VERSION_DEV=${BENTOV2_DRS_VERSION}-dev
BENTOV2_DRS_CONTAINER_NAME=${BENTOV2_PREFIX}-drs
BENTO_DRS_NETWORK=${BENTOV2_PREFIX}-drs-net
Expand Down Expand Up @@ -278,7 +278,7 @@ BENTOV2_KATSU_DB_CPUS=4

# Katsu
BENTOV2_KATSU_IMAGE=ghcr.io/bento-platform/katsu
BENTOV2_KATSU_VERSION=11.0.0
BENTOV2_KATSU_VERSION=edge
BENTOV2_KATSU_VERSION_DEV=${BENTOV2_KATSU_VERSION}-dev
BENTOV2_KATSU_CONTAINER_NAME=${BENTOV2_PREFIX}-katsu
BENTO_KATSU_NETWORK=${BENTOV2_PREFIX}-katsu-net
Expand Down Expand Up @@ -381,7 +381,7 @@ BENTOV2_GOHAN_PRIVATE_AUTHZ_URL=http://${BENTOV2_GOHAN_AUTHZ_OPA_CONTAINER_NAME}
# Bento-Public

BENTO_PUBLIC_IMAGE=ghcr.io/bento-platform/bento_public
BENTO_PUBLIC_VERSION=0.24.0
BENTO_PUBLIC_VERSION=pr-285
BENTO_PUBLIC_VERSION_DEV=${BENTO_PUBLIC_VERSION}-dev
BENTO_PUBLIC_CONTAINER_NAME=${BENTOV2_PREFIX}-public
BENTO_PUBLIC_NETWORK=${BENTOV2_PREFIX}-public-net
Expand Down Expand Up @@ -476,3 +476,18 @@ BENTO_MINIO_NETWORK=${BENTOV2_PREFIX}-minio-net
BENTO_MINIO_ROOT_USER=root
BENTO_MINIO_INTERNAL_PORT=9000
BENTO_MINIO_CONSOLE_PORT=9001

# ETL
# - Container image, tags and container name
BENTO_ETL_IMAGE=ghcr.io/bento-platform/bento_etl
# NOTE(review): `pr-9` looks like a pull-request build tag rather than a release tag - confirm before merging.
BENTO_ETL_VERSION=pr-9
BENTO_ETL_VERSION_DEV=${BENTO_ETL_VERSION}-dev
BENTO_ETL_CONTAINER_NAME=${BENTOV2_PREFIX}-etl
# - Data storage: host-side data directory and the container path it is mounted at
BENTO_ETL_VOL_DATA_DIR=${BENTO_FAST_DATA_DIR}/etl/data
BENTO_ETL_INTERNAL_PROJECT_HOME=/etl
BENTO_ETL_INTERNAL_DATA_DIR=${BENTO_ETL_INTERNAL_PROJECT_HOME}/data
# - SQLite database file name; passed to the service as DB_NAME and used to build DATABASE_PATH
BENTO_ETL_SQLITE_DB_NAME=bento_etl.db
# - Docker network and container-side service port
BENTO_ETL_NETWORK=${BENTOV2_PREFIX}-etl-net
BENTO_ETL_INTERNAL_PORT=${BENTO_STD_SERVICE_INTERNAL_PORT}
# - Host-side ports published in development (keep in sync with docs/development.md)
BENTO_ETL_EXTERNAL_PORT=6400
BENTO_ETL_DEBUGGER_EXTERNAL_PORT=6456
# - Container-side debugger port
BENTO_ETL_DEBUGGER_INTERNAL_PORT=6456
5 changes: 5 additions & 0 deletions etc/bento_deploy.env
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ BENTO_CBIOPORTAL_ENABLED='false'
BENTO_GOHAN_ENABLED='true'
BENTO_MONITORING_ENABLED='false'
BENTO_MINIO_ENABLED='false'
BENTO_ETL_ENABLED='false'

# - Display flags for Bento portals
# - Switch to enable French translation in Bento Public
Expand Down Expand Up @@ -70,6 +71,10 @@ BENTO_AGGREGATION_CLIENT_SECRET= # TODO: SET ME WHEN DEPLOYING!
BENTO_WES_CLIENT_ID=wes
BENTO_WES_CLIENT_SECRET= # TODO: SET ME WHEN DEPLOYING!

# - ETL Client ID/secret; client within BENTOV2_AUTH_REALM
BENTO_ETL_CLIENT_ID=etl
BENTO_ETL_CLIENT_SECRET= # TODO: SET ME WHEN DEPLOYING!

# - Grafana Client ID/secret; client within BENTOV2_AUTH_REALM
BENTO_GRAFANA_CLIENT_ID=grafana
BENTO_GRAFANA_CLIENT_SECRET= # TODO: SET ME WHEN DEPLOYING IF GRAFANA IS ENABLED!
Expand Down
5 changes: 5 additions & 0 deletions etc/bento_dev.env
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ BENTO_CBIOPORTAL_ENABLED='false'
BENTO_GOHAN_ENABLED='true'
BENTO_MONITORING_ENABLED='false'
BENTO_MINIO_ENABLED='false'
BENTO_ETL_ENABLED='false'

# - Display flags for Bento portals
# - Switch to enable French translation in Bento Public
Expand Down Expand Up @@ -70,6 +71,10 @@ BENTO_AGGREGATION_CLIENT_SECRET=
BENTO_WES_CLIENT_ID=wes
BENTO_WES_CLIENT_SECRET=

# - ETL Client ID/secret; client within BENTOV2_AUTH_REALM
BENTO_ETL_CLIENT_ID=etl
BENTO_ETL_CLIENT_SECRET=

# - Grafana Client ID/secret; client within BENTOV2_AUTH_REALM
BENTO_GRAFANA_CLIENT_ID=grafana
BENTO_GRAFANA_CLIENT_SECRET=
Expand Down
5 changes: 5 additions & 0 deletions etc/bento_services.json
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@
"url_template": "{BENTO_PUBLIC_URL}/api/{service_kind}",
"repository": "git@github.com:bento-platform/bento_wes"
},
"etl": {
"service_kind": "etl",
"url_template": "{BENTO_PUBLIC_URL}/api/{service_kind}",
"repository": "git@github.com:bento-platform/bento_etl"
},
"aggregation": {
"service_kind": "aggregation",
"url_template": "{BENTO_PUBLIC_URL}/api/{service_kind}",
Expand Down
4 changes: 4 additions & 0 deletions etc/default_config.env
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ BENTO_CBIOPORTAL_ENABLED='false'
BENTO_GOHAN_ENABLED='true'
BENTO_MONITORING_ENABLED='false'
BENTO_MINIO_ENABLED='false'
BENTO_ETL_ENABLED='false'

# - Display flags for Bento portals
# - Switch to enable French translation in Bento Public
Expand Down Expand Up @@ -104,6 +105,9 @@ BENTO_GRAFANA_CLIENT_SECRET=
# - WES Client ID/secret; secret to be filled by local.env - client within BENTOV2_AUTH_REALM
BENTO_WES_CLIENT_ID=wes
BENTO_WES_CLIENT_SECRET=
# - ETL Client ID/secret; secret to be filled by local.env - client within BENTOV2_AUTH_REALM
BENTO_ETL_CLIENT_ID=etl
BENTO_ETL_CLIENT_SECRET=
# --------------------------------------------------------------------

# Drop-Box
Expand Down
39 changes: 39 additions & 0 deletions lib/etl/docker-compose.etl.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# Bento ETL service definition.
# Runs the ETL container on its own external Docker network, with a host data
# directory mounted for the service's SQLite database.
# NOTE(review): there is no `profiles:` entry here, while other optional
# services in docker-compose.yaml are marked as profile-controlled - confirm
# that BENTO_ETL_ENABLED actually gates this service.
services:
  etl:
    image: ${BENTO_ETL_IMAGE}:${BENTO_ETL_VERSION}
    container_name: ${BENTO_ETL_CONTAINER_NAME}
    networks:
      - etl-net
    expose:
      - ${BENTO_ETL_INTERNAL_PORT}
    environment:
      # Entries without a value are passed through from the invoking environment.
      - BENTO_UID
      - BENTO_DEBUG=False
      - BENTO_AUTHZ_ENABLED=True
      - BENTO_AUTHZ_SERVICE_URL
      - CORS_ORIGINS=${BENTO_CORS_ORIGINS}
      - KATSU_URL=${BENTO_KATSU_URL}/
      - SERVICE_URL_BASE_PATH=${BENTOV2_PUBLIC_URL}/api/etl
      - BENTO_OPENID_CONFIG_URL
      # OAuth client credentials for the `etl` client.
      - ETL_CLIENT_ID=${BENTO_ETL_CLIENT_ID}
      - ETL_CLIENT_SECRET=${BENTO_ETL_CLIENT_SECRET}
      - DB_NAME=${BENTO_ETL_SQLITE_DB_NAME}
      # Database file lives inside the mounted data directory (see `volumes`).
      # Both parts need `${...}`; a bare `{...}` is not interpolated by Compose.
      - DATABASE_PATH=${BENTO_ETL_INTERNAL_DATA_DIR}/${BENTO_ETL_SQLITE_DB_NAME}
    healthcheck:
      test:
        [
          "CMD",
          "curl",
          "http://localhost:${BENTO_ETL_INTERNAL_PORT}/service-info",
        ]
      timeout: ${BENTO_HEALTHCHECK_TIMEOUT}
      interval: ${BENTO_HEALTHCHECK_INTERVAL}
      start_period: ${BENTO_HEALTHCHECK_START_PERIOD}
      start_interval: ${BENTO_HEALTHCHECK_START_INTERVAL}
    volumes:
      # host data directory : container data directory
      - ${BENTO_ETL_VOL_DATA_DIR}:${BENTO_ETL_INTERNAL_DATA_DIR}

networks:
  # Pre-existing network (external); this file does not create it.
  etl-net:
    external: true
    name: ${BENTO_ETL_NETWORK}
Loading