-
Notifications
You must be signed in to change notification settings - Fork 8
Expand file tree
/
Copy pathdatabricks.yml
More file actions
107 lines (95 loc) · 3.47 KB
/
databricks.yml
File metadata and controls
107 lines (95 loc) · 3.47 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
---
# Databricks Asset Bundle for the customer-segmentation solution accelerator.
# Deploys three component jobs, an orchestrating "install" job, a Lakeflow
# (DLT) pipeline, and a dashboard. Deploy with: databricks bundle deploy -t <target>
bundle:
  name: customer-segmentation

# Bundle-level variables; override per-target below or via --var on the CLI.
variables:
  catalog_name:
    description: Unity Catalog to use for this solution accelerator
    default: dev_customer_segmentation
  schema_name:
    description: Schema to use for this solution accelerator
    default: segmentation
  warehouse_id:
    description: ID for the SQL Warehouse connected to the dashboard
    default: 4b9b953939869799  # Change here or in variable overrides

targets:
  # Development target (default): deploys into the current user's home folder.
  dev:
    mode: development
    default: true
    workspace:
      root_path: ~/.databricks/bundles/customer-segmentation
  # Production target: shared workspace path and production catalog.
  prod:
    mode: production
    workspace:
      root_path: /Shared/customer-segmentation
    variables:
      catalog_name: prod_customer_segmentation

resources:
  jobs:
    # Generates the synthetic input data for the accelerator.
    data_setup_job:
      name: "Data Setup - ${bundle.target}"
      tasks:
        - task_key: generate_synthetic_data
          notebook_task:
            notebook_path: ./notebooks/01_Data_Setup.py
            base_parameters:
              catalog_name: ${var.catalog_name}
              schema_name: ${workspace.current_user.short_name}_${var.schema_name}

    # Produces business-insight tables/visuals from the segmentation output.
    insights_job:
      name: "Business Insights - ${bundle.target}"
      tasks:
        - task_key: create_business_insights
          notebook_task:
            notebook_path: ./notebooks/03_Business_Insights.py
            base_parameters:
              catalog_name: ${var.catalog_name}
              schema_name: ${workspace.current_user.short_name}_${var.schema_name}

    # Runs the MLflow-tracked unsupervised segmentation notebook.
    segmentation_mlflow_job:
      name: "Unsupervised Customer Segmentation - ${bundle.target}"
      tasks:
        - task_key: unsupervised_customer_segmentation
          notebook_task:
            notebook_path: ./notebooks/02b_Segmentation_MLflow.py
            base_parameters:
              catalog_name: ${var.catalog_name}
              schema_name: ${workspace.current_user.short_name}_${var.schema_name}

    # Orchestrator: data setup -> pipeline (full refresh) -> segmentation + insights.
    customer_segmentation_demo_install:
      name: "Customer Segmentation Complete - ${bundle.target}"
      tasks:
        - task_key: setup_data
          run_job_task:
            job_id: ${resources.jobs.data_setup_job.id}
        - task_key: run_segmentation_pipeline
          depends_on:
            - task_key: setup_data
          pipeline_task:
            pipeline_id: ${resources.pipelines.segmentation_pipeline.id}
            full_refresh: true
        - task_key: unsupervised_segmentation
          depends_on:
            - task_key: run_segmentation_pipeline
          run_job_task:
            job_id: ${resources.jobs.segmentation_mlflow_job.id}
        - task_key: generate_insights
          depends_on:
            - task_key: run_segmentation_pipeline
          run_job_task:
            job_id: ${resources.jobs.insights_job.id}

  pipelines:
    # Serverless Lakeflow (DLT) pipeline; triggered manually by the install job.
    segmentation_pipeline:
      name: "Segmentation Pipeline - ${bundle.target}"
      edition: advanced
      continuous: false
      serverless: true
      catalog: ${var.catalog_name}
      target: ${workspace.current_user.short_name}_${var.schema_name}
      libraries:
        - notebook:
            path: ./notebooks/02a_Segmentation_Lakeflow.py
      configuration:
        "pipelines.trigger.interval": "manual"
        "catalog": "${var.catalog_name}"
        "schema": "${workspace.current_user.short_name}_${var.schema_name}"

  dashboards:
    # AI/BI dashboard bound to the SQL Warehouse from var.warehouse_id.
    customer_segmentation_dashboard:
      display_name: 'Customer Segmentation Dashboard'
      file_path: 'src/customer_segmentation.lvdash.json'
      warehouse_id: ${var.warehouse_id}