# simple-bayesian-mmm/src/main.py (thrivent-oss/simple-bayesian-mmm)

import pandas as pd
from random import random

# repo-level imports
import config
from mmm import create_baseline, roas

# This sample stands in for a single aggregation (for example one channel or one geo) of a full dataset.
n_weeks = 52
df = pd.DataFrame({
	# 'date' is formally unused, so its type does not matter; the rows themselves must be...
	#  1) ordered oldest-first, with no temporal step missing
	#  2) unique within the aggregation (ex: one row per date once aggregated by channel)
	'date': range(n_weeks),

	# TOTAL spend (dollars) for this aggregation; if several spend sources are tracked
	# separately, sum them into this one feature beforehand.
	# Simulated as zero before week 27 (Q3) and a uniform draw from [3E3, 6E3) afterwards.
	'spend': [
		0 if wk < 27
		else 3E3 * (1 + random())
		for wk in range(n_weeks)
	],

	# Target variable (sales dollars in the default example) for this aggregation.
	# Before week 27: 1E5 plus noise in [0, 5E4).
	# From week 27 on: the noise band shifts up by 5E4 and a further 5E3 is added per elapsed
	# week, simulating marketing lift that keeps growing week over week (adstock).
	config.target_feature: [
		1E5 + 5E4 * (1 + random()) + 5E3 * (wk - 27) if wk >= 27
		else 1E5 + 5E4 * random()
		for wk in range(n_weeks)
	],
})

# External economic controls, such as personal savings rate (see config.py for examples).
# Each column named in config.control_cols is filled with n_weeks placeholder draws from [0, 1).
controls_df = pd.DataFrame({
	name: [random() for _ in range(n_weeks)]
	for name in config.control_cols
})

# Build the unlifted baseline sales when the data does not already include one.
print('Creating baseline...')
df = create_baseline(df=df)  # efficiency and coupling are left at their default values

# Return on ad spend over Q3, taken here as weeks 27 through 39.
print('Calculating ROAS...')
print(
	'ROAS for Q3:',
	roas(df=df, controls_df=controls_df, t0=27, t1=39),
)

# Sanity check for your parameter tuning; ROAS should be roughly aligned with a metric like this
#   NOTE: In this minimal example problem, there has been no tuning so ROAS will not match.
# The target column is looked up through config.target_feature (the same key used to build df)
# instead of a hard-coded name, so the check keeps working if the target is renamed in config.
df['sanity_lift'] = df[config.target_feature] - df['baseline'] - df['spend']

# Positional rows 27..39 inclusive, i.e. the "weeks 27-39" Q3 window used for the ROAS call.
# Named q3 rather than `slice` so the builtin of that name is not shadowed.
q3 = df.iloc[27:40]
q3_spend = sum(q3['spend'])

# NOTE(review): spend is already subtracted inside 'sanity_lift' and is subtracted once more
# below; the arithmetic is kept unchanged to preserve the printed value -- confirm whether the
# double subtraction is intended.
print(
	'Rough ROAS calc (sanity check):',
	(sum(q3['sanity_lift']) - q3_spend) / q3_spend
)