-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy patholap.py
More file actions
133 lines (115 loc) · 4.4 KB
/
olap.py
File metadata and controls
133 lines (115 loc) · 4.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import sqlite3
import pandas as pd
import matplotlib.pyplot as plt
from concurrent.futures import ProcessPoolExecutor
import numpy as np
import time
import matplotlib as mpl
from matplotlib.font_manager import FontProperties
# 配置matplotlib支持中文显示
def setup_chinese_font():
# 方法1:尝试使用系统中文字体
try:
# Windows系统常见中文字体
font_paths = [
'C:/Windows/Fonts/simhei.ttf', # 黑体
'C:/Windows/Fonts/msyh.ttf', # 微软雅黑
'C:/Windows/Fonts/simsun.ttc', # 宋体
]
# 尝试加载系统中文字体
for path in font_paths:
try:
chinese_font = FontProperties(fname=path)
return chinese_font
except:
continue
# 方法2:如果找不到特定中文字体,使用系统默认字体
plt.rcParams['font.sans-serif'] = ['SimHei', 'Microsoft YaHei', 'SimSun', 'DejaVu Sans']
plt.rcParams['axes.unicode_minus'] = False # 解决负号显示问题
return None
except Exception as e:
print(f"设置中文字体时出错: {e}")
# 方法3:如果以上方法都失败,使用英文标签
return None
# 连接到数据库
conn = sqlite3.connect('ecommerce.db')
# 加载数据到Pandas
def load_data():
return pd.read_sql_query("SELECT * FROM orders", conn)
# 复杂分析任务
def analyze_sales_by_product(df):
result = df.groupby('product_id').agg({
'quantity': 'sum',
'price': 'mean',
'order_id': 'count'
}).rename(columns={'order_id': 'total_orders'})
result['total_revenue'] = result['quantity'] * result['price']
return result
def analyze_customer_behavior(df):
return df.groupby('customer_id').agg({
'order_id': 'count',
'price': ['sum', 'mean', 'std'],
'quantity': 'sum'
})
def time_trend_analysis(df):
df['order_date'] = pd.to_datetime(df['order_date'])
df['date'] = df['order_date'].dt.date
return df.groupby('date').agg({
'order_id': 'count',
'price': 'sum'
}).rename(columns={'order_id': 'orders_count', 'price': 'daily_revenue'})
# 使用并行处理执行多个分析任务
def parallel_analysis(df):
with ProcessPoolExecutor(max_workers=3) as executor:
product_future = executor.submit(analyze_sales_by_product, df)
customer_future = executor.submit(analyze_customer_behavior, df)
time_future = executor.submit(time_trend_analysis, df)
product_analysis = product_future.result()
customer_analysis = customer_future.result()
time_analysis = time_future.result()
return product_analysis, customer_analysis, time_analysis
# 执行分析
def run_olap_analysis():
print("开始OLAP分析...")
df = load_data()
start_time = time.time()
product_analysis, customer_analysis, time_analysis = parallel_analysis(df)
end_time = time.time()
print(f"OLAP分析完成,耗时: {end_time - start_time:.2f}秒")
print("\n产品销售分析:")
print(product_analysis)
print("\n客户行为分析:")
print(customer_analysis)
print("\n时间趋势分析:")
print(time_analysis)
# 设置中文字体
chinese_font = setup_chinese_font()
# 可视化分析结果
plt.figure(figsize=(10, 6))
plt.bar(product_analysis.index, product_analysis['total_revenue'])
if chinese_font:
plt.title('各产品总收入', fontproperties=chinese_font)
plt.xlabel('产品ID', fontproperties=chinese_font)
plt.ylabel('总收入', fontproperties=chinese_font)
else:
plt.title('Product Total Revenue')
plt.xlabel('Product ID')
plt.ylabel('Total Revenue')
plt.savefig('product_revenue.png')
plt.close()
plt.figure(figsize=(10, 6))
orders_by_customer = customer_analysis[('order_id', 'count')]
plt.bar(orders_by_customer.index, orders_by_customer.values)
if chinese_font:
plt.title('各客户订单数量', fontproperties=chinese_font)
plt.xlabel('客户ID', fontproperties=chinese_font)
plt.ylabel('订单数量', fontproperties=chinese_font)
else:
plt.title('Customer Order Count')
plt.xlabel('Customer ID')
plt.ylabel('Order Count')
plt.savefig('customer_orders.png')
plt.close()
if __name__ == "__main__":
run_olap_analysis()
conn.close()