-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathTAs_split.py
More file actions
43 lines (33 loc) · 1.33 KB
/
Copy pathTAs_split.py
File metadata and controls
43 lines (33 loc) · 1.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import geopandas as gpd
import pandas as pd
import pickle
from sklearn.utils import resample
# Build repeated stratified-bootstrap train / out-of-bag test splits of the
# labeled training-area (TA) polygons and save them to a pickle file.
#
# Output: a list of (train_gdf, test_gdf) tuples, one per repetition.
#   train_gdf - bootstrap sample (rows may repeat), drawn per 'Name' group
#   test_gdf  - rows of gdf whose index label was never drawn (out-of-bag)

# 1. Load labeled polygons (TAs) path
gdf = gpd.read_file("your_Manchester_TA.geojson")
output_path = 'save_to_your_TA_splits.pkl'
N_SPLITS = 25  # number of bootstrap repetitions; also used as the seed range
splits = []

# Make sure the index is unique (0..n-1): the out-of-bag test set below is
# selected by index label, so duplicate labels would break that filter.
gdf = gdf.reset_index(drop=True)

# Index labels of the polygons in each 'Name' group (Name = LCZ class).
# The grouping does not change between repetitions, so compute it once.
class_index_labels = [
    group.index.to_numpy() for _, group in gdf.groupby("Name")
]

for i in range(N_SPLITS):
    train_indices = []

    # 2. Stratified bootstrap: sample with replacement inside every class,
    #    keeping the per-class polygon count unchanged. Only the index labels
    #    are resampled; the rows are materialised once in step 3.
    # NOTE(review): the same seed `i` is reused for every class in this
    # repetition, so classes with equal polygon counts presumably receive the
    # same positional draw. Kept as-is so previously generated splits remain
    # reproducible - confirm whether independent per-class draws are wanted.
    for labels in class_index_labels:
        sampled_labels = resample(
            labels,
            replace=True,           # sample with replacement
            n_samples=len(labels),  # keep the class size unchanged
            random_state=i,         # seed = repetition number
        )
        train_indices.extend(sampled_labels)

    # 3. Training set (bootstrap sample; duplicated rows are expected)
    train_gdf = gdf.loc[train_indices].copy()

    # 4. Test set (out-of-bag sample): every polygon that was never drawn
    unique_train_indices = set(train_indices)
    test_gdf = gdf.loc[~gdf.index.isin(unique_train_indices)].copy()

    splits.append((train_gdf, test_gdf))

# 5. Save the splits to a pickle file
with open(output_path, 'wb') as f:
    pickle.dump(splits, f)

print(f"Successfully saved {len(splits)} bootstrap splits to {output_path}")