-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.py
More file actions
88 lines (73 loc) · 4.22 KB
/
Copy pathmain.py
File metadata and controls
88 lines (73 loc) · 4.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import time
import argparse
import numpy as np
import pandas as pd
from utils import seed_everything,send_model
from data_loader import Preprocessor, context_data_loader
# from src.data import context_data_load, context_data_split, context_data_loader
from models.model import FactorizationMachineModel, CatBoostModel
from sklearn.model_selection import StratifiedKFold
from catboost import CatBoostClassifier
def main(args):
    """Run 5-fold stratified cross-validation for the selected model and upload a checkpoint.

    The training frame returned by ``Preprocessor.preprocess_train_dataset`` is
    split on its ``label`` column with ``StratifiedKFold``. For every fold the
    train/validation slices are stored in ``data`` under ``X_train``,
    ``y_train``, ``X_valid`` and ``y_valid``, passed through
    ``context_data_loader``, and a fresh model is built and trained. The mean
    of the per-fold scores returned by ``model.train`` is printed, and the
    fold-1 checkpoint path is then handed to ``send_model``.

    Args:
        args: Parsed command-line namespace. This function reads ``MODEL``,
            ``SEED`` and ``SAVE_PATH`` directly and forwards the whole
            namespace to the preprocessor, data loader and model constructors.

    Raises:
        ValueError: If ``args.MODEL`` is not ``'FM'`` or ``'CatBoost'``.
    """
    # Fail fast: without this check an unknown (or missing) MODEL only surfaced
    # after preprocessing, as an UnboundLocalError on ``model``.
    supported_models = ('FM', 'CatBoost')
    if args.MODEL not in supported_models:
        raise ValueError(
            f"Unsupported MODEL {args.MODEL!r}; expected one of {supported_models}."
        )

    seed_everything(args.SEED)

    ######################## DATA LOAD
    print(f'--------------- {args.MODEL} Load Data ---------------')
    preprocessor = Preprocessor(args)
    data = preprocessor.preprocess_train_dataset()

    ######################## TRAIN
    print(f'--------------- {args.MODEL} TRAINING ---------------')
    # NOTE(review): no random_state is passed, so fold assignment depends on the
    # global NumPy RNG state -- presumably seeded by seed_everything; confirm.
    skf = StratifiedKFold(n_splits=5, shuffle=True)
    auc_scores = np.array([])
    fold_indices = skf.split(
        data['train'].drop(['label'], axis=1),
        data['train']['label'],
    )
    for idx, (train_index, valid_index) in enumerate(fold_indices):
        # TODO: hold out validation data per user
        # Build the feature/label views once per fold instead of dropping the
        # label column separately for the train and validation slices.
        features = data['train'].drop(['label'], axis=1)
        labels = data['train']['label']
        data['X_train'] = features.iloc[train_index]
        data['y_train'] = labels.iloc[train_index]
        data['X_valid'] = features.iloc[valid_index]
        data['y_valid'] = labels.iloc[valid_index]

        data = context_data_loader(args, data)

        print(f'--------------- FOLD-{idx}, INIT {args.MODEL} ---------------')
        if args.MODEL == 'FM':
            model = FactorizationMachineModel(args, data)
        elif args.MODEL == 'CatBoost':
            model = CatBoostModel(args, data)

        print(f'--------------- FOLD-{idx}, {args.MODEL} TRAINING ---------------')
        # Folds are numbered from 1 for the model, while the log lines above use idx.
        auc_score = model.train(fold_num=idx + 1)
        auc_scores = np.append(auc_scores, auc_score)

    print(f"[FINAL AUC SCORES MEAN]: {auc_scores.mean()}")
    print('-----------------Complete Train----------------')

    # NOTE(review): the path always points at the fold-1 ``checkpoint.cbm`` file,
    # whichever MODEL was trained -- confirm the FM model also writes this file.
    result_path = args.SAVE_PATH + args.MODEL + "/fold1/checkpoint.cbm"
    send_url = 'http://www.recommendu.kro.kr:30001/services/save_model/'
    send_model(result_path, send_url, args.MODEL)
    print('-----------------Complete Send----------------')
if __name__ == "__main__":
######################## BASIC ENVIRONMENT SETUP
parser = argparse.ArgumentParser(description='parser')
arg = parser.add_argument
############### BASIC OPTION
arg('--DATA_PATH', type=str, default='../data/', help='Data path๋ฅผ ์ค์ ํ ์ ์์ต๋๋ค.')
arg('--SAVE_PATH', type = str, default = "/opt/ml/RecommendU-ml/model_output/")
arg('--MODEL', type=str, choices=['FM', 'CatBoost'],
help='ํ์ต ๋ฐ ์์ธกํ ๋ชจ๋ธ์ ์ ํํ ์ ์์ต๋๋ค.')
arg('--DATA_SHUFFLE', type=bool, default=True, help='๋ฐ์ดํฐ ์
ํ ์ฌ๋ถ๋ฅผ ์กฐ์ ํ ์ ์์ต๋๋ค.')
arg('--TEST_SIZE', type=float, default=0.2, help='Train/Valid split ๋น์จ์ ์กฐ์ ํ ์ ์์ต๋๋ค.')
arg('--SEED', type=int, default=42, help='seed ๊ฐ์ ์กฐ์ ํ ์ ์์ต๋๋ค.')
############### TRAINING OPTION
arg('--BATCH_SIZE', type=int, default=4, help='Batch size๋ฅผ ์กฐ์ ํ ์ ์์ต๋๋ค.')
arg('--EPOCHS', type=int, default=30, help='Epoch ์๋ฅผ ์กฐ์ ํ ์ ์์ต๋๋ค.')
arg('--LR', type=float, default=1e-3, help='Learning Rate๋ฅผ ์กฐ์ ํ ์ ์์ต๋๋ค.')
arg('--WEIGHT_DECAY', type=float, default=1e-3, help='Adam optimizer์์ ์ ๊ทํ์ ์ฌ์ฉํ๋ ๊ฐ์ ์กฐ์ ํ ์ ์์ต๋๋ค.')
arg('--PATIENCE', type = int, default = 3)
arg('--ITERATIONS', type = int, default = 2000)
arg('--DEPTH', type = int, default = 12)
arg('--EVAL_METRIC', type = str, default = "AUC")
############### GPU
arg('--DEVICE', type=str, default='cpu', choices=['cuda', 'cpu'], help='ํ์ต์ ์ฌ์ฉํ Device๋ฅผ ์กฐ์ ํ ์ ์์ต๋๋ค.')
############### FM
arg('--FM_EMBED_DIM', type=int, default=6, help='FM์์ embedding์ํฌ ์ฐจ์์ ์กฐ์ ํ ์ ์์ต๋๋ค.')
args = parser.parse_args()
main(args)