-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdataset.py
More file actions
62 lines (52 loc) · 1.92 KB
/
Copy pathdataset.py
File metadata and controls
62 lines (52 loc) · 1.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
from tqdm import tqdm
import os
import pickle
import logging as log
import torch
from torch.utils import data
# from torch_geometric.data import Data, Batch
import math
import random
import json
import numpy as np
from torch.utils.data import DataLoader
class Dataset(data.Dataset):
    """Map-style dataset of flattened student response logs.

    The JSON file for the requested split is read once at construction time.
    It is expected to hold a list of student entries, each with a
    ``'user_id'`` and a ``'logs'`` list; every log carries ``'exer_id'``,
    ``'score'`` and ``'knowledge_code'``.  Each log becomes one sample of the
    form ``[user_id - 1, exer_id - 1, score, knowledge_vector]`` where
    ``knowledge_vector`` is a float32 tensor of length ``knowledge_dim`` with
    1.0 at every index listed in ``'knowledge_code'``.

    Args:
        knowledge_dim: Length of the multi-hot knowledge vector.
        split: ``'train'`` or ``'valid'``; any other value selects the test
            set file.
        args: Object exposing a ``data_dir`` attribute (directory that holds
            the JSON files).
    """

    # File name per recognised split; every other split value uses the
    # fallback (test set) file.
    _SPLIT_FILES = {'train': 'train_set.json', 'valid': 'val_set.json'}
    _FALLBACK_FILE = 'test_set.json'

    def __init__(self, knowledge_dim, split, args):
        super().__init__()
        self.args = args
        self.split = split
        self.knowledge_dim = knowledge_dim
        self.process()

    def __len__(self):
        """Return the number of encoded samples."""
        return len(self.data_list)

    def __getitem__(self, idx):
        """Return the encoded sample stored at position ``idx``."""
        return self.data_list[idx]

    def _data_path(self):
        """Return the path of the JSON file that backs ``self.split``."""
        file_name = self._SPLIT_FILES.get(self.split, self._FALLBACK_FILE)
        # os.path.join yields the same path as plain concatenation when
        # data_dir already ends with a separator, and still produces a valid
        # path when it does not.
        return os.path.join(self.args.data_dir, file_name)

    def _encode_record(self, record):
        """Turn one flattened log dict into ``[user, exercise, score, vector]``."""
        knowledge_emb = [0.] * self.knowledge_dim
        for code in record['knowledge_code']:
            knowledge_emb[code] = 1.0
        return [
            # Ids are shifted down by one (presumably 1-based in the files and
            # 0-based for the model -- confirm against the data).
            record['user_id'] - 1,
            record['exer_id'] - 1,
            record['score'],
            # Build the tensor directly as float32 instead of going through a
            # float64 NumPy array first.
            torch.tensor(knowledge_emb, dtype=torch.float32),
        ]

    def process(self):
        """Load the split file, flatten the logs and fill ``self.data_list``.

        The flattened logs are shuffled in place (using the global ``random``
        state) for the ``'train'`` split only.
        """
        with open(self._data_path(), encoding='utf8') as i_f:
            students = json.load(i_f)

        # One dict per (student, log) pair.
        flat_logs = []
        for stu in students:
            entries = stu['logs']
            user_id = stu['user_id']
            flat_logs.extend(
                {
                    'user_id': user_id,
                    'exer_id': entry['exer_id'],
                    'score': entry['score'],
                    'knowledge_code': entry['knowledge_code'],
                }
                for entry in entries
            )

        if self.split == 'train':
            random.shuffle(flat_logs)

        self.data_list = [
            self._encode_record(record)
            for record in tqdm(flat_logs, desc="Processing", unit="item")
        ]