-
Notifications
You must be signed in to change notification settings - Fork 5
Expand file tree
/
Copy pathutils.py
More file actions
29 lines (21 loc) · 964 Bytes
/
utils.py
File metadata and controls
29 lines (21 loc) · 964 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# -*- coding: utf-8 -*-
import json
import codecs
def sent2id(text, vocab_dict):
    """Translate every element of ``text`` into its id from ``vocab_dict``.

    ``text`` is iterated element by element (character by character when it
    is a string). Elements that are not keys of ``vocab_dict`` are mapped
    to 0.

    Returns a list of ids, in the same order as the elements of ``text``.
    """
    ids = []
    for token in text:
        # 0 is the fallback id for anything missing from the vocabulary.
        ids.append(vocab_dict.get(token, 0))
    return ids
def _load_split(path, vocab_dict):
    """Read one ``label<TAB>text`` file into three parallel lists.

    Each non-blank line is stripped of surrounding whitespace and split on
    tabs; field 0 is parsed as an integer label and field 1 is the text.

    Returns ``(ids, labels, texts)`` where ``ids[i]`` is
    ``sent2id(texts[i], vocab_dict)``.

    Raises ``IndexError`` if a non-blank line has no tab-separated second
    field, and ``ValueError`` if the label is not an integer.
    """
    ids, labels, texts = [], [], []
    with codecs.open(path, "r", "utf-8") as fr:
        for raw in fr:
            stripped = raw.strip()
            if not stripped:
                # Tolerate blank lines (e.g. a stray trailing newline)
                # instead of failing with IndexError on fields[1].
                continue
            fields = stripped.split("\t")
            ids.append(sent2id(fields[1], vocab_dict))
            labels.append(int(fields[0]))
            texts.append(fields[1])
    return ids, labels, texts


def load_data(data_dir=None, vocab_path=None):
    """Load the train and dev splits together with the vocabulary.

    Args:
        data_dir: directory containing ``train.txt`` and ``dev.txt``; each
            line of those files is ``label<TAB>text``.
        vocab_path: path to a UTF-8 JSON file holding the vocabulary dict
            that is passed to ``sent2id``.

    Returns:
        A 3-tuple ``(train, dev, size)`` where ``train`` and ``dev`` are
        ``(x, y, text)`` tuples of parallel lists (id sequences, integer
        labels, raw texts) and ``size`` is one more than the largest value
        in the vocabulary dict (presumably the vocabulary size when ids are
        contiguous from 0 -- confirm against the vocab file).
    """
    # Use a context manager so the vocabulary file handle is closed
    # deterministically instead of being left to the garbage collector.
    with codecs.open(vocab_path, "r", "utf-8") as fr:
        vocab_dict = json.load(fr)

    train = _load_split(data_dir + "/train.txt", vocab_dict)
    dev = _load_split(data_dir + "/dev.txt", vocab_dict)
    return train, dev, max(vocab_dict.values()) + 1