-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathconfig.py
More file actions
37 lines (28 loc) · 962 Bytes
/
Copy pathconfig.py
File metadata and controls
37 lines (28 loc) · 962 Bytes
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
import os
# Absolute path of the directory containing this module; it is treated as the
# main project directory and is the anchor for the paths derived below.
BASE_DIR = os.path.split(os.path.abspath(__file__))[0]

# Processed-data folder: <BASE_DIR>/data/processed
PROCESSED_DATA_DIR = os.path.join(BASE_DIR, "data", "processed")
# File name of the full dataset
DATASET = "dataset.json"

# File names of the train / validation / test inputs used for training.
# NOTE(review): these use an upper-case ".JSON" extension while every other
# name here uses ".json" -- confirm the on-disk names, since the difference
# matters on case-sensitive filesystems.
TRAIN_FILE = "train.JSON"
VAL_FILE = "val.JSON"
TEST_FILE = "test.JSON"

# File names of the merged train / validation / test data
TRAIN_MERGED_FILE = "train_merged.json"
VAL_MERGED_FILE = "val_merged.json"
TEST_MERGED_FILE = "test_merged.json"
# Random seed for reproducibility.
# Defined exactly once: a second assignment further down the module would
# silently override any change made here.
RANDOM_STATE = 42

# Important columns in the datasets
TEXT_COLUMN = 'conversation'
PERSON_COUPLE_COLUMN = 'person_couple'

# Parameters for dataset splitting, expressed as fractions (0.15 == 15%)
TEST_SIZE = 0.15  # Fraction of the data for the test set
# Fraction of the data for the validation set.
# NOTE(review): confirm whether this is taken from the total or from what
# remains after the test split.
VAL_SIZE = 0.15
# Text-cleaning switches. All four are currently off (False).
# NOTE(review): the code that reads these flags is not in this module; the
# per-flag meaning below is taken from the names -- confirm against the consumer.
REMOVE_PUNCTUATION = False  # presumably: strip punctuation
TO_LOWERCASE = False  # presumably: lower-case the text
REMOVE_NUMBERS = False  # presumably: strip digits
REMOVE_STOPWORDS = False  # presumably: drop stop words