-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathdata.py
More file actions
70 lines (57 loc) · 2.21 KB
/
Copy pathdata.py
File metadata and controls
70 lines (57 loc) · 2.21 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import subprocess
import pandas as pd
from datetime import timedelta
from dataclasses import dataclass
@dataclass
class Tweets:
    """Pickle-backed local archive of the tweets posted by one account.

    The archive is a DataFrame kept in ``self.data`` and persisted with
    pickle to a file named after ``username`` in the current working
    directory.  New tweets are fetched by running the Go program in the
    ``twint-zero`` sub-directory, whose output is written to a file called
    ``temp`` in the current working directory and then parsed.

    ``data`` is deliberately not declared as a dataclass field, so the
    generated ``__init__``/``__eq__``/``__repr__`` only involve ``username``.
    """

    username: str

    def __post_init__(self):
        """Load the cached archive (or start an empty one) on construction."""
        self.load_data()

    def load_data(self, refresh=False):
        """Populate ``self.data`` from the pickle file, or reset the archive.

        Args:
            refresh: When true, skip the cache and start over with an empty
                archive, overwriting the pickle file.

        If the cache cannot be read or fails ``check_data`` the archive is
        likewise reset to an empty DataFrame and saved.
        """
        if not refresh:
            try:
                # NOTE: unpickling can execute arbitrary code; this file must
                # only ever be one that save_data() wrote.
                self.data = pd.read_pickle(self.username)
                self.check_data()
                return
            except Exception:
                # Best effort: a missing, unreadable or inconsistent cache is
                # not fatal, it just means starting from scratch below.
                pass
        self.data = pd.DataFrame()
        self.save_data()

    def save_data(self):
        """Validate ``self.data`` and pickle it to the file named ``username``.

        Raises:
            AssertionError: If the index contains duplicates; nothing is
                written in that case.
        """
        self.check_data()
        self.data.to_pickle(self.username)

    def check_data(self):
        """Ensure no index value appears more than once in ``self.data``.

        Raises:
            AssertionError: If the index contains duplicates.
        """
        # Raised explicitly (rather than via ``assert``) so the check is not
        # stripped when Python runs with -O.
        if self.data.index.duplicated().any():
            raise AssertionError('Duplicate rows')

    def query(self, params=None, seconds=60):
        """Run the scraper for this user and write its stdout to ``temp``.

        Args:
            params: Optional mapping of extra search operators; each item is
                appended to the search string as ``key:value``.
            seconds: Time limit handed to the ``timeout`` command that wraps
                the scraper.

        Raises:
            OSError: If the ``twint-zero`` directory or the ``timeout``
                executable cannot be found.
        """
        terms = [f'from:{self.username}']
        terms.extend(f'{k}:{v}' for k, v in (params or {}).items())
        # An argument list (no shell) keeps the username and parameter values
        # from being interpreted as shell syntax.
        command = [
            'timeout', str(seconds),
            'go', 'run', 'main.go', '-Query', ' '.join(terms),
        ]
        with open('temp', 'wb') as sink:
            subprocess.call(command, cwd='twint-zero', stdout=sink)

    def load_temp(self):
        """Parse the scraper output in ``temp`` into a DataFrame.

        The file is read as header-less, tab-separated text with the columns
        ``id``, ``date``, ``handle`` and ``text``.  Only rows whose handle is
        exactly ``@<username>`` are kept.

        Returns:
            A DataFrame indexed by ``id`` with the columns ``date`` (parsed
            to datetimes) and ``text``.
        """
        temp = pd.read_csv(
            'temp',
            sep='\t',
            quoting=3,  # 3 == csv.QUOTE_NONE: quote characters are plain text
            names=['id', 'date', 'handle', 'text'],
        )
        own = temp[temp['handle'] == f'@{self.username}'].drop('handle', axis=1)
        own['date'] = pd.to_datetime(own['date'], format='%b %d, %Y · %I:%M %p %Z')
        return own.set_index('id')

    def _query_window(self, forward, date):
        """Return the ``since``/``until`` search operator for the next query.

        An explicit ``date`` always wins.  Otherwise the bound is derived
        from the archive: the newest stored date when going forward, the day
        after the oldest stored date when going backward.  An empty dict is
        returned when no bound can be derived (e.g. the archive is empty).
        """
        key = 'since' if forward else 'until'
        if date:
            return {key: date}
        try:
            dates = self.data.date
            edge = dates.max() if forward else dates.min() + timedelta(days=1)
            return {key: edge.strftime('%Y-%m-%d')}
        except (AttributeError, TypeError, ValueError):
            # No usable ``date`` column yet: fall back to an unbounded query.
            return {}

    def fill(self, forward=True, date=None):
        """Query repeatedly and merge results until no new tweets arrive.

        Args:
            forward: True to fetch tweets newer than the archive (``since``),
                False to fetch older ones (``until``).
            date: Optional ``YYYY-MM-DD`` bound used instead of the one
                derived from the archive; it is honoured even when the
                archive is still empty.

        The archive is saved after every round.
        """
        while True:
            size = self.data.shape[0]
            self.query(self._query_window(forward, date))
            temp = self.load_temp()
            # The empty starting frame has no 'id' index, so it cannot take
            # part in the concat/groupby; start from the fresh batch instead.
            merged = pd.concat([self.data, temp]) if size > 0 else temp
            if not merged.empty:
                # Collapse repeated ids (also within a single batch) so that
                # save_data() never sees duplicate rows.
                merged = merged.groupby('id').first()
            self.data = merged
            self.save_data()
            if self.data.shape[0] == size:
                break

    def run(self, backward=False):
        """Update the archive: optionally back-fill first, then fetch forward.

        Args:
            backward: When true, fetch older tweets before fetching newer
                ones.
        """
        if backward:
            self.fill(forward=False)
        self.fill()