-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper_file_read.py
More file actions
61 lines (45 loc) · 1.79 KB
/
Copy pathscraper_file_read.py
File metadata and controls
61 lines (45 loc) · 1.79 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
import os
import csv
import requests
from dotenv import load_dotenv
from utils import extract_emails, name_email_similarity
# Load variables from a local .env file before reading the environment below.
load_dotenv()

# API key for the HasData service; None when the variable is not set.
HASDATA_API_KEY = os.environ.get('HASDATA_API_KEY')

# Endpoint queried for Google search results.
URL = 'https://api.hasdata.com/scrape/google/serp'

# Headers attached to every request sent to URL.
HEADERS = {'x-api-key': HASDATA_API_KEY, 'Content-Type': "application/json"}
def read_csv_file(file_path: str):
    """Load a UTF-8 CSV file and return its data rows as a list of dicts.

    The first line of the file supplies the column names; every following
    line becomes one dict keyed by those names.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list with one dict per data row (empty when the file has no rows).
    """
    # newline="" lets the csv module do its own line-ending handling.
    with open(file_path, newline="", encoding="utf-8") as handle:
        return [record for record in csv.DictReader(handle)]
def get_person_email(person_first_name, person_last_name, person_company):
    """Search Google through the HasData SERP API for a person's email.

    Builds a query from the person's name and company, extracts every email
    address found in the result snippets with ``extract_emails``, and keeps
    the one that ``name_email_similarity`` scores highest against the
    person's full name.

    Args:
        person_first_name: First name, quoted verbatim in the search query.
        person_last_name: Last name, quoted verbatim in the search query.
        person_company: Company name appended to the search query.

    Returns:
        The best-scoring email address, or an empty string when no snippet
        contains an email that scores above 0.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            API answers with an HTTP error status.
    """
    info_to_extract = 'email'
    term_to_search = f'"{person_first_name}" "{person_last_name}" {person_company} {info_to_extract}'

    # Score candidates against "<first> <last>". The previous code joined the
    # last name with itself, so the first name never influenced the score.
    # The value does not change per result, so it is built once up front.
    full_name = f'{person_first_name} {person_last_name}'

    # requests has no default timeout; without one a stalled connection would
    # hang the whole run.
    response = requests.get(
        url=URL,
        params={"q": term_to_search},
        headers=HEADERS,
        timeout=30,
    )
    # Surface API failures (bad key, quota, ...) as an HTTP error instead of
    # a confusing KeyError further down.
    response.raise_for_status()
    payload = response.json()

    most_similar_email = ''
    max_similarity_so_far = 0
    # A search without hits may omit 'organicResults'; treat that as "no results".
    for result in payload.get('organicResults') or []:
        snippet = result.get('snippet')
        if not snippet:
            # Some results carry no snippet text; nothing to extract from them.
            continue
        for email in extract_emails(snippet):
            score = name_email_similarity(full_name, email)
            # Strict '>' keeps the earliest candidate when scores tie.
            if score > max_similarity_so_far:
                max_similarity_so_far = score
                most_similar_email = email
    return most_similar_email
def main():
    """Look up and print an email address for every person in people_info.csv.

    Each row's 'full_name' and 'company' columns are read. The first
    whitespace-separated word of the name is used as the first name and the
    remaining words as the last name, so names with middle names or extra
    spaces no longer abort the run with a ValueError. Rows without a name
    are reported and skipped.
    """
    csv_data = read_csv_file('people_info.csv')
    print('\n\n')

    for row in csv_data:
        # split() with no argument collapses repeated whitespace; the `or ''`
        # covers a missing cell, which csv.DictReader reports as None.
        name_parts = (row['full_name'] or '').split()
        if not name_parts:
            print(f'Skipping row without a name: {row!r}\n')
            continue

        person_first_name = name_parts[0]
        person_last_name = ' '.join(name_parts[1:])
        person_company = row['company']

        email = get_person_email(person_first_name, person_last_name, person_company)

        print(f'{person_first_name} {person_last_name}')
        print(f'Email: {email}\n')
# Run the scraper only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()