-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmain.py
More file actions
85 lines (66 loc) · 2.37 KB
/
Copy pathmain.py
File metadata and controls
85 lines (66 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import csv
import subprocess
from datetime import datetime
from urllib.parse import quote

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
# Build the search URL from the user's answers and open it in Chrome.
# The free-text answers are percent-encoded so that characters such as
# "&", "#", "?" or non-ASCII letters cannot corrupt the URL; a space still
# becomes "%20", exactly as before.
product = quote(input("Enter Product: ")) + "/?"
price = "&price_start=" + quote(input("Enter Min Price: "))

# Number of times the button at ``xp`` will be clicked: one click per 40
# requested listings (presumably the page loads 40 more per click - TODO
# confirm).  Floor division keeps the arithmetic in integers.
count = int(input("Enter number of listings: ")) // 40

driver = webdriver.Chrome()
URL = "https://id.carousell.com/search/" + product + price
driver.get(URL)

# XPath of the button that is clicked repeatedly further down the script.
xp = '//*[@id="main"]/div[2]/div/section[3]/div[1]/div/button'
def check_exists_by_xpath(xp):
    """Report whether the page currently has an element matching ``xp``.

    Uses the module-level ``driver``.  The driver's implicit wait is set to
    5 before the lookup, so a missing element is only reported after that
    wait has elapsed.

    Args:
        xp: XPath expression to look up.

    Returns:
        True if ``driver.find_element`` located the element, False if it
        raised ``NoSuchElementException``.
    """
    driver.implicitly_wait(5)
    try:
        driver.find_element(By.XPATH, xp)
    except NoSuchElementException:
        found = False
    else:
        found = True
    return found
# Click the button at ``xp`` at most ``count`` times, stopping early as
# soon as the button can no longer be found on the page.
for _ in range(count):
    if not check_exists_by_xpath(xp):
        break
    driver.find_element(By.XPATH, xp).click()
# HTTP request headers describing a desktop Safari browser.
# NOTE(review): nothing in the visible part of this script reads HEADERS;
# confirm whether it is still needed.
HEADERS = {
    # The header name is hyphenated: a field name containing a space
    # ("User Agent") is not a valid HTTP header.
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.1 Safari/605.1.15",
    "Accept-Language": "en-US, en;q=0.5",
}
# Parse the HTML the browser is currently showing (i.e. after the button
# clicks above) with Python's built-in "html.parser" backend.
soup = BeautifulSoup(markup=driver.page_source, features="html.parser")
def select_item(tag):
    """Tag filter: match tags whose ``data-testid`` contains "listing-card".

    Args:
        tag: A parsed element offering ``has_attr`` and ``get``.

    Returns:
        True when the tag has a ``data-testid`` attribute whose value
        contains "listing-card", otherwise False.
    """
    if not tag.has_attr("data-testid"):
        return False
    return "listing-card" in tag.get("data-testid")
def select_link(tag):
    """Tag filter: match tags whose ``href`` contains "referrer_browse_type".

    Args:
        tag: A parsed element offering ``has_attr`` and ``get``.

    Returns:
        True when the tag has an ``href`` attribute whose value contains
        "referrer_browse_type", otherwise False.
    """
    if not tag.has_attr("href"):
        return False
    return "referrer_browse_type" in tag.get("href")
# Turn every listing card into one row: the card's text fields in page
# order, followed by the absolute URL of the listing.
items = soup.find_all(select_item)
data = []
for card in items:
    row = []
    for paragraph in card.find_all("p"):
        text = paragraph.text
        # Drop empty paragraphs and the two badge captions.
        if not text or text in ("Free delivery", "Buyer Protection"):
            continue
        row.append(text)

    # The card's href is appended to the site origin to form the full URL.
    anchor = card.find(select_link)
    if anchor:
        row.append("https://id.carousell.com" + anchor.get("href"))

    # Cards that yielded neither text nor a link are left out.
    if row:
        data.append(row)

# ``i`` is the number of rows collected; it is reported to the user later.
i = len(data)
# Column titles written as the first line of the export.
# NOTE(review): each row holds however many text fields its card had, so
# rows are not guaranteed to line up with these six columns - verify.
header = ["Name", "Date", "Title", "Price", "Condition", "Link"]

# A timestamp in the file name gives every run its own output file.
current_datetime = datetime.now().strftime("%Y%m%d_%H%M%S")
csv_file_name = "data_" + current_datetime + ".csv"

# newline="" lets the csv module control line endings itself.
with open(csv_file_name, "w", newline="", encoding="UTF8") as f:
    writer = csv.writer(f, delimiter=";")
    writer.writerows([header] + data)

# The browser is no longer needed once the data is on disk.
driver.quit()
print("Completed!", i, "new listings added to", csv_file_name)

answer = input("Open CSV file?(y/n): ")
if answer == "y":
    # "start" is a Windows shell built-in, hence shell=True.
    subprocess.call(["start", "excel", csv_file_name], shell=True)