-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathRVTraderScaper.cs
More file actions
123 lines (107 loc) · 5.31 KB
/
Copy pathRVTraderScaper.cs
File metadata and controls
123 lines (107 loc) · 5.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
using System;
using System.Globalization;
using System.Xml;
using HtmlAgilityPack;
using System.Linq;
namespace CLReader
{
/// <summary>
/// Scrapes RVTrader search-result pages and adds the listings that pass the
/// price and exclusion filters of a <c>SearchTerm</c> to a <c>Matches</c> collection.
/// </summary>
public static class RVTraderScraper
{
    /// <summary>
    /// Runs one scrape per comma-separated term found in <c>st.RVTraderSearch</c>.
    /// </summary>
    /// <param name="st">Search settings; supplies the terms, price/year limits and exclusions.</param>
    /// <param name="matches">Collection that receives accepted listings.</param>
    /// <param name="lastMatches">Passed through unchanged to <c>matches.AddItem</c>.</param>
    public static void Scrape(SearchTerm st, Matches matches, Matches lastMatches)
    {
        // No RVTrader terms configured: nothing to do (previously a NullReferenceException).
        if (st.RVTraderSearch == null)
        {
            return;
        }

        // If there are multiple terms, loop through them.
        foreach (string searchTermToUse in st.RVTraderSearch.Split(','))
        {
            Scrape(st, matches, lastMatches, searchTermToUse);
        }
    }

    /// <summary>
    /// Loads the first RVTrader result page for a single keyword and adds every
    /// listing that passes the price and exclusion checks to <paramref name="matches"/>.
    /// </summary>
    /// <param name="st">Search settings; supplies price/year limits and exclusions.</param>
    /// <param name="matches">Collection that receives accepted listings.</param>
    /// <param name="lastMatches">Passed through unchanged to <c>matches.AddItem</c>.</param>
    /// <param name="searchTermToUse">Keyword to search for; stored as-is in <c>Item.SearchString</c>.</param>
    /// <remarks>
    /// A listing whose markup cannot be parsed is skipped; the remaining listings are still processed.
    /// Failures while loading the page itself are not caught here.
    /// </remarks>
    public static void Scrape(SearchTerm st, Matches matches, Matches lastMatches, string searchTermToUse)
    {
        // Encode the keyword so characters such as '&' or '#' cannot corrupt the query string.
        // Unescaping first keeps terms that were already percent-encoded in configuration
        // from being double-encoded.
        string keyword = Uri.EscapeDataString(
            Uri.UnescapeDataString((searchTermToUse ?? string.Empty).Trim()));

        // Nationwide, limited by class B and less than 25'
        var url = $"https://www.rvtrader.com/search-results?type=Class%20B%7C198068&keyword={keyword}&radius=any&zip=98026&sort=create_date%3Adesc&modelkeyword=1&layoutView=listView&page=1&price={st.MinPrice}%3A{st.MaxPrice}&year={st.MinYear}%3A*&length=*%3A25&";

        var web = new HtmlWeb();
        var doc = web.Load(url);

        // Regular and featured listings use different container classes.
        var listingNodes = doc.DocumentNode
            .Descendants("div")
            .Where(d => HasClassFragment(d, "searchResultsMid listingContainer-list")
                     || HasClassFragment(d, "searchResultsMid feat-listing"));

        foreach (var node in listingNodes)
        {
            try
            {
                // Title and listing link
                var title = FindSingle(node, "a", "listing-info-title");
                string titleString = System.Net.WebUtility.HtmlDecode(title.Attributes["title"].Value);
                string link = title.Attributes["href"].Value;

                // Listing date
                var listing = FindSingle(node, "div", "companyName");
                DateTime publishDate = ParseCreatedDate(listing.InnerText);

                // Price (zero if it can't be parsed)
                var price = FindSingle(node, "div", "price");
                var priceSpan = price.Element("span");
                string priceString = priceSpan == null ? string.Empty : priceSpan.InnerText;
                long wholePrice = (long)ParsePrice(priceString);

                // A zero price is accepted only when zero prices are not being ignored;
                // otherwise the price must lie within [MinPrice, MaxPrice].
                bool priceFlag =
                    ((wholePrice == 0 && !st.IgnoreZeroPrice) || wholePrice >= st.MinPrice)
                    && wholePrice <= st.MaxPrice;

                // Check exclusions against the decoded title, i.e. the same text that is stored
                // on the item, so entity markup (e.g. "&amp;") is not treated as title text.
                bool excludeKeywordsFlag = Helper.CheckTitle(st.ExcludeKeywords, titleString);
                bool excludeCharsFlag = Helper.CheckTitle(st.ExcludeChars, titleString);

                // If we're still good to go, add the item.
                if (excludeKeywordsFlag && excludeCharsFlag && priceFlag)
                {
                    Item item = new Item
                    {
                        Link = "https://www.rvtrader.com" + link,
                        Title = titleString + priceString,
                        SearchString = searchTermToUse,
                        WebSite = "RVTrader",
                        Starred = st.Starred,
                        PublishDate = publishDate
                    };
                    matches.AddItem(item, lastMatches);
                }
            }
            catch (Exception e)
            {
                // Best effort: one malformed listing must not abort the whole page.
                // Leave a diagnostic trace instead of swallowing the failure silently.
                System.Diagnostics.Debug.WriteLine($"RVTrader: skipped unparseable listing: {e.Message}");
            }
        }
    }

    // True when the node has a class attribute whose value contains the given fragment.
    private static bool HasClassFragment(HtmlNode node, string fragment)
    {
        return node.Attributes.Contains("class")
            && node.Attributes["class"].Value.Contains(fragment);
    }

    // Returns the only descendant with the given tag and class fragment; throws if there is none or more than one.
    private static HtmlNode FindSingle(HtmlNode parent, string tag, string classFragment)
    {
        return parent
            .Descendants(tag)
            .Where(d => HasClassFragment(d, classFragment))
            .Single();
    }

    // Parses the date that follows the "Created" marker (plus one separator character) in the listing text.
    private static DateTime ParseCreatedDate(string listingText)
    {
        const string marker = "Created";
        int markerAt = listingText.IndexOf(marker, StringComparison.Ordinal);
        if (markerAt < 0)
        {
            throw new FormatException("Listing text does not contain a 'Created' date.");
        }

        string dateText = listingText.Substring(markerAt + marker.Length + 1).Trim();

        // Parse independently of the machine's regional settings.
        return DateTime.Parse(dateText, CultureInfo.InvariantCulture);
    }

    // Extracts the amount that follows the first '$' in the text; returns 0 when there is none or it cannot be parsed.
    private static decimal ParsePrice(string priceText)
    {
        int dollarAt = priceText.IndexOf('$');
        if (dollarAt < 0)
        {
            return 0m;
        }

        // Take the run of digits and separators right after the '$', whatever follows it.
        int amountStart = dollarAt + 1;
        int amountEnd = amountStart;
        while (amountEnd < priceText.Length
            && (char.IsDigit(priceText[amountEnd]) || priceText[amountEnd] == ',' || priceText[amountEnd] == '.'))
        {
            amountEnd++;
        }

        string amountText = priceText.Substring(amountStart, amountEnd - amountStart);

        decimal amount;
        return decimal.TryParse(amountText, NumberStyles.Number, CultureInfo.InvariantCulture, out amount)
            ? amount
            : 0m;
    }
}
}