-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathscraper.ts
More file actions
138 lines (115 loc) · 3.96 KB
/
Copy pathscraper.ts
File metadata and controls
138 lines (115 loc) · 3.96 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import {
DOMParser,
Element,
Node,
} from "https://deno.land/x/deno_dom/deno-dom-wasm.ts";
import { parseLectureData } from "./heuristics.ts";
import { ParsedLectureData } from "./heuristics.ts";
// Root URL of the timetable web application. The landing page is fetched from
// here directly and the table view is requested from the "anzeigen/" path below it.
const baseUrl = "https://eva2.inf.h-brs.de/stundenplan/";
/**
 * Fetches the timetable landing page at `baseUrl` and extracts what is needed
 * to query the table view.
 *
 * @returns An object with
 *   - `params`: the base query parameters (chosen week selection, days 1-7,
 *     the value of the `term` input, table mode, empty lecturer/room filters);
 *   - `semesters`: `[label, value]` pairs for every `#identifier_semester`
 *     option that carries a non-empty `value` attribute.
 * @throws Error if the page cannot be fetched or parsed, or if one of the
 *   expected form controls (or its `value` attribute) is missing.
 */
async function getParams() {
  const res = await fetch(baseUrl);
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${baseUrl}: ${res.status} ${res.statusText}`,
    );
  }
  const dom = new DOMParser().parseFromString(await res.text(), "text/html");
  if (!dom) throw new Error("Failed to parse document");

  // HACK: Table-view does not show events in the past if multiple weeks are
  // selected. For now, just select the last week if that is the current week.
  // (An earlier version simply used the pre-selected option.)
  const weekSelect = dom.querySelector("#input_weeks");
  if (!weekSelect) {
    throw new Error("Failed to find week selector (#input_weeks)");
  }
  const weekOptions = weekSelect.children;
  if (weekOptions.length === 0) {
    throw new Error("Week selector (#input_weeks) has no options");
  }
  const lastWeekOption = weekOptions[weekOptions.length - 1];
  // If the last option is marked as the current week ("aktuell"), use it;
  // otherwise fall back to the first option, which per the original author's
  // note selects all weeks.
  const weekOption = lastWeekOption.textContent.includes("aktuell")
    ? lastWeekOption
    : weekOptions[0];
  const weeks = weekOption.getAttribute("value");
  if (weeks === null) {
    throw new Error("Selected week option has no value attribute");
  }

  // deno_dom returns plain nodes from querySelectorAll, hence the cast.
  const semesterElems = Array.from(
    dom.querySelectorAll("#identifier_semester > option"),
  ) as Element[];
  // Keep only options with a usable value (skips placeholder entries).
  const semesters = semesterElems
    .map((option) => [option.textContent, option.getAttribute("value")])
    .filter((pair) => pair[1] != null && pair[1].length > 0) as [
      string,
      string,
    ][];

  const term = dom.querySelector("input[name=term]")?.getAttribute("value");
  if (term == null) {
    throw new Error("Failed to find term input (input[name=term]) with a value");
  }

  return {
    params: {
      weeks,
      days: "1-7", // Mon-Sun
      term,
      mode: "table",
      identifier_dozent: "",
      identifier_raum: "",
      show_semester: "",
    },
    semesters,
  };
}
// Maps the CSS class of a timetable table cell to the name of the field its
// text content is stored under.
const classToName = {
"liste-startzeit": "startTime",
"liste-endzeit": "endTime",
"liste-raum": "room",
"liste-veranstaltung": "title",
"liste-beginn": "date",
"liste-wer": "lecturer",
} as const;
// Union of the cell class names recognised as data columns.
type ClassName = keyof typeof classToName;
// Union of the field names those columns are mapped to.
type FieldName = typeof classToName[ClassName];
/**
 * One raw timetable row: every mapped column plus the weekday it is listed
 * under, all as unprocessed strings.
 */
export type LectureData = Record<FieldName | "weekday", string>;
// Number of data columns a row must provide; getLectureData rejects rows that
// yield a different number of recognised cells.
const columnCount = Object.keys(classToName).length;
/**
 * Requests the table view of the timetable with the given query parameters
 * and converts each table row into a `LectureData` record.
 *
 * @param params Query parameters appended to the `anzeigen/` URL.
 * @returns One record per table row (the first row of the table body is
 *   skipped), in document order.
 * @throws Error if the request fails, the response cannot be parsed, the
 *   table is missing, or a row does not contain exactly one cell for each
 *   class listed in `classToName`.
 */
async function getLectureData(params: Record<string, string>) {
  const url = new URL(baseUrl + "anzeigen/");
  for (const [name, value] of Object.entries(params)) {
    url.searchParams.set(name, value);
  }
  console.log("Using URL", url.toString());

  const res = await fetch(url);
  if (!res.ok) {
    throw new Error(
      `Failed to fetch ${url.href}: ${res.status} ${res.statusText}`,
    );
  }
  const text = await res.text();
  const dom = new DOMParser().parseFromString(text, "text/html");
  if (!dom) throw new Error("Failed to parse document");

  const table = dom.querySelector("table > tbody");
  if (!table) throw new Error("Failed to find timetable table in response");
  // The first row is skipped (presumably the header row).
  const rows = Array.from(table.children).slice(1);

  const events: LectureData[] = [];
  let currentDay = "";
  for (const row of rows) {
    // A leading cell with a rowspan starts a new weekday group; the rows that
    // follow inherit that weekday until the next such cell.
    const firstCell = row.children[0];
    if (firstCell?.getAttribute("rowspan")) {
      currentDay = firstCell.textContent;
    }

    const attributes = Array.from(row.children)
      // Own-property check: the `in` operator would also accept inherited
      // keys such as "constructor" or "toString".
      .filter((elem) =>
        Object.prototype.hasOwnProperty.call(classToName, elem.className)
      )
      .map((elem) => [
        classToName[elem.className as keyof typeof classToName],
        elem.textContent,
      ]);
    if (attributes.length !== columnCount) {
      throw new Error(
        `Failed to parse row, expected ${columnCount} attributes, got ${attributes.length}`,
      );
    }

    const attributesRecord = Object.fromEntries(attributes) as Record<
      FieldName,
      string
    >;
    events.push({
      weekday: currentDay,
      ...attributesRecord,
    });
  }
  return events;
}
/**
 * A lecture entry as returned by `parseLectureData`, tagged with the label of
 * the semester option it was scraped for.
 */
export type FullLectureData = ParsedLectureData & { semesterName: string }
/**
 * Scrapes the timetable for every semester offered on the landing page.
 *
 * The base parameters and the semester list come from `getParams`. Each
 * semester is then requested one after another through `getLectureData`, and
 * every row is passed through `parseLectureData` and tagged with the
 * semester's label.
 *
 * @returns All parsed lectures of all semesters, in request order.
 */
export async function getAllSemesterData() {
  const { params: baseParams, semesters: semesterList } = await getParams();
  const collected: FullLectureData[] = [];

  for (const [semesterName, semesterId] of semesterList) {
    const query = { ...baseParams, identifier_semester: semesterId };
    const lectures = await getLectureData(query);
    for (const lecture of lectures) {
      // semesterName goes first so the spread keeps the same key order and
      // override semantics as before.
      collected.push({ semesterName, ...parseLectureData(lecture) });
    }
  }

  return collected;
}