Skip to content

Commit a6d5447

Browse files
author
PingIsFun
committed
Revert "Fixed issue 12. Not fully tested yet."
This reverts commit f858c99.
1 parent a8d3c34 commit a6d5447

1 file changed

Lines changed: 49 additions & 33 deletions

File tree

src/eAsisitent_scraper/scraper.py

Lines changed: 49 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,20 @@ def request_schedule(
4747
today = datetime.date.today()
4848

4949

50+
def hour_to_num(hour: str) -> int:
    """
    Convert an hour label to its integer index.

    The special label ``"predura"`` (matched case-insensitively) maps to 0.
    Any other label is expected to look like ``"<number>. ura"`` and maps to
    the leading number.

    :param hour: the hour label that you want to be converted to int
    :type hour: str
    :return: The hour as an integer.
    :rtype: int
    :raises ValueError: if the label is not "predura" and does not start
        with an integer.
    """
    # Scraped element text may carry surrounding whitespace/newlines; strip it
    # so the "predura" comparison does not fall through to int() and crash.
    label = hour.strip()
    if label.lower() == "predura":
        return 0
    # Everything before the ". ura" suffix is the hour number, e.g. "3. ura".
    return int(label.split(". ura")[0])
62+
63+
5064
def get_schedule_data(
5165
school_id: str,
5266
class_id=0,
@@ -81,64 +95,64 @@ def get_schedule_data(
8195
school_week=school_week,
8296
student_id=student_id)
8397
soup = BeautifulSoup(response.text, "html5lib")
84-
table_rows = soup.select("body > table > tbody > tr")
98+
seznam_ur_teden = soup.select("body > table > tbody > tr")
8599

86100
count: int = -1
87101

88102
dates: list = []
89103
dates_formatted: list = []
90104
hour_times: list = []
91105

92-
scraped_data: dict = {str(i): {} for i in range(7)}
106+
scraped_data: dict = {str(i): {str(j): {} for j in range(15)} for i in range(7)}
107+
scraped_data["week_data"] = {"hour_times": [], "dates": [], "current_week": "", "class": ""}
93108

94109
current_week = int("".join(re.findall("[0-9]", [item.text.split(",")[0] for item in soup.select("body > div > span")][0])))
95110
current_class = str([item.text.strip() for item in soup.select("body > div > strong")][0])
96111

97-
for table_row in table_rows:
112+
for i in seznam_ur_teden:
98113
if count == -1:
99-
for days in table_row:
114+
for days in i:
100115
if type(days) == bs4.element.Tag:
101116
day = days.select("div")
102117
if day[0].text != "Ura":
103118
temp_date = re.findall(r"[^A-z,. ]+", day[1].text)
104119
temp_datetime = datetime.datetime(
105-
day=int(temp_date[0]),
106-
month=int(temp_date[1]),
107-
year=today.year,
108-
)
120+
day=int(temp_date[0]),
121+
month=int(temp_date[1]),
122+
year=today.year,
123+
)
109124
dates_formatted.append(str(temp_datetime.strftime("%Y-%m-%d")))
110125
dates.append(temp_datetime)
111126
if count >= 0:
112-
row = table_row.find_all("td", class_="ednevnik-seznam_ur_teden-td")
113-
hour_name = str(row[0].find(class_="text14").text)
127+
row = i.find_all("td", class_="ednevnik-seznam_ur_teden-td")
128+
hour_name = row[0].find(class_="text14").text
114129
hour_time = row[0].find(class_="text10").text
115130
hour_times.append(hour_time)
116-
131+
hour_num = str(hour_to_num(hour_name))
132+
hour_num = str(hour_num)
117133
count2: int = 0
118-
for row_part in row:
134+
for block in row:
119135
if count2 != 0:
120136
"""Pass the first collum that contains hour times"""
121137
date = dates[count2 - 1]
122138
day_num = str(date.weekday())
123139
date_formatted = str(date.strftime("%Y-%m-%d"))
124-
scraped_data[day_num].update({str(hour_name): {}})
125-
126-
if "style" not in row_part.attrs:
140+
if "style" not in block.attrs:
127141
data_out = {
128142
"subject": None,
129143
"teacher": None,
130144
"classroom": None,
131145
"group": None,
132146
"event": None,
133-
"hour": hour_name,
147+
"hour": int(hour_num),
134148
"week_day": int(day_num),
135149
"hour_in_block": 0,
136150
"date": date_formatted,
137151
}
138-
scraped_data[day_num][hour_name]["0"] = data_out
152+
scraped_data[day_num][hour_num]["0"] = data_out
139153
else:
140154
classes_in_hour = 0
141-
for section in row_part:
155+
for section in block:
142156
if type(section) == bs4.element.Tag:
143157
event = None
144158
subject = None
@@ -184,9 +198,10 @@ def get_schedule_data(
184198
teacher = teacher_classroom[0]
185199
classroom = teacher_classroom[1]
186200
except IndexError:
187-
pass # Makes it so empty strings don't crash the program
201+
pass
188202
except AttributeError:
189-
pass # Makes it so empty strings don't crash the program
203+
"""Makes it so empty strings don't crash the program"""
204+
pass
190205
if group_raw:
191206
for gr in group_raw:
192207
group.append(gr.text)
@@ -196,17 +211,17 @@ def get_schedule_data(
196211
section.attrs["id"],
197212
)
198213
):
199-
# Check for blocks
200-
for block in section:
201-
if type(block) == bs4.element.Tag:
214+
"""Check for blocks"""
215+
for block_part in section:
216+
if type(block_part) == bs4.element.Tag:
202217
event = None
203218
subject = None
204219
group_raw = None
205220
group = []
206221
teacher = None
207222
classroom = None
208223
teacher_classroom = None
209-
for img in block.select("img"):
224+
for img in block_part.select("img"):
210225
events_list = {
211226
"Odpadla ura": "cancelled",
212227
"Dogodek": "event",
@@ -227,15 +242,15 @@ def get_schedule_data(
227242
event = "unknown_event"
228243
try:
229244
subject = (
230-
block.find(class_="text14")
245+
block_part.find(class_="text14")
231246
.text.replace("\n", "")
232247
.replace("\t", "")
233248
)
234-
group_raw = block.find_all(
249+
group_raw = block_part.find_all(
235250
class_="text11 gray bold"
236251
)
237252
teacher_classroom = (
238-
block.find(class_="text11")
253+
block_part.find(class_="text11")
239254
.text.replace("\n", "")
240255
.replace("\t", "")
241256
.replace("\r", "")
@@ -246,7 +261,8 @@ def get_schedule_data(
246261
except IndexError:
247262
pass
248263
except AttributeError:
249-
pass # Makes it so empty strings don't crash the program
264+
"""Makes it so empty strings don't crash the program"""
265+
pass
250266
if group_raw:
251267
for gr in group_raw:
252268
group.append(gr.text)
@@ -256,12 +272,12 @@ def get_schedule_data(
256272
"classroom": classroom,
257273
"group": group,
258274
"event": event,
259-
"hour": hour_name,
275+
"hour": int(hour_num),
260276
"week_day": int(day_num),
261277
"hour_in_block": int(classes_in_hour),
262278
"date": date_formatted,
263279
}
264-
scraped_data[day_num][hour_name][
280+
scraped_data[day_num][hour_num][
265281
classes_in_hour
266282
] = data_out
267283
classes_in_hour += 1
@@ -273,20 +289,20 @@ def get_schedule_data(
273289
"classroom": classroom,
274290
"group": group,
275291
"event": event,
276-
"hour": hour_name,
292+
"hour": int(hour_num),
277293
"week_day": int(day_num),
278294
"hour_in_block": int(classes_in_hour),
279295
"date": date_formatted,
280296
}
281-
scraped_data[day_num][hour_name][
297+
scraped_data[day_num][hour_num][
282298
classes_in_hour
283299
] = data_out
284300
classes_in_hour += 1
285301
count2 += 1
286302
count += 1
287-
scraped_data["week_data"] = {"hour_times": [], "dates": [], "current_week": "", "class": ""}
288303
scraped_data["week_data"]["hour_times"] = hour_times
289304
scraped_data["week_data"]["dates"] = dates_formatted
290305
scraped_data["week_data"]["current_week"] = current_week
291306
scraped_data["week_data"]["class"] = current_class
307+
292308
return scraped_data

0 commit comments

Comments
 (0)