diff --git a/custom_components/waste_collection_schedule/waste_collection_schedule/source/aw_harburg.py b/custom_components/waste_collection_schedule/waste_collection_schedule/source/aw_harburg.py
deleted file mode 100644
index d8478c62..00000000
--- a/custom_components/waste_collection_schedule/waste_collection_schedule/source/aw_harburg.py
+++ /dev/null
@@ -1,203 +0,0 @@
-import requests
-import json
-from waste_collection_schedule import Collection  # type: ignore[attr-defined]
-from waste_collection_schedule.service.ICS import ICS
-from bs4 import BeautifulSoup
-
-TITLE = "AW Harburg"
-DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
-URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"
-
-TEST_CASES = {
-    "CityWithTwoLevels": {
-        "district_level_1": "Hanstedt",
-        "district_level_2": "Evendorf",
-    },
-    "CityWithThreeLevels": {
-        "district_level_1": "Buchholz",
-        "district_level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
-        "district_level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
-    },
-}
-
-
-class Source:
-    def __init__(self, district_level_1, district_level_2, district_level_3=None):
-        self._district_level_1 = district_level_1
-        self._district_level_2 = district_level_2
-        self._district_level_3 = district_level_3
-        self._ics = ICS()
-
-    def fetch(self):
-        # Use a session to keep cookies and stuff
-        s = requests.Session()
-
-        # Creat some fake header because for some reason people seem to believe it is bad
-        # to read public garbage collection data via a script
-        headers = {
-            'Connection': 'keep-alive',
-            'Cache-Control': 'max-age=0',
-            'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Opera";v="84"',
-            'sec-ch-ua-mobile': '?0',
-            'sec-ch-ua-platform': '"Windows"',
-            'Upgrade-Insecure-Requests': '1',
-            'Content-Type': 'application/x-www-form-urlencoded',
-            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/84.0.4316.21',
-            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
-            'Sec-Fetch-Site': 'same-origin',
-            'Sec-Fetch-Mode': 'navigate',
-            'Sec-Fetch-User': '?1',
-            'Sec-Fetch-Dest': 'document',
-            'Accept-Language': 'de-DE,de;q=0.9',
-        }
-
-        # Get the IDs of the districts on the first level
-        # Double loading is on purpose because sometimes the webpage has an overlay
-        # which is gone on the second try in a session
-        response = s.get(URL, headers=headers)
-        if "Zur aufgerufenen Seite" in response.text:
-            response = s.get(URL, headers=headers)
-        if response.status_code != 200:
-            raise Exception(
-                "Error: failed to fetch first url: {}".format(
-                    URL
-                )
-            )
-        soup = BeautifulSoup(response.text, features="html.parser")
-        select_content = soup.find_all("select", id="strukturEbene1")
-        soup = BeautifulSoup(str(select_content), features="html.parser")
-        options_content = soup.find_all("option")
-        level_1_ids = {}
-        for option in options_content:
-            # Ignore the "Bitte wählen..."
- if option.get("value")!="0": - level_1_ids[option.text] = option.get("value") - if level_1_ids == {}: - raise Exception( - "Error: Level 1 Dictionary empty" - ) - if self._district_level_1 not in level_1_ids: - raise Exception( - "Error: District 1 is not in the dictionary: {}".format( - (self._district_level_1, level_1_ids) - ) - ) - - # Get the IDs of the districts on the second level - url = 'https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html?parent=' + level_1_ids[self._district_level_1] + '&ebene=1&portal=1&selected_ebene=0' - - response = s.get(url, headers=headers) - if response.status_code != 200: - raise Exception( - "Error: failed to fetch second url: {}".format( - url - ) - ) - soup = BeautifulSoup(response.text, features="html.parser") - select_content = soup.find_all("select", id="strukturEbene2") - soup = BeautifulSoup(str(select_content), features="html.parser") - options_content = soup.find_all("option") - level_2_ids = {} - for option in options_content: - # Ignore the "Bitte wählen..." - if option.get("value")!="0": - level_2_ids[option.text] = option.get("value") - if level_2_ids == {}: - raise Exception( - "Error: Level 2 Dictionary empty" - ) - if self._district_level_2 not in level_2_ids: - raise Exception( - "Error: District 2 is not in the dictionary: {}".format( - (self._district_level_2, level_2_ids) - ) - ) - - # Get the IDs of the third level - if applicable - if self._district_level_3 != None: - # Get the IDs of the districts on the third level - url = 'https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html?parent=' + level_2_ids[self._district_level_2] + '&ebene=2&portal=1&selected_ebene=0' - - response = s.get(url, headers=headers) - if response.status_code != 200: - raise Exception( - "Error: failed to fetch third url: {}".format( - url - ) - ) - soup = BeautifulSoup(response.text, features="html.parser") - select_content = soup.find_all("select", id="strukturEbene3") - soup = BeautifulSoup(str(select_content), features="html.parser") - options_content = soup.find_all("option") - level_3_ids = {} - for option in options_content: - # Ignore the "Bitte wählen..." - if option.get("value")!="0": - level_3_ids[option.text] = option.get("value") - if level_3_ids == {}: - raise Exception( - "Error: Level 3 Dictionary empty" - ) - if self._district_level_3 not in level_3_ids: - raise Exception( - "Error: District 3 is not in the dictionary: {}".format( - (self._district_level_3, level_3_ids) - ) - ) - - # Prepare data for the real web request - if self._district_level_3 != None: - url = 'https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html?selected_ebene=' + level_3_ids[self._district_level_3] + '&owner=20100' - else: - url = 'https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html?selected_ebene=' + level_2_ids[self._district_level_2] + '&owner=20100' - - response = s.get(url, headers=headers) - # Sometimes there is no garbage calendar available - if "Es sind keine Abfuhrbezirke hinterlegt." in response.text: - raise Exception( - "Error: \"Es sind keine Abfuhrbezirke hinterlegt.\" for \"" + self._district_level_3 + "\" please use different input data." 
-            )
-        soup = BeautifulSoup(response.text, features="html.parser")
-        links = soup.find_all("a")
-        ical_url = ""
-        for any_link in links:
-            if " als iCal" in any_link.text:
-                ical_url = any_link.get("href")
-
-        if "ical.html" not in ical_url:
-            raise Exception(
-                "No ical Link in the result: " + str(links)
-            )
-
-        # Get the final data
-        response = s.post(ical_url, headers=headers)
-
-        # Stop if something else as status code 200 is returned
-        if response.status_code != 200:
-            raise Exception(
-                "Error: failed to fetch ical_url: {}".format(
-                    ical_url
-                )
-            )
-
-        return self.fetch_ics(ical_url, headers=headers)
-
-    def fetch_ics(self, url, headers={}):
-        r = requests.get(url, headers=headers)
-
-        if not r.ok:
-            raise Exception(
-                "Error: failed to fetch url: {}".format(
-                    url
-                )
-            )
-
-        # Parse ics file, fix broken encoding
-        if r.encoding=="ISO-8859-1":
-            dates = self._ics.convert(r.text.encode("latin_1").decode("utf-8"))
-        else:
-            dates = self._ics.convert(r.text)
-
-        entries = []
-        for d in dates:
-            entries.append(Collection(d[0], d[1]))
-        return entries
diff --git a/custom_components/waste_collection_schedule/waste_collection_schedule/source/aw_harburg_de.py b/custom_components/waste_collection_schedule/waste_collection_schedule/source/aw_harburg_de.py
new file mode 100644
index 00000000..54bec58f
--- /dev/null
+++ b/custom_components/waste_collection_schedule/waste_collection_schedule/source/aw_harburg_de.py
@@ -0,0 +1,134 @@
+import requests
+from bs4 import BeautifulSoup
+from waste_collection_schedule import Collection  # type: ignore[attr-defined]
+from waste_collection_schedule.service.ICS import ICS
+
+TITLE = "AW Harburg"
+DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
+URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"
+
+TEST_CASES = {
+    "CityWithTwoLevels": {"level_1": "Hanstedt", "level_2": "Evendorf"},
+    "CityWithThreeLevels": {
+        "level_1": "Buchholz",
+        "level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
+        "level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
+    },
+}
1 / 2", + }, +} + +HEADERS = { + "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)", +} + + +class Source: + def __init__(self, level_1, level_2, level_3=None): + self._districts = [level_1, level_2, level_3] + self._ics = ICS() + + def fetch(self): + # Use a session to keep cookies and stuff + session = requests.Session() + + # Get the IDs of the districts on the first level + # Double loading is on purpose because sometimes the webpage has an overlay + # which is gone on the second try in a session + r = session.get(URL, headers=HEADERS) + if "Zur aufgerufenen Seite" in r.text: + r = session.get(URL, headers=HEADERS) + if r.status_code != 200: + raise Exception(f"Error: failed to fetch first url: {URL}") + + # Get the IDs of the districts on the first level + id = self.parse_level(r.text, 1) + + # Get the IDs of the districts on the second level + url = ( + "https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html" + ) + params = { + "parent": id, + "ebene": 1, + "portal": 1, + "selected_ebene": 0, + } + r = session.get(url, params=params, headers=HEADERS) + if r.status_code != 200: + raise Exception(f"Error: failed to fetch second url: {url}") + + # Get the IDs of the districts on the second level + id = self.parse_level(r.text, 2) + + # Get the IDs of the third level - if applicable + if self._districts[3 - 1] is not None: + # Get the IDs of the districts on the third level + params = { + "parent": id, + "ebene": 2, + "portal": 1, + "selected_ebene": 0, + } + r = session.get(url, params=params, headers=HEADERS) + if r.status_code != 200: + raise Exception(f"Error: failed to fetch third url: {url}") + + # Get the IDs of the districts on the third level + id = self.parse_level(r.text, 3) + + # Prepare data for the real web request + url = "https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html" + params = { + "selected_ebene": id, + "owner": 20100, + } + r = session.get(url, params=params, headers=HEADERS) + + # Sometimes there is no garbage calendar available + if "Es sind keine Abfuhrbezirke hinterlegt." in r.text: + raise Exception( + f'Error: "Es sind keine Abfuhrbezirke hinterlegt." for "{self._districts[3-1]}". Please use different input data.' + ) + + soup = BeautifulSoup(r.text, features="html.parser") + links = soup.find_all("a") + ical_url = "" + for any_link in links: + if " als iCal" in any_link.text: + ical_url = any_link.get("href") + + if "ical.html" not in ical_url: + raise Exception("No ical Link in the result: " + str(links)) + + # Get the final data + r = requests.get(ical_url, headers=HEADERS) + if not r.ok: + raise Exception(f"Error: failed to fetch url: {ical_url}") + + # Parse ics file + dates = self._ics.convert(r.text) + + entries = [] + for d in dates: + entries.append(Collection(d[0], d[1])) + return entries + + def parse_level(self, response, level): + soup = BeautifulSoup(response, features="html.parser") + select_content = soup.find_all("select", id=f"strukturEbene{level}") + soup = BeautifulSoup(str(select_content), features="html.parser") + options_content = soup.find_all("option") + level_ids = {} + for option in options_content: + # Ignore the "Bitte wählen..." 
+ if option.get("value") != "0": + level_ids[option.text] = option.get("value") + + if level_ids == {}: + raise Exception(f"Error: Level {level} Dictionary empty") + + if self._districts[level - 1] not in level_ids: + raise Exception( + f"Error: District {self._districts[level]} is not in the dictionary: {level_ids}" + ) + + return level_ids[self._districts[level - 1]] diff --git a/doc/source/aw_harburg_de.md b/doc/source/aw_harburg_de.md index 6e375734..b042ba29 100644 --- a/doc/source/aw_harburg_de.md +++ b/doc/source/aw_harburg_de.md @@ -9,118 +9,44 @@ waste_collection_schedule: sources: - name: aw_harburg args: - district_level_1: "Hanstedt" - district_level_2: "Evendorf" + level_1: LEVEL_1 + level_2: LEVEL_2 + level_3: LEVEL_3 ``` ### Configuration Variables -**district_level_1**
diff --git a/doc/source/aw_harburg_de.md b/doc/source/aw_harburg_de.md
index 6e375734..b042ba29 100644
--- a/doc/source/aw_harburg_de.md
+++ b/doc/source/aw_harburg_de.md
@@ -9,118 +9,44 @@
 waste_collection_schedule:
   sources:
-    - name: aw_harburg
+    - name: aw_harburg_de
       args:
-        district_level_1: "Hanstedt"
-        district_level_2: "Evendorf"
+        level_1: LEVEL_1
+        level_2: LEVEL_2
+        level_3: LEVEL_3
 ```
 
 ### Configuration Variables
 
-**district_level_1**<br>
+**level_1**<br>
 *(string) (required)*
 
-**district_level_2**<br>
+**level_2**<br>
 *(string) (required)*
 
-**district_level_3**<br>
-*(string) (optional - depending on district_level_2)*
+**level_3**<br>
+*(string) (optional - depending on level_2)*
 
 ## Example
 
 ```yaml
 waste_collection_schedule:
   sources:
-    - name: aw_harburg
+    - name: aw_harburg_de
       args:
-        district_level_1: "Buchholz"
-        district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
-        district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
-      customize:
-        - type: Biotonne
-          alias: Biomüll
-          show: true
-        - type: Grünabfall
-          alias: Grünabfall
-          show: true
-        - type: Gelber Sack
-          alias: Gelber Sack
-          show: true
-        - type: Hausmüll 14-täglich
-          alias: Hausmüll 2wö
-          show: true
-        - type: Hausmüll 4-wöchentlich
-          alias: Hausmüll 4wö
-          show: true
-        - type: Altpapier
-          alias: Papier
-          show: true
+        level_1: "Hanstedt"
+        level_2: "Evendorf"
 ```
 
-Use `sources.customize` to filter or rename the waste types:
 ```yaml
 waste_collection_schedule:
   sources:
-    - name: aw_harburg
+    - name: aw_harburg_de
       args:
-        district_level_1: "Buchholz"
-        district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
-        district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
-      customize:
-        - type: Biotonne
-          alias: Biomüll
-          show: true
-        - type: Grünabfall
-          alias: Grünabfall
-          show: true
-        - type: Gelber Sack
-          alias: Gelber Sack
-          show: true
-        - type: Hausmüll 14-täglich
-          alias: Hausmüll 2wö
-          show: true
-        - type: Hausmüll 4-wöchentlich
-          alias: Hausmüll 4wö
-          show: true
-        - type: Altpapier
-          alias: Papier
-          show: true
-
-sensor:
-  # Nächste Müllabholung
-  - platform: waste_collection_schedule
-    name: Nächste Leerung
-
-  # Nächste Biomüll Leerung
-  - platform: waste_collection_schedule
-    name: Nächste Biomüll Leerung
-    types: Biomüll
-
-  # Nächste Grünabfall Abholung
-  - platform: waste_collection_schedule
-    name: Nächste Grünabfall Abholung
-    types: Grünabfall
-
-  # Nächste Gelber Sack Abholung
-  - platform: waste_collection_schedule
-    name: Nächste Gelber Sack Abholung
-    types: Gelber Sack
-
-  # Nächste Hausmüll 14-täglich Leerung
-  - platform: waste_collection_schedule
-    name: Nächste Hausmüll 2wö Leerung
-    types: Hausmüll 2wö
-
-  # Nächste Hausmüll 4-wöchentlich Leerung
-  - platform: waste_collection_schedule
-    name: Nächste Hausmüll 4wö Leerung
-    types: Hausmüll 4wö
-
-  # Nächste Papier Leerung
-  - platform: waste_collection_schedule
-    name: Nächste Papier Leerung
-    types: Papier
+        level_1: "Buchholz"
+        level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
+        level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
 ```
 
 ## How to get the source arguments
 
-Check [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) if you need two or three levels of entries in the config. The strings need to be written in the exact same way as in the webinterface e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158"
\ No newline at end of file
+Check [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) to see whether your address needs two or three levels of entries in the config. The strings must be written exactly as they appear in the web interface, e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158".
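The `customize` filtering shown in the removed example still applies to the renamed source; a minimal sketch with the new argument names (waste-type strings and `customize` keys taken from the example deleted above):

```yaml
waste_collection_schedule:
  sources:
    - name: aw_harburg_de
      args:
        level_1: "Buchholz"
        level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
        level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
      customize:
        - type: Hausmüll 14-täglich
          alias: Hausmüll 2wö
        - type: Altpapier
          alias: Papier
```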
diff --git a/info.md b/info.md
index 0c9f140b..3396c125 100644
--- a/info.md
+++ b/info.md
@@ -62,6 +62,7 @@ Currently the following service providers are supported:
 - [Abfall.IO / AbfallPlus.de](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/abfall_io.md)
 - [AbfallNavi.de (RegioIT.de)](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/abfallnavi_de.md)
 - [Abfallkalender Würzburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/wuerzburg_de.md)
+- [Abfallwirtschaft Landkreis Harburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/aw_harburg_de.md)
 - [Abfallwirtschaft Rendsburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/awr_de.md)
 - [Abfallwirtschaft Stuttgart](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/stuttgart_de.md)
 - [Abfallwirtschaft Südholstein](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/awsh_de.md)