import requests
from bs4 import BeautifulSoup

from waste_collection_schedule import Collection  # type: ignore[attr-defined]
from waste_collection_schedule.service.ICS import ICS

TITLE = "AW Harburg"
DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"

TEST_CASES = {
    "CityWithTwoLevels": {
        "district_level_1": "Hanstedt",
        "district_level_2": "Evendorf",
    },
    "CityWithThreeLevels": {
        "district_level_1": "Buchholz",
        "district_level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
        "district_level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
    },
}

# Endpoint returning the <select> options of the next district level.
AJAX_URL = "https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html"
# Endpoint rendering the calendar page that contains the iCal download link.
SEARCH_URL = "https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html"

# Browser-like headers: the site appears to reject requests that look like
# scripts, so mimic a desktop browser for every request of the session.
HEADERS = {
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "sec-ch-ua": '" Not A;Brand";v="99", "Chromium";v="98", "Opera";v="84"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "Upgrade-Insecure-Requests": "1",
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/84.0.4316.21",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-User": "?1",
    "Sec-Fetch-Dest": "document",
    "Accept-Language": "de-DE,de;q=0.9",
}


class Source:
    """Scrape the AW Landkreis Harburg waste collection calendar.

    The website organizes districts in up to three cascading levels
    (town -> district -> street section).  The textual district names from
    the configuration are resolved to their numeric IDs by parsing the
    cascading ``<select>`` elements, then the iCal export for the selected
    district is downloaded and converted into Collection entries.
    """

    def __init__(self, district_level_1, district_level_2, district_level_3=None):
        self._district_level_1 = district_level_1
        self._district_level_2 = district_level_2
        self._district_level_3 = district_level_3  # only some districts have a 3rd level
        self._ics = ICS()

    def fetch(self):
        """Fetch and return the collection schedule for the configured district."""
        # Use a session to keep cookies across the whole request sequence.
        s = requests.Session()

        # Load the start page.  Double loading is on purpose: sometimes the
        # first request of a session answers with an overlay page which is
        # gone on the second try.
        response = s.get(URL, headers=HEADERS)
        if "Zur aufgerufenen Seite" in response.text:
            response = s.get(URL, headers=HEADERS)
        if response.status_code != 200:
            raise Exception("Error: failed to fetch first url: {}".format(URL))

        # Level 1: resolve the town name to its numeric ID.
        level_1_ids = self._parse_level_ids(response.text, "strukturEbene1", 1)
        if self._district_level_1 not in level_1_ids:
            raise Exception(
                "Error: District 1 is not in the dictionary: {}".format(
                    (self._district_level_1, level_1_ids)
                )
            )

        # Level 2: fetch the options belonging to the selected level-1 ID.
        url = "{}?parent={}&ebene=1&portal=1&selected_ebene=0".format(
            AJAX_URL, level_1_ids[self._district_level_1]
        )
        response = s.get(url, headers=HEADERS)
        if response.status_code != 200:
            raise Exception("Error: failed to fetch second url: {}".format(url))
        level_2_ids = self._parse_level_ids(response.text, "strukturEbene2", 2)
        if self._district_level_2 not in level_2_ids:
            raise Exception(
                "Error: District 2 is not in the dictionary: {}".format(
                    (self._district_level_2, level_2_ids)
                )
            )
        selected_id = level_2_ids[self._district_level_2]

        # Level 3 - only applicable for districts with a third level.
        if self._district_level_3 is not None:
            url = "{}?parent={}&ebene=2&portal=1&selected_ebene=0".format(
                AJAX_URL, selected_id
            )
            response = s.get(url, headers=HEADERS)
            if response.status_code != 200:
                raise Exception("Error: failed to fetch third url: {}".format(url))
            level_3_ids = self._parse_level_ids(response.text, "strukturEbene3", 3)
            if self._district_level_3 not in level_3_ids:
                raise Exception(
                    "Error: District 3 is not in the dictionary: {}".format(
                        (self._district_level_3, level_3_ids)
                    )
                )
            selected_id = level_3_ids[self._district_level_3]

        # Request the calendar page for the most specific district ID.
        url = "{}?selected_ebene={}&owner=20100".format(SEARCH_URL, selected_id)
        response = s.get(url, headers=HEADERS)

        # Sometimes there is no garbage calendar available.
        if "Es sind keine Abfuhrbezirke hinterlegt." in response.text:
            # Bug fix: the original concatenated district_level_3, which is
            # None for two-level districts and raised a TypeError instead of
            # the intended error.  Report the most specific configured name.
            district = self._district_level_3 or self._district_level_2
            raise Exception(
                'Error: "Es sind keine Abfuhrbezirke hinterlegt." for "{}"'
                " please use different input data.".format(district)
            )

        # Find the iCal download link on the calendar page.
        soup = BeautifulSoup(response.text, features="html.parser")
        links = soup.find_all("a")
        ical_url = ""
        for any_link in links:
            if " als iCal" in any_link.text:
                ical_url = any_link.get("href")
        if "ical.html" not in ical_url:
            raise Exception("No ical Link in the result: " + str(links))

        # Download and parse the iCal file.  (The original issued a redundant
        # POST to the same URL first, downloading the calendar twice.)
        return self.fetch_ics(ical_url, headers=HEADERS)

    @staticmethod
    def _parse_level_ids(html, select_id, level):
        """Parse one district ``<select>`` element and return ``{name: id}``.

        *level* is only used in the error message.  Raises if the select
        element is missing or contains no real options.
        """
        soup = BeautifulSoup(html, features="html.parser")
        select = soup.find("select", id=select_id)
        ids = {}
        if select is not None:
            for option in select.find_all("option"):
                # Ignore the "Bitte wählen..." placeholder (value "0").
                if option.get("value") != "0":
                    ids[option.text] = option.get("value")
        if not ids:
            raise Exception("Error: Level {} Dictionary empty".format(level))
        return ids

    def fetch_ics(self, url, headers=None):
        """Download an iCal file from *url* and convert it to Collections.

        ``headers`` defaults to no extra headers (the original used a
        mutable ``{}`` default, which is an anti-pattern).
        """
        r = requests.get(url, headers=headers or {})
        if not r.ok:
            raise Exception("Error: failed to fetch url: {}".format(url))

        # Parse the ics file; the server mislabels UTF-8 content as
        # ISO-8859-1, so undo the wrong decoding first.
        if r.encoding == "ISO-8859-1":
            dates = self._ics.convert(r.text.encode("latin_1").decode("utf-8"))
        else:
            dates = self._ics.convert(r.text)

        return [Collection(d[0], d[1]) for d in dates]
+*(string) (required)* + +**district_level_2**
+*(string) (required)* + +**district_level_3**
+*(string) (optional - depending on district_level_2)* + +## Example + +```yaml +waste_collection_schedule: + sources: + - name: aw_harburg + args: + district_level_1: "Buchholz" + district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)" + district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2" + customize: + - type: Biotonne + alias: Biomüll + show: true + - type: Grünabfall + alias: Grünabfall + show: true + - type: Gelber Sack + alias: Gelber Sack + show: true + - type: Hausmüll 14-täglich + alias: Hausmüll 2wö + show: true + - type: Hausmüll 4-wöchentlich + alias: Hausmüll 4wö + show: true + - type: Altpapier + alias: Papier + show: true +``` + +Use `sources.customize` to filter or rename the waste types: + +```yaml +waste_collection_schedule: + sources: + - name: aw_harburg + args: + district_level_1: "Buchholz" + district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)" + district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2" + customize: + - type: Biotonne + alias: Biomüll + show: true + - type: Grünabfall + alias: Grünabfall + show: true + - type: Gelber Sack + alias: Gelber Sack + show: true + - type: Hausmüll 14-täglich + alias: Hausmüll 2wö + show: true + - type: Hausmüll 4-wöchentlich + alias: Hausmüll 4wö + show: true + - type: Altpapier + alias: Papier + show: true + +sensor: + # Nächste Müllabholung + - platform: waste_collection_schedule + name: Nächste Leerung + + # Nächste Biomüll Leerung + - platform: waste_collection_schedule + name: Nächste Biomüll Leerung + types: Biomüll + + # Nächste Grünabfall Abholung + - platform: waste_collection_schedule + name: Nächste Grünabfall Abholung + types: Grünabfall + + # Nächste Gelber Sack Abholung + - platform: waste_collection_schedule + name: Nächste Gelber Sack Abholung + types: Gelber Sack + + # Nächste Hausmüll 14-täglich Leerung + - platform: waste_collection_schedule + name: Nächste Hausmüll 2wö Leerung + types: Hausmüll 2wö + + # Nächste Hausmüll 4-wöchentlich Leerung + - platform: 
waste_collection_schedule
+    name: Nächste Hausmüll 4wö Leerung
+    types: Hausmüll 4wö
+
+  # Nächste Papier Leerung
+  - platform: waste_collection_schedule
+    name: Nächste Papier Leerung
+    types: Papier
+```
+
+## How to get the source arguments
+
+Check the [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) to find out whether your address requires two or three levels of entries in the configuration. The strings must be written exactly as shown in the web interface, e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158".
\ No newline at end of file