refactor aw_harburg_de

This commit is contained in:
mampfes
2022-04-02 12:49:47 +02:00
parent fd26bf7b84
commit 9b90fd5a57
4 changed files with 150 additions and 292 deletions

View File

@@ -1,203 +0,0 @@
import requests
import json
from waste_collection_schedule import Collection # type: ignore[attr-defined]
from waste_collection_schedule.service.ICS import ICS
from bs4 import BeautifulSoup
# Source metadata: display name, description, and entry-point URL of the
# Abfallwirtschaft Landkreis Harburg waste-collection calendar.
TITLE = "AW Harburg"
DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"
# Example configurations exercised by the test suite: districts may be
# addressed with either two or three hierarchy levels, mirroring the
# cascading drop-downs on the provider's web form.
TEST_CASES = {
    "CityWithTwoLevels": {
        "district_level_1": "Hanstedt",
        "district_level_2": "Evendorf",
    },
    "CityWithThreeLevels": {
        "district_level_1": "Buchholz",
        "district_level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
        "district_level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
    },
}
class Source:
    """Scrape waste-collection dates from the Landkreis Harburg portal.

    Districts are addressed through a two- or three-level cascade of
    drop-downs; the configured names must match the web form exactly.
    """

    def __init__(self, district_level_1, district_level_2, district_level_3=None):
        self._district_level_1 = district_level_1
        self._district_level_2 = district_level_2
        # Third level is optional; some municipalities have only two levels.
        self._district_level_3 = district_level_3
        self._ics = ICS()

    def _parse_level(self, html, level, district):
        """Return the option value of *district* in the ``strukturEbene{level}``
        selector found in *html*.

        Raises Exception if the selector has no options or *district* is
        not among them.
        """
        soup = BeautifulSoup(html, features="html.parser")
        select_content = soup.find_all("select", id="strukturEbene{}".format(level))
        soup = BeautifulSoup(str(select_content), features="html.parser")
        level_ids = {}
        for option in soup.find_all("option"):
            # Ignore the "Bitte wählen..." placeholder entry (value "0").
            if option.get("value") != "0":
                level_ids[option.text] = option.get("value")
        if not level_ids:
            raise Exception("Error: Level {} Dictionary empty".format(level))
        if district not in level_ids:
            raise Exception(
                "Error: District {} is not in the dictionary: {}".format(
                    level, (district, level_ids)
                )
            )
        return level_ids[district]

    def fetch(self):
        """Return the list of Collection entries for the configured district."""
        # Use a session to keep cookies and stuff
        s = requests.Session()

        # Create some fake headers because for some reason people seem to
        # believe it is bad to read public garbage collection data via a script
        headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Opera";v="84"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/84.0.4316.21',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'de-DE,de;q=0.9',
        }

        # Double loading is on purpose because sometimes the webpage has an
        # overlay which is gone on the second try in a session.
        response = s.get(URL, headers=headers)
        if "Zur aufgerufenen Seite" in response.text:
            response = s.get(URL, headers=headers)
        if response.status_code != 200:
            raise Exception("Error: failed to fetch first url: {}".format(URL))

        # Resolve the ID of the first-level district.
        level_1_id = self._parse_level(response.text, 1, self._district_level_1)

        # Resolve the ID of the second-level district.
        url = (
            'https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html'
            '?parent=' + level_1_id + '&ebene=1&portal=1&selected_ebene=0'
        )
        response = s.get(url, headers=headers)
        if response.status_code != 200:
            raise Exception("Error: failed to fetch second url: {}".format(url))
        selected_id = self._parse_level(response.text, 2, self._district_level_2)

        # Resolve the ID of the third-level district - if applicable.
        if self._district_level_3 is not None:
            url = (
                'https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html'
                '?parent=' + selected_id + '&ebene=2&portal=1&selected_ebene=0'
            )
            response = s.get(url, headers=headers)
            if response.status_code != 200:
                raise Exception("Error: failed to fetch third url: {}".format(url))
            selected_id = self._parse_level(response.text, 3, self._district_level_3)

        # Request the calendar page for the resolved district.
        url = (
            'https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html'
            '?selected_ebene=' + selected_id + '&owner=20100'
        )
        response = s.get(url, headers=headers)

        # Sometimes there is no garbage calendar available.
        if "Es sind keine Abfuhrbezirke hinterlegt." in response.text:
            # BUG FIX: district_level_3 may be None (two-level configuration);
            # the original concatenated it into the message and raised
            # TypeError instead. Report the deepest configured level.
            district = self._district_level_3 or self._district_level_2
            raise Exception(
                "Error: \"Es sind keine Abfuhrbezirke hinterlegt.\" for \""
                + district + "\" please use different input data."
            )

        # Locate the iCal download link on the result page.
        soup = BeautifulSoup(response.text, features="html.parser")
        links = soup.find_all("a")
        ical_url = ""
        for any_link in links:
            if " als iCal" in any_link.text:
                ical_url = any_link.get("href")
        if "ical.html" not in ical_url:
            raise Exception("No ical Link in the result: " + str(links))

        # The original POSTed to the iCal URL first only to probe the status
        # code and then fetched it again; fetch_ics performs its own error
        # handling, so fetch directly and save one request.
        return self.fetch_ics(ical_url, headers=headers)

    def fetch_ics(self, url, headers=None):
        """Download the ICS file at *url* and convert it to Collection entries.

        ``headers`` defaults to None now (was a shared mutable default ``{}``).
        """
        r = requests.get(url, headers=headers)
        if not r.ok:
            raise Exception("Error: failed to fetch url: {}".format(url))

        # Parse ics file, fix broken encoding: the server labels the payload
        # ISO-8859-1 while the bytes are UTF-8, so re-decode in that case.
        if r.encoding == "ISO-8859-1":
            dates = self._ics.convert(r.text.encode("latin_1").decode("utf-8"))
        else:
            dates = self._ics.convert(r.text)

        return [Collection(d[0], d[1]) for d in dates]

View File

@@ -0,0 +1,134 @@
import requests
from bs4 import BeautifulSoup
from waste_collection_schedule import Collection # type: ignore[attr-defined]
from waste_collection_schedule.service.ICS import ICS
# Source metadata: display name, description, and entry-point URL of the
# Abfallwirtschaft Landkreis Harburg waste-collection calendar.
TITLE = "AW Harburg"
DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"
# Example configurations exercised by the test suite: districts may be
# addressed with either two or three hierarchy levels.
TEST_CASES = {
    "CityWithTwoLevels": {"level_1": "Hanstedt", "level_2": "Evendorf"},
    "CityWithThreeLevels": {
        "level_1": "Buchholz",
        "level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
        "level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
    },
}

# Browser-like User-Agent sent with every request.
# NOTE(review): presumably the site blocks default script user agents —
# assumption carried over from the previous implementation; confirm.
HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)",
}
class Source:
    """Scrape waste-collection dates from the Landkreis Harburg portal.

    Districts are addressed through a two- or three-level cascade of
    drop-downs; the configured names must match the web form exactly.
    """

    def __init__(self, level_1, level_2, level_3=None):
        # The three cascade levels in order; level_3 is optional because
        # some municipalities have only two levels.
        self._districts = [level_1, level_2, level_3]
        self._ics = ICS()

    def fetch(self):
        """Return the list of Collection entries for the configured district."""
        # Use a session to keep cookies and stuff
        session = requests.Session()

        # Double loading is on purpose because sometimes the webpage has an
        # overlay which is gone on the second try in a session.
        r = session.get(URL, headers=HEADERS)
        if "Zur aufgerufenen Seite" in r.text:
            r = session.get(URL, headers=HEADERS)
        if r.status_code != 200:
            raise Exception(f"Error: failed to fetch first url: {URL}")

        # Resolve the ID of the first-level district.
        # (Renamed from ``id`` to avoid shadowing the builtin.)
        district_id = self.parse_level(r.text, 1)

        # Resolve the ID of the second-level district.
        url = (
            "https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html"
        )
        params = {
            "parent": district_id,
            "ebene": 1,
            "portal": 1,
            "selected_ebene": 0,
        }
        r = session.get(url, params=params, headers=HEADERS)
        if r.status_code != 200:
            raise Exception(f"Error: failed to fetch second url: {url}")
        district_id = self.parse_level(r.text, 2)

        # Resolve the ID of the third-level district - if applicable.
        if self._districts[3 - 1] is not None:
            params = {
                "parent": district_id,
                "ebene": 2,
                "portal": 1,
                "selected_ebene": 0,
            }
            r = session.get(url, params=params, headers=HEADERS)
            if r.status_code != 200:
                raise Exception(f"Error: failed to fetch third url: {url}")
            district_id = self.parse_level(r.text, 3)

        # Request the calendar page for the resolved district.
        url = "https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html"
        params = {
            "selected_ebene": district_id,
            "owner": 20100,
        }
        r = session.get(url, params=params, headers=HEADERS)

        # Sometimes there is no garbage calendar available
        if "Es sind keine Abfuhrbezirke hinterlegt." in r.text:
            raise Exception(
                f'Error: "Es sind keine Abfuhrbezirke hinterlegt." for "{self._districts[3-1]}". Please use different input data.'
            )

        # Locate the iCal download link on the result page.
        soup = BeautifulSoup(r.text, features="html.parser")
        links = soup.find_all("a")
        ical_url = ""
        for any_link in links:
            if " als iCal" in any_link.text:
                ical_url = any_link.get("href")
        if "ical.html" not in ical_url:
            raise Exception("No ical Link in the result: " + str(links))

        # Get the final data
        r = requests.get(ical_url, headers=HEADERS)
        if not r.ok:
            raise Exception(f"Error: failed to fetch url: {ical_url}")

        # Parse ics file
        dates = self._ics.convert(r.text)
        return [Collection(d[0], d[1]) for d in dates]

    def parse_level(self, response, level):
        """Return the option value of the configured district in the
        ``strukturEbene{level}`` selector of *response* (HTML text).

        Raises Exception if the selector has no options or the configured
        district is not among them.
        """
        soup = BeautifulSoup(response, features="html.parser")
        select_content = soup.find_all("select", id=f"strukturEbene{level}")
        soup = BeautifulSoup(str(select_content), features="html.parser")
        options_content = soup.find_all("option")
        level_ids = {}
        for option in options_content:
            # Ignore the "Bitte wählen..." placeholder entry (value "0").
            if option.get("value") != "0":
                level_ids[option.text] = option.get("value")
        if not level_ids:
            raise Exception(f"Error: Level {level} Dictionary empty")
        # BUG FIX: the error message used self._districts[level], which is
        # off by one — IndexError for level 3 and the wrong district name for
        # levels 1 and 2. The configured district lives at index level - 1.
        if self._districts[level - 1] not in level_ids:
            raise Exception(
                f"Error: District {self._districts[level - 1]} is not in the dictionary: {level_ids}"
            )
        return level_ids[self._districts[level - 1]]

View File

@@ -9,118 +9,44 @@ waste_collection_schedule:
sources:
- name: aw_harburg
args:
district_level_1: "Hanstedt"
district_level_2: "Evendorf"
level_1: LEVEL_1
level_2: LEVEL_2
level_3: LEVEL_3
```
### Configuration Variables
**district_level_1**<br>
**level_1**<br>
*(string) (required)*
**district_level_2**<br>
**level_2**<br>
*(string) (required)*
**district_level_3**<br>
*(string) (optional - depending on district_level_2)*
**level_3**<br>
*(string) (optional - depending on level_2)*
## Example
```yaml
waste_collection_schedule:
sources:
- name: aw_harburg
- name: aw_harburg_de
args:
district_level_1: "Buchholz"
district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
customize:
- type: Biotonne
alias: Biomüll
show: true
- type: Grünabfall
alias: Grünabfall
show: true
- type: Gelber Sack
alias: Gelber Sack
show: true
- type: Hausmüll 14-täglich
alias: Hausmüll 2wö
show: true
- type: Hausmüll 4-wöchentlich
alias: Hausmüll 4wö
show: true
- type: Altpapier
alias: Papier
show: true
level_1: "Hanstedt"
level_2: "Evendorf"
```
Use `sources.customize` to filter or rename the waste types:
```yaml
waste_collection_schedule:
sources:
- name: aw_harburg
- name: aw_harburg_de
args:
district_level_1: "Buchholz"
district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
customize:
- type: Biotonne
alias: Biomüll
show: true
- type: Grünabfall
alias: Grünabfall
show: true
- type: Gelber Sack
alias: Gelber Sack
show: true
- type: Hausmüll 14-täglich
alias: Hausmüll 2wö
show: true
- type: Hausmüll 4-wöchentlich
alias: Hausmüll 4wö
show: true
- type: Altpapier
alias: Papier
show: true
sensor:
# Nächste Müllabholung
- platform: waste_collection_schedule
name: Nächste Leerung
# Nächste Biomüll Leerung
- platform: waste_collection_schedule
name: Nächste Biomüll Leerung
types: Biomüll
# Nächste Grünabfall Abholung
- platform: waste_collection_schedule
name: Nächste Grünabfall Abholung
types: Grünabfall
# Nächste Gelber Sack Abholung
- platform: waste_collection_schedule
name: Nächste Gelber Sack Abholung
types: Gelber Sack
# Nächste Hausmüll 14-täglich Leerung
- platform: waste_collection_schedule
name: Nächste Hausmüll 2wö Leerung
types: Hausmüll 2wö
# Nächste Hausmüll 4-wöchentlich Leerung
- platform: waste_collection_schedule
name: Nächste Hausmüll 4wö Leerung
types: Hausmüll 4wö
# Nächste Papier Leerung
- platform: waste_collection_schedule
name: Nächste Papier Leerung
types: Papier
level_1: "Buchholz"
level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
```
## How to get the source arguments
Check [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) if you need two or three levels of entries in the config. The strings need to be written in the exact same way as in the webinterface e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158"
Check [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) if you need two or three levels of entries in the config. The strings need to be written in the exact same way as in the webinterface e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158".

View File

@@ -62,6 +62,7 @@ Currently the following service providers are supported:
- [Abfall.IO / AbfallPlus.de](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/abfall_io.md)
- [AbfallNavi.de (RegioIT.de)](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/abfallnavi_de.md)
- [Abfallkalender Würzburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/wuerzburg_de.md)
- [Abfallwirtschaft Landkreis Harburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/aw_harburg_de.md)
- [Abfallwirtschaft Rendsburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/awr_de.md)
- [Abfallwirtschaft Stuttgart](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/stuttgart_de.md)
- [Abfallwirtschaft Südholstein](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/awsh_de.md)