mirror of
https://github.com/sascha-hemi/hacs_waste_collection_schedule.git
synced 2026-03-21 04:06:03 +01:00
refactor aw_harburg_de
This commit is contained in:
@@ -1,203 +0,0 @@
|
||||
import requests
|
||||
import json
|
||||
from waste_collection_schedule import Collection # type: ignore[attr-defined]
|
||||
from waste_collection_schedule.service.ICS import ICS
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
TITLE = "AW Harburg"
|
||||
DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
|
||||
URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"
|
||||
|
||||
TEST_CASES = {
|
||||
"CityWithTwoLevels": {
|
||||
"district_level_1": "Hanstedt",
|
||||
"district_level_2": "Evendorf",
|
||||
},
|
||||
"CityWithThreeLevels": {
|
||||
"district_level_1": "Buchholz",
|
||||
"district_level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
|
||||
"district_level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
|
||||
},
|
||||
}
|
||||
|
||||
class Source:
    """Fetch waste collection dates for Abfallwirtschaft Landkreis Harburg.

    The public calendar page presents up to three cascading district
    drop-downs.  This source resolves the configured district names to
    their internal IDs, follows the site's AJAX endpoints level by level,
    finds the iCal export link on the result page and parses it via ICS.
    """

    def __init__(self, district_level_1, district_level_2, district_level_3=None):
        # District names exactly as displayed in the web interface.
        self._district_level_1 = district_level_1
        self._district_level_2 = district_level_2
        # The third level only exists for larger cities.
        self._district_level_3 = district_level_3
        self._ics = ICS()

    def _parse_select(self, html, select_id, level):
        """Return a {district name: id} dict parsed from a <select> element.

        Raises:
            Exception: if no selectable districts are found at this level.
        """
        soup = BeautifulSoup(html, features="html.parser")
        select_content = soup.find_all("select", id=select_id)
        soup = BeautifulSoup(str(select_content), features="html.parser")
        ids = {}
        for option in soup.find_all("option"):
            # Ignore the "Bitte wählen..." placeholder (value "0").
            if option.get("value") != "0":
                ids[option.text] = option.get("value")
        if not ids:
            raise Exception("Error: Level {} Dictionary empty".format(level))
        return ids

    def fetch(self):
        """Return a list of Collection entries for the configured district.

        Raises:
            Exception: on HTTP errors, unknown district names, or when the
                result page contains no iCal link.
        """
        # Use a session to keep cookies and stuff
        s = requests.Session()

        # Create some fake headers because for some reason people seem to believe
        # it is bad to read public garbage collection data via a script
        headers = {
            'Connection': 'keep-alive',
            'Cache-Control': 'max-age=0',
            'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="98", "Opera";v="84"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'Upgrade-Insecure-Requests': '1',
            'Content-Type': 'application/x-www-form-urlencoded',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36 OPR/84.0.4316.21',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
            'Sec-Fetch-Site': 'same-origin',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-User': '?1',
            'Sec-Fetch-Dest': 'document',
            'Accept-Language': 'de-DE,de;q=0.9',
        }

        # Get the IDs of the districts on the first level.
        # Double loading is on purpose because sometimes the webpage has an
        # overlay which is gone on the second try in a session.
        response = s.get(URL, headers=headers)
        if "Zur aufgerufenen Seite" in response.text:
            response = s.get(URL, headers=headers)
        if response.status_code != 200:
            raise Exception("Error: failed to fetch first url: {}".format(URL))
        level_1_ids = self._parse_select(response.text, "strukturEbene1", 1)
        if self._district_level_1 not in level_1_ids:
            raise Exception(
                "Error: District 1 is not in the dictionary: {}".format(
                    (self._district_level_1, level_1_ids)
                )
            )

        # Get the IDs of the districts on the second level.
        url = (
            'https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html?parent='
            + level_1_ids[self._district_level_1]
            + '&ebene=1&portal=1&selected_ebene=0'
        )
        response = s.get(url, headers=headers)
        if response.status_code != 200:
            raise Exception("Error: failed to fetch second url: {}".format(url))
        level_2_ids = self._parse_select(response.text, "strukturEbene2", 2)
        if self._district_level_2 not in level_2_ids:
            raise Exception(
                "Error: District 2 is not in the dictionary: {}".format(
                    (self._district_level_2, level_2_ids)
                )
            )

        # Get the IDs of the third level - if applicable.
        if self._district_level_3 is not None:
            url = (
                'https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html?parent='
                + level_2_ids[self._district_level_2]
                + '&ebene=2&portal=1&selected_ebene=0'
            )
            response = s.get(url, headers=headers)
            if response.status_code != 200:
                raise Exception("Error: failed to fetch third url: {}".format(url))
            level_3_ids = self._parse_select(response.text, "strukturEbene3", 3)
            if self._district_level_3 not in level_3_ids:
                raise Exception(
                    "Error: District 3 is not in the dictionary: {}".format(
                        (self._district_level_3, level_3_ids)
                    )
                )
            selected_ebene = level_3_ids[self._district_level_3]
        else:
            selected_ebene = level_2_ids[self._district_level_2]

        # Prepare data for the real web request.
        url = (
            'https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html?selected_ebene='
            + selected_ebene
            + '&owner=20100'
        )
        response = s.get(url, headers=headers)

        # Sometimes there is no garbage calendar available.
        if "Es sind keine Abfuhrbezirke hinterlegt." in response.text:
            # BUG FIX: the original concatenated self._district_level_3, which
            # raised TypeError when only two levels are configured (None).
            district = (
                self._district_level_3
                if self._district_level_3 is not None
                else self._district_level_2
            )
            raise Exception(
                "Error: \"Es sind keine Abfuhrbezirke hinterlegt.\" for \""
                + district
                + "\" please use different input data."
            )

        soup = BeautifulSoup(response.text, features="html.parser")
        links = soup.find_all("a")
        ical_url = ""
        for any_link in links:
            if " als iCal" in any_link.text:
                ical_url = any_link.get("href")

        if "ical.html" not in ical_url:
            raise Exception("No ical Link in the result: " + str(links))

        # Warm up the iCal endpoint inside the session (keeps original
        # request sequence: POST first, then the GET in fetch_ics).
        response = s.post(ical_url, headers=headers)

        # Stop if something else as status code 200 is returned.
        if response.status_code != 200:
            raise Exception("Error: failed to fetch ical_url: {}".format(ical_url))

        return self.fetch_ics(ical_url, headers=headers)

    def fetch_ics(self, url, headers=None):
        """Download the iCal file at *url* and return Collection entries.

        Raises:
            Exception: if the download fails.
        """
        # BUG FIX: headers defaulted to a mutable dict ({}); use None instead.
        r = requests.get(url, headers=headers if headers is not None else {})

        if not r.ok:
            raise Exception("Error: failed to fetch url: {}".format(url))

        # Parse ics file; re-decode because the server mislabels UTF-8
        # payloads as ISO-8859-1.
        if r.encoding == "ISO-8859-1":
            dates = self._ics.convert(r.text.encode("latin_1").decode("utf-8"))
        else:
            dates = self._ics.convert(r.text)

        entries = []
        for d in dates:
            entries.append(Collection(d[0], d[1]))
        return entries
|
||||
@@ -0,0 +1,134 @@
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
from waste_collection_schedule import Collection # type: ignore[attr-defined]
|
||||
from waste_collection_schedule.service.ICS import ICS
|
||||
|
||||
TITLE = "AW Harburg"
|
||||
DESCRIPTION = "Abfallwirtschaft Landkreis Harburg"
|
||||
URL = "https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/"
|
||||
|
||||
TEST_CASES = {
|
||||
"CityWithTwoLevels": {"level_1": "Hanstedt", "level_2": "Evendorf"},
|
||||
"CityWithThreeLevels": {
|
||||
"level_1": "Buchholz",
|
||||
"level_2": "Buchholz mit Steinbeck (ohne Reindorf)",
|
||||
"level_3": "Seppenser Mühlenweg Haus-Nr. 1 / 2",
|
||||
},
|
||||
}
|
||||
|
||||
HEADERS = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64)",
|
||||
}
|
||||
|
||||
|
||||
class Source:
    """Fetch waste collection dates for Abfallwirtschaft Landkreis Harburg.

    Resolves the configured district names (up to three cascading levels)
    to internal IDs via the site's AJAX endpoints, locates the iCal export
    link on the resulting calendar page and parses it via ICS.
    """

    def __init__(self, level_1, level_2, level_3=None):
        # Districts ordered from coarsest to finest; index i holds level i+1.
        # level_3 is only needed for larger cities.
        self._districts = [level_1, level_2, level_3]
        self._ics = ICS()

    def fetch(self):
        """Return a list of Collection entries for the configured district.

        Raises:
            Exception: on HTTP errors, unknown district names, or when the
                result page contains no iCal link.
        """
        # Use a session to keep cookies and stuff
        session = requests.Session()

        # Get the IDs of the districts on the first level.
        # Double loading is on purpose because sometimes the webpage has an
        # overlay which is gone on the second try in a session.
        r = session.get(URL, headers=HEADERS)
        if "Zur aufgerufenen Seite" in r.text:
            r = session.get(URL, headers=HEADERS)
        if r.status_code != 200:
            raise Exception(f"Error: failed to fetch first url: {URL}")

        # Resolve the level 1 district name to its internal ID.
        # NOTE: renamed from `id`, which shadowed the builtin.
        district_id = self.parse_level(r.text, 1)

        # Get the IDs of the districts on the second level.
        url = (
            "https://www.landkreis-harburg.de/ajax/abfall_gebiete_struktur_select.html"
        )
        params = {
            "parent": district_id,
            "ebene": 1,
            "portal": 1,
            "selected_ebene": 0,
        }
        r = session.get(url, params=params, headers=HEADERS)
        if r.status_code != 200:
            raise Exception(f"Error: failed to fetch second url: {url}")
        district_id = self.parse_level(r.text, 2)

        # Get the IDs of the third level - if applicable.
        if self._districts[2] is not None:
            params = {
                "parent": district_id,
                "ebene": 2,
                "portal": 1,
                "selected_ebene": 0,
            }
            r = session.get(url, params=params, headers=HEADERS)
            if r.status_code != 200:
                raise Exception(f"Error: failed to fetch third url: {url}")
            district_id = self.parse_level(r.text, 3)

        # Prepare data for the real web request.
        url = "https://www.landkreis-harburg.de/abfallkalender/abfallkalender_struktur_daten_suche.html"
        params = {
            "selected_ebene": district_id,
            "owner": 20100,
        }
        r = session.get(url, params=params, headers=HEADERS)

        # Sometimes there is no garbage calendar available.
        if "Es sind keine Abfuhrbezirke hinterlegt." in r.text:
            # BUG FIX: show the most specific *configured* district instead of
            # printing "None" for districts with only two levels.
            district = (
                self._districts[2] if self._districts[2] is not None else self._districts[1]
            )
            raise Exception(
                f'Error: "Es sind keine Abfuhrbezirke hinterlegt." for "{district}". Please use different input data.'
            )

        soup = BeautifulSoup(r.text, features="html.parser")
        links = soup.find_all("a")
        ical_url = ""
        for any_link in links:
            if " als iCal" in any_link.text:
                ical_url = any_link.get("href")

        if "ical.html" not in ical_url:
            raise Exception("No ical Link in the result: " + str(links))

        # Get the final data.
        r = requests.get(ical_url, headers=HEADERS)
        if not r.ok:
            raise Exception(f"Error: failed to fetch url: {ical_url}")

        # Parse ics file.
        dates = self._ics.convert(r.text)

        entries = []
        for d in dates:
            entries.append(Collection(d[0], d[1]))
        return entries

    def parse_level(self, response, level):
        """Return the ID of the configured district at the given level.

        Parses the ``<select id="strukturEbene{level}">`` element from the
        HTML in *response* and maps visible district names to their IDs.

        Raises:
            Exception: if no districts are found at this level, or if the
                configured district name is not among them.
        """
        soup = BeautifulSoup(response, features="html.parser")
        select_content = soup.find_all("select", id=f"strukturEbene{level}")
        soup = BeautifulSoup(str(select_content), features="html.parser")
        options_content = soup.find_all("option")
        level_ids = {}
        for option in options_content:
            # Ignore the "Bitte wählen..." placeholder (value "0").
            if option.get("value") != "0":
                level_ids[option.text] = option.get("value")

        if not level_ids:
            raise Exception(f"Error: Level {level} Dictionary empty")

        if self._districts[level - 1] not in level_ids:
            # BUG FIX: the original formatted self._districts[level], which
            # named the wrong district and raised IndexError for level 3.
            raise Exception(
                f"Error: District {self._districts[level - 1]} is not in the dictionary: {level_ids}"
            )

        return level_ids[self._districts[level - 1]]
|
||||
@@ -9,118 +9,44 @@ waste_collection_schedule:
|
||||
sources:
|
||||
- name: aw_harburg
|
||||
args:
|
||||
district_level_1: "Hanstedt"
|
||||
district_level_2: "Evendorf"
|
||||
level_1: LEVEL_1
|
||||
level_2: LEVEL_2
|
||||
level_3: LEVEL_3
|
||||
```
|
||||
|
||||
### Configuration Variables
|
||||
|
||||
**district_level_1**<br>
|
||||
**level_1**<br>
|
||||
*(string) (required)*
|
||||
|
||||
**district_level_2**<br>
|
||||
**level_2**<br>
|
||||
*(string) (required)*
|
||||
|
||||
**district_level_3**<br>
|
||||
*(string) (optional - depending on district_level_2)*
|
||||
**level_3**<br>
|
||||
*(string) (optional - depending on level_2)*
|
||||
|
||||
## Example
|
||||
|
||||
```yaml
|
||||
waste_collection_schedule:
|
||||
sources:
|
||||
- name: aw_harburg
|
||||
- name: aw_harburg_de
|
||||
args:
|
||||
district_level_1: "Buchholz"
|
||||
district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
|
||||
district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
|
||||
customize:
|
||||
- type: Biotonne
|
||||
alias: Biomüll
|
||||
show: true
|
||||
- type: Grünabfall
|
||||
alias: Grünabfall
|
||||
show: true
|
||||
- type: Gelber Sack
|
||||
alias: Gelber Sack
|
||||
show: true
|
||||
- type: Hausmüll 14-täglich
|
||||
alias: Hausmüll 2wö
|
||||
show: true
|
||||
- type: Hausmüll 4-wöchentlich
|
||||
alias: Hausmüll 4wö
|
||||
show: true
|
||||
- type: Altpapier
|
||||
alias: Papier
|
||||
show: true
|
||||
level_1: "Hanstedt"
|
||||
level_2: "Evendorf"
|
||||
```
|
||||
|
||||
Use `sources.customize` to filter or rename the waste types:
|
||||
|
||||
```yaml
|
||||
waste_collection_schedule:
|
||||
sources:
|
||||
- name: aw_harburg
|
||||
- name: aw_harburg_de
|
||||
args:
|
||||
district_level_1: "Buchholz"
|
||||
district_level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
|
||||
district_level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
|
||||
customize:
|
||||
- type: Biotonne
|
||||
alias: Biomüll
|
||||
show: true
|
||||
- type: Grünabfall
|
||||
alias: Grünabfall
|
||||
show: true
|
||||
- type: Gelber Sack
|
||||
alias: Gelber Sack
|
||||
show: true
|
||||
- type: Hausmüll 14-täglich
|
||||
alias: Hausmüll 2wö
|
||||
show: true
|
||||
- type: Hausmüll 4-wöchentlich
|
||||
alias: Hausmüll 4wö
|
||||
show: true
|
||||
- type: Altpapier
|
||||
alias: Papier
|
||||
show: true
|
||||
|
||||
sensor:
|
||||
# Nächste Müllabholung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Leerung
|
||||
|
||||
# Nächste Biomüll Leerung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Biomüll Leerung
|
||||
types: Biomüll
|
||||
|
||||
# Nächste Grünabfall Abholung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Grünabfall Abholung
|
||||
types: Grünabfall
|
||||
|
||||
# Nächste Gelber Sack Abholung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Gelber Sack Abholung
|
||||
types: Gelber Sack
|
||||
|
||||
# Nächste Hausmüll 14-täglich Leerung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Hausmüll 2wö Leerung
|
||||
types: Hausmüll 2wö
|
||||
|
||||
# Nächste Hausmüll 4-wöchentlich Leerung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Hausmüll 4wö Leerung
|
||||
types: Hausmüll 4wö
|
||||
|
||||
# Nächste Papier Leerung
|
||||
- platform: waste_collection_schedule
|
||||
name: Nächste Papier Leerung
|
||||
types: Papier
|
||||
level_1: "Buchholz"
|
||||
level_2: "Buchholz mit Steinbeck (ohne Reindorf)"
|
||||
level_3: "Seppenser Mühlenweg Haus-Nr. 1 / 2"
|
||||
```
|
||||
|
||||
## How to get the source arguments
|
||||
|
||||
Check [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) if you need two or three levels of entries in the config. The strings need to be written in the exact same way as in the webinterface e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158"
|
||||
Check [AW Harburg Abfallkalender](https://www.landkreis-harburg.de/bauen-umwelt/abfallwirtschaft/abfallkalender/) if you need two or three levels of entries in the config. The strings need to be written in the exact same way as in the webinterface e.g. "Bremer Straße Haus-Nr. 93 - 197 / 78 - 158".
|
||||
|
||||
1
info.md
1
info.md
@@ -62,6 +62,7 @@ Currently the following service providers are supported:
|
||||
- [Abfall.IO / AbfallPlus.de](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/abfall_io.md)
|
||||
- [AbfallNavi.de (RegioIT.de)](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/abfallnavi_de.md)
|
||||
- [Abfallkalender Würzburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/wuerzburg_de.md)
|
||||
- [Abfallwirtschaft Landkreis Harburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/aw_harburg_de.md)
|
||||
- [Abfallwirtschaft Rendsburg](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/awr_de.md)
|
||||
- [Abfallwirtschaft Stuttgart](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/stuttgart_de.md)
|
||||
- [Abfallwirtschaft Südholstein](https://github.com/mampfes/hacs_waste_collection_schedule/blob/master/doc/source/awsh_de.md)
|
||||
|
||||
Reference in New Issue
Block a user