a_region_ch: move to service file, now using the ICS download
waste_collection_schedule/service/A_region_ch.py
@@ -0,0 +1,183 @@
from typing import Literal

import requests
from bs4 import BeautifulSoup
from waste_collection_schedule.source.ics import Source as ICS

SERVICES = {
    "winterthur": "https://m.winterthur.ch",
    "a_region": "https://www.a-region.ch",
}
SERVICES_LITERALS = Literal["winterthur", "a_region"]


class A_region_ch:
    def __init__(
        self,
        service: SERVICES_LITERALS,
        region_url: str,
        district: str | None = None,
        regex: str | None = None,
    ):
        if service not in SERVICES:
            raise Exception(f"service '{service}' not found")
        self._base_url = SERVICES[service]

        self._regex = regex

        self._municipality_url = region_url
        self._district = district

    def fetch(self) -> list[ICS]:
        waste_types = self.get_waste_types(self._municipality_url)

        entries = []

        for tour, link in waste_types.items():
            entries += self.get_ICS_sources(link, tour)
        return entries

    def get_municipalities(self) -> dict[str, str]:
        municipalities: dict[str, str] = {}

        # get PHPSESSID
        session = requests.session()
        r = session.get(f"{self._base_url}")
        r.raise_for_status()

        # cookies = {'PHPSESSID': requests.utils.dict_from_cookiejar(r.cookies)['PHPSESSID']}

        params: dict[str, str | int] = {"apid": "13875680", "apparentid": "4618613"}
        r = session.get(f"{self._base_url}/index.php", params=params)
        r.raise_for_status()
        self.extract_municipalities(r.text, municipalities)

        page = 1
        while True:
            params = {
                "do": "searchFetchMore",
                "hash": "606ee79ca61fc6eef434ab4fca0d5956",
                "p": page,
            }
            headers = {
                "cookie": "PHPSESSID=71v67j0et4ih04qa142d402ebm;"
            }  # TODO: get cookie from first request
            r = session.get(
                f"{self._base_url}/appl/ajax/index.php", params=params, headers=headers
            )
            r.raise_for_status()
            if r.text == "":
                break
            self.extract_municipalities(r.text, municipalities)
            page = page + 1
        return municipalities

    def extract_municipalities(self, text: str, municipalities: dict[str, str]):
        soup = BeautifulSoup(text, features="html.parser")
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            # href ::= "/index.php?ref=search&..."
            href = download.get("href")
            if "ref=search" in href:
                for title in download.find_all("div", class_="title"):
                    # title ::= "Abfallkalender Andwil"
                    municipalities[title.string.removeprefix("Abfallkalender ")] = href

    def get_waste_types(self, link: str) -> dict[str, str]:
        if not link.startswith("http"):
            link = f"{self._base_url}{link}"
        r = requests.get(link)
        r.raise_for_status()

        waste_types = {}

        soup = BeautifulSoup(r.text, features="html.parser")
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            # href ::= "/index.php?apid=12731252&apparentid=5011362"
            href = download.get("href")
            if download.find("div", class_="badgeIcon") or download.find(
                "img", class_="rowImg"
            ):
                titles = download.find_all("div", class_="title")
                titles = [title.string for title in titles]
                if "PDF" in titles:  # skip plain PDF download links
                    continue
                if not titles:
                    titles = [download.get_text(strip=True)]
                for title in titles:
                    # title ::= "Altmetall"
                    waste_types[title] = href

        return waste_types

    def get_ICS_sources(self, link: str, tour: str) -> list[ICS]:
        if not link.startswith("http"):
            link = f"{self._base_url}{link}"
        r = requests.get(link)
        r.raise_for_status()

        soup = BeautifulSoup(r.text, features="html.parser")

        # check for additional districts
        districts = {}
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            href = download.get("href")
            if "apparentid" in href:
                title = download.find("div", class_="title")
                if title is not None:
                    # additional district found; title ::= "<waste type>: <district>"
                    district_name_split = title.string.split(": ")
                    districts[
                        district_name_split[1 if len(district_name_split) > 1 else 0]
                    ] = href
        if len(districts) > 0:
            if len(districts) == 1:
                # only one district found -> use it
                return self.get_ICS_sources(list(districts.values())[0], tour)
            if self._district is None:
                raise Exception("district is missing")
            if self._district not in districts:
                raise Exception(f"district '{self._district}' not found")
            return self.get_ICS_sources(districts[self._district], tour)

        sources = []

        downloads = soup.find_all("a", href=True)
        for download in downloads:
            # href ::= "webcal://www.a-region.ch/appl/ical.php?..."
            href = download.get("href")
            if href.startswith("webcal") and "ical.php" in href:
                sources.append(ICS(url=href, regex=self._regex))

        return sources


def get_region_url_by_street(
    service: SERVICES_LITERALS,
    street: str,
    search_url: str,
    district: str | None = None,
    regex: str | None = None,
) -> A_region_ch:
    r = requests.get(search_url, params={"q": street})
    r.raise_for_status()

    soup = BeautifulSoup(r.text, features="html.parser")
    as_ = soup.select("a")
    if len(as_) == 0:
        raise Exception("No streets found")
    streets = []
    for a in as_:
        href = a.get("href")
        if not isinstance(href, str):
            continue
        streets.append(a.get_text(strip=True))

        if a.get_text(strip=True).lower().replace(" ", "") == street.lower().replace(
            " ", ""
        ):
            return A_region_ch(service, href, district, regex)

    raise Exception(f"Street not found, use one of: {', '.join(streets)}")
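
For reference, a minimal usage sketch of the new service class (illustrative, not part of the commit; the region URL is the Andwil entry from the source file below):

# Usage sketch (illustrative, not part of this commit).
from waste_collection_schedule.service.A_region_ch import A_region_ch

service = A_region_ch("a_region", "/index.php?ref=search&refid=13875680&apid=5011362")
for ics_source in service.fetch():
    # fetch() returns one ICS sub-source per waste type; each sub-source
    # downloads its webcal/ICS calendar and yields the collection entries
    entries = ics_source.fetch()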

waste_collection_schedule/source/a_region_ch.py
@@ -1,9 +1,5 @@
-import datetime
-from urllib.parse import parse_qs, urlparse
-
-import requests
-from bs4 import BeautifulSoup
-from waste_collection_schedule import Collection  # type: ignore[attr-defined]
+from waste_collection_schedule import Collection
+from waste_collection_schedule.service.A_region_ch import A_region_ch
 
 TITLE = "A-Region"
 DESCRIPTION = "Source for A-Region, Switzerland waste collection."
@@ -21,7 +17,6 @@ TEST_CASES = {
     "Speicher": {"municipality": "Speicher"},
 }
 
-BASE_URL = "https://www.a-region.ch"
 
 MUNICIPALITIES = {
     "Andwil": "/index.php?ref=search&refid=13875680&apid=5011362",
@@ -67,128 +62,43 @@ class Source:
     def __init__(self, municipality, district=None):
         self._municipality = municipality
         self._district = district
+        if municipality not in MUNICIPALITIES:
+            raise Exception(f"municipality '{municipality}' not found")
+        self._municipality_url = MUNICIPALITIES[municipality]
+        self._ics_sources = []
 
-    def fetch(self):
-        # municipalities = self.get_municipalities()
-        municipalities = MUNICIPALITIES
-        if self._municipality not in municipalities:
-            raise Exception(f"municipality '{self._municipality}' not found")
-
-        waste_types = self.get_waste_types(municipalities[self._municipality])
+    def _get_ics_sources(self):
+        self._ics_sources = A_region_ch(
+            "a_region", self._municipality_url, self._district
+        ).fetch()
+
+    def fetch(self) -> list[Collection]:
+        fresh_sources = False
+        if not self._ics_sources:
+            fresh_sources = True
+            self._get_ics_sources()
 
         entries = []
 
-        for waste_type, link in waste_types.items():
-            dates = self.get_dates(link)
-
-            for d in dates:
-                entries.append(Collection(d, waste_type))
-
+        for source in self._ics_sources:
+            fresh_sources, e = self._get_dates(source, fresh_sources)
+            entries += e
         return entries
 
-    def get_municipalities(self):
-        municipalities = {}
+    def _get_dates(self, source, fresh=False) -> tuple[bool, list[Collection]]:
+        exception = None
+        try:
+            entries = source.fetch()
+        except Exception as e:
+            exception = e
 
-        # get PHPSESSID
-        session = requests.session()
-        r = session.get(f"{BASE_URL}")
-        r.raise_for_status()
+        if exception or not entries:
+            if fresh:
+                if exception:
+                    raise exception
+                return fresh, []
 
-        # cookies = {'PHPSESSID': requests.utils.dict_from_cookiejar(r.cookies)['PHPSESSID']}
+            self._get_ics_sources()
+            return self._get_dates(source, fresh=True)
 
-        params = {"apid": "13875680", "apparentid": "4618613"}
-        r = session.get(f"{BASE_URL}/index.php", params=params)
-        r.raise_for_status()
-        self.extract_municipalities(r.text, municipalities)
-
-        page = 1
-        while True:
-            params = {
-                "do": "searchFetchMore",
-                "hash": "606ee79ca61fc6eef434ab4fca0d5956",
-                "p": page,
-            }
-            headers = {
-                "cookie": "PHPSESSID=71v67j0et4ih04qa142d402ebm;"
-            }  # TODO: get cookie from first request
-            r = session.get(
-                f"{BASE_URL}/appl/ajax/index.php", params=params, headers=headers
-            )
-            r.raise_for_status()
-            if r.text == "":
-                break
-            self.extract_municipalities(r.text, municipalities)
-            page = page + 1
-        return municipalities
-
-    def extract_municipalities(self, text, municipalities):
-        soup = BeautifulSoup(text, features="html.parser")
-        downloads = soup.find_all("a", href=True)
-        for download in downloads:
-            # href ::= "/index.php?ref=search&..."
-            href = download.get("href")
-            if "ref=search" in href:
-                for title in download.find_all("div", class_="title"):
-                    # title ::= "Abfallkalender Andwil"
-                    municipalities[title.string.removeprefix("Abfallkalender ")] = href
-
-    def get_waste_types(self, link):
-        r = requests.get(f"{BASE_URL}{link}")
-        r.raise_for_status()
-
-        waste_types = {}
-
-        soup = BeautifulSoup(r.text, features="html.parser")
-        downloads = soup.find_all("a", href=True)
-        for download in downloads:
-            # href ::= "/index.php?apid=12731252&apparentid=5011362"
-            href = download.get("href")
-            if "apparentid" in href:
-                for title in download.find_all("div", class_="title"):
-                    # title ::= "Altmetall"
-                    waste_types[title.string] = href
-
-        return waste_types
-
-    def get_dates(self, link):
-        r = requests.get(f"{BASE_URL}{link}")
-        r.raise_for_status()
-
-        soup = BeautifulSoup(r.text, features="html.parser")
-
-        # check for additional districts
-        districts = {}
-        downloads = soup.find_all("a", href=True)
-        for download in downloads:
-            href = download.get("href")
-            if "apparentid" in href:
-                title = download.find("div", class_="title")
-                if title is not None:
-                    # additional district found; title ::= "<waste type>: <district>"
-                    district_name_split = title.string.split(": ")
-                    districts[
-                        district_name_split[1 if len(district_name_split) > 1 else 0]
-                    ] = href
-        if len(districts) > 0:
-            if len(districts) == 1:
-                # only one district found -> use it
-                return self.get_dates(list(districts.values())[0])
-            if self._district is None:
-                raise Exception("district is missing")
-            if self._district not in districts:
-                raise Exception(f"district '{self._district}' not found")
-            return self.get_dates(districts[self._district])
-
-        dates = set()
-
-        downloads = soup.find_all("a", href=True)
-        for download in downloads:
-            # href ::= "/appl/ics.php?apid=12731252&from=2022-05-04%2013%3A00%3A00&to=2022-05-04%2013%3A00%3A00"
-            href = download.get("href")
-            if "ics.php" in href:
-                parsed = urlparse(href)
-                query = parse_qs(parsed.query)
-                date = datetime.datetime.strptime(query["from"][0], "%Y-%m-%d %H:%M:%S")
-                dates.add(date.date())
-
-        return dates
+        return fresh, entries
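
From the caller's side, the refactored Source builds its ICS sub-sources lazily on the first fetch() and caches them; a failing or empty cached sub-source triggers one rebuild-and-retry before any exception propagates. A usage sketch (illustrative, not part of the commit; "Andwil" is a MUNICIPALITIES key from above):

# Usage sketch (illustrative, not part of this commit).
source = Source("Andwil")
entries = source.fetch()  # first call builds and caches the ICS sub-sources
entries = source.fetch()  # later calls reuse the cache; on an error or empty
                          # result the sub-sources are rebuilt once and retried
for entry in entries:
    print(entry.date, entry.type)  # one Collection per pickup date and waste type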