Move a_region_ch into a service file; it now uses the ICS download

This commit is contained in:
5ila5
2024-08-20 17:30:42 +02:00
committed by 5ila5
parent e2ab638657
commit 7986f93e2a
2 changed files with 216 additions and 123 deletions

View File

@@ -0,0 +1,183 @@
from typing import Literal
import requests
from bs4 import BeautifulSoup
from waste_collection_schedule.source.ics import Source as ICS
# Supported portals: maps a service key to the portal's base URL.
SERVICES: dict[str, str] = {
    "winterthur": "https://m.winterthur.ch",
    "a_region": "https://www.a-region.ch",
}
# Literal type mirroring the SERVICES keys, for static checking of callers.
SERVICES_LITERALS = Literal["winterthur", "a_region"]
class A_region_ch:
    """Scraper for a-region.ch-style municipal waste-collection portals.

    Crawls a municipality page, resolves optional districts, and yields one
    ICS sub-source per published webcal calendar link per waste type.
    """

    def __init__(
        self,
        service: SERVICES_LITERALS,
        region_url: str,
        district: str | None = None,
        regex: str | None = None,
    ):
        """Store the portal base URL and lookup parameters.

        Args:
            service: key into SERVICES selecting the portal base URL.
            region_url: absolute or portal-relative URL of the municipality page.
            district: district name; only needed when the municipality page
                lists more than one district.
            regex: optional regex forwarded to every created ICS source.

        Raises:
            Exception: if ``service`` is not a known SERVICES key.
        """
        if service not in SERVICES:
            raise Exception(f"service '{service}' not found")
        self._base_url = SERVICES[service]
        self._regex = regex
        self._municipality_url = region_url
        self._district = district

    def fetch(self) -> list[ICS]:
        """Return one ICS source per waste type found on the municipality page."""
        waste_types = self.get_waste_types(self._municipality_url)
        entries: list[ICS] = []
        for tour, link in waste_types.items():
            entries += self.get_ICS_sources(link, tour)
        return entries

    def get_municipalities(self) -> dict[str, str]:
        """Collect ``{municipality name: href}`` from the portal's paged search.

        Fetches the landing page first (to establish a session), then keeps
        requesting the AJAX "searchFetchMore" endpoint until an empty
        response marks the last page.
        """
        municipalities: dict[str, str] = {}
        # get PHPSESSID
        session = requests.session()
        r = session.get(f"{self._base_url}")
        r.raise_for_status()
        # cookies = {'PHPSESSID': requests.utils.dict_from_cookiejar(r.cookies)['PHPSESSID']}
        # NOTE(review): apid/apparentid and the search hash below are
        # hard-coded portal identifiers — confirm they remain stable.
        params: dict[str, str | int] = {"apid": "13875680", "apparentid": "4618613"}
        r = session.get(f"{self._base_url}/index.php", params=params)
        r.raise_for_status()
        self.extract_municipalities(r.text, municipalities)
        page = 1
        while True:
            params = {
                "do": "searchFetchMore",
                "hash": "606ee79ca61fc6eef434ab4fca0d5956",
                "p": page,
            }
            headers = {
                "cookie": "PHPSESSID=71v67j0et4ih04qa142d402ebm;"
            }  # TODO: get cookie from first request
            r = session.get(
                f"{self._base_url}/appl/ajax/index.php", params=params, headers=headers
            )
            r.raise_for_status()
            # An empty body marks the end of the paged results.
            if r.text == "":
                break
            self.extract_municipalities(r.text, municipalities)
            page = page + 1
        return municipalities

    def extract_municipalities(self, text: str, municipalities: dict[str, str]):
        """Parse search-result HTML and add found municipalities in place."""
        soup = BeautifulSoup(text, features="html.parser")
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            # href ::= "/index.hp"
            href = download.get("href")
            if "ref=search" in href:
                for title in download.find_all("div", class_="title"):
                    # title ::= "Abfallkalender Andwil"
                    municipalities[title.string.removeprefix("Abfallkalender ")] = href

    def get_waste_types(self, link: str) -> dict[str, str]:
        """Map waste-type title to its href on one municipality page.

        Only anchors that carry a badge icon or a row image are considered;
        rows titled "PDF" (plain calendar downloads) are skipped.
        """
        if not link.startswith("http"):
            link = f"{self._base_url}{link}"
        r = requests.get(link)
        r.raise_for_status()
        waste_types: dict[str, str] = {}
        soup = BeautifulSoup(r.text, features="html.parser")
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            # href ::= "/index.php?apid=12731252&apparentid=5011362"
            href = download.get("href")
            if download.find("div", class_="badgeIcon") or download.find(
                "img", class_="rowImg"
            ):
                # BUGFIX: compare "PDF" against the title *strings*; the
                # original tested membership against bs4 Tag objects
                # (find_all results), which never matches a str, so the
                # PDF-skip branch could never fire.
                titles = [
                    title.string
                    for title in download.find_all("div", class_="title")
                ]
                if "PDF" in titles:
                    continue
                if not titles:
                    titles = [download.get_text(strip=True)]
                for title in titles:
                    # title ::= "Altmetall"
                    waste_types[title] = href
        return waste_types

    def get_ICS_sources(self, link: str, tour: str) -> list[ICS]:
        """Return ICS sources for one waste type, recursing into districts.

        If the page lists districts, a single district is followed
        automatically; otherwise ``self._district`` selects one.

        Raises:
            Exception: when multiple districts exist but ``self._district``
                is missing or not among them.
        """
        if not link.startswith("http"):
            link = f"{self._base_url}{link}"
        r = requests.get(link)
        r.raise_for_status()
        soup = BeautifulSoup(r.text, features="html.parser")
        # check for additional districts
        districts: dict[str, str] = {}
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            href = download.get("href")
            if "apparentid" in href:
                title = download.find("div", class_="title")
                if title is not None:
                    # additional district found ->
                    # title looks like "Prefix: Name"; keep the name part.
                    district_name_split = title.string.split(": ")
                    districts[
                        district_name_split[1 if len(district_name_split) > 1 else 0]
                    ] = href
        if len(districts) > 0:
            if len(districts) == 1:
                # only one district found -> use it
                return self.get_ICS_sources(list(districts.values())[0], tour)
            if self._district is None:
                raise Exception("district is missing")
            if self._district not in districts:
                raise Exception(f"district '{self._district}' not found")
            return self.get_ICS_sources(districts[self._district], tour)
        dates: list[ICS] = []
        downloads = soup.find_all("a", href=True)
        for download in downloads:
            # href ::= "/appl/ics.php?apid=12731252&from=2022-05-04%2013%3A00%3A00&to=2022-05-04%2013%3A00%3A00"
            href = download.get("href")
            # NOTE(review): the example href above shows "ics.php" but the
            # filter requires "ical.php" — confirm which endpoint the portal
            # actually emits.
            if href.startswith("webcal") and "ical.php" in href:
                # NOTE(review): ``tour`` is currently unused here; presumably
                # the waste-type name comes from the ICS events themselves.
                dates.append(ICS(url=href, regex=self._regex))
        return dates
def get_region_url_by_street(
    service: SERVICES_LITERALS,
    street: str,
    search_url: str,
    district: str | None = None,
    regex: str | None = None,
) -> A_region_ch:
    """Resolve a street name to its region page via the portal's search.

    Args:
        service: key into SERVICES selecting the portal base URL.
        street: street name to look up; matched case- and
            whitespace-insensitively against the search results.
        search_url: URL of the portal's street-search endpoint.
        district: forwarded to the created A_region_ch.
        regex: forwarded to the created A_region_ch.

    Returns:
        An A_region_ch configured with the matched street's href.

    Raises:
        Exception: when the search returns no links or no result matches;
            the error lists the street names that were found.
    """
    r = requests.get(search_url, params={"q": street})
    r.raise_for_status()
    soup = BeautifulSoup(r.text, features="html.parser")
    as_ = soup.select("a")
    if len(as_) == 0:
        raise Exception("No streets found")
    # Normalize once; results are compared ignoring case and spaces.
    wanted = street.lower().replace(" ", "")
    streets: list[str] = []
    for a in as_:
        href = a.get("href")
        if not isinstance(href, str):
            continue
        text = a.get_text(strip=True)
        streets.append(text)
        if text.lower().replace(" ", "") == wanted:
            return A_region_ch(service, href, district, regex)
    # BUGFIX: the collected suggestions were never shown — the original
    # message ended with "use one of" and dropped the list.
    raise Exception(f"Street not found, use one of: {', '.join(streets)}")

View File

@@ -1,9 +1,5 @@
import datetime
from urllib.parse import parse_qs, urlparse
import requests
from bs4 import BeautifulSoup
from waste_collection_schedule import Collection # type: ignore[attr-defined]
from waste_collection_schedule import Collection
from waste_collection_schedule.service.A_region_ch import A_region_ch
TITLE = "A-Region"
DESCRIPTION = "Source for A-Region, Switzerland waste collection."
@@ -21,7 +17,6 @@ TEST_CASES = {
"Speicher": {"municipality": "Speicher"},
}
BASE_URL = "https://www.a-region.ch"
MUNICIPALITIES = {
"Andwil": "/index.php?ref=search&refid=13875680&apid=5011362",
@@ -67,128 +62,43 @@ class Source:
def __init__(self, municipality, district=None):
self._municipality = municipality
self._district = district
if municipality not in MUNICIPALITIES:
raise Exception(f"municipality '{municipality}' not found")
self._municipality_url = MUNICIPALITIES[municipality]
def fetch(self):
# municipalities = self.get_municipalities()
municipalities = MUNICIPALITIES
if self._municipality not in municipalities:
raise Exception(f"municipality '{self._municipality}' not found")
self._ics_sources = []
waste_types = self.get_waste_types(municipalities[self._municipality])
def _get_ics_sources(self):
self._ics_sources = A_region_ch(
"a_region", self._municipality_url, self._district
).fetch()
def fetch(self) -> list[Collection]:
fresh_sources = False
if not self._ics_sources:
fresh_sources = True
self._get_ics_sources()
entries = []
for waste_type, link in waste_types.items():
dates = self.get_dates(link)
for d in dates:
entries.append(Collection(d, waste_type))
for source in self._ics_sources:
fresh_sources, e = self._get_dates(source, fresh_sources)
entries += e
return entries
def get_municipalities(self):
municipalities = {}
def _get_dates(self, source, fresh=False) -> tuple[bool, list[Collection]]:
exception = None
try:
entries = source.fetch()
except Exception as e:
exception = e
# get PHPSESSID
session = requests.session()
r = session.get(f"{BASE_URL}")
r.raise_for_status()
if exception or not entries:
if fresh:
if exception:
raise exception
return fresh, []
# cookies = {'PHPSESSID': requests.utils.dict_from_cookiejar(r.cookies)['PHPSESSID']}
self._get_ics_sources()
return self._get_dates(source, fresh=True)
params = {"apid": "13875680", "apparentid": "4618613"}
r = session.get(f"{BASE_URL}/index.php", params=params)
r.raise_for_status()
self.extract_municipalities(r.text, municipalities)
page = 1
while True:
params = {
"do": "searchFetchMore",
"hash": "606ee79ca61fc6eef434ab4fca0d5956",
"p": page,
}
headers = {
"cookie": "PHPSESSID=71v67j0et4ih04qa142d402ebm;"
} # TODO: get cookie from first request
r = session.get(
f"{BASE_URL}/appl/ajax/index.php", params=params, headers=headers
)
r.raise_for_status()
if r.text == "":
break
self.extract_municipalities(r.text, municipalities)
page = page + 1
return municipalities
def extract_municipalities(self, text, municipalities):
soup = BeautifulSoup(text, features="html.parser")
downloads = soup.find_all("a", href=True)
for download in downloads:
# href ::= "/index.hp"
href = download.get("href")
if "ref=search" in href:
for title in download.find_all("div", class_="title"):
# title ::= "Abfallkalender Andwil"
municipalities[title.string.removeprefix("Abfallkalender ")] = href
def get_waste_types(self, link):
r = requests.get(f"{BASE_URL}{link}")
r.raise_for_status()
waste_types = {}
soup = BeautifulSoup(r.text, features="html.parser")
downloads = soup.find_all("a", href=True)
for download in downloads:
# href ::= "/index.php?apid=12731252&apparentid=5011362"
href = download.get("href")
if "apparentid" in href:
for title in download.find_all("div", class_="title"):
# title ::= "Altmetall"
waste_types[title.string] = href
return waste_types
def get_dates(self, link):
r = requests.get(f"{BASE_URL}{link}")
r.raise_for_status()
soup = BeautifulSoup(r.text, features="html.parser")
# check for additional districts
districts = {}
downloads = soup.find_all("a", href=True)
for download in downloads:
href = download.get("href")
if "apparentid" in href:
title = download.find("div", class_="title")
if title is not None:
# additional district found ->
district_name_split = title.string.split(": ")
districts[
district_name_split[1 if len(district_name_split) > 1 else 0]
] = href
if len(districts) > 0:
if len(districts) == 1:
# only one district found -> use it
return self.get_dates(list(districts.values())[0])
if self._district is None:
raise Exception("district is missing")
if self._district not in districts:
raise Exception(f"district '{self._district}' not found")
return self.get_dates(districts[self._district])
dates = set()
downloads = soup.find_all("a", href=True)
for download in downloads:
# href ::= "/appl/ics.php?apid=12731252&from=2022-05-04%2013%3A00%3A00&to=2022-05-04%2013%3A00%3A00"
href = download.get("href")
if "ics.php" in href:
parsed = urlparse(href)
query = parse_qs(parsed.query)
date = datetime.datetime.strptime(query["from"][0], "%Y-%m-%d %H:%M:%S")
dates.add(date.date())
return dates
return fresh, entries