From a4176e2402d9cb78ff70aadf49141d116e5c6d37 Mon Sep 17 00:00:00 2001
From: 5ila5 <5ila5@users.noreply.github.com>
Date: Mon, 8 Jul 2024 18:08:28 +0200
Subject: [PATCH] rewrite of wyreforestdc_gov_uk

---
 .../source/wyreforestdc_gov_uk.py             | 181 ++++++++++++------
 doc/source/wyreforestdc_gov_uk.md             |  52 +++--
 2 files changed, 146 insertions(+), 87 deletions(-)

diff --git a/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py b/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py
index 2251e747..85dd38ae 100644
--- a/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py
+++ b/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py
@@ -1,23 +1,26 @@
-import datetime
 import logging
+import re
+from datetime import date, timedelta
 
 import requests
 from bs4 import BeautifulSoup
-from waste_collection_schedule import Collection
+from waste_collection_schedule import Collection  # type: ignore[attr-defined]
 
 TITLE = "Wyre Forest District Council"
 DESCRIPTION = "Source for wyreforestdc.gov.uk, Wyre Forest District Council, UK"
 URL = "https://www.wyreforestdc.gov.uk"
 
 TEST_CASES = {
-    "2 Kinver Avenue, Kidderminster": {"uprn": 100120731673},
-    "14 Forestry Houses, Callow Hill": {"post_code": "DY14 9XQ", "number": 14},
-    "The Park, Stourbridge": {"post_code": "DY9 0EX", "name": "The Park"},
+    "2 Kinver Avenue, Kidderminster": {
+        "street": "hilltop avenue",
+        "town": "BEWDLEY",
+        "garden_cutomer": "308072",
+    },
 }
 
 API_URLS = {
-    "address_search": "https://forms.wyreforestdc.gov.uk/bindays/",
-    "collection": "https://forms.wyreforestdc.gov.uk/bindays/Home/Details",
+    "waste": "https://forms.wyreforestdc.gov.uk/querybin.asp",
+    "garden_waste": "https://forms.wyreforestdc.gov.uk/GardenWasteChecker/Home/Details",
 }
 
 ICON_MAP = {
@@ -28,64 +31,126 @@ ICON_MAP = {
 
 DAYS = ["MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"]
 
+# Example heading this matches: "Next Rubbish Collection"
+REGEX_GET_BIN_TYPE = re.compile(r"Next (.*?) Collection")
+# Example text this matches: "collection is on a WEDNESDAY and will be collected on the same week as your rubbish bin collection"
+REGEX_GET_GARDEN_DAY = re.compile(
+    r"collection is on a\s*(.*?)\s*and will be collected on the same week as"
+)
+REGEX_GET_GARDEN_SAME_WEEK_AS = re.compile(
+    r"collected on the same week as your\s*(.*?)\s*(bin)?\s*collection"
+)
+
 _LOGGER = logging.getLogger(__name__)
 
 
+def get_date_by_weekday(weekday: str) -> date:
+    this_week = re.match("This (.*?)$", weekday, re.IGNORECASE)
+    next_week = re.match("Next (.*?)$", weekday, re.IGNORECASE)
+    if this_week:
+        weekday_idx = DAYS.index(this_week.group(1).upper())
+        offset = 0
+    elif next_week:
+        weekday_idx = DAYS.index(next_week.group(1).upper())
+        offset = 7
+    else:
+        raise ValueError(f"Invalid weekday: {weekday}")
+
+    d = date.today() + timedelta(days=offset)
+    while d.weekday() != weekday_idx:
+        d += timedelta(days=1)
+    return d
+
+
+def predict_next_collections(first_date: date, day_interval: int = 14):
+    return [first_date + timedelta(days=i * day_interval) for i in range(5)]
+
+
 class Source:
-    def __init__(self, post_code=None, number=None, name=None, uprn=None):
-        self._post_code = post_code
-        self._number = number
-        self._name = name
-        self._uprn = uprn
+    def __init__(self, street: str, town: str, garden_cutomer: str | int | None = None):
+        self._street = street.upper().strip()
+        self._town = town.upper().strip()
+        self._garden_cutomer = str(garden_cutomer).strip() if garden_cutomer else None
 
-    def fetch(self):
-        s = requests.Session()
-
-        if not self._uprn:
-            # look up the UPRN for the address
-            payload = {"searchTerm": self._post_code}
-            r = s.post(str(API_URLS["address_search"]), data=payload)
-
-            soup = BeautifulSoup(r.text, features="html.parser")
-            propertyUprns = soup.find("select", {"id": "UPRN"}).findAll("option")
-            for match in propertyUprns:
-                if self._name:
-                    if (
-                        match.text.strip()
-                        .capitalize()
-                        .startswith(self._name.capitalize())
-                    ):
-                        self._uprn = match["value"]
-                if self._number:
-                    if match.text.strip().startswith(str(self._number)):
-                        self._uprn = match["value"]
-
-        # GET request returns schedule for matching uprn
-        payload = {"UPRN": self._uprn}
-        r = s.post(str(API_URLS["collection"]), data=payload)
+    def get_garden_waste(self, type_to_day: dict[str, str]) -> list[Collection]:
+        data = {
+            "CUST_No": self._garden_cutomer,
+        }
+        r = requests.post(API_URLS["garden_waste"], data=data)
         r.raise_for_status()
-
-        entries = []
-
-        # Extract waste types and dates from responseContent
         soup = BeautifulSoup(r.text, "html.parser")
-        x = soup.findAll("p")
-        for i in x:  # ignores elements containing address and marketing message
-            if "this week is a " in i.text:
-                for round_type in ICON_MAP:
-                    if round_type in i.text:
-                        dayRaw = i.find("strong")
-                        dayName = dayRaw.contents[0].strip()
-                        d = datetime.date.today()
-                        nextDate = d + datetime.timedelta(
-                            (DAYS.index(dayName) + 1 - d.isoweekday()) % 7
-                        )
-                        entries.append(
-                            Collection(
-                                date=nextDate,
-                                t=round_type,
-                                icon=ICON_MAP.get(round_type),
-                            )
+
+        day = REGEX_GET_GARDEN_DAY.search(soup.text)
+        same_week_as = REGEX_GET_GARDEN_SAME_WEEK_AS.search(soup.text)
+        if not day or not same_week_as:
+            raise ValueError(
+                f"Could not find garden waste collection days: {day} {same_week_as}"
+            )
+
+        relevant_coll_date = get_date_by_weekday(
+            type_to_day[same_week_as.group(1).lower()]
+        )
+        monday_of_garden_week = relevant_coll_date - timedelta(
+            days=relevant_coll_date.weekday()
+        )
+
+        garden_day = monday_of_garden_week + timedelta(DAYS.index(day.group(1).upper()))
+        entries = []
+        for coll_date in predict_next_collections(garden_day):
+            entries.append(
+                Collection(
+                    date=coll_date,
+                    icon=ICON_MAP.get("garden waste"),
+                    t="Garden waste",
+                )
+            )
+        return entries
+
+    def fetch(self) -> list[Collection]:
+        entries: list[Collection] = []
+        params = {
+            "txtStreetName": self._street,
+            "select": "yes",
+            "town": self._town,
+        }
+        r = requests.post(API_URLS["waste"], params=params)
+        r.raise_for_status()
+        type_to_day: dict[str, str] = {}
+
+        soup = BeautifulSoup(r.text, "html.parser")
+        coll_day_header_p = soup.find("p", text="Collection Day")
+
+        if not coll_day_header_p:
+            raise ValueError("Could not find collection day header")
+        coll_day_table = coll_day_header_p.find_parent("table")
+        if not coll_day_table:
+            raise ValueError("Could not find collection day table")
+        coll_day_rows = coll_day_table.find_all("tr")
+        if not len(coll_day_rows) == 2:
+            raise ValueError("Could not find collection day rows")
+
+        headings = [td.text.strip() for td in coll_day_rows[0].find_all("td")]
+        values = [td.text.strip() for td in coll_day_rows[1].find_all("td")]
+
+        for heading, value in list(zip(headings, values)):
+            if REGEX_GET_BIN_TYPE.match(heading):
+                bin_type_match = REGEX_GET_BIN_TYPE.match(heading)
+                if not bin_type_match:
+                    raise ValueError(f"Could not find bin type in heading: {heading}")
+                bin_type = bin_type_match.group(1)
+
+                type_to_day[bin_type.lower()] = value
+
+                for coll_date in predict_next_collections(get_date_by_weekday(value)):
+                    entries.append(
+                        Collection(
+                            date=coll_date,
+                            icon=ICON_MAP.get(bin_type),
+                            t=bin_type,
                         )
+                    )
+
+        if self._garden_cutomer:
+            entries += self.get_garden_waste(type_to_day)
 
         return entries
diff --git a/doc/source/wyreforestdc_gov_uk.md b/doc/source/wyreforestdc_gov_uk.md
index d902d371..d5f40f6c 100644
--- a/doc/source/wyreforestdc_gov_uk.md
+++ b/doc/source/wyreforestdc_gov_uk.md
@@ -9,55 +9,49 @@ waste_collection_schedule:
     sources:
     - name: wyreforestdc_gov_uk
       args:
-        uprn: UNIQUE_PROPERTY_REFERENCE_NUMBER
-        post_code: POST_CODE
-        name: HOUSE_NAME
-        number: HOUSE_NUMBER
+        street: STREET_NAME
+        town: TOWN
+        garden_cutomer: GARDEN_WASTE_CUSTOMER_ID
 ```
 
 ### Configuration Variables
 
-### Configuration Variables
+**street**  
+*(string) (required)*
 
-**uprn**<br>
+**town**  
+*(string) (required)*
+
+**street** and **town** should match the `txtStreetName` and `town` URL parameters shown in the address bar after clicking on your street here: <https://forms.wyreforestdc.gov.uk/querybin.asp>
+
+**garden_cutomer**  
 *(string) (optional)*
 
-This is required if you do not supply any other options. (Using this removes the need to do an address look up web request)
+This is required if you want to show garden waste collections.
 
-**name**<br>
-*(string) (optional)*
+#### How to find your `garden_cutomer` id
 
-This is required if you supply a Postcode and do not have a house number.
+Go to <https://forms.wyreforestdc.gov.uk/gardenwastechecker>, enter your postcode and choose your address from the list. Before pressing `Select`, open your browser's developer tools (right click -> Inspect, or press F12) and go to the Network tab. Then press the `Select` button on the webpage; you will see a POST request to `https://forms.wyreforestdc.gov.uk/GardenWasteChecker/Home/Details`. Your `garden_cutomer` id is the `CUST_No` value in the request payload.
 
-**number**<br>
-*(string) (optional)*
+## Example with garden waste
 
-This is required if you supply a Postcode and have a house number.
-
-**post_code**<br>
-*(string) (optional)*
-
-This is required if you do not supply a UPRN. Single space between 1st and 2nd part of postcode is required.
-
-#### How to find your `UPRN`
-An easy way to discover your Unique Property Reference Number (UPRN) is by going to https://www.findmyaddress.co.uk/ and entering in your address details.
-Otherwise you can inspect the web requests the Wyre Forest District Council website makes when entering in your postcode and then selecting your address.
-
-## Example using UPRN
 ```yaml
 waste_collection_schedule:
     sources:
     - name: wyreforestdc_gov_uk
       args:
-        uprn: 10003378634
+        street: "hilltop avenue"
+        town: "BEWDLEY"
+        garden_cutomer: 308072
 ```
 
-## Example using Address lookup
+## Example without garden waste
+
 ```yaml
 waste_collection_schedule:
     sources:
     - name: wyreforestdc_gov_uk
       args:
-        post_code: "DY11 7WF"
-        name: "Wyre Forest House"
-```
\ No newline at end of file
+        street: "WORCESTER STREET"
+        town: "STOURPORT ON SEVERN"
+```