From a4176e2402d9cb78ff70aadf49141d116e5c6d37 Mon Sep 17 00:00:00 2001 From: 5ila5 <5ila5@users.noreply.github.com> Date: Mon, 8 Jul 2024 18:08:28 +0200 Subject: [PATCH] rewrite of wyreforestdc_gov_uk --- .../source/wyreforestdc_gov_uk.py | 181 ++++++++++++------ doc/source/wyreforestdc_gov_uk.md | 52 +++-- 2 files changed, 146 insertions(+), 87 deletions(-) diff --git a/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py b/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py index 2251e747..85dd38ae 100644 --- a/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py +++ b/custom_components/waste_collection_schedule/waste_collection_schedule/source/wyreforestdc_gov_uk.py @@ -1,23 +1,26 @@ -import datetime import logging +import re +from datetime import date, timedelta import requests from bs4 import BeautifulSoup -from waste_collection_schedule import Collection +from waste_collection_schedule import Collection # type: ignore[attr-defined] TITLE = "Wyre Forest District Council" DESCRIPTION = "Source for wyreforestdc.gov.uk, Wyre Forest District Council, UK" URL = "https://www.wyreforestdc.gov.uk" TEST_CASES = { - "2 Kinver Avenue, Kidderminster": {"uprn": 100120731673}, - "14 Forestry Houses, Callow Hill": {"post_code": "DY14 9XQ", "number": 14}, - "The Park, Stourbridge": {"post_code": "DY9 0EX", "name": "The Park"}, + "2 Kinver Avenue, Kidderminster": { + "street": "hilltop avenue", + "town": "BEWDLEY", + "garden_cutomer": "308072", + }, } API_URLS = { - "address_search": "https://forms.wyreforestdc.gov.uk/bindays/", - "collection": "https://forms.wyreforestdc.gov.uk/bindays/Home/Details", + "waste": "https://forms.wyreforestdc.gov.uk/querybin.asp", + "garden_waste": "https://forms.wyreforestdc.gov.uk/GardenWasteChecker/Home/Details", } ICON_MAP = { @@ -28,64 +31,126 @@ ICON_MAP = { DAYS = ["MONDAY", "TUESDAY", "WEDNESDAY", 
"THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"] +# Next Rubbish Collection +REGEX_GET_BIN_TYPE = re.compile(r"Next (.*?) Collection") +# collection is on a WEDNESDAY and will be collected on the same week as your rubbish bin collection +REGEX_GET_GARDEN_DAY = re.compile( + r"collection is on a\s*(.*?)\s*and will be collected on the same week as" +) +REGEX_GET_GARDEN_SAME_WEEK_AS = re.compile( + r"collected on the same week as your\s*(.*?)\s*(bin)?\s*collection" +) + _LOGGER = logging.getLogger(__name__) +def get_date_by_weekday(weekday: str) -> date: + this_week = re.match("This (.*?)$", weekday, re.IGNORECASE) + next_week = re.match("Next (.*?)$", weekday, re.IGNORECASE) + if this_week: + weekday_idx = DAYS.index(this_week.group(1).upper()) + offset = 0 + elif next_week: + weekday_idx = DAYS.index(next_week.group(1).upper()) + offset = 7 + else: + raise ValueError(f"Invalid weekday: {weekday}") + + d = date.today() + timedelta(days=offset) + while d.weekday() != weekday_idx: + d += timedelta(days=1) + return d + + +def predict_next_collections(first_date: date, day_interval: int = 14): + return [first_date + timedelta(days=i * day_interval) for i in range(5)] + + class Source: - def __init__(self, post_code=None, number=None, name=None, uprn=None): - self._post_code = post_code - self._number = number - self._name = name - self._uprn = uprn + def __init__(self, street: str, town: str, garden_cutomer: str | int | None = None): + self._street = street.upper().strip() + self._town = town.upper().strip() + self._garden_cutomer = str(garden_cutomer).strip() if garden_cutomer else None - def fetch(self): - s = requests.Session() - - if not self._uprn: - # look up the UPRN for the address - payload = {"searchTerm": self._post_code} - r = s.post(str(API_URLS["address_search"]), data=payload) - - soup = BeautifulSoup(r.text, features="html.parser") - propertyUprns = soup.find("select", {"id": "UPRN"}).findAll("option") - for match in propertyUprns: - if self._name: - if ( - 
match.text.strip() - .capitalize() - .startswith(self._name.capitalize()) - ): - self._uprn = match["value"] - if self._number: - if match.text.strip().startswith(str(self._number)): - self._uprn = match["value"] - - # GET request returns schedule for matching uprn - payload = {"UPRN": self._uprn} - r = s.post(str(API_URLS["collection"]), data=payload) + def get_garden_waste(self, type_to_day: dict[str, str]) -> list[Collection]: + data = { + "CUST_No": self._garden_cutomer, + } + r = requests.post(API_URLS["garden_waste"], data=data) r.raise_for_status() - - entries = [] - - # Extract waste types and dates from responseContent soup = BeautifulSoup(r.text, "html.parser") - x = soup.findAll("p") - for i in x: # ignores elements containing address and marketing message - if "this week is a " in i.text: - for round_type in ICON_MAP: - if round_type in i.text: - dayRaw = i.find("strong") - dayName = dayRaw.contents[0].strip() - d = datetime.date.today() - nextDate = d + datetime.timedelta( - (DAYS.index(dayName) + 1 - d.isoweekday()) % 7 - ) - entries.append( - Collection( - date=nextDate, - t=round_type, - icon=ICON_MAP.get(round_type), - ) + + day = REGEX_GET_GARDEN_DAY.search(soup.text) + same_week_as = REGEX_GET_GARDEN_SAME_WEEK_AS.search(soup.text) + if not day or not same_week_as: + raise ValueError( + f"Could not find garden waste collection days: {day} {same_week_as}" + ) + + relevant_coll_date = get_date_by_weekday( + type_to_day[same_week_as.group(1).lower()] + ) + monday_of_garden_week = relevant_coll_date - timedelta( + days=relevant_coll_date.weekday() + ) + + garden_day = monday_of_garden_week + timedelta(DAYS.index(day.group(1).upper())) + entries = [] + for coll_date in predict_next_collections(garden_day): + entries.append( + Collection( + date=coll_date, + icon=ICON_MAP.get("garden waste"), + t="Garden waste", + ) + ) + return entries + + def fetch(self) -> list[Collection]: + entries: list[Collection] = [] + params = { + "txtStreetName": 
self._street, + "select": "yes", + "town": self._town, + } + r = requests.post(API_URLS["waste"], params=params) + r.raise_for_status() + type_to_day: dict[str, str] = {} + + soup = BeautifulSoup(r.text, "html.parser") + coll_day_header_p = soup.find("p", text="Collection Day") + + if not coll_day_header_p: + raise ValueError("Could not find collection day header") + coll_day_table = coll_day_header_p.find_parent("table") + if not coll_day_table: + raise ValueError("Could not find collection day table") + coll_day_rows = coll_day_table.find_all("tr") + if not len(coll_day_rows) == 2: + raise ValueError("Could not find collection day rows") + + headings = [td.text.strip() for td in coll_day_rows[0].find_all("td")] + values = [td.text.strip() for td in coll_day_rows[1].find_all("td")] + + for heading, value in list(zip(headings, values)): + if REGEX_GET_BIN_TYPE.match(heading): + bin_type_match = REGEX_GET_BIN_TYPE.match(heading) + if not bin_type_match: + raise ValueError(f"Could not find bin type in heading: {heading}") + bin_type = bin_type_match.group(1) + + type_to_day[bin_type.lower()] = value + + for coll_date in predict_next_collections(get_date_by_weekday(value)): + entries.append( + Collection( + date=coll_date, + icon=ICON_MAP.get(bin_type), + t=bin_type, ) + ) + + if self._garden_cutomer: + entries += self.get_garden_waste(type_to_day) return entries diff --git a/doc/source/wyreforestdc_gov_uk.md b/doc/source/wyreforestdc_gov_uk.md index d902d371..d5f40f6c 100644 --- a/doc/source/wyreforestdc_gov_uk.md +++ b/doc/source/wyreforestdc_gov_uk.md @@ -9,55 +9,49 @@ waste_collection_schedule: sources: - name: wyreforestdc_gov_uk args: - uprn: UNIQUE_PROPERTY_REFERENCE_NUMBER - post_code: POST_CODE - name: HOUSE_NAME - number: HOUSE_NUMBER + street: UNIQUE_PROPERTY_REFERENCE_NUMBER + town: POST_CODE + garden_cutomer: HOUSE_NAME ``` ### Configuration Variables -### Configuration Variables +**street** +*(string) (required)* -**uprn**
+**town** +*(string) (required)* + +**street** and **town** should match the URL parameters when clicking on an address here: <https://forms.wyreforestdc.gov.uk/querybin.asp> + +**garden_cutomer** *(string) (optional)* -This is required if you do not supply any other options. (Using this removes the need to do an address look up web request) +This is required if you want to show garden waste collections. -**name**
-*(string) (optional)* +#### How to find your `garden_cutomer` id -This is required if you supply a Postcode and do not have a house number. +Go to <https://forms.wyreforestdc.gov.uk/GardenWasteChecker/>, enter your postcode and choose your address. Before pressing the `select` button, open your developer tools (right click -> Inspect, or press F12) and go to the Network tab. Then press the `select` button on the webpage and you will see a POST request to `https://forms.wyreforestdc.gov.uk/GardenWasteChecker/Home/Details`. Your `garden_cutomer` id is the `CUST_No` value in the request payload. -**number**
-*(string) (optional)* +## Example with garden waste -This is required if you supply a Postcode and have a house number. - -**post_code**
-*(string) (optional)* - -This is required if you do not supply a UPRN. Single space between 1st and 2nd part of postcode is required. - -#### How to find your `UPRN` -An easy way to discover your Unique Property Reference Number (UPRN) is by going to https://www.findmyaddress.co.uk/ and entering in your address details. -Otherwise you can inspect the web requests the Wyre Forest District Council website makes when entering in your postcode and then selecting your address. - -## Example using UPRN ```yaml waste_collection_schedule: sources: - name: wyreforestdc_gov_uk args: - uprn: 10003378634 + street: "hilltop avenue" + town: "BEWDLEY" + garden_cutomer: 308072 ``` -## Example using Address lookup +## Example without garden waste + ```yaml waste_collection_schedule: sources: - name: wyreforestdc_gov_uk args: - post_code: "DY11 7WF" - name: "Wyre Forest House" -``` \ No newline at end of file + street: "WORCESTER STREET" + town: "STOURPORT ON SEVERN" +```