rewrite of wyreforestdc_gov_uk

This commit is contained in:
5ila5
2024-07-08 18:08:28 +02:00
committed by 5ila5
parent cd9b17ebe8
commit a4176e2402
2 changed files with 146 additions and 87 deletions

View File

@@ -1,23 +1,26 @@
import datetime
import logging
import re
from datetime import date, timedelta
import requests
from bs4 import BeautifulSoup
from waste_collection_schedule import Collection
from waste_collection_schedule import Collection # type: ignore[attr-defined]
TITLE = "Wyre Forest District Council"
DESCRIPTION = "Source for wyreforestdc.gov.uk, Wyre Forest District Council, UK"
URL = "https://www.wyreforestdc.gov.uk"
TEST_CASES = {
"2 Kinver Avenue, Kidderminster": {"uprn": 100120731673},
"14 Forestry Houses, Callow Hill": {"post_code": "DY14 9XQ", "number": 14},
"The Park, Stourbridge": {"post_code": "DY9 0EX", "name": "The Park"},
"2 Kinver Avenue, Kidderminster": {
"street": "hilltop avenue",
"town": "BEWDLEY",
"garden_cutomer": "308072",
},
}
API_URLS = {
"address_search": "https://forms.wyreforestdc.gov.uk/bindays/",
"collection": "https://forms.wyreforestdc.gov.uk/bindays/Home/Details",
"waste": "https://forms.wyreforestdc.gov.uk/querybin.asp",
"garden_waste": "https://forms.wyreforestdc.gov.uk/GardenWasteChecker/Home/Details",
}
ICON_MAP = {
@@ -28,64 +31,126 @@ ICON_MAP = {
# Weekday names ordered so that a name's list index equals date.weekday()
# (MONDAY == 0); the date helpers below rely on that ordering.
DAYS = ["MONDAY", "TUESDAY", "WEDNESDAY", "THURSDAY", "FRIDAY", "SATURDAY", "SUNDAY"]
# Captures the bin type from a table heading, e.g. "Rubbish" from:
# Next Rubbish Collection
REGEX_GET_BIN_TYPE = re.compile(r"Next (.*?) Collection")
# Example sentence the two garden waste patterns are matched against:
# collection is on a WEDNESDAY and will be collected on the same week as your rubbish bin collection
# Captures the garden waste weekday ("WEDNESDAY" in the example sentence).
REGEX_GET_GARDEN_DAY = re.compile(
r"collection is on a\s*(.*?)\s*and will be collected on the same week as"
)
# Captures the bin type whose collection week the garden waste shares
# ("rubbish" in the example sentence); the trailing word "bin" is optional.
REGEX_GET_GARDEN_SAME_WEEK_AS = re.compile(
r"collected on the same week as your\s*(.*?)\s*(bin)?\s*collection"
)
# Module-level logger named after this module.
_LOGGER = logging.getLogger(__name__)
def get_date_by_weekday(weekday: str) -> date:
    """Turn a "This <day>" / "Next <day>" label into a concrete date.

    The label is matched case-insensitively and surrounding or repeated
    whitespace is tolerated. "This" starts the search today, "Next" starts
    it seven days from today; the result is the first date on or after that
    starting point whose weekday matches the named day.

    Args:
        weekday: Label such as "This Wednesday" or "Next Monday".

    Returns:
        The resolved collection date.

    Raises:
        ValueError: If the label does not start with "This"/"Next" or the
            day name is not one of DAYS.
    """
    parsed = re.match(r"\s*(this|next)\s+(.*?)\s*$", weekday, re.IGNORECASE)
    if not parsed:
        raise ValueError(f"Invalid weekday: {weekday}")

    day_name = parsed.group(2).upper()
    if day_name not in DAYS:
        # Report unknown day names with the same message as a bad prefix
        # instead of leaking list.index()'s "'X' is not in list".
        raise ValueError(f"Invalid weekday: {weekday}")

    offset = 0 if parsed.group(1).lower() == "this" else 7
    start = date.today() + timedelta(days=offset)
    # DAYS is ordered like date.weekday() (Monday == 0), so the modulo gives
    # the number of days (0-6) to step forward from the starting point.
    days_ahead = (DAYS.index(day_name) - start.weekday()) % 7
    return start + timedelta(days=days_ahead)
def predict_next_collections(
    first_date: date, day_interval: int = 14, count: int = 5
) -> list[date]:
    """Project a run of evenly spaced collection dates.

    Args:
        first_date: First collection date; always the first element returned
            when ``count`` is positive.
        day_interval: Number of days between consecutive collections.
        count: How many dates to produce. Defaults to 5, the number that was
            previously hard-coded.

    Returns:
        ``count`` dates starting at ``first_date`` and spaced
        ``day_interval`` days apart (an empty list if ``count`` <= 0).
    """
    return [first_date + timedelta(days=i * day_interval) for i in range(count)]
class Source:
def __init__(self, post_code=None, number=None, name=None, uprn=None):
self._post_code = post_code
self._number = number
self._name = name
self._uprn = uprn
def __init__(self, street: str, town: str, garden_cutomer: str | int | None = None):
self._street = street.upper().strip()
self._town = town.upper().strip()
self._garden_cutomer = str(garden_cutomer).strip() if garden_cutomer else None
def fetch(self):
s = requests.Session()
if not self._uprn:
# look up the UPRN for the address
payload = {"searchTerm": self._post_code}
r = s.post(str(API_URLS["address_search"]), data=payload)
soup = BeautifulSoup(r.text, features="html.parser")
propertyUprns = soup.find("select", {"id": "UPRN"}).findAll("option")
for match in propertyUprns:
if self._name:
if (
match.text.strip()
.capitalize()
.startswith(self._name.capitalize())
):
self._uprn = match["value"]
if self._number:
if match.text.strip().startswith(str(self._number)):
self._uprn = match["value"]
# GET request returns schedule for matching uprn
payload = {"UPRN": self._uprn}
r = s.post(str(API_URLS["collection"]), data=payload)
def get_garden_waste(self, type_to_day: dict[str, str]) -> list[Collection]:
    """Fetch the garden waste schedule for the configured customer id.

    Posts the customer id to the garden waste checker and reads two facts
    from the text of the returned page: the weekday of the garden waste
    collection and the bin type whose collection week it shares. The date of
    that bin type's collection is looked up through ``type_to_day``, and the
    garden waste date is placed on the named weekday of the same
    Monday-to-Sunday week.

    Args:
        type_to_day: Maps a lower-cased bin type to its "This <day>" /
            "Next <day>" label.

    Returns:
        One "Garden waste" Collection per date produced by
        predict_next_collections for the computed garden waste day.

    Raises:
        ValueError: If the page text does not contain the expected sentence,
            or it names a bin type missing from ``type_to_day`` or a weekday
            that is not in DAYS.
        requests.HTTPError: If the checker responds with an error status.
    """
    r = requests.post(
        API_URLS["garden_waste"], data={"CUST_No": self._garden_cutomer}
    )
    r.raise_for_status()

    # Extract the page text once; both patterns search the same string.
    page_text = BeautifulSoup(r.text, "html.parser").text
    day = REGEX_GET_GARDEN_DAY.search(page_text)
    same_week_as = REGEX_GET_GARDEN_SAME_WEEK_AS.search(page_text)
    if not day or not same_week_as:
        raise ValueError(
            f"Could not find garden waste collection days: {day} {same_week_as}"
        )

    reference_type = same_week_as.group(1).lower()
    if reference_type not in type_to_day:
        # Explicit error instead of a bare KeyError from the dict lookup.
        raise ValueError(
            f"Garden waste refers to unknown bin type: {reference_type}"
        )
    garden_day_name = day.group(1).upper()
    if garden_day_name not in DAYS:
        # Explicit error instead of list.index()'s "'X' is not in list".
        raise ValueError(f"Invalid garden waste weekday: {day.group(1)}")

    relevant_coll_date = get_date_by_weekday(type_to_day[reference_type])
    # Step back to the Monday of the reference collection's week, then move
    # forward to the garden waste weekday within that same week.
    monday_of_garden_week = relevant_coll_date - timedelta(
        days=relevant_coll_date.weekday()
    )
    garden_day = monday_of_garden_week + timedelta(DAYS.index(garden_day_name))

    return [
        Collection(
            date=coll_date,
            icon=ICON_MAP.get("garden waste"),
            t="Garden waste",
        )
        for coll_date in predict_next_collections(garden_day)
    ]
def fetch(self) -> list[Collection]:
    """Fetch the upcoming collections for the configured street and town.

    Sends the address as URL query parameters on a POST request, locates the
    table that contains the "Collection Day" paragraph and reads its heading
    row and value row. Every column whose heading looks like
    "Next <type> Collection" yields the dates produced by
    predict_next_collections for that column's "This <day>" / "Next <day>"
    value. Garden waste collections are appended when a garden waste
    customer id was configured.

    Returns:
        All collections found for the address.

    Raises:
        ValueError: If the expected header, table or two-row layout is not
            found, or a day label cannot be parsed.
        requests.HTTPError: If the council site responds with an error status.
    """
    params = {
        "txtStreetName": self._street,
        "select": "yes",
        "town": self._town,
    }
    r = requests.post(API_URLS["waste"], params=params)
    r.raise_for_status()

    soup = BeautifulSoup(r.text, "html.parser")
    # `string=` is the current name of the legacy `text=` filter argument.
    coll_day_header_p = soup.find("p", string="Collection Day")
    if not coll_day_header_p:
        raise ValueError("Could not find collection day header")
    coll_day_table = coll_day_header_p.find_parent("table")
    if not coll_day_table:
        raise ValueError("Could not find collection day table")
    coll_day_rows = coll_day_table.find_all("tr")
    if len(coll_day_rows) != 2:
        raise ValueError("Could not find collection day rows")

    headings = [td.text.strip() for td in coll_day_rows[0].find_all("td")]
    values = [td.text.strip() for td in coll_day_rows[1].find_all("td")]

    entries: list[Collection] = []
    # Lower-cased bin type -> day label; the garden waste lookup needs it to
    # find the week it shares with another bin type.
    type_to_day: dict[str, str] = {}
    for heading, value in zip(headings, values):
        bin_type_match = REGEX_GET_BIN_TYPE.match(heading)
        if not bin_type_match:
            # Columns without a "Next <type> Collection" heading are skipped.
            continue
        bin_type = bin_type_match.group(1)
        type_to_day[bin_type.lower()] = value
        entries.extend(
            Collection(
                date=coll_date,
                icon=ICON_MAP.get(bin_type),
                t=bin_type,
            )
            for coll_date in predict_next_collections(get_date_by_weekday(value))
        )

    if self._garden_cutomer:
        entries += self.get_garden_waste(type_to_day)
    return entries

View File

@@ -9,55 +9,49 @@ waste_collection_schedule:
sources:
- name: wyreforestdc_gov_uk
args:
uprn: UNIQUE_PROPERTY_REFERENCE_NUMBER
post_code: POST_CODE
name: HOUSE_NAME
number: HOUSE_NUMBER
street: STREET_NAME
town: TOWN
garden_cutomer: GARDEN_WASTE_CUSTOMER_ID
```
### Configuration Variables
### Configuration Variables
**street**
*(string) (required)*
**uprn**<br>
**town**
*(string) (required)*
**street** and **town** must match the `txtStreetName` and `town` URL parameters shown in the address bar after clicking on your address here: <https://forms.wyreforestdc.gov.uk/querybin.asp>
**garden_cutomer**
*(string) (optional)*
This is required if you do not supply any other options. (Using this removes the need to do an address look up web request)
This is required if you want to show garden waste collections.
**name**<br>
*(string) (optional)*
#### How to find your `garden_cutomer` id
This is required if you supply a Postcode and do not have a house number.
Go to <https://forms.wyreforestdc.gov.uk/gardenwastechecker>, enter your postcode and choose your address from the list. Before pressing the `select` button, open your browser's developer tools (right click -> Inspect, or press F12) and go to the Network tab. Then press the `select` button on the webpage; you will see a POST request to `https://forms.wyreforestdc.gov.uk/GardenWasteChecker/Home/Details`. Your `garden_cutomer` id is the `CUST_No` value in the request payload.
**number**<br>
*(string) (optional)*
## Example with garden waste
This is required if you supply a Postcode and have a house number.
**post_code**<br>
*(string) (optional)*
This is required if you do not supply a UPRN. Single space between 1st and 2nd part of postcode is required.
#### How to find your `UPRN`
An easy way to discover your Unique Property Reference Number (UPRN) is by going to https://www.findmyaddress.co.uk/ and entering in your address details.
Otherwise you can inspect the web requests the Wyre Forest District Council website makes when entering in your postcode and then selecting your address.
## Example using UPRN
```yaml
waste_collection_schedule:
sources:
- name: wyreforestdc_gov_uk
args:
uprn: 10003378634
street: "hilltop avenue"
town: "BEWDLEY"
garden_cutomer: 308072
```
## Example using Address lookup
## Example without garden waste
```yaml
waste_collection_schedule:
sources:
- name: wyreforestdc_gov_uk
args:
post_code: "DY11 7WF"
name: "Wyre Forest House"
street: "WORCESTER STREET"
town: "STOURPORT ON SEVERN"
```