Fix Kirklees_gov_uk failing when multiple addresses are found

This commit is contained in:
5ila5
2024-09-04 15:55:58 +02:00
committed by 5ila5
parent 9281b4a970
commit 23d0cacb2f
2 changed files with 93 additions and 19 deletions

View File

@@ -1,8 +1,10 @@
import logging
import re
from datetime import datetime
from typing import Any
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from waste_collection_schedule import Collection # type: ignore[attr-defined]
_LOGGER = logging.getLogger(__name__)
@@ -13,6 +15,7 @@ URL = "https://www.kirklees.gov.uk"
TEST_CASES = {
"Test_001": {"door_num": 20, "postcode": "HD9 6LW"},
"test_002": {"door_num": "6", "postcode": "hd9 1js"},
"HD8 8NA, 1": {"door_num": "1", "postcode": "HD8 8NA", "uprn": "83194785"},
}
BASE_URL = "https://www.kirklees.gov.uk/beta/your-property-bins-recycling/your-bins/"
@@ -27,12 +30,11 @@ PARAMS = {
"__SCROLLPOSITIONY": "0",
"__EVENTVALIDATION": "",
"ctl00$ctl00$cphPageBody$cphContent$hdnBinUPRN": "",
"ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$txtGeoPremises": "",
"ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$txtGeoSearch": "",
"ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$butGeoSearch": ""
}
COLLECTION_REGEX = "(Recycling|Domestic|Garden Waste).*collection date ([0-3][0-9] [a-zA-Z]* [0-9]{4})"
COLLECTION_REGEX = (
"(Recycling|Domestic|Garden Waste).*collection date ([0-3][0-9] [a-zA-Z]* [0-9]{4})"
)
ICON_MAP = {
"DOMESTIC": "mdi:trash-can",
@@ -42,37 +44,91 @@ ICON_MAP = {
class Source:
def __init__(
    self, door_num: str | int, postcode: str, uprn: str | int | None = None
):
    """Set up the source for a single property.

    Args:
        door_num: House/door number used for the address search.
        postcode: Postcode used for the address search.
        uprn: Unique Property Reference Number. Only required when the
            door_num/postcode search matches more than one address.
    """
    self._door_num = door_num
    self._postcode = postcode
    self._uprn = uprn
    self._session = requests.Session()
    # Copy PARAMS so request-specific values never mutate the shared
    # module-level template dict.
    self._params: dict[str, Any] = dict(PARAMS)
def _update_params(self, soup: BeautifulSoup) -> None:
    """Rebuild the POST payload from the hidden form fields of *soup*.

    The ASP.NET page expects the __VIEWSTATE family of hidden inputs to be
    echoed back on every request.  Starting from a fresh copy of PARAMS,
    this fills in those tokens, then the search fields when the search form
    is present, and the address-selection fields when the page shows the
    address-list table (multiple matches).
    """

    def value_of(selector: str) -> str | None:
        # Falls back to an empty mapping so a missing element yields None,
        # exactly like a present element without a "value" attribute.
        element = soup.select_one(selector)
        return (element or {}).get("value")

    search_prefix = "ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$"
    self._params = dict(PARAMS)
    for token in ("__VIEWSTATE", "__VIEWSTATEGENERATOR", "__EVENTVALIDATION"):
        self._params[token] = value_of(f"input#{token}")
    if soup.find("input", {"name": search_prefix + "txtGeoPremises"}):
        self._params[search_prefix + "txtGeoPremises"] = self._door_num
        self._params[search_prefix + "txtGeoSearch"] = self._postcode
        self._params[search_prefix + "butGeoSearch"] = value_of(
            "input#butGeoSearch"
        )
    if soup.select_one("table#dagAddressList"):
        self._params["ctl00$ctl00$cphPageBody$cphContent$hdnBinUPRN"] = self._uprn
        self._params["UPRN"] = self._uprn
        self._params[search_prefix + "butSelectAddress"] = value_of(
            "input#butSelectAddress"
        )
def fetch(self):
entries = []
self._session.cookies.set("cookiesacceptedGDPR", "true", domain=".kirklees.gov.uk")
self._session.cookies.set(
"cookiesacceptedGDPR", "true", domain=".kirklees.gov.uk"
)
r0 = self._session.get(f"{BASE_URL}/default.aspx")
r0.raise_for_status()
r0_bs4 = BeautifulSoup(r0.text, features="html.parser")
PARAMS['__VIEWSTATE'] = r0_bs4.find("input", {"id": "__VIEWSTATE"})['value']
PARAMS['__VIEWSTATEGENERATOR'] = r0_bs4.find("input", {"id": "__VIEWSTATEGENERATOR"})['value']
PARAMS['__EVENTVALIDATION'] = r0_bs4.find("input", {"id": "__EVENTVALIDATION"})['value']
PARAMS['ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$txtGeoPremises'] = self._door_num
PARAMS['ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$txtGeoSearch'] = self._postcode
PARAMS['ctl00$ctl00$cphPageBody$cphContent$thisGeoSearch$butGeoSearch'] = r0_bs4.find("input", {"id": "butGeoSearch"})['value']
r1 = self._session.get(f"{BASE_URL}/default.aspx", params=PARAMS)
self._update_params(r0_bs4)
r1 = self._session.get(f"{BASE_URL}/default.aspx", params=self._params)
r1.raise_for_status()
r1_bs4 = BeautifulSoup(r1.text, features="html.parser")
cal_link = r1_bs4.find("a", {"id": "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"})['href']
if r1_bs4.select_one("table#dagAddressList"):
# If multiple addresses are found, we need to select one with UPRN
if not self._uprn:
raise ValueError("UPRN Required for this address")
self._update_params(r1_bs4)
r1 = self._session.post(f"{BASE_URL}/default.aspx", data=self._params)
r1.raise_for_status()
r1_bs4 = BeautifulSoup(r1.text, features="html.parser")
cal_link = r1_bs4.find(
"a", {"id": "cphPageBody_cphContent_wtcDomestic240__LnkCalendar"}
)["href"]
r2 = self._session.get(f"{BASE_URL}/{cal_link}")
r2.raise_for_status()
r2_bs4 = BeautifulSoup(r2.text, features="html.parser")
for collection in r2_bs4.find_all("img", {"id": re.compile('^cphPageBody_cphContent_rptr_Sticker_rptr_Collections_[0-9]_rptr_Bins_[0-9]_img_binType_[0-9]')}):
matches = re.findall(COLLECTION_REGEX, collection['alt'])
for collection in r2_bs4.find_all(
"img",
{
"id": re.compile(
"^cphPageBody_cphContent_rptr_Sticker_rptr_Collections_[0-9]_rptr_Bins_[0-9]_img_binType_[0-9]"
)
},
):
matches = re.findall(COLLECTION_REGEX, collection["alt"])
entries.append(
Collection(
date=datetime.strptime(matches[0][1], "%d %B %Y").date(),

View File

@@ -11,6 +11,7 @@ waste_collection_schedule:
args:
door_num: 1
postcode: "HD9 6RJ"
uprn: UPRN # only required when the address search returns multiple matches
```
### Configuration Variables
@@ -24,3 +25,20 @@ Door number identifier for the property
*(string) (required)*
Postcode of the property
**uprn**
*(string) (optional)*
Unique Property Reference Number (UPRN) of the property. This is required if multiple properties are found when searching by door number and postcode. An easy way to discover your UPRN is to go to <https://www.findmyaddress.co.uk/> and enter your address details.
## Example with UPRN
```yaml
waste_collection_schedule:
sources:
- name: kirklees_gov_uk
args:
door_num: 1
postcode: "HD8 8NA"
uprn: 83194785
```