From ac9ca8acbf4ccf2a72b552dc2044d246db0ecb0c Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Tue, 14 Nov 2023 23:58:30 +0100 Subject: [PATCH 1/4] Add parameter 'keep_tld' to function 'get_website_name' --- scraper/domains.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scraper/domains.py b/scraper/domains.py index ad317ec9..318dd0e8 100644 --- a/scraper/domains.py +++ b/scraper/domains.py @@ -490,12 +490,12 @@ def get_short_url(self) -> str: return f"https://www.hifiklubben.dk/{id}" -def get_website_name(url: str) -> str: +def get_website_name(url: str, keep_tld=False) -> str: stripped_url = url.removeprefix("https://").removeprefix("http://").removeprefix("www.") domain = stripped_url.split("/")[0] - # Remove the TLD/DNS name (such as ".com") - website_name_list = domain.split(".")[:-1] + # Remove the TLD/DNS name (such as ".com") if keep_tld is false + website_name_list = domain.split(".") if keep_tld else domain.split(".")[:-1] website_name = ".".join(website_name_list) return website_name From aec710cbfa82e1905d4b1347b37ebd5bee094401 Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Tue, 14 Nov 2023 23:58:56 +0100 Subject: [PATCH 2/4] Add custom exception 'WebsiteVersionNotSupported' --- scraper/exceptions.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scraper/exceptions.py b/scraper/exceptions.py index c96b60aa..c9545bc3 100644 --- a/scraper/exceptions.py +++ b/scraper/exceptions.py @@ -10,6 +10,15 @@ def __str__(self) -> str: return f"Website '{self.website_name}' is currently not supported" +class WebsiteVersionNotSupported(Exception): + def __init__(self, website_name: str, *args: object) -> None: + super().__init__(*args) + self.website_name = website_name + + def __str__(self) -> str: + return f"Website version '{self.website_name}' is currently not supported" + + class URLMissingSchema(Exception): def __init__(self, url, *args: object) -> None: super().__init__(*args) From 68147d268fe34d0069b6fe746f46db3581640229 Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Wed, 15 Nov 2023 00:00:44 +0100 Subject: [PATCH 3/4] Add API link for Elgiganten.se --- scraper/domains.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/scraper/domains.py b/scraper/domains.py index 318dd0e8..360f4432 100644 --- a/scraper/domains.py +++ b/scraper/domains.py @@ -10,6 +10,7 @@ from scraper.format import Format from scraper.constants import REQUEST_HEADER, REQUEST_COOKIES from scraper.filemanager import Config +from scraper.exceptions import WebsiteVersionNotSupported def request_url(url: str) -> requests.Response: @@ -43,6 +44,9 @@ def get_product_info(self) -> Info: except (AttributeError, ValueError, TypeError): logging.getLogger(__name__).exception(f"Could not get all the data needed from url: {self.url}") return Info(None, None, None, None, valid=False) + except WebsiteVersionNotSupported as ex: + logging.getLogger(__name__).exception(ex) + print(ex) def _request_product_data(self) -> None: # option for each specific class to change how the request data is being handled @@ -180,8 +184,15 @@ def _get_product_id(self) -> str: def _get_json_api_data(self) -> dict: id_number = self._get_product_id() + # API link to get price and currency - api_link = f"https://www.elgiganten.dk/cxorchestrator/dk/api?appMode=b2c&user=anonymous&operationName=getProductWithDynamicDetails&variables=%7B%22articleNumber%22%3A%22{id_number}%22%2C%22withCustomerSpecificPrices%22%3Afalse%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%229bfbc062032a2a6b924883b81508af5c77bbfc5f66cc41c7ffd7d519885ac5e4%22%7D%7D" + if "elgiganten.dk" in self.url: + api_link = f"https://www.elgiganten.dk/cxorchestrator/dk/api?appMode=b2c&user=anonymous&operationName=getProductWithDynamicDetails&variables=%7B%22articleNumber%22%3A%22{id_number}%22%2C%22withCustomerSpecificPrices%22%3Afalse%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%229bfbc062032a2a6b924883b81508af5c77bbfc5f66cc41c7ffd7d519885ac5e4%22%7D%7D" # noqa E501 + elif "elgiganten.se" in self.url: + api_link = f"https://www.elgiganten.se/cxorchestrator/se/api?getProductWithDynamicDetails&appMode=b2c&user=anonymous&operationName=getProductWithDynamicDetails&variables=%7B%22articleNumber%22%3A%22{id_number}%22%2C%22withCustomerSpecificPrices%22%3Afalse%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22229bbb14ee6f93449967eb326f5bfb87619a37e7ee6c4555b94496313c139ee1%22%7D%7D" # noqa E501 + else: + raise WebsiteVersionNotSupported(get_website_name(self.url, keep_tld=True)) + response = request_url(api_link) return response.json() From 0b7b1391b2b77eedb6cea3b4bbc0682d4c23ec38 Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Wed, 15 Nov 2023 00:07:37 +0100 Subject: [PATCH 4/4] Update README.md - add Elgiganten.se --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 888677cf..62c9b26e 100644 --- a/README.md +++ b/README.md @@ -117,6 +117,7 @@ This scraper can (so far) scrape prices on products from: - [Proshop.dk](https://www.proshop.dk/) - [Computersalg.dk](https://www.computersalg.dk/) - [Elgiganten.dk](https://www.elgiganten.dk/) +- [Elgiganten.se](https://www.elgiganten.se/) - [AvXperten.dk](https://www.avxperten.dk/) - [Av-Cables.dk](https://www.av-cables.dk/) - [Power.dk](https://www.power.dk/)