Skip to content

Commit

Permalink
Merge pull request #229 from Crinibus/add-support-for-elgiganten-sweden
Browse files Browse the repository at this point in the history
Add support for Elgiganten.se
  • Loading branch information
Crinibus authored Nov 15, 2023
2 parents 2ce1afc + 0b7b139 commit c0b21eb
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 4 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ This scraper can (so far) scrape prices on products from:
- [Proshop.dk](https://www.proshop.dk/)
- [Computersalg.dk](https://www.computersalg.dk/)
- [Elgiganten.dk](https://www.elgiganten.dk/)
- [Elgiganten.se](https://www.elgiganten.se/)
- [AvXperten.dk](https://www.avxperten.dk/)
- [Av-Cables.dk](https://www.av-cables.dk/)
- [Power.dk](https://www.power.dk/)
Expand Down
19 changes: 15 additions & 4 deletions scraper/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
from scraper.format import Format
from scraper.constants import REQUEST_HEADER, REQUEST_COOKIES
from scraper.filemanager import Config
from scraper.exceptions import WebsiteVersionNotSupported


def request_url(url: str) -> requests.Response:
Expand Down Expand Up @@ -43,6 +44,9 @@ def get_product_info(self) -> Info:
except (AttributeError, ValueError, TypeError):
logging.getLogger(__name__).exception(f"Could not get all the data needed from url: {self.url}")
return Info(None, None, None, None, valid=False)
except WebsiteVersionNotSupported as ex:
logging.getLogger(__name__).exception(ex)
print(ex)

def _request_product_data(self) -> None:
# option for each specific class to change how the request data is being handled
Expand Down Expand Up @@ -180,8 +184,15 @@ def _get_product_id(self) -> str:

def _get_json_api_data(self) -> dict:
    """Fetch the product JSON from the Elgiganten API that matches ``self.url``.

    The product id comes from ``self._get_product_id()`` and is inserted into
    the country-specific API link (Danish or Swedish site).

    Returns:
        The decoded JSON body of the API response.

    Raises:
        WebsiteVersionNotSupported: If ``self.url`` contains neither
            "elgiganten.dk" nor "elgiganten.se".
    """
    id_number = self._get_product_id()

    # API link to get price and currency; each country site has its own
    # endpoint path and persisted-query hash, so pick it from the URL.
    if "elgiganten.dk" in self.url:
        api_link = f"https://www.elgiganten.dk/cxorchestrator/dk/api?appMode=b2c&user=anonymous&operationName=getProductWithDynamicDetails&variables=%7B%22articleNumber%22%3A%22{id_number}%22%2C%22withCustomerSpecificPrices%22%3Afalse%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%229bfbc062032a2a6b924883b81508af5c77bbfc5f66cc41c7ffd7d519885ac5e4%22%7D%7D"  # noqa E501
    elif "elgiganten.se" in self.url:
        api_link = f"https://www.elgiganten.se/cxorchestrator/se/api?getProductWithDynamicDetails&appMode=b2c&user=anonymous&operationName=getProductWithDynamicDetails&variables=%7B%22articleNumber%22%3A%22{id_number}%22%2C%22withCustomerSpecificPrices%22%3Afalse%7D&extensions=%7B%22persistedQuery%22%3A%7B%22version%22%3A1%2C%22sha256Hash%22%3A%22229bbb14ee6f93449967eb326f5bfb87619a37e7ee6c4555b94496313c139ee1%22%7D%7D"  # noqa E501
    else:
        # Report the full domain (TLD included) so the message names the
        # exact country site that is unsupported.
        raise WebsiteVersionNotSupported(get_website_name(self.url, keep_tld=True))

    response = request_url(api_link)
    return response.json()

Expand Down Expand Up @@ -490,12 +501,12 @@ def get_short_url(self) -> str:
return f"https://www.hifiklubben.dk/{id}"


def get_website_name(url: str, keep_tld: bool = False) -> str:
    """Return the website name extracted from *url*.

    The scheme ("https://" or "http://") and a leading "www." are stripped,
    and everything from the first "/" onwards is discarded.

    Args:
        url: The URL to extract the website name from.
        keep_tld: When true, keep the last dot-separated label of the domain
            (such as ".com"); when false (the default), drop it.

    Returns:
        The domain with or without its last label, e.g. "elgiganten" or
        "elgiganten.se".
    """
    stripped_url = url.removeprefix("https://").removeprefix("http://").removeprefix("www.")
    domain = stripped_url.split("/")[0]

    # Remove the TLD/DNS name (such as ".com") if keep_tld is false
    website_name_list = domain.split(".") if keep_tld else domain.split(".")[:-1]
    website_name = ".".join(website_name_list)
    return website_name

Expand Down
9 changes: 9 additions & 0 deletions scraper/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,15 @@ def __str__(self) -> str:
return f"Website '{self.website_name}' is currently not supported"


class WebsiteVersionNotSupported(Exception):
    """Raised when a specific version of a website (e.g. a country site) is not supported.

    Attributes are documented inline; the string form is a ready-to-print
    message naming the unsupported website version.
    """

    def __init__(self, website_name: str, *args: object) -> None:
        # Forward website_name to the base class so it ends up in ``args``;
        # otherwise rebuilding the exception from ``__reduce__`` (pickle/copy)
        # fails because the required argument is missing.
        super().__init__(website_name, *args)
        # Name of the unsupported website version, used in the message.
        self.website_name = website_name

    def __str__(self) -> str:
        return f"Website version '{self.website_name}' is currently not supported"


class URLMissingSchema(Exception):
def __init__(self, url, *args: object) -> None:
super().__init__(*args)
Expand Down

0 comments on commit c0b21eb

Please sign in to comment.