Skip to content

Commit

Permalink
Move data structures to new folder "models"
Browse files Browse the repository at this point in the history
To have them in one place and be able to use them everywhere in the project
  • Loading branch information
Crinibus committed Jun 20, 2023
1 parent 3f755d2 commit 6162d0d
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 83 deletions.
4 changes: 3 additions & 1 deletion scraper/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import json
import logging
from abc import ABC, abstractmethod
from scraper.format import Format, Info

from scraper.models import Info
from scraper.format import Format
from scraper.constants import REQUEST_HEADER, REQUEST_COOKIES
from scraper.filemanager import Config

Expand Down
12 changes: 0 additions & 12 deletions scraper/format.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
from dataclasses import dataclass
from scraper.filemanager import Config


@dataclass
class Info:
    """Scraped info about product"""

    name: str  # product name as scraped from the page
    price: float
    currency: str
    id: str  # website-specific product id; shadows the `id` builtin only as a field name
    valid: bool = True  # presumably False when scraping did not yield usable data — TODO confirm against callers


class Format:
@staticmethod
def get_user_product_name(product_name: str) -> str:
Expand Down
1 change: 1 addition & 0 deletions scraper/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from scraper.models.product import Datapoint, Info, Product, MasterProduct
80 changes: 80 additions & 0 deletions scraper/models/product.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from dataclasses import dataclass, field
import re


@dataclass
class Info:
    """Scraped info about product"""

    name: str  # product name as scraped from the page
    price: float
    currency: str
    id: str  # website-specific product id; shadows the `id` builtin only as a field name
    valid: bool = True  # presumably False when scraping did not yield usable data — TODO confirm against callers


@dataclass
class Datapoint:
    """A single scraped price observation for a product."""

    date: str  # date of the observation — exact string format not visible here; TODO confirm
    price: float


@dataclass
class Product:
    """A tracked product together with its scraped price history."""

    product_name: str
    category: str
    url: str  # product page that gets scraped
    id: str  # website-specific product id; shadows the `id` builtin only as a field name
    currency: str
    website: str  # name of the website the product is tracked on
    datapoints: list[Datapoint]  # price history, in stored order
    is_up_to_date: bool

    def get_all_dates(self) -> list[str]:
        """Return the date of every datapoint, in stored order."""
        return [datapoint.date for datapoint in self.datapoints]

    def get_all_prices(self) -> list[float]:
        """Return the price of every datapoint, in stored order."""
        return [datapoint.price for datapoint in self.datapoints]

    # NOTE: the parameter is named `format` (shadowing the builtin) because
    # callers may already pass it by keyword; renaming would break them.
    def to_string_format(self, format: str) -> str:
        """Return a string representing the product, controlled by an explicit format string.

        Recognized tokens: %name, %category, %url, %id, %currency, %website.

        >>> p = Product("ASUS RTX 4090", "GPU", "https://www.example.com/", "123", "USD", "example", [], True)
        >>> p.to_string_format("Name: %name, Category: %category, URL: %url, ID: %id, Website: %website")
        'Name: ASUS RTX 4090, Category: GPU, URL: https://www.example.com/, ID: 123, Website: example'
        """
        replacements = {
            "%name": self.product_name,
            "%category": self.category,
            "%url": self.url,
            "%id": self.id,
            "%currency": self.currency,
            "%website": self.website,
        }
        tok_regex = "|".join(re.escape(token) for token in replacements)
        # Substitute in a single pass: the previous replace()-in-a-loop approach
        # could replace a token *inside* an already-substituted value (e.g. a
        # product name that itself contains "%id"), corrupting the output.
        return re.sub(tok_regex, lambda match: replacements[match.group()], format)


@dataclass
class MasterProduct:
    """A product name/category grouping with the Product records that belong to it."""

    product_name: str
    category: str
    products: list[Product] = field(default_factory=list)  # default_factory avoids a shared mutable default
3 changes: 2 additions & 1 deletion scraper/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import threading
import logging
from datetime import datetime

from scraper.models import Info
from scraper.domains import BaseWebsiteHandler, get_website_handler
from scraper.filemanager import Filemanager
from scraper.format import Info


class Scraper:
Expand Down
70 changes: 1 addition & 69 deletions scraper/visualize.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,12 @@
from dataclasses import dataclass, field
from typing import Iterable, Iterator
import plotly.graph_objs as go
from datetime import datetime
import re

from scraper.models import Datapoint, MasterProduct, Product
from scraper import Filemanager
from scraper.constants import WEBSITE_COLORS


@dataclass
class Datapoint:
    """A single scraped price observation for a product."""

    date: str  # date of the observation — exact string format not visible here; TODO confirm
    price: float


@dataclass
class Product:
    """A tracked product together with its scraped price history."""

    product_name: str
    category: str
    url: str  # product page that gets scraped
    id: str  # website-specific product id; shadows the `id` builtin only as a field name
    currency: str
    website: str  # name of the website the product is tracked on
    datapoints: list[Datapoint]  # price history, in stored order
    is_up_to_date: bool

    def get_all_dates(self) -> list[str]:
        """Return the date of every datapoint, in stored order."""
        return [datapoint.date for datapoint in self.datapoints]

    def get_all_prices(self) -> list[float]:
        """Return the price of every datapoint, in stored order."""
        return [datapoint.price for datapoint in self.datapoints]

    # NOTE: the parameter is named `format` (shadowing the builtin) because
    # callers may already pass it by keyword; renaming would break them.
    def to_string_format(self, format: str) -> str:
        """Return a string representing the product, controlled by an explicit format string.

        Recognized tokens: %name, %category, %url, %id, %currency, %website.

        >>> p = Product("ASUS RTX 4090", "GPU", "https://www.example.com/", "123", "USD", "example", [], True)
        >>> p.to_string_format("Name: %name, Category: %category, URL: %url, ID: %id, Website: %website")
        'Name: ASUS RTX 4090, Category: GPU, URL: https://www.example.com/, ID: 123, Website: example'
        """
        replacements = {
            "%name": self.product_name,
            "%category": self.category,
            "%url": self.url,
            "%id": self.id,
            "%currency": self.currency,
            "%website": self.website,
        }
        tok_regex = "|".join(re.escape(token) for token in replacements)
        # Substitute in a single pass: the previous replace()-in-a-loop approach
        # could replace a token *inside* an already-substituted value (e.g. a
        # product name that itself contains "%id"), corrupting the output.
        return re.sub(tok_regex, lambda match: replacements[match.group()], format)


@dataclass
class MasterProduct:
    """A product name/category grouping with the Product records that belong to it."""

    product_name: str
    category: str
    products: list[Product] = field(default_factory=list)  # default_factory avoids a shared mutable default


def visualize_data(
show_all: bool, categories: list[str], ids: list[str], names: list[str], only_up_to_date: bool, compare: bool
) -> None:
Expand Down

0 comments on commit 6162d0d

Please sign in to comment.