Skip to content

Commit

Permalink
Merge pull request #222 from Crinibus/create-folder-models
Browse files Browse the repository at this point in the history
Move dataclasses to new folder "models"
  • Loading branch information
Crinibus authored Jun 28, 2023
2 parents 0ab4a6e + 6162d0d commit 2ce1afc
Show file tree
Hide file tree
Showing 6 changed files with 87 additions and 83 deletions.
4 changes: 3 additions & 1 deletion scraper/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
import json
import logging
from abc import ABC, abstractmethod
from scraper.format import Format, Info

from scraper.models import Info
from scraper.format import Format
from scraper.constants import REQUEST_HEADER, REQUEST_COOKIES
from scraper.filemanager import Config

Expand Down
12 changes: 0 additions & 12 deletions scraper/format.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,6 @@
from dataclasses import dataclass
from scraper.filemanager import Config


@dataclass
class Info:
    """Scraped info about product"""

    name: str  # product name as scraped from the website
    price: float  # scraped price
    currency: str  # currency of ``price`` — presumably a code like "USD"; confirm against the scrapers
    id: str  # website-specific product id
    valid: bool = True  # NOTE(review): presumably cleared when scraping fails — confirm with callers


class Format:
@staticmethod
def get_user_product_name(product_name: str) -> str:
Expand Down
1 change: 1 addition & 0 deletions scraper/models/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from scraper.models.product import Datapoint, Info, Product, MasterProduct
80 changes: 80 additions & 0 deletions scraper/models/product.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
from dataclasses import dataclass, field
import re


@dataclass
class Info:
    """Scraped info about product"""

    name: str  # product name as scraped from the website
    price: float  # scraped price
    currency: str  # currency of ``price`` — presumably a code like "USD"; confirm against the scrapers
    id: str  # website-specific product id
    valid: bool = True  # NOTE(review): presumably cleared when scraping fails — confirm with callers


@dataclass
class Datapoint:
    """A single recorded price observation for a product."""

    date: str  # date of the observation (string format defined by the writer side)
    price: float


@dataclass
class Product:
    """A tracked product with its recorded price history for one website."""

    product_name: str
    category: str
    url: str
    id: str
    currency: str
    website: str
    datapoints: list[Datapoint]  # recorded price history
    is_up_to_date: bool

    def get_all_dates(self) -> list[str]:
        """Return the date of every datapoint, in stored order."""
        return [datapoint.date for datapoint in self.datapoints]

    def get_all_prices(self) -> list[float]:
        """Return the price of every datapoint, in stored order."""
        return [datapoint.price for datapoint in self.datapoints]

    def to_string_format(self, format: str) -> str:
        """Return a string representing the product, controlled by an explicit format string.

        Supported placeholders: %name, %category, %url, %id, %currency, %website.
        Unknown text (including unrecognized %-sequences) is left untouched.

        >>> p = Product("ASUS RTX 4090", "GPU", "https://www.example.com/", "123", "USD", "example", [], True)
        >>> p.to_string_format("Name: %name, Category: %category, URL: %url, ID: %id, Website: %website")
        'Name: ASUS RTX 4090, Category: GPU, URL: https://www.example.com/, ID: 123, Website: example'
        """
        # Placeholder -> substituted attribute value.
        placeholder_values = {
            "%name": self.product_name,
            "%category": self.category,
            "%url": self.url,
            "%id": self.id,
            "%currency": self.currency,
            "%website": self.website,
        }
        # Single-pass substitution via re.sub with a replacement callable.
        # Unlike sequential str.replace calls on an evolving string, this cannot
        # re-substitute placeholder-like text that came from a field value
        # (e.g. a product name containing "%url").
        tok_regex = "|".join(re.escape(placeholder) for placeholder in placeholder_values)
        return re.sub(tok_regex, lambda mo: placeholder_values[mo.group()], format)


@dataclass
class MasterProduct:
    """A product name/category pair grouping its collected Product entries."""

    product_name: str
    category: str
    products: list[Product] = field(default_factory=list)
3 changes: 2 additions & 1 deletion scraper/scrape.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
import threading
import logging
from datetime import datetime

from scraper.models import Info
from scraper.domains import BaseWebsiteHandler, get_website_handler
from scraper.filemanager import Filemanager
from scraper.format import Info


class Scraper:
Expand Down
70 changes: 1 addition & 69 deletions scraper/visualize.py
Original file line number Diff line number Diff line change
@@ -1,80 +1,12 @@
from dataclasses import dataclass, field
from typing import Iterable, Iterator
import plotly.graph_objs as go
from datetime import datetime
import re

from scraper.models import Datapoint, MasterProduct, Product
from scraper import Filemanager
from scraper.constants import WEBSITE_COLORS


@dataclass
class Datapoint:
    """A single recorded price observation for a product."""

    date: str  # date of the observation (string format defined by the writer side)
    price: float


@dataclass
class Product:
    """A tracked product with its recorded price history for one website."""

    product_name: str
    category: str
    url: str
    id: str
    currency: str
    website: str
    datapoints: list[Datapoint]  # recorded price history
    is_up_to_date: bool

    def get_all_dates(self) -> list[str]:
        """Return the date of every datapoint, in stored order."""
        return [datapoint.date for datapoint in self.datapoints]

    def get_all_prices(self) -> list[float]:
        """Return the price of every datapoint, in stored order."""
        return [datapoint.price for datapoint in self.datapoints]

    def to_string_format(self, format: str) -> str:
        """Return a string representing the product, controlled by an explicit format string.

        Supported placeholders: %name, %category, %url, %id, %currency, %website.

        >>> p = Product("ASUS RTX 4090", "GPU", "https://www.example.com/", "123", "USD", "example", [], True)
        >>> p.to_string_format("Name: %name, Category: %category, URL: %url, ID: %id, Website: %website")
        'Name: ASUS RTX 4090, Category: GPU, URL: https://www.example.com/, ID: 123, Website: example'
        """
        # inspiration from https://docs.python.org/3/library/re.html#writing-a-tokenizer
        # (token kind, pattern) pairs; kinds become named groups in tok_regex below.
        token_specification = [
            ("NAME", r"(%name)"),
            ("CATEGORY", r"(%category)"),
            ("URL", r"(%url)"),
            ("ID", r"(%id)"),
            ("CURRENCY", r"(%currency)"),
            ("WEBSITE", r"(%website)"),
        ]
        # Token kind -> attribute value substituted for it.
        format_to = {
            "NAME": self.product_name,
            "CATEGORY": self.category,
            "URL": self.url,
            "ID": self.id,
            "CURRENCY": self.currency,
            "WEBSITE": self.website,
        }

        # Alternation of named groups: (?P<NAME>(%name))|(?P<CATEGORY>(%category))|...
        tok_regex = "|".join("(?P<%s>%s)" % pair for pair in token_specification)
        new_string = format

        for mo in re.finditer(tok_regex, format):
            kind = mo.lastgroup  # name of the group that matched
            value = mo.group()  # the literal placeholder text, e.g. "%url"

            # Replace only the first occurrence so repeated placeholders are
            # consumed one per match.
            # NOTE(review): if a substituted field value itself contains a
            # placeholder (e.g. a product name containing "%url"), this
            # sequential replace can substitute inside it — confirm whether
            # that case can occur in practice.
            new_string = new_string.replace(value, format_to[kind], 1)

        return new_string


@dataclass
class MasterProduct:
    """A product name/category pair grouping its collected Product entries."""

    product_name: str
    category: str
    products: list[Product] = field(default_factory=list)


def visualize_data(
show_all: bool, categories: list[str], ids: list[str], names: list[str], only_up_to_date: bool, compare: bool
) -> None:
Expand Down

0 comments on commit 2ce1afc

Please sign in to comment.