# oddslooker.py
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from tqdm import tqdm  # Progress bar
import time
import re

# Base URL
BASE_URL = "https://www.oddsportal.com"

# Target country/region pages to crawl
target_urls = [
"/football/europe/",
"/football/france/",
"/football/england/",
"/football/germany/",
"/football/italy/",
"/football/spain/",
"/football/portugal/",
"/football/netherlands/",
"/football/belgium/",
"/football/russia/",
"/football/turkey/",
"/football/ukraine/",
"/football/greece/",
"/football/scotland/",
"/football/austria/",
]
# Configure the Selenium WebDriver
driver = webdriver.Chrome()  # Replace with the driver matching your browser
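# Optional (assumption, not part of the original flow): Chrome can run headless
# via ChromeOptions, e.g.
#   options = webdriver.ChromeOptions()
#   options.add_argument("--headless=new")
#   driver = webdriver.Chrome(options=options)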

# Wait for an element located by XPath to be present in the DOM
def wait_for_element(xpath, timeout=10):
    try:
        return WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located((By.XPATH, xpath))
        )
    except TimeoutException:
        print(f"Element not found: {xpath}")
        return None
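# Note: wait_for_element is not called in the main flow below; it is kept as a
# helper for explicit waits, e.g. (illustrative call, adjust the XPath as needed):
#   wait_for_element("//a[contains(@href, '/football/')]", timeout=15)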

# Collect the league links listed on a country/region page
def get_leagues(target_url):
    url = BASE_URL + target_url
    driver.get(url)
    time.sleep(1)  # Let the page finish loading
    links = driver.find_elements(By.XPATH, "//a[contains(@href, '/football/')]")
    leagues = list(set(link.get_attribute("href") for link in links))
    return leagues
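# Note: this returns every href containing '/football/' on the page (league,
# match, and country links alike); the main loop simply visits each of them and
# relies on filter_match_links to keep only actual match pages.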

# Keep only the links that point to individual match pages
def filter_match_links(links):
    match_pattern = re.compile(r"^https://www\.oddsportal\.com/football/.+/[a-z0-9\-]+-[A-Za-z0-9]{8}/$")
    return [link for link in links if match_pattern.match(link)]
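# For reference, the pattern targets URLs of the shape
#   https://www.oddsportal.com/football/<country>/<league>/<teams-slug>-<8-char id>/
# e.g. (illustrative, not a real fixture):
#   https://www.oddsportal.com/football/france/ligue-1/lyon-marseille-xAbCd4Gh/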

# Output file
output_file = "list_of_matches.txt"

# Set used to de-duplicate match links
unique_match_links = set()

# Progress bar over the target pages
with tqdm(total=len(target_urls), desc="Processing target URLs", unit="target") as target_pbar:
    for target_url in target_urls:
        print(f"Fetching leagues for {target_url}...")
        leagues = get_leagues(target_url)
        if not leagues:
            target_pbar.update(1)
            continue
        print(f"Leagues found: {len(leagues)}")
        # Progress bar over the leagues of this target
        with tqdm(total=len(leagues), desc=f"Fetching matches in {target_url}", unit="league") as league_pbar:
            for league in leagues:
                print(f"Fetching matches for league: {league}")
                driver.get(league)
                time.sleep(1)
                # Extract the match links found on the league page
                links = driver.find_elements(By.XPATH, "//a[contains(@href, '/football/')]")
                match_links = filter_match_links([link.get_attribute("href") for link in links])
                print(f"Matches found: {len(match_links)}")
                # Add the unique links to the set
                unique_match_links.update(match_links)
                league_pbar.update(1)  # Advance the league progress bar
        target_pbar.update(1)  # Advance the target progress bar

# Write the unique links to the output file
with open(output_file, "w", encoding="utf-8") as file:
    for match_link in unique_match_links:
        file.write(match_link + "\n")

# Close the browser
driver.quit()
print(f"All unique match links have been saved to {output_file}")