"""Download the full-size card images linked from a saved gallery HTML page.

The script reads ``HTML_FILE``, looks at every ``<a>`` element that carries a
``data-elementor-lightbox-title`` attribute and whose ``href`` ends in
``.jpg``, skips resized variants (``...-427x600.jpg``), and saves each
remaining image into ``DOWNLOAD_FOLDER`` as ``base<set>-<card>.jpg``.
"""
import os
import re
from typing import List, Optional, Tuple

import requests
from bs4 import BeautifulSoup

HTML_FILE = "baseset.html"
DOWNLOAD_FOLDER = "baseset"

# Maps the set name found at the end of a lightbox title to the number that
# is embedded in the output file name.
SET_MAP = {
    "Basis-Set": 1,
}

# Seconds to wait on each HTTP request before giving up.
REQUEST_TIMEOUT = 10

# Resized variants carry a "-<width>x<height>" suffix (e.g. "-427x600.jpg");
# only the original image, which has no such suffix, is wanted.
RESIZED_URL_RE = re.compile(r"-\d+x\d+\.jpg$")

# Title format: "Abra 43/102 - Basis-Set"
# groups: (card name, card number, set size, set name)
TITLE_RE = re.compile(r"(.+)\s+(\d+)\/(\d+)\s*-\s*(.+)")


def filename_for_title(title: str) -> Optional[str]:
    """Return the target file name for a lightbox title, or ``None``.

    ``None`` is returned (after printing the reason) when the title does not
    match ``TITLE_RE`` or when its set name has no entry in ``SET_MAP``.
    """
    m = TITLE_RE.match(title.strip())
    if not m:
        print(f"Skipping unmatched title format: {title}")
        return None

    # The card name and the set size are parsed but not needed for the name.
    _name, card, _total, set_name = m.groups()
    if set_name not in SET_MAP:
        print(f"Unknown set: {set_name}, please map it.")
        return None

    # int() normalises the card number, dropping any leading zeros.
    return f"base{SET_MAP[set_name]}-{int(card)}.jpg"


def collect_entries(soup: BeautifulSoup) -> List[Tuple[str, str]]:
    """Return ``(url, filename)`` pairs for every downloadable image link.

    Pairs are returned in document order. An identical pair that occurs more
    than once is kept only the first time so the same image is not fetched
    twice.
    """
    entries = []
    seen = set()
    for a in soup.find_all("a"):
        url = a.get("href", "")
        title = a.get("data-elementor-lightbox-title")
        if not title or not url.lower().endswith(".jpg"):
            continue

        # We only want the original (no -427x600 etc)
        if RESIZED_URL_RE.search(url.lower()):
            continue

        filename = filename_for_title(title)
        if filename is None:
            continue

        entry = (url, filename)
        if entry in seen:
            continue
        seen.add(entry)
        entries.append(entry)
    return entries


def download_entries(entries: List[Tuple[str, str]]) -> None:
    """Download each ``(url, filename)`` pair into ``DOWNLOAD_FOLDER``.

    A failed request or a failed file write is reported and skipped; the
    remaining entries are still processed.
    """
    # One session for all requests so connections can be reused.
    with requests.Session() as session:
        for url, filename in entries:
            filepath = os.path.join(DOWNLOAD_FOLDER, filename)
            print(f"Downloading {filename} from {url}")

            try:
                r = session.get(url, timeout=REQUEST_TIMEOUT)
                r.raise_for_status()
            except requests.RequestException as e:
                print(f" Failed: {e}")
                continue

            # The body is fully received before the file is opened, so a
            # network error never leaves a truncated file behind.
            try:
                with open(filepath, "wb") as f:
                    f.write(r.content)
            except OSError as e:
                print(f" Failed: {e}")


def main() -> None:
    """Parse ``HTML_FILE`` and download every image it references."""
    os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

    with open(HTML_FILE, "r", encoding="utf-8") as f:
        soup = BeautifulSoup(f, "html.parser")

    entries = collect_entries(soup)
    print(f"Found {len(entries)} images to download.")

    download_entries(entries)
    print("Done!")


if __name__ == "__main__":
    main()