Files
pkmntcg-backend/pythonScripts/download_baseset.py
2026-01-16 18:07:23 +01:00

66 lines
1.5 KiB
Python

import os
import re
import requests
from bs4 import BeautifulSoup
# HTML file that is opened and parsed for image links.
HTML_FILE = "baseset.html"

# Directory the downloaded images are written to (created if missing).
DOWNLOAD_FOLDER = "baseset"

# Set name as it appears at the end of a lightbox title -> number used in
# the output file name ("base<number>-<card>.jpg").
SET_MAP = {"Basis-Set": 1}
# Build `entries`: a list of (url, target_filename) pairs, one for every
# full-size .jpg linked from HTML_FILE whose lightbox title can be parsed and
# whose set name is present in SET_MAP.

# Both patterns are compiled once here rather than on every anchor.
# Resized variants end in "-<width>x<height>.jpg" (e.g. "-427x600.jpg").
_RESIZED_RE = re.compile(r"-\d+x\d+\.jpg$")
# Title layout: "<name> <card>/<total> - <set name>",
# e.g. "Abra 43/102 - Basis-Set".
_TITLE_RE = re.compile(r"(.+)\s+(\d+)\/(\d+)\s*-\s*(.+)")

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

with open(HTML_FILE, "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

entries = []
# Exact (url, filename) pairs already queued; a repeated anchor would only
# fetch the same URL into the same file a second time.
seen = set()

for anchor in soup.find_all("a"):
    url = anchor.get("href", "")
    title = anchor.get("data-elementor-lightbox-title")
    url_lower = url.lower()

    # Only anchors that carry a lightbox title and point at a .jpg count.
    if not title or not url_lower.endswith(".jpg"):
        continue

    # We only want the original (no -427x600 etc).
    if _RESIZED_RE.search(url_lower):
        continue

    match = _TITLE_RE.match(title.strip())
    if not match:
        print(f"Skipping unmatched title format: {title}")
        continue

    # Group 2 is the card number, group 4 the set name; the card name and
    # the set total (groups 1 and 3) are not needed for the file name.
    card = int(match.group(2))
    set_name = match.group(4)

    if set_name not in SET_MAP:
        print(f"Unknown set: {set_name}, please map it.")
        continue

    set_num = SET_MAP[set_name]
    new_filename = f"base{set_num}-{card}.jpg"

    entry = (url, new_filename)
    if entry in seen:
        continue
    seen.add(entry)
    entries.append(entry)

print(f"Found {len(entries)} images to download.")
# Download every (url, filename) pair in `entries` into DOWNLOAD_FOLDER.
# A failed request is reported and skipped so that one bad link does not
# abort the remaining downloads.
#
# A single Session is used for all requests so the underlying connection can
# be reused instead of being re-established for every image.
with requests.Session() as session:
    for url, filename in entries:
        filepath = os.path.join(DOWNLOAD_FOLDER, filename)
        print(f"Downloading {filename} from {url}")
        try:
            response = session.get(url, timeout=10)
            # Treat HTTP 4xx/5xx responses as failures too.
            response.raise_for_status()
        except requests.RequestException as e:
            # Only request-level errors (connection, timeout, HTTP status,
            # malformed URL) are skipped; anything else is a real bug and
            # should surface.
            print(f" Failed: {e}")
            continue
        with open(filepath, "wb") as f:
            f.write(response.content)

print("Done!")