# Original listing metadata (from the code viewer): 66 lines, 1.5 KiB, Python.
import os
import re

import requests
from bs4 import BeautifulSoup

# Scrape image links out of a saved HTML page and download each original
# image into DOWNLOAD_FOLDER under a normalized "base<set>-<card>.jpg" name.

# Local HTML file whose <a> tags are scanned for image links.
HTML_FILE = "baseset.html"
# Directory the downloaded images are written to (created if missing).
DOWNLOAD_FOLDER = "baseset"

# Maps the set name found at the end of a link title to the set number used
# in the output filename. Titles naming a set that is not listed are skipped.
SET_MAP = {
    "Basis-Set": 1,
}

# Both patterns are compiled once here rather than on every loop iteration.
# Matches resized variants such as "...-427x600.jpg"; applied to the
# lowercased URL.
RESIZED_RE = re.compile(r"-\d+x\d+\.jpg$")
# Matches titles such as "Abra 43/102 - Basis-Set".
# Groups: (name, card number, set total, set name).
TITLE_RE = re.compile(r"(.+)\s+(\d+)\/(\d+)\s*-\s*(.+)")

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

with open(HTML_FILE, "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

# Output filename -> source URL. Keying by filename means an image that is
# linked several times is downloaded only once; when two different URLs map
# to the same filename the later one wins, which is the file that would have
# been left on disk by downloading both in page order.
targets = {}

for a in soup.find_all("a"):
    url = a.get("href", "")
    title = a.get("data-elementor-lightbox-title")
    url_lower = url.lower()

    # Only titled links that point at a .jpg are of interest.
    if not title or not url_lower.endswith(".jpg"):
        continue

    # We only want the original (no -427x600 etc)
    if RESIZED_RE.search(url_lower):
        continue

    m = TITLE_RE.match(title.strip())
    if not m:
        print(f"Skipping unmatched title format: {title}")
        continue

    # Only the card number and the set name feed the output filename.
    # int() also drops any leading zeros from the card number.
    card = int(m.group(2))
    set_name = m.group(4)

    if set_name not in SET_MAP:
        print(f"Unknown set: {set_name}, please map it.")
        continue

    new_filename = f"base{SET_MAP[set_name]}-{card}.jpg"

    # Surface collisions instead of silently overwriting one image with another.
    previous_url = targets.get(new_filename)
    if previous_url is not None and previous_url != url:
        print(
            f"Warning: {new_filename} is claimed by both {previous_url} "
            f"and {url}; using the latter."
        )
    targets[new_filename] = url

# (url, filename) pairs, in order of each filename's first appearance.
entries = [(src, fname) for fname, src in targets.items()]

print(f"Found {len(entries)} images to download.")

for url, filename in entries:
    filepath = os.path.join(DOWNLOAD_FOLDER, filename)
    print(f"Downloading {filename} from {url}")

    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        # Best effort: report the failed download and move on to the next
        # image. Only request/HTTP errors are caught so that genuine
        # programming errors are not hidden.
        print(f" Failed: {e}")
        continue

    # The file is only created after a successful response, so a failed
    # request never leaves an empty or partial file behind.
    with open(filepath, "wb") as f:
        f.write(r.content)

print("Done!")