restructure backend
This commit is contained in:
65
pythonScripts/download_baseset.py
Normal file
65
pythonScripts/download_baseset.py
Normal file
@@ -0,0 +1,65 @@
|
||||
import os
|
||||
import re
|
||||
import requests
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
# Local HTML file that is parsed for <a> tags pointing at card images.
HTML_FILE = "baseset.html"
# Directory the downloaded images are written into (created when missing).
DOWNLOAD_FOLDER = "baseset"

# Maps the set name found at the end of a lightbox title to the number used
# in the generated file names. Titles with an unmapped set are skipped.
SET_MAP = {
    "Basis-Set": 1,
}

# Resized variants end in "-<width>x<height>.jpg" (e.g. "-427x600.jpg").
_RESIZED_IMAGE_RE = re.compile(r"-\d+x\d+\.jpg$")
# Title format: "Abra 43/102 - Basis-Set" -> name, card number, total, set.
_TITLE_RE = re.compile(r"(.+)\s+(\d+)\/(\d+)\s*-\s*(.+)")

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

with open(HTML_FILE, "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "html.parser")

entries = []

for a in soup.find_all("a"):
    url = a.get("href", "")
    title = a.get("data-elementor-lightbox-title")

    # Only anchors that carry a lightbox title and link to a .jpg matter.
    if not title or not url.lower().endswith(".jpg"):
        continue

    # We only want the original (no -427x600 etc)
    if _RESIZED_IMAGE_RE.search(url.lower()):
        continue

    # Parse title: "Abra 43/102 - Basis-Set"
    m = _TITLE_RE.match(title.strip())
    if not m:
        print(f"Skipping unmatched title format: {title}")
        continue

    # The card name and the set total are not needed for the file name.
    _name, card, _total, set_name = m.groups()
    card = int(card)  # int() drops any leading zeros from the card number

    if set_name not in SET_MAP:
        print(f"Unknown set: {set_name}, please map it.")
        continue

    set_num = SET_MAP[set_name]
    new_filename = f"base{set_num}-{card}.jpg"

    entries.append((url, new_filename))

# Drop exact (url, filename) repeats while keeping first-seen order, so an
# image that is linked more than once in the page is fetched only once.
entries = list(dict.fromkeys(entries))

print(f"Found {len(entries)} images to download.")

for url, filename in entries:
    filepath = os.path.join(DOWNLOAD_FOLDER, filename)
    print(f"Downloading {filename} from {url}")

    try:
        r = requests.get(url, timeout=10)
        r.raise_for_status()
    except requests.RequestException as e:
        # Network/HTTP problems are reported and the run continues with the
        # next image; unrelated programming errors are no longer swallowed.
        print(f" Failed: {e}")
        continue

    # The body is fully in memory at this point, so a failed request never
    # leaves a partially written file behind.
    with open(filepath, "wb") as f:
        f.write(r.content)

print("Done!")
|
||||
Reference in New Issue
Block a user