restructure backend

This commit is contained in:
2026-01-16 18:07:23 +01:00
parent ebcc33b1f7
commit cd9d2e9900
21 changed files with 4323 additions and 69 deletions

1
.gitignore vendored
View File

@@ -34,3 +34,4 @@ report.[0-9]_.[0-9]_.[0-9]_.[0-9]_.json
.DS_Store
**/venv
images

View File

@@ -1,15 +1,43 @@
# desktop
# Bun Server
To install dependencies:
Install dependencies:
```bash
bun install
```
To run:
Run:
```bash
bun run index.ts
bun run src/index.ts
```
This project was created using `bun init` in bun v1.3.5. [Bun](https://bun.com) is a fast all-in-one JavaScript runtime.
# Python Service
Run:
```powershell
venv/Scripts/Activate.ps1
```
```bash
python python_service/app.py
```
# BACKEND DATA GENERATION
Update index / embeddings / ids for (new) cards:
1. update cards in `\images\cards`
2. activate venv:
```powershell
venv/Scripts/Activate.ps1
```
3. run python script:
```bash
python encodeImages.py
```
4. great success

View File

@@ -5,7 +5,7 @@
"": {
"name": "desktop",
"devDependencies": {
"@types/bun": "latest",
"@types/bun": "^1.3.6",
},
"peerDependencies": {
"typescript": "^5",
@@ -13,11 +13,11 @@
},
},
"packages": {
"@types/bun": ["@types/bun@1.3.5", "", { "dependencies": { "bun-types": "1.3.5" } }, "sha512-RnygCqNrd3srIPEWBd5LFeUYG7plCoH2Yw9WaZGyNmdTEei+gWaHqydbaIRkIkcbXwhBT94q78QljxN0Sk838w=="],
"@types/bun": ["@types/bun@1.3.6", "", { "dependencies": { "bun-types": "1.3.6" } }, "sha512-uWCv6FO/8LcpREhenN1d1b6fcspAB+cefwD7uti8C8VffIv0Um08TKMn98FynpTiU38+y2dUO55T11NgDt8VAA=="],
"@types/node": ["@types/node@25.0.3", "", { "dependencies": { "undici-types": "~7.16.0" } }, "sha512-W609buLVRVmeW693xKfzHeIV6nJGGz98uCPfeXI1ELMLXVeKYZ9m15fAMSaUPBHYLGFsVRcMmSCksQOrZV9BYA=="],
"bun-types": ["bun-types@1.3.5", "", { "dependencies": { "@types/node": "*" } }, "sha512-inmAYe2PFLs0SUbFOWSVD24sg1jFlMPxOjOSSCYqUgn4Hsc3rDc7dFvfVYjFPNHtov6kgUeulV4SxbuIV/stPw=="],
"bun-types": ["bun-types@1.3.6", "", { "dependencies": { "@types/node": "*" } }, "sha512-OlFwHcnNV99r//9v5IIOgQ9Uk37gZqrNMCcqEaExdkVq3Avwqok1bJFmvGMCkCE0FqzdY8VMOZpfpR3lwI+CsQ=="],
"typescript": ["typescript@5.9.3", "", { "bin": { "tsc": "bin/tsc", "tsserver": "bin/tsserver" } }, "sha512-jl1vZzPDinLr9eUt3J/t7V6FgNEw9QjvBPdysz9KfQDD41fQrC2Y4vKQdiaUpFT4bXlb1RHhLpp8wtm6M5TgSw=="],

15
docker-compose.yml Normal file
View File

@@ -0,0 +1,15 @@
# Compose definition for the card-recognition backend container.
# The top-level `version` key is omitted on purpose: the Compose Specification
# treats it as obsolete and current `docker compose` only prints a warning for it.
services:
  pkmtcg-backend:
    build: .
    container_name: pkmtcg-backend
    ports:
      - "3333:3000" # host port 3333 => container port 3000 (the port the Dockerfile EXPOSEs)
    environment:
      # Must match the container-side mount points declared under `volumes`.
      IMAGES_DIR: /images/cards
      FAISS_DIR: /faiss
    volumes:
      - ./data/images:/images/cards # host ./data/images => container /images/cards
      - ./data/faiss:/faiss # host ./data/faiss => container /faiss
    restart: unless-stopped

View File

@@ -11,14 +11,22 @@ RUN apt-get update && apt-get install -y \
RUN curl -fsSL https://bun.sh/install | bash
ENV PATH="/root/.bun/bin:${PATH}"
# --- Upgrade pip ---
RUN pip3 install --upgrade pip
# --- Install Python dependencies ---
RUN pip3 install --upgrade pip
RUN pip3 install torch --index-url https://download.pytorch.org/whl/cpu
RUN pip3 install --no-cache-dir fastapi uvicorn pillow open-clip-torch numpy faiss-cpu python-multipart
RUN pip3 install fastapi uvicorn pillow torch open-clip-torch numpy faiss-cpu python-multipart
# --- Set working directory ---
WORKDIR /app
# --- Create directories for external data ---
RUN mkdir -p /images/cards /faiss
# --- Copy project files ---
WORKDIR /app
COPY python_service ./python_service
COPY python_service/app.py ./python_service/app.py
COPY src ./src
COPY package.json .
COPY bun.lock .
@@ -26,6 +34,10 @@ COPY bun.lock .
# --- Expose ports ---
EXPOSE 3000
# --- Environment variables for external paths ---
ENV IMAGES_DIR=/images/cards
ENV FAISS_DIR=/faiss
# --- Start services ---
# Use & to run Python worker in background, Bun frontend as main process
# Python FAISS worker runs in background, Bun frontend as main process
CMD python3 python_service/app.py & bun run src/index.ts

71
encodeImages.py Normal file
View File

@@ -0,0 +1,71 @@
import os
import numpy as np
import torch
import open_clip
import faiss
from PIL import Image
# Incrementally encode card images with CLIP and append them to the FAISS index.
#
# Only images whose file stem is not yet listed in IDS_FILE are encoded; the
# index, the embeddings array and the id array are then rewritten together.

# ---- configuration ----
# NOTE(review): these are absolute paths; confirm they match where the Python
# service actually loads card_index.faiss / embeddings.npy / ids.npy from.
CARDS_FOLDER = "/images/cards"
EMBEDDINGS_FILE = "/pythonService/embeddings.npy"
IDS_FILE = "/pythonService/ids.npy"
FAISS_FILE = "/pythonService/card_index.faiss"

device = "cuda" if torch.cuda.is_available() else "cpu"

model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-L-14', pretrained='laion2b_s32b_b82k'
)
model = model.to(device).eval()

# ---- load existing or initialize ----
if os.path.exists(FAISS_FILE):
    print("Loading existing FAISS index...")
    index = faiss.read_index(FAISS_FILE)
    embeddings = np.load(EMBEDDINGS_FILE)
    ids = np.load(IDS_FILE)
else:
    print("Creating new FAISS index...")
    # The index and the embeddings array are created lazily, once the real
    # embedding width is known. The previous hard-coded 1024 does not match the
    # vectors produced by open_clip's ViT-L-14 (768-dim), which made the very
    # first index.add()/np.vstack() fail on a fresh run.
    index = None
    embeddings = None
    ids = np.array([], dtype='<U100')

# ---- find images not yet processed ----
existing_ids = set(ids.tolist())
# Sorted so that the order of vectors in the index is reproducible across runs.
new_files = sorted(
    f for f in os.listdir(CARDS_FOLDER)
    if f.lower().endswith((".png", ".jpg")) and f.rsplit(".", 1)[0] not in existing_ids
)

print(f"Found {len(new_files)} new cards to add")

new_embeddings = []
new_ids = []

for fname in new_files:
    path = os.path.join(CARDS_FOLDER, fname)
    try:
        # Context manager closes the file handle as soon as the pixels are decoded.
        with Image.open(path) as raw:
            img = raw.convert("RGB")
    except OSError as e:
        # A single unreadable/corrupt image must not discard the whole batch.
        print(f"Skipping unreadable image {fname}: {e}")
        continue
    with torch.no_grad():
        emb = model.encode_image(preprocess(img).unsqueeze(0).to(device))
    new_embeddings.append(emb.cpu().numpy())
    new_ids.append(fname.rsplit(".", 1)[0])
    print("Encoded:", fname)

if len(new_embeddings) > 0:
    # FAISS requires C-contiguous float32 input.
    new_embeddings = np.ascontiguousarray(np.vstack(new_embeddings), dtype='float32')
    faiss.normalize_L2(new_embeddings)  # unit vectors => inner product == cosine similarity

    dim = new_embeddings.shape[1]
    if index is None:
        index = faiss.IndexFlatIP(dim)
        embeddings = np.zeros((0, dim), dtype='float32')
    elif index.d != dim:
        raise SystemExit(
            f"Existing index has dimension {index.d} but the model produced "
            f"{dim}-dimensional embeddings; rebuild the index from scratch."
        )

    # add to FAISS
    index.add(new_embeddings)

    # append to numpy arrays (row i of embeddings corresponds to ids[i])
    embeddings = np.vstack([embeddings, new_embeddings])
    ids = np.concatenate([ids, np.array(new_ids)])

    # save everything
    np.save(EMBEDDINGS_FILE, embeddings)
    np.save(IDS_FILE, ids)
    faiss.write_index(index, FAISS_FILE)

    print(f"Added {len(new_ids)} cards. Total now:", index.ntotal)
else:
    print("No new cards found — nothing to update.")

3933
name.txt Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -7,7 +7,7 @@
"dev": "bun run src/index.ts"
},
"devDependencies": {
"@types/bun": "latest"
"@types/bun": "^1.3.6"
},
"peerDependencies": {
"typescript": "^5"

View File

@@ -0,0 +1,61 @@
import os
import numpy as np
from PIL import Image
import torch
import open_clip
import faiss
# Build the card embedding store and FAISS index from scratch: every .jpg/.png
# in CARDS_FOLDER is encoded with CLIP, then embeddings, ids and the index are
# written to the files named below (relative to the current working directory).

# --- Configuration ---
CARDS_FOLDER = "cards_old"
EMBEDDINGS_FILE = "embeddings.npy"
IDS_FILE = "ids.npy"
FAISS_INDEX_FILE = "card_index.faiss"

# --- Device ---
device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# --- Load CLIP model ---
model, _, preprocess = open_clip.create_model_and_transforms(
    'ViT-L-14', pretrained='laion2b_s32b_b82k'
)
model = model.to(device).eval()


# --- Helper: encode image ---
def encode_image(path):
    """Return the CLIP image embedding of the file at `path` as a NumPy array.

    The array is whatever `model.encode_image` yields for a batch of one image,
    moved to the CPU; it is not normalized here.
    """
    # Context manager closes the file handle once the pixels are decoded.
    with Image.open(path) as raw:
        img = raw.convert("RGB")
    with torch.no_grad():
        emb = model.encode_image(preprocess(img).unsqueeze(0).to(device))
    return emb.cpu().numpy()


# --- Build embeddings ---
embeddings = []
ids = []

# Sorted so the row order of the index is reproducible between runs
# (os.listdir returns entries in arbitrary order).
for fname in sorted(os.listdir(CARDS_FOLDER)):
    if fname.lower().endswith((".jpg", ".png")):
        path = os.path.join(CARDS_FOLDER, fname)
        emb = encode_image(path)
        embeddings.append(emb)
        # NOTE(review): ids keep the file extension here, whereas encodeImages.py
        # stores the bare stem — confirm which form the query service expects.
        ids.append(fname)
        print("Encoded:", fname)

if not embeddings:
    # np.vstack([]) would otherwise fail with an unhelpful ValueError.
    raise SystemExit(f"No .jpg/.png images found in {CARDS_FOLDER!r}; nothing to index.")

# FAISS requires C-contiguous float32 input.
embeddings = np.ascontiguousarray(np.vstack(embeddings), dtype='float32')

# --- Save embeddings & IDs ---
# Saved before normalization, so the file holds the raw model output.
np.save(EMBEDDINGS_FILE, embeddings)
np.save(IDS_FILE, np.array(ids))
print("Saved embeddings and IDs.")

# --- Normalize embeddings ---
faiss.normalize_L2(embeddings)  # in place

# --- Build FAISS index ---
d = embeddings.shape[1]  # embedding dimension
index = faiss.IndexFlatIP(d)  # inner product = cosine similarity
index.add(embeddings)
print("FAISS index built with", index.ntotal, "cards.")

# --- Save FAISS index ---
faiss.write_index(index, FAISS_INDEX_FILE)
print("FAISS index saved:", FAISS_INDEX_FILE)

View File

@@ -0,0 +1,65 @@
import os
import re
import requests
from bs4 import BeautifulSoup
# Extract full-size card images from a saved gallery page and download them
# under normalized "base<set>-<number>.jpg" names.

HTML_FILE = "baseset.html"
DOWNLOAD_FOLDER = "baseset"

# Set name as it appears in the lightbox title -> numeric set id used in filenames.
SET_MAP = {
    "Basis-Set": 1,
}

# Resized variants end in "-<width>x<height>.jpg"; only originals are wanted.
_THUMBNAIL_RE = re.compile(r"-\d+x\d+\.jpg$")
# Title layout: "Abra 43/102 - Basis-Set"
_TITLE_RE = re.compile(r"(.+)\s+(\d+)\/(\d+)\s*-\s*(.+)")

os.makedirs(DOWNLOAD_FOLDER, exist_ok=True)

with open(HTML_FILE, "r", encoding="utf-8") as html:
    soup = BeautifulSoup(html, "html.parser")

entries = []

for anchor in soup.find_all("a"):
    url = anchor.get("href", "")
    title = anchor.get("data-elementor-lightbox-title")

    if not title:
        continue

    lowered_url = url.lower()
    if not lowered_url.endswith(".jpg") or _THUMBNAIL_RE.search(lowered_url):
        continue

    parsed = _TITLE_RE.match(title.strip())
    if parsed is None:
        print(f"Skipping unmatched title format: {title}")
        continue

    card_number = int(parsed.group(2))
    set_label = parsed.group(4)

    if set_label not in SET_MAP:
        print(f"Unknown set: {set_label}, please map it.")
        continue

    entries.append((url, f"base{SET_MAP[set_label]}-{card_number}.jpg"))

print(f"Found {len(entries)} images to download.")

for source_url, target_name in entries:
    target_path = os.path.join(DOWNLOAD_FOLDER, target_name)
    print(f"Downloading {target_name} from {source_url}")

    try:
        response = requests.get(source_url, timeout=10)
        response.raise_for_status()
    except Exception as e:
        print(f" Failed: {e}")
    else:
        # Only reached when the request succeeded with a 2xx status.
        with open(target_path, "wb") as out:
            out.write(response.content)

print("Done!")

View File

@@ -0,0 +1,56 @@
import os
import requests
from time import sleep
# Download the high-resolution image of every card listed by the TCGdex API
# into OUTPUT_FOLDER as "<card id>.png". Cards already on disk are skipped, so
# the script can be re-run to resume an interrupted download.

# --- Configuration ---
TCGDEX_API = "https://api.tcgdex.net/v2/de/cards"
OUTPUT_FOLDER = "cards"
REQUEST_DELAY = 0.1  # seconds between requests to avoid rate limiting
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever

# Create output folder if not exists
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Fetch card list from TCGdex
print("Fetching card list...")
resp = requests.get(TCGDEX_API, timeout=REQUEST_TIMEOUT)
if resp.status_code != 200:
    raise Exception(f"Failed to fetch card list: {resp.status_code}")

cards = resp.json()
print(f"Total cards fetched: {len(cards)}")

# Download each card image
for card in cards:
    card_id = card.get("id", None)
    image_base = card.get("image", None)

    if not card_id:
        print("Skipping card with missing ID:", card)
        continue

    if not image_base:
        print(f"No image URL for {card_id}, skipping...")
        continue

    image_url = image_base + "/high.png"
    output_path = os.path.join(OUTPUT_FOLDER, f"{card_id}.png")

    # Skip if already downloaded
    if os.path.exists(output_path):
        print(f"Already exists: {card_id}")
        continue

    # Stream into a temporary file and rename only on success. Writing directly
    # to output_path would leave a truncated image behind when the transfer
    # fails midway, and the "already exists" check above would then skip that
    # broken file on every later run.
    partial_path = output_path + ".part"
    try:
        # The context manager releases the streamed connection in every case.
        with requests.get(image_url, stream=True, timeout=REQUEST_TIMEOUT) as r:
            if r.status_code == 200:
                with open(partial_path, "wb") as f:
                    for chunk in r.iter_content(64 * 1024):
                        f.write(chunk)
                os.replace(partial_path, output_path)
                print(f"Downloaded: {card_id}")
            else:
                print(f"Failed to download {card_id}: HTTP {r.status_code}")
    except Exception as e:
        print(f"Error downloading {card_id}: {e}")
        # Best-effort cleanup of the incomplete file.
        if os.path.exists(partial_path):
            os.remove(partial_path)

    sleep(REQUEST_DELAY)  # small delay to be polite

print("All done!")

View File

@@ -0,0 +1,37 @@
import os
import requests
from time import sleep
# Collect the unique card names reported by the TCGdex API and write them to
# OUTPUT_FOLDER/name.txt, one per line in the form  'Name',

# --- Configuration ---
TCGDEX_API = "https://api.tcgdex.net/v2/de/cards"
OUTPUT_FOLDER = "names"
REQUEST_DELAY = 0.1  # seconds between requests to avoid rate limiting
REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection blocks forever

# Names containing this marker are left out of the export.
# NOTE(review): the marker literal is empty in the source as received — most
# likely a special character that was lost. An empty string is contained in
# every string, so testing it unguarded drops every name; restore the intended
# character here. While it is empty, no name is filtered.
SKIP_MARKER = ""

# Create output folder if not exists
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Fetch card list from TCGdex
print("Fetching card list...")
resp = requests.get(TCGDEX_API, timeout=REQUEST_TIMEOUT)
if resp.status_code != 200:
    raise Exception(f"Failed to fetch card list: {resp.status_code}")

cards = resp.json()
print(f"Total cards fetched: {len(cards)}")

names = set()  # using a set avoids duplicates automatically

for card in cards:
    card_name = card.get("name")
    if not card_name:
        print("Skipping card with missing name:", card)
        continue
    if SKIP_MARKER and SKIP_MARKER in card_name:
        continue
    names.add(card_name)  # set ignores duplicates

output_path = os.path.join(OUTPUT_FOLDER, "name.txt")
with open(output_path, "w", encoding="utf-8") as f:
    # Sorted so the file is stable between runs (set order is arbitrary).
    # NOTE(review): names are not escaped; a name containing an apostrophe
    # yields an unbalanced quote if these lines are pasted into source code.
    for name in sorted(names):
        f.write("'" + name + "',\n")

print(f"Wrote {len(names)} unique names to {output_path}")

View File

@@ -23,7 +23,7 @@ BASE = os.path.dirname(os.path.abspath(__file__))
FAISS_INDEX_FILE = os.path.join(BASE, "card_index.faiss")
EMBEDDINGS_FILE = os.path.join(BASE, "embeddings.npy")
IDS_FILE = os.path.join(BASE, "ids.npy")
TOP_K = 5
TOP_K = 3
# --- Load CLIP model ---
device = "cuda" if torch.cuda.is_available() else "cpu"

18
src/api.ts Normal file
View File

@@ -0,0 +1,18 @@
/** Directory used when the `IMAGES_DIR` environment variable is not set. */
const DEFAULT_IMAGES_DIR = "images/cards";

/** Supported image extensions and the MIME type each one is served with. */
const CONTENT_TYPES = {
  png: "image/png",
  jpg: "image/jpeg", // "image/jpg" is not a registered media type
} as const;

/**
 * Tells whether `id` can be used as a single file-name component.
 * Ids that are empty or contain a path separator or NUL are rejected so a
 * request cannot address files outside the images directory.
 */
function isSafeCardId(id: string): boolean {
  return id.length > 0 && !/[\/\\\0]/.test(id);
}

/**
 * Serves the image of the card named by the `id` route parameter.
 *
 * Looks for `<id>.png`, then `<id>.jpg`, in the directory given by the
 * `IMAGES_DIR` environment variable (default `images/cards`). When neither
 * exists, `placeholder.png` from the same directory is served with status 200.
 *
 * @param req Request whose `params.id` is the card id.
 * @returns The image response; 400 for an unsafe id; 404 when not even the
 *   placeholder image exists.
 */
export async function getImage(req: Bun.BunRequest<"/api/cards/:id">): Promise<Response> {
  const { id } = req.params;

  if (!isSafeCardId(id)) {
    return new Response("Invalid card id", { status: 400 });
  }

  // `||` rather than `??`: an empty IMAGES_DIR is treated the same as an unset one.
  const imagesDir = Bun.env.IMAGES_DIR || DEFAULT_IMAGES_DIR;

  for (const ext of ["png", "jpg"] as const) {
    const candidate = Bun.file(`${imagesDir}/${id}.${ext}`);
    if (await candidate.exists()) {
      return new Response(candidate, {
        headers: { "Content-Type": CONTENT_TYPES[ext] },
        status: 200,
      });
    }
  }

  console.error(`File for image ${id} does not exist, serving placeholder`);

  const placeholder = Bun.file(`${imagesDir}/placeholder.png`);
  if (!(await placeholder.exists())) {
    return new Response("Image not found", { status: 404 });
  }
  return new Response(placeholder, {
    headers: { "Content-Type": CONTENT_TYPES.png },
    status: 200,
  });
}

View File

@@ -1,21 +0,0 @@
import type { Card } from "./types";
// Placeholder cards array
let cards: Card[] = [
{ id: "swsh1-1", name: "Celebi V", set: "Swsh1", number: "1", imageUrl: "https://assets.tcgdex.net/de/swsh/swsh1/1/high.png" },
{ id: "swsh12-001", name: "Bluzuk", set: "Swsh12", number: "001", imageUrl: "https://assets.tcgdex.net/de/swsh/swsh12/001/high.png" },
];
export function loadCards() {
// Placeholder: you can later load embeddings.npy + FAISS
console.log("Cards module loaded (currently empty)");
}
export function queryCardById(id: string): Card | null {
return cards.find(c => c.id === id) || null;
}
// Example placeholder: return top N matches
export function queryCardByEmbedding(/* embedding */): Card[] {
return cards.slice(0, 5); // dummy top 5
}

View File

@@ -1,6 +1,5 @@
import { serve, spawn } from 'bun';
import { queryCardByEmbedding, queryCardById } from './embeddings';
import type { Card } from './types';
import { getImage } from './api';
const PYTHON_SERVICE = "http://localhost:5001/query";
@@ -27,29 +26,18 @@ const server = serve({
}
},
},
"/api/cards/:id": async (req) => {
const { id } = req.params;
const card = queryCardById(id);
if (!card) return new Response(JSON.stringify({ error: "Card not found" }), { status: 404, headers: { "Content-Type": "application/json" } });
return new Response(JSON.stringify(card), { headers: { "Content-Type": "application/json" } });
"/api/cards/image/:id": {
async GET(req) {
return getImage(req);
/* const { id } = req.params;
const card = queryCardById(id);
if (!card) return new Response(JSON.stringify({ error: "Card not found" }), { status: 404, headers: { "Content-Type": "application/json" } });
return new Response(JSON.stringify(card), { headers: { "Content-Type": "application/json" } }); */
}
},
"/*": async () => {
return new Response("<h1>Pokemon Card Backend</h1>", { headers: { "Content-Type": "text/html" } });
},
/* "/api/cards/query": {
async POST(req) {
try {
const { embedding } = await req.json() as { embedding: number[] };
if (!embedding) return new Response(JSON.stringify({ error: "Missing embedding" }), { status: 400, headers: { "Content-Type": "application/json" } });
const results: Card[] = queryCardByEmbedding(embedding);
return new Response(JSON.stringify(results), { headers: { "Content-Type": "application/json" } });
} catch (err) {
console.error("Error querying card:", err);
return new Response(JSON.stringify({ error: "Failed to query card" }), { status: 500, headers: { "Content-Type": "application/json" } });
}
},
}, */
},
development: process.env.NODE_ENV !== 'production' && {
@@ -61,4 +49,4 @@ const server = serve({
},
});
console.log(`🚀 Server running at ${server.url}`);
console.log(`Server running at ${server.url}`);

View File

@@ -1,10 +0,0 @@
export interface Card {
id: string;
name: string;
set: string;
number: string;
rarity?: string;
variant?: string;
foil?: boolean;
imageUrl: string;
}

View File

@@ -1,7 +1,7 @@
{
"compilerOptions": {
// Environment setup & latest features
"lib": ["ESNext"],
"lib": ["ESNext", "DOM"],
"target": "ESNext",
"module": "Preserve",
"moduleDetection": "force",