This commit is contained in:
2026-04-01 18:31:33 +02:00
parent 6e23e32bb0
commit b5c7c5305a
95 changed files with 9609 additions and 2374 deletions

0
backend/ai/__init__.py Normal file
View File

176
backend/ai/card_pick_nn.py Normal file
View File

@@ -0,0 +1,176 @@
import os
import numpy as np
from ai.nn import NeuralNet, _softmax
# Separate weights file so this NN trains independently from the plan NN.
CARD_PICK_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "card_pick_weights.json")
# Input width per candidate card: 7 static card features + 8 context features.
N_CARD_FEATURES = 15
# Normalization constants — chosen to cover the realistic stat range for generated cards.
_MAX_ATK = 50.0
_MAX_DEF = 100.0
def _precompute_static_features(allowed: list) -> np.ndarray:
    """
    Vectorized precomputation of the 7 per-card static features for the whole pool.

    Feature order: atk, def, cost, rarity, attack share, power-curve bonus, type —
    each normalized to roughly [0, 1].  Returns (n, 7) float32.
    Called once per choose_cards() invocation.
    """
    count = len(allowed)
    attack = np.fromiter((c.attack for c in allowed), dtype=np.float32, count=count)
    defense = np.fromiter((c.defense for c in allowed), dtype=np.float32, count=count)
    cost_arr = np.fromiter((c.cost for c in allowed), dtype=np.float32, count=count)
    rarity = np.fromiter((c.card_rarity.value for c in allowed), dtype=np.float32, count=count)
    ctype = np.fromiter((c.card_type.value for c in allowed), dtype=np.float32, count=count)

    # "True" stat-derived cost (mirrors the card-generation formula), clamped to [1, 10].
    true_cost = np.clip(((attack**2 + defense**2) ** 0.18) / 1.5, 1.0, 10.0)
    stat_total = attack + defense
    # Share of total stats that is attack; 0.5 for 0/0 cards to avoid dividing by zero.
    attack_share = np.where(stat_total > 0, attack / stat_total, 0.5)
    # How much better than its printed cost the card is, clipped to [0, 1].
    curve_bonus = np.clip(true_cost - cost_arr, 0.0, 1.0)

    columns = [
        attack / _MAX_ATK,
        defense / _MAX_DEF,
        cost_arr / 10.0,
        rarity / 5.0,
        attack_share,
        curve_bonus,
        ctype / 9.0,
    ]
    return np.stack(columns, axis=1).astype(np.float32)
class CardPickPlayer:
    """
    Uses a NeuralNet to sequentially select cards from a pool until the cost
    budget is exhausted. API mirrors NeuralPlayer so training code stays uniform.

    In training mode: samples stochastically (softmax) and records the
    trajectory for a REINFORCE update after the game ends.
    In inference mode: picks the highest-scoring affordable card at each step.

    Performance design:
    - Static per-card features (7) are computed once via vectorized numpy.
    - Context features (8) use running totals updated by O(1) increments.
    - Picked cards are tracked with a boolean mask; no list.remove() calls.
    - Each pick step does one small forward pass over the affordable subset only.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # Scorer over (n_candidates, 15) feature matrices built in choose_cards().
        self.net = net
        # When True, picks are sampled and recorded so compute_grads() can run later.
        self.training = training
        # Softmax temperature for training-time exploration (higher = flatter sampling).
        self.temperature = temperature
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features_matrix, chosen_idx)

    def choose_cards(self, allowed: list, difficulty: int) -> list:
        """
        allowed: pre-filtered list of Card objects (cost ≤ max_card_cost already applied).
        Returns the selected deck as a list of Cards.
        """
        BUDGET = 50
        n = len(allowed)
        static = _precompute_static_features(allowed)  # (n, 7) — computed once
        costs = np.array([c.cost for c in allowed], dtype=np.float32)
        picked = np.zeros(n, dtype=bool)
        budget_remaining = BUDGET
        selected: list = []
        # Running totals for context features — incremented O(1) per pick.
        n_picked = 0
        sum_atk = 0.0
        sum_def = 0.0
        sum_cost = 0.0
        n_cheap = 0  # cost ≤ 3
        n_high = 0   # cost ≥ 6
        diff_norm = difficulty / 10.0
        while True:
            # Candidates = not yet picked AND affordable with the remaining budget.
            mask = (~picked) & (costs <= budget_remaining)
            if not mask.any():
                break
            idxs = np.where(mask)[0]
            # Context row — same for every candidate this step, broadcast via tile.
            if n_picked > 0:
                ctx = np.array([
                    n_picked / 30.0,
                    budget_remaining / 50.0,
                    sum_atk / n_picked / _MAX_ATK,
                    sum_def / n_picked / _MAX_DEF,
                    sum_cost / n_picked / 10.0,
                    n_cheap / n_picked,
                    n_high / n_picked,
                    diff_norm,
                ], dtype=np.float32)
            else:
                # First pick: averages are undefined, use neutral zeros.
                ctx = np.array([
                    0.0, budget_remaining / 50.0, 0.0, 0.0, 0.0, 0.0, 0.0, diff_norm,
                ], dtype=np.float32)
            features = np.concatenate(
                [static[idxs], np.tile(ctx, (len(idxs), 1))],
                axis=1,
            )
            scores = self.net.forward(features)
            if self.training:
                # Temperature-scaled softmax sampling; clip avoids zero-probability
                # entries so np.random.choice always gets a valid distribution.
                probs = _softmax((scores / self.temperature).astype(np.float64))
                probs = np.clip(probs, 1e-10, None)
                probs /= probs.sum()
                local_idx = int(np.random.choice(len(idxs), p=probs))
                # Record (candidate features, chosen local index) for REINFORCE.
                self.trajectory.append((features, local_idx))
            else:
                local_idx = int(np.argmax(scores))
            global_idx = idxs[local_idx]
            card = allowed[global_idx]
            picked[global_idx] = True
            selected.append(card)
            # Incremental context update — O(1).
            budget_remaining -= card.cost
            n_picked += 1
            sum_atk += card.attack
            sum_def += card.defense
            sum_cost += card.cost
            if card.cost <= 3: n_cheap += 1
            if card.cost >= 6: n_high += 1
        return selected

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        REINFORCE gradients averaged over the pick trajectory.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if no picks were made.
        Clears the trajectory afterwards, so each game is consumed once.
        """
        if not self.trajectory:
            return None
        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]
        for features, chosen_idx in self.trajectory:
            # Re-run forward so the net caches activations for backward().
            scores = self.net.forward(features)
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # d(log softmax[chosen])/d(scores) = onehot(chosen) - probs, scaled by reward.
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n
        self.trajectory.clear()
        return acc_gw, acc_gb

File diff suppressed because one or more lines are too long

459
backend/ai/engine.py Normal file
View File

@@ -0,0 +1,459 @@
import asyncio
import logging
import os
import random
from dataclasses import dataclass
from enum import Enum
from itertools import combinations, permutations
import numpy as np
from game.card import Card
from game.rules import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE, PlayerState
# Shared application logger.
logger = logging.getLogger("app")
# Sentinel user id under which the AI participates in a GameState.
AI_USER_ID = "ai"
class AIPersonality(Enum):
    """Deck-building / play-style profiles; each maps to a weight set in
    choose_cards() and score_plans_batch()."""

    AGGRESSIVE = "aggressive"  # weights favor attack share and direct damage
    DEFENSIVE = "defensive"    # weights favor blocking and power-curve value
    BALANCED = "balanced"      # even spread across all scoring metrics
    GREEDY = "greedy"  # prioritizes high cost cards, willing to sacrifice
    SWARM = "swarm"            # favors many cheap cards and board coverage
    CONTROL = "control"        # power-curve value dominates scoring
    ARBITRARY = "arbitrary"    # scoring mostly driven by random noise
    JEBRASKA = "jebraska"  # trained neural network plan scorer
def get_random_personality() -> AIPersonality:
    """Return a uniformly random AI personality."""
    options = list(AIPersonality)
    return random.choice(options)
def calculate_exact_cost(attack: int, defense: int) -> float:
"""Calculate the exact cost before rounding (matches card.py formula)."""
return min(10.0, max(1.0, ((attack**2 + defense**2)**0.18) / 1.5))
def get_power_curve_value(card) -> float:
    """Return how far above the power curve a card sits.

    Positive values mean the card is a better-than-expected deal for its cost.
    """
    return calculate_exact_cost(card.attack, card.defense) - card.cost
def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
    """Build a deck from `cards` under a fixed cost budget.

    Scores every allowed card with personality-specific weights (vectorized),
    then fills the deck in two passes: first cheap early-game cards up to a
    per-personality early budget, then the best remaining cards by score.
    JEBRASKA delegates to the card-pick NN when trained weights exist.
    """
    BUDGET = 50
    # Higher difficulties unlock more expensive cards.
    if difficulty >= 6:
        max_card_cost = difficulty + 1
    else:
        max_card_cost = 6
    # Fall back to the full pool if the cost filter removes everything.
    allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)
    # Vectorized scoring over all allowed cards at once
    atk = np.array([c.attack for c in allowed], dtype=np.float32)
    defn = np.array([c.defense for c in allowed], dtype=np.float32)
    cost = np.array([c.cost for c in allowed], dtype=np.float32)
    # Same clamped stat-derived cost as calculate_exact_cost(), vectorized.
    exact_cost = np.minimum(10.0, np.maximum(1.0, ((atk**2 + defn**2)**0.18) / 1.5))
    pcv_norm = np.clip(exact_cost - cost, 0.0, 1.0)
    cost_norm = cost / max_card_cost
    totals = atk + defn
    # Attack share of total stats; 0.5 for 0/0 cards avoids division by zero.
    atk_ratio = np.where(totals > 0, atk / totals, 0.5)
    def_not_one = np.where(defn != 1, 1.0, 0.0)
    if personality == AIPersonality.AGGRESSIVE:
        # (1-cost_norm) penalizes expensive cards. High-attack cards are inherently expensive,
        # so without this the second pass drifts toward costly cards at higher difficulty,
        # shrinking the deck. The bonus grows with max_card_cost and exactly offsets that drift.
        scores = 0.50 * atk_ratio + 0.35 * pcv_norm + 0.15 * (1.0 - cost_norm) + 0.10 * def_not_one
    elif personality == AIPersonality.DEFENSIVE:
        # Small (1-cost_norm) for the same anti-shrinkage reason; lighter because high-defense
        # cards don't correlate as strongly with cost as high-attack cards do.
        scores = 0.10 * (1.0 - atk_ratio) + 0.80 * pcv_norm + 0.10 * cost_norm
    elif personality == AIPersonality.GREEDY:
        # Small cost_norm keeps flavour without causing severe deck shrinkage at D10
        scores = 0.20 * cost_norm + 0.80 * pcv_norm
    elif personality == AIPersonality.SWARM:
        scores = 0.40 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm + 0.05 * def_not_one
    elif personality == AIPersonality.CONTROL:
        # Small cost_norm keeps flavour without causing severe deck shrinkage at D10
        scores = 0.85 * pcv_norm + 0.15 * cost_norm
    elif personality == AIPersonality.BALANCED:
        scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
    elif personality == AIPersonality.JEBRASKA:
        # Delegate entirely to the card-pick NN; skip the heuristic scoring path.
        from ai.card_pick_nn import CardPickPlayer, CARD_PICK_WEIGHTS_PATH
        from ai.nn import NeuralNet
        # Cache the loaded net on the function object: load from disk at most once.
        if not hasattr(choose_cards, "_card_pick_net"):
            choose_cards._card_pick_net = (
                NeuralNet.load(CARD_PICK_WEIGHTS_PATH)
                if os.path.exists(CARD_PICK_WEIGHTS_PATH) else None
            )
        net = choose_cards._card_pick_net
        if net is not None:
            return CardPickPlayer(net, training=False).choose_cards(allowed, difficulty)
        # Fall through to BALANCED heuristic if weights aren't trained yet.
        scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
    else:  # ARBITRARY
        # Random weight grows with difficulty... inverted: pcv weight grows with difficulty.
        w = 0.09 * difficulty
        scores = w * pcv_norm + (1.0 - w) * np.random.random(len(allowed)).astype(np.float32)
    # Small noise floor at D10 prevents fully deterministic deck building.
    noise = (max(0,12 - difficulty)**2) * 0.008
    scores = scores + np.random.normal(0, noise, len(allowed)).astype(np.float32)
    order = np.argsort(-scores)
    sorted_cards = [allowed[i] for i in order]
    # Per-personality cap on how much budget the cheap-card pass may spend.
    early_budget = {
        AIPersonality.GREEDY: 20,  # cheap cards are sacrifice fodder for big plays
        AIPersonality.SWARM: 12,
        AIPersonality.AGGRESSIVE: 18,  # raised: ensures cheap high-attack fodder regardless of difficulty
        AIPersonality.DEFENSIVE: 15,  # raised: stable cheap-card base across difficulty levels
        AIPersonality.CONTROL: 8,
        AIPersonality.BALANCED: 25,  # spread the deck across all cost levels
        AIPersonality.JEBRASKA: 25,  # fallback (no trained weights yet)
        AIPersonality.ARBITRARY: 8,
    }[personality]
    selected: list[Card] = []
    total_cost = 0
    # First pass: secure early-game cards
    cheap_spent = 0
    for card in sorted_cards:
        if cheap_spent >= early_budget:
            break
        if card.cost > 3 or total_cost + card.cost > BUDGET:
            continue
        selected.append(card)
        total_cost += card.cost
        cheap_spent += card.cost
    # Second pass: fill remaining budget greedily by score
    taken = {id(c) for c in selected}
    for card in sorted_cards:
        if total_cost >= BUDGET:
            break
        if id(card) in taken or total_cost + card.cost > BUDGET:
            continue
        selected.append(card)
        total_cost += card.cost
    return selected
@dataclass
class MovePlan:
    """One candidate AI turn: which board slots to sacrifice, then which hand
    cards to play into which slots."""

    # Board slot indices to sacrifice; each occupied slot grants +1 energy.
    sacrifice_slots: list[int]
    plays: list[tuple]  # (CardInstance, board_slot: int)
    label: str = ""  # short tag for logging (e.g. "sac1_play2", "idle")
def _affordable_subsets(hand, energy, start=0):
"""Yield every subset of cards from hand whose total cost fits within energy."""
yield []
for i in range(start, len(hand)):
card = hand[i]
if card.cost <= energy:
for rest in _affordable_subsets(hand, energy - card.cost, i + 1):
yield [card] + rest
def _plans_for_sacrifice(player, opponent, sacrifice_slots):
    """Generate one plan per affordable card subset for a given sacrifice set.

    Each sacrificed occupied slot is freed and grants +1 energy, so the
    affordable-subset enumeration runs against the post-sacrifice totals.
    One MovePlan is emitted per (card subset, empty-slot permutation) pair.

    NOTE: `opponent` is currently unused; the parameter is kept so the
    signature stays stable for callers (generate_plans).
    """
    # Work on a copy of the board; the real state is mutated later by actions.
    board = list(player.board)
    energy = player.energy
    for slot in sacrifice_slots:
        if board[slot] is not None:
            board[slot] = None
            energy += 1
    hand = list(player.hand)
    empty_slots = [i for i, c in enumerate(board) if c is None]
    # (Removed unused local `en_board = opponent.board`.)
    return [
        MovePlan(
            sacrifice_slots=list(sacrifice_slots),
            plays=list(zip(cards, scoring_slots)),
            label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
        )
        for cards in _affordable_subsets(hand, energy)
        for scoring_slots in permutations(empty_slots, len(cards))
    ]
def generate_plans(player, opponent) -> list[MovePlan]:
    """Enumerate every candidate MovePlan for the active player's turn.

    Covers every sacrifice combination (including none) crossed with every
    affordable play set, plus an explicit do-nothing plan.
    """
    occupied = [s for s in range(BOARD_SIZE) if player.board[s] is not None]
    plans: list[MovePlan] = []
    # Sacrifice 0..len(occupied) board cards, in every combination.
    for count in range(len(occupied) + 1):
        for slots in combinations(occupied, count):
            plans.extend(_plans_for_sacrifice(player, opponent, list(slots)))
    # Always offer the idle option as well.
    plans.append(MovePlan(sacrifice_slots=[], plays=[], label="idle"))
    return plans
def score_plans_batch(
    plans: list[MovePlan],
    player: PlayerState,
    opponent: PlayerState,
    personality: AIPersonality,
) -> np.ndarray:
    """Score every plan at once with personality-weighted heuristics.

    Builds per-plan board-outcome arrays, derives a set of [0, 1] metrics
    (damage, blocking, coverage, destruction, attrition, value, threat),
    blends them with personality weights, applies context adjustments, and
    subtracts a penalty for wasteful sacrifices. Returns shape (n,) scores.
    """
    n = len(plans)
    # Pre-compute PCV for every hand card once
    pcv_cache = {
        id(c): max(0.0, min(1.0, get_power_curve_value(c)))
        for c in player.hand
    }
    # Build board-state arrays
    board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
    board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
    n_sac = np.zeros(n, dtype=np.float32)
    sac_val = np.zeros(n, dtype=np.float32)
    play_val = np.zeros(n, dtype=np.float32)
    pcv_score = np.full(n, 0.5, dtype=np.float32)
    for idx, plan in enumerate(plans):
        # Simulate this plan's resulting board on a copy.
        board = list(player.board)
        for slot in plan.sacrifice_slots:
            board_slot = board[slot]
            if board_slot is not None:
                sac_val[idx] += board_slot.cost
                board[slot] = None
        n_sac[idx] = len(plan.sacrifice_slots)
        for card, slot in plan.plays:
            board[slot] = card
            play_val[idx] += card.cost
        for slot in range(BOARD_SIZE):
            board_slot = board[slot]
            if board_slot is not None:
                board_atk[idx, slot] = board_slot.attack
                board_occ[idx, slot] = True
        if plan.plays:
            # Average power-curve value of the cards this plan plays.
            pcv_vals = [pcv_cache.get(id(c), 0.5) for c, _ in plan.plays]
            pcv_score[idx] = sum(pcv_vals) / len(pcv_vals)
    # Enemy board — same for every plan
    en_atk = np.array([c.attack if c else 0 for c in opponent.board], dtype=np.float32)
    en_def = np.array([c.defense if c else 0 for c in opponent.board], dtype=np.float32)
    en_occ = np.array([c is not None for c in opponent.board], dtype=np.bool_)
    enemy_occupied = int(en_occ.sum())
    # --- Metrics (all shape (n,)) ---
    # Attack into empty enemy slots hits the opponent directly.
    direct_damage = (board_atk * ~en_occ).sum(axis=1)
    blocking = board_occ & en_occ  # (n, 5)
    blocking_slots = blocking.sum(axis=1).astype(np.float32)
    cards_on_board = board_occ.sum(axis=1).astype(np.float32)
    cards_destroyed = ((board_atk >= en_def) & blocking).sum(axis=1).astype(np.float32)
    # Enemy attack that comes in through our empty slots.
    unblocked_in = (en_atk * ~board_occ).sum(axis=1)
    atk_score = np.minimum(1.0, direct_damage / max(opponent.life, 1))
    block_score = blocking_slots / enemy_occupied if enemy_occupied > 0 else np.ones(n, dtype=np.float32)
    open_slots = BOARD_SIZE - enemy_occupied
    cover_score = (
        (cards_on_board - blocking_slots) / open_slots
        if open_slots > 0
        else np.ones(n, dtype=np.float32)
    )
    destroy_score = cards_destroyed / enemy_occupied if enemy_occupied > 0 else np.zeros(n, dtype=np.float32)
    threat_score = 1.0 - np.minimum(1.0, unblocked_in / max(player.life, 1))
    opp_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
    # NOTE(review): the opponent side counts all occupied board slots but our
    # side adds only blocking_slots, not cards_on_board — looks asymmetric;
    # confirm whether this is intentional before changing.
    my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
    attrition_score = my_cards_left / (my_cards_left + max(opp_cards_left, 1))
    net_value = play_val - sac_val
    net_value_norm = np.clip((net_value + 10) / 20, 0.0, 1.0)
    # --- Sacrifice penalty ---
    # Energy gained from sacrifices that this plan then fails to spend.
    energy_leftover = player.energy + n_sac - play_val
    wasted_energy = np.maximum(0, np.minimum(n_sac, energy_leftover))
    wasted_penalty = np.where(n_sac > 0, wasted_energy / np.maximum(n_sac, 1), 0.0)
    swap_penalty = np.clip((n_sac - net_value) / np.maximum(n_sac, 1), 0.0, 1.0)
    sac_penalty = np.where(n_sac > 0, 0.65 * wasted_penalty + 0.35 * swap_penalty, 0.0)
    # --- Personality weights ---
    if personality == AIPersonality.AGGRESSIVE:
        score = (0.30 * atk_score + 0.07 * block_score + 0.15 * cover_score +
                 0.08 * net_value_norm + 0.25 * destroy_score +
                 0.08 * attrition_score + 0.04 * pcv_score + 0.03 * threat_score)
    elif personality == AIPersonality.DEFENSIVE:
        score = (0.12 * atk_score + 0.20 * block_score + 0.18 * cover_score +
                 0.04 * net_value_norm + 0.18 * destroy_score +
                 0.15 * attrition_score + 0.05 * pcv_score + 0.08 * threat_score)
    elif personality == AIPersonality.SWARM:
        score = (0.25 * atk_score + 0.10 * block_score + 0.35 * cover_score +
                 0.05 * net_value_norm + 0.05 * destroy_score +
                 0.10 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.GREEDY:
        score = (0.15 * atk_score + 0.05 * block_score + 0.18 * cover_score +
                 0.38 * net_value_norm + 0.05 * destroy_score +
                 0.09 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.CONTROL:
        score = (0.10 * atk_score + 0.05 * block_score + 0.05 * cover_score +
                 0.20 * net_value_norm + 0.05 * destroy_score +
                 0.10 * attrition_score + 0.40 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.BALANCED:
        score = (0.12 * atk_score + 0.13 * block_score + 0.15 * cover_score +
                 0.10 * net_value_norm + 0.12 * destroy_score +
                 0.15 * attrition_score + 0.12 * pcv_score + 0.11 * threat_score)
    else:  # ARBITRARY
        score = (0.50 * np.random.random(n).astype(np.float32) +
                 0.06 * atk_score + 0.06 * block_score + 0.08 * cover_score +
                 0.05 * net_value_norm + 0.06 * destroy_score +
                 0.08 * attrition_score + 0.06 * pcv_score + 0.05 * threat_score)
    # --- Context adjustments ---
    # Lethal plans jump to the top; suicidal plans sink to the bottom.
    score = np.where(direct_damage >= opponent.life, np.maximum(score, 0.95), score)
    score = np.where(unblocked_in >= player.life, np.minimum(score, 0.05), score)
    if opponent.deck_type in ("God Card", "Pantheon"):
        score = np.minimum(1.0, score + 0.08 * cover_score)
    if opponent.deck_type in ("Aggro", "Rush"):
        score = np.minimum(1.0, score + 0.06 * block_score + 0.04 * threat_score)
    if opponent.deck_type == "Wall":
        score = np.minimum(1.0, score + 0.06 * atk_score)
    if opponent.life < STARTING_LIFE * 0.3:
        score = np.minimum(1.0, score + 0.06 * atk_score)
    if player.life < STARTING_LIFE * 0.3:
        score = np.minimum(1.0, score + 0.06 * threat_score + 0.04 * block_score)
    if opp_cards_left <= 5:
        score = np.where(cards_on_board > 0, np.minimum(1.0, score + 0.05), score)
    return np.maximum(0.0, score - sac_penalty)
def choose_plan(player: PlayerState, opponent: PlayerState, personality: AIPersonality, difficulty: int) -> MovePlan:
    """Pick the best MovePlan for the active player.

    JEBRASKA scores plans with the trained NN (falling back to the BALANCED
    heuristic when no weights file exists); every other personality uses
    score_plans_batch(). Gaussian noise — larger at low difficulty — is added
    before the argmax so weaker AIs play less deterministically.
    """
    plans = generate_plans(player, opponent)
    if personality == AIPersonality.JEBRASKA:
        from ai.nn import NeuralNet
        # `os` is already imported at module level; the previous local import
        # was redundant and has been removed.
        _weights = os.path.join(os.path.dirname(__file__), "nn_weights.json")
        if not hasattr(choose_plan, "_neural_net"):
            # Cache on the function object so weights load at most once per process.
            choose_plan._neural_net = NeuralNet.load(_weights) if os.path.exists(_weights) else None
        net = choose_plan._neural_net
        if net is not None:
            from ai.nn import extract_plan_features
            scores = net.forward(extract_plan_features(plans, player, opponent))
        else:  # fallback to BALANCED if weights not found
            scores = score_plans_batch(plans, player, opponent, AIPersonality.BALANCED)
    else:
        scores = score_plans_batch(plans, player, opponent, personality)
    # Exploration noise shrinks as difficulty rises and vanishes at D10+.
    # Clamp at 0.0: for difficulty 11-13 the raw formula goes negative, and
    # np.random.normal raises ValueError on a negative standard deviation.
    noise_scale = max(0.0, ((max(0, 12 - difficulty)**2) - 4) * 0.008)
    noise = np.random.normal(0, noise_scale, len(scores)).astype(np.float32)
    return plans[int(np.argmax(scores + noise))]
async def run_ai_turn(game_id: str):
    """Play one full AI turn for `game_id` over the human's websocket.

    Waits briefly for the human client to connect, chooses a plan, animates
    sacrifices and plays with state pushes between steps, ends the turn, and
    on game over records the result and tears down the game. Re-schedules
    itself if the AI is still the active player after ending the turn.
    """
    # Imported lazily to avoid a circular import with game.manager.
    from game.manager import (
        active_games, connections, active_deck_ids,
        serialize_state, record_game_result, calculate_combat_animation_time
    )
    state = active_games.get(game_id)
    # Bail out if the game is gone/finished or it is not actually the AI's turn.
    if not state or state.result:
        return
    if state.active_player_id != AI_USER_ID:
        return
    human_id = state.opponent_id(AI_USER_ID)
    # Poll up to ~10s (0.5s steps) for the human's websocket to (re)connect.
    waited = 0
    while not connections[game_id].get(human_id) and waited < 10:
        await asyncio.sleep(0.5)
        waited += 0.5
    # Let the client's combat animation from the previous turn finish first.
    await asyncio.sleep(calculate_combat_animation_time(state.last_combat_events))
    player = state.players[AI_USER_ID]
    opponent = state.players[human_id]
    difficulty = state.ai_difficulty
    personality = (
        AIPersonality(state.ai_personality)
        if state.ai_personality
        else AIPersonality.BALANCED
    )
    ws = connections[game_id].get(human_id)
    async def send_state(s):
        # Best-effort push; a dropped socket must not abort the AI turn.
        if ws:
            try:
                await ws.send_json({"type": "state", "state": serialize_state(s, human_id)})
            except Exception:
                pass
    async def send_sacrifice_anim(instance_id):
        # Best-effort animation trigger; same failure policy as send_state.
        if ws:
            try:
                await ws.send_json({"type": "sacrifice_animation", "instance_id": instance_id})
            except Exception:
                pass
    best_plan = choose_plan(player, opponent, personality, difficulty)
    logger.info(
        f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} " +
        f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
    )
    for slot in best_plan.sacrifice_slots:
        card_slot = player.board[slot]
        if card_slot is None:
            continue
        await send_sacrifice_anim(card_slot.instance_id)
        # Pause so the client-side sacrifice animation can play out.
        await asyncio.sleep(0.65)
        action_sacrifice(state, slot)
        await send_state(state)
        await asyncio.sleep(0.35)
    # Shuffle play order so the AI doesn't always fill slots left-to-right
    plays = list(best_plan.plays)
    random.shuffle(plays)
    for card, slot in plays:
        # Re-look up hand index each time (hand shrinks as cards are played)
        hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
        if hand_idx is None:
            continue
        if player.board[slot] is not None:
            continue
        if card.cost > player.energy:
            continue
        action_play_card(state, hand_idx, slot)
        await send_state(state)
        await asyncio.sleep(0.5)
    action_end_turn(state)
    await send_state(state)
    if state.result:
        # Game over: persist the result, notify the client, and tear down.
        from core.database import SessionLocal
        db = SessionLocal()
        try:
            record_game_result(state, db)
            if ws:
                await ws.send_json({
                    "type": "state",
                    "state": serialize_state(state, human_id),
                })
        finally:
            db.close()
        active_deck_ids.pop(human_id, None)
        active_deck_ids.pop(AI_USER_ID, None)
        active_games.pop(game_id, None)
        connections.pop(game_id, None)
        return
    # Extra turn for the AI (e.g. opponent skipped): schedule another pass.
    if state.active_player_id == AI_USER_ID:
        asyncio.create_task(run_ai_turn(game_id))

266
backend/ai/nn.py Normal file
View File

@@ -0,0 +1,266 @@
import json
import numpy as np
# Layout: [state(8) | my_board(15) | opp_board(15) | plan(3) | result_board(15) | opp_deck_type(8)]
N_FEATURES = 64
# Known deck archetypes; list position doubles as the one-hot index.
_DECK_TYPES = ["Balanced", "Aggro", "Wall", "Rush", "Control", "God Card", "Pantheon", "Unplayable"]
_DECK_TYPE_IDX = {dt: i for i, dt in enumerate(_DECK_TYPES)}
# Normalization constants for card stats and deck size.
_MAX_ATK = 50.0
_MAX_DEF = 100.0
_MAX_DECK = 30.0
def _softmax(x: np.ndarray) -> np.ndarray:
e = np.exp(x - x.max())
return e / e.sum()
class NeuralNet:
    """
    Fully-connected plan scorer: n_features → 64 → 32 → 1
    Pure numpy so it can be pickled into worker processes.
    Optimizer: Adam.

    forward() caches activations on the instance for a subsequent backward(),
    so instances are not safe to share across threads, and backward() must be
    called right after the forward() whose gradients it should produce.
    """

    def __init__(self, n_features: int = N_FEATURES, hidden: tuple = (64, 32), seed: int | None = None):
        # Seeded RandomState keeps weight init reproducible across runs.
        rng = np.random.RandomState(seed)
        sizes = [n_features] + list(hidden) + [1]
        self.weights: list[np.ndarray] = []
        self.biases: list[np.ndarray] = []
        # Adam first/second-moment accumulators, one pair per layer.
        self.m_w: list[np.ndarray] = []
        self.v_w: list[np.ndarray] = []
        self.m_b: list[np.ndarray] = []
        self.v_b: list[np.ndarray] = []
        # Adam timestep (for bias correction).
        self.t = 0
        for fan_in, fan_out in zip(sizes, sizes[1:]):
            # He initialization — suits the ReLU hidden layers.
            w = rng.randn(fan_in, fan_out).astype(np.float32) * np.sqrt(2.0 / fan_in)
            b = np.zeros(fan_out, dtype=np.float32)
            self.weights.append(w)
            self.biases.append(b)
            self.m_w.append(np.zeros_like(w))
            self.v_w.append(np.zeros_like(w))
            self.m_b.append(np.zeros_like(b))
            self.v_b.append(np.zeros_like(b))
        # Activation caches filled by forward(), consumed by backward().
        self._acts: list[np.ndarray] = []
        self._pre_acts: list[np.ndarray] = []

    def forward(self, X: np.ndarray) -> np.ndarray:
        """X: (n, n_features) → scores: (n,)"""
        h = X.astype(np.float32)
        self._acts = [h]
        self._pre_acts = []
        for i, (W, b) in enumerate(zip(self.weights, self.biases)):
            z = h @ W + b
            self._pre_acts.append(z)
            # ReLU on hidden layers; linear output on the last layer.
            h = np.maximum(0.0, z) if i < len(self.weights) - 1 else z
            self._acts.append(h)
        return h.squeeze(-1)

    def backward(self, upstream: np.ndarray) -> tuple[list, list]:
        """
        upstream: (n,) — dJ/d(scores), gradient for ascent.
        Returns (grads_w, grads_b).
        Must be called after forward(); reads the cached activations.
        """
        n = len(upstream)
        delta = upstream[:, None]  # (n, 1)
        grads_w = [None] * len(self.weights)
        grads_b = [None] * len(self.biases)
        for i in range(len(self.weights) - 1, -1, -1):
            h_in = self._acts[i]  # (n, in_size)
            grads_w[i] = h_in.T @ delta / n
            grads_b[i] = delta.mean(axis=0)
            if i > 0:
                # Backprop through ReLU: zero the gradient where pre-activation ≤ 0.
                delta = (delta @ self.weights[i].T) * (self._pre_acts[i - 1] > 0)
        return grads_w, grads_b

    def adam_update(self, grads_w: list, grads_b: list,
                    lr: float = 1e-3, beta1: float = 0.9,
                    beta2: float = 0.999, eps: float = 1e-8,
                    grad_clip: float = 1.0) -> None:
        """Apply one Adam step. NOTE: parameters move in the +gradient
        direction (ascent), matching the REINFORCE objective (see backward)."""
        # Global gradient norm clipping
        all_grads = [g for g in grads_w + grads_b if g is not None]
        global_norm = np.sqrt(sum(np.sum(g * g) for g in all_grads))
        if global_norm > grad_clip:
            scale = grad_clip / global_norm
            grads_w = [g * scale for g in grads_w]
            grads_b = [g * scale for g in grads_b]
        self.t += 1
        # Bias-correction denominators for the moment estimates.
        bc1 = 1 - beta1 ** self.t
        bc2 = 1 - beta2 ** self.t
        for i, (gw, gb) in enumerate(zip(grads_w, grads_b)):
            self.m_w[i] = beta1 * self.m_w[i] + (1 - beta1) * gw
            self.v_w[i] = beta2 * self.v_w[i] + (1 - beta2) * gw * gw
            self.weights[i] += lr * (self.m_w[i] / bc1) / (np.sqrt(self.v_w[i] / bc2) + eps)
            self.m_b[i] = beta1 * self.m_b[i] + (1 - beta1) * gb
            self.v_b[i] = beta2 * self.v_b[i] + (1 - beta2) * gb * gb
            self.biases[i] += lr * (self.m_b[i] / bc1) / (np.sqrt(self.v_b[i] / bc2) + eps)

    def save(self, path: str) -> None:
        """Serialize weights AND optimizer state to JSON so training can resume."""
        data = {
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases],
            "m_w": [m.tolist() for m in self.m_w],
            "v_w": [v.tolist() for v in self.v_w],
            "m_b": [m.tolist() for m in self.m_b],
            "v_b": [v.tolist() for v in self.v_b],
            "t": self.t,
        }
        with open(path, "w") as f:
            json.dump(data, f)

    @classmethod
    def load(cls, path: str) -> "NeuralNet":
        """Restore a net saved by save(). Bypasses __init__ (no re-initialization)."""
        with open(path) as f:
            data = json.load(f)
        net = cls.__new__(cls)
        net.weights = [np.array(w, dtype=np.float32) for w in data["weights"]]
        net.biases = [np.array(b, dtype=np.float32) for b in data["biases"]]
        net.m_w = [np.array(m, dtype=np.float32) for m in data["m_w"]]
        net.v_w = [np.array(v, dtype=np.float32) for v in data["v_w"]]
        net.m_b = [np.array(m, dtype=np.float32) for m in data["m_b"]]
        net.v_b = [np.array(v, dtype=np.float32) for v in data["v_b"]]
        net.t = data["t"]
        net._acts = []
        net._pre_acts = []
        return net
def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
    """
    Returns (n_plans, N_FEATURES) float32 array.
    Layout: [state(8) | my_board(15) | opp_board(15) | plan(3) | result_board(15) | opp_deck_type(8)]
    Plan-invariant parts (state, current boards, deck-type one-hot) are built
    once and tiled; only the plan(3) + result_board(15) columns vary per plan.
    """
    # Imported lazily to avoid a circular import with game.rules.
    from game.rules import BOARD_SIZE, HAND_SIZE, MAX_ENERGY_CAP, STARTING_LIFE
    n = len(plans)
    # state (same for every plan)
    state = np.array([
        player.life / STARTING_LIFE,
        opponent.life / STARTING_LIFE,
        player.energy / MAX_ENERGY_CAP,
        player.energy_cap / MAX_ENERGY_CAP,
        len(player.hand) / HAND_SIZE,
        len(opponent.hand) / HAND_SIZE,
        len(player.deck) / _MAX_DECK,
        len(opponent.deck) / _MAX_DECK,
    ], dtype=np.float32)
    # current boards (same for every plan) — 3 values per slot: atk, def, occupied
    my_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
    opp_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
    for slot in range(BOARD_SIZE):
        c = player.board[slot]
        if c is not None:
            my_board[slot * 3] = c.attack / _MAX_ATK
            my_board[slot * 3 + 1] = c.defense / _MAX_DEF
            my_board[slot * 3 + 2] = 1.0
        c = opponent.board[slot]
        if c is not None:
            opp_board[slot * 3] = c.attack / _MAX_ATK
            opp_board[slot * 3 + 1] = c.defense / _MAX_DEF
            opp_board[slot * 3 + 2] = 1.0
    # per-plan features
    plan_part = np.zeros((n, 3 + BOARD_SIZE * 3), dtype=np.float32)
    for idx, plan in enumerate(plans):
        # simulate board result
        result = list(player.board)
        for slot in plan.sacrifice_slots:
            result[slot] = None
        for card, slot in plan.plays:
            result[slot] = card
        total_cost = sum(c.cost for c, _ in plan.plays) if plan.plays else 0
        plan_part[idx, 0] = len(plan.sacrifice_slots) / BOARD_SIZE
        plan_part[idx, 1] = len(plan.plays) / HAND_SIZE
        plan_part[idx, 2] = total_cost / (MAX_ENERGY_CAP + BOARD_SIZE)
        for slot in range(BOARD_SIZE):
            c = result[slot]
            if c is not None:
                plan_part[idx, 3 + slot * 3] = c.attack / _MAX_ATK
                plan_part[idx, 3 + slot * 3 + 1] = c.defense / _MAX_DEF
                plan_part[idx, 3 + slot * 3 + 2] = 1.0
    # opponent deck type one-hot (same for every plan); unknown types map to index 0.
    opp_deck_oh = np.zeros(len(_DECK_TYPES), dtype=np.float32)
    opp_deck_oh[_DECK_TYPE_IDX.get(opponent.deck_type, 0)] = 1.0
    state_t = np.tile(state, (n, 1))
    my_board_t = np.tile(my_board, (n, 1))
    opp_board_t = np.tile(opp_board, (n, 1))
    opp_deck_t = np.tile(opp_deck_oh, (n, 1))
    return np.concatenate([state_t, my_board_t, opp_board_t, plan_part, opp_deck_t], axis=1)
class NeuralPlayer:
    """
    Wraps a NeuralNet for use in game simulation.
    In training mode, samples plans stochastically and records the trajectory
    for a REINFORCE update after the game ends.
    In inference mode, picks the highest-scoring plan deterministically.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # Plan scorer over extract_plan_features() matrices.
        self.net = net
        # When True, choices are sampled and logged for compute_grads().
        self.training = training
        # Softmax temperature for training-time exploration (higher = flatter).
        self.temperature = temperature
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features, chosen_idx)

    def choose_plan(self, player, opponent):
        """Score every candidate plan and return the chosen MovePlan."""
        # Imported lazily to avoid a circular import with ai.engine.
        from ai.engine import generate_plans
        plans = generate_plans(player, opponent)
        features = extract_plan_features(plans, player, opponent)
        scores = self.net.forward(features)
        if self.training:
            # Clip avoids zero-probability entries so np.random.choice gets a
            # valid distribution even when the softmax underflows.
            probs = _softmax((scores / self.temperature).astype(np.float64))
            probs = np.clip(probs, 1e-10, None)
            probs /= probs.sum()
            chosen_idx = int(np.random.choice(len(plans), p=probs))
            self.trajectory.append((features, chosen_idx))
        else:
            chosen_idx = int(np.argmax(scores))
        return plans[chosen_idx]

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        Computes averaged REINFORCE gradients for this trajectory without updating weights.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if trajectory is empty.
        Clears the trajectory afterwards, so each game is consumed once.
        """
        if not self.trajectory:
            return None
        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]
        for features, chosen_idx in self.trajectory:
            # Re-run forward so the net caches activations for backward().
            scores = self.net.forward(features)
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # d(log softmax[chosen])/d(scores) = onehot(chosen) - probs, scaled by reward.
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n
        self.trajectory.clear()
        return acc_gw, acc_gb

File diff suppressed because one or more lines are too long

634
backend/ai/simulate.py Normal file
View File

@@ -0,0 +1,634 @@
import asyncio
import json
import math
import os
import random
import uuid
from concurrent.futures import ProcessPoolExecutor
from datetime import datetime
from dotenv import load_dotenv
# Load .env before the game/db imports below — presumably they read env
# config at import time; verify before reordering these imports.
load_dotenv()
from game.card import Card, CardType, CardRarity, generate_cards, compute_deck_type
from game.rules import (
CardInstance, PlayerState, GameState,
action_play_card, action_sacrifice, action_end_turn,
)
from ai.engine import AIPersonality, choose_cards, choose_plan
# On-disk cache of the generated card pool shared by all simulation runs.
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
# Pool size generated when the cache file does not exist yet.
SIMULATION_CARD_COUNT = 1000
def _card_to_dict(card: Card) -> dict:
    """Serialize a Card into a JSON-safe dict (inverse of _dict_to_card)."""
    payload = dict(
        name=card.name,
        # datetime → ISO-8601 string so the dict round-trips through JSON.
        generated_at=card.generated_at.isoformat(),
        image_link=card.image_link,
        # Enums serialize by member name, matching CardRarity[...]/CardType[...] on load.
        card_rarity=card.card_rarity.name,
        card_type=card.card_type.name,
        wikidata_instance=card.wikidata_instance,
        text=card.text,
        attack=card.attack,
        defense=card.defense,
        cost=card.cost,
    )
    return payload
def _dict_to_card(d: dict) -> Card:
    """Rebuild a Card from a dict produced by _card_to_dict (its inverse)."""
    # Plain fields pass through unchanged.
    kwargs = {key: d[key] for key in (
        "name", "image_link", "wikidata_instance", "text", "attack", "defense", "cost",
    )}
    # Typed fields are revived from their serialized forms.
    kwargs["generated_at"] = datetime.fromisoformat(d["generated_at"])
    kwargs["card_rarity"] = CardRarity[d["card_rarity"]]
    kwargs["card_type"] = CardType[d["card_type"]]
    return Card(**kwargs)
def get_simulation_cards() -> list[Card]:
    """Return the simulation card pool, generating and caching it on first use.

    Loads SIMULATION_CARDS_PATH if it exists; otherwise generates
    SIMULATION_CARD_COUNT cards and writes them to disk so later runs skip
    the (slow) generation step.
    """
    if os.path.exists(SIMULATION_CARDS_PATH):
        with open(SIMULATION_CARDS_PATH, "r", encoding="utf-8") as f:
            data = json.load(f)
            return [_dict_to_card(d) for d in data]
    print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
    cards = generate_cards(SIMULATION_CARD_COUNT)
    with open(SIMULATION_CARDS_PATH, "w", encoding="utf-8") as f:
        json.dump([_card_to_dict(c) for c in cards], f, ensure_ascii=False, indent=2)
    print(f"Saved {len(cards)} cards to {SIMULATION_CARDS_PATH}")
    return cards
# Fixed user ids for the two simulated AIs (also the return values of simulate_game).
PLAYER1_ID = "p1"
PLAYER2_ID = "p2"
MAX_TURNS = 300  # safety cap to prevent infinite games
def _make_instances(deck: list[Card]) -> list[CardInstance]:
    """Wrap every deck Card in a fresh CardInstance with a unique instance id."""
    instances: list[CardInstance] = []
    for card in deck:
        instances.append(
            CardInstance(
                instance_id=str(uuid.uuid4()),
                card_id=card.name,
                name=card.name,
                attack=card.attack,
                defense=card.defense,
                # Instances start at full health.
                max_defense=card.defense,
                cost=card.cost,
                card_type=card.card_type.name,
                card_rarity=card.card_rarity.name,
                # Normalize optional text fields to empty strings.
                image_link=card.image_link or "",
                text=card.text or "",
            )
        )
    return instances
def simulate_game(
    cards: list[Card],
    difficulty1: int,
    personality1: AIPersonality,
    difficulty2: int,
    personality2: AIPersonality,
) -> str | None:
    """
    Simulate a single game between two AIs choosing from `cards`.
    Player 1 always goes first.
    Returns "p1", "p2", or None if the game exceeds MAX_TURNS.
    """
    # Each AI drafts its own deck from the shared pool; decks are then shuffled.
    deck1 = choose_cards(cards, difficulty1, personality1)
    deck2 = choose_cards(cards, difficulty2, personality2)
    instances1 = _make_instances(deck1)
    instances2 = _make_instances(deck2)
    random.shuffle(instances1)
    random.shuffle(instances2)
    deck_type1 = compute_deck_type(deck1) or "Balanced"
    deck_type2 = compute_deck_type(deck2) or "Balanced"
    p1 = PlayerState(user_id=PLAYER1_ID, username="AI1", deck_type=deck_type1, deck=instances1)
    p2 = PlayerState(user_id=PLAYER2_ID, username="AI2", deck_type=deck_type2, deck=instances2)
    # P1 always goes first
    p1.increment_energy_cap()
    p2.increment_energy_cap()
    p1.refill_energy()
    p1.draw_to_full()
    state = GameState(
        game_id=str(uuid.uuid4()),
        players={PLAYER1_ID: p1, PLAYER2_ID: p2},
        player_order=[PLAYER1_ID, PLAYER2_ID],
        active_player_id=PLAYER1_ID,
        phase="main",
        turn=1,
    )
    # Per-player (difficulty, personality) lookup for the turn loop.
    configs = {
        PLAYER1_ID: (difficulty1, personality1),
        PLAYER2_ID: (difficulty2, personality2),
    }
    for _ in range(MAX_TURNS):
        if state.result:
            break
        active_id = state.active_player_id
        difficulty, personality = configs[active_id]
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]
        plan = choose_plan(player, opponent, personality, difficulty)
        # Apply planned sacrifices first, skipping slots that are already empty.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)
        # Shuffle plays so slot collisions don't systematically favor plan order.
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            # Re-validate each play: earlier plays may have consumed energy,
            # occupied the slot, or the card may no longer be in hand.
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None:
                continue
            if player.board[slot] is not None:
                continue
            if card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)
        action_end_turn(state)
    if state.result and state.result.winner_id:
        return state.result.winner_id
    return None
# These must be module-level so they are picklable.
# Per-worker card pool, populated once by _init_worker (passed to
# ProcessPoolExecutor via initargs) so the pool isn't re-pickled per game.
_worker_cards: list[Card] = []
def _init_worker(cards: list[Card]) -> None:
    """Process-pool initializer: install the shared card pool in this worker."""
    global _worker_cards
    _worker_cards = cards
def _run_game_sync(args: tuple) -> str | None:
    """Worker entry point: unpack one matchup spec and play a single game."""
    # Personalities travel as plain strings (cheap to pickle) and are
    # rehydrated into AIPersonality members here.
    difficulty1, personality1, difficulty2, personality2 = args
    return simulate_game(
        _worker_cards,
        difficulty1, AIPersonality(personality1),
        difficulty2, AIPersonality(personality2),
    )
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
    """Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
    levels = list(range(1, 11)) if difficulties is None else difficulties
    combos: list[tuple[AIPersonality, int]] = []
    for personality in AIPersonality:
        for level in levels:
            combos.append((personality, level))
    return combos
def _player_label(personality: AIPersonality, difficulty: int) -> str:
    """Short display label, e.g. 'AGG-7': first three letters of the personality, uppercased."""
    prefix = personality.value[:3].upper()
    return f"{prefix}-{difficulty}"
async def run_tournament(
    cards: list[Card],
    games_per_matchup: int = 5,
    difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Pit every (personality, difficulty) pair against every other, as both
    first and second player.
    `difficulties` selects which difficulty levels to include (default: 1-10).
    Returns a wins dict keyed by (first_player_index, second_player_index)
    where the value is how many of `games_per_matchup` games the first player won.
    Games run in parallel across all CPU cores via ProcessPoolExecutor.
    Cards are sent to each worker once at startup, not once per game.
    """
    players = _all_players(difficulties)
    n = len(players)
    # Build the full job list up front: every ordered pair (i, j), repeated
    # games_per_matchup times. Personalities are passed by value (str) so the
    # args tuple stays cheaply picklable.
    indexed_args: list[tuple[int, int, tuple]] = []
    for i in range(n):
        p1_personality, p1_difficulty = players[i]
        for j in range(n):
            p2_personality, p2_difficulty = players[j]
            args = (p1_difficulty, p1_personality.value, p2_difficulty, p2_personality.value)
            for _ in range(games_per_matchup):
                indexed_args.append((i, j, args))
    total_games = len(indexed_args)
    n_workers = os.cpu_count() or 1
    print(f"Running {total_games} games across {n_workers} workers "
          f"({n} players, {games_per_matchup} games per ordered pair)...")
    done = [0]  # one-element list so the nested coroutine can mutate it
    report_every = max(1, total_games // 200)  # ~0.5% progress granularity
    loop = asyncio.get_running_loop()
    async def tracked(future):
        # Wrap each worker future to update the progress line as games finish.
        result = await future
        done[0] += 1
        if done[0] % report_every == 0 or done[0] == total_games:
            pct = done[0] / total_games * 100
            print(f" {done[0]}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
        return result
    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),  # ship the card pool to each worker exactly once
    ) as executor:
        futures = [
            loop.run_in_executor(executor, _run_game_sync, args)
            for _, _, args in indexed_args
        ]
        results = await asyncio.gather(*[tracked(f) for f in futures])
    print("\nFinished")
    # Aggregate: count first-player wins per ordered pair; ties (timeouts)
    # are tallied separately and count against neither player.
    wins: dict[tuple[int, int], int] = {}
    ties = 0
    for (i, j, _), winner in zip(indexed_args, results):
        key = (i, j)
        if key not in wins:
            wins[key] = 0
        if winner == PLAYER1_ID:
            wins[key] += 1
        elif winner is None:
            ties += 1
    print(f"Ties: {ties}")
    return wins
def _sprt_check(wins: int, total: int, log_win: float, log_loss: float, log_B: float) -> bool:
"""
Return True when the SPRT has reached a decision for this matchup.
Tests H0: win_rate = 0.5 vs H1: win_rate = p_decisive (or 1-p_decisive).
log_win = log(p_decisive / 0.5)
log_loss = log((1 - p_decisive) / 0.5)
LLR drifts slowly for near-50% matchups and quickly for lopsided ones.
Decided when LLR crosses ±log_B.
"""
llr = wins * log_win + (total - wins) * log_loss
return llr >= log_B or llr <= -log_B
async def run_tournament_adaptive(
    cards: list[Card],
    difficulties: list[int] | None = None,
    min_games: int = 5,
    max_games: int = 200,
    p_decisive: float = 0.65,
    alpha: float = 0.05,
) -> tuple[dict[tuple[int, int], int], dict[tuple[int, int], int]]:
    """
    Like run_tournament but allocates games adaptively.
    Each ordered pair (i, j) plays until SPRT decides one player is dominant
    (win rate ≥ p_decisive with confidence 1-alpha) or max_games is reached.
    Close matchups play more games; lopsided ones stop early.
    Returns (wins, played):
        wins[(i, j)] — how many games player i won as first player against j
        played[(i, j)] — how many games were played for that pair
    Each round, all currently-undecided pairs play one game in parallel across
    all CPU cores, preserving full parallelism while adapting per-pair budgets.
    """
    players = _all_players(difficulties)
    n = len(players)
    all_pairs = [(i, j) for i in range(n) for j in range(n)]
    wins: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    played: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    decided: set[tuple[int, int]] = set()
    # Precompute SPRT constants (H0: p=0.5, H1: p=p_decisive)
    log_B = math.log((1 - alpha) / alpha)
    log_win = math.log(p_decisive / 0.5)
    log_loss = math.log((1 - p_decisive) / 0.5)
    def make_args(i: int, j: int) -> tuple:
        # Picklable matchup spec for the worker (personalities by value).
        p1, d1 = players[i]
        p2, d2 = players[j]
        return (d1, p1.value, d2, p2.value)
    n_workers = os.cpu_count() or 1
    loop = asyncio.get_running_loop()
    total_played = 0
    max_possible = len(all_pairs) * max_games
    print(
        f"Adaptive tournament: {n} players, {len(all_pairs)} pairs, "
        f"SPRT p_decisive={p_decisive} alpha={alpha}, "
        f"min={min_games} max={max_games} games/pair\n"
        f"Worst case: {max_possible:,} games across {n_workers} workers"
    )
    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),  # card pool shipped to each worker once
    ) as executor:
        round_num = 0
        while True:
            # One game per undecided pair per round keeps the pool saturated
            # while letting per-pair budgets adapt between rounds.
            pending = [
                pair for pair in all_pairs
                if pair not in decided and played[pair] < max_games
            ]
            if not pending:
                break
            round_num += 1
            batch = [(i, j, make_args(i, j)) for (i, j) in pending]
            futures = [
                loop.run_in_executor(executor, _run_game_sync, args)
                for _, _, args in batch
            ]
            results = await asyncio.gather(*futures)
            newly_decided = 0
            for (i, j, _), winner in zip(batch, results):
                played[(i, j)] += 1
                if winner == PLAYER1_ID:
                    wins[(i, j)] += 1
                total_played += 1
                # Only consult the SPRT once the pair has a minimum sample.
                if (played[(i, j)] >= min_games
                        and _sprt_check(wins[(i, j)], played[(i, j)], log_win, log_loss, log_B)):
                    decided.add((i, j))
                    newly_decided += 1
            remaining = len(all_pairs) - len(decided)
            pct = total_played / max_possible * 100
            print(
                f" Round {round_num:3d}: {len(pending):5d} games, "
                f"+{newly_decided:4d} decided, "
                f"{remaining:5d} pairs left, "
                f"{total_played:,} total ({pct:.1f}% of worst case)",
                end="\r", flush=True,
            )
    savings = max_possible - total_played
    print(
        f"\nFinished: {total_played:,} games played "
        f"(saved {savings:,} vs fixed, "
        f"{savings / max_possible * 100:.1f}% reduction)"
    )
    print(
        f"Early decisions: {len(decided)}/{len(all_pairs)} pairs "
        f"({len(decided) / len(all_pairs) * 100:.1f}%)"
    )
    return wins, played
def compute_bradley_terry(
wins: dict[tuple[int, int], int],
n: int,
played: dict[tuple[int, int], int] | None = None,
games_per_matchup: int | None = None,
iterations: int = 1000,
) -> list[float]:
"""
Compute Bradley-Terry strength parameters for all n players.
For each pair (i, j): w_ij wins for i, w_ji wins for j.
Iteratively updates: strength[i] = sum_j(w_ij) / sum_j((w_ij+w_ji) / (s[i]+s[j]))
Returns a list of strength values indexed by player. Unlike Elo, this is
path-independent and converges to a unique maximum-likelihood solution.
"""
w: list[list[int]] = [[0] * n for _ in range(n)]
for (i, j), p1_wins in wins.items():
g = played[(i, j)] if played is not None else games_per_matchup
if g:
w[i][j] += p1_wins
w[j][i] += g - p1_wins
strength = [1.0] * n
for _ in range(iterations):
new_strength = [0.0] * n
for i in range(n):
wins_i = sum(w[i][j] for j in range(n) if j != i)
denom = sum(
(w[i][j] + w[j][i]) / (strength[i] + strength[j])
for j in range(n)
if j != i and (w[i][j] + w[j][i]) > 0
)
new_strength[i] = wins_i / denom if denom > 0 else strength[i]
# Normalize so the mean stays at 1.0
mean = sum(new_strength) / n
strength = [s / mean for s in new_strength]
return strength
def rank_players(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
) -> list[int]:
    """
    Rank player indices by Bradley-Terry strength. Returns indices sorted worst-to-best.
    Provide either `played` (adaptive tournament) or `games_per_matchup` (fixed).
    """
    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")
    strengths = compute_bradley_terry(
        wins, len(players), played=played, games_per_matchup=games_per_matchup
    )
    order = list(range(len(players)))
    order.sort(key=strengths.__getitem__)
    return order
# Default location for persisted tournament results (JSON, next to this module).
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
def save_tournament(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
):
    """
    Persist tournament results as JSON.

    Tuple pair keys are flattened to "i,j" strings (JSON object keys must be
    strings); load_tournament reverses the flattening. Exactly one of
    `played` / `games_per_matchup` is expected, matching how the tournament
    was run (adaptive vs fixed).
    """
    payload: dict = {}
    payload["players"] = [
        {"personality": p.value, "difficulty": d}
        for p, d in players
    ]
    payload["wins"] = {f"{i},{j}": w for (i, j), w in wins.items()}
    if played is not None:
        payload["played"] = {f"{i},{j}": g for (i, j), g in played.items()}
    if games_per_matchup is not None:
        payload["games_per_matchup"] = games_per_matchup
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)
    print(f"Tournament results saved to {path}")
def load_tournament(
    path: str = TOURNAMENT_RESULTS_PATH,
) -> tuple[
    dict[tuple[int, int], int],
    dict[tuple[int, int], int] | None,
    int | None,
    list[tuple[AIPersonality, int]],
]:
    """Returns (wins, played, games_per_matchup, players).
    `played` is None for legacy fixed-game files (use games_per_matchup instead).
    `games_per_matchup` is None for adaptive files (use played instead).
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    def parse_pair_dict(d: dict) -> dict[tuple[int, int], int]:
        # Undo the "i,j" string flattening done by save_tournament.
        out: dict[tuple[int, int], int] = {}
        for key, value in d.items():
            parts = key.split(",")
            out[(int(parts[0]), int(parts[1]))] = value
        return out

    wins = parse_pair_dict(data["wins"])
    played = parse_pair_dict(data["played"]) if "played" in data else None
    games_per_matchup = data.get("games_per_matchup")
    players = []
    for p in data["players"]:
        players.append((AIPersonality(p["personality"]), p["difficulty"]))
    return wins, played, games_per_matchup, players
def draw_grid(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]] | None = None,
    output_path: str = "tournament_grid.png",
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
    ranked: list[int] | None = None,
):
    """
    Draw a heatmap grid of tournament results.
    Rows = first player
    Cols = second player
    Color = red if first player won more of their games in that cell
            green if second player won more
    × = one player swept all games in that cell
    Axes are ordered by Bradley-Terry rank (worst-to-best) unless `ranked`
    is supplied. Provide either `played` or `games_per_matchup`.
    """
    # Imports are local so matplotlib is only required when plotting;
    # the Agg backend allows rendering without a display.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import numpy as np
    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")
    if players is None:
        players = _all_players()
    n = len(players)
    if ranked is None:
        ranked = rank_players(wins, players, played=played, games_per_matchup=games_per_matchup)
    labels = [_player_label(*players[i]) for i in ranked]
    def games(i: int, j: int) -> int:
        # Games played for ordered pair (i, j); 0 when unknown.
        return_value = played[(i, j)] if played is not None else games_per_matchup
        return return_value if return_value is not None else 0
    # Build value matrix: (p1_wins - p2_wins) / total_games ∈ [-1, 1]
    matrix = np.full((n, n), np.nan)
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            matrix[row, col] = (p1_wins - (g - p1_wins)) / g if g > 0 else 0.0
    # Scale the figure with the player count so labels stay legible.
    cell_size = 0.22
    fig_size = n * cell_size + 3
    fig, ax = plt.subplots(figsize=(fig_size, fig_size))
    cmap = mcolors.LinearSegmentedColormap.from_list(
        "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
    )
    norm = mcolors.Normalize(vmin=-1, vmax=1)
    img = ax.imshow(matrix, cmap=cmap, norm=norm, aspect="equal", interpolation="none")
    # × marks for sweeps
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            if p1_wins == g or p1_wins == 0:
                ax.text(col, row, "×", ha="center", va="center",
                        fontsize=5, color="black", fontweight="bold", zorder=3)
    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(labels, rotation=90, fontsize=4)
    ax.set_yticklabels(labels, fontsize=4)
    ax.xaxis.set_label_position("top")
    ax.xaxis.tick_top()
    ax.set_xlabel("Second player", labelpad=8, fontsize=8)
    ax.set_ylabel("First player", labelpad=8, fontsize=8)
    ax.set_title(
        "Tournament results — red: first player wins more, green: second player wins more",
        pad=14, fontsize=9,
    )
    plt.colorbar(img, ax=ax, fraction=0.015, pad=0.01,
                 label="(P1 wins - P2 wins) / games per cell")
    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Grid saved to {output_path}")
if __name__ == "__main__":
    # Run an adaptive tournament restricted to the strongest difficulty band.
    difficulties = list(range(8, 11))
    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins, played = asyncio.run(run_tournament_adaptive(
        card_pool,
        difficulties=difficulties,
        min_games=20,
        max_games=1000,
        p_decisive=0.65,
        alpha=0.05,
    ))
    save_tournament(wins, players=players, played=played)
    ratings = compute_bradley_terry(wins, len(players), played=played)
    ranked = sorted(range(len(players)), key=lambda i: ratings[i])  # worst-to-best
    draw_grid(wins, players=players, played=played, ranked=ranked)
    # These values are Bradley-Terry strengths (mean-normalized to 1.0), not
    # Elo points — label them accordingly and print with enough precision to
    # distinguish near-1.0 strengths (".1f" would render most of them as 1.0).
    print("\nFinal Bradley-Terry strengths (best to worst):")
    for rank, i in enumerate(reversed(ranked), 1):
        personality, difficulty = players[i]
        label = _player_label(personality, difficulty)
        print(f" {rank:2d}. {label:<12} {ratings[i]:.3f}")

278
backend/ai/train_nn.py Normal file
View File

@@ -0,0 +1,278 @@
import os
import random
import uuid
from collections import deque
import numpy as np
from dotenv import load_dotenv
load_dotenv()
from game.card import compute_deck_type
from ai.engine import AIPersonality, choose_cards, choose_plan
from game.rules import PlayerState, GameState, action_play_card, action_sacrifice, action_end_turn
from ai.simulate import get_simulation_cards, _make_instances, MAX_TURNS
from ai.nn import NeuralNet, NeuralPlayer
from ai.card_pick_nn import CardPickPlayer, N_CARD_FEATURES, CARD_PICK_WEIGHTS_PATH
# Plan-network weights checkpoint, stored next to this module.
NN_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "nn_weights.json")
# Player ids used for every training episode.
P1 = "p1"
P2 = "p2"
# Scripted personalities the NN trains against in non-self-play episodes.
# ARBITRARY and JEBRASKA are excluded from the opponent pool — presumably
# because they don't provide a useful training signal; TODO confirm.
FIXED_PERSONALITIES = [
    p for p in AIPersonality
    if p not in (
        AIPersonality.ARBITRARY,
        AIPersonality.JEBRASKA
    )
]
def _build_player(pid: str, name: str, cards: list, difficulty: int, personality: AIPersonality,
                  deck_pool: dict | None = None) -> PlayerState:
    """Create a scripted opponent's PlayerState; deck comes from the prebuilt pool when available."""
    if deck_pool and personality in deck_pool:
        # Reuse a prebuilt deck instead of drafting from scratch.
        deck = random.choice(deck_pool[personality])
    else:
        deck = choose_cards(cards, difficulty, personality)
    shuffled = _make_instances(deck)
    random.shuffle(shuffled)
    return PlayerState(
        user_id=pid,
        username=name,
        deck_type=compute_deck_type(deck) or "Balanced",
        deck=shuffled,
    )
def _build_nn_player(pid: str, name: str, cards: list, difficulty: int,
                     card_pick_player: CardPickPlayer) -> PlayerState:
    """Build a PlayerState using the card-pick NN for deck selection."""
    # Cost ceiling scales with difficulty from 6 upward; below that it is
    # pinned to 6 so low-difficulty decks can't include very expensive cards.
    if difficulty >= 6:
        max_card_cost = difficulty + 1
    else:
        max_card_cost = 6
    allowed = [c for c in cards if c.cost <= max_card_cost]
    if not allowed:
        # Degenerate pool: fall back to the full card list.
        allowed = list(cards)
    deck = card_pick_player.choose_cards(allowed, difficulty)
    instances = _make_instances(deck)
    random.shuffle(instances)
    return PlayerState(
        user_id=pid, username=name,
        deck_type=compute_deck_type(deck) or "Balanced",
        deck=instances,
    )
def run_episode(
    p1_state: PlayerState,
    p2_state: PlayerState,
    p1_ctrl,  # (player, opponent) -> MovePlan
    p2_ctrl,  # (player, opponent) -> MovePlan
) -> str | None:
    """Returns winner_id (P1 or P2) or None on timeout."""
    # Setup: both players get their first energy-cap bump, then P1 (who
    # always moves first) receives energy and a full opening hand.
    p1_state.increment_energy_cap()
    p2_state.increment_energy_cap()
    p1_state.refill_energy()
    p1_state.draw_to_full()
    state = GameState(
        game_id=str(uuid.uuid4()),
        players={P1: p1_state, P2: p2_state},
        player_order=[P1, P2],
        active_player_id=P1,
        phase="main",
        turn=1,
    )
    ctrls = {P1: p1_ctrl, P2: p2_ctrl}
    for _ in range(MAX_TURNS):
        if state.result:
            break
        active_id = state.active_player_id
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]
        plan = ctrls[active_id](player, opponent)
        # Apply planned sacrifices first, skipping already-empty slots.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)
        # Shuffle plays so slot collisions don't always favor plan order.
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            # Re-validate each play: earlier plays may have taken the slot,
            # spent the energy, or the card may no longer be in hand.
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None or player.board[slot] is not None or card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)
        action_end_turn(state)
    return state.result.winner_id if state.result else None
def train(
    n_episodes: int = 50_000,
    self_play_start: int = 0,
    self_play_max_frac: float = 0.9,
    lr: float = 1e-3,
    opp_difficulty: int = 10,
    temperature: float = 1.0,
    batch_size: int = 500,
    save_every: int = 5_000,
    save_path: str = NN_WEIGHTS_PATH,
) -> NeuralNet:
    """
    REINFORCE training loop for the plan net and the card-pick net.

    Each episode is one full game. NN participants sample stochastically
    (softmax at `temperature`) and record their trajectories; after the game
    each trajectory's gradient is weighted by (outcome - baseline) and
    accumulated into a mini-batch, applied with Adam once `batch_size`
    contributions have been collected (separately per net).

    Curriculum: episodes are played against scripted personalities at
    `opp_difficulty`, with the probability of self-play instead ramping
    linearly from 0 (at `self_play_start`) up to `self_play_max_frac` by
    the final episode.

    Returns the trained plan network. Both nets are checkpointed every
    `save_every` episodes and saved again at the end.
    """
    cards = get_simulation_cards()
    # Pre-build a pool of opponent decks per personality to avoid rebuilding from scratch each episode.
    DECK_POOL_SIZE = 100
    opp_deck_pool: dict[AIPersonality, list] = {
        p: [choose_cards(cards, opp_difficulty, p) for _ in range(DECK_POOL_SIZE)]
        for p in FIXED_PERSONALITIES
    }
    # Resume from existing checkpoints so training can be interrupted safely.
    if os.path.exists(save_path):
        print(f"Resuming plan net from {save_path}")
        net = NeuralNet.load(save_path)
    else:
        print("Initializing new plan network")
        net = NeuralNet(seed=42)
    cp_path = CARD_PICK_WEIGHTS_PATH
    if os.path.exists(cp_path):
        print(f"Resuming card-pick net from {cp_path}")
        card_pick_net = NeuralNet.load(cp_path)
    else:
        print("Initializing new card-pick network")
        card_pick_net = NeuralNet(n_features=N_CARD_FEATURES, hidden=(32, 16), seed=43)
    recent_outcomes: deque[int] = deque(maxlen=1000)  # rolling window for win rate display
    baseline = 0.0  # EMA of recent outcomes; subtracted before each update
    baseline_alpha = 0.99  # decay — roughly a 100-episode window
    # Gradient accumulators for the plan net...
    batch_gw = [np.zeros_like(w) for w in net.weights]
    batch_gb = [np.zeros_like(b) for b in net.biases]
    batch_count = 0
    # ...and, independently, for the card-pick net.
    cp_batch_gw = [np.zeros_like(w) for w in card_pick_net.weights]
    cp_batch_gb = [np.zeros_like(b) for b in card_pick_net.biases]
    cp_batch_count = 0
    for episode in range(1, n_episodes + 1):
        # Ramp self-play fraction linearly from 0 to self_play_max_frac
        if episode >= self_play_start:
            progress = (episode - self_play_start) / max(1, n_episodes - self_play_start)
            self_play_prob = self_play_max_frac * progress
        else:
            self_play_prob = 0.0
        # Randomly decide who goes first (NN is always P1 for simplicity)
        nn_goes_first = random.random() < 0.5
        if random.random() < self_play_prob:
            # --- Self-play episode: both sides use the current nets. ---
            nn1 = NeuralPlayer(net, training=True, temperature=temperature)
            nn2 = NeuralPlayer(net, training=True, temperature=temperature)
            cp1 = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            cp2 = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            p1_state = _build_nn_player(P1, "NN1", cards, 10, cp1)
            p2_state = _build_nn_player(P2, "NN2", cards, 10, cp2)
            if not nn_goes_first:
                p1_state, p2_state = p2_state, p1_state
            winner = run_episode(p1_state, p2_state, nn1.choose_plan, nn2.choose_plan)
            p1_outcome = 1.0 if winner == P1 else -1.0
            # NOTE(review): here the baseline is updated *before* computing
            # any grads, while the fixed-opponent branch below updates it
            # between the plan-net and card-pick grads — confirm whether this
            # asymmetry is intentional.
            baseline = baseline_alpha * baseline + (1 - baseline_alpha) * p1_outcome
            # Both sides contribute: the winner is reinforced, the loser penalized.
            for player_grads in [nn1.compute_grads(p1_outcome - baseline),
                                 nn2.compute_grads(-p1_outcome - baseline)]:
                if player_grads is not None:
                    gw, gb = player_grads
                    for i in range(len(batch_gw)):
                        batch_gw[i] += gw[i]
                        batch_gb[i] += gb[i]
                    batch_count += 1
            for cp_grads in [cp1.compute_grads(p1_outcome - baseline),
                             cp2.compute_grads(-p1_outcome - baseline)]:
                if cp_grads is not None:
                    gw, gb = cp_grads
                    for i in range(len(cp_batch_gw)):
                        cp_batch_gw[i] += gw[i]
                        cp_batch_gb[i] += gb[i]
                    cp_batch_count += 1
        else:
            # --- Curriculum episode: NN vs a scripted personality. ---
            opp_personality = random.choice(FIXED_PERSONALITIES)
            nn_player = NeuralPlayer(net, training=True, temperature=temperature)
            cp_player = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            opp_ctrl = lambda p, o, pers=opp_personality, diff=opp_difficulty: choose_plan(p, o, pers, diff)
            if nn_goes_first:
                nn_id = P1
                p1_state = _build_nn_player(P1, "NN", cards, 10, cp_player)
                p2_state = _build_player(P2, "OPP", cards, opp_difficulty, opp_personality, opp_deck_pool)
                winner = run_episode(p1_state, p2_state, nn_player.choose_plan, opp_ctrl)
            else:
                nn_id = P2
                p1_state = _build_player(P1, "OPP", cards, opp_difficulty, opp_personality, opp_deck_pool)
                p2_state = _build_nn_player(P2, "NN", cards, 10, cp_player)
                winner = run_episode(p1_state, p2_state, opp_ctrl, nn_player.choose_plan)
            nn_outcome = 1.0 if winner == nn_id else -1.0
            player_grads = nn_player.compute_grads(nn_outcome - baseline)
            baseline = baseline_alpha * baseline + (1 - baseline_alpha) * nn_outcome
            if player_grads is not None:
                gw, gb = player_grads
                for i in range(len(batch_gw)):
                    batch_gw[i] += gw[i]
                    batch_gb[i] += gb[i]
                batch_count += 1
            # NOTE(review): cp_grads uses the baseline updated just above,
            # whereas player_grads used the pre-update value — confirm intent.
            cp_grads = cp_player.compute_grads(nn_outcome - baseline)
            if cp_grads is not None:
                gw, gb = cp_grads
                for i in range(len(cp_batch_gw)):
                    cp_batch_gw[i] += gw[i]
                    cp_batch_gb[i] += gb[i]
                cp_batch_count += 1
            # Win-rate window tracks only fixed-opponent games (nn_id is
            # defined in this branch only).
            recent_outcomes.append(1 if winner == nn_id else 0)
        # Apply accumulated gradients once enough episodes have contributed.
        if batch_count >= batch_size:
            for i in range(len(batch_gw)):
                batch_gw[i] /= batch_count
                batch_gb[i] /= batch_count
            net.adam_update(batch_gw, batch_gb, lr=lr)
            batch_gw = [np.zeros_like(w) for w in net.weights]
            batch_gb = [np.zeros_like(b) for b in net.biases]
            batch_count = 0
        if cp_batch_count >= batch_size:
            for i in range(len(cp_batch_gw)):
                cp_batch_gw[i] /= cp_batch_count
                cp_batch_gb[i] /= cp_batch_count
            card_pick_net.adam_update(cp_batch_gw, cp_batch_gb, lr=lr)
            cp_batch_gw = [np.zeros_like(w) for w in card_pick_net.weights]
            cp_batch_gb = [np.zeros_like(b) for b in card_pick_net.biases]
            cp_batch_count = 0
        if episode % 1000 == 0 or episode == n_episodes:
            wr = sum(recent_outcomes) / len(recent_outcomes) if recent_outcomes else 0.0
            print(f"\r[{episode:>6}/{n_episodes}] win rate (last {len(recent_outcomes)}): {wr:.1%} "
                  f"self-play frac: {self_play_prob:.0%}", flush=True)
        else:
            print(f" {episode % 1000}/1000", end="\r", flush=True)
        if episode % save_every == 0:
            net.save(save_path)
            card_pick_net.save(cp_path)
            print(f" → saved to {save_path} and {cp_path}")
    net.save(save_path)
    card_pick_net.save(cp_path)
    wr = sum(recent_outcomes) / len(recent_outcomes) if recent_outcomes else 0.0
    print(f"Done. Final win rate (last {len(recent_outcomes)}): {wr:.1%}")
    return net
if __name__ == "__main__":
    # Train both nets with default hyperparameters; checkpoints are written periodically.
    train()