🐐
This commit is contained in:
0
backend/ai/__init__.py
Normal file
0
backend/ai/__init__.py
Normal file
176
backend/ai/card_pick_nn.py
Normal file
176
backend/ai/card_pick_nn.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai.nn import NeuralNet, _softmax
|
||||
|
||||
# Separate weights file so this NN trains independently from the plan NN.
|
||||
CARD_PICK_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "card_pick_weights.json")
|
||||
|
||||
N_CARD_FEATURES = 15
|
||||
|
||||
# Normalization constants — chosen to cover the realistic stat range for generated cards.
|
||||
_MAX_ATK = 50.0
|
||||
_MAX_DEF = 100.0
|
||||
|
||||
|
||||
def _precompute_static_features(allowed: list) -> np.ndarray:
    """
    Vectorized precomputation of the 7 per-card static features for the whole pool.
    Returns (n, 7) float32. Called once per choose_cards() invocation.

    Feature layout: [atk, def, cost, rarity, atk_ratio, power-curve value, type],
    each normalized into roughly [0, 1].
    """
    count = len(allowed)
    atk = np.fromiter((c.attack for c in allowed), dtype=np.float32, count=count)
    defn = np.fromiter((c.defense for c in allowed), dtype=np.float32, count=count)
    cost = np.fromiter((c.cost for c in allowed), dtype=np.float32, count=count)
    rarity = np.fromiter((c.card_rarity.value for c in allowed), dtype=np.float32, count=count)
    ctype = np.fromiter((c.card_type.value for c in allowed), dtype=np.float32, count=count)

    # "True" cost implied by the stats (mirrors the card.py cost formula);
    # the clamped difference vs. actual cost measures how above-curve the card is.
    implied_cost = np.minimum(10.0, np.maximum(1.0, ((atk ** 2 + defn ** 2) ** 0.18) / 1.5))
    stat_total = atk + defn
    # 0.5 fallback keeps a 0/0 card neutral instead of NaN.
    attack_share = np.where(stat_total > 0, atk / stat_total, 0.5)

    columns = (
        atk / _MAX_ATK,
        defn / _MAX_DEF,
        cost / 10.0,
        rarity / 5.0,
        attack_share,
        np.clip(implied_cost - cost, 0.0, 1.0),
        ctype / 9.0,
    )
    return np.stack(columns, axis=1).astype(np.float32)
|
||||
|
||||
|
||||
class CardPickPlayer:
    """
    Uses a NeuralNet to sequentially select cards from a pool until the cost
    budget is exhausted. API mirrors NeuralPlayer so training code stays uniform.

    In training mode: samples stochastically (softmax) and records the
    trajectory for a REINFORCE update after the game ends.
    In inference mode: picks the highest-scoring affordable card at each step.

    Performance design:
    - Static per-card features (7) are computed once via vectorized numpy.
    - Context features (8) use running totals updated by O(1) increments.
    - Picked cards are tracked with a boolean mask; no list.remove() calls.
    - Each pick step does one small forward pass over the affordable subset only.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # net: shared NeuralNet scorer; temperature only affects training-mode sampling.
        self.net = net
        self.training = training
        self.temperature = temperature
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features_matrix, chosen_idx)

    def choose_cards(self, allowed: list, difficulty: int) -> list:
        """
        allowed: pre-filtered list of Card objects (cost ≤ max_card_cost already applied).
        Returns the selected deck as a list of Cards.

        Loops until no remaining card fits the leftover budget; an empty
        `allowed` pool returns an empty deck immediately.
        """
        BUDGET = 50
        n = len(allowed)

        static = _precompute_static_features(allowed)  # (n, 7) — computed once
        costs = np.array([c.cost for c in allowed], dtype=np.float32)
        picked = np.zeros(n, dtype=bool)

        budget_remaining = BUDGET
        selected: list = []

        # Running totals for context features — incremented O(1) per pick.
        n_picked = 0
        sum_atk = 0.0
        sum_def = 0.0
        sum_cost = 0.0
        n_cheap = 0  # cost ≤ 3
        n_high = 0  # cost ≥ 6
        # assumes difficulty is on a 1–10 scale — TODO confirm against caller.
        diff_norm = difficulty / 10.0

        while True:
            # Candidates: not yet picked AND still affordable.
            mask = (~picked) & (costs <= budget_remaining)
            if not mask.any():
                break

            idxs = np.where(mask)[0]

            # Context row — same for every candidate this step, broadcast via tile.
            if n_picked > 0:
                ctx = np.array([
                    n_picked / 30.0,
                    budget_remaining / 50.0,
                    sum_atk / n_picked / _MAX_ATK,
                    sum_def / n_picked / _MAX_DEF,
                    sum_cost / n_picked / 10.0,
                    n_cheap / n_picked,
                    n_high / n_picked,
                    diff_norm,
                ], dtype=np.float32)
            else:
                # First pick: averages are undefined, so zero them out.
                ctx = np.array([
                    0.0, budget_remaining / 50.0, 0.0, 0.0, 0.0, 0.0, 0.0, diff_norm,
                ], dtype=np.float32)

            # (len(idxs), 7 + 8) input matrix: static card features + shared context.
            features = np.concatenate(
                [static[idxs], np.tile(ctx, (len(idxs), 1))],
                axis=1,
            )
            scores = self.net.forward(features)

            if self.training:
                # Softmax in float64 for numerical safety; clip + renormalize so
                # np.random.choice never sees a zero/NaN probability.
                probs = _softmax((scores / self.temperature).astype(np.float64))
                probs = np.clip(probs, 1e-10, None)
                probs /= probs.sum()
                local_idx = int(np.random.choice(len(idxs), p=probs))
                # Save the exact input matrix so compute_grads can replay the step.
                self.trajectory.append((features, local_idx))
            else:
                local_idx = int(np.argmax(scores))

            global_idx = idxs[local_idx]
            card = allowed[global_idx]
            picked[global_idx] = True
            selected.append(card)

            # Incremental context update — O(1).
            budget_remaining -= card.cost
            n_picked += 1
            sum_atk += card.attack
            sum_def += card.defense
            sum_cost += card.cost
            if card.cost <= 3: n_cheap += 1
            if card.cost >= 6: n_high += 1

        return selected

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        REINFORCE gradients averaged over the pick trajectory.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if no picks were made.

        Clears the trajectory afterwards, so this is a one-shot call per game.
        """
        if not self.trajectory:
            return None

        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]

        for features, chosen_idx in self.trajectory:
            # Re-run forward on the stored features: net.backward reads the
            # activations cached by the most recent forward pass.
            scores = self.net.forward(features)
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # ∂log π(chosen)/∂scores = one_hot(chosen) − softmax(scores),
            # scaled by the centered reward.
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]

        # Average over trajectory length so long decks don't get larger updates.
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n

        self.trajectory.clear()
        return acc_gw, acc_gb
|
||||
1
backend/ai/card_pick_weights.json
Normal file
1
backend/ai/card_pick_weights.json
Normal file
File diff suppressed because one or more lines are too long
459
backend/ai/engine.py
Normal file
459
backend/ai/engine.py
Normal file
@@ -0,0 +1,459 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from itertools import combinations, permutations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from game.card import Card
|
||||
from game.rules import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE, PlayerState
|
||||
|
||||
logger = logging.getLogger("app")
|
||||
|
||||
AI_USER_ID = "ai"
|
||||
|
||||
class AIPersonality(Enum):
    """Play styles that weight the deck-builder and plan scorer differently."""
    AGGRESSIVE = "aggressive"
    DEFENSIVE = "defensive"
    BALANCED = "balanced"
    GREEDY = "greedy"  # prioritizes high cost cards, willing to sacrifice
    SWARM = "swarm"  # favors many cheap cards to cover the board
    CONTROL = "control"  # favors above-curve value cards
    ARBITRARY = "arbitrary"  # mostly random; randomness shrinks as difficulty rises
    JEBRASKA = "jebraska"  # trained neural network plan scorer
|
||||
|
||||
def get_random_personality() -> AIPersonality:
    """Pick one of the defined AI personalities uniformly at random."""
    options = tuple(AIPersonality)
    return random.choice(options)
|
||||
|
||||
def calculate_exact_cost(attack: int, defense: int) -> float:
    """Calculate the exact cost before rounding (matches card.py formula).

    The raw value ((atk² + def²)^0.18 / 1.5) is clamped into [1.0, 10.0].
    """
    raw = (attack ** 2 + defense ** 2) ** 0.18 / 1.5
    if raw < 1.0:
        return 1.0
    if raw > 10.0:
        return 10.0
    return raw
|
||||
|
||||
def get_power_curve_value(card) -> float:
    """
    Returns how much above the power curve a card is.
    Positive values mean the card is a better-than-expected deal for its cost.
    """
    return calculate_exact_cost(card.attack, card.defense) - card.cost
|
||||
|
||||
|
||||
def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
    """Build the AI's deck from the candidate pool.

    cards: full candidate pool; difficulty caps the max card cost (assumes a
    1–10 scale — TODO confirm); personality selects the scoring heuristic.
    Returns the selected deck (total cost ≤ 50). JEBRASKA delegates to the
    card-pick NN when trained weights exist.
    """
    BUDGET = 50

    # Higher difficulties unlock more expensive cards; floor of 6 below D6.
    if difficulty >= 6:
        max_card_cost = difficulty + 1
    else:
        max_card_cost = 6

    # If the cost filter empties the pool, fall back to the whole pool.
    allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)

    # Vectorized scoring over all allowed cards at once
    atk = np.array([c.attack for c in allowed], dtype=np.float32)
    defn = np.array([c.defense for c in allowed], dtype=np.float32)
    cost = np.array([c.cost for c in allowed], dtype=np.float32)

    # Stat-implied cost (mirrors card.py); its excess over the printed cost is
    # the "power curve value" — how good a deal the card is.
    exact_cost = np.minimum(10.0, np.maximum(1.0, ((atk**2 + defn**2)**0.18) / 1.5))
    pcv_norm = np.clip(exact_cost - cost, 0.0, 1.0)
    cost_norm = cost / max_card_cost
    totals = atk + defn
    atk_ratio = np.where(totals > 0, atk / totals, 0.5)
    def_not_one = np.where(defn != 1, 1.0, 0.0)  # penalize 1-defense glass cannons

    if personality == AIPersonality.AGGRESSIVE:
        # (1-cost_norm) penalizes expensive cards. High-attack cards are inherently expensive,
        # so without this the second pass drifts toward costly cards at higher difficulty,
        # shrinking the deck. The bonus grows with max_card_cost and exactly offsets that drift.
        scores = 0.50 * atk_ratio + 0.35 * pcv_norm + 0.15 * (1.0 - cost_norm) + 0.10 * def_not_one
    elif personality == AIPersonality.DEFENSIVE:
        # Small (1-cost_norm) for the same anti-shrinkage reason; lighter because high-defense
        # cards don't correlate as strongly with cost as high-attack cards do.
        scores = 0.10 * (1.0 - atk_ratio) + 0.80 * pcv_norm + 0.10 * cost_norm
    elif personality == AIPersonality.GREEDY:
        # Small cost_norm keeps flavour without causing severe deck shrinkage at D10
        scores = 0.20 * cost_norm + 0.80 * pcv_norm
    elif personality == AIPersonality.SWARM:
        scores = 0.40 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm + 0.05 * def_not_one
    elif personality == AIPersonality.CONTROL:
        # Small cost_norm keeps flavour without causing severe deck shrinkage at D10
        scores = 0.85 * pcv_norm + 0.15 * cost_norm
    elif personality == AIPersonality.BALANCED:
        scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
    elif personality == AIPersonality.JEBRASKA:
        # Delegate entirely to the card-pick NN; skip the heuristic scoring path.
        from ai.card_pick_nn import CardPickPlayer, CARD_PICK_WEIGHTS_PATH
        from ai.nn import NeuralNet
        # Cache the loaded net on the function object so weights load once per process.
        if not hasattr(choose_cards, "_card_pick_net"):
            choose_cards._card_pick_net = (
                NeuralNet.load(CARD_PICK_WEIGHTS_PATH)
                if os.path.exists(CARD_PICK_WEIGHTS_PATH) else None
            )
        net = choose_cards._card_pick_net
        if net is not None:
            return CardPickPlayer(net, training=False).choose_cards(allowed, difficulty)
        # Fall through to BALANCED heuristic if weights aren't trained yet.
        scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
    else:  # ARBITRARY
        # Blend: more PCV signal (less randomness) as difficulty rises.
        w = 0.09 * difficulty
        scores = w * pcv_norm + (1.0 - w) * np.random.random(len(allowed)).astype(np.float32)

    # Small noise floor at D10 prevents fully deterministic deck building.
    noise = (max(0,12 - difficulty)**2) * 0.008
    scores = scores + np.random.normal(0, noise, len(allowed)).astype(np.float32)

    # Rank cards best-first by heuristic score.
    order = np.argsort(-scores)
    sorted_cards = [allowed[i] for i in order]

    # Budget reserved for cheap (cost ≤ 3) cards in the first pass, per personality.
    early_budget = {
        AIPersonality.GREEDY: 20,  # cheap cards are sacrifice fodder for big plays
        AIPersonality.SWARM: 12,
        AIPersonality.AGGRESSIVE: 18,  # raised: ensures cheap high-attack fodder regardless of difficulty
        AIPersonality.DEFENSIVE: 15,  # raised: stable cheap-card base across difficulty levels
        AIPersonality.CONTROL: 8,
        AIPersonality.BALANCED: 25,  # spread the deck across all cost levels
        AIPersonality.JEBRASKA: 25,  # fallback (no trained weights yet)
        AIPersonality.ARBITRARY: 8,
    }[personality]

    selected: list[Card] = []
    total_cost = 0

    # First pass: secure early-game cards
    cheap_spent = 0
    for card in sorted_cards:
        if cheap_spent >= early_budget:
            break
        if card.cost > 3 or total_cost + card.cost > BUDGET:
            continue
        selected.append(card)
        total_cost += card.cost
        cheap_spent += card.cost

    # Second pass: fill remaining budget greedily by score
    taken = {id(c) for c in selected}  # id() handles duplicate-looking cards
    for card in sorted_cards:
        if total_cost >= BUDGET:
            break
        if id(card) in taken or total_cost + card.cost > BUDGET:
            continue
        selected.append(card)
        total_cost += card.cost

    return selected
|
||||
|
||||
|
||||
@dataclass
class MovePlan:
    """One candidate AI turn: sacrifices to make, then cards to play where."""
    # Board slot indices to sacrifice before playing.
    sacrifice_slots: list[int]
    plays: list[tuple]  # (CardInstance, board_slot: int)
    # Human-readable tag for logging, e.g. "sac1_play2" or "idle".
    label: str = ""
|
||||
|
||||
|
||||
def _affordable_subsets(hand, energy, start=0):
|
||||
"""Yield every subset of cards from hand whose total cost fits within energy."""
|
||||
yield []
|
||||
for i in range(start, len(hand)):
|
||||
card = hand[i]
|
||||
if card.cost <= energy:
|
||||
for rest in _affordable_subsets(hand, energy - card.cost, i + 1):
|
||||
yield [card] + rest
|
||||
|
||||
|
||||
def _plans_for_sacrifice(player, opponent, sacrifice_slots):
    """Generate one plan per affordable card subset for a given sacrifice set.

    Each occupied sacrificed slot is cleared and grants +1 energy before the
    affordable subsets are enumerated. One MovePlan is emitted per
    (card subset, empty-slot permutation) pair, so the output grows
    factorially with the number of empty slots — fine while BOARD_SIZE is small.

    player/opponent: PlayerState-like objects; sacrifice_slots: board indices.
    (`opponent` is kept for signature compatibility; it is not read here.)
    """
    board = list(player.board)
    energy = player.energy

    # Apply the hypothetical sacrifices: clear the slot, bank the energy.
    for slot in sacrifice_slots:
        if board[slot] is not None:
            board[slot] = None
            energy += 1

    hand = list(player.hand)
    empty_slots = [i for i, c in enumerate(board) if c is None]

    return [
        MovePlan(
            sacrifice_slots=list(sacrifice_slots),
            plays=list(zip(cards, scoring_slots)),
            label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
        )
        for cards in _affordable_subsets(hand, energy)
        for scoring_slots in permutations(empty_slots, len(cards))
    ]
|
||||
|
||||
|
||||
def generate_plans(player, opponent) -> list[MovePlan]:
    """Enumerate every candidate MovePlan for the current turn.

    Covers all sacrifice subsets (including none) combined with every
    affordable play arrangement, plus an explicit do-nothing plan.
    """
    occupied = [s for s in range(BOARD_SIZE) if player.board[s] is not None]

    plans: list[MovePlan] = []
    for count in range(len(occupied) + 1):
        for combo in combinations(occupied, count):
            plans.extend(_plans_for_sacrifice(player, opponent, list(combo)))

    # Idle: do nothing
    plans.append(MovePlan(sacrifice_slots=[], plays=[], label="idle"))

    return plans
|
||||
|
||||
def score_plans_batch(
    plans: list[MovePlan],
    player: PlayerState,
    opponent: PlayerState,
    personality: AIPersonality,
) -> np.ndarray:
    """Score every plan in one vectorized pass; returns (n,) float scores ≥ 0.

    Each plan is simulated onto a copy of the board, reduced to a handful of
    normalized metrics, then blended with personality-specific weights and
    adjusted for the current game context.
    """
    n = len(plans)

    # Pre-compute PCV for every hand card once
    pcv_cache = {
        id(c): max(0.0, min(1.0, get_power_curve_value(c)))
        for c in player.hand
    }

    # Build board-state arrays
    board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
    board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
    n_sac = np.zeros(n, dtype=np.float32)
    sac_val = np.zeros(n, dtype=np.float32)  # total cost of sacrificed cards
    play_val = np.zeros(n, dtype=np.float32)  # total cost of played cards
    pcv_score = np.full(n, 0.5, dtype=np.float32)  # 0.5 = neutral when no plays

    # Simulate each plan onto a scratch copy of the player's board.
    for idx, plan in enumerate(plans):
        board = list(player.board)
        for slot in plan.sacrifice_slots:
            board_slot = board[slot]
            if board_slot is not None:
                sac_val[idx] += board_slot.cost
                board[slot] = None
        n_sac[idx] = len(plan.sacrifice_slots)
        for card, slot in plan.plays:
            board[slot] = card
            play_val[idx] += card.cost
        for slot in range(BOARD_SIZE):
            board_slot = board[slot]
            if board_slot is not None:
                board_atk[idx, slot] = board_slot.attack
                board_occ[idx, slot] = True
        if plan.plays:
            pcv_vals = [pcv_cache.get(id(c), 0.5) for c, _ in plan.plays]
            pcv_score[idx] = sum(pcv_vals) / len(pcv_vals)

    # Enemy board — same for every plan
    en_atk = np.array([c.attack if c else 0 for c in opponent.board], dtype=np.float32)
    en_def = np.array([c.defense if c else 0 for c in opponent.board], dtype=np.float32)
    en_occ = np.array([c is not None for c in opponent.board], dtype=np.bool_)
    enemy_occupied = int(en_occ.sum())

    # --- Metrics (all shape (n,)) ---
    # Attack into an empty enemy slot hits the opponent directly.
    direct_damage = (board_atk * ~en_occ).sum(axis=1)
    blocking = board_occ & en_occ  # (n, 5)
    blocking_slots = blocking.sum(axis=1).astype(np.float32)
    cards_on_board = board_occ.sum(axis=1).astype(np.float32)
    cards_destroyed = ((board_atk >= en_def) & blocking).sum(axis=1).astype(np.float32)
    # Enemy attack coming through our empty slots.
    unblocked_in = (en_atk * ~board_occ).sum(axis=1)

    # Each score is normalized to [0, 1]; max(..., 1) guards divide-by-zero.
    atk_score = np.minimum(1.0, direct_damage / max(opponent.life, 1))
    block_score = blocking_slots / enemy_occupied if enemy_occupied > 0 else np.ones(n, dtype=np.float32)
    open_slots = BOARD_SIZE - enemy_occupied
    cover_score = (
        (cards_on_board - blocking_slots) / open_slots
        if open_slots > 0
        else np.ones(n, dtype=np.float32)
    )
    destroy_score = cards_destroyed / enemy_occupied if enemy_occupied > 0 else np.zeros(n, dtype=np.float32)
    threat_score = 1.0 - np.minimum(1.0, unblocked_in / max(player.life, 1))

    # Card-advantage proxy: share of total remaining cards that are ours.
    opp_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
    my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
    attrition_score = my_cards_left / (my_cards_left + max(opp_cards_left, 1))

    net_value = play_val - sac_val
    net_value_norm = np.clip((net_value + 10) / 20, 0.0, 1.0)

    # --- Sacrifice penalty ---
    # Penalize sacrificing for energy that then goes unspent, and trading
    # away more board value than the plays put back.
    energy_leftover = player.energy + n_sac - play_val
    wasted_energy = np.maximum(0, np.minimum(n_sac, energy_leftover))
    wasted_penalty = np.where(n_sac > 0, wasted_energy / np.maximum(n_sac, 1), 0.0)
    swap_penalty = np.clip((n_sac - net_value) / np.maximum(n_sac, 1), 0.0, 1.0)
    sac_penalty = np.where(n_sac > 0, 0.65 * wasted_penalty + 0.35 * swap_penalty, 0.0)

    # --- Personality weights ---
    if personality == AIPersonality.AGGRESSIVE:
        score = (0.30 * atk_score + 0.07 * block_score + 0.15 * cover_score +
                 0.08 * net_value_norm + 0.25 * destroy_score +
                 0.08 * attrition_score + 0.04 * pcv_score + 0.03 * threat_score)
    elif personality == AIPersonality.DEFENSIVE:
        score = (0.12 * atk_score + 0.20 * block_score + 0.18 * cover_score +
                 0.04 * net_value_norm + 0.18 * destroy_score +
                 0.15 * attrition_score + 0.05 * pcv_score + 0.08 * threat_score)
    elif personality == AIPersonality.SWARM:
        score = (0.25 * atk_score + 0.10 * block_score + 0.35 * cover_score +
                 0.05 * net_value_norm + 0.05 * destroy_score +
                 0.10 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.GREEDY:
        score = (0.15 * atk_score + 0.05 * block_score + 0.18 * cover_score +
                 0.38 * net_value_norm + 0.05 * destroy_score +
                 0.09 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.CONTROL:
        score = (0.10 * atk_score + 0.05 * block_score + 0.05 * cover_score +
                 0.20 * net_value_norm + 0.05 * destroy_score +
                 0.10 * attrition_score + 0.40 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.BALANCED:
        score = (0.12 * atk_score + 0.13 * block_score + 0.15 * cover_score +
                 0.10 * net_value_norm + 0.12 * destroy_score +
                 0.15 * attrition_score + 0.12 * pcv_score + 0.11 * threat_score)
    else:  # ARBITRARY
        score = (0.50 * np.random.random(n).astype(np.float32) +
                 0.06 * atk_score + 0.06 * block_score + 0.08 * cover_score +
                 0.05 * net_value_norm + 0.06 * destroy_score +
                 0.08 * attrition_score + 0.06 * pcv_score + 0.05 * threat_score)

    # --- Context adjustments ---
    # Hard overrides: near-max for lethal plans, near-min for plans that lose.
    score = np.where(direct_damage >= opponent.life, np.maximum(score, 0.95), score)
    score = np.where(unblocked_in >= player.life, np.minimum(score, 0.05), score)

    # Matchup/tempo nudges, capped at 1.0.
    if opponent.deck_type in ("God Card", "Pantheon"):
        score = np.minimum(1.0, score + 0.08 * cover_score)
    if opponent.deck_type in ("Aggro", "Rush"):
        score = np.minimum(1.0, score + 0.06 * block_score + 0.04 * threat_score)
    if opponent.deck_type == "Wall":
        score = np.minimum(1.0, score + 0.06 * atk_score)
    if opponent.life < STARTING_LIFE * 0.3:
        score = np.minimum(1.0, score + 0.06 * atk_score)
    if player.life < STARTING_LIFE * 0.3:
        score = np.minimum(1.0, score + 0.06 * threat_score + 0.04 * block_score)
    if opp_cards_left <= 5:
        score = np.where(cards_on_board > 0, np.minimum(1.0, score + 0.05), score)

    return np.maximum(0.0, score - sac_penalty)
|
||||
|
||||
|
||||
def choose_plan(player: PlayerState, opponent: PlayerState, personality: AIPersonality, difficulty: int) -> MovePlan:
    """Pick the best plan for this turn, with difficulty-scaled decision noise.

    JEBRASKA scores plans with the trained plan NN (cached on the function
    object), falling back to the BALANCED heuristic when no weights exist.
    Lower difficulty adds Gaussian noise to the scores so weaker AIs misplay.
    """
    plans = generate_plans(player, opponent)

    if personality == AIPersonality.JEBRASKA:
        from ai.nn import NeuralNet
        # `os` is already imported at module level.
        _weights = os.path.join(os.path.dirname(__file__), "nn_weights.json")
        if not hasattr(choose_plan, "_neural_net"):
            choose_plan._neural_net = NeuralNet.load(_weights) if os.path.exists(_weights) else None
        net = choose_plan._neural_net
        if net is not None:
            from ai.nn import extract_plan_features
            scores = net.forward(extract_plan_features(plans, player, opponent))
        else:  # fallback to BALANCED if weights not found
            scores = score_plans_batch(plans, player, opponent, AIPersonality.BALANCED)
    else:
        scores = score_plans_batch(plans, player, opponent, personality)

    # Noise shrinks quadratically as difficulty rises. Clamp at 0: the raw
    # expression goes negative for difficulty > 10, and np.random.normal
    # raises ValueError on a negative scale.
    noise_scale = max(0.0, ((max(0, 12 - difficulty) ** 2) - 4) * 0.008)
    noise = np.random.normal(0, noise_scale, len(scores)).astype(np.float32)
    return plans[int(np.argmax(scores + noise))]
|
||||
|
||||
async def run_ai_turn(game_id: str):
    """Execute one full AI turn for `game_id`, streaming state to the human.

    Chooses a plan, animates sacrifices and plays with short sleeps, ends the
    turn, handles game-over bookkeeping, and re-schedules itself if the AI is
    still the active player (e.g. the human has no turn — TODO confirm rule).
    """
    # Imported lazily to avoid a circular import with game.manager.
    from game.manager import (
        active_games, connections, active_deck_ids,
        serialize_state, record_game_result, calculate_combat_animation_time
    )

    state = active_games.get(game_id)
    # Bail out if the game is gone/finished or it isn't the AI's turn.
    if not state or state.result:
        return
    if state.active_player_id != AI_USER_ID:
        return

    human_id = state.opponent_id(AI_USER_ID)
    # Wait up to ~10s for the human's websocket to (re)connect.
    waited = 0
    while not connections[game_id].get(human_id) and waited < 10:
        await asyncio.sleep(0.5)
        waited += 0.5

    # Let the client finish playing the previous combat animation first.
    await asyncio.sleep(calculate_combat_animation_time(state.last_combat_events))

    player = state.players[AI_USER_ID]
    opponent = state.players[human_id]
    difficulty = state.ai_difficulty
    personality = (
        AIPersonality(state.ai_personality)
        if state.ai_personality
        else AIPersonality.BALANCED
    )

    ws = connections[game_id].get(human_id)

    async def send_state(s):
        # Best-effort push; a dropped socket must not abort the AI turn.
        if ws:
            try:
                await ws.send_json({"type": "state", "state": serialize_state(s, human_id)})
            except Exception:
                pass

    async def send_sacrifice_anim(instance_id):
        # Best-effort animation trigger; failures are ignored like send_state.
        if ws:
            try:
                await ws.send_json({"type": "sacrifice_animation", "instance_id": instance_id})
            except Exception:
                pass

    best_plan = choose_plan(player, opponent, personality, difficulty)

    logger.info(
        f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} " +
        f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
    )

    # Sacrifices first (they free slots and generate energy for the plays).
    for slot in best_plan.sacrifice_slots:
        card_slot = player.board[slot]
        if card_slot is None:
            continue
        await send_sacrifice_anim(card_slot.instance_id)
        await asyncio.sleep(0.65)
        action_sacrifice(state, slot)
        await send_state(state)
        await asyncio.sleep(0.35)

    # Shuffle play order so the AI doesn't always fill slots left-to-right
    plays = list(best_plan.plays)
    random.shuffle(plays)

    for card, slot in plays:
        # Re-look up hand index each time (hand shrinks as cards are played)
        hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
        if hand_idx is None:
            continue
        # Skip plays invalidated since planning (occupied slot, not enough energy).
        if player.board[slot] is not None:
            continue
        if card.cost > player.energy:
            continue
        action_play_card(state, hand_idx, slot)
        await send_state(state)
        await asyncio.sleep(0.5)

    action_end_turn(state)
    await send_state(state)

    if state.result:
        # Game over: persist the result, push final state, tear down the game.
        from core.database import SessionLocal
        db = SessionLocal()
        try:
            record_game_result(state, db)
            if ws:
                await ws.send_json({
                    "type": "state",
                    "state": serialize_state(state, human_id),
                })
        finally:
            db.close()
        active_deck_ids.pop(human_id, None)
        active_deck_ids.pop(AI_USER_ID, None)
        active_games.pop(game_id, None)
        connections.pop(game_id, None)
        return

    # If the AI is somehow still the active player, take another turn.
    if state.active_player_id == AI_USER_ID:
        asyncio.create_task(run_ai_turn(game_id))
|
||||
266
backend/ai/nn.py
Normal file
266
backend/ai/nn.py
Normal file
@@ -0,0 +1,266 @@
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Layout: [state(8) | my_board(15) | opp_board(15) | plan(3) | result_board(15) | opp_deck_type(8)]
|
||||
N_FEATURES = 64
|
||||
|
||||
_DECK_TYPES = ["Balanced", "Aggro", "Wall", "Rush", "Control", "God Card", "Pantheon", "Unplayable"]
|
||||
_DECK_TYPE_IDX = {dt: i for i, dt in enumerate(_DECK_TYPES)}
|
||||
|
||||
_MAX_ATK = 50.0
|
||||
_MAX_DEF = 100.0
|
||||
_MAX_DECK = 30.0
|
||||
|
||||
|
||||
def _softmax(x: np.ndarray) -> np.ndarray:
|
||||
e = np.exp(x - x.max())
|
||||
return e / e.sum()
|
||||
|
||||
|
||||
class NeuralNet:
    """
    Fully-connected plan scorer: n_features → 64 → 32 → 1
    Pure numpy so it can be pickled into worker processes.
    Optimizer: Adam.

    Note: adam_update applies gradient ASCENT (+=), matching the REINFORCE
    callers that pass reward-scaled gradients.
    """

    def __init__(self, n_features: int = N_FEATURES, hidden: tuple = (64, 32), seed: int | None = None):
        rng = np.random.RandomState(seed)
        sizes = [n_features] + list(hidden) + [1]

        self.weights: list[np.ndarray] = []
        self.biases: list[np.ndarray] = []
        # Adam first/second moment estimates per layer, plus step counter t.
        self.m_w: list[np.ndarray] = []
        self.v_w: list[np.ndarray] = []
        self.m_b: list[np.ndarray] = []
        self.v_b: list[np.ndarray] = []
        self.t = 0

        for fan_in, fan_out in zip(sizes, sizes[1:]):
            # He initialization (sqrt(2/fan_in)) — suited to the ReLU hidden layers.
            w = rng.randn(fan_in, fan_out).astype(np.float32) * np.sqrt(2.0 / fan_in)
            b = np.zeros(fan_out, dtype=np.float32)
            self.weights.append(w)
            self.biases.append(b)
            self.m_w.append(np.zeros_like(w))
            self.v_w.append(np.zeros_like(w))
            self.m_b.append(np.zeros_like(b))
            self.v_b.append(np.zeros_like(b))

        # Caches filled by forward() and consumed by backward().
        self._acts: list[np.ndarray] = []
        self._pre_acts: list[np.ndarray] = []

    def forward(self, X: np.ndarray) -> np.ndarray:
        """X: (n, n_features) → scores: (n,)

        Caches activations/pre-activations for a subsequent backward() call.
        ReLU on hidden layers; linear output layer.
        """
        h = X.astype(np.float32)
        self._acts = [h]
        self._pre_acts = []
        for i, (W, b) in enumerate(zip(self.weights, self.biases)):
            z = h @ W + b
            self._pre_acts.append(z)
            h = np.maximum(0.0, z) if i < len(self.weights) - 1 else z
            self._acts.append(h)
        return h.squeeze(-1)

    def backward(self, upstream: np.ndarray) -> tuple[list, list]:
        """
        upstream: (n,) — dJ/d(scores), gradient for ascent.
        Returns (grads_w, grads_b).

        Must be called right after forward(); reads the cached activations.
        Gradients are averaged over the batch dimension n.
        """
        n = len(upstream)
        delta = upstream[:, None]  # (n, 1)
        grads_w = [None] * len(self.weights)
        grads_b = [None] * len(self.biases)
        for i in range(len(self.weights) - 1, -1, -1):
            h_in = self._acts[i]  # (n, in_size)
            grads_w[i] = h_in.T @ delta / n
            grads_b[i] = delta.mean(axis=0)
            if i > 0:
                # Propagate through the layer and the ReLU mask of the previous one.
                delta = (delta @ self.weights[i].T) * (self._pre_acts[i - 1] > 0)
        return grads_w, grads_b

    def adam_update(self, grads_w: list, grads_b: list,
                    lr: float = 1e-3, beta1: float = 0.9,
                    beta2: float = 0.999, eps: float = 1e-8,
                    grad_clip: float = 1.0) -> None:
        """Apply one Adam ascent step (weights move ALONG the gradient)."""
        # Global gradient norm clipping
        all_grads = [g for g in grads_w + grads_b if g is not None]
        global_norm = np.sqrt(sum(np.sum(g * g) for g in all_grads))
        if global_norm > grad_clip:
            scale = grad_clip / global_norm
            grads_w = [g * scale for g in grads_w]
            grads_b = [g * scale for g in grads_b]

        self.t += 1
        # Bias-correction denominators for the moment estimates.
        bc1 = 1 - beta1 ** self.t
        bc2 = 1 - beta2 ** self.t
        for i, (gw, gb) in enumerate(zip(grads_w, grads_b)):
            self.m_w[i] = beta1 * self.m_w[i] + (1 - beta1) * gw
            self.v_w[i] = beta2 * self.v_w[i] + (1 - beta2) * gw * gw
            self.weights[i] += lr * (self.m_w[i] / bc1) / (np.sqrt(self.v_w[i] / bc2) + eps)

            self.m_b[i] = beta1 * self.m_b[i] + (1 - beta1) * gb
            self.v_b[i] = beta2 * self.v_b[i] + (1 - beta2) * gb * gb
            self.biases[i] += lr * (self.m_b[i] / bc1) / (np.sqrt(self.v_b[i] / bc2) + eps)

    def save(self, path: str) -> None:
        """Serialize weights AND optimizer state to JSON so training can resume."""
        data = {
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases],
            "m_w": [m.tolist() for m in self.m_w],
            "v_w": [v.tolist() for v in self.v_w],
            "m_b": [m.tolist() for m in self.m_b],
            "v_b": [v.tolist() for v in self.v_b],
            "t": self.t,
        }
        with open(path, "w") as f:
            json.dump(data, f)

    @classmethod
    def load(cls, path: str) -> "NeuralNet":
        """Rebuild a net from save() output.

        Uses __new__ to bypass __init__, so layer sizes come entirely from the
        file rather than the constructor defaults.
        """
        with open(path) as f:
            data = json.load(f)
        net = cls.__new__(cls)
        net.weights = [np.array(w, dtype=np.float32) for w in data["weights"]]
        net.biases = [np.array(b, dtype=np.float32) for b in data["biases"]]
        net.m_w = [np.array(m, dtype=np.float32) for m in data["m_w"]]
        net.v_w = [np.array(v, dtype=np.float32) for v in data["v_w"]]
        net.m_b = [np.array(m, dtype=np.float32) for m in data["m_b"]]
        net.v_b = [np.array(v, dtype=np.float32) for v in data["v_b"]]
        net.t = data["t"]
        net._acts = []
        net._pre_acts = []
        return net
|
||||
|
||||
|
||||
def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
    """
    Build the feature matrix the plan network scores: one row per candidate plan.

    Returns (n_plans, N_FEATURES) float32 array.
    Layout: [state(8) | my_board(15) | opp_board(15) | plan(3) | result_board(15) | opp_deck one-hot]
    The state, board, and deck-type segments are identical for every row;
    only the plan segment and the simulated result board vary per plan.
    """
    # Local import — presumably avoids a circular import at module load; confirm.
    from game.rules import BOARD_SIZE, HAND_SIZE, MAX_ENERGY_CAP, STARTING_LIFE

    n = len(plans)

    # state (same for every plan) — each value normalized by its game-rule cap
    state = np.array([
        player.life / STARTING_LIFE,
        opponent.life / STARTING_LIFE,
        player.energy / MAX_ENERGY_CAP,
        player.energy_cap / MAX_ENERGY_CAP,
        len(player.hand) / HAND_SIZE,
        len(opponent.hand) / HAND_SIZE,
        len(player.deck) / _MAX_DECK,
        len(opponent.deck) / _MAX_DECK,
    ], dtype=np.float32)

    # current boards (same for every plan): 3 features per slot —
    # [attack, defense, occupied flag]
    my_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
    opp_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
    for slot in range(BOARD_SIZE):
        c = player.board[slot]
        if c is not None:
            my_board[slot * 3] = c.attack / _MAX_ATK
            my_board[slot * 3 + 1] = c.defense / _MAX_DEF
            my_board[slot * 3 + 2] = 1.0
        c = opponent.board[slot]
        if c is not None:
            opp_board[slot * 3] = c.attack / _MAX_ATK
            opp_board[slot * 3 + 1] = c.defense / _MAX_DEF
            opp_board[slot * 3 + 2] = 1.0

    # per-plan features
    plan_part = np.zeros((n, 3 + BOARD_SIZE * 3), dtype=np.float32)
    for idx, plan in enumerate(plans):
        # simulate board result: clear sacrificed slots, then place the plays
        result = list(player.board)
        for slot in plan.sacrifice_slots:
            result[slot] = None
        for card, slot in plan.plays:
            result[slot] = card

        total_cost = sum(c.cost for c, _ in plan.plays) if plan.plays else 0
        plan_part[idx, 0] = len(plan.sacrifice_slots) / BOARD_SIZE
        plan_part[idx, 1] = len(plan.plays) / HAND_SIZE
        # Denominator includes BOARD_SIZE — presumably sacrifices refund
        # energy, raising the spendable total; confirm against game rules.
        plan_part[idx, 2] = total_cost / (MAX_ENERGY_CAP + BOARD_SIZE)

        for slot in range(BOARD_SIZE):
            c = result[slot]
            if c is not None:
                plan_part[idx, 3 + slot * 3] = c.attack / _MAX_ATK
                plan_part[idx, 3 + slot * 3 + 1] = c.defense / _MAX_DEF
                plan_part[idx, 3 + slot * 3 + 2] = 1.0

    # opponent deck type one-hot (same for every plan); unknown types fall
    # back to index 0 via .get's default
    opp_deck_oh = np.zeros(len(_DECK_TYPES), dtype=np.float32)
    opp_deck_oh[_DECK_TYPE_IDX.get(opponent.deck_type, 0)] = 1.0

    # Tile the shared segments to n rows and stitch everything together.
    state_t = np.tile(state, (n, 1))
    my_board_t = np.tile(my_board, (n, 1))
    opp_board_t = np.tile(opp_board, (n, 1))
    opp_deck_t = np.tile(opp_deck_oh, (n, 1))

    return np.concatenate([state_t, my_board_t, opp_board_t, plan_part, opp_deck_t], axis=1)
|
||||
|
||||
|
||||
class NeuralPlayer:
    """
    Wraps a NeuralNet for use in game simulation.

    In training mode, samples plans stochastically and records the trajectory
    for a REINFORCE update after the game ends.
    In inference mode, picks the highest-scoring plan deterministically.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # net: scores candidate plans. temperature: softmax sharpness used
        # only when training (higher = more exploration).
        self.net = net
        self.training = training
        self.temperature = temperature
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features, chosen_idx)

    def choose_plan(self, player, opponent):
        """Pick a plan for this turn: sampled when training, argmax otherwise."""
        # Local import — presumably avoids a circular import; confirm.
        from ai.engine import generate_plans
        plans = generate_plans(player, opponent)
        features = extract_plan_features(plans, player, opponent)
        scores = self.net.forward(features)

        if self.training:
            # Temperature-scaled softmax sampling. float64 plus the clip and
            # renormalize guard against underflow breaking np.random.choice's
            # requirement that probabilities sum to 1.
            probs = _softmax((scores / self.temperature).astype(np.float64))
            probs = np.clip(probs, 1e-10, None)
            probs /= probs.sum()
            chosen_idx = int(np.random.choice(len(plans), p=probs))
            self.trajectory.append((features, chosen_idx))
        else:
            chosen_idx = int(np.argmax(scores))

        return plans[chosen_idx]

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        Computes averaged REINFORCE gradients for this trajectory without updating weights.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if trajectory is empty.
        """
        if not self.trajectory:
            return None

        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]

        for features, chosen_idx in self.trajectory:
            # Re-run forward so the net's internal caches correspond to this
            # decision before backpropagating through it.
            scores = self.net.forward(features)
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # Gradient of log-softmax at the chosen index, scaled by reward:
            # upstream = outcome * (one_hot(chosen) - probs)
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]

        # Average over the trajectory so episode length doesn't scale the step.
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n

        # Reset for the next episode.
        self.trajectory.clear()
        return acc_gw, acc_gb
|
||||
1
backend/ai/nn_weights.json
Normal file
1
backend/ai/nn_weights.json
Normal file
File diff suppressed because one or more lines are too long
634
backend/ai/simulate.py
Normal file
634
backend/ai/simulate.py
Normal file
@@ -0,0 +1,634 @@
|
||||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import uuid
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from datetime import datetime
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from game.card import Card, CardType, CardRarity, generate_cards, compute_deck_type
|
||||
from game.rules import (
|
||||
CardInstance, PlayerState, GameState,
|
||||
action_play_card, action_sacrifice, action_end_turn,
|
||||
)
|
||||
from ai.engine import AIPersonality, choose_cards, choose_plan
|
||||
|
||||
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
|
||||
SIMULATION_CARD_COUNT = 1000
|
||||
|
||||
|
||||
def _card_to_dict(card: Card) -> dict:
    """Serialize a Card into a JSON-compatible dict (inverse of _dict_to_card)."""
    result: dict = {}
    result["name"] = card.name
    result["generated_at"] = card.generated_at.isoformat()
    result["image_link"] = card.image_link
    result["card_rarity"] = card.card_rarity.name
    result["card_type"] = card.card_type.name
    result["wikidata_instance"] = card.wikidata_instance
    result["text"] = card.text
    result["attack"] = card.attack
    result["defense"] = card.defense
    result["cost"] = card.cost
    return result
|
||||
|
||||
|
||||
def _dict_to_card(d: dict) -> Card:
    """Rebuild a Card from a dict produced by _card_to_dict."""
    fields = dict(
        name=d["name"],
        generated_at=datetime.fromisoformat(d["generated_at"]),
        image_link=d["image_link"],
        card_rarity=CardRarity[d["card_rarity"]],
        card_type=CardType[d["card_type"]],
        wikidata_instance=d["wikidata_instance"],
        text=d["text"],
        attack=d["attack"],
        defense=d["defense"],
        cost=d["cost"],
    )
    return Card(**fields)
|
||||
|
||||
|
||||
def get_simulation_cards() -> list[Card]:
    """Return the cached simulation card pool, generating and caching it on first use."""
    if not os.path.exists(SIMULATION_CARDS_PATH):
        # Cache miss: generate the pool once and persist it for later runs.
        print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
        cards = generate_cards(SIMULATION_CARD_COUNT)

        with open(SIMULATION_CARDS_PATH, "w", encoding="utf-8") as f:
            json.dump([_card_to_dict(c) for c in cards], f, ensure_ascii=False, indent=2)

        print(f"Saved {len(cards)} cards to {SIMULATION_CARDS_PATH}")
        return cards

    # Cache hit: deserialize the previously generated pool.
    with open(SIMULATION_CARDS_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)
    return [_dict_to_card(d) for d in data]
|
||||
|
||||
|
||||
# Fixed player ids used for every simulated game; simulate_game returns one
# of these (or None) as the winner.
PLAYER1_ID = "p1"
PLAYER2_ID = "p2"
MAX_TURNS = 300  # safety cap to prevent infinite games
|
||||
|
||||
|
||||
def _make_instances(deck: list[Card]) -> list[CardInstance]:
    """Wrap each Card of a deck in a fresh CardInstance with a unique instance id."""
    instances: list[CardInstance] = []
    for card in deck:
        instance = CardInstance(
            instance_id=str(uuid.uuid4()),
            card_id=card.name,
            name=card.name,
            attack=card.attack,
            defense=card.defense,
            max_defense=card.defense,
            cost=card.cost,
            card_type=card.card_type.name,
            card_rarity=card.card_rarity.name,
            image_link=card.image_link or "",
            text=card.text or "",
        )
        instances.append(instance)
    return instances
|
||||
|
||||
|
||||
def simulate_game(
    cards: list[Card],
    difficulty1: int,
    personality1: AIPersonality,
    difficulty2: int,
    personality2: AIPersonality,
) -> str | None:
    """
    Simulate a single game between two AIs choosing from `cards`.
    Player 1 always goes first.

    Returns "p1", "p2", or None if the game exceeds MAX_TURNS.
    """
    # Each AI drafts its own deck from the shared pool.
    deck1 = choose_cards(cards, difficulty1, personality1)
    deck2 = choose_cards(cards, difficulty2, personality2)

    instances1 = _make_instances(deck1)
    instances2 = _make_instances(deck2)
    random.shuffle(instances1)
    random.shuffle(instances2)

    deck_type1 = compute_deck_type(deck1) or "Balanced"
    deck_type2 = compute_deck_type(deck2) or "Balanced"

    p1 = PlayerState(user_id=PLAYER1_ID, username="AI1", deck_type=deck_type1, deck=instances1)
    p2 = PlayerState(user_id=PLAYER2_ID, username="AI2", deck_type=deck_type2, deck=instances2)

    # P1 always goes first: both caps tick up, but only P1 gets energy
    # and an opening hand before turn 1.
    p1.increment_energy_cap()
    p2.increment_energy_cap()
    p1.refill_energy()
    p1.draw_to_full()

    state = GameState(
        game_id=str(uuid.uuid4()),
        players={PLAYER1_ID: p1, PLAYER2_ID: p2},
        player_order=[PLAYER1_ID, PLAYER2_ID],
        active_player_id=PLAYER1_ID,
        phase="main",
        turn=1,
    )

    # Per-player AI configuration, looked up by active player each turn.
    configs = {
        PLAYER1_ID: (difficulty1, personality1),
        PLAYER2_ID: (difficulty2, personality2),
    }

    for _ in range(MAX_TURNS):
        if state.result:
            break

        active_id = state.active_player_id
        difficulty, personality = configs[active_id]
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]

        plan = choose_plan(player, opponent, personality, difficulty)

        # Execute sacrifices first so freed slots are available for plays.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)

        # Play in random order; skip any play invalidated by an earlier
        # action (card no longer in hand, slot occupied, not enough energy).
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None:
                continue
            if player.board[slot] is not None:
                continue
            if card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)

        action_end_turn(state)

    if state.result and state.result.winner_id:
        return state.result.winner_id
    return None
|
||||
|
||||
|
||||
# These must be module-level so they are picklable.

# Shared card pool for this worker process, set once by _init_worker.
_worker_cards: list[Card] = []


def _init_worker(cards: list[Card]) -> None:
    """ProcessPoolExecutor initializer: stash the card pool in the worker process."""
    global _worker_cards
    _worker_cards = cards
|
||||
|
||||
def _run_game_sync(args: tuple) -> str | None:
    """Picklable worker entry point: run one game using the cached card pool."""
    difficulty1, name1, difficulty2, name2 = args
    personality1 = AIPersonality(name1)
    personality2 = AIPersonality(name2)
    return simulate_game(_worker_cards, difficulty1, personality1, difficulty2, personality2)
|
||||
|
||||
|
||||
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
    """Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
    levels = list(range(1, 11)) if difficulties is None else difficulties
    combos: list[tuple[AIPersonality, int]] = []
    for personality in AIPersonality:
        for difficulty in levels:
            combos.append((personality, difficulty))
    return combos
|
||||
|
||||
|
||||
def _player_label(personality: AIPersonality, difficulty: int) -> str:
    """Short display label for a player, e.g. 'AGG-7'."""
    prefix = personality.value[:3].upper()
    return f"{prefix}-{difficulty}"
|
||||
|
||||
|
||||
async def run_tournament(
    cards: list[Card],
    games_per_matchup: int = 5,
    difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Pit every (personality, difficulty) pair against every other, as both
    first and second player.

    `difficulties` selects which difficulty levels to include (default: 1-10).

    Returns a wins dict keyed by (first_player_index, second_player_index)
    where the value is how many of `games_per_matchup` games the first player won.

    Games run in parallel across all CPU cores via ProcessPoolExecutor.
    Cards are sent to each worker once at startup, not once per game.
    """
    players = _all_players(difficulties)
    n = len(players)

    # One work item per game: (first_idx, second_idx, picklable worker args).
    indexed_args: list[tuple[int, int, tuple]] = []
    for i in range(n):
        p1_personality, p1_difficulty = players[i]
        for j in range(n):
            p2_personality, p2_difficulty = players[j]
            args = (p1_difficulty, p1_personality.value, p2_difficulty, p2_personality.value)
            for _ in range(games_per_matchup):
                indexed_args.append((i, j, args))

    total_games = len(indexed_args)
    n_workers = os.cpu_count() or 1
    print(f"Running {total_games} games across {n_workers} workers "
          f"({n} players, {games_per_matchup} games per ordered pair)...")

    # Mutable cell so the nested coroutine can update the progress counter.
    done = [0]
    # Throttle progress output to roughly 200 updates total.
    report_every = max(1, total_games // 200)

    loop = asyncio.get_running_loop()

    async def tracked(future):
        # Await one game's future and emit throttled progress.
        result = await future
        done[0] += 1
        if done[0] % report_every == 0 or done[0] == total_games:
            pct = done[0] / total_games * 100
            print(f" {done[0]}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
        return result

    # Workers receive the card pool once via the initializer, not per task.
    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        futures = [
            loop.run_in_executor(executor, _run_game_sync, args)
            for _, _, args in indexed_args
        ]
        results = await asyncio.gather(*[tracked(f) for f in futures])

    print("\nFinished")

    # Tally wins per ordered pair; a None winner means the game hit MAX_TURNS.
    wins: dict[tuple[int, int], int] = {}
    ties = 0
    for (i, j, _), winner in zip(indexed_args, results):
        key = (i, j)
        if key not in wins:
            wins[key] = 0
        if winner == PLAYER1_ID:
            wins[key] += 1
        elif winner is None:
            ties += 1

    print(f"Ties: {ties}")

    return wins
|
||||
|
||||
|
||||
def _sprt_check(wins: int, total: int, log_win: float, log_loss: float, log_B: float) -> bool:
|
||||
"""
|
||||
Return True when the SPRT has reached a decision for this matchup.
|
||||
|
||||
Tests H0: win_rate = 0.5 vs H1: win_rate = p_decisive (or 1-p_decisive).
|
||||
log_win = log(p_decisive / 0.5)
|
||||
log_loss = log((1 - p_decisive) / 0.5)
|
||||
|
||||
LLR drifts slowly for near-50% matchups and quickly for lopsided ones.
|
||||
Decided when LLR crosses ±log_B.
|
||||
"""
|
||||
llr = wins * log_win + (total - wins) * log_loss
|
||||
return llr >= log_B or llr <= -log_B
|
||||
|
||||
|
||||
async def run_tournament_adaptive(
    cards: list[Card],
    difficulties: list[int] | None = None,
    min_games: int = 5,
    max_games: int = 200,
    p_decisive: float = 0.65,
    alpha: float = 0.05,
) -> tuple[dict[tuple[int, int], int], dict[tuple[int, int], int]]:
    """
    Like run_tournament but allocates games adaptively.

    Each ordered pair (i, j) plays until SPRT decides one player is dominant
    (win rate ≥ p_decisive with confidence 1-alpha) or max_games is reached.
    Close matchups play more games; lopsided ones stop early.

    Returns (wins, played):
      wins[(i, j)] — how many games player i won as first player against j
      played[(i, j)] — how many games were played for that pair

    Each round, all currently-undecided pairs play one game in parallel across
    all CPU cores, preserving full parallelism while adapting per-pair budgets.
    """
    players = _all_players(difficulties)
    n = len(players)
    # Ordered pairs, including mirror matchups and self-play (i == j).
    all_pairs = [(i, j) for i in range(n) for j in range(n)]

    wins: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    played: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    decided: set[tuple[int, int]] = set()

    # Precompute SPRT constants (H0: p=0.5, H1: p=p_decisive)
    log_B = math.log((1 - alpha) / alpha)
    log_win = math.log(p_decisive / 0.5)
    log_loss = math.log((1 - p_decisive) / 0.5)

    def make_args(i: int, j: int) -> tuple:
        # Picklable per-game arguments for _run_game_sync.
        p1, d1 = players[i]
        p2, d2 = players[j]
        return (d1, p1.value, d2, p2.value)

    n_workers = os.cpu_count() or 1
    loop = asyncio.get_running_loop()
    total_played = 0
    max_possible = len(all_pairs) * max_games

    print(
        f"Adaptive tournament: {n} players, {len(all_pairs)} pairs, "
        f"SPRT p_decisive={p_decisive} alpha={alpha}, "
        f"min={min_games} max={max_games} games/pair\n"
        f"Worst case: {max_possible:,} games across {n_workers} workers"
    )

    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        round_num = 0
        while True:
            # Pairs still needing games: undecided and under the max budget.
            pending = [
                pair for pair in all_pairs
                if pair not in decided and played[pair] < max_games
            ]
            if not pending:
                break

            round_num += 1
            # One game per pending pair; the whole batch runs in parallel.
            batch = [(i, j, make_args(i, j)) for (i, j) in pending]
            futures = [
                loop.run_in_executor(executor, _run_game_sync, args)
                for _, _, args in batch
            ]
            results = await asyncio.gather(*futures)

            newly_decided = 0
            for (i, j, _), winner in zip(batch, results):
                played[(i, j)] += 1
                if winner == PLAYER1_ID:
                    wins[(i, j)] += 1
                total_played += 1

                # Only run the SPRT once min_games is reached, so a tiny
                # sample can't trigger a premature decision.
                if (played[(i, j)] >= min_games
                        and _sprt_check(wins[(i, j)], played[(i, j)], log_win, log_loss, log_B)):
                    decided.add((i, j))
                    newly_decided += 1

            remaining = len(all_pairs) - len(decided)
            pct = total_played / max_possible * 100
            print(
                f" Round {round_num:3d}: {len(pending):5d} games, "
                f"+{newly_decided:4d} decided, "
                f"{remaining:5d} pairs left, "
                f"{total_played:,} total ({pct:.1f}% of worst case)",
                end="\r", flush=True,
            )

    savings = max_possible - total_played
    print(
        f"\nFinished: {total_played:,} games played "
        f"(saved {savings:,} vs fixed, "
        f"{savings / max_possible * 100:.1f}% reduction)"
    )
    print(
        f"Early decisions: {len(decided)}/{len(all_pairs)} pairs "
        f"({len(decided) / len(all_pairs) * 100:.1f}%)"
    )

    return wins, played
|
||||
|
||||
|
||||
def compute_bradley_terry(
|
||||
wins: dict[tuple[int, int], int],
|
||||
n: int,
|
||||
played: dict[tuple[int, int], int] | None = None,
|
||||
games_per_matchup: int | None = None,
|
||||
iterations: int = 1000,
|
||||
) -> list[float]:
|
||||
"""
|
||||
Compute Bradley-Terry strength parameters for all n players.
|
||||
|
||||
For each pair (i, j): w_ij wins for i, w_ji wins for j.
|
||||
Iteratively updates: strength[i] = sum_j(w_ij) / sum_j((w_ij+w_ji) / (s[i]+s[j]))
|
||||
|
||||
Returns a list of strength values indexed by player. Unlike Elo, this is
|
||||
path-independent and converges to a unique maximum-likelihood solution.
|
||||
"""
|
||||
w: list[list[int]] = [[0] * n for _ in range(n)]
|
||||
for (i, j), p1_wins in wins.items():
|
||||
g = played[(i, j)] if played is not None else games_per_matchup
|
||||
if g:
|
||||
w[i][j] += p1_wins
|
||||
w[j][i] += g - p1_wins
|
||||
|
||||
strength = [1.0] * n
|
||||
for _ in range(iterations):
|
||||
new_strength = [0.0] * n
|
||||
for i in range(n):
|
||||
wins_i = sum(w[i][j] for j in range(n) if j != i)
|
||||
denom = sum(
|
||||
(w[i][j] + w[j][i]) / (strength[i] + strength[j])
|
||||
for j in range(n)
|
||||
if j != i and (w[i][j] + w[j][i]) > 0
|
||||
)
|
||||
new_strength[i] = wins_i / denom if denom > 0 else strength[i]
|
||||
# Normalize so the mean stays at 1.0
|
||||
mean = sum(new_strength) / n
|
||||
strength = [s / mean for s in new_strength]
|
||||
|
||||
return strength
|
||||
|
||||
|
||||
def rank_players(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
) -> list[int]:
    """
    Rank player indices by Bradley-Terry strength. Returns indices sorted worst-to-best.

    Provide either `played` (adaptive tournament) or `games_per_matchup` (fixed).
    Raises ValueError if neither is supplied.
    """
    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")

    n = len(players)
    ratings = compute_bradley_terry(wins, n, played=played, games_per_matchup=games_per_matchup)
    indices = list(range(n))
    indices.sort(key=lambda i: ratings[i])
    return indices
|
||||
|
||||
|
||||
# Default on-disk location for persisted tournament results (JSON).
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
|
||||
|
||||
|
||||
def save_tournament(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
):
    """Persist tournament results as JSON (inverse of load_tournament).

    Tuple keys are flattened to "i,j" strings since JSON keys must be strings.
    """
    player_entries = [
        {"personality": p.value, "difficulty": d}
        for p, d in players
    ]
    win_entries = {f"{i},{j}": w for (i, j), w in wins.items()}

    data = {
        "players": player_entries,
        "wins": win_entries,
    }
    if played is not None:
        data["played"] = {f"{i},{j}": g for (i, j), g in played.items()}
    if games_per_matchup is not None:
        data["games_per_matchup"] = games_per_matchup

    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"Tournament results saved to {path}")
|
||||
|
||||
|
||||
def load_tournament(
    path: str = TOURNAMENT_RESULTS_PATH,
) -> tuple[
    dict[tuple[int, int], int],
    dict[tuple[int, int], int] | None,
    int | None,
    list[tuple[AIPersonality, int]],
]:
    """Returns (wins, played, games_per_matchup, players).

    `played` is None for legacy fixed-game files (use games_per_matchup instead).
    `games_per_matchup` is None for adaptive files (use played instead).
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    def parse_pair_dict(d: dict) -> dict[tuple[int, int], int]:
        # Reverse the "i,j" string-key flattening done by save_tournament.
        out: dict[tuple[int, int], int] = {}
        for key, value in d.items():
            left, _, right = key.partition(",")
            out[(int(left), int(right))] = value
        return out

    wins = parse_pair_dict(data["wins"])
    played = parse_pair_dict(data["played"]) if "played" in data else None
    games_per_matchup = data.get("games_per_matchup")
    players = [
        (AIPersonality(entry["personality"]), entry["difficulty"])
        for entry in data["players"]
    ]
    return wins, played, games_per_matchup, players
|
||||
|
||||
|
||||
def draw_grid(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]] | None = None,
    output_path: str = "tournament_grid.png",
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
    ranked: list[int] | None = None,
):
    """
    Draw a heatmap grid of tournament results.

    Rows = first player
    Cols = second player
    Color = red if first player won more of their games in that cell
            green if second player won more
            × = one player swept all games in that cell

    Provide either `played` (adaptive) or `games_per_matchup` (fixed);
    raises ValueError if neither is given. Writes a PNG to `output_path`.
    """
    # Local imports keep matplotlib optional; the Agg backend renders
    # without a display.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import numpy as np

    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")

    if players is None:
        players = _all_players()
    n = len(players)
    if ranked is None:
        ranked = rank_players(wins, players, played=played, games_per_matchup=games_per_matchup)

    labels = [_player_label(*players[i]) for i in ranked]

    def games(i: int, j: int) -> int:
        # Number of games recorded for ordered pair (i, j); 0 when unknown.
        return_value = played[(i, j)] if played is not None else games_per_matchup
        return return_value if return_value is not None else 0

    # Build value matrix: (p1_wins - p2_wins) / total_games ∈ [-1, 1]
    matrix = np.full((n, n), np.nan)
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            matrix[row, col] = (p1_wins - (g - p1_wins)) / g if g > 0 else 0.0

    cell_size = 0.22
    fig_size = n * cell_size + 3
    fig, ax = plt.subplots(figsize=(fig_size, fig_size))

    cmap = mcolors.LinearSegmentedColormap.from_list(
        "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
    )
    norm = mcolors.Normalize(vmin=-1, vmax=1)

    img = ax.imshow(matrix, cmap=cmap, norm=norm, aspect="equal", interpolation="none")

    # × marks for sweeps. Fix: require g > 0 — previously a cell with zero
    # recorded games vacuously satisfied p1_wins == 0 and was marked a sweep.
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            if g > 0 and (p1_wins == g or p1_wins == 0):
                ax.text(col, row, "×", ha="center", va="center",
                        fontsize=5, color="black", fontweight="bold", zorder=3)

    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(labels, rotation=90, fontsize=4)
    ax.set_yticklabels(labels, fontsize=4)
    ax.xaxis.set_label_position("top")
    ax.xaxis.tick_top()

    ax.set_xlabel("Second player", labelpad=8, fontsize=8)
    ax.set_ylabel("First player", labelpad=8, fontsize=8)
    ax.set_title(
        "Tournament results — red: first player wins more, green: second player wins more",
        pad=14, fontsize=9,
    )

    plt.colorbar(img, ax=ax, fraction=0.015, pad=0.01,
                 label="(P1 wins - P2 wins) / games per cell")

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Grid saved to {output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Restrict the field to the top difficulty band.
    difficulties = list(range(8, 11))

    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins, played = asyncio.run(run_tournament_adaptive(
        card_pool,
        difficulties=difficulties,
        min_games=20,
        max_games=1000,
        p_decisive=0.65,
        alpha=0.05,
    ))
    save_tournament(wins, players=players, played=played)

    ratings = compute_bradley_terry(wins, len(players), played=played)
    ranked = sorted(range(len(players)), key=lambda i: ratings[i])  # worst-to-best
    draw_grid(wins, players=players, played=played, ranked=ranked)

    # Fix: these are Bradley-Terry strengths, not Elo points — label them
    # correctly so the output isn't misread on an Elo scale.
    print("\nFinal Bradley-Terry ratings (best to worst):")
    for rank, i in enumerate(reversed(ranked), 1):
        personality, difficulty = players[i]
        label = _player_label(personality, difficulty)
        print(f" {rank:2d}. {label:<12} {ratings[i]:.1f}")
|
||||
278
backend/ai/train_nn.py
Normal file
278
backend/ai/train_nn.py
Normal file
@@ -0,0 +1,278 @@
|
||||
import os
|
||||
import random
|
||||
import uuid
|
||||
from collections import deque
|
||||
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from game.card import compute_deck_type
|
||||
from ai.engine import AIPersonality, choose_cards, choose_plan
|
||||
from game.rules import PlayerState, GameState, action_play_card, action_sacrifice, action_end_turn
|
||||
from ai.simulate import get_simulation_cards, _make_instances, MAX_TURNS
|
||||
from ai.nn import NeuralNet, NeuralPlayer
|
||||
from ai.card_pick_nn import CardPickPlayer, N_CARD_FEATURES, CARD_PICK_WEIGHTS_PATH
|
||||
|
||||
# Weights file for the plan-selection network.
NN_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "nn_weights.json")

# Player ids used in every training episode.
P1 = "p1"
P2 = "p2"

# Scripted opponent personalities used during training.
# NOTE(review): ARBITRARY and JEBRASKA are excluded — presumably because
# they are random/novelty personalities that give a poor training signal;
# confirm against ai.engine.
FIXED_PERSONALITIES = [
    p for p in AIPersonality
    if p not in (
        AIPersonality.ARBITRARY,
        AIPersonality.JEBRASKA
    )
]
|
||||
|
||||
|
||||
def _build_player(pid: str, name: str, cards: list, difficulty: int, personality: AIPersonality,
                  deck_pool: dict | None = None) -> PlayerState:
    """Build a PlayerState for a scripted opponent, reusing pooled decks when available."""
    if deck_pool and personality in deck_pool:
        # Reuse a pre-built deck instead of drafting from scratch.
        deck = random.choice(deck_pool[personality])
    else:
        deck = choose_cards(cards, difficulty, personality)
    instances = _make_instances(deck)
    random.shuffle(instances)
    deck_type = compute_deck_type(deck) or "Balanced"
    return PlayerState(
        user_id=pid, username=name,
        deck_type=deck_type,
        deck=instances,
    )
|
||||
|
||||
|
||||
def _build_nn_player(pid: str, name: str, cards: list, difficulty: int,
                     card_pick_player: CardPickPlayer) -> PlayerState:
    """Build a PlayerState using the card-pick NN for deck selection."""
    # Higher difficulties unlock pricier cards; below 6 the cap is fixed at 6.
    if difficulty >= 6:
        max_card_cost = difficulty + 1
    else:
        max_card_cost = 6
    allowed = [c for c in cards if c.cost <= max_card_cost]
    if not allowed:
        # Degenerate pool (every card over the cap): fall back to everything.
        allowed = list(cards)
    deck = card_pick_player.choose_cards(allowed, difficulty)
    instances = _make_instances(deck)
    random.shuffle(instances)
    return PlayerState(
        user_id=pid, username=name,
        deck_type=compute_deck_type(deck) or "Balanced",
        deck=instances,
    )
|
||||
|
||||
|
||||
def run_episode(
    p1_state: PlayerState,
    p2_state: PlayerState,
    p1_ctrl,  # (player, opponent) -> MovePlan
    p2_ctrl,  # (player, opponent) -> MovePlan
) -> str | None:
    """Play one full game between two controllers.

    Returns winner_id (P1 or P2) or None on timeout (MAX_TURNS exceeded).
    """
    # Standard opening: both caps tick up; only P1 (first player) gets
    # energy and a full hand before turn 1.
    p1_state.increment_energy_cap()
    p2_state.increment_energy_cap()
    p1_state.refill_energy()
    p1_state.draw_to_full()

    state = GameState(
        game_id=str(uuid.uuid4()),
        players={P1: p1_state, P2: p2_state},
        player_order=[P1, P2],
        active_player_id=P1,
        phase="main",
        turn=1,
    )
    ctrls = {P1: p1_ctrl, P2: p2_ctrl}

    for _ in range(MAX_TURNS):
        if state.result:
            break
        active_id = state.active_player_id
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]

        plan = ctrls[active_id](player, opponent)

        # Sacrifices first so freed board slots can receive plays.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)

        # Play in random order; skip any play invalidated by an earlier
        # action (card gone from hand, slot occupied, not enough energy).
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None or player.board[slot] is not None or card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)

        action_end_turn(state)

    return state.result.winner_id if state.result else None
|
||||
|
||||
|
||||
def train(
    n_episodes: int = 50_000,
    self_play_start: int = 0,
    self_play_max_frac: float = 0.9,
    lr: float = 1e-3,
    opp_difficulty: int = 10,
    temperature: float = 1.0,
    batch_size: int = 500,
    save_every: int = 5_000,
    save_path: str = NN_WEIGHTS_PATH,
) -> NeuralNet:
    """Policy-gradient training loop for the plan net and the card-pick net.

    Each episode is one simulated game. Episodes are either self-play
    (both seats driven by the plan net) or versus a scripted opponent
    with a fixed personality; the self-play probability ramps linearly
    from 0 to ``self_play_max_frac`` starting at episode
    ``self_play_start``. Per-player gradients are baseline-adjusted,
    accumulated, and applied with Adam once ``batch_size`` contributions
    have been collected (separately for each net). Both nets are
    checkpointed every ``save_every`` episodes and again at the end.

    Returns the trained plan network (the card-pick net is persisted to
    CARD_PICK_WEIGHTS_PATH as a side effect).
    """
    cards = get_simulation_cards()

    # Pre-build a pool of opponent decks per personality to avoid rebuilding from scratch each episode.
    DECK_POOL_SIZE = 100
    opp_deck_pool: dict[AIPersonality, list] = {
        p: [choose_cards(cards, opp_difficulty, p) for _ in range(DECK_POOL_SIZE)]
        for p in FIXED_PERSONALITIES
    }

    # Resume the plan net from disk when a checkpoint exists.
    if os.path.exists(save_path):
        print(f"Resuming plan net from {save_path}")
        net = NeuralNet.load(save_path)
    else:
        print("Initializing new plan network")
        net = NeuralNet(seed=42)

    # The card-pick net trains alongside but persists to its own file.
    cp_path = CARD_PICK_WEIGHTS_PATH
    if os.path.exists(cp_path):
        print(f"Resuming card-pick net from {cp_path}")
        card_pick_net = NeuralNet.load(cp_path)
    else:
        print("Initializing new card-pick network")
        card_pick_net = NeuralNet(n_features=N_CARD_FEATURES, hidden=(32, 16), seed=43)

    recent_outcomes: deque[int] = deque(maxlen=1000)  # rolling window for win rate display
    baseline = 0.0  # EMA of recent outcomes; subtracted before each update
    baseline_alpha = 0.99  # decay — roughly a 100-episode window

    # Gradient accumulators for the plan net, averaged when flushed.
    batch_gw = [np.zeros_like(w) for w in net.weights]
    batch_gb = [np.zeros_like(b) for b in net.biases]
    batch_count = 0

    # Separate accumulators for the card-pick net.
    cp_batch_gw = [np.zeros_like(w) for w in card_pick_net.weights]
    cp_batch_gb = [np.zeros_like(b) for b in card_pick_net.biases]
    cp_batch_count = 0

    for episode in range(1, n_episodes + 1):
        # Ramp self-play fraction linearly from 0 to self_play_max_frac
        if episode >= self_play_start:
            progress = (episode - self_play_start) / max(1, n_episodes - self_play_start)
            self_play_prob = self_play_max_frac * progress
        else:
            self_play_prob = 0.0

        # Randomly decide who goes first (NN is always P1 for simplicity)
        nn_goes_first = random.random() < 0.5

        if random.random() < self_play_prob:
            # --- Self-play episode: both seats share the same weights
            # but keep independent trajectory buffers for grads. ---
            nn1 = NeuralPlayer(net, training=True, temperature=temperature)
            nn2 = NeuralPlayer(net, training=True, temperature=temperature)
            cp1 = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            cp2 = CardPickPlayer(card_pick_net, training=True, temperature=temperature)

            p1_state = _build_nn_player(P1, "NN1", cards, 10, cp1)
            p2_state = _build_nn_player(P2, "NN2", cards, 10, cp2)

            # NOTE(review): this swap seats the state built with id P1 at
            # the P2 position (and vice versa) while the controllers stay
            # put — confirm winner attribution below is consistent with
            # how run_episode/GameState derive winner_id.
            if not nn_goes_first:
                p1_state, p2_state = p2_state, p1_state

            winner = run_episode(p1_state, p2_state, nn1.choose_plan, nn2.choose_plan)
            p1_outcome = 1.0 if winner == P1 else -1.0
            baseline = baseline_alpha * baseline + (1 - baseline_alpha) * p1_outcome

            # NOTE(review): the P2 copy's advantage is (-p1_outcome - baseline);
            # if baseline tracks P1's outcome, the mirrored advantage would be
            # (-p1_outcome + baseline) — confirm the sign is intended.
            for player_grads in [nn1.compute_grads(p1_outcome - baseline),
                                 nn2.compute_grads(-p1_outcome - baseline)]:
                if player_grads is not None:
                    gw, gb = player_grads
                    for i in range(len(batch_gw)):
                        batch_gw[i] += gw[i]
                        batch_gb[i] += gb[i]
                    batch_count += 1

            for cp_grads in [cp1.compute_grads(p1_outcome - baseline),
                             cp2.compute_grads(-p1_outcome - baseline)]:
                if cp_grads is not None:
                    gw, gb = cp_grads
                    for i in range(len(cp_batch_gw)):
                        cp_batch_gw[i] += gw[i]
                        cp_batch_gb[i] += gb[i]
                    cp_batch_count += 1

        else:
            # --- Versus-scripted-opponent episode. ---
            opp_personality = random.choice(FIXED_PERSONALITIES)
            nn_player = NeuralPlayer(net, training=True, temperature=temperature)
            cp_player = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            # Bind personality/difficulty as defaults to avoid late-binding issues.
            opp_ctrl = lambda p, o, pers=opp_personality, diff=opp_difficulty: choose_plan(p, o, pers, diff)

            if nn_goes_first:
                nn_id = P1
                p1_state = _build_nn_player(P1, "NN", cards, 10, cp_player)
                p2_state = _build_player(P2, "OPP", cards, opp_difficulty, opp_personality, opp_deck_pool)
                winner = run_episode(p1_state, p2_state, nn_player.choose_plan, opp_ctrl)
            else:
                nn_id = P2
                p1_state = _build_player(P1, "OPP", cards, opp_difficulty, opp_personality, opp_deck_pool)
                p2_state = _build_nn_player(P2, "NN", cards, 10, cp_player)
                winner = run_episode(p1_state, p2_state, opp_ctrl, nn_player.choose_plan)

            nn_outcome = 1.0 if winner == nn_id else -1.0
            # NOTE(review): plan-net grads use the pre-update baseline here,
            # while the card-pick grads below use the post-update baseline
            # (the self-play branch updates the baseline first for both) —
            # confirm this asymmetry is intentional.
            player_grads = nn_player.compute_grads(nn_outcome - baseline)
            baseline = baseline_alpha * baseline + (1 - baseline_alpha) * nn_outcome

            if player_grads is not None:
                gw, gb = player_grads
                for i in range(len(batch_gw)):
                    batch_gw[i] += gw[i]
                    batch_gb[i] += gb[i]
                batch_count += 1

            cp_grads = cp_player.compute_grads(nn_outcome - baseline)
            if cp_grads is not None:
                gw, gb = cp_grads
                for i in range(len(cp_batch_gw)):
                    cp_batch_gw[i] += gw[i]
                    cp_batch_gb[i] += gb[i]
                cp_batch_count += 1

        # NOTE(review): nn_id is only assigned in the versus-opponent
        # branch; after a self-play episode this reuses (or, if the very
        # first episode is self-play, fails on) a stale nn_id — verify.
        recent_outcomes.append(1 if winner == nn_id else 0)

        # Flush the plan-net batch: average, Adam step, reset.
        if batch_count >= batch_size:
            for i in range(len(batch_gw)):
                batch_gw[i] /= batch_count
                batch_gb[i] /= batch_count
            net.adam_update(batch_gw, batch_gb, lr=lr)
            batch_gw = [np.zeros_like(w) for w in net.weights]
            batch_gb = [np.zeros_like(b) for b in net.biases]
            batch_count = 0

        # Flush the card-pick batch independently.
        if cp_batch_count >= batch_size:
            for i in range(len(cp_batch_gw)):
                cp_batch_gw[i] /= cp_batch_count
                cp_batch_gb[i] /= cp_batch_count
            card_pick_net.adam_update(cp_batch_gw, cp_batch_gb, lr=lr)
            cp_batch_gw = [np.zeros_like(w) for w in card_pick_net.weights]
            cp_batch_gb = [np.zeros_like(b) for b in card_pick_net.biases]
            cp_batch_count = 0

        # Progress display: full line every 1000 episodes, ticker otherwise.
        if episode % 1000 == 0 or episode == n_episodes:
            wr = sum(recent_outcomes) / len(recent_outcomes) if recent_outcomes else 0.0
            print(f"\r[{episode:>6}/{n_episodes}] win rate (last {len(recent_outcomes)}): {wr:.1%} "
                  f"self-play frac: {self_play_prob:.0%}", flush=True)
        else:
            print(f" {episode % 1000}/1000", end="\r", flush=True)

        # Periodic checkpoint of both nets.
        if episode % save_every == 0:
            net.save(save_path)
            card_pick_net.save(cp_path)
            print(f" → saved to {save_path} and {cp_path}")

    # Final save regardless of save_every alignment.
    net.save(save_path)
    card_pick_net.save(cp_path)
    wr = sum(recent_outcomes) / len(recent_outcomes) if recent_outcomes else 0.0
    print(f"Done. Final win rate (last {len(recent_outcomes)}): {wr:.1%}")
    return net
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run a full training session with default hyperparameters when
    # this module is executed as a script.
    train()
|
||||
Reference in New Issue
Block a user