# wiki-tcg/backend/simulate.py

import json
import math
import os
import random
import uuid
import asyncio
from concurrent.futures import ProcessPoolExecutor
from dotenv import load_dotenv
load_dotenv()

from datetime import datetime

from card import Card, CardType, CardRarity, generate_cards, compute_deck_type
from game import (
  CardInstance, PlayerState, GameState,
  action_play_card, action_sacrifice, action_end_turn,
)
from ai import AIPersonality, choose_cards, choose_plan

# JSON cache of the simulation card pool, stored next to this module.
# get_simulation_cards() reads it when present and writes it otherwise.
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
# Number of cards requested from generate_cards() when the cache file is missing.
SIMULATION_CARD_COUNT = 1000


# ==================== Card pool ====================

def _card_to_dict(card: Card) -> dict:
  """Flatten a Card into a JSON-serializable dict.

  The creation timestamp is stored as an ISO-8601 string and the rarity/type
  enums are stored by member name; every other field is copied unchanged.
  """
  created_iso = card.created_at.isoformat()
  rarity_name = card.card_rarity.name
  type_name = card.card_type.name
  # Keyword order fixes the key order of the resulting dict (and of the JSON file).
  return dict(
    name=card.name,
    created_at=created_iso,
    image_link=card.image_link,
    card_rarity=rarity_name,
    card_type=type_name,
    wikidata_instance=card.wikidata_instance,
    text=card.text,
    attack=card.attack,
    defense=card.defense,
    cost=card.cost,
  )


def _dict_to_card(d: dict) -> Card:
  """Rebuild a Card from a dict in the layout produced by `_card_to_dict`.

  `created_at` is parsed from its ISO-8601 string and the rarity/type enums are
  looked up by member name; the remaining fields are passed through as stored.
  Raises KeyError if a field (or an enum member name) is missing.
  """
  # Fields that need decoding; everything else is copied verbatim.
  decoders = {
    "created_at": lambda raw: datetime.fromisoformat(raw),
    "card_rarity": lambda raw: CardRarity[raw],
    "card_type": lambda raw: CardType[raw],
  }
  field_order = (
    "name", "created_at", "image_link", "card_rarity", "card_type",
    "wikidata_instance", "text", "attack", "defense", "cost",
  )

  kwargs = {}
  for field in field_order:
    raw_value = d[field]
    decode = decoders.get(field)
    kwargs[field] = raw_value if decode is None else decode(raw_value)
  return Card(**kwargs)


def get_simulation_cards() -> list[Card]:
  """Return the card pool used for simulations.

  If the cache file at SIMULATION_CARDS_PATH exists, the cards are loaded from
  it. Otherwise SIMULATION_CARD_COUNT cards are generated, written to the cache
  file as JSON, and returned.
  """
  cache_path = SIMULATION_CARDS_PATH

  if not os.path.exists(cache_path):
    print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
    generated = generate_cards(SIMULATION_CARD_COUNT)

    with open(cache_path, "w", encoding="utf-8") as out_file:
      payload = [_card_to_dict(card) for card in generated]
      json.dump(payload, out_file, ensure_ascii=False, indent=2)

    print(f"Saved {len(generated)} cards to {SIMULATION_CARDS_PATH}")
    return generated

  with open(cache_path, "r", encoding="utf-8") as in_file:
    entries = json.load(in_file)
  return [_dict_to_card(entry) for entry in entries]


# ==================== Single game ====================

# Fixed user ids for the two simulated AIs; they key GameState.players in
# simulate_game() and are compared against the reported winner id.
PLAYER1_ID = "p1"
PLAYER2_ID = "p2"
# Upper bound on the number of individual player turns simulate_game() runs.
MAX_TURNS = 300  # safety cap to prevent infinite games


def _make_instances(deck: list[Card]) -> list[CardInstance]:
  """Create one in-game CardInstance per card in `deck`, preserving order.

  Each instance gets a fresh random UUID as its instance id; the card name
  doubles as the card id. A missing image link or text becomes an empty string.
  """
  instances: list[CardInstance] = []
  for card in deck:
    instance = CardInstance(
      instance_id=str(uuid.uuid4()),
      card_id=card.name,
      name=card.name,
      attack=card.attack,
      defense=card.defense,
      max_defense=card.defense,  # instances start at full defense
      cost=card.cost,
      card_type=card.card_type.name,
      card_rarity=card.card_rarity.name,
      image_link=card.image_link or "",
      text=card.text or "",
    )
    instances.append(instance)
  return instances


def simulate_game(
  cards: list[Card],
  difficulty1: int,
  personality1: AIPersonality,
  difficulty2: int,
  personality2: AIPersonality,
) -> str | None:
  """
  Simulate a single game between two AIs choosing from `cards`.
  Player 1 always goes first.

  Each AI builds its own deck from the shared pool with `choose_cards`, then
  on every turn asks `choose_plan` for sacrifices and plays, which are applied
  to the game state before the turn is ended.

  Returns the winner id reported by the game state ("p1" or "p2"), or None if
  no result with a winner was produced within MAX_TURNS turns.
  """
  # Both AIs draft independently from the same card pool.
  deck1 = choose_cards(cards, difficulty1, personality1)
  deck2 = choose_cards(cards, difficulty2, personality2)

  instances1 = _make_instances(deck1)
  instances2 = _make_instances(deck2)
  random.shuffle(instances1)
  random.shuffle(instances2)

  # Fall back to "Balanced" when compute_deck_type returns a falsy value.
  deck_type1 = compute_deck_type(deck1) or "Balanced"
  deck_type2 = compute_deck_type(deck2) or "Balanced"

  p1 = PlayerState(user_id=PLAYER1_ID, username="AI1", deck_type=deck_type1, deck=instances1)
  p2 = PlayerState(user_id=PLAYER2_ID, username="AI2", deck_type=deck_type2, deck=instances2)

  # P1 always goes first
  # Both energy caps are raised up front, but only P1 gets energy and an
  # opening hand here.
  # NOTE(review): presumably action_end_turn() refills/draws for the incoming
  # player — confirm in game.py.
  p1.increment_energy_cap()
  p2.increment_energy_cap()
  p1.refill_energy()
  p1.draw_to_full()

  state = GameState(
    game_id=str(uuid.uuid4()),
    players={PLAYER1_ID: p1, PLAYER2_ID: p2},
    player_order=[PLAYER1_ID, PLAYER2_ID],
    active_player_id=PLAYER1_ID,
    phase="main",
    turn=1,
  )

  # (difficulty, personality) per player id, looked up for whoever is active.
  configs = {
    PLAYER1_ID: (difficulty1, personality1),
    PLAYER2_ID: (difficulty2, personality2),
  }

  # Each iteration plays and ends one turn for the currently active player.
  for _ in range(MAX_TURNS):
    if state.result:
      break

    active_id = state.active_player_id
    difficulty, personality = configs[active_id]
    player = state.players[active_id]
    opponent = state.players[state.opponent_id(active_id)]

    plan = choose_plan(player, opponent, personality, difficulty)

    # Apply planned sacrifices, skipping slots that are already empty.
    for slot in plan.sacrifice_slots:
      if player.board[slot] is not None:
        action_sacrifice(state, slot)

    # Planned plays are applied in random order; each one is re-validated
    # against the current state and silently skipped if no longer possible.
    plays = list(plan.plays)
    random.shuffle(plays)
    for card, slot in plays:
      # Resolve the hand index by identity at play time.
      hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
      if hand_idx is None:
        continue
      if player.board[slot] is not None:
        continue
      if card.cost > player.energy:
        continue
      action_play_card(state, hand_idx, slot)

    action_end_turn(state)

  # No result, or a result without a winner id, is reported as None.
  if state.result and state.result.winner_id:
    return state.result.winner_id
  return None


# ==================== Process-pool worker ====================
# These must be module-level so they are picklable.

# Card pool for the current process; set by _init_worker() and read by
# _run_game_sync().
_worker_cards: list[Card] = []

def _init_worker(cards: list[Card]) -> None:
  """Store `cards` in this process's module-level `_worker_cards`.

  Used as the ProcessPoolExecutor `initializer` by the tournament runners so
  the card pool is passed via `initargs` instead of with every game's args.
  """
  global _worker_cards
  _worker_cards = cards

def _run_game_sync(args: tuple) -> str | None:
  """Run one game in a worker process and return simulate_game()'s result.

  `args` is (difficulty1, personality1_value, difficulty2, personality2_value);
  personalities travel as their enum values and are rebuilt here. The card
  pool comes from the module-level `_worker_cards`.
  """
  first_difficulty, first_value, second_difficulty, second_value = args
  first_personality = AIPersonality(first_value)
  second_personality = AIPersonality(second_value)
  return simulate_game(
    _worker_cards,
    first_difficulty,
    first_personality,
    second_difficulty,
    second_personality,
  )


# ==================== Tournament ====================

def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
  """List every (personality, difficulty) combination.

  Personalities vary slowest: all requested difficulties are emitted for the
  first personality, then for the next, and so on. When `difficulties` is None
  the levels 1 through 10 are used.
  """
  levels = list(range(1, 11)) if difficulties is None else difficulties

  combos: list[tuple[AIPersonality, int]] = []
  for personality in AIPersonality:
    for level in levels:
      combos.append((personality, level))
  return combos


def _player_label(personality: AIPersonality, difficulty: int) -> str:
  """Build a short display label such as "AGG-7".

  The label is the first three characters of the personality's value,
  upper-cased, followed by a dash and the difficulty.
  """
  prefix = personality.value[:3].upper()
  return prefix + "-" + format(difficulty)


async def run_tournament(
  cards: list[Card],
  games_per_matchup: int = 5,
  difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
  """
  Play a fixed-size round robin between all (personality, difficulty) players.

  Every ordered pair (first player, second player) — including a player against
  itself — plays `games_per_matchup` games. `difficulties` selects which
  difficulty levels take part (default: 1-10).

  Returns a dict keyed by (first_player_index, second_player_index) whose value
  is the number of games the first player won. Games without a winner are
  counted and printed as ties but are not stored in the result.

  All games are submitted to a ProcessPoolExecutor with one worker per CPU;
  the card pool is handed to each worker once through the pool initializer.
  """
  players = _all_players(difficulties)
  n = len(players)

  # One schedule entry per game: (first index, second index, worker args).
  schedule: list[tuple[int, int, tuple]] = []
  for i, (first_personality, first_difficulty) in enumerate(players):
    for j, (second_personality, second_difficulty) in enumerate(players):
      game_args = (
        first_difficulty, first_personality.value,
        second_difficulty, second_personality.value,
      )
      schedule.extend((i, j, game_args) for _ in range(games_per_matchup))

  total_games = len(schedule)
  n_workers = os.cpu_count() or 1
  print(f"Running {total_games} games across {n_workers} workers "
        f"({n} players, {games_per_matchup} games per ordered pair)...")

  completed = 0
  report_every = max(1, total_games // 200)  # roughly 200 progress updates

  loop = asyncio.get_running_loop()

  async def tracked(future):
    # Await one game and refresh the single-line progress display.
    nonlocal completed
    outcome = await future
    completed += 1
    if completed == total_games or completed % report_every == 0:
      pct = completed / total_games * 100
      print(f"  {completed}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
    return outcome

  with ProcessPoolExecutor(
    max_workers=n_workers,
    initializer=_init_worker,
    initargs=(cards,),
  ) as executor:
    futures = [
      loop.run_in_executor(executor, _run_game_sync, entry[2])
      for entry in schedule
    ]
    # gather() keeps results in submission order, matching `schedule`.
    results = await asyncio.gather(*map(tracked, futures))

  print("\nFinished")

  wins: dict[tuple[int, int], int] = {}
  ties = 0
  for (i, j, _), winner in zip(schedule, results):
    wins.setdefault((i, j), 0)
    if winner == PLAYER1_ID:
      wins[(i, j)] += 1
    elif winner is None:
      ties += 1

  print(f"Ties: {ties}")

  return wins


def _sprt_check(wins: int, total: int, log_win: float, log_loss: float, log_B: float) -> bool:
  """
  Report whether the sequential probability ratio test has stopped.

  The log-likelihood ratio of H1 (win_rate = p_decisive) against
  H0 (win_rate = 0.5) is accumulated per game:
    each win  adds log_win  = log(p_decisive / 0.5)
    each loss adds log_loss = log((1 - p_decisive) / 0.5)

  Returns True once the ratio reaches +log_B (evidence for H1) or -log_B
  (evidence for H0). Every non-win counts as a loss here. The test is
  one-sided: a first player that wins rarely pushes the ratio towards -log_B,
  the same bound an even matchup drifts towards more slowly.
  """
  losses = total - wins
  llr = wins * log_win + losses * log_loss
  return abs(llr) >= log_B


async def run_tournament_adaptive(
  cards: list[Card],
  difficulties: list[int] | None = None,
  min_games: int = 5,
  max_games: int = 200,
  p_decisive: float = 0.65,
  alpha: float = 0.05,
) -> tuple[dict[tuple[int, int], int], dict[tuple[int, int], int]]:
  """
  Like run_tournament but allocates games adaptively.

  Each ordered pair (i, j) keeps playing until it has at least `min_games`
  games and the SPRT (H0: win rate 0.5, H1: win rate `p_decisive`, threshold
  log((1 - alpha) / alpha)) has stopped, or until `max_games` is reached.

  Returns (wins, played):
    wins[(i, j)]   — how many games player i won as first player against j
    played[(i, j)] — how many games were played for that pair

  A game without a winner counts towards `played` but not towards `wins`.

  Each round, all currently-undecided pairs play one game in parallel across
  all CPU cores, preserving full parallelism while adapting per-pair budgets.

  Raises ValueError if `p_decisive` or `alpha` is not strictly between 0 and 1
  (the SPRT constants are undefined outside that range).
  """
  if not 0.0 < p_decisive < 1.0:
    raise ValueError(f"p_decisive must be strictly between 0 and 1, got {p_decisive}")
  if not 0.0 < alpha < 1.0:
    raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")

  players = _all_players(difficulties)
  n = len(players)
  all_pairs = [(i, j) for i in range(n) for j in range(n)]

  wins:   dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
  played: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
  decided: set[tuple[int, int]] = set()

  # Precompute SPRT constants (H0: p=0.5, H1: p=p_decisive)
  log_B    = math.log((1 - alpha) / alpha)
  log_win  = math.log(p_decisive / 0.5)
  log_loss = math.log((1 - p_decisive) / 0.5)

  def make_args(i: int, j: int) -> tuple:
    # Worker args carry enum values rather than enum members.
    p1, d1 = players[i]
    p2, d2 = players[j]
    return (d1, p1.value, d2, p2.value)

  n_workers = os.cpu_count() or 1
  loop = asyncio.get_running_loop()
  total_played = 0
  max_possible = len(all_pairs) * max_games

  print(
    f"Adaptive tournament: {n} players, {len(all_pairs)} pairs, "
    f"SPRT p_decisive={p_decisive} alpha={alpha}, "
    f"min={min_games} max={max_games} games/pair\n"
    f"Worst case: {max_possible:,} games across {n_workers} workers"
  )

  with ProcessPoolExecutor(
    max_workers=n_workers,
    initializer=_init_worker,
    initargs=(cards,),
  ) as executor:
    round_num = 0
    while True:
      pending = [
        pair for pair in all_pairs
        if pair not in decided and played[pair] < max_games
      ]
      if not pending:
        break

      round_num += 1
      futures = [
        loop.run_in_executor(executor, _run_game_sync, make_args(i, j))
        for (i, j) in pending
      ]
      # gather() returns results in submission order, matching `pending`.
      results = await asyncio.gather(*futures)

      newly_decided = 0
      for pair, winner in zip(pending, results):
        played[pair] += 1
        if winner == PLAYER1_ID:
          wins[pair] += 1
        total_played += 1

        if (played[pair] >= min_games
            and _sprt_check(wins[pair], played[pair], log_win, log_loss, log_B)):
          decided.add(pair)
          newly_decided += 1

      remaining = len(all_pairs) - len(decided)
      # `pending` was non-empty, so max_possible is positive here.
      pct = total_played / max_possible * 100
      print(
        f"  Round {round_num:3d}: {len(pending):5d} games, "
        f"+{newly_decided:4d} decided, "
        f"{remaining:5d} pairs left, "
        f"{total_played:,} total ({pct:.1f}% of worst case)",
        end="\r", flush=True,
      )

  # With no players or max_games <= 0 nothing was scheduled; report 0% instead
  # of dividing by zero.
  savings = max_possible - total_played
  reduction_pct = savings / max_possible * 100 if max_possible > 0 else 0.0
  early_pct = len(decided) / len(all_pairs) * 100 if all_pairs else 0.0
  print(
    f"\nFinished: {total_played:,} games played "
    f"(saved {savings:,} vs fixed, "
    f"{reduction_pct:.1f}% reduction)"
  )
  print(
    f"Early decisions: {len(decided)}/{len(all_pairs)} pairs "
    f"({early_pct:.1f}%)"
  )

  return wins, played


def compute_bradley_terry(
  wins: dict[tuple[int, int], int],
  n: int,
  played: dict[tuple[int, int], int] | None = None,
  games_per_matchup: int | None = None,
  iterations: int = 1000,
) -> list[float]:
  """
  Compute Bradley-Terry strength parameters for all n players.

  For each pair (i, j): w_ij wins for i, w_ji wins for j.
  Iteratively updates: strength[i] = sum_j(w_ij) / sum_j((w_ij+w_ji) / (s[i]+s[j]))

  Returns a list of strength values indexed by player. Unlike Elo, this is
  path-independent and converges to a unique maximum-likelihood solution.
  """
  w: list[list[int]] = [[0] * n for _ in range(n)]
  for (i, j), p1_wins in wins.items():
    g = played[(i, j)] if played is not None else games_per_matchup
    if g:
      w[i][j] += p1_wins
      w[j][i] += g - p1_wins

  strength = [1.0] * n
  for _ in range(iterations):
    new_strength = [0.0] * n
    for i in range(n):
      wins_i = sum(w[i][j] for j in range(n) if j != i)
      denom = sum(
        (w[i][j] + w[j][i]) / (strength[i] + strength[j])
        for j in range(n)
        if j != i and (w[i][j] + w[j][i]) > 0
      )
      new_strength[i] = wins_i / denom if denom > 0 else strength[i]
    # Normalize so the mean stays at 1.0
    mean = sum(new_strength) / n
    strength = [s / mean for s in new_strength]

  return strength


def rank_players(
  wins: dict[tuple[int, int], int],
  players: list[tuple[AIPersonality, int]],
  played: dict[tuple[int, int], int] | None = None,
  games_per_matchup: int | None = None,
) -> list[int]:
  """
  Order player indices by Bradley-Terry strength, weakest first.

  Game counts come from `played` (adaptive tournament) or from
  `games_per_matchup` (fixed tournament); at least one must be given,
  otherwise ValueError is raised.
  """
  if played is None and games_per_matchup is None:
    raise ValueError("Provide either played or games_per_matchup")

  player_count = len(players)
  ratings = compute_bradley_terry(
    wins,
    player_count,
    played=played,
    games_per_matchup=games_per_matchup,
  )
  order = list(range(player_count))
  order.sort(key=ratings.__getitem__)  # stable ascending sort, same as sorted()
  return order


# Default JSON file for save_tournament() / load_tournament(), stored next to this module.
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")


def save_tournament(
  wins: dict[tuple[int, int], int],
  players: list[tuple[AIPersonality, int]],
  path: str = TOURNAMENT_RESULTS_PATH,
  played: dict[tuple[int, int], int] | None = None,
  games_per_matchup: int | None = None,
):
  """
  Write tournament results to `path` as JSON.

  The file holds the player list (personality value + difficulty) and the
  wins table, with pair keys encoded as "i,j" strings. `played` and
  `games_per_matchup` are included only when they are provided.
  """
  def encode_pairs(table: dict[tuple[int, int], int]) -> dict[str, int]:
    # JSON object keys must be strings, so (i, j) becomes "i,j".
    return {f"{i},{j}": value for (i, j), value in table.items()}

  data: dict = {}
  data["players"] = [
    {"personality": personality.value, "difficulty": difficulty}
    for personality, difficulty in players
  ]
  data["wins"] = encode_pairs(wins)
  if played is not None:
    data["played"] = encode_pairs(played)
  if games_per_matchup is not None:
    data["games_per_matchup"] = games_per_matchup

  with open(path, "w", encoding="utf-8") as out_file:
    json.dump(data, out_file, indent=2)
  print(f"Tournament results saved to {path}")


def load_tournament(
  path: str = TOURNAMENT_RESULTS_PATH,
) -> tuple[
  dict[tuple[int, int], int],
  dict[tuple[int, int], int] | None,
  int | None,
  list[tuple[AIPersonality, int]],
]:
  """Read a tournament results JSON file.

  Returns (wins, played, games_per_matchup, players).

  `played` is None when the file has no "played" table (fixed-game files; use
  games_per_matchup instead). `games_per_matchup` is None when the file does
  not store it (adaptive files; use played instead).
  """
  with open(path, "r", encoding="utf-8") as in_file:
    data = json.load(in_file)

  def decode_pairs(raw: dict) -> dict[tuple[int, int], int]:
    # Turn "i,j" string keys back into (i, j) integer tuples.
    decoded: dict[tuple[int, int], int] = {}
    for key, value in raw.items():
      parts = key.split(",")
      decoded[(int(parts[0]), int(parts[1]))] = value
    return decoded

  wins = decode_pairs(data["wins"])
  if "played" in data:
    played = decode_pairs(data["played"])
  else:
    played = None
  games_per_matchup = data.get("games_per_matchup")

  players = []
  for entry in data["players"]:
    players.append((AIPersonality(entry["personality"]), entry["difficulty"]))
  return wins, played, games_per_matchup, players


def draw_grid(
  wins: dict[tuple[int, int], int],
  players: list[tuple[AIPersonality, int]] | None = None,
  output_path: str = "tournament_grid.png",
  played: dict[tuple[int, int], int] | None = None,
  games_per_matchup: int | None = None,
  ranked: list[int] | None = None,
):
  """
  Draw a heatmap grid of tournament results and save it to `output_path`.

  Rows  = first player
  Cols  = second player
  Color = red if first  player won more of their games in that cell
          green if second player won more (blue in between)
  ×     = the first player won all or none of the games in that cell;
          cells without any games are left unmarked

  Rows and columns follow `ranked` (player indices); when it is omitted the
  order comes from rank_players(). `players` defaults to _all_players().
  Game counts come from `played` when given, otherwise `games_per_matchup`;
  a pair missing from `played` is treated as having zero games.

  Raises ValueError if neither `played` nor `games_per_matchup` is provided.
  """
  import matplotlib
  matplotlib.use("Agg")  # render off-screen; no display needed
  import matplotlib.pyplot as plt
  import matplotlib.colors as mcolors
  import numpy as np

  if played is None and games_per_matchup is None:
    raise ValueError("Provide either played or games_per_matchup")

  if players is None:
    players = _all_players()
  n = len(players)
  if ranked is None:
    ranked = rank_players(wins, players, played=played, games_per_matchup=games_per_matchup)

  labels = [_player_label(*players[i]) for i in ranked]

  def games(i: int, j: int) -> int:
    # Games played for the ordered pair (i, j); 0 when unknown.
    count = played.get((i, j)) if played is not None else games_per_matchup
    return count if count is not None else 0

  # Build value matrix: (p1_wins - p2_wins) / total_games ∈ [-1, 1]
  # and remember which cells are sweeps so they can be marked afterwards.
  matrix = np.full((n, n), np.nan)
  sweeps: list[tuple[int, int]] = []
  for row, i in enumerate(ranked):
    for col, j in enumerate(ranked):
      g = games(i, j)
      if g <= 0:
        # No games: neutral color and no sweep mark.
        matrix[row, col] = 0.0
        continue
      p1_wins = wins.get((i, j), 0)
      matrix[row, col] = (p1_wins - (g - p1_wins)) / g
      if p1_wins == g or p1_wins == 0:
        sweeps.append((row, col))

  cell_size = 0.22
  fig_size = n * cell_size + 3
  fig, ax = plt.subplots(figsize=(fig_size, fig_size))

  cmap = mcolors.LinearSegmentedColormap.from_list(
    "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
  )
  norm = mcolors.Normalize(vmin=-1, vmax=1)

  img = ax.imshow(matrix, cmap=cmap, norm=norm, aspect="equal", interpolation="none")

  # × marks for sweeps
  for row, col in sweeps:
    ax.text(col, row, "×", ha="center", va="center",
            fontsize=5, color="black", fontweight="bold", zorder=3)

  ax.set_xticks(range(n))
  ax.set_yticks(range(n))
  ax.set_xticklabels(labels, rotation=90, fontsize=4)
  ax.set_yticklabels(labels, fontsize=4)
  ax.xaxis.set_label_position("top")
  ax.xaxis.tick_top()

  ax.set_xlabel("Second player", labelpad=8, fontsize=8)
  ax.set_ylabel("First player", labelpad=8, fontsize=8)
  ax.set_title(
    "Tournament results — red: first player wins more,  green: second player wins more",
    pad=14, fontsize=9,
  )

  plt.colorbar(img, ax=ax, fraction=0.015, pad=0.01,
               label="(P1 wins - P2 wins) / games per cell")

  plt.tight_layout()
  plt.savefig(output_path, dpi=150, bbox_inches="tight")
  # Close exactly the figure created here rather than whichever is current.
  plt.close(fig)
  print(f"Grid saved to {output_path}")


if __name__ == "__main__":
  # Script entry point: run an adaptive tournament over difficulty levels 7-10,
  # save the raw results, draw the heatmap and print the final ranking.
  difficulties = list(range(7, 11))

  card_pool = get_simulation_cards()
  players = _all_players(difficulties)
  wins, played = asyncio.run(run_tournament_adaptive(
    card_pool,
    difficulties=difficulties,
    min_games=20,
    max_games=1000,
    p_decisive=0.65,
    alpha=0.05,
  ))
  save_tournament(wins, players=players, played=played)

  # Compute the strengths once and reuse them for both the grid order and the
  # printed ranking.
  ratings = compute_bradley_terry(wins, len(players), played=played)
  ranked = sorted(range(len(players)), key=lambda i: ratings[i])  # worst-to-best
  draw_grid(wins, players=players, played=played, ranked=ranked)

  # These are Bradley-Terry strengths (normalized to mean 1.0), not Elo
  # ratings, so print them with enough decimals to tell players apart.
  print("\nFinal Bradley-Terry strengths (best to worst):")
  for rank, i in enumerate(reversed(ranked), 1):
    label = _player_label(*players[i])
    print(f"  {rank:2d}. {label:<12} {ratings[i]:.3f}")