Files
wiki-tcg/backend/simulate.py
2026-03-26 00:51:25 +01:00

644 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import json
import math
import os
import random
import uuid
import asyncio
from concurrent.futures import ProcessPoolExecutor
from dotenv import load_dotenv
load_dotenv()
from datetime import datetime
from card import Card, CardType, CardRarity, generate_cards, compute_deck_type
from game import (
CardInstance, PlayerState, GameState,
action_play_card, action_sacrifice, action_end_turn,
)
from ai import AIPersonality, choose_cards, choose_plan
# JSON cache of the simulation card pool, stored next to this module.
# get_simulation_cards() reads it when present and writes it otherwise.
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
# Number of cards requested from generate_cards() when no cache file exists.
SIMULATION_CARD_COUNT = 1000
# ==================== Card pool ====================
def _card_to_dict(card: Card) -> dict:
    """
    Flatten a Card into a dict that json.dump can write.

    `created_at` is stored as an ISO-format string and the two enum fields
    are stored by member name, which is what `_dict_to_card` reads back.
    """
    payload: dict = {}
    payload["name"] = card.name
    payload["created_at"] = card.created_at.isoformat()
    payload["image_link"] = card.image_link
    payload["card_rarity"] = card.card_rarity.name
    payload["card_type"] = card.card_type.name
    payload["wikidata_instance"] = card.wikidata_instance
    payload["text"] = card.text
    payload["attack"] = card.attack
    payload["defense"] = card.defense
    payload["cost"] = card.cost
    return payload
def _dict_to_card(d: dict) -> Card:
    """
    Rebuild a Card from a dict produced by `_card_to_dict`.

    `created_at` is parsed with datetime.fromisoformat and the rarity/type
    strings are looked up by member name on CardRarity / CardType.
    """
    fields = {
        "name": d["name"],
        "created_at": datetime.fromisoformat(d["created_at"]),
        "image_link": d["image_link"],
        "card_rarity": CardRarity[d["card_rarity"]],
        "card_type": CardType[d["card_type"]],
        "wikidata_instance": d["wikidata_instance"],
        "text": d["text"],
        "attack": d["attack"],
        "defense": d["defense"],
        "cost": d["cost"],
    }
    return Card(**fields)
def get_simulation_cards() -> list[Card]:
    """
    Return the card pool used for simulations.

    If SIMULATION_CARDS_PATH does not exist, SIMULATION_CARD_COUNT cards are
    generated with generate_cards(), written to that path as JSON and
    returned. Otherwise the cards are loaded from the existing file.
    """
    if not os.path.exists(SIMULATION_CARDS_PATH):
        print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
        cards = generate_cards(SIMULATION_CARD_COUNT)
        with open(SIMULATION_CARDS_PATH, "w", encoding="utf-8") as out:
            json.dump(list(map(_card_to_dict, cards)), out, ensure_ascii=False, indent=2)
        print(f"Saved {len(cards)} cards to {SIMULATION_CARDS_PATH}")
        return cards

    with open(SIMULATION_CARDS_PATH, "r", encoding="utf-8") as src:
        stored = json.load(src)
    return [_dict_to_card(entry) for entry in stored]
# ==================== Single game ====================
# User ids given to the two simulated players. simulate_game() keys its
# player table by these, and the tournament code compares the returned winner
# against PLAYER1_ID.
PLAYER1_ID = "p1"
PLAYER2_ID = "p2"
# Upper bound on the number of turns simulate_game() will play.
MAX_TURNS = 300 # safety cap to prevent infinite games
def _make_instances(deck: list[Card]) -> list[CardInstance]:
    """
    Create one CardInstance per card in `deck`, in deck order.

    Each instance gets a fresh uuid4 `instance_id`; the card name doubles as
    `card_id`, `max_defense` starts equal to `defense`, and a missing
    image link or text is replaced by an empty string.
    """
    instances: list[CardInstance] = []
    for card in deck:
        instance = CardInstance(
            instance_id=str(uuid.uuid4()),
            card_id=card.name,
            name=card.name,
            attack=card.attack,
            defense=card.defense,
            max_defense=card.defense,
            cost=card.cost,
            card_type=card.card_type.name,
            card_rarity=card.card_rarity.name,
            image_link=card.image_link or "",
            text=card.text or "",
        )
        instances.append(instance)
    return instances
def simulate_game(
    cards: list[Card],
    difficulty1: int,
    personality1: AIPersonality,
    difficulty2: int,
    personality2: AIPersonality,
) -> str | None:
    """
    Play one complete AI-vs-AI game with decks chosen from `cards`.

    Player 1 (PLAYER1_ID) always takes the first turn. At most MAX_TURNS
    turns are played; the loop stops early as soon as the game state
    carries a result.

    Returns the winner's player id ("p1" or "p2"), or None when no winner
    is recorded: either MAX_TURNS was reached without a result, or the
    result has no winner id.
    """
    # Deck building and shuffling happen in a fixed order (player 1 first).
    first_deck = choose_cards(cards, difficulty1, personality1)
    second_deck = choose_cards(cards, difficulty2, personality2)
    first_instances = _make_instances(first_deck)
    second_instances = _make_instances(second_deck)
    random.shuffle(first_instances)
    random.shuffle(second_instances)

    first_type = compute_deck_type(first_deck) or "Balanced"
    second_type = compute_deck_type(second_deck) or "Balanced"
    p1 = PlayerState(
        user_id=PLAYER1_ID, username="AI1", deck_type=first_type, deck=first_instances
    )
    p2 = PlayerState(
        user_id=PLAYER2_ID, username="AI2", deck_type=second_type, deck=second_instances
    )

    # Both energy caps are raised up front; only player 1, who opens the
    # game, gets energy refilled and a full hand here.
    p1.increment_energy_cap()
    p2.increment_energy_cap()
    p1.refill_energy()
    p1.draw_to_full()

    state = GameState(
        game_id=str(uuid.uuid4()),
        players={PLAYER1_ID: p1, PLAYER2_ID: p2},
        player_order=[PLAYER1_ID, PLAYER2_ID],
        active_player_id=PLAYER1_ID,
        phase="main",
        turn=1,
    )
    ai_settings = {
        PLAYER1_ID: (difficulty1, personality1),
        PLAYER2_ID: (difficulty2, personality2),
    }

    turns_played = 0
    while turns_played < MAX_TURNS and not state.result:
        turns_played += 1
        active_id = state.active_player_id
        difficulty, personality = ai_settings[active_id]
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]
        plan = choose_plan(player, opponent, personality, difficulty)

        # Sacrifices first; slots that are already empty are skipped.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is None:
                continue
            action_sacrifice(state, slot)

        # Planned plays are attempted in random order.
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            # Locate the card in hand by identity.
            hand_idx = None
            for idx, held in enumerate(player.hand):
                if held is card:
                    hand_idx = idx
                    break
            # Skip plays that are no longer possible: card gone from hand,
            # target slot occupied, or not enough energy left.
            if (
                hand_idx is None
                or player.board[slot] is not None
                or card.cost > player.energy
            ):
                continue
            action_play_card(state, hand_idx, slot)

        action_end_turn(state)

    outcome = state.result
    if not outcome:
        return None
    return outcome.winner_id or None
# ==================== Process-pool worker ====================
# These must be module-level so they are picklable.
# Card pool used by games run in this process; filled in by _init_worker.
_worker_cards: list[Card] = []


def _init_worker(cards: list[Card]) -> None:
    """
    Store `cards` in the module-level `_worker_cards`.

    Passed as the `initializer` of the ProcessPoolExecutor so that
    `_run_game_sync` can read the card pool from the module global instead
    of receiving it with every game.
    """
    global _worker_cards
    _worker_cards = cards
def _run_game_sync(args: tuple) -> str | None:
    """
    Run one game inside a worker process.

    `args` is (difficulty1, personality1_value, difficulty2,
    personality2_value). The personality values are turned back into
    AIPersonality members, and the game is played with the card pool stored
    in `_worker_cards`. Returns whatever `simulate_game` returns.
    """
    first_difficulty, first_value, second_difficulty, second_value = args
    first_personality = AIPersonality(first_value)
    second_personality = AIPersonality(second_value)
    return simulate_game(
        _worker_cards,
        first_difficulty,
        first_personality,
        second_difficulty,
        second_personality,
    )
# ==================== Tournament ====================
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
    """
    List every (personality, difficulty) combination.

    Personalities vary slowest: all difficulties of the first AIPersonality
    member come first, then those of the next member, and so on. When
    `difficulties` is None, levels 1 through 10 are used.
    """
    levels = list(range(1, 11)) if difficulties is None else difficulties
    combos: list[tuple[AIPersonality, int]] = []
    for personality in AIPersonality:
        combos.extend((personality, level) for level in levels)
    return combos
def _player_label(personality: AIPersonality, difficulty: int) -> str:
    """Short label: first three characters of the personality value, upper-cased, then "-" and the difficulty."""
    prefix = personality.value[:3].upper()
    return f"{prefix}-{difficulty}"
async def run_tournament(
    cards: list[Card],
    games_per_matchup: int = 5,
    difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Play a fixed number of games for every ordered pair of players.

    Players are all (personality, difficulty) combinations for the given
    `difficulties` (default: 1-10). Every player meets every player,
    including itself, once as first and once as second player, with
    `games_per_matchup` games per ordered pair.

    All games are submitted to a ProcessPoolExecutor with one worker per
    CPU core. The card pool is handed to each worker through the pool
    initializer rather than with every game.

    Returns a dict keyed by (first_player_index, second_player_index) whose
    value is the number of games the first player won. Games without a
    winner are only counted in the printed tie total.
    """
    players = _all_players(difficulties)
    n = len(players)

    # One (i, j, args) entry per individual game.
    indexed_args: list[tuple[int, int, tuple]] = []
    for i, (first_personality, first_difficulty) in enumerate(players):
        for j, (second_personality, second_difficulty) in enumerate(players):
            game_args = (
                first_difficulty,
                first_personality.value,
                second_difficulty,
                second_personality.value,
            )
            indexed_args.extend((i, j, game_args) for _ in range(games_per_matchup))

    total_games = len(indexed_args)
    n_workers = os.cpu_count() or 1
    print(f"Running {total_games} games across {n_workers} workers "
          f"({n} players, {games_per_matchup} games per ordered pair)...")

    report_every = max(1, total_games // 200)
    loop = asyncio.get_running_loop()
    completed = 0

    async def tracked(future):
        # Await one game and refresh the progress line every `report_every`
        # completions, and once more on the last game.
        nonlocal completed
        outcome = await future
        completed += 1
        if completed == total_games or completed % report_every == 0:
            pct = completed / total_games * 100
            print(f" {completed}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
        return outcome

    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        futures = [
            loop.run_in_executor(executor, _run_game_sync, game_args)
            for _, _, game_args in indexed_args
        ]
        # gather() keeps submission order, so results line up with indexed_args.
        results = await asyncio.gather(*map(tracked, futures))
    print("\nFinished")

    wins: dict[tuple[int, int], int] = {}
    ties = 0
    for (i, j, _), winner in zip(indexed_args, results):
        pair = (i, j)
        wins.setdefault(pair, 0)
        if winner == PLAYER1_ID:
            wins[pair] += 1
        elif winner is None:
            ties += 1
    print(f"Ties: {ties}")
    return wins
def _sprt_check(wins: int, total: int, log_win: float, log_loss: float, log_B: float) -> bool:
    """
    Return True when the log-likelihood ratio has left the band (-log_B, log_B).

    The ratio is accumulated from the first player's point of view:
    every win adds `log_win` and each of the remaining `total - wins`
    games adds `log_loss`. The caller precomputes
    log_win = log(p_decisive / 0.5)
    log_loss = log((1 - p_decisive) / 0.5)
    so a win pushes the ratio up and any other outcome pushes it down.
    A decision is reported once the ratio reaches +log_B or -log_B.
    """
    non_wins = total - wins
    llr = wins * log_win + non_wins * log_loss
    return abs(llr) >= log_B
async def run_tournament_adaptive(
    cards: list[Card],
    difficulties: list[int] | None = None,
    min_games: int = 5,
    max_games: int = 200,
    p_decisive: float = 0.65,
    alpha: float = 0.05,
) -> tuple[dict[tuple[int, int], int], dict[tuple[int, int], int]]:
    """
    Like run_tournament, but with a per-pair game budget decided at run time.

    Each ordered pair (i, j) keeps playing until `_sprt_check` reports a
    decision (only consulted once the pair has played `min_games` games) or
    until the pair has played `max_games` games.

    Each round, every still-pending pair plays exactly one game. The games
    of a round are submitted together to a ProcessPoolExecutor with one
    worker per CPU core, and the next round starts when all have finished.

    Only a PLAYER1_ID winner counts as a win for the first player. Any other
    outcome, including a game without a winner, counts against the first
    player in the SPRT.

    Parameters:
    cards — card pool handed to every worker via the pool initializer
    difficulties — difficulty levels to include (default: 1-10)
    min_games — games a pair must play before it can be decided
    max_games — hard cap on games per pair
    p_decisive — win rate of the alternative hypothesis (H0 is 0.5)
    alpha — error level; the decision threshold is log((1 - alpha) / alpha)

    Returns (wins, played):
    wins[(i, j)] — how many games player i won as first player against j
    played[(i, j)] — how many games were played for that pair
    """
    players = _all_players(difficulties)
    n = len(players)
    all_pairs = [(i, j) for i in range(n) for j in range(n)]
    wins: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    played: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    decided: set[tuple[int, int]] = set()

    # Precompute SPRT constants (H0: p=0.5, H1: p=p_decisive)
    log_B = math.log((1 - alpha) / alpha)
    log_win = math.log(p_decisive / 0.5)
    log_loss = math.log((1 - p_decisive) / 0.5)

    def make_args(i: int, j: int) -> tuple:
        # Argument tuple in the layout _run_game_sync unpacks.
        p1, d1 = players[i]
        p2, d2 = players[j]
        return (d1, p1.value, d2, p2.value)

    n_workers = os.cpu_count() or 1
    loop = asyncio.get_running_loop()
    total_played = 0
    max_possible = len(all_pairs) * max_games
    print(
        f"Adaptive tournament: {n} players, {len(all_pairs)} pairs, "
        f"SPRT p_decisive={p_decisive} alpha={alpha}, "
        f"min={min_games} max={max_games} games/pair\n"
        f"Worst case: {max_possible:,} games across {n_workers} workers"
    )

    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        round_num = 0
        while True:
            pending = [
                pair for pair in all_pairs
                if pair not in decided and played[pair] < max_games
            ]
            if not pending:
                break
            round_num += 1

            batch = [(i, j, make_args(i, j)) for (i, j) in pending]
            futures = [
                loop.run_in_executor(executor, _run_game_sync, args)
                for _, _, args in batch
            ]
            # gather() keeps submission order, so results line up with batch.
            results = await asyncio.gather(*futures)

            newly_decided = 0
            for (i, j, _), winner in zip(batch, results):
                pair = (i, j)
                played[pair] += 1
                if winner == PLAYER1_ID:
                    wins[pair] += 1
                total_played += 1
                if (played[pair] >= min_games
                        and _sprt_check(wins[pair], played[pair], log_win, log_loss, log_B)):
                    decided.add(pair)
                    newly_decided += 1

            remaining = len(all_pairs) - len(decided)
            # pending was non-empty, so max_possible > 0 here.
            pct = total_played / max_possible * 100
            print(
                f" Round {round_num:3d}: {len(pending):5d} games, "
                f"+{newly_decided:4d} decided, "
                f"{remaining:5d} pairs left, "
                f"{total_played:,} total ({pct:.1f}% of worst case)",
                end="\r", flush=True,
            )

    savings = max_possible - total_played
    # With no players or max_games == 0 nothing was scheduled; report 0%
    # instead of dividing by zero.
    reduction_pct = savings / max_possible * 100 if max_possible else 0.0
    decided_pct = len(decided) / len(all_pairs) * 100 if all_pairs else 0.0
    print(
        f"\nFinished: {total_played:,} games played "
        f"(saved {savings:,} vs fixed, "
        f"{reduction_pct:.1f}% reduction)"
    )
    print(
        f"Early decisions: {len(decided)}/{len(all_pairs)} pairs "
        f"({decided_pct:.1f}%)"
    )
    return wins, played
def compute_bradley_terry(
wins: dict[tuple[int, int], int],
n: int,
played: dict[tuple[int, int], int] | None = None,
games_per_matchup: int | None = None,
iterations: int = 1000,
) -> list[float]:
"""
Compute Bradley-Terry strength parameters for all n players.
For each pair (i, j): w_ij wins for i, w_ji wins for j.
Iteratively updates: strength[i] = sum_j(w_ij) / sum_j((w_ij+w_ji) / (s[i]+s[j]))
Returns a list of strength values indexed by player. Unlike Elo, this is
path-independent and converges to a unique maximum-likelihood solution.
"""
w: list[list[int]] = [[0] * n for _ in range(n)]
for (i, j), p1_wins in wins.items():
g = played[(i, j)] if played is not None else games_per_matchup
if g:
w[i][j] += p1_wins
w[j][i] += g - p1_wins
strength = [1.0] * n
for _ in range(iterations):
new_strength = [0.0] * n
for i in range(n):
wins_i = sum(w[i][j] for j in range(n) if j != i)
denom = sum(
(w[i][j] + w[j][i]) / (strength[i] + strength[j])
for j in range(n)
if j != i and (w[i][j] + w[j][i]) > 0
)
new_strength[i] = wins_i / denom if denom > 0 else strength[i]
# Normalize so the mean stays at 1.0
mean = sum(new_strength) / n
strength = [s / mean for s in new_strength]
return strength
def rank_players(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
) -> list[int]:
    """
    Order player indices by Bradley-Terry strength, weakest first.

    Either `played` (adaptive tournament) or `games_per_matchup` (fixed
    tournament) must be given; a ValueError is raised when both are None.
    """
    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")
    count = len(players)
    ratings = compute_bradley_terry(
        wins, count, played=played, games_per_matchup=games_per_matchup
    )
    order = list(range(count))
    order.sort(key=ratings.__getitem__)
    return order
# Default file for save_tournament() / load_tournament(), stored next to this module.
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
def save_tournament(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
):
    """
    Write tournament results to `path` as JSON.

    The file always holds the player list (personality value and
    difficulty) and the win table. Pair keys are written as "i,j" strings.
    `played` and `games_per_matchup` are only included when they are not
    None.
    """
    roster = []
    for personality, difficulty in players:
        roster.append({"personality": personality.value, "difficulty": difficulty})
    win_table = {f"{i},{j}": count for (i, j), count in wins.items()}

    payload = {"players": roster, "wins": win_table}
    if played is not None:
        payload["played"] = {f"{i},{j}": count for (i, j), count in played.items()}
    if games_per_matchup is not None:
        payload["games_per_matchup"] = games_per_matchup

    with open(path, "w", encoding="utf-8") as out:
        json.dump(payload, out, indent=2)
    print(f"Tournament results saved to {path}")
def load_tournament(
    path: str = TOURNAMENT_RESULTS_PATH,
) -> tuple[
    dict[tuple[int, int], int],
    dict[tuple[int, int], int] | None,
    int | None,
    list[tuple[AIPersonality, int]],
]:
    """Read a file written by save_tournament; returns (wins, played, games_per_matchup, players).

    `played` is None when the file has no "played" table (fixed-game files;
    use games_per_matchup instead).
    `games_per_matchup` is None when the file has no such entry (adaptive
    files; use played instead).
    """
    with open(path, "r", encoding="utf-8") as src:
        data = json.load(src)

    def parse_pair_dict(d: dict) -> dict[tuple[int, int], int]:
        # Turn "i,j" string keys back into (i, j) integer tuples.
        parsed: dict[tuple[int, int], int] = {}
        for key, value in d.items():
            parts = key.split(",")
            parsed[(int(parts[0]), int(parts[1]))] = value
        return parsed

    wins = parse_pair_dict(data["wins"])
    played = None
    if "played" in data:
        played = parse_pair_dict(data["played"])
    games_per_matchup = data.get("games_per_matchup")

    players = []
    for entry in data["players"]:
        players.append((AIPersonality(entry["personality"]), entry["difficulty"]))
    return wins, played, games_per_matchup, players
def draw_grid(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]] | None = None,
    output_path: str = "tournament_grid.png",
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
    ranked: list[int] | None = None,
):
    """
    Draw a heatmap grid of tournament results and save it to `output_path`.

    Rows = first player
    Cols = second player
    Both axes follow `ranked` (computed with rank_players when not given).
    Color = red if first player won more of their games in that cell
    green if second player won more
    × = one player swept all games in that cell

    The cell value is (p1_wins - p2_wins) / games, where every game not won
    by the first player is counted for the second player. Cells without any
    games get the value 0 and no × marker.

    Either `played` or `games_per_matchup` must be given, otherwise a
    ValueError is raised. `players` defaults to `_all_players()`.
    """
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import numpy as np

    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")
    if players is None:
        players = _all_players()
    n = len(players)
    if ranked is None:
        ranked = rank_players(wins, players, played=played, games_per_matchup=games_per_matchup)
    labels = [_player_label(*players[i]) for i in ranked]

    def games(i: int, j: int) -> int:
        # Games played for the ordered pair (i, j); 0 when unknown.
        return_value = played[(i, j)] if played is not None else games_per_matchup
        return return_value if return_value is not None else 0

    # Build value matrix: (p1_wins - p2_wins) / total_games ∈ [-1, 1]
    # and remember which cells were sweeps.
    matrix = np.full((n, n), np.nan)
    sweeps: list[tuple[int, int]] = []
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            matrix[row, col] = (p1_wins - (g - p1_wins)) / g if g > 0 else 0.0
            # A sweep needs at least one game; an empty cell is not a sweep.
            if g > 0 and (p1_wins == g or p1_wins == 0):
                sweeps.append((row, col))

    cell_size = 0.22
    fig_size = n * cell_size + 3
    fig, ax = plt.subplots(figsize=(fig_size, fig_size))
    cmap = mcolors.LinearSegmentedColormap.from_list(
        "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
    )
    norm = mcolors.Normalize(vmin=-1, vmax=1)
    img = ax.imshow(matrix, cmap=cmap, norm=norm, aspect="equal", interpolation="none")

    # × marks for sweeps
    for row, col in sweeps:
        ax.text(col, row, "×", ha="center", va="center",
                fontsize=5, color="black", fontweight="bold", zorder=3)

    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(labels, rotation=90, fontsize=4)
    ax.set_yticklabels(labels, fontsize=4)
    ax.xaxis.set_label_position("top")
    ax.xaxis.tick_top()
    ax.set_xlabel("Second player", labelpad=8, fontsize=8)
    ax.set_ylabel("First player", labelpad=8, fontsize=8)
    ax.set_title(
        "Tournament results — red: first player wins more, green: second player wins more",
        pad=14, fontsize=9,
    )
    plt.colorbar(img, ax=ax, fraction=0.015, pad=0.01,
                 label="(P1 wins - P2 wins) / games per cell")
    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    # Close this figure explicitly so repeated calls do not accumulate figures.
    plt.close(fig)
    print(f"Grid saved to {output_path}")
if __name__ == "__main__":
    # Script entry point: run an adaptive tournament for difficulty levels
    # 7-10, save the raw results, draw the grid and print the ranking.
    difficulties = list(range(7, 11))
    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins, played = asyncio.run(run_tournament_adaptive(
        card_pool,
        difficulties=difficulties,
        min_games=20,
        max_games=1000,
        p_decisive=0.65,
        alpha=0.05,
    ))
    save_tournament(wins, players=players, played=played)

    # Compute the strengths once and reuse them for both the grid order and
    # the printed ranking.
    ratings = compute_bradley_terry(wins, len(players), played=played)
    ranked = sorted(range(len(players)), key=lambda i: ratings[i])  # worst-to-best
    draw_grid(wins, players=players, played=played, ranked=ranked)

    # The values are Bradley-Terry strengths normalised to a mean of 1.0,
    # not Elo ratings; three decimals keep close players distinguishable.
    print("\nFinal Bradley-Terry strengths (best to worst):")
    for rank, i in enumerate(reversed(ranked), 1):
        personality, difficulty = players[i]
        label = _player_label(personality, difficulty)
        print(f" {rank:2d}. {label:<12} {ratings[i]:.3f}")