🐐
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import uuid
|
||||
@@ -94,7 +95,7 @@ def _make_instances(deck: list[Card]) -> list[CardInstance]:
|
||||
]
|
||||
|
||||
|
||||
async def simulate_game(
|
||||
def simulate_game(
|
||||
cards: list[Card],
|
||||
difficulty1: int,
|
||||
personality1: AIPersonality,
|
||||
@@ -106,8 +107,6 @@ async def simulate_game(
|
||||
Player 1 always goes first.
|
||||
|
||||
Returns "p1", "p2", or None if the game exceeds MAX_TURNS.
|
||||
|
||||
Designed to be awaited inside asyncio.gather() to run many games concurrently.
|
||||
"""
|
||||
deck1 = choose_cards(cards, difficulty1, personality1)
|
||||
deck2 = choose_cards(cards, difficulty2, personality2)
|
||||
@@ -152,7 +151,7 @@ async def simulate_game(
|
||||
player = state.players[active_id]
|
||||
opponent = state.players[state.opponent_id(active_id)]
|
||||
|
||||
plan = await choose_plan(player, opponent, personality, difficulty)
|
||||
plan = choose_plan(player, opponent, personality, difficulty)
|
||||
|
||||
for slot in plan.sacrifice_slots:
|
||||
if player.board[slot] is not None:
|
||||
@@ -189,11 +188,11 @@ def _init_worker(cards: list[Card]) -> None:
|
||||
def _run_game_sync(args: tuple) -> str | None:
    """Synchronous entry point for a worker process.

    Unpacks one work item and plays a single game using the card pool held in
    the module-level ``_worker_cards``.

    Args:
        args: ``(difficulty1, personality1_value, difficulty2,
            personality2_value)``. Each personality value is passed to
            ``AIPersonality(...)`` to obtain the personality object.

    Returns:
        Whatever ``simulate_game`` returns for this matchup.
    """
    d1, p1_name, d2, p2_name = args
    # simulate_game is called directly: it is a regular function here, so no
    # event loop (asyncio.run) is needed inside the worker.
    return simulate_game(
        _worker_cards,
        d1, AIPersonality(p1_name),
        d2, AIPersonality(p2_name),
    )
|
||||
|
||||
|
||||
# ==================== Tournament ====================
|
||||
@@ -290,26 +289,188 @@ async def run_tournament(
|
||||
return wins
|
||||
|
||||
|
||||
def _sprt_check(wins: int, total: int, log_win: float, log_loss: float, log_B: float) -> bool:
    """Return True when the SPRT has reached a decision for this matchup.

    The log-likelihood ratio (LLR) compares H1: win_rate = p_decisive against
    H0: win_rate = 0.5, using per-game increments precomputed by the caller:

        log_win  = log(p_decisive / 0.5)         added once per win
        log_loss = log((1 - p_decisive) / 0.5)   added once per non-win

    Only this single alternative is modelled; there is no separate LLR for
    the mirrored hypothesis win_rate = 1 - p_decisive. The matchup counts as
    decided as soon as the LLR reaches +log_B or -log_B. Which of the two
    bounds was hit is not reported to the caller.

    Args:
        wins: Games won by the player under test.
        total: Games played so far; ``total - wins`` are treated as losses.
        log_win: LLR increment for a win.
        log_loss: LLR increment for a loss.
        log_B: Decision threshold (applied symmetrically as ±log_B).

    Returns:
        True if the accumulated LLR is at or beyond either threshold.
    """
    losses = total - wins
    llr = wins * log_win + losses * log_loss
    # Equivalent to: llr >= log_B or llr <= -log_B
    return abs(llr) >= log_B
|
||||
|
||||
|
||||
async def run_tournament_adaptive(
    cards: list[Card],
    difficulties: list[int] | None = None,
    min_games: int = 5,
    max_games: int = 200,
    p_decisive: float = 0.65,
    alpha: float = 0.05,
) -> tuple[dict[tuple[int, int], int], dict[tuple[int, int], int]]:
    """Run a tournament that allocates games to matchups adaptively.

    Every ordered pair (i, j) of players returned by ``_all_players`` —
    including i == j — keeps playing until ``_sprt_check`` reports a decision
    (only consulted once the pair has played ``min_games``) or the pair has
    played ``max_games``. Each round, every still-pending pair plays exactly
    one game; the games of a round are submitted together to a process pool
    with ``os.cpu_count()`` workers and awaited with ``asyncio.gather``.

    A game counts as a win for player i only when the worker returns
    ``PLAYER1_ID``; every other result counts as a non-win.

    Args:
        cards: Card pool handed to each worker via ``_init_worker``.
        difficulties: Forwarded to ``_all_players`` to build the player list.
        min_games: Games a pair must play before the SPRT may stop it.
        max_games: Hard cap on games per pair.
        p_decisive: Win rate of the SPRT alternative hypothesis (H0 is 0.5).
        alpha: Error level; the SPRT threshold is log((1 - alpha) / alpha).

    Returns:
        ``(wins, played)`` where ``wins[(i, j)]`` is how many games player i
        won as first player against j and ``played[(i, j)]`` is how many
        games that pair played.

    Raises:
        ValueError: If ``p_decisive`` or ``alpha`` is not strictly between
            0 and 1 (the logarithms below would be undefined).
    """
    if not 0.0 < p_decisive < 1.0:
        raise ValueError(f"p_decisive must be strictly between 0 and 1, got {p_decisive}")
    if not 0.0 < alpha < 1.0:
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha}")

    players = _all_players(difficulties)
    n = len(players)
    all_pairs = [(i, j) for i in range(n) for j in range(n)]

    wins: dict[tuple[int, int], int] = dict.fromkeys(all_pairs, 0)
    played: dict[tuple[int, int], int] = dict.fromkeys(all_pairs, 0)
    decided: set[tuple[int, int]] = set()

    # Precompute SPRT constants (H0: p=0.5, H1: p=p_decisive)
    log_B = math.log((1 - alpha) / alpha)
    log_win = math.log(p_decisive / 0.5)
    log_loss = math.log((1 - p_decisive) / 0.5)

    # The work item for a pair never changes, so build each one once.
    # players[k] is (personality, difficulty); workers receive
    # (difficulty1, personality1.value, difficulty2, personality2.value).
    job_args = {
        (i, j): (players[i][1], players[i][0].value, players[j][1], players[j][0].value)
        for (i, j) in all_pairs
    }

    n_workers = os.cpu_count() or 1
    loop = asyncio.get_running_loop()
    total_played = 0
    max_possible = len(all_pairs) * max_games

    print(
        f"Adaptive tournament: {n} players, {len(all_pairs)} pairs, "
        f"SPRT p_decisive={p_decisive} alpha={alpha}, "
        f"min={min_games} max={max_games} games/pair\n"
        f"Worst case: {max_possible:,} games across {n_workers} workers"
    )

    # A pair is pending while it is undecided and below the per-pair cap.
    pending = list(all_pairs) if max_games > 0 else []
    round_num = 0

    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        while pending:
            round_num += 1
            results = await asyncio.gather(*(
                loop.run_in_executor(executor, _run_game_sync, job_args[pair])
                for pair in pending
            ))

            newly_decided = 0
            still_pending: list[tuple[int, int]] = []
            for pair, winner in zip(pending, results):
                played[pair] += 1
                total_played += 1
                if winner == PLAYER1_ID:
                    wins[pair] += 1

                if (played[pair] >= min_games
                        and _sprt_check(wins[pair], played[pair], log_win, log_loss, log_B)):
                    decided.add(pair)
                    newly_decided += 1
                elif played[pair] < max_games:
                    still_pending.append(pair)

            # max_possible > 0 here: the loop only runs when pairs exist and
            # max_games is positive.
            pct = total_played / max_possible * 100
            print(
                f"  Round {round_num:3d}: {len(pending):5d} games, "
                f"+{newly_decided:4d} decided, "
                # Pairs that hit max_games are finished, so they are not "left".
                f"{len(still_pending):5d} pairs left, "
                f"{total_played:,} total ({pct:.1f}% of worst case)",
                end="\r", flush=True,
            )
            pending = still_pending

    savings = max_possible - total_played
    # Guard the ratios: with no players or max_games <= 0 nothing is played.
    reduction = savings / max_possible * 100 if max_possible > 0 else 0.0
    early_pct = len(decided) / len(all_pairs) * 100 if all_pairs else 0.0
    print(
        f"\nFinished: {total_played:,} games played "
        f"(saved {savings:,} vs fixed, "
        f"{reduction:.1f}% reduction)"
    )
    print(
        f"Early decisions: {len(decided)}/{len(all_pairs)} pairs "
        f"({early_pct:.1f}%)"
    )

    return wins, played
|
||||
|
||||
|
||||
def compute_bradley_terry(
|
||||
wins: dict[tuple[int, int], int],
|
||||
n: int,
|
||||
played: dict[tuple[int, int], int] | None = None,
|
||||
games_per_matchup: int | None = None,
|
||||
iterations: int = 1000,
|
||||
) -> list[float]:
|
||||
"""
|
||||
Compute Bradley-Terry strength parameters for all n players.
|
||||
|
||||
For each pair (i, j): w_ij wins for i, w_ji wins for j.
|
||||
Iteratively updates: strength[i] = sum_j(w_ij) / sum_j((w_ij+w_ji) / (s[i]+s[j]))
|
||||
|
||||
Returns a list of strength values indexed by player. Unlike Elo, this is
|
||||
path-independent and converges to a unique maximum-likelihood solution.
|
||||
"""
|
||||
w: list[list[int]] = [[0] * n for _ in range(n)]
|
||||
for (i, j), p1_wins in wins.items():
|
||||
g = played[(i, j)] if played is not None else games_per_matchup
|
||||
if g:
|
||||
w[i][j] += p1_wins
|
||||
w[j][i] += g - p1_wins
|
||||
|
||||
strength = [1.0] * n
|
||||
for _ in range(iterations):
|
||||
new_strength = [0.0] * n
|
||||
for i in range(n):
|
||||
wins_i = sum(w[i][j] for j in range(n) if j != i)
|
||||
denom = sum(
|
||||
(w[i][j] + w[j][i]) / (strength[i] + strength[j])
|
||||
for j in range(n)
|
||||
if j != i and (w[i][j] + w[j][i]) > 0
|
||||
)
|
||||
new_strength[i] = wins_i / denom if denom > 0 else strength[i]
|
||||
# Normalize so the mean stays at 1.0
|
||||
mean = sum(new_strength) / n
|
||||
strength = [s / mean for s in new_strength]
|
||||
|
||||
return strength
|
||||
|
||||
|
||||
def rank_players(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
) -> list[int]:
    """Rank player indices by Bradley-Terry strength, worst to best.

    Provide either `played` (adaptive tournament) or `games_per_matchup`
    (fixed tournament); both are forwarded to ``compute_bradley_terry``.

    Args:
        wins: First-player win counts keyed by ordered pair (i, j).
        players: The player list; only its length is used here.
        played: Games played per ordered pair, if recorded.
        games_per_matchup: Fixed number of games per ordered pair.

    Returns:
        Indices into ``players`` sorted by ascending strength.

    Raises:
        ValueError: If neither ``played`` nor ``games_per_matchup`` is given.
    """
    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")

    n = len(players)
    ratings = compute_bradley_terry(
        wins, n, played=played, games_per_matchup=games_per_matchup,
    )
    return sorted(range(n), key=lambda i: ratings[i])
|
||||
|
||||
|
||||
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
|
||||
@@ -317,43 +478,63 @@ TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_re
|
||||
|
||||
def save_tournament(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
) -> None:
    """Write tournament results to ``path`` as JSON.

    The file always contains ``"players"`` (personality value and difficulty
    per player) and ``"wins"``. ``"played"`` and ``"games_per_matchup"`` are
    written only when the corresponding argument is not None. Pair keys
    (i, j) are stored as ``"i,j"`` strings.

    Args:
        wins: First-player win counts keyed by ordered pair (i, j).
        players: ``(personality, difficulty)`` tuples, in index order.
        path: Destination file; it is overwritten.
        played: Games played per ordered pair (adaptive tournaments).
        games_per_matchup: Fixed games per ordered pair (fixed tournaments).
    """
    data = {
        "players": [
            {"personality": p.value, "difficulty": d}
            for p, d in players
        ],
        "wins": {f"{i},{j}": w for (i, j), w in wins.items()},
    }
    if played is not None:
        data["played"] = {f"{i},{j}": g for (i, j), g in played.items()}
    if games_per_matchup is not None:
        data["games_per_matchup"] = games_per_matchup

    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"Tournament results saved to {path}")
|
||||
|
||||
|
||||
def load_tournament(
    path: str = TOURNAMENT_RESULTS_PATH,
) -> tuple[
    dict[tuple[int, int], int],
    dict[tuple[int, int], int] | None,
    int | None,
    list[tuple[AIPersonality, int]],
]:
    """Read tournament results from the JSON file at ``path``.

    Returns:
        ``(wins, played, games_per_matchup, players)``.

        ``played`` is None when the file has no ``"played"`` entry (files
        written with a fixed number of games; use ``games_per_matchup``).
        ``games_per_matchup`` is None when the file has no such entry
        (adaptive files; use ``played``).
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    def parse_pair_dict(d: dict) -> dict[tuple[int, int], int]:
        # Keys are stored as "i,j" strings; turn them back into (i, j) tuples.
        parsed: dict[tuple[int, int], int] = {}
        for key, value in d.items():
            parts = key.split(",")
            parsed[(int(parts[0]), int(parts[1]))] = value
        return parsed

    wins = parse_pair_dict(data["wins"])
    played = parse_pair_dict(data["played"]) if "played" in data else None
    games_per_matchup = data.get("games_per_matchup")
    players = [
        (AIPersonality(p["personality"]), p["difficulty"])
        for p in data["players"]
    ]
    return wins, played, games_per_matchup, players
|
||||
|
||||
|
||||
def draw_grid(
|
||||
wins: dict[tuple[int, int], int],
|
||||
games_per_matchup: int = 5,
|
||||
players: list[tuple[AIPersonality, int]] | None = None,
|
||||
output_path: str = "tournament_grid.png",
|
||||
played: dict[tuple[int, int], int] | None = None,
|
||||
games_per_matchup: int | None = None,
|
||||
ranked: list[int] | None = None,
|
||||
):
|
||||
"""
|
||||
Draw a heatmap grid of tournament results.
|
||||
@@ -370,19 +551,28 @@ def draw_grid(
|
||||
import matplotlib.colors as mcolors
|
||||
import numpy as np
|
||||
|
||||
if played is None and games_per_matchup is None:
|
||||
raise ValueError("Provide either played or games_per_matchup")
|
||||
|
||||
if players is None:
|
||||
players = _all_players()
|
||||
n = len(players)
|
||||
ranked = rank_players(wins, games_per_matchup, players) # worst-to-best indices
|
||||
if ranked is None:
|
||||
ranked = rank_players(wins, players, played=played, games_per_matchup=games_per_matchup)
|
||||
|
||||
labels = [_player_label(*players[i]) for i in ranked]
|
||||
|
||||
# Build value matrix: (p1_wins - p2_wins) / games_per_matchup ∈ [-1, 1], NaN on diagonal
|
||||
def games(i: int, j: int) -> int:
|
||||
return_value = played[(i, j)] if played is not None else games_per_matchup
|
||||
return return_value if return_value is not None else 0
|
||||
|
||||
# Build value matrix: (p1_wins - p2_wins) / total_games ∈ [-1, 1]
|
||||
matrix = np.full((n, n), np.nan)
|
||||
for row, i in enumerate(ranked):
|
||||
for col, j in enumerate(ranked):
|
||||
g = games(i, j)
|
||||
p1_wins = wins.get((i, j), 0)
|
||||
matrix[row, col] = (p1_wins - (games_per_matchup - p1_wins)) / games_per_matchup
|
||||
matrix[row, col] = (p1_wins - (g - p1_wins)) / g if g > 0 else 0.0
|
||||
|
||||
cell_size = 0.22
|
||||
fig_size = n * cell_size + 3
|
||||
@@ -398,8 +588,9 @@ def draw_grid(
|
||||
# × marks for sweeps
|
||||
for row, i in enumerate(ranked):
|
||||
for col, j in enumerate(ranked):
|
||||
g = games(i, j)
|
||||
p1_wins = wins.get((i, j), 0)
|
||||
if p1_wins == games_per_matchup or p1_wins == 0:
|
||||
if p1_wins == g or p1_wins == 0:
|
||||
ax.text(col, row, "×", ha="center", va="center",
|
||||
fontsize=5, color="black", fontweight="bold", zorder=3)
|
||||
|
||||
@@ -427,14 +618,26 @@ def draw_grid(
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: run an adaptive tournament over the chosen
    # difficulty levels, save the raw results, draw the result grid, and
    # print the final ranking.
    difficulties = list(range(7, 11))

    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins, played = asyncio.run(run_tournament_adaptive(
        card_pool,
        difficulties=difficulties,
        min_games=20,
        max_games=1000,
        p_decisive=0.65,
        alpha=0.05,
    ))
    save_tournament(wins, players=players, played=played)

    # Compute the ratings once here and hand the ordering to draw_grid so the
    # printed ranking and the grid use the same order.
    ratings = compute_bradley_terry(wins, len(players), played=played)
    ranked = sorted(range(len(players)), key=lambda i: ratings[i])  # worst-to-best
    draw_grid(wins, players=players, played=played, ranked=ranked)

    # These are Bradley-Terry strengths (normalized to mean 1.0), not Elo
    # ratings, so label them as such and print enough decimals to tell
    # neighbouring players apart.
    print("\nFinal Bradley-Terry strengths (best to worst):")
    for rank, i in enumerate(reversed(ranked), 1):
        personality, difficulty = players[i]
        label = _player_label(personality, difficulty)
        print(f"  {rank:2d}. {label:<12} {ratings[i]:.3f}")
|
||||
|
||||
Reference in New Issue
Block a user