🐐
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -2,3 +2,6 @@
|
|||||||
__pycache__/
|
__pycache__/
|
||||||
.svelte-kit/
|
.svelte-kit/
|
||||||
.env
|
.env
|
||||||
|
|
||||||
|
backend/simulation_cards.json
|
||||||
|
backend/tournament_grid.png
|
||||||
463
backend/ai.py
463
backend/ai.py
@@ -3,9 +3,10 @@ import random
|
|||||||
import logging
|
import logging
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from enum import Enum
|
from enum import Enum
|
||||||
from itertools import combinations
|
from itertools import combinations, permutations
|
||||||
|
import numpy as np
|
||||||
from card import Card
|
from card import Card
|
||||||
from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE
|
from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE, PlayerState
|
||||||
|
|
||||||
logger = logging.getLogger("app")
|
logger = logging.getLogger("app")
|
||||||
|
|
||||||
@@ -18,6 +19,7 @@ class AIPersonality(Enum):
|
|||||||
GREEDY = "greedy" # Prioritizes high cost cards, willing to sacrifice
|
GREEDY = "greedy" # Prioritizes high cost cards, willing to sacrifice
|
||||||
SWARM = "swarm" # Prefers low cost cards, fills board quickly
|
SWARM = "swarm" # Prefers low cost cards, fills board quickly
|
||||||
CONTROL = "control" # Focuses on board control and efficiency
|
CONTROL = "control" # Focuses on board control and efficiency
|
||||||
|
SHOCKER = "shocker" # Cheap high-defense walls + a few powerful high-attack finishers
|
||||||
ARBITRARY = "arbitrary" # Just does whatever
|
ARBITRARY = "arbitrary" # Just does whatever
|
||||||
|
|
||||||
def get_random_personality() -> AIPersonality:
|
def get_random_personality() -> AIPersonality:
|
||||||
@@ -40,78 +42,70 @@ def get_power_curve_value(card) -> float:
|
|||||||
def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
|
def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
|
||||||
BUDGET = 50
|
BUDGET = 50
|
||||||
|
|
||||||
logger.info(f"Personality: {personality.value}")
|
|
||||||
logger.info(f"Difficulty: {difficulty}")
|
|
||||||
card_strings = [
|
|
||||||
f"{c.name} {c.cost}"
|
|
||||||
for c in sorted(cards, key=lambda x: x.cost)[::-1][:20]
|
|
||||||
]
|
|
||||||
logger.info("Cards:\n"+("\n".join(card_strings)))
|
|
||||||
|
|
||||||
# God cards (cost 7-11) are gated by difficulty. Below difficulty 7 they are excluded.
|
|
||||||
# Each level from 7 upward unlocks a higher cost tier; at difficulty 10 all are allowed.
|
|
||||||
if difficulty >= 6:
|
if difficulty >= 6:
|
||||||
max_card_cost = difficulty+1
|
max_card_cost = difficulty + 1
|
||||||
else:
|
else:
|
||||||
max_card_cost = 6
|
max_card_cost = 6
|
||||||
|
|
||||||
allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)
|
allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)
|
||||||
|
|
||||||
def card_score(card: Card) -> float:
|
# Vectorized scoring over all allowed cards at once
|
||||||
pcv = get_power_curve_value(card)
|
atk = np.array([c.attack for c in allowed], dtype=np.float32)
|
||||||
# Normalize pcv to [0, 1].
|
defn = np.array([c.defense for c in allowed], dtype=np.float32)
|
||||||
pcv_norm = max(0.0, min(1.0, pcv))
|
cost = np.array([c.cost for c in allowed], dtype=np.float32)
|
||||||
|
|
||||||
cost_norm = card.cost / max_card_cost # [0, 1]; higher = more expensive
|
exact_cost = np.minimum(11.0, np.maximum(1.0, ((atk**2 + defn**2)**0.18) / 1.5))
|
||||||
total = card.attack + card.defense
|
pcv_norm = np.clip(exact_cost - cost, 0.0, 1.0)
|
||||||
atk_ratio = card.attack / total if total else 0.5
|
cost_norm = cost / max_card_cost
|
||||||
|
totals = atk + defn
|
||||||
|
atk_ratio = np.where(totals > 0, atk / totals, 0.5)
|
||||||
|
def_not_one = np.where(defn != 1, 1.0, 0.0)
|
||||||
|
|
||||||
if personality == AIPersonality.AGGRESSIVE:
|
if personality == AIPersonality.AGGRESSIVE:
|
||||||
# Prefers high-attack cards; slight bias toward high cost for raw power
|
# (1-cost_norm) penalizes expensive cards. High-attack cards are inherently expensive,
|
||||||
return 0.50 * atk_ratio + 0.30 * pcv_norm + 0.20 * cost_norm
|
# so without this the second pass drifts toward costly cards at higher difficulty,
|
||||||
|
# shrinking the deck. The bonus grows with max_card_cost and exactly offsets that drift.
|
||||||
|
scores = 0.50 * atk_ratio + 0.35 * pcv_norm + 0.15 * (1.0 - cost_norm) + 0.10 * def_not_one
|
||||||
|
elif personality == AIPersonality.DEFENSIVE:
|
||||||
|
# Small (1-cost_norm) for the same anti-shrinkage reason; lighter because high-defense
|
||||||
|
# cards don't correlate as strongly with cost as high-attack cards do.
|
||||||
|
scores = 0.10 * (1.0 - atk_ratio) + 0.80 * pcv_norm + 0.10 * cost_norm
|
||||||
|
elif personality == AIPersonality.GREEDY:
|
||||||
|
# Small cost_norm keeps flavour without causing severe deck shrinkage at D10
|
||||||
|
scores = 0.20 * cost_norm + 0.80 * pcv_norm
|
||||||
|
elif personality == AIPersonality.SWARM:
|
||||||
|
scores = 0.40 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm + 0.05 * def_not_one
|
||||||
|
elif personality == AIPersonality.CONTROL:
|
||||||
|
# Small cost_norm keeps flavour without causing severe deck shrinkage at D10
|
||||||
|
scores = 0.85 * pcv_norm + 0.15 * cost_norm
|
||||||
|
elif personality == AIPersonality.BALANCED:
|
||||||
|
scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
|
||||||
|
elif personality == AIPersonality.SHOCKER:
|
||||||
|
# Both cheap walls and expensive finishers want high attack.
|
||||||
|
# (1-cost_norm) drives first-pass cheap-card selection; pcv_norm drives second-pass finishers.
|
||||||
|
# defense_ok zeros out cards with defense==1 on the first term so fragile walls are excluded.
|
||||||
|
# cost-11 cards have pcv=0 so they score near-zero and never shrink the deck.
|
||||||
|
scores = atk_ratio * (1.0 - cost_norm) * def_not_one + atk_ratio * pcv_norm
|
||||||
|
else: # ARBITRARY
|
||||||
|
w = 0.05 * difficulty
|
||||||
|
scores = w * pcv_norm + (1.0 - w) * np.random.random(len(allowed)).astype(np.float32)
|
||||||
|
|
||||||
if personality == AIPersonality.DEFENSIVE:
|
# Small noise floor at D10 prevents fully deterministic deck building.
|
||||||
# Prefers high-defense cards; same cost bias
|
# A locked-in deck loses every game against counters; tiny randomness avoids this.
|
||||||
return 0.50 * (1.0 - atk_ratio) + 0.30 * pcv_norm + 0.20 * cost_norm
|
noise = max(0.03, (10 - difficulty) / 9.0) * 0.50
|
||||||
|
scores = scores + np.random.normal(0, noise, len(allowed)).astype(np.float32)
|
||||||
|
|
||||||
if personality == AIPersonality.GREEDY:
|
order = np.argsort(-scores)
|
||||||
# Fills budget with the fewest, most expensive cards possible
|
sorted_cards = [allowed[i] for i in order]
|
||||||
return 0.70 * cost_norm + 0.30 * pcv_norm
|
|
||||||
|
|
||||||
if personality == AIPersonality.SWARM:
|
|
||||||
# Cheap cards
|
|
||||||
return 0.45 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm
|
|
||||||
|
|
||||||
if personality == AIPersonality.CONTROL:
|
|
||||||
# Values efficiency above all: wants cards that are above the power curve,
|
|
||||||
# with a secondary preference for higher cost
|
|
||||||
return 0.70 * pcv_norm + 0.30 * cost_norm
|
|
||||||
|
|
||||||
if personality == AIPersonality.BALANCED:
|
|
||||||
# Blends everything: efficiency, cost spread, and a slight attack lean
|
|
||||||
return 0.40 * pcv_norm + 0.35 * cost_norm + 0.15 * atk_ratio + 0.10 * (1.0 - atk_ratio)
|
|
||||||
|
|
||||||
# ARBITRARY: mostly random at lower difficulties
|
|
||||||
return (0.05 * difficulty) * pcv_norm + (1 - (0.05 * difficulty)) * random.random()
|
|
||||||
|
|
||||||
# Higher difficulty -> less noise -> more optimal deck composition
|
|
||||||
noise = ((10 - difficulty) / 9.0) * 0.50
|
|
||||||
|
|
||||||
scored = sorted(
|
|
||||||
[(card_score(c) + random.gauss(0, noise), c) for c in allowed],
|
|
||||||
key=lambda x: x[0],
|
|
||||||
reverse=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Minimum budget reserved for cheap (cost 1-3) cards to ensure early-game presence.
|
|
||||||
# Without cheap cards the AI will play nothing for the first several turns.
|
|
||||||
early_budget = {
|
early_budget = {
|
||||||
AIPersonality.GREEDY: 4,
|
AIPersonality.GREEDY: 20, # cheap cards are sacrifice fodder for big plays
|
||||||
AIPersonality.SWARM: 12,
|
AIPersonality.SWARM: 12,
|
||||||
AIPersonality.AGGRESSIVE: 8,
|
AIPersonality.AGGRESSIVE: 18, # raised: ensures cheap high-attack fodder regardless of difficulty
|
||||||
AIPersonality.DEFENSIVE: 10,
|
AIPersonality.DEFENSIVE: 15, # raised: stable cheap-card base across difficulty levels
|
||||||
AIPersonality.CONTROL: 8,
|
AIPersonality.CONTROL: 8,
|
||||||
AIPersonality.BALANCED: 10,
|
AIPersonality.BALANCED: 25, # spread the deck across all cost levels
|
||||||
|
AIPersonality.SHOCKER: 15, # ~15 cost-1 shields, then expensive attackers fill remaining budget
|
||||||
AIPersonality.ARBITRARY: 8,
|
AIPersonality.ARBITRARY: 8,
|
||||||
}[personality]
|
}[personality]
|
||||||
|
|
||||||
@@ -120,7 +114,7 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
|
|||||||
|
|
||||||
# First pass: secure early-game cards
|
# First pass: secure early-game cards
|
||||||
cheap_spent = 0
|
cheap_spent = 0
|
||||||
for _, card in scored:
|
for card in sorted_cards:
|
||||||
if cheap_spent >= early_budget:
|
if cheap_spent >= early_budget:
|
||||||
break
|
break
|
||||||
if card.cost > 3 or total_cost + card.cost > BUDGET:
|
if card.cost > 3 or total_cost + card.cost > BUDGET:
|
||||||
@@ -131,7 +125,7 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
|
|||||||
|
|
||||||
# Second pass: fill remaining budget greedily by score
|
# Second pass: fill remaining budget greedily by score
|
||||||
taken = {id(c) for c in selected}
|
taken = {id(c) for c in selected}
|
||||||
for _, card in scored:
|
for card in sorted_cards:
|
||||||
if total_cost >= BUDGET:
|
if total_cost >= BUDGET:
|
||||||
break
|
break
|
||||||
if id(card) in taken or total_cost + card.cost > BUDGET:
|
if id(card) in taken or total_cost + card.cost > BUDGET:
|
||||||
@@ -139,13 +133,6 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
|
|||||||
selected.append(card)
|
selected.append(card)
|
||||||
total_cost += card.cost
|
total_cost += card.cost
|
||||||
|
|
||||||
|
|
||||||
card_strings = [
|
|
||||||
f"{c.name} {c.cost}"
|
|
||||||
for c in sorted(selected, key=lambda x: x.cost)
|
|
||||||
]
|
|
||||||
logger.info("Selected:\n"+("\n".join(card_strings)))
|
|
||||||
|
|
||||||
return selected
|
return selected
|
||||||
|
|
||||||
|
|
||||||
@@ -182,13 +169,6 @@ def _plans_for_sacrifice(player, opponent, sacrifice_slots):
|
|||||||
empty_slots = [i for i, c in enumerate(board) if c is None]
|
empty_slots = [i for i, c in enumerate(board) if c is None]
|
||||||
en_board = opponent.board
|
en_board = opponent.board
|
||||||
|
|
||||||
# For scoring: open enemy slots first so the simulation reflects
|
|
||||||
# direct-damage potential accurately.
|
|
||||||
scoring_slots = (
|
|
||||||
[s for s in empty_slots if en_board[s] is None] +
|
|
||||||
[s for s in empty_slots if en_board[s] is not None]
|
|
||||||
)
|
|
||||||
|
|
||||||
return [
|
return [
|
||||||
MovePlan(
|
MovePlan(
|
||||||
sacrifice_slots=list(sacrifice_slots),
|
sacrifice_slots=list(sacrifice_slots),
|
||||||
@@ -196,6 +176,7 @@ def _plans_for_sacrifice(player, opponent, sacrifice_slots):
|
|||||||
label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
|
label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
|
||||||
)
|
)
|
||||||
for cards in _affordable_subsets(hand, energy)
|
for cards in _affordable_subsets(hand, energy)
|
||||||
|
for scoring_slots in permutations(empty_slots, len(cards))
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@@ -214,230 +195,152 @@ def generate_plans(player, opponent) -> list[MovePlan]:
|
|||||||
|
|
||||||
return plans
|
return plans
|
||||||
|
|
||||||
|
# ==================== Turn execution ====================
|
||||||
|
|
||||||
def score_plan(plan: MovePlan, player, opponent, personality: AIPersonality) -> float:
|
def score_plans_batch(
|
||||||
"""
|
plans: list[MovePlan],
|
||||||
Score a plan from ~0.0 to ~1.0 based on the projected board state after
|
player: PlayerState,
|
||||||
executing it. Higher is better.
|
opponent: PlayerState,
|
||||||
"""
|
personality: AIPersonality,
|
||||||
# Simulate board after sacrifices + plays
|
) -> np.ndarray:
|
||||||
|
n = len(plans)
|
||||||
|
|
||||||
|
# Pre-compute PCV for every hand card once
|
||||||
|
pcv_cache = {
|
||||||
|
id(c): max(0.0, min(1.0, get_power_curve_value(c)))
|
||||||
|
for c in player.hand
|
||||||
|
}
|
||||||
|
|
||||||
|
# Build board-state arrays with one Python loop (unavoidable)
|
||||||
|
board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
|
||||||
|
board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
|
||||||
|
n_sac = np.zeros(n, dtype=np.float32)
|
||||||
|
sac_val = np.zeros(n, dtype=np.float32)
|
||||||
|
play_val = np.zeros(n, dtype=np.float32)
|
||||||
|
pcv_score = np.full(n, 0.5, dtype=np.float32)
|
||||||
|
|
||||||
|
for idx, plan in enumerate(plans):
|
||||||
board = list(player.board)
|
board = list(player.board)
|
||||||
energy = player.energy
|
|
||||||
for slot in plan.sacrifice_slots:
|
for slot in plan.sacrifice_slots:
|
||||||
if board[slot] is not None:
|
board_slot = board[slot]
|
||||||
|
if board_slot is not None:
|
||||||
|
sac_val[idx] += board_slot.cost
|
||||||
board[slot] = None
|
board[slot] = None
|
||||||
energy += 1
|
n_sac[idx] = len(plan.sacrifice_slots)
|
||||||
for card, slot in plan.plays:
|
for card, slot in plan.plays:
|
||||||
board[slot] = card
|
board[slot] = card
|
||||||
|
play_val[idx] += card.cost
|
||||||
en_board = opponent.board
|
|
||||||
enemy_occupied = sum(1 for c in en_board if c is not None)
|
|
||||||
|
|
||||||
# --- Combat metrics ---
|
|
||||||
direct_damage = 0 # AI attacks going straight to opponent life
|
|
||||||
board_damage = 0 # AI attacks hitting enemy cards
|
|
||||||
blocking_slots = 0 # Slots where AI blocks an enemy card
|
|
||||||
cards_destroyed = 0 # Enemy cards the AI would destroy this turn
|
|
||||||
unblocked_incoming = 0 # Enemy attacks that go straight to AI life
|
|
||||||
cards_on_board = 0
|
|
||||||
|
|
||||||
for slot in range(BOARD_SIZE):
|
for slot in range(BOARD_SIZE):
|
||||||
my = board[slot]
|
board_slot = board[slot]
|
||||||
en = en_board[slot]
|
if board_slot is not None:
|
||||||
if my:
|
board_atk[idx, slot] = board_slot.attack
|
||||||
cards_on_board += 1
|
board_occ[idx, slot] = True
|
||||||
if my and en is None:
|
|
||||||
direct_damage += my.attack
|
|
||||||
if my and en:
|
|
||||||
board_damage += my.attack
|
|
||||||
blocking_slots += 1
|
|
||||||
if my.attack >= en.defense:
|
|
||||||
cards_destroyed += 1
|
|
||||||
if not my and en:
|
|
||||||
unblocked_incoming += en.attack
|
|
||||||
|
|
||||||
# --- Normalize to [0, 1] ---
|
|
||||||
# How threatening is the attack relative to what remains of opponent's life?
|
|
||||||
atk_score = min(1.0, direct_damage / max(opponent.life, 1))
|
|
||||||
|
|
||||||
# What fraction of enemy slots are blocked?
|
|
||||||
block_score = (blocking_slots / enemy_occupied) if enemy_occupied > 0 else 1.0
|
|
||||||
|
|
||||||
# What fraction of all slots are filled?
|
|
||||||
cover_score = cards_on_board / BOARD_SIZE
|
|
||||||
|
|
||||||
# What fraction of enemy cards do are destroyed?
|
|
||||||
destroy_score = (cards_destroyed / enemy_occupied) if enemy_occupied > 0 else 0.0
|
|
||||||
|
|
||||||
# How safe is the AI from unblocked hits relative to its own life?
|
|
||||||
threat_score = 1.0 - min(1.0, unblocked_incoming / max(player.life, 1))
|
|
||||||
|
|
||||||
# How many cards compared to the enemy?
|
|
||||||
opponent_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
|
|
||||||
my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
|
|
||||||
attrition_score = my_cards_left/(my_cards_left + opponent_cards_left)
|
|
||||||
|
|
||||||
# Net value: cost of cards played minus cost of cards sacrificed.
|
|
||||||
n_sac = len(plan.sacrifice_slots)
|
|
||||||
sac_value = sum(player.board[s].cost for s in plan.sacrifice_slots if player.board[s] is not None)
|
|
||||||
play_value = sum(c.cost for c, _ in plan.plays)
|
|
||||||
net_value = play_value - sac_value
|
|
||||||
net_value_norm = max(0.0, min(1.0, (net_value + 10) / 20))
|
|
||||||
|
|
||||||
# Sacrifice penalty. Applied as a flat deduction after personality scoring.
|
|
||||||
sacrifice_penalty = 0.0
|
|
||||||
if n_sac > 0:
|
|
||||||
# Penalty 1: wasted energy. Each sacrifice gives +1 energy; if that energy
|
|
||||||
# goes unspent it was pointless. Weighted heavily.
|
|
||||||
energy_leftover = player.energy + n_sac - play_value
|
|
||||||
wasted_sac_energy = max(0, min(n_sac, energy_leftover))
|
|
||||||
wasted_penalty = wasted_sac_energy / n_sac
|
|
||||||
|
|
||||||
# Penalty 2: low-value swap. Each sacrifice should at minimum unlock a card
|
|
||||||
# that costs more than the one removed (net_value > n_sac means each
|
|
||||||
# sacrifice bought at least one extra cost point). Anything less is a bad trade.
|
|
||||||
swap_penalty = max(0.0, min(1.0, (n_sac - net_value) / max(n_sac, 1)))
|
|
||||||
|
|
||||||
sacrifice_penalty = 0.65 * wasted_penalty + 0.35 * swap_penalty
|
|
||||||
|
|
||||||
# Power curve value of the cards played (are they good value for their cost?)
|
|
||||||
if plan.plays:
|
if plan.plays:
|
||||||
pcv_scores = [max(0.0, min(1.0, get_power_curve_value(c))) for c, _ in plan.plays]
|
pcv_vals = [pcv_cache.get(id(c), 0.5) for c, _ in plan.plays]
|
||||||
pcv_score = sum(pcv_scores) / len(pcv_scores)
|
pcv_score[idx] = sum(pcv_vals) / len(pcv_vals)
|
||||||
else:
|
|
||||||
pcv_score = 0.5
|
# Enemy board — same for every plan
|
||||||
|
en_atk = np.array([c.attack if c else 0 for c in opponent.board], dtype=np.float32)
|
||||||
|
en_def = np.array([c.defense if c else 0 for c in opponent.board], dtype=np.float32)
|
||||||
|
en_occ = np.array([c is not None for c in opponent.board], dtype=np.bool_)
|
||||||
|
enemy_occupied = int(en_occ.sum())
|
||||||
|
|
||||||
|
# --- Metrics (all shape (n,)) ---
|
||||||
|
direct_damage = (board_atk * ~en_occ).sum(axis=1)
|
||||||
|
blocking = board_occ & en_occ # (n, 5)
|
||||||
|
blocking_slots = blocking.sum(axis=1).astype(np.float32)
|
||||||
|
cards_on_board = board_occ.sum(axis=1).astype(np.float32)
|
||||||
|
cards_destroyed = ((board_atk >= en_def) & blocking).sum(axis=1).astype(np.float32)
|
||||||
|
unblocked_in = (en_atk * ~board_occ).sum(axis=1)
|
||||||
|
|
||||||
|
atk_score = np.minimum(1.0, direct_damage / max(opponent.life, 1))
|
||||||
|
block_score = blocking_slots / enemy_occupied if enemy_occupied > 0 else np.ones(n, dtype=np.float32)
|
||||||
|
open_slots = BOARD_SIZE - enemy_occupied
|
||||||
|
cover_score = (
|
||||||
|
(cards_on_board - blocking_slots) / open_slots
|
||||||
|
if open_slots > 0
|
||||||
|
else np.ones(n, dtype=np.float32)
|
||||||
|
)
|
||||||
|
destroy_score = cards_destroyed / enemy_occupied if enemy_occupied > 0 else np.zeros(n, dtype=np.float32)
|
||||||
|
threat_score = 1.0 - np.minimum(1.0, unblocked_in / max(player.life, 1))
|
||||||
|
|
||||||
|
opp_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
|
||||||
|
my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
|
||||||
|
attrition_score = my_cards_left / (my_cards_left + max(opp_cards_left, 1))
|
||||||
|
|
||||||
|
net_value = play_val - sac_val
|
||||||
|
net_value_norm = np.clip((net_value + 10) / 20, 0.0, 1.0)
|
||||||
|
|
||||||
|
# --- Sacrifice penalty ---
|
||||||
|
energy_leftover = player.energy + n_sac - play_val
|
||||||
|
wasted_energy = np.maximum(0, np.minimum(n_sac, energy_leftover))
|
||||||
|
wasted_penalty = np.where(n_sac > 0, wasted_energy / np.maximum(n_sac, 1), 0.0)
|
||||||
|
swap_penalty = np.clip((n_sac - net_value) / np.maximum(n_sac, 1), 0.0, 1.0)
|
||||||
|
sac_penalty = np.where(n_sac > 0, 0.65 * wasted_penalty + 0.35 * swap_penalty, 0.0)
|
||||||
|
|
||||||
# --- Personality weights ---
|
# --- Personality weights ---
|
||||||
if personality == AIPersonality.AGGRESSIVE:
|
if personality == AIPersonality.AGGRESSIVE:
|
||||||
# Maximize direct damage
|
score = (0.30 * atk_score + 0.07 * block_score + 0.15 * cover_score +
|
||||||
score = (
|
0.08 * net_value_norm + 0.25 * destroy_score +
|
||||||
0.40 * atk_score +
|
0.08 * attrition_score + 0.04 * pcv_score + 0.03 * threat_score)
|
||||||
0.10 * block_score +
|
|
||||||
0.10 * cover_score +
|
|
||||||
0.10 * net_value_norm +
|
|
||||||
0.15 * destroy_score +
|
|
||||||
0.05 * attrition_score +
|
|
||||||
0.05 * pcv_score +
|
|
||||||
0.05 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
elif personality == AIPersonality.DEFENSIVE:
|
elif personality == AIPersonality.DEFENSIVE:
|
||||||
# Block everything
|
score = (0.12 * atk_score + 0.20 * block_score + 0.18 * cover_score +
|
||||||
score = (
|
0.04 * net_value_norm + 0.18 * destroy_score +
|
||||||
0.05 * atk_score +
|
0.15 * attrition_score + 0.05 * pcv_score + 0.08 * threat_score)
|
||||||
0.35 * block_score +
|
|
||||||
0.20 * cover_score +
|
|
||||||
0.05 * net_value_norm +
|
|
||||||
0.05 * destroy_score +
|
|
||||||
0.10 * attrition_score +
|
|
||||||
0.05 * pcv_score +
|
|
||||||
0.15 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
elif personality == AIPersonality.SWARM:
|
elif personality == AIPersonality.SWARM:
|
||||||
# Fill the board and press with direct damage
|
score = (0.25 * atk_score + 0.10 * block_score + 0.35 * cover_score +
|
||||||
score = (
|
0.05 * net_value_norm + 0.05 * destroy_score +
|
||||||
0.25 * atk_score +
|
0.10 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
|
||||||
0.10 * block_score +
|
|
||||||
0.35 * cover_score +
|
|
||||||
0.05 * net_value_norm +
|
|
||||||
0.05 * destroy_score +
|
|
||||||
0.10 * attrition_score +
|
|
||||||
0.05 * pcv_score +
|
|
||||||
0.05 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
elif personality == AIPersonality.GREEDY:
|
elif personality == AIPersonality.GREEDY:
|
||||||
# High-value card plays, willing to sacrifice weak cards for strong ones
|
score = (0.15 * atk_score + 0.05 * block_score + 0.18 * cover_score +
|
||||||
score = (
|
0.38 * net_value_norm + 0.05 * destroy_score +
|
||||||
0.20 * atk_score +
|
0.09 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
|
||||||
0.05 * block_score +
|
|
||||||
0.10 * cover_score +
|
|
||||||
0.40 * net_value_norm +
|
|
||||||
0.05 * destroy_score +
|
|
||||||
0.05 * attrition_score +
|
|
||||||
0.10 * pcv_score +
|
|
||||||
0.05 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
elif personality == AIPersonality.CONTROL:
|
elif personality == AIPersonality.CONTROL:
|
||||||
# Efficiency
|
score = (0.10 * atk_score + 0.05 * block_score + 0.05 * cover_score +
|
||||||
score = (
|
0.20 * net_value_norm + 0.05 * destroy_score +
|
||||||
0.10 * atk_score +
|
0.10 * attrition_score + 0.40 * pcv_score + 0.05 * threat_score)
|
||||||
0.05 * block_score +
|
|
||||||
0.05 * cover_score +
|
|
||||||
0.20 * net_value_norm +
|
|
||||||
0.05 * destroy_score +
|
|
||||||
0.10 * attrition_score +
|
|
||||||
0.40 * pcv_score +
|
|
||||||
0.05 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
elif personality == AIPersonality.BALANCED:
|
elif personality == AIPersonality.BALANCED:
|
||||||
score = (
|
score = (0.12 * atk_score + 0.13 * block_score + 0.15 * cover_score +
|
||||||
0.10 * atk_score +
|
0.10 * net_value_norm + 0.12 * destroy_score +
|
||||||
0.15 * block_score +
|
0.15 * attrition_score + 0.12 * pcv_score + 0.11 * threat_score)
|
||||||
0.10 * cover_score +
|
elif personality == AIPersonality.SHOCKER:
|
||||||
0.10 * net_value_norm +
|
score = (0.25 * destroy_score + 0.33 * cover_score + 0.18 * atk_score +
|
||||||
0.10 * destroy_score +
|
0.05 * block_score + 0.8 * attrition_score + 0.02 * threat_score +
|
||||||
0.10 * attrition_score +
|
0.05 * net_value_norm + 0.04 * pcv_score)
|
||||||
0.15 * pcv_score +
|
|
||||||
0.10 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
else: # ARBITRARY
|
else: # ARBITRARY
|
||||||
score = (
|
score = (0.60 * np.random.random(n).astype(np.float32) +
|
||||||
0.60 * random.random() +
|
0.05 * atk_score + 0.05 * block_score + 0.05 * cover_score +
|
||||||
0.05 * atk_score +
|
0.05 * net_value_norm + 0.05 * destroy_score +
|
||||||
0.05 * block_score +
|
0.05 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
|
||||||
0.05 * cover_score +
|
|
||||||
0.05 * net_value_norm +
|
|
||||||
0.05 * destroy_score +
|
|
||||||
0.05 * attrition_score +
|
|
||||||
0.05 * pcv_score +
|
|
||||||
0.05 * threat_score
|
|
||||||
)
|
|
||||||
|
|
||||||
# --- Context adjustments ---
|
# --- Context adjustments ---
|
||||||
|
score = np.where(direct_damage >= opponent.life, np.maximum(score, 0.95), score)
|
||||||
|
score = np.where(unblocked_in >= player.life, np.minimum(score, 0.05), score)
|
||||||
|
|
||||||
# Lethal takes priority regardless of personality
|
|
||||||
if direct_damage >= opponent.life:
|
|
||||||
score = max(score, 0.95)
|
|
||||||
|
|
||||||
if unblocked_incoming >= player.life:
|
|
||||||
score = min(score, 0.05)
|
|
||||||
|
|
||||||
# Against god-card decks: cover all slots so their big cards can't attack freely
|
|
||||||
if opponent.deck_type in ("God Card", "Pantheon"):
|
if opponent.deck_type in ("God Card", "Pantheon"):
|
||||||
score = min(1.0, score + 0.08 * cover_score)
|
score = np.minimum(1.0, score + 0.08 * cover_score)
|
||||||
|
|
||||||
# Against aggro/rush: need to block more urgently
|
|
||||||
if opponent.deck_type in ("Aggro", "Rush"):
|
if opponent.deck_type in ("Aggro", "Rush"):
|
||||||
score = min(1.0, score + 0.06 * block_score + 0.04 * threat_score)
|
score = np.minimum(1.0, score + 0.06 * block_score + 0.04 * threat_score)
|
||||||
|
|
||||||
# Against wall decks: direct damage matters more than destroying cards
|
|
||||||
if opponent.deck_type == "Wall":
|
if opponent.deck_type == "Wall":
|
||||||
score = min(1.0, score + 0.06 * atk_score)
|
score = np.minimum(1.0, score + 0.06 * atk_score)
|
||||||
|
|
||||||
# Press the advantage when opponent is low on life
|
|
||||||
if opponent.life < STARTING_LIFE * 0.3:
|
if opponent.life < STARTING_LIFE * 0.3:
|
||||||
score = min(1.0, score + 0.06 * atk_score)
|
score = np.minimum(1.0, score + 0.06 * atk_score)
|
||||||
|
|
||||||
# Prioritize survival when low on life
|
|
||||||
if player.life < STARTING_LIFE * 0.3:
|
if player.life < STARTING_LIFE * 0.3:
|
||||||
score = min(1.0, score + 0.06 * threat_score + 0.04 * block_score)
|
score = np.minimum(1.0, score + 0.06 * threat_score + 0.04 * block_score)
|
||||||
|
if opp_cards_left <= 5:
|
||||||
|
score = np.where(cards_on_board > 0, np.minimum(1.0, score + 0.05), score)
|
||||||
|
|
||||||
# Opponent running low on cards: keep a card on board for attrition win condition
|
return np.maximum(0.0, score - sac_penalty)
|
||||||
if opponent_cards_left <= 5 and cards_on_board > 0:
|
|
||||||
score = min(1.0, score + 0.05)
|
|
||||||
|
|
||||||
# Apply sacrifice penalty last so it can override all other considerations.
|
|
||||||
score = max(0.0, score - sacrifice_penalty)
|
|
||||||
|
|
||||||
return score
|
|
||||||
|
|
||||||
|
|
||||||
# ==================== Turn execution ====================
|
async def choose_plan(player: PlayerState, opponent: PlayerState, personality: AIPersonality, difficulty: int) -> MovePlan:
|
||||||
|
plans = generate_plans(player, opponent)
|
||||||
|
|
||||||
|
scores = score_plans_batch(plans, player, opponent, personality)
|
||||||
|
|
||||||
|
noise_scale = (max(0,11 - difficulty)**2) * 0.01 - 0.01
|
||||||
|
noise = np.random.normal(0, noise_scale, len(scores)).astype(np.float32)
|
||||||
|
return plans[int(np.argmax(scores + noise))]
|
||||||
|
|
||||||
async def run_ai_turn(game_id: str):
|
async def run_ai_turn(game_id: str):
|
||||||
from game_manager import (
|
from game_manager import (
|
||||||
@@ -485,24 +388,10 @@ async def run_ai_turn(game_id: str):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
# --- Generate and score candidate plans ---
|
# --- Generate and score candidate plans ---
|
||||||
plans = generate_plans(player, opponent)
|
best_plan = await choose_plan(player, opponent, personality, difficulty)
|
||||||
|
|
||||||
if difficulty <= 2:
|
|
||||||
# Actively bad
|
|
||||||
scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, 0.15*difficulty), p)
|
|
||||||
for p in plans]
|
|
||||||
best_plan = min(scored, key=lambda x: x[0])[1]
|
|
||||||
elif difficulty == 3:
|
|
||||||
# Fully random
|
|
||||||
best_plan = random.choice(plans)
|
|
||||||
else:
|
|
||||||
noise = max(0.0, ((8 - difficulty) / 6.0) * 0.30)
|
|
||||||
scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, noise), p)
|
|
||||||
for p in plans]
|
|
||||||
best_plan = max(scored, key=lambda x: x[0])[1]
|
|
||||||
|
|
||||||
logger.info(
|
logger.info(
|
||||||
f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} plans={len(plans)} " +
|
f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} " +
|
||||||
f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
|
f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -541,7 +541,17 @@ async def _get_specific_card_async(title: str) -> Card|None:
|
|||||||
|
|
||||||
# Sync entrypoints
|
# Sync entrypoints
|
||||||
def generate_cards(size: int) -> list[Card]:
|
def generate_cards(size: int) -> list[Card]:
|
||||||
return asyncio.run(_get_cards_async(size))
|
cards = []
|
||||||
|
remaining = size
|
||||||
|
while remaining > 0:
|
||||||
|
batch = min(remaining,10)
|
||||||
|
logger.warning(f"Generating {batch} cards ({len(cards)}/{size})")
|
||||||
|
cards += asyncio.run(_get_cards_async(batch))
|
||||||
|
remaining = size - len(cards)
|
||||||
|
if remaining > 0:
|
||||||
|
sleep(4)
|
||||||
|
|
||||||
|
return cards
|
||||||
|
|
||||||
def generate_card(title: str) -> Card|None:
|
def generate_card(title: str) -> Card|None:
|
||||||
return asyncio.run(_get_specific_card_async(title))
|
return asyncio.run(_get_specific_card_async(title))
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from datetime import datetime
|
|||||||
|
|
||||||
from models import Card as CardModel
|
from models import Card as CardModel
|
||||||
|
|
||||||
STARTING_LIFE = 500
|
STARTING_LIFE = 1000
|
||||||
MAX_ENERGY_CAP = 6
|
MAX_ENERGY_CAP = 6
|
||||||
BOARD_SIZE = 5
|
BOARD_SIZE = 5
|
||||||
HAND_SIZE = 5
|
HAND_SIZE = 5
|
||||||
|
|||||||
@@ -538,11 +538,7 @@ if __name__ == "__main__":
|
|||||||
from card import generate_cards, Card
|
from card import generate_cards, Card
|
||||||
from time import sleep
|
from time import sleep
|
||||||
|
|
||||||
all_cards: list[Card] = []
|
all_cards = generate_cards(500)
|
||||||
for i in range(30):
|
|
||||||
print(i)
|
|
||||||
all_cards += generate_cards(10)
|
|
||||||
sleep(5)
|
|
||||||
|
|
||||||
all_cards.sort(key=lambda x: x.cost, reverse=True)
|
all_cards.sort(key=lambda x: x.cost, reverse=True)
|
||||||
|
|
||||||
|
|||||||
440
backend/simulate.py
Normal file
440
backend/simulate.py
Normal file
@@ -0,0 +1,440 @@
|
|||||||
|
import json
|
||||||
|
import os
|
||||||
|
import random
|
||||||
|
import uuid
|
||||||
|
import asyncio
|
||||||
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
|
||||||
|
from card import Card, CardType, CardRarity, generate_cards, compute_deck_type
|
||||||
|
from game import (
|
||||||
|
CardInstance, PlayerState, GameState,
|
||||||
|
action_play_card, action_sacrifice, action_end_turn,
|
||||||
|
)
|
||||||
|
from ai import AIPersonality, choose_cards, choose_plan
|
||||||
|
|
||||||
|
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
|
||||||
|
SIMULATION_CARD_COUNT = 1000
|
||||||
|
|
||||||
|
|
||||||
|
# ==================== Card pool ====================
|
||||||
|
|
||||||
|
def _card_to_dict(card: Card) -> dict:
    """Serialize a Card into a JSON-compatible dict (inverse of _dict_to_card)."""
    serialized = {
        "name": card.name,
        # datetime and the two enums are not JSON-native; flatten to strings.
        "created_at": card.created_at.isoformat(),
        "image_link": card.image_link,
        "card_rarity": card.card_rarity.name,
        "card_type": card.card_type.name,
        "wikidata_instance": card.wikidata_instance,
        "text": card.text,
        "attack": card.attack,
        "defense": card.defense,
        "cost": card.cost,
    }
    return serialized
|
def _dict_to_card(d: dict) -> Card:
    """Deserialize a dict produced by _card_to_dict back into a Card."""
    # Rebuild the non-JSON-native fields from their string forms first.
    created = datetime.fromisoformat(d["created_at"])
    rarity = CardRarity[d["card_rarity"]]
    kind = CardType[d["card_type"]]
    return Card(
        name=d["name"],
        created_at=created,
        image_link=d["image_link"],
        card_rarity=rarity,
        card_type=kind,
        wikidata_instance=d["wikidata_instance"],
        text=d["text"],
        attack=d["attack"],
        defense=d["defense"],
        cost=d["cost"],
    )
|
def get_simulation_cards() -> list[Card]:
    """
    Return the shared simulation card pool.

    Loads the cached JSON pool if it exists; otherwise generates
    SIMULATION_CARD_COUNT cards, writes the cache, and returns them.
    """
    # Fast path: a cached pool is already on disk.
    if os.path.exists(SIMULATION_CARDS_PATH):
        with open(SIMULATION_CARDS_PATH, "r", encoding="utf-8") as f:
            raw = json.load(f)
        return [_dict_to_card(entry) for entry in raw]

    print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
    cards = generate_cards(SIMULATION_CARD_COUNT)

    # Cache the pool so subsequent runs skip generation entirely.
    with open(SIMULATION_CARDS_PATH, "w", encoding="utf-8") as f:
        json.dump([_card_to_dict(c) for c in cards], f, ensure_ascii=False, indent=2)

    print(f"Saved {len(cards)} cards to {SIMULATION_CARDS_PATH}")
    return cards
||||||
|
# ==================== Single game ====================
|
||||||
|
|
||||||
|
PLAYER1_ID = "p1"
|
||||||
|
PLAYER2_ID = "p2"
|
||||||
|
MAX_TURNS = 300 # safety cap to prevent infinite games
|
||||||
|
|
||||||
|
|
||||||
|
def _make_instances(deck: list[Card]) -> list[CardInstance]:
    """Wrap every Card of `deck` in a fresh CardInstance (full defense, unique id)."""
    instances: list[CardInstance] = []
    for card in deck:
        # These cards have no DB id in the simulation, so the name doubles as card_id.
        inst = CardInstance(
            instance_id=str(uuid.uuid4()),
            card_id=card.name,
            name=card.name,
            attack=card.attack,
            defense=card.defense,
            max_defense=card.defense,
            cost=card.cost,
            card_type=card.card_type.name,
            card_rarity=card.card_rarity.name,
            image_link=card.image_link or "",
            text=card.text or "",
        )
        instances.append(inst)
    return instances
|
||||||
|
async def simulate_game(
    cards: list[Card],
    difficulty1: int,
    personality1: AIPersonality,
    difficulty2: int,
    personality2: AIPersonality,
) -> str | None:
    """
    Simulate a single game between two AIs choosing from `cards`.
    Player 1 always goes first.

    Returns "p1", "p2", or None if the game exceeds MAX_TURNS.

    Designed to be awaited inside asyncio.gather() to run many games concurrently.
    """
    # Each AI drafts its own deck from the shared pool.
    deck1 = choose_cards(cards, difficulty1, personality1)
    deck2 = choose_cards(cards, difficulty2, personality2)

    instances1 = _make_instances(deck1)
    instances2 = _make_instances(deck2)
    random.shuffle(instances1)
    random.shuffle(instances2)

    deck_type1 = compute_deck_type(deck1) or "Balanced"
    deck_type2 = compute_deck_type(deck2) or "Balanced"

    p1 = PlayerState(user_id=PLAYER1_ID, username="AI1", deck_type=deck_type1, deck=instances1)
    p2 = PlayerState(user_id=PLAYER2_ID, username="AI2", deck_type=deck_type2, deck=instances2)

    # P1 always goes first: both get their energy cap bump, but only P1
    # refills energy and draws before turn 1.
    p1.increment_energy_cap()
    p2.increment_energy_cap()
    p1.refill_energy()
    p1.draw_to_full()

    state = GameState(
        game_id=str(uuid.uuid4()),
        players={PLAYER1_ID: p1, PLAYER2_ID: p2},
        player_order=[PLAYER1_ID, PLAYER2_ID],
        active_player_id=PLAYER1_ID,
        phase="main",
        turn=1,
    )

    # Per-player AI configuration, looked up by whoever is active.
    configs = {
        PLAYER1_ID: (difficulty1, personality1),
        PLAYER2_ID: (difficulty2, personality2),
    }

    for _ in range(MAX_TURNS):
        if state.result:
            break

        active_id = state.active_player_id
        difficulty, personality = configs[active_id]
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]

        # Ask the AI for a full turn plan (sacrifices + plays).
        plan = await choose_plan(player, opponent, personality, difficulty)

        # Sacrifices first: they free board slots before cards are played.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)

        # Plays are shuffled, so invalid entries are skipped in random order.
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            # The plan references hand cards by identity; re-locate the index
            # since earlier plays shift the hand.
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None:
                continue
            if player.board[slot] is not None:
                continue
            if card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)

        action_end_turn(state)

    # None when the turn cap was hit without a decisive result.
    if state.result and state.result.winner_id:
        return state.result.winner_id
    return None
||||||
|
# ==================== Process-pool worker ====================
|
||||||
|
# These must be module-level so they are picklable.
|
||||||
|
|
||||||
|
_worker_cards: list[Card] = []
|
||||||
|
|
||||||
|
def _init_worker(cards: list[Card]) -> None:
    """ProcessPoolExecutor initializer: stash the shared card pool once per worker."""
    # Stored at module level so _run_game_sync can reach it without re-pickling
    # the pool for every game.
    global _worker_cards
    _worker_cards = cards
|
def _run_game_sync(args: tuple) -> str | None:
    """Synchronous entry point for a worker process."""
    # args is all-primitive so it pickles cheaply; rebuild the enums here.
    difficulty1, personality1, difficulty2, personality2 = args
    coro = simulate_game(
        _worker_cards,
        difficulty1, AIPersonality(personality1),
        difficulty2, AIPersonality(personality2),
    )
    return asyncio.run(coro)
|
# ==================== Tournament ====================
|
||||||
|
|
||||||
|
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
    """Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
    levels = list(range(1, 11)) if difficulties is None else difficulties
    combos: list[tuple[AIPersonality, int]] = []
    for personality in AIPersonality:
        for level in levels:
            combos.append((personality, level))
    return combos
||||||
|
def _player_label(personality: AIPersonality, difficulty: int) -> str:
    """Short grid label, e.g. "GRE-3": first three personality letters plus difficulty."""
    prefix = personality.value[:3].upper()
    return f"{prefix}-{difficulty}"
|
async def run_tournament(
    cards: list[Card],
    games_per_matchup: int = 5,
    difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Pit every (personality, difficulty) pair against every other, as both
    first and second player.

    `difficulties` selects which difficulty levels to include (default: 1-10).

    Returns a wins dict keyed by (first_player_index, second_player_index)
    where the value is how many of `games_per_matchup` games the first player won.
    Tied games (turn cap reached) are counted separately and credited to neither.

    Games run in parallel across all CPU cores via ProcessPoolExecutor.
    Cards are sent to each worker once at startup, not once per game.
    """
    players = _all_players(difficulties)
    n = len(players)

    # Build the flat list of (i, j, args) for every game.
    # args is all-primitive (ints + enum value strings) so pickling per game is cheap.
    indexed_args: list[tuple[int, int, tuple]] = []
    for i in range(n):
        p1_personality, p1_difficulty = players[i]
        for j in range(n):
            p2_personality, p2_difficulty = players[j]
            args = (p1_difficulty, p1_personality.value, p2_difficulty, p2_personality.value)
            for _ in range(games_per_matchup):
                indexed_args.append((i, j, args))

    total_games = len(indexed_args)
    n_workers = os.cpu_count() or 1
    print(f"Running {total_games} games across {n_workers} workers "
          f"({n} players, {games_per_matchup} games per ordered pair)...")

    # Mutable one-element list so the nested coroutine can update the counter.
    done = [0]
    # Throttle progress output to roughly 200 updates total.
    report_every = max(1, total_games // 200)

    loop = asyncio.get_running_loop()

    async def tracked(future):
        # Await one game's future and print an in-place progress line.
        result = await future
        done[0] += 1
        if done[0] % report_every == 0 or done[0] == total_games:
            pct = done[0] / total_games * 100
            print(f" {done[0]}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
        return result

    # initializer/initargs ship `cards` to each worker exactly once.
    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        futures = [
            loop.run_in_executor(executor, _run_game_sync, args)
            for _, _, args in indexed_args
        ]
        results = await asyncio.gather(*[tracked(f) for f in futures])

    print("\nFinished")

    # Fold per-game winners back into per-matchup first-player win counts.
    wins: dict[tuple[int, int], int] = {}
    ties = 0
    for (i, j, _), winner in zip(indexed_args, results):
        key = (i, j)
        if key not in wins:
            wins[key] = 0
        if winner == PLAYER1_ID:
            wins[key] += 1
        elif winner is None:
            ties += 1

    print(f"Ties: {ties}")

    return wins
||||||
|
def rank_players(
    wins: dict[tuple[int, int], int],
    games_per_matchup: int,
    players: list[tuple[AIPersonality, int]],
) -> list[int]:
    """
    Rank player indices by total wins (as first + second player combined).
    Returns indices sorted worst-to-best.

    NOTE(review): `games_per_matchup - first_wins` credits tied games to the
    second player; the wins dict carries no tie count, so that bias is
    unavoidable at this layer.
    """
    n = len(players)
    total_wins = [0] * n

    for (first, second), first_wins in wins.items():
        # Self-matchups are symmetric; skip to avoid double-counting.
        if first == second:
            continue
        total_wins[first] += first_wins
        total_wins[second] += games_per_matchup - first_wins

    return sorted(range(n), key=total_wins.__getitem__)
|
||||||
|
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
|
||||||
|
|
||||||
|
|
||||||
|
def save_tournament(
    wins: dict[tuple[int, int], int],
    games_per_matchup: int,
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
):
    """Persist tournament results as JSON; tuple keys are flattened to "i,j" strings."""
    payload = {
        "games_per_matchup": games_per_matchup,
        "players": [
            {"personality": p.value, "difficulty": d}
            for p, d in players
        ],
        # JSON object keys must be strings, so (i, j) becomes "i,j".
        "wins": {f"{i},{j}": w for (i, j), w in wins.items()},
    }
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)
    print(f"Tournament results saved to {path}")
|
||||||
|
def load_tournament(path: str = TOURNAMENT_RESULTS_PATH) -> tuple[dict[tuple[int, int], int], int, list[tuple[AIPersonality, int]]]:
    """Returns (wins, games_per_matchup, players)."""
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    # Reverse the "i,j" key flattening done by save_tournament.
    wins: dict[tuple[int, int], int] = {}
    for key, value in data["wins"].items():
        i_str, j_str = key.split(",")
        wins[(int(i_str), int(j_str))] = value

    players = [
        (AIPersonality(entry["personality"]), entry["difficulty"])
        for entry in data["players"]
    ]
    return wins, data["games_per_matchup"], players
|
||||||
|
def draw_grid(
    wins: dict[tuple[int, int], int],
    games_per_matchup: int = 5,
    players: list[tuple[AIPersonality, int]] | None = None,
    output_path: str = "tournament_grid.png",
):
    """
    Draw a heatmap grid of tournament results.

    Rows = first player
    Cols = second player
    Color = red if first player won more of their games in that cell
    green if second player won more
    × = one player swept all games in that cell
    """
    # Imported lazily so running simulations does not require matplotlib.
    import matplotlib
    matplotlib.use("Agg")  # headless backend: render straight to a file
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import numpy as np

    if players is None:
        players = _all_players()
    n = len(players)
    ranked = rank_players(wins, games_per_matchup, players)  # worst-to-best indices

    labels = [_player_label(*players[i]) for i in ranked]

    # Build value matrix: (p1_wins - p2_wins) / games_per_matchup ∈ [-1, 1].
    # (Initialized to NaN, but the loop below assigns every cell — diagonal
    # included, since self-matchups are also present in `wins`.)
    matrix = np.full((n, n), np.nan)
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            p1_wins = wins.get((i, j), 0)
            matrix[row, col] = (p1_wins - (games_per_matchup - p1_wins)) / games_per_matchup

    # Scale the figure with the player count so labels stay legible.
    cell_size = 0.22
    fig_size = n * cell_size + 3
    fig, ax = plt.subplots(figsize=(fig_size, fig_size))

    cmap = mcolors.LinearSegmentedColormap.from_list(
        "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
    )
    norm = mcolors.Normalize(vmin=-1, vmax=1)

    img = ax.imshow(matrix, cmap=cmap, norm=norm, aspect="equal", interpolation="none")

    # × marks for sweeps (one side won every game in the cell).
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            p1_wins = wins.get((i, j), 0)
            if p1_wins == games_per_matchup or p1_wins == 0:
                ax.text(col, row, "×", ha="center", va="center",
                        fontsize=5, color="black", fontweight="bold", zorder=3)

    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(labels, rotation=90, fontsize=4)
    ax.set_yticklabels(labels, fontsize=4)
    ax.xaxis.set_label_position("top")
    ax.xaxis.tick_top()

    ax.set_xlabel("Second player", labelpad=8, fontsize=8)
    ax.set_ylabel("First player", labelpad=8, fontsize=8)
    ax.set_title(
        "Tournament results — red: first player wins more, green: second player wins more",
        pad=14, fontsize=9,
    )

    plt.colorbar(img, ax=ax, fraction=0.015, pad=0.01,
                 label="(P1 wins - P2 wins) / games per cell")

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Grid saved to {output_path}")
|
||||||
|
if __name__ == "__main__":
    # Run a full tournament, persist the raw results, and render the heatmap.
    # (Removed an unused `import sys` that served no purpose here.)
    GAMES_PER_MATCHUP = 50

    # Difficulty levels to include for every personality.
    difficulties = list(range(1, 11))

    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins = asyncio.run(run_tournament(card_pool, games_per_matchup=GAMES_PER_MATCHUP, difficulties=difficulties))
    save_tournament(wins, games_per_matchup=GAMES_PER_MATCHUP, players=players)
    draw_grid(wins, games_per_matchup=GAMES_PER_MATCHUP, players=players)
||||||
1767
backend/tournament_results.json
Normal file
1767
backend/tournament_results.json
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user