This commit is contained in:
2026-03-19 22:53:33 +01:00
parent fa05447895
commit 4fa0cadc7f
7 changed files with 2399 additions and 294 deletions

3
.gitignore vendored
View File

@@ -2,3 +2,6 @@
__pycache__/ __pycache__/
.svelte-kit/ .svelte-kit/
.env .env
backend/simulation_cards.json
backend/tournament_grid.png

View File

@@ -3,9 +3,10 @@ import random
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from enum import Enum from enum import Enum
from itertools import combinations from itertools import combinations, permutations
import numpy as np
from card import Card from card import Card
from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE, PlayerState
logger = logging.getLogger("app") logger = logging.getLogger("app")
@@ -18,6 +19,7 @@ class AIPersonality(Enum):
GREEDY = "greedy" # Prioritizes high cost cards, willing to sacrifice GREEDY = "greedy" # Prioritizes high cost cards, willing to sacrifice
SWARM = "swarm" # Prefers low cost cards, fills board quickly SWARM = "swarm" # Prefers low cost cards, fills board quickly
CONTROL = "control" # Focuses on board control and efficiency CONTROL = "control" # Focuses on board control and efficiency
SHOCKER = "shocker" # Cheap high-defense walls + a few powerful high-attack finishers
ARBITRARY = "arbitrary" # Just does whatever ARBITRARY = "arbitrary" # Just does whatever
def get_random_personality() -> AIPersonality: def get_random_personality() -> AIPersonality:
@@ -40,78 +42,70 @@ def get_power_curve_value(card) -> float:
def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]: def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
BUDGET = 50 BUDGET = 50
logger.info(f"Personality: {personality.value}")
logger.info(f"Difficulty: {difficulty}")
card_strings = [
f"{c.name} {c.cost}"
for c in sorted(cards, key=lambda x: x.cost)[::-1][:20]
]
logger.info("Cards:\n"+("\n".join(card_strings)))
# God cards (cost 7-11) are gated by difficulty. Below difficulty 7 they are excluded.
# Each level from 7 upward unlocks a higher cost tier; at difficulty 10 all are allowed.
if difficulty >= 6: if difficulty >= 6:
max_card_cost = difficulty+1 max_card_cost = difficulty + 1
else: else:
max_card_cost = 6 max_card_cost = 6
allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards) allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)
def card_score(card: Card) -> float: # Vectorized scoring over all allowed cards at once
pcv = get_power_curve_value(card) atk = np.array([c.attack for c in allowed], dtype=np.float32)
# Normalize pcv to [0, 1]. defn = np.array([c.defense for c in allowed], dtype=np.float32)
pcv_norm = max(0.0, min(1.0, pcv)) cost = np.array([c.cost for c in allowed], dtype=np.float32)
cost_norm = card.cost / max_card_cost # [0, 1]; higher = more expensive exact_cost = np.minimum(11.0, np.maximum(1.0, ((atk**2 + defn**2)**0.18) / 1.5))
total = card.attack + card.defense pcv_norm = np.clip(exact_cost - cost, 0.0, 1.0)
atk_ratio = card.attack / total if total else 0.5 cost_norm = cost / max_card_cost
totals = atk + defn
atk_ratio = np.where(totals > 0, atk / totals, 0.5)
def_not_one = np.where(defn != 1, 1.0, 0.0)
if personality == AIPersonality.AGGRESSIVE: if personality == AIPersonality.AGGRESSIVE:
# Prefers high-attack cards; slight bias toward high cost for raw power # (1-cost_norm) penalizes expensive cards. High-attack cards are inherently expensive,
return 0.50 * atk_ratio + 0.30 * pcv_norm + 0.20 * cost_norm # so without this the second pass drifts toward costly cards at higher difficulty,
# shrinking the deck. The bonus grows with max_card_cost and exactly offsets that drift.
scores = 0.50 * atk_ratio + 0.35 * pcv_norm + 0.15 * (1.0 - cost_norm) + 0.10 * def_not_one
elif personality == AIPersonality.DEFENSIVE:
# Small (1-cost_norm) for the same anti-shrinkage reason; lighter because high-defense
# cards don't correlate as strongly with cost as high-attack cards do.
scores = 0.10 * (1.0 - atk_ratio) + 0.80 * pcv_norm + 0.10 * cost_norm
elif personality == AIPersonality.GREEDY:
# Small cost_norm keeps flavour without causing severe deck shrinkage at D10
scores = 0.20 * cost_norm + 0.80 * pcv_norm
elif personality == AIPersonality.SWARM:
scores = 0.40 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm + 0.05 * def_not_one
elif personality == AIPersonality.CONTROL:
# Small cost_norm keeps flavour without causing severe deck shrinkage at D10
scores = 0.85 * pcv_norm + 0.15 * cost_norm
elif personality == AIPersonality.BALANCED:
scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
elif personality == AIPersonality.SHOCKER:
# Both cheap walls and expensive finishers want high attack.
# (1-cost_norm) drives first-pass cheap-card selection; pcv_norm drives second-pass finishers.
# defense_ok zeros out cards with defense==1 on the first term so fragile walls are excluded.
# cost-11 cards have pcv=0 so they score near-zero and never shrink the deck.
scores = atk_ratio * (1.0 - cost_norm) * def_not_one + atk_ratio * pcv_norm
else: # ARBITRARY
w = 0.05 * difficulty
scores = w * pcv_norm + (1.0 - w) * np.random.random(len(allowed)).astype(np.float32)
if personality == AIPersonality.DEFENSIVE: # Small noise floor at D10 prevents fully deterministic deck building.
# Prefers high-defense cards; same cost bias # A locked-in deck loses every game against counters; tiny randomness avoids this.
return 0.50 * (1.0 - atk_ratio) + 0.30 * pcv_norm + 0.20 * cost_norm noise = max(0.03, (10 - difficulty) / 9.0) * 0.50
scores = scores + np.random.normal(0, noise, len(allowed)).astype(np.float32)
if personality == AIPersonality.GREEDY: order = np.argsort(-scores)
# Fills budget with the fewest, most expensive cards possible sorted_cards = [allowed[i] for i in order]
return 0.70 * cost_norm + 0.30 * pcv_norm
if personality == AIPersonality.SWARM:
# Cheap cards
return 0.45 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm
if personality == AIPersonality.CONTROL:
# Values efficiency above all: wants cards that are above the power curve,
# with a secondary preference for higher cost
return 0.70 * pcv_norm + 0.30 * cost_norm
if personality == AIPersonality.BALANCED:
# Blends everything: efficiency, cost spread, and a slight attack lean
return 0.40 * pcv_norm + 0.35 * cost_norm + 0.15 * atk_ratio + 0.10 * (1.0 - atk_ratio)
# ARBITRARY: mostly random at lower difficulties
return (0.05 * difficulty) * pcv_norm + (1 - (0.05 * difficulty)) * random.random()
# Higher difficulty -> less noise -> more optimal deck composition
noise = ((10 - difficulty) / 9.0) * 0.50
scored = sorted(
[(card_score(c) + random.gauss(0, noise), c) for c in allowed],
key=lambda x: x[0],
reverse=True,
)
# Minimum budget reserved for cheap (cost 1-3) cards to ensure early-game presence.
# Without cheap cards the AI will play nothing for the first several turns.
early_budget = { early_budget = {
AIPersonality.GREEDY: 4, AIPersonality.GREEDY: 20, # cheap cards are sacrifice fodder for big plays
AIPersonality.SWARM: 12, AIPersonality.SWARM: 12,
AIPersonality.AGGRESSIVE: 8, AIPersonality.AGGRESSIVE: 18, # raised: ensures cheap high-attack fodder regardless of difficulty
AIPersonality.DEFENSIVE: 10, AIPersonality.DEFENSIVE: 15, # raised: stable cheap-card base across difficulty levels
AIPersonality.CONTROL: 8, AIPersonality.CONTROL: 8,
AIPersonality.BALANCED: 10, AIPersonality.BALANCED: 25, # spread the deck across all cost levels
AIPersonality.SHOCKER: 15, # ~15 cost-1 shields, then expensive attackers fill remaining budget
AIPersonality.ARBITRARY: 8, AIPersonality.ARBITRARY: 8,
}[personality] }[personality]
@@ -120,7 +114,7 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
# First pass: secure early-game cards # First pass: secure early-game cards
cheap_spent = 0 cheap_spent = 0
for _, card in scored: for card in sorted_cards:
if cheap_spent >= early_budget: if cheap_spent >= early_budget:
break break
if card.cost > 3 or total_cost + card.cost > BUDGET: if card.cost > 3 or total_cost + card.cost > BUDGET:
@@ -131,7 +125,7 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
# Second pass: fill remaining budget greedily by score # Second pass: fill remaining budget greedily by score
taken = {id(c) for c in selected} taken = {id(c) for c in selected}
for _, card in scored: for card in sorted_cards:
if total_cost >= BUDGET: if total_cost >= BUDGET:
break break
if id(card) in taken or total_cost + card.cost > BUDGET: if id(card) in taken or total_cost + card.cost > BUDGET:
@@ -139,13 +133,6 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
selected.append(card) selected.append(card)
total_cost += card.cost total_cost += card.cost
card_strings = [
f"{c.name} {c.cost}"
for c in sorted(selected, key=lambda x: x.cost)
]
logger.info("Selected:\n"+("\n".join(card_strings)))
return selected return selected
@@ -182,13 +169,6 @@ def _plans_for_sacrifice(player, opponent, sacrifice_slots):
empty_slots = [i for i, c in enumerate(board) if c is None] empty_slots = [i for i, c in enumerate(board) if c is None]
en_board = opponent.board en_board = opponent.board
# For scoring: open enemy slots first so the simulation reflects
# direct-damage potential accurately.
scoring_slots = (
[s for s in empty_slots if en_board[s] is None] +
[s for s in empty_slots if en_board[s] is not None]
)
return [ return [
MovePlan( MovePlan(
sacrifice_slots=list(sacrifice_slots), sacrifice_slots=list(sacrifice_slots),
@@ -196,6 +176,7 @@ def _plans_for_sacrifice(player, opponent, sacrifice_slots):
label=f"sac{len(sacrifice_slots)}_play{len(cards)}", label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
) )
for cards in _affordable_subsets(hand, energy) for cards in _affordable_subsets(hand, energy)
for scoring_slots in permutations(empty_slots, len(cards))
] ]
@@ -214,230 +195,152 @@ def generate_plans(player, opponent) -> list[MovePlan]:
return plans return plans
# ==================== Turn execution ====================
def score_plan(plan: MovePlan, player, opponent, personality: AIPersonality) -> float: def score_plans_batch(
""" plans: list[MovePlan],
Score a plan from ~0.0 to ~1.0 based on the projected board state after player: PlayerState,
executing it. Higher is better. opponent: PlayerState,
""" personality: AIPersonality,
# Simulate board after sacrifices + plays ) -> np.ndarray:
board = list(player.board) n = len(plans)
energy = player.energy
for slot in plan.sacrifice_slots:
if board[slot] is not None:
board[slot] = None
energy += 1
for card, slot in plan.plays:
board[slot] = card
en_board = opponent.board # Pre-compute PCV for every hand card once
enemy_occupied = sum(1 for c in en_board if c is not None) pcv_cache = {
id(c): max(0.0, min(1.0, get_power_curve_value(c)))
for c in player.hand
}
# --- Combat metrics --- # Build board-state arrays with one Python loop (unavoidable)
direct_damage = 0 # AI attacks going straight to opponent life board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
board_damage = 0 # AI attacks hitting enemy cards board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
blocking_slots = 0 # Slots where AI blocks an enemy card n_sac = np.zeros(n, dtype=np.float32)
cards_destroyed = 0 # Enemy cards the AI would destroy this turn sac_val = np.zeros(n, dtype=np.float32)
unblocked_incoming = 0 # Enemy attacks that go straight to AI life play_val = np.zeros(n, dtype=np.float32)
cards_on_board = 0 pcv_score = np.full(n, 0.5, dtype=np.float32)
for slot in range(BOARD_SIZE): for idx, plan in enumerate(plans):
my = board[slot] board = list(player.board)
en = en_board[slot] for slot in plan.sacrifice_slots:
if my: board_slot = board[slot]
cards_on_board += 1 if board_slot is not None:
if my and en is None: sac_val[idx] += board_slot.cost
direct_damage += my.attack board[slot] = None
if my and en: n_sac[idx] = len(plan.sacrifice_slots)
board_damage += my.attack for card, slot in plan.plays:
blocking_slots += 1 board[slot] = card
if my.attack >= en.defense: play_val[idx] += card.cost
cards_destroyed += 1 for slot in range(BOARD_SIZE):
if not my and en: board_slot = board[slot]
unblocked_incoming += en.attack if board_slot is not None:
board_atk[idx, slot] = board_slot.attack
board_occ[idx, slot] = True
if plan.plays:
pcv_vals = [pcv_cache.get(id(c), 0.5) for c, _ in plan.plays]
pcv_score[idx] = sum(pcv_vals) / len(pcv_vals)
# --- Normalize to [0, 1] --- # Enemy board — same for every plan
# How threatening is the attack relative to what remains of opponent's life? en_atk = np.array([c.attack if c else 0 for c in opponent.board], dtype=np.float32)
atk_score = min(1.0, direct_damage / max(opponent.life, 1)) en_def = np.array([c.defense if c else 0 for c in opponent.board], dtype=np.float32)
en_occ = np.array([c is not None for c in opponent.board], dtype=np.bool_)
enemy_occupied = int(en_occ.sum())
# What fraction of enemy slots are blocked? # --- Metrics (all shape (n,)) ---
block_score = (blocking_slots / enemy_occupied) if enemy_occupied > 0 else 1.0 direct_damage = (board_atk * ~en_occ).sum(axis=1)
blocking = board_occ & en_occ # (n, 5)
blocking_slots = blocking.sum(axis=1).astype(np.float32)
cards_on_board = board_occ.sum(axis=1).astype(np.float32)
cards_destroyed = ((board_atk >= en_def) & blocking).sum(axis=1).astype(np.float32)
unblocked_in = (en_atk * ~board_occ).sum(axis=1)
# What fraction of all slots are filled? atk_score = np.minimum(1.0, direct_damage / max(opponent.life, 1))
cover_score = cards_on_board / BOARD_SIZE block_score = blocking_slots / enemy_occupied if enemy_occupied > 0 else np.ones(n, dtype=np.float32)
open_slots = BOARD_SIZE - enemy_occupied
cover_score = (
(cards_on_board - blocking_slots) / open_slots
if open_slots > 0
else np.ones(n, dtype=np.float32)
)
destroy_score = cards_destroyed / enemy_occupied if enemy_occupied > 0 else np.zeros(n, dtype=np.float32)
threat_score = 1.0 - np.minimum(1.0, unblocked_in / max(player.life, 1))
# What fraction of enemy cards are destroyed? opp_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
destroy_score = (cards_destroyed / enemy_occupied) if enemy_occupied > 0 else 0.0 my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
attrition_score = my_cards_left / (my_cards_left + max(opp_cards_left, 1))
# How safe is the AI from unblocked hits relative to its own life? net_value = play_val - sac_val
threat_score = 1.0 - min(1.0, unblocked_incoming / max(player.life, 1)) net_value_norm = np.clip((net_value + 10) / 20, 0.0, 1.0)
# How many cards compared to the enemy? # --- Sacrifice penalty ---
opponent_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied energy_leftover = player.energy + n_sac - play_val
my_cards_left = len(player.deck) + len(player.hand) + blocking_slots wasted_energy = np.maximum(0, np.minimum(n_sac, energy_leftover))
attrition_score = my_cards_left/(my_cards_left + opponent_cards_left) wasted_penalty = np.where(n_sac > 0, wasted_energy / np.maximum(n_sac, 1), 0.0)
swap_penalty = np.clip((n_sac - net_value) / np.maximum(n_sac, 1), 0.0, 1.0)
# Net value: cost of cards played minus cost of cards sacrificed. sac_penalty = np.where(n_sac > 0, 0.65 * wasted_penalty + 0.35 * swap_penalty, 0.0)
n_sac = len(plan.sacrifice_slots)
sac_value = sum(player.board[s].cost for s in plan.sacrifice_slots if player.board[s] is not None)
play_value = sum(c.cost for c, _ in plan.plays)
net_value = play_value - sac_value
net_value_norm = max(0.0, min(1.0, (net_value + 10) / 20))
# Sacrifice penalty. Applied as a flat deduction after personality scoring.
sacrifice_penalty = 0.0
if n_sac > 0:
# Penalty 1: wasted energy. Each sacrifice gives +1 energy; if that energy
# goes unspent it was pointless. Weighted heavily.
energy_leftover = player.energy + n_sac - play_value
wasted_sac_energy = max(0, min(n_sac, energy_leftover))
wasted_penalty = wasted_sac_energy / n_sac
# Penalty 2: low-value swap. Each sacrifice should at minimum unlock a card
# that costs more than the one removed (net_value > n_sac means each
# sacrifice bought at least one extra cost point). Anything less is a bad trade.
swap_penalty = max(0.0, min(1.0, (n_sac - net_value) / max(n_sac, 1)))
sacrifice_penalty = 0.65 * wasted_penalty + 0.35 * swap_penalty
# Power curve value of the cards played (are they good value for their cost?)
if plan.plays:
pcv_scores = [max(0.0, min(1.0, get_power_curve_value(c))) for c, _ in plan.plays]
pcv_score = sum(pcv_scores) / len(pcv_scores)
else:
pcv_score = 0.5
# --- Personality weights --- # --- Personality weights ---
if personality == AIPersonality.AGGRESSIVE: if personality == AIPersonality.AGGRESSIVE:
# Maximize direct damage score = (0.30 * atk_score + 0.07 * block_score + 0.15 * cover_score +
score = ( 0.08 * net_value_norm + 0.25 * destroy_score +
0.40 * atk_score + 0.08 * attrition_score + 0.04 * pcv_score + 0.03 * threat_score)
0.10 * block_score +
0.10 * cover_score +
0.10 * net_value_norm +
0.15 * destroy_score +
0.05 * attrition_score +
0.05 * pcv_score +
0.05 * threat_score
)
elif personality == AIPersonality.DEFENSIVE: elif personality == AIPersonality.DEFENSIVE:
# Block everything score = (0.12 * atk_score + 0.20 * block_score + 0.18 * cover_score +
score = ( 0.04 * net_value_norm + 0.18 * destroy_score +
0.05 * atk_score + 0.15 * attrition_score + 0.05 * pcv_score + 0.08 * threat_score)
0.35 * block_score +
0.20 * cover_score +
0.05 * net_value_norm +
0.05 * destroy_score +
0.10 * attrition_score +
0.05 * pcv_score +
0.15 * threat_score
)
elif personality == AIPersonality.SWARM: elif personality == AIPersonality.SWARM:
# Fill the board and press with direct damage score = (0.25 * atk_score + 0.10 * block_score + 0.35 * cover_score +
score = ( 0.05 * net_value_norm + 0.05 * destroy_score +
0.25 * atk_score + 0.10 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
0.10 * block_score +
0.35 * cover_score +
0.05 * net_value_norm +
0.05 * destroy_score +
0.10 * attrition_score +
0.05 * pcv_score +
0.05 * threat_score
)
elif personality == AIPersonality.GREEDY: elif personality == AIPersonality.GREEDY:
# High-value card plays, willing to sacrifice weak cards for strong ones score = (0.15 * atk_score + 0.05 * block_score + 0.18 * cover_score +
score = ( 0.38 * net_value_norm + 0.05 * destroy_score +
0.20 * atk_score + 0.09 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
0.05 * block_score +
0.10 * cover_score +
0.40 * net_value_norm +
0.05 * destroy_score +
0.05 * attrition_score +
0.10 * pcv_score +
0.05 * threat_score
)
elif personality == AIPersonality.CONTROL: elif personality == AIPersonality.CONTROL:
# Efficiency score = (0.10 * atk_score + 0.05 * block_score + 0.05 * cover_score +
score = ( 0.20 * net_value_norm + 0.05 * destroy_score +
0.10 * atk_score + 0.10 * attrition_score + 0.40 * pcv_score + 0.05 * threat_score)
0.05 * block_score +
0.05 * cover_score +
0.20 * net_value_norm +
0.05 * destroy_score +
0.10 * attrition_score +
0.40 * pcv_score +
0.05 * threat_score
)
elif personality == AIPersonality.BALANCED: elif personality == AIPersonality.BALANCED:
score = ( score = (0.12 * atk_score + 0.13 * block_score + 0.15 * cover_score +
0.10 * atk_score + 0.10 * net_value_norm + 0.12 * destroy_score +
0.15 * block_score + 0.15 * attrition_score + 0.12 * pcv_score + 0.11 * threat_score)
0.10 * cover_score + elif personality == AIPersonality.SHOCKER:
0.10 * net_value_norm + score = (0.25 * destroy_score + 0.33 * cover_score + 0.18 * atk_score +
0.10 * destroy_score + 0.05 * block_score + 0.8 * attrition_score + 0.02 * threat_score +
0.10 * attrition_score + 0.05 * net_value_norm + 0.04 * pcv_score)
0.15 * pcv_score +
0.10 * threat_score
)
else: # ARBITRARY else: # ARBITRARY
score = ( score = (0.60 * np.random.random(n).astype(np.float32) +
0.60 * random.random() + 0.05 * atk_score + 0.05 * block_score + 0.05 * cover_score +
0.05 * atk_score + 0.05 * net_value_norm + 0.05 * destroy_score +
0.05 * block_score + 0.05 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
0.05 * cover_score +
0.05 * net_value_norm +
0.05 * destroy_score +
0.05 * attrition_score +
0.05 * pcv_score +
0.05 * threat_score
)
# --- Context adjustments --- # --- Context adjustments ---
score = np.where(direct_damage >= opponent.life, np.maximum(score, 0.95), score)
score = np.where(unblocked_in >= player.life, np.minimum(score, 0.05), score)
# Lethal takes priority regardless of personality
if direct_damage >= opponent.life:
score = max(score, 0.95)
if unblocked_incoming >= player.life:
score = min(score, 0.05)
# Against god-card decks: cover all slots so their big cards can't attack freely
if opponent.deck_type in ("God Card", "Pantheon"): if opponent.deck_type in ("God Card", "Pantheon"):
score = min(1.0, score + 0.08 * cover_score) score = np.minimum(1.0, score + 0.08 * cover_score)
# Against aggro/rush: need to block more urgently
if opponent.deck_type in ("Aggro", "Rush"): if opponent.deck_type in ("Aggro", "Rush"):
score = min(1.0, score + 0.06 * block_score + 0.04 * threat_score) score = np.minimum(1.0, score + 0.06 * block_score + 0.04 * threat_score)
# Against wall decks: direct damage matters more than destroying cards
if opponent.deck_type == "Wall": if opponent.deck_type == "Wall":
score = min(1.0, score + 0.06 * atk_score) score = np.minimum(1.0, score + 0.06 * atk_score)
# Press the advantage when opponent is low on life
if opponent.life < STARTING_LIFE * 0.3: if opponent.life < STARTING_LIFE * 0.3:
score = min(1.0, score + 0.06 * atk_score) score = np.minimum(1.0, score + 0.06 * atk_score)
# Prioritize survival when low on life
if player.life < STARTING_LIFE * 0.3: if player.life < STARTING_LIFE * 0.3:
score = min(1.0, score + 0.06 * threat_score + 0.04 * block_score) score = np.minimum(1.0, score + 0.06 * threat_score + 0.04 * block_score)
if opp_cards_left <= 5:
score = np.where(cards_on_board > 0, np.minimum(1.0, score + 0.05), score)
# Opponent running low on cards: keep a card on board for attrition win condition return np.maximum(0.0, score - sac_penalty)
if opponent_cards_left <= 5 and cards_on_board > 0:
score = min(1.0, score + 0.05)
# Apply sacrifice penalty last so it can override all other considerations.
score = max(0.0, score - sacrifice_penalty)
return score
# ==================== Turn execution ==================== async def choose_plan(player: PlayerState, opponent: PlayerState, personality: AIPersonality, difficulty: int) -> MovePlan:
plans = generate_plans(player, opponent)
scores = score_plans_batch(plans, player, opponent, personality)
noise_scale = (max(0,11 - difficulty)**2) * 0.01 - 0.01
noise = np.random.normal(0, noise_scale, len(scores)).astype(np.float32)
return plans[int(np.argmax(scores + noise))]
async def run_ai_turn(game_id: str): async def run_ai_turn(game_id: str):
from game_manager import ( from game_manager import (
@@ -485,24 +388,10 @@ async def run_ai_turn(game_id: str):
pass pass
# --- Generate and score candidate plans --- # --- Generate and score candidate plans ---
plans = generate_plans(player, opponent) best_plan = await choose_plan(player, opponent, personality, difficulty)
if difficulty <= 2:
# Actively bad
scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, 0.15*difficulty), p)
for p in plans]
best_plan = min(scored, key=lambda x: x[0])[1]
elif difficulty == 3:
# Fully random
best_plan = random.choice(plans)
else:
noise = max(0.0, ((8 - difficulty) / 6.0) * 0.30)
scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, noise), p)
for p in plans]
best_plan = max(scored, key=lambda x: x[0])[1]
logger.info( logger.info(
f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} plans={len(plans)} " + f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} " +
f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}" f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
) )

View File

@@ -541,7 +541,17 @@ async def _get_specific_card_async(title: str) -> Card|None:
# Sync entrypoints # Sync entrypoints
def generate_cards(size: int) -> list[Card]: def generate_cards(size: int) -> list[Card]:
return asyncio.run(_get_cards_async(size)) cards = []
remaining = size
while remaining > 0:
batch = min(remaining,10)
logger.warning(f"Generating {batch} cards ({len(cards)}/{size})")
cards += asyncio.run(_get_cards_async(batch))
remaining = size - len(cards)
if remaining > 0:
sleep(4)
return cards
def generate_card(title: str) -> Card|None: def generate_card(title: str) -> Card|None:
return asyncio.run(_get_specific_card_async(title)) return asyncio.run(_get_specific_card_async(title))

View File

@@ -6,7 +6,7 @@ from datetime import datetime
from models import Card as CardModel from models import Card as CardModel
STARTING_LIFE = 500 STARTING_LIFE = 1000
MAX_ENERGY_CAP = 6 MAX_ENERGY_CAP = 6
BOARD_SIZE = 5 BOARD_SIZE = 5
HAND_SIZE = 5 HAND_SIZE = 5

View File

@@ -538,11 +538,7 @@ if __name__ == "__main__":
from card import generate_cards, Card from card import generate_cards, Card
from time import sleep from time import sleep
all_cards: list[Card] = [] all_cards = generate_cards(500)
for i in range(30):
print(i)
all_cards += generate_cards(10)
sleep(5)
all_cards.sort(key=lambda x: x.cost, reverse=True) all_cards.sort(key=lambda x: x.cost, reverse=True)

440
backend/simulate.py Normal file
View File

@@ -0,0 +1,440 @@
import json
import os
import random
import uuid
import asyncio
from concurrent.futures import ProcessPoolExecutor
from dotenv import load_dotenv
load_dotenv()
from datetime import datetime
from card import Card, CardType, CardRarity, generate_cards, compute_deck_type
from game import (
CardInstance, PlayerState, GameState,
action_play_card, action_sacrifice, action_end_turn,
)
from ai import AIPersonality, choose_cards, choose_plan
# Cached card pool on disk so repeated simulation runs skip regeneration.
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
# Size of the generated simulation card pool.
SIMULATION_CARD_COUNT = 1000
# ==================== Card pool ====================
def _card_to_dict(card: Card) -> dict:
return {
"name": card.name,
"created_at": card.created_at.isoformat(),
"image_link": card.image_link,
"card_rarity": card.card_rarity.name,
"card_type": card.card_type.name,
"wikidata_instance": card.wikidata_instance,
"text": card.text,
"attack": card.attack,
"defense": card.defense,
"cost": card.cost,
}
def _dict_to_card(d: dict) -> Card:
    """Rebuild a Card from the dict shape produced by _card_to_dict."""
    rarity = CardRarity[d["card_rarity"]]
    ctype = CardType[d["card_type"]]
    created = datetime.fromisoformat(d["created_at"])
    return Card(
        name=d["name"],
        created_at=created,
        image_link=d["image_link"],
        card_rarity=rarity,
        card_type=ctype,
        wikidata_instance=d["wikidata_instance"],
        text=d["text"],
        attack=d["attack"],
        defense=d["defense"],
        cost=d["cost"],
    )
def get_simulation_cards() -> list[Card]:
    """Load the cached simulation card pool, generating and caching it on first use."""
    if not os.path.exists(SIMULATION_CARDS_PATH):
        print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
        pool = generate_cards(SIMULATION_CARD_COUNT)
        # Cache to disk so subsequent runs skip the (slow) generation step.
        with open(SIMULATION_CARDS_PATH, "w", encoding="utf-8") as f:
            json.dump([_card_to_dict(c) for c in pool], f, ensure_ascii=False, indent=2)
        print(f"Saved {len(pool)} cards to {SIMULATION_CARDS_PATH}")
        return pool
    with open(SIMULATION_CARDS_PATH, "r", encoding="utf-8") as f:
        raw = json.load(f)
    return [_dict_to_card(entry) for entry in raw]
# ==================== Single game ====================
# Fixed seat ids for the two simulated players; simulate_game returns one of these.
PLAYER1_ID = "p1"
PLAYER2_ID = "p2"
MAX_TURNS = 300  # safety cap to prevent infinite games
def _make_instances(deck: list[Card]) -> list[CardInstance]:
    """Wrap each deck card in a fresh CardInstance with full defense and a unique id."""
    instances: list[CardInstance] = []
    for card in deck:
        instances.append(
            CardInstance(
                instance_id=str(uuid.uuid4()),
                card_id=card.name,
                name=card.name,
                attack=card.attack,
                defense=card.defense,
                max_defense=card.defense,
                cost=card.cost,
                card_type=card.card_type.name,
                card_rarity=card.card_rarity.name,
                image_link=card.image_link or "",
                text=card.text or "",
            )
        )
    return instances
async def simulate_game(
    cards: list[Card],
    difficulty1: int,
    personality1: AIPersonality,
    difficulty2: int,
    personality2: AIPersonality,
) -> str | None:
    """
    Simulate a single game between two AIs choosing from `cards`.
    Player 1 always goes first.
    Returns "p1", "p2", or None if the game exceeds MAX_TURNS.
    Designed to be awaited inside asyncio.gather() to run many games concurrently.
    """
    # Each AI builds its own deck from the shared pool per its difficulty/personality.
    deck1 = choose_cards(cards, difficulty1, personality1)
    deck2 = choose_cards(cards, difficulty2, personality2)
    instances1 = _make_instances(deck1)
    instances2 = _make_instances(deck2)
    random.shuffle(instances1)
    random.shuffle(instances2)
    # compute_deck_type may return a falsy value; fall back to "Balanced".
    deck_type1 = compute_deck_type(deck1) or "Balanced"
    deck_type2 = compute_deck_type(deck2) or "Balanced"
    p1 = PlayerState(user_id=PLAYER1_ID, username="AI1", deck_type=deck_type1, deck=instances1)
    p2 = PlayerState(user_id=PLAYER2_ID, username="AI2", deck_type=deck_type2, deck=instances2)
    # P1 always goes first
    p1.increment_energy_cap()
    p2.increment_energy_cap()
    p1.refill_energy()
    p1.draw_to_full()
    state = GameState(
        game_id=str(uuid.uuid4()),
        players={PLAYER1_ID: p1, PLAYER2_ID: p2},
        player_order=[PLAYER1_ID, PLAYER2_ID],
        active_player_id=PLAYER1_ID,
        phase="main",
        turn=1,
    )
    # Per-seat AI configuration, looked up by the active player's id each turn.
    configs = {
        PLAYER1_ID: (difficulty1, personality1),
        PLAYER2_ID: (difficulty2, personality2),
    }
    for _ in range(MAX_TURNS):
        if state.result:
            break
        active_id = state.active_player_id
        difficulty, personality = configs[active_id]
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]
        plan = await choose_plan(player, opponent, personality, difficulty)
        # Sacrifices first: they free board slots and grant energy for the plays.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)
        plays = list(plan.plays)
        # NOTE(review): plays are shuffled — presumably to vary which plays survive
        # when the plan is partially invalid; confirm intent.
        random.shuffle(plays)
        for card, slot in plays:
            # The plan may be stale (card gone, slot occupied, or energy short after
            # earlier plays) — skip invalid moves rather than crash.
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None:
                continue
            if player.board[slot] is not None:
                continue
            if card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)
        action_end_turn(state)
    # No winner_id (turn cap hit or draw-like result) is reported as a tie.
    if state.result and state.result.winner_id:
        return state.result.winner_id
    return None
# ==================== Process-pool worker ====================
# These must be module-level so they are picklable.
_worker_cards: list[Card] = []
def _init_worker(cards: list[Card]) -> None:
global _worker_cards
_worker_cards = cards
def _run_game_sync(args: tuple) -> str | None:
    """Synchronous entry point for a worker process."""
    difficulty1, name1, difficulty2, name2 = args
    # Personalities travel as strings for picklability; rehydrate the enum here.
    coro = simulate_game(
        _worker_cards,
        difficulty1, AIPersonality(name1),
        difficulty2, AIPersonality(name2),
    )
    return asyncio.run(coro)
# ==================== Tournament ====================
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
    """Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
    if difficulties is None:
        difficulties = list(range(1, 11))
    combos: list[tuple[AIPersonality, int]] = []
    for personality in AIPersonality:
        for difficulty in difficulties:
            combos.append((personality, difficulty))
    return combos
def _player_label(personality: AIPersonality, difficulty: int) -> str:
return f"{personality.value[:3].upper()}-{difficulty}"
async def run_tournament(
    cards: list[Card],
    games_per_matchup: int = 5,
    difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Pit every (personality, difficulty) pair against every other, as both
    first and second player.
    `difficulties` selects which difficulty levels to include (default: 1-10).
    Returns a wins dict keyed by (first_player_index, second_player_index)
    where the value is how many of `games_per_matchup` games the first player won.
    Games run in parallel across all CPU cores via ProcessPoolExecutor.
    Cards are sent to each worker once at startup, not once per game.
    """
    players = _all_players(difficulties)
    n = len(players)
    # Build the flat list of (i, j, args) for every game
    indexed_args: list[tuple[int, int, tuple]] = []
    for i in range(n):
        p1_personality, p1_difficulty = players[i]
        for j in range(n):
            p2_personality, p2_difficulty = players[j]
            # Personalities are passed by value (str) so the args tuple is picklable.
            args = (p1_difficulty, p1_personality.value, p2_difficulty, p2_personality.value)
            for _ in range(games_per_matchup):
                indexed_args.append((i, j, args))
    total_games = len(indexed_args)
    n_workers = os.cpu_count() or 1
    print(f"Running {total_games} games across {n_workers} workers "
          f"({n} players, {games_per_matchup} games per ordered pair)...")
    # Mutable one-cell list so the nested coroutine can update the progress counter.
    done = [0]
    # Report roughly every 0.5% of total games (at least every game for tiny runs).
    report_every = max(1, total_games // 200)
    loop = asyncio.get_running_loop()
    async def tracked(future):
        # Await one game's future and emit an in-place progress line.
        result = await future
        done[0] += 1
        if done[0] % report_every == 0 or done[0] == total_games:
            pct = done[0] / total_games * 100
            print(f" {done[0]}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
        return result
    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        futures = [
            loop.run_in_executor(executor, _run_game_sync, args)
            for _, _, args in indexed_args
        ]
        results = await asyncio.gather(*[tracked(f) for f in futures])
    print("\nFinished")
    wins: dict[tuple[int, int], int] = {}
    ties = 0
    # results align 1:1 with indexed_args, so zip recovers each game's matchup.
    for (i, j, _), winner in zip(indexed_args, results):
        key = (i, j)
        if key not in wins:
            wins[key] = 0
        if winner == PLAYER1_ID:
            wins[key] += 1
        elif winner is None:
            ties += 1
    print(f"Ties: {ties}")
    return wins
def rank_players(
    wins: dict[tuple[int, int], int],
    games_per_matchup: int,
    players: list[tuple[AIPersonality, int]],
) -> list[int]:
    """
    Rank player indices by total wins (as first + second player combined).
    Returns indices sorted worst-to-best.
    """
    totals = {idx: 0 for idx in range(len(players))}
    for (first, second), first_wins in wins.items():
        # A self-matchup would credit the same player on both sides; skip it.
        if first == second:
            continue
        totals[first] += first_wins
        totals[second] += games_per_matchup - first_wins
    # Dict keys iterate 0..n-1, so ties keep ascending-index order (stable sort).
    return sorted(totals, key=totals.get)
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
def save_tournament(
    wins: dict[tuple[int, int], int],
    games_per_matchup: int,
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
):
    """Serialize tournament results to JSON at `path`.

    Tuple keys are flattened to "i,j" strings because JSON objects only
    accept string keys; `load_tournament` reverses the encoding.
    """
    payload = {
        "games_per_matchup": games_per_matchup,
        "players": [
            {"personality": personality.value, "difficulty": difficulty}
            for personality, difficulty in players
        ],
        "wins": {f"{first},{second}": count for (first, second), count in wins.items()},
    }
    with open(path, "w", encoding="utf-8") as f:
        json.dump(payload, f, indent=2)
    print(f"Tournament results saved to {path}")
def load_tournament(path: str = TOURNAMENT_RESULTS_PATH) -> tuple[dict[tuple[int, int], int], int, list[tuple[AIPersonality, int]]]:
    """Read results written by `save_tournament`; returns (wins, games_per_matchup, players)."""
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)
    wins: dict[tuple[int, int], int] = {}
    for key, count in data["wins"].items():
        # Undo the "i,j" string encoding used for JSON keys.
        first, second = key.split(",")
        wins[(int(first), int(second))] = count
    players = [
        (AIPersonality(entry["personality"]), entry["difficulty"])
        for entry in data["players"]
    ]
    return wins, data["games_per_matchup"], players
def draw_grid(
    wins: dict[tuple[int, int], int],
    games_per_matchup: int = 5,
    players: list[tuple[AIPersonality, int]] | None = None,
    output_path: str = "tournament_grid.png",
):
    """
    Render the tournament result matrix as a heatmap PNG.

    Rows are the first player, columns the second (both sorted worst-to-best
    by total wins). Cell colour encodes the first player's win margin:
    red when the first player won more games in that cell, green when the
    second did. A "×" marks cells where one side swept every game.
    """
    import matplotlib
    matplotlib.use("Agg")  # headless backend: render straight to a file
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import numpy as np
    if players is None:
        players = _all_players()
    n = len(players)
    order = rank_players(wins, games_per_matchup, players)  # worst-to-best indices
    labels = [_player_label(*players[idx]) for idx in order]
    # Margin matrix: (p1_wins - p2_wins) / games_per_matchup, normalized to [-1, 1].
    margins = np.full((n, n), np.nan)
    for row, i in enumerate(order):
        for col, j in enumerate(order):
            first_wins = wins.get((i, j), 0)
            second_wins = games_per_matchup - first_wins
            margins[row, col] = (first_wins - second_wins) / games_per_matchup
    cell_size = 0.22
    side = n * cell_size + 3
    fig, ax = plt.subplots(figsize=(side, side))
    cmap = mcolors.LinearSegmentedColormap.from_list(
        "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
    )
    norm = mcolors.Normalize(vmin=-1, vmax=1)
    image = ax.imshow(margins, cmap=cmap, norm=norm, aspect="equal", interpolation="none")
    # Overlay an × wherever one player took every game of the matchup.
    for row, i in enumerate(order):
        for col, j in enumerate(order):
            first_wins = wins.get((i, j), 0)
            if first_wins in (0, games_per_matchup):
                ax.text(col, row, "×", ha="center", va="center",
                        fontsize=5, color="black", fontweight="bold", zorder=3)
    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(labels, rotation=90, fontsize=4)
    ax.set_yticklabels(labels, fontsize=4)
    ax.xaxis.set_label_position("top")
    ax.xaxis.tick_top()
    ax.set_xlabel("Second player", labelpad=8, fontsize=8)
    ax.set_ylabel("First player", labelpad=8, fontsize=8)
    ax.set_title(
        "Tournament results — red: first player wins more, green: second player wins more",
        pad=14, fontsize=9,
    )
    plt.colorbar(image, ax=ax, fraction=0.015, pad=0.01,
                 label="(P1 wins - P2 wins) / games per cell")
    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Grid saved to {output_path}")
if __name__ == "__main__":
    # Run a full round-robin tournament between every (personality, difficulty)
    # pair, persist the raw win counts to JSON, then render the heatmap PNG.
    # (Removed an unused `import sys` that served no purpose here.)
    GAMES_PER_MATCHUP = 50
    difficulties = list(range(1, 11))
    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins = asyncio.run(run_tournament(card_pool, games_per_matchup=GAMES_PER_MATCHUP, difficulties=difficulties))
    save_tournament(wins, games_per_matchup=GAMES_PER_MATCHUP, players=players)
    draw_grid(wins, games_per_matchup=GAMES_PER_MATCHUP, players=players)

File diff suppressed because it is too large Load Diff