🐐
This commit is contained in:
0
backend/ai/__init__.py
Normal file
0
backend/ai/__init__.py
Normal file
176
backend/ai/card_pick_nn.py
Normal file
176
backend/ai/card_pick_nn.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai.nn import NeuralNet, _softmax
|
||||
|
||||
# Separate weights file so this NN trains independently from the plan NN.
|
||||
CARD_PICK_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "card_pick_weights.json")
|
||||
|
||||
N_CARD_FEATURES = 15
|
||||
|
||||
# Normalization constants — chosen to cover the realistic stat range for generated cards.
|
||||
_MAX_ATK = 50.0
|
||||
_MAX_DEF = 100.0
|
||||
|
||||
|
||||
def _precompute_static_features(allowed: list) -> np.ndarray:
    """
    Vectorized precomputation of the 7 per-card static features for the whole pool.
    Returns (n, 7) float32. Called once per choose_cards() invocation.

    Feature layout: [atk, def, cost, rarity, atk_ratio, power-curve value, type],
    each normalized into roughly [0, 1].
    """
    count = len(allowed)
    atk = np.fromiter((c.attack for c in allowed), dtype=np.float32, count=count)
    defn = np.fromiter((c.defense for c in allowed), dtype=np.float32, count=count)
    cost = np.fromiter((c.cost for c in allowed), dtype=np.float32, count=count)
    rarity = np.fromiter((c.card_rarity.value for c in allowed), dtype=np.float32, count=count)
    ctype = np.fromiter((c.card_type.value for c in allowed), dtype=np.float32, count=count)

    # "True" cost implied by the stats (mirrors the card.py cost formula);
    # the clamped difference vs. actual cost measures how above-curve the card is.
    implied_cost = np.minimum(10.0, np.maximum(1.0, ((atk ** 2 + defn ** 2) ** 0.18) / 1.5))
    stat_total = atk + defn
    # 0.5 fallback keeps a 0/0 card neutral instead of NaN.
    attack_share = np.where(stat_total > 0, atk / stat_total, 0.5)

    columns = (
        atk / _MAX_ATK,
        defn / _MAX_DEF,
        cost / 10.0,
        rarity / 5.0,
        attack_share,
        np.clip(implied_cost - cost, 0.0, 1.0),
        ctype / 9.0,
    )
    return np.stack(columns, axis=1).astype(np.float32)
|
||||
|
||||
|
||||
class CardPickPlayer:
    """
    Uses a NeuralNet to sequentially select cards from a pool until the cost
    budget is exhausted. API mirrors NeuralPlayer so training code stays uniform.

    In training mode: samples stochastically (softmax) and records the
    trajectory for a REINFORCE update after the game ends.
    In inference mode: picks the highest-scoring affordable card at each step.

    Performance design:
    - Static per-card features (7) are computed once via vectorized numpy.
    - Context features (8) use running totals updated by O(1) increments.
    - Picked cards are tracked with a boolean mask; no list.remove() calls.
    - Each pick step does one small forward pass over the affordable subset only.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # net: shared NeuralNet scorer; temperature only affects training-mode sampling.
        self.net = net
        self.training = training
        self.temperature = temperature
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features_matrix, chosen_idx)

    def choose_cards(self, allowed: list, difficulty: int) -> list:
        """
        allowed: pre-filtered list of Card objects (cost ≤ max_card_cost already applied).
        Returns the selected deck as a list of Cards.

        Loops until no remaining card fits the leftover budget; an empty
        `allowed` pool returns an empty deck immediately.
        """
        BUDGET = 50
        n = len(allowed)

        static = _precompute_static_features(allowed)  # (n, 7) — computed once
        costs = np.array([c.cost for c in allowed], dtype=np.float32)
        picked = np.zeros(n, dtype=bool)

        budget_remaining = BUDGET
        selected: list = []

        # Running totals for context features — incremented O(1) per pick.
        n_picked = 0
        sum_atk = 0.0
        sum_def = 0.0
        sum_cost = 0.0
        n_cheap = 0  # cost ≤ 3
        n_high = 0  # cost ≥ 6
        # assumes difficulty is on a 1–10 scale — TODO confirm against caller.
        diff_norm = difficulty / 10.0

        while True:
            # Candidates: not yet picked AND still affordable.
            mask = (~picked) & (costs <= budget_remaining)
            if not mask.any():
                break

            idxs = np.where(mask)[0]

            # Context row — same for every candidate this step, broadcast via tile.
            if n_picked > 0:
                ctx = np.array([
                    n_picked / 30.0,
                    budget_remaining / 50.0,
                    sum_atk / n_picked / _MAX_ATK,
                    sum_def / n_picked / _MAX_DEF,
                    sum_cost / n_picked / 10.0,
                    n_cheap / n_picked,
                    n_high / n_picked,
                    diff_norm,
                ], dtype=np.float32)
            else:
                # First pick: averages are undefined, so zero them out.
                ctx = np.array([
                    0.0, budget_remaining / 50.0, 0.0, 0.0, 0.0, 0.0, 0.0, diff_norm,
                ], dtype=np.float32)

            # (len(idxs), 7 + 8) input matrix: static card features + shared context.
            features = np.concatenate(
                [static[idxs], np.tile(ctx, (len(idxs), 1))],
                axis=1,
            )
            scores = self.net.forward(features)

            if self.training:
                # Softmax in float64 for numerical safety; clip + renormalize so
                # np.random.choice never sees a zero/NaN probability.
                probs = _softmax((scores / self.temperature).astype(np.float64))
                probs = np.clip(probs, 1e-10, None)
                probs /= probs.sum()
                local_idx = int(np.random.choice(len(idxs), p=probs))
                # Save the exact input matrix so compute_grads can replay the step.
                self.trajectory.append((features, local_idx))
            else:
                local_idx = int(np.argmax(scores))

            global_idx = idxs[local_idx]
            card = allowed[global_idx]
            picked[global_idx] = True
            selected.append(card)

            # Incremental context update — O(1).
            budget_remaining -= card.cost
            n_picked += 1
            sum_atk += card.attack
            sum_def += card.defense
            sum_cost += card.cost
            if card.cost <= 3: n_cheap += 1
            if card.cost >= 6: n_high += 1

        return selected

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        REINFORCE gradients averaged over the pick trajectory.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if no picks were made.

        Clears the trajectory afterwards, so this is a one-shot call per game.
        """
        if not self.trajectory:
            return None

        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]

        for features, chosen_idx in self.trajectory:
            # Re-run forward on the stored features: net.backward reads the
            # activations cached by the most recent forward pass.
            scores = self.net.forward(features)
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # ∂log π(chosen)/∂scores = one_hot(chosen) − softmax(scores),
            # scaled by the centered reward.
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]

        # Average over trajectory length so long decks don't get larger updates.
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n

        self.trajectory.clear()
        return acc_gw, acc_gb
|
||||
1
backend/ai/card_pick_weights.json
Normal file
1
backend/ai/card_pick_weights.json
Normal file
File diff suppressed because one or more lines are too long
459
backend/ai/engine.py
Normal file
459
backend/ai/engine.py
Normal file
@@ -0,0 +1,459 @@
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
import random
|
||||
from dataclasses import dataclass
|
||||
from enum import Enum
|
||||
from itertools import combinations, permutations
|
||||
|
||||
import numpy as np
|
||||
|
||||
from game.card import Card
|
||||
from game.rules import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE, PlayerState
|
||||
|
||||
logger = logging.getLogger("app")
|
||||
|
||||
AI_USER_ID = "ai"
|
||||
|
||||
class AIPersonality(Enum):
    """Play styles that weight the deck-builder and plan scorer differently."""
    AGGRESSIVE = "aggressive"
    DEFENSIVE = "defensive"
    BALANCED = "balanced"
    GREEDY = "greedy"  # prioritizes high cost cards, willing to sacrifice
    SWARM = "swarm"  # favors many cheap cards to cover the board
    CONTROL = "control"  # favors above-curve value cards
    ARBITRARY = "arbitrary"  # mostly random; randomness shrinks as difficulty rises
    JEBRASKA = "jebraska"  # trained neural network plan scorer
|
||||
|
||||
def get_random_personality() -> AIPersonality:
    """Pick one of the defined AI personalities uniformly at random."""
    options = tuple(AIPersonality)
    return random.choice(options)
|
||||
|
||||
def calculate_exact_cost(attack: int, defense: int) -> float:
    """Calculate the exact cost before rounding (matches card.py formula).

    The raw value ((atk² + def²)^0.18 / 1.5) is clamped into [1.0, 10.0].
    """
    raw = (attack ** 2 + defense ** 2) ** 0.18 / 1.5
    if raw < 1.0:
        return 1.0
    if raw > 10.0:
        return 10.0
    return raw
|
||||
|
||||
def get_power_curve_value(card) -> float:
    """
    Returns how much above the power curve a card is.
    Positive values mean the card is a better-than-expected deal for its cost.
    """
    return calculate_exact_cost(card.attack, card.defense) - card.cost
|
||||
|
||||
|
||||
def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
    """Build the AI's deck from the candidate pool.

    cards: full candidate pool; difficulty caps the max card cost (assumes a
    1–10 scale — TODO confirm); personality selects the scoring heuristic.
    Returns the selected deck (total cost ≤ 50). JEBRASKA delegates to the
    card-pick NN when trained weights exist.
    """
    BUDGET = 50

    # Higher difficulties unlock more expensive cards; floor of 6 below D6.
    if difficulty >= 6:
        max_card_cost = difficulty + 1
    else:
        max_card_cost = 6

    # If the cost filter empties the pool, fall back to the whole pool.
    allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)

    # Vectorized scoring over all allowed cards at once
    atk = np.array([c.attack for c in allowed], dtype=np.float32)
    defn = np.array([c.defense for c in allowed], dtype=np.float32)
    cost = np.array([c.cost for c in allowed], dtype=np.float32)

    # Stat-implied cost (mirrors card.py); its excess over the printed cost is
    # the "power curve value" — how good a deal the card is.
    exact_cost = np.minimum(10.0, np.maximum(1.0, ((atk**2 + defn**2)**0.18) / 1.5))
    pcv_norm = np.clip(exact_cost - cost, 0.0, 1.0)
    cost_norm = cost / max_card_cost
    totals = atk + defn
    atk_ratio = np.where(totals > 0, atk / totals, 0.5)
    def_not_one = np.where(defn != 1, 1.0, 0.0)  # penalize 1-defense glass cannons

    if personality == AIPersonality.AGGRESSIVE:
        # (1-cost_norm) penalizes expensive cards. High-attack cards are inherently expensive,
        # so without this the second pass drifts toward costly cards at higher difficulty,
        # shrinking the deck. The bonus grows with max_card_cost and exactly offsets that drift.
        scores = 0.50 * atk_ratio + 0.35 * pcv_norm + 0.15 * (1.0 - cost_norm) + 0.10 * def_not_one
    elif personality == AIPersonality.DEFENSIVE:
        # Small (1-cost_norm) for the same anti-shrinkage reason; lighter because high-defense
        # cards don't correlate as strongly with cost as high-attack cards do.
        scores = 0.10 * (1.0 - atk_ratio) + 0.80 * pcv_norm + 0.10 * cost_norm
    elif personality == AIPersonality.GREEDY:
        # Small cost_norm keeps flavour without causing severe deck shrinkage at D10
        scores = 0.20 * cost_norm + 0.80 * pcv_norm
    elif personality == AIPersonality.SWARM:
        scores = 0.40 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm + 0.05 * def_not_one
    elif personality == AIPersonality.CONTROL:
        # Small cost_norm keeps flavour without causing severe deck shrinkage at D10
        scores = 0.85 * pcv_norm + 0.15 * cost_norm
    elif personality == AIPersonality.BALANCED:
        scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
    elif personality == AIPersonality.JEBRASKA:
        # Delegate entirely to the card-pick NN; skip the heuristic scoring path.
        from ai.card_pick_nn import CardPickPlayer, CARD_PICK_WEIGHTS_PATH
        from ai.nn import NeuralNet
        # Cache the loaded net on the function object so weights load once per process.
        if not hasattr(choose_cards, "_card_pick_net"):
            choose_cards._card_pick_net = (
                NeuralNet.load(CARD_PICK_WEIGHTS_PATH)
                if os.path.exists(CARD_PICK_WEIGHTS_PATH) else None
            )
        net = choose_cards._card_pick_net
        if net is not None:
            return CardPickPlayer(net, training=False).choose_cards(allowed, difficulty)
        # Fall through to BALANCED heuristic if weights aren't trained yet.
        scores = 0.60 * pcv_norm + 0.25 * atk_ratio + 0.15 * (1.0 - atk_ratio)
    else:  # ARBITRARY
        # Blend: more PCV signal (less randomness) as difficulty rises.
        w = 0.09 * difficulty
        scores = w * pcv_norm + (1.0 - w) * np.random.random(len(allowed)).astype(np.float32)

    # Small noise floor at D10 prevents fully deterministic deck building.
    noise = (max(0,12 - difficulty)**2) * 0.008
    scores = scores + np.random.normal(0, noise, len(allowed)).astype(np.float32)

    # Rank cards best-first by heuristic score.
    order = np.argsort(-scores)
    sorted_cards = [allowed[i] for i in order]

    # Budget reserved for cheap (cost ≤ 3) cards in the first pass, per personality.
    early_budget = {
        AIPersonality.GREEDY: 20,  # cheap cards are sacrifice fodder for big plays
        AIPersonality.SWARM: 12,
        AIPersonality.AGGRESSIVE: 18,  # raised: ensures cheap high-attack fodder regardless of difficulty
        AIPersonality.DEFENSIVE: 15,  # raised: stable cheap-card base across difficulty levels
        AIPersonality.CONTROL: 8,
        AIPersonality.BALANCED: 25,  # spread the deck across all cost levels
        AIPersonality.JEBRASKA: 25,  # fallback (no trained weights yet)
        AIPersonality.ARBITRARY: 8,
    }[personality]

    selected: list[Card] = []
    total_cost = 0

    # First pass: secure early-game cards
    cheap_spent = 0
    for card in sorted_cards:
        if cheap_spent >= early_budget:
            break
        if card.cost > 3 or total_cost + card.cost > BUDGET:
            continue
        selected.append(card)
        total_cost += card.cost
        cheap_spent += card.cost

    # Second pass: fill remaining budget greedily by score
    taken = {id(c) for c in selected}  # id() handles duplicate-looking cards
    for card in sorted_cards:
        if total_cost >= BUDGET:
            break
        if id(card) in taken or total_cost + card.cost > BUDGET:
            continue
        selected.append(card)
        total_cost += card.cost

    return selected
|
||||
|
||||
|
||||
@dataclass
class MovePlan:
    """One candidate AI turn: sacrifices to make, then cards to play where."""
    # Board slot indices to sacrifice before playing.
    sacrifice_slots: list[int]
    plays: list[tuple]  # (CardInstance, board_slot: int)
    # Human-readable tag for logging, e.g. "sac1_play2" or "idle".
    label: str = ""
|
||||
|
||||
|
||||
def _affordable_subsets(hand, energy, start=0):
|
||||
"""Yield every subset of cards from hand whose total cost fits within energy."""
|
||||
yield []
|
||||
for i in range(start, len(hand)):
|
||||
card = hand[i]
|
||||
if card.cost <= energy:
|
||||
for rest in _affordable_subsets(hand, energy - card.cost, i + 1):
|
||||
yield [card] + rest
|
||||
|
||||
|
||||
def _plans_for_sacrifice(player, opponent, sacrifice_slots):
    """Generate one plan per affordable card subset for a given sacrifice set.

    Each occupied sacrificed slot is cleared and grants +1 energy before the
    affordable subsets are enumerated. One MovePlan is emitted per
    (card subset, empty-slot permutation) pair, so the output grows
    factorially with the number of empty slots — fine while BOARD_SIZE is small.

    player/opponent: PlayerState-like objects; sacrifice_slots: board indices.
    (`opponent` is kept for signature compatibility; it is not read here.)
    """
    board = list(player.board)
    energy = player.energy

    # Apply the hypothetical sacrifices: clear the slot, bank the energy.
    for slot in sacrifice_slots:
        if board[slot] is not None:
            board[slot] = None
            energy += 1

    hand = list(player.hand)
    empty_slots = [i for i, c in enumerate(board) if c is None]

    return [
        MovePlan(
            sacrifice_slots=list(sacrifice_slots),
            plays=list(zip(cards, scoring_slots)),
            label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
        )
        for cards in _affordable_subsets(hand, energy)
        for scoring_slots in permutations(empty_slots, len(cards))
    ]
|
||||
|
||||
|
||||
def generate_plans(player, opponent) -> list[MovePlan]:
    """Enumerate every candidate MovePlan for the current turn.

    Covers all sacrifice subsets (including none) combined with every
    affordable play arrangement, plus an explicit do-nothing plan.
    """
    occupied = [s for s in range(BOARD_SIZE) if player.board[s] is not None]

    plans: list[MovePlan] = []
    for count in range(len(occupied) + 1):
        for combo in combinations(occupied, count):
            plans.extend(_plans_for_sacrifice(player, opponent, list(combo)))

    # Idle: do nothing
    plans.append(MovePlan(sacrifice_slots=[], plays=[], label="idle"))

    return plans
|
||||
|
||||
def score_plans_batch(
    plans: list[MovePlan],
    player: PlayerState,
    opponent: PlayerState,
    personality: AIPersonality,
) -> np.ndarray:
    """Score every plan in one vectorized pass; returns (n,) float scores ≥ 0.

    Each plan is simulated onto a copy of the board, reduced to a handful of
    normalized metrics, then blended with personality-specific weights and
    adjusted for the current game context.
    """
    n = len(plans)

    # Pre-compute PCV for every hand card once
    pcv_cache = {
        id(c): max(0.0, min(1.0, get_power_curve_value(c)))
        for c in player.hand
    }

    # Build board-state arrays
    board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
    board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
    n_sac = np.zeros(n, dtype=np.float32)
    sac_val = np.zeros(n, dtype=np.float32)  # total cost of sacrificed cards
    play_val = np.zeros(n, dtype=np.float32)  # total cost of played cards
    pcv_score = np.full(n, 0.5, dtype=np.float32)  # 0.5 = neutral when no plays

    # Simulate each plan onto a scratch copy of the player's board.
    for idx, plan in enumerate(plans):
        board = list(player.board)
        for slot in plan.sacrifice_slots:
            board_slot = board[slot]
            if board_slot is not None:
                sac_val[idx] += board_slot.cost
                board[slot] = None
        n_sac[idx] = len(plan.sacrifice_slots)
        for card, slot in plan.plays:
            board[slot] = card
            play_val[idx] += card.cost
        for slot in range(BOARD_SIZE):
            board_slot = board[slot]
            if board_slot is not None:
                board_atk[idx, slot] = board_slot.attack
                board_occ[idx, slot] = True
        if plan.plays:
            pcv_vals = [pcv_cache.get(id(c), 0.5) for c, _ in plan.plays]
            pcv_score[idx] = sum(pcv_vals) / len(pcv_vals)

    # Enemy board — same for every plan
    en_atk = np.array([c.attack if c else 0 for c in opponent.board], dtype=np.float32)
    en_def = np.array([c.defense if c else 0 for c in opponent.board], dtype=np.float32)
    en_occ = np.array([c is not None for c in opponent.board], dtype=np.bool_)
    enemy_occupied = int(en_occ.sum())

    # --- Metrics (all shape (n,)) ---
    # Attack into an empty enemy slot hits the opponent directly.
    direct_damage = (board_atk * ~en_occ).sum(axis=1)
    blocking = board_occ & en_occ  # (n, 5)
    blocking_slots = blocking.sum(axis=1).astype(np.float32)
    cards_on_board = board_occ.sum(axis=1).astype(np.float32)
    cards_destroyed = ((board_atk >= en_def) & blocking).sum(axis=1).astype(np.float32)
    # Enemy attack coming through our empty slots.
    unblocked_in = (en_atk * ~board_occ).sum(axis=1)

    # Each score is normalized to [0, 1]; max(..., 1) guards divide-by-zero.
    atk_score = np.minimum(1.0, direct_damage / max(opponent.life, 1))
    block_score = blocking_slots / enemy_occupied if enemy_occupied > 0 else np.ones(n, dtype=np.float32)
    open_slots = BOARD_SIZE - enemy_occupied
    cover_score = (
        (cards_on_board - blocking_slots) / open_slots
        if open_slots > 0
        else np.ones(n, dtype=np.float32)
    )
    destroy_score = cards_destroyed / enemy_occupied if enemy_occupied > 0 else np.zeros(n, dtype=np.float32)
    threat_score = 1.0 - np.minimum(1.0, unblocked_in / max(player.life, 1))

    # Card-advantage proxy: share of total remaining cards that are ours.
    opp_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
    my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
    attrition_score = my_cards_left / (my_cards_left + max(opp_cards_left, 1))

    net_value = play_val - sac_val
    net_value_norm = np.clip((net_value + 10) / 20, 0.0, 1.0)

    # --- Sacrifice penalty ---
    # Penalize sacrificing for energy that then goes unspent, and trading
    # away more board value than the plays put back.
    energy_leftover = player.energy + n_sac - play_val
    wasted_energy = np.maximum(0, np.minimum(n_sac, energy_leftover))
    wasted_penalty = np.where(n_sac > 0, wasted_energy / np.maximum(n_sac, 1), 0.0)
    swap_penalty = np.clip((n_sac - net_value) / np.maximum(n_sac, 1), 0.0, 1.0)
    sac_penalty = np.where(n_sac > 0, 0.65 * wasted_penalty + 0.35 * swap_penalty, 0.0)

    # --- Personality weights ---
    if personality == AIPersonality.AGGRESSIVE:
        score = (0.30 * atk_score + 0.07 * block_score + 0.15 * cover_score +
                 0.08 * net_value_norm + 0.25 * destroy_score +
                 0.08 * attrition_score + 0.04 * pcv_score + 0.03 * threat_score)
    elif personality == AIPersonality.DEFENSIVE:
        score = (0.12 * atk_score + 0.20 * block_score + 0.18 * cover_score +
                 0.04 * net_value_norm + 0.18 * destroy_score +
                 0.15 * attrition_score + 0.05 * pcv_score + 0.08 * threat_score)
    elif personality == AIPersonality.SWARM:
        score = (0.25 * atk_score + 0.10 * block_score + 0.35 * cover_score +
                 0.05 * net_value_norm + 0.05 * destroy_score +
                 0.10 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.GREEDY:
        score = (0.15 * atk_score + 0.05 * block_score + 0.18 * cover_score +
                 0.38 * net_value_norm + 0.05 * destroy_score +
                 0.09 * attrition_score + 0.05 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.CONTROL:
        score = (0.10 * atk_score + 0.05 * block_score + 0.05 * cover_score +
                 0.20 * net_value_norm + 0.05 * destroy_score +
                 0.10 * attrition_score + 0.40 * pcv_score + 0.05 * threat_score)
    elif personality == AIPersonality.BALANCED:
        score = (0.12 * atk_score + 0.13 * block_score + 0.15 * cover_score +
                 0.10 * net_value_norm + 0.12 * destroy_score +
                 0.15 * attrition_score + 0.12 * pcv_score + 0.11 * threat_score)
    else:  # ARBITRARY
        score = (0.50 * np.random.random(n).astype(np.float32) +
                 0.06 * atk_score + 0.06 * block_score + 0.08 * cover_score +
                 0.05 * net_value_norm + 0.06 * destroy_score +
                 0.08 * attrition_score + 0.06 * pcv_score + 0.05 * threat_score)

    # --- Context adjustments ---
    # Hard overrides: near-max for lethal plans, near-min for plans that lose.
    score = np.where(direct_damage >= opponent.life, np.maximum(score, 0.95), score)
    score = np.where(unblocked_in >= player.life, np.minimum(score, 0.05), score)

    # Matchup/tempo nudges, capped at 1.0.
    if opponent.deck_type in ("God Card", "Pantheon"):
        score = np.minimum(1.0, score + 0.08 * cover_score)
    if opponent.deck_type in ("Aggro", "Rush"):
        score = np.minimum(1.0, score + 0.06 * block_score + 0.04 * threat_score)
    if opponent.deck_type == "Wall":
        score = np.minimum(1.0, score + 0.06 * atk_score)
    if opponent.life < STARTING_LIFE * 0.3:
        score = np.minimum(1.0, score + 0.06 * atk_score)
    if player.life < STARTING_LIFE * 0.3:
        score = np.minimum(1.0, score + 0.06 * threat_score + 0.04 * block_score)
    if opp_cards_left <= 5:
        score = np.where(cards_on_board > 0, np.minimum(1.0, score + 0.05), score)

    return np.maximum(0.0, score - sac_penalty)
|
||||
|
||||
|
||||
def choose_plan(player: PlayerState, opponent: PlayerState, personality: AIPersonality, difficulty: int) -> MovePlan:
    """Pick the best plan for this turn, with difficulty-scaled decision noise.

    JEBRASKA scores plans with the trained plan NN (cached on the function
    object), falling back to the BALANCED heuristic when no weights exist.
    Lower difficulty adds Gaussian noise to the scores so weaker AIs misplay.
    """
    plans = generate_plans(player, opponent)

    if personality == AIPersonality.JEBRASKA:
        from ai.nn import NeuralNet
        # `os` is already imported at module level.
        _weights = os.path.join(os.path.dirname(__file__), "nn_weights.json")
        if not hasattr(choose_plan, "_neural_net"):
            choose_plan._neural_net = NeuralNet.load(_weights) if os.path.exists(_weights) else None
        net = choose_plan._neural_net
        if net is not None:
            from ai.nn import extract_plan_features
            scores = net.forward(extract_plan_features(plans, player, opponent))
        else:  # fallback to BALANCED if weights not found
            scores = score_plans_batch(plans, player, opponent, AIPersonality.BALANCED)
    else:
        scores = score_plans_batch(plans, player, opponent, personality)

    # Noise shrinks quadratically as difficulty rises. Clamp at 0: the raw
    # expression goes negative for difficulty > 10, and np.random.normal
    # raises ValueError on a negative scale.
    noise_scale = max(0.0, ((max(0, 12 - difficulty) ** 2) - 4) * 0.008)
    noise = np.random.normal(0, noise_scale, len(scores)).astype(np.float32)
    return plans[int(np.argmax(scores + noise))]
|
||||
|
||||
async def run_ai_turn(game_id: str):
    """Execute one full AI turn for `game_id`, streaming state to the human.

    Chooses a plan, animates sacrifices and plays with short sleeps, ends the
    turn, handles game-over bookkeeping, and re-schedules itself if the AI is
    still the active player (e.g. the human has no turn — TODO confirm rule).
    """
    # Imported lazily to avoid a circular import with game.manager.
    from game.manager import (
        active_games, connections, active_deck_ids,
        serialize_state, record_game_result, calculate_combat_animation_time
    )

    state = active_games.get(game_id)
    # Bail out if the game is gone/finished or it isn't the AI's turn.
    if not state or state.result:
        return
    if state.active_player_id != AI_USER_ID:
        return

    human_id = state.opponent_id(AI_USER_ID)
    # Wait up to ~10s for the human's websocket to (re)connect.
    waited = 0
    while not connections[game_id].get(human_id) and waited < 10:
        await asyncio.sleep(0.5)
        waited += 0.5

    # Let the client finish playing the previous combat animation first.
    await asyncio.sleep(calculate_combat_animation_time(state.last_combat_events))

    player = state.players[AI_USER_ID]
    opponent = state.players[human_id]
    difficulty = state.ai_difficulty
    personality = (
        AIPersonality(state.ai_personality)
        if state.ai_personality
        else AIPersonality.BALANCED
    )

    ws = connections[game_id].get(human_id)

    async def send_state(s):
        # Best-effort push; a dropped socket must not abort the AI turn.
        if ws:
            try:
                await ws.send_json({"type": "state", "state": serialize_state(s, human_id)})
            except Exception:
                pass

    async def send_sacrifice_anim(instance_id):
        # Best-effort animation trigger; failures are ignored like send_state.
        if ws:
            try:
                await ws.send_json({"type": "sacrifice_animation", "instance_id": instance_id})
            except Exception:
                pass

    best_plan = choose_plan(player, opponent, personality, difficulty)

    logger.info(
        f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} " +
        f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
    )

    # Sacrifices first (they free slots and generate energy for the plays).
    for slot in best_plan.sacrifice_slots:
        card_slot = player.board[slot]
        if card_slot is None:
            continue
        await send_sacrifice_anim(card_slot.instance_id)
        await asyncio.sleep(0.65)
        action_sacrifice(state, slot)
        await send_state(state)
        await asyncio.sleep(0.35)

    # Shuffle play order so the AI doesn't always fill slots left-to-right
    plays = list(best_plan.plays)
    random.shuffle(plays)

    for card, slot in plays:
        # Re-look up hand index each time (hand shrinks as cards are played)
        hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
        if hand_idx is None:
            continue
        # Skip plays invalidated since planning (occupied slot, not enough energy).
        if player.board[slot] is not None:
            continue
        if card.cost > player.energy:
            continue
        action_play_card(state, hand_idx, slot)
        await send_state(state)
        await asyncio.sleep(0.5)

    action_end_turn(state)
    await send_state(state)

    if state.result:
        # Game over: persist the result, push final state, tear down the game.
        from core.database import SessionLocal
        db = SessionLocal()
        try:
            record_game_result(state, db)
            if ws:
                await ws.send_json({
                    "type": "state",
                    "state": serialize_state(state, human_id),
                })
        finally:
            db.close()
        active_deck_ids.pop(human_id, None)
        active_deck_ids.pop(AI_USER_ID, None)
        active_games.pop(game_id, None)
        connections.pop(game_id, None)
        return

    # If the AI is somehow still the active player, take another turn.
    if state.active_player_id == AI_USER_ID:
        asyncio.create_task(run_ai_turn(game_id))
|
||||
266
backend/ai/nn.py
Normal file
266
backend/ai/nn.py
Normal file
@@ -0,0 +1,266 @@
|
||||
import json
|
||||
|
||||
import numpy as np
|
||||
|
||||
# Layout: [state(8) | my_board(15) | opp_board(15) | plan(3) | result_board(15) | opp_deck_type(8)]
|
||||
N_FEATURES = 64
|
||||
|
||||
_DECK_TYPES = ["Balanced", "Aggro", "Wall", "Rush", "Control", "God Card", "Pantheon", "Unplayable"]
|
||||
_DECK_TYPE_IDX = {dt: i for i, dt in enumerate(_DECK_TYPES)}
|
||||
|
||||
_MAX_ATK = 50.0
|
||||
_MAX_DEF = 100.0
|
||||
_MAX_DECK = 30.0
|
||||
|
||||
|
||||
def _softmax(x: np.ndarray) -> np.ndarray:
|
||||
e = np.exp(x - x.max())
|
||||
return e / e.sum()
|
||||
|
||||
|
||||
class NeuralNet:
    """
    Fully-connected plan scorer: n_features → 64 → 32 → 1
    Pure numpy so it can be pickled into worker processes.
    Optimizer: Adam.

    Note: adam_update applies gradient ASCENT (+=), matching the REINFORCE
    callers that pass reward-scaled gradients.
    """

    def __init__(self, n_features: int = N_FEATURES, hidden: tuple = (64, 32), seed: int | None = None):
        rng = np.random.RandomState(seed)
        sizes = [n_features] + list(hidden) + [1]

        self.weights: list[np.ndarray] = []
        self.biases: list[np.ndarray] = []
        # Adam first/second moment estimates per layer, plus step counter t.
        self.m_w: list[np.ndarray] = []
        self.v_w: list[np.ndarray] = []
        self.m_b: list[np.ndarray] = []
        self.v_b: list[np.ndarray] = []
        self.t = 0

        for fan_in, fan_out in zip(sizes, sizes[1:]):
            # He initialization (sqrt(2/fan_in)) — suited to the ReLU hidden layers.
            w = rng.randn(fan_in, fan_out).astype(np.float32) * np.sqrt(2.0 / fan_in)
            b = np.zeros(fan_out, dtype=np.float32)
            self.weights.append(w)
            self.biases.append(b)
            self.m_w.append(np.zeros_like(w))
            self.v_w.append(np.zeros_like(w))
            self.m_b.append(np.zeros_like(b))
            self.v_b.append(np.zeros_like(b))

        # Caches filled by forward() and consumed by backward().
        self._acts: list[np.ndarray] = []
        self._pre_acts: list[np.ndarray] = []

    def forward(self, X: np.ndarray) -> np.ndarray:
        """X: (n, n_features) → scores: (n,)

        Caches activations/pre-activations for a subsequent backward() call.
        ReLU on hidden layers; linear output layer.
        """
        h = X.astype(np.float32)
        self._acts = [h]
        self._pre_acts = []
        for i, (W, b) in enumerate(zip(self.weights, self.biases)):
            z = h @ W + b
            self._pre_acts.append(z)
            h = np.maximum(0.0, z) if i < len(self.weights) - 1 else z
            self._acts.append(h)
        return h.squeeze(-1)

    def backward(self, upstream: np.ndarray) -> tuple[list, list]:
        """
        upstream: (n,) — dJ/d(scores), gradient for ascent.
        Returns (grads_w, grads_b).

        Must be called right after forward(); reads the cached activations.
        Gradients are averaged over the batch dimension n.
        """
        n = len(upstream)
        delta = upstream[:, None]  # (n, 1)
        grads_w = [None] * len(self.weights)
        grads_b = [None] * len(self.biases)
        for i in range(len(self.weights) - 1, -1, -1):
            h_in = self._acts[i]  # (n, in_size)
            grads_w[i] = h_in.T @ delta / n
            grads_b[i] = delta.mean(axis=0)
            if i > 0:
                # Propagate through the layer and the ReLU mask of the previous one.
                delta = (delta @ self.weights[i].T) * (self._pre_acts[i - 1] > 0)
        return grads_w, grads_b

    def adam_update(self, grads_w: list, grads_b: list,
                    lr: float = 1e-3, beta1: float = 0.9,
                    beta2: float = 0.999, eps: float = 1e-8,
                    grad_clip: float = 1.0) -> None:
        """Apply one Adam ascent step (weights move ALONG the gradient)."""
        # Global gradient norm clipping
        all_grads = [g for g in grads_w + grads_b if g is not None]
        global_norm = np.sqrt(sum(np.sum(g * g) for g in all_grads))
        if global_norm > grad_clip:
            scale = grad_clip / global_norm
            grads_w = [g * scale for g in grads_w]
            grads_b = [g * scale for g in grads_b]

        self.t += 1
        # Bias-correction denominators for the moment estimates.
        bc1 = 1 - beta1 ** self.t
        bc2 = 1 - beta2 ** self.t
        for i, (gw, gb) in enumerate(zip(grads_w, grads_b)):
            self.m_w[i] = beta1 * self.m_w[i] + (1 - beta1) * gw
            self.v_w[i] = beta2 * self.v_w[i] + (1 - beta2) * gw * gw
            self.weights[i] += lr * (self.m_w[i] / bc1) / (np.sqrt(self.v_w[i] / bc2) + eps)

            self.m_b[i] = beta1 * self.m_b[i] + (1 - beta1) * gb
            self.v_b[i] = beta2 * self.v_b[i] + (1 - beta2) * gb * gb
            self.biases[i] += lr * (self.m_b[i] / bc1) / (np.sqrt(self.v_b[i] / bc2) + eps)

    def save(self, path: str) -> None:
        """Serialize weights AND optimizer state to JSON so training can resume."""
        data = {
            "weights": [w.tolist() for w in self.weights],
            "biases": [b.tolist() for b in self.biases],
            "m_w": [m.tolist() for m in self.m_w],
            "v_w": [v.tolist() for v in self.v_w],
            "m_b": [m.tolist() for m in self.m_b],
            "v_b": [v.tolist() for v in self.v_b],
            "t": self.t,
        }
        with open(path, "w") as f:
            json.dump(data, f)

    @classmethod
    def load(cls, path: str) -> "NeuralNet":
        """Rebuild a net from save() output.

        Uses __new__ to bypass __init__, so layer sizes come entirely from the
        file rather than the constructor defaults.
        """
        with open(path) as f:
            data = json.load(f)
        net = cls.__new__(cls)
        net.weights = [np.array(w, dtype=np.float32) for w in data["weights"]]
        net.biases = [np.array(b, dtype=np.float32) for b in data["biases"]]
        net.m_w = [np.array(m, dtype=np.float32) for m in data["m_w"]]
        net.v_w = [np.array(v, dtype=np.float32) for v in data["v_w"]]
        net.m_b = [np.array(m, dtype=np.float32) for m in data["m_b"]]
        net.v_b = [np.array(v, dtype=np.float32) for v in data["v_b"]]
        net.t = data["t"]
        net._acts = []
        net._pre_acts = []
        return net
|
||||
|
||||
|
||||
def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
    """
    Build the feature matrix the plan network scores: one row per candidate plan.

    Returns (n_plans, N_FEATURES) float32 array.
    Layout: [state(8) | my_board(15) | opp_board(15) | plan(3) | result_board(15) | opp_deck one-hot]
    The state, board, and deck-type segments are identical for every row;
    only the plan segment and the simulated result board vary per plan.
    """
    # Local import — presumably avoids a circular import at module load; confirm.
    from game.rules import BOARD_SIZE, HAND_SIZE, MAX_ENERGY_CAP, STARTING_LIFE

    n = len(plans)

    # state (same for every plan) — each value normalized by its game-rule cap
    state = np.array([
        player.life / STARTING_LIFE,
        opponent.life / STARTING_LIFE,
        player.energy / MAX_ENERGY_CAP,
        player.energy_cap / MAX_ENERGY_CAP,
        len(player.hand) / HAND_SIZE,
        len(opponent.hand) / HAND_SIZE,
        len(player.deck) / _MAX_DECK,
        len(opponent.deck) / _MAX_DECK,
    ], dtype=np.float32)

    # current boards (same for every plan): 3 features per slot —
    # [attack, defense, occupied flag]
    my_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
    opp_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
    for slot in range(BOARD_SIZE):
        c = player.board[slot]
        if c is not None:
            my_board[slot * 3] = c.attack / _MAX_ATK
            my_board[slot * 3 + 1] = c.defense / _MAX_DEF
            my_board[slot * 3 + 2] = 1.0
        c = opponent.board[slot]
        if c is not None:
            opp_board[slot * 3] = c.attack / _MAX_ATK
            opp_board[slot * 3 + 1] = c.defense / _MAX_DEF
            opp_board[slot * 3 + 2] = 1.0

    # per-plan features
    plan_part = np.zeros((n, 3 + BOARD_SIZE * 3), dtype=np.float32)
    for idx, plan in enumerate(plans):
        # simulate board result: clear sacrificed slots, then place the plays
        result = list(player.board)
        for slot in plan.sacrifice_slots:
            result[slot] = None
        for card, slot in plan.plays:
            result[slot] = card

        total_cost = sum(c.cost for c, _ in plan.plays) if plan.plays else 0
        plan_part[idx, 0] = len(plan.sacrifice_slots) / BOARD_SIZE
        plan_part[idx, 1] = len(plan.plays) / HAND_SIZE
        # Denominator includes BOARD_SIZE — presumably sacrifices refund
        # energy, raising the spendable total; confirm against game rules.
        plan_part[idx, 2] = total_cost / (MAX_ENERGY_CAP + BOARD_SIZE)

        for slot in range(BOARD_SIZE):
            c = result[slot]
            if c is not None:
                plan_part[idx, 3 + slot * 3] = c.attack / _MAX_ATK
                plan_part[idx, 3 + slot * 3 + 1] = c.defense / _MAX_DEF
                plan_part[idx, 3 + slot * 3 + 2] = 1.0

    # opponent deck type one-hot (same for every plan); unknown types fall
    # back to index 0 via .get's default
    opp_deck_oh = np.zeros(len(_DECK_TYPES), dtype=np.float32)
    opp_deck_oh[_DECK_TYPE_IDX.get(opponent.deck_type, 0)] = 1.0

    # Tile the shared segments to n rows and stitch everything together.
    state_t = np.tile(state, (n, 1))
    my_board_t = np.tile(my_board, (n, 1))
    opp_board_t = np.tile(opp_board, (n, 1))
    opp_deck_t = np.tile(opp_deck_oh, (n, 1))

    return np.concatenate([state_t, my_board_t, opp_board_t, plan_part, opp_deck_t], axis=1)
|
||||
|
||||
|
||||
class NeuralPlayer:
    """
    Wraps a NeuralNet for use in game simulation.

    In training mode, samples plans stochastically and records the trajectory
    for a REINFORCE update after the game ends.
    In inference mode, picks the highest-scoring plan deterministically.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # net: scores candidate plans. temperature: softmax sharpness used
        # only when training (higher = more exploration).
        self.net = net
        self.training = training
        self.temperature = temperature
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features, chosen_idx)

    def choose_plan(self, player, opponent):
        """Pick a plan for this turn: sampled when training, argmax otherwise."""
        # Local import — presumably avoids a circular import; confirm.
        from ai.engine import generate_plans
        plans = generate_plans(player, opponent)
        features = extract_plan_features(plans, player, opponent)
        scores = self.net.forward(features)

        if self.training:
            # Temperature-scaled softmax sampling. float64 plus the clip and
            # renormalize guard against underflow breaking np.random.choice's
            # requirement that probabilities sum to 1.
            probs = _softmax((scores / self.temperature).astype(np.float64))
            probs = np.clip(probs, 1e-10, None)
            probs /= probs.sum()
            chosen_idx = int(np.random.choice(len(plans), p=probs))
            self.trajectory.append((features, chosen_idx))
        else:
            chosen_idx = int(np.argmax(scores))

        return plans[chosen_idx]

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        Computes averaged REINFORCE gradients for this trajectory without updating weights.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if trajectory is empty.
        """
        if not self.trajectory:
            return None

        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]

        for features, chosen_idx in self.trajectory:
            # Re-run forward so the net's internal caches correspond to this
            # decision before backpropagating through it.
            scores = self.net.forward(features)
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # Gradient of log-softmax at the chosen index, scaled by reward:
            # upstream = outcome * (one_hot(chosen) - probs)
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]

        # Average over the trajectory so episode length doesn't scale the step.
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n

        # Reset for the next episode.
        self.trajectory.clear()
        return acc_gw, acc_gb
|
||||
1
backend/ai/nn_weights.json
Normal file
1
backend/ai/nn_weights.json
Normal file
File diff suppressed because one or more lines are too long
634
backend/ai/simulate.py
Normal file
634
backend/ai/simulate.py
Normal file
@@ -0,0 +1,634 @@
|
||||
import asyncio
|
||||
import json
|
||||
import math
|
||||
import os
|
||||
import random
|
||||
import uuid
|
||||
from concurrent.futures import ProcessPoolExecutor
|
||||
from datetime import datetime
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from game.card import Card, CardType, CardRarity, generate_cards, compute_deck_type
|
||||
from game.rules import (
|
||||
CardInstance, PlayerState, GameState,
|
||||
action_play_card, action_sacrifice, action_end_turn,
|
||||
)
|
||||
from ai.engine import AIPersonality, choose_cards, choose_plan
|
||||
|
||||
SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_cards.json")
|
||||
SIMULATION_CARD_COUNT = 1000
|
||||
|
||||
|
||||
def _card_to_dict(card: Card) -> dict:
    """Serialize a Card into a JSON-compatible dict (inverse of _dict_to_card)."""
    result: dict = {}
    result["name"] = card.name
    result["generated_at"] = card.generated_at.isoformat()
    result["image_link"] = card.image_link
    result["card_rarity"] = card.card_rarity.name
    result["card_type"] = card.card_type.name
    result["wikidata_instance"] = card.wikidata_instance
    result["text"] = card.text
    result["attack"] = card.attack
    result["defense"] = card.defense
    result["cost"] = card.cost
    return result
|
||||
|
||||
|
||||
def _dict_to_card(d: dict) -> Card:
    """Rebuild a Card from a dict produced by _card_to_dict."""
    fields = dict(
        name=d["name"],
        generated_at=datetime.fromisoformat(d["generated_at"]),
        image_link=d["image_link"],
        card_rarity=CardRarity[d["card_rarity"]],
        card_type=CardType[d["card_type"]],
        wikidata_instance=d["wikidata_instance"],
        text=d["text"],
        attack=d["attack"],
        defense=d["defense"],
        cost=d["cost"],
    )
    return Card(**fields)
|
||||
|
||||
|
||||
def get_simulation_cards() -> list[Card]:
    """Return the cached simulation card pool, generating and caching it on first use."""
    if not os.path.exists(SIMULATION_CARDS_PATH):
        # Cache miss: generate the pool once and persist it for later runs.
        print(f"Generating {SIMULATION_CARD_COUNT} cards (this may take a while)...")
        cards = generate_cards(SIMULATION_CARD_COUNT)

        with open(SIMULATION_CARDS_PATH, "w", encoding="utf-8") as f:
            json.dump([_card_to_dict(c) for c in cards], f, ensure_ascii=False, indent=2)

        print(f"Saved {len(cards)} cards to {SIMULATION_CARDS_PATH}")
        return cards

    # Cache hit: deserialize the previously generated pool.
    with open(SIMULATION_CARDS_PATH, "r", encoding="utf-8") as f:
        data = json.load(f)
    return [_dict_to_card(d) for d in data]
|
||||
|
||||
|
||||
# Fixed player ids used for every simulated game; simulate_game returns one
# of these (or None) as the winner.
PLAYER1_ID = "p1"
PLAYER2_ID = "p2"
MAX_TURNS = 300  # safety cap to prevent infinite games
|
||||
|
||||
|
||||
def _make_instances(deck: list[Card]) -> list[CardInstance]:
    """Wrap each Card of a deck in a fresh CardInstance with a unique instance id."""
    instances: list[CardInstance] = []
    for card in deck:
        instance = CardInstance(
            instance_id=str(uuid.uuid4()),
            card_id=card.name,
            name=card.name,
            attack=card.attack,
            defense=card.defense,
            max_defense=card.defense,
            cost=card.cost,
            card_type=card.card_type.name,
            card_rarity=card.card_rarity.name,
            image_link=card.image_link or "",
            text=card.text or "",
        )
        instances.append(instance)
    return instances
|
||||
|
||||
|
||||
def simulate_game(
    cards: list[Card],
    difficulty1: int,
    personality1: AIPersonality,
    difficulty2: int,
    personality2: AIPersonality,
) -> str | None:
    """
    Simulate a single game between two AIs choosing from `cards`.
    Player 1 always goes first.

    Returns "p1", "p2", or None if the game exceeds MAX_TURNS.
    """
    # Each AI drafts its own deck from the shared pool.
    deck1 = choose_cards(cards, difficulty1, personality1)
    deck2 = choose_cards(cards, difficulty2, personality2)

    instances1 = _make_instances(deck1)
    instances2 = _make_instances(deck2)
    random.shuffle(instances1)
    random.shuffle(instances2)

    deck_type1 = compute_deck_type(deck1) or "Balanced"
    deck_type2 = compute_deck_type(deck2) or "Balanced"

    p1 = PlayerState(user_id=PLAYER1_ID, username="AI1", deck_type=deck_type1, deck=instances1)
    p2 = PlayerState(user_id=PLAYER2_ID, username="AI2", deck_type=deck_type2, deck=instances2)

    # P1 always goes first: both caps tick up, but only P1 gets energy
    # and an opening hand before turn 1.
    p1.increment_energy_cap()
    p2.increment_energy_cap()
    p1.refill_energy()
    p1.draw_to_full()

    state = GameState(
        game_id=str(uuid.uuid4()),
        players={PLAYER1_ID: p1, PLAYER2_ID: p2},
        player_order=[PLAYER1_ID, PLAYER2_ID],
        active_player_id=PLAYER1_ID,
        phase="main",
        turn=1,
    )

    # Per-player AI configuration, looked up by active player each turn.
    configs = {
        PLAYER1_ID: (difficulty1, personality1),
        PLAYER2_ID: (difficulty2, personality2),
    }

    for _ in range(MAX_TURNS):
        if state.result:
            break

        active_id = state.active_player_id
        difficulty, personality = configs[active_id]
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]

        plan = choose_plan(player, opponent, personality, difficulty)

        # Execute sacrifices first so freed slots are available for plays.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)

        # Play in random order; skip any play invalidated by an earlier
        # action (card no longer in hand, slot occupied, not enough energy).
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None:
                continue
            if player.board[slot] is not None:
                continue
            if card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)

        action_end_turn(state)

    if state.result and state.result.winner_id:
        return state.result.winner_id
    return None
|
||||
|
||||
|
||||
# These must be module-level so they are picklable.

# Shared card pool for this worker process, set once by _init_worker.
_worker_cards: list[Card] = []


def _init_worker(cards: list[Card]) -> None:
    """ProcessPoolExecutor initializer: stash the card pool in the worker process."""
    global _worker_cards
    _worker_cards = cards
|
||||
|
||||
def _run_game_sync(args: tuple) -> str | None:
    """Picklable worker entry point: run one game using the cached card pool."""
    difficulty1, name1, difficulty2, name2 = args
    personality1 = AIPersonality(name1)
    personality2 = AIPersonality(name2)
    return simulate_game(_worker_cards, difficulty1, personality1, difficulty2, personality2)
|
||||
|
||||
|
||||
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
    """Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
    levels = list(range(1, 11)) if difficulties is None else difficulties
    combos: list[tuple[AIPersonality, int]] = []
    for personality in AIPersonality:
        for difficulty in levels:
            combos.append((personality, difficulty))
    return combos
|
||||
|
||||
|
||||
def _player_label(personality: AIPersonality, difficulty: int) -> str:
    """Short display label for a player, e.g. 'AGG-7'."""
    prefix = personality.value[:3].upper()
    return f"{prefix}-{difficulty}"
|
||||
|
||||
|
||||
async def run_tournament(
    cards: list[Card],
    games_per_matchup: int = 5,
    difficulties: list[int] | None = None,
) -> dict[tuple[int, int], int]:
    """
    Pit every (personality, difficulty) pair against every other, as both
    first and second player.

    `difficulties` selects which difficulty levels to include (default: 1-10).

    Returns a wins dict keyed by (first_player_index, second_player_index)
    where the value is how many of `games_per_matchup` games the first player won.

    Games run in parallel across all CPU cores via ProcessPoolExecutor.
    Cards are sent to each worker once at startup, not once per game.
    """
    players = _all_players(difficulties)
    n = len(players)

    # One work item per game: (first_idx, second_idx, picklable worker args).
    indexed_args: list[tuple[int, int, tuple]] = []
    for i in range(n):
        p1_personality, p1_difficulty = players[i]
        for j in range(n):
            p2_personality, p2_difficulty = players[j]
            args = (p1_difficulty, p1_personality.value, p2_difficulty, p2_personality.value)
            for _ in range(games_per_matchup):
                indexed_args.append((i, j, args))

    total_games = len(indexed_args)
    n_workers = os.cpu_count() or 1
    print(f"Running {total_games} games across {n_workers} workers "
          f"({n} players, {games_per_matchup} games per ordered pair)...")

    # Mutable cell so the nested coroutine can update the progress counter.
    done = [0]
    # Throttle progress output to roughly 200 updates total.
    report_every = max(1, total_games // 200)

    loop = asyncio.get_running_loop()

    async def tracked(future):
        # Await one game's future and emit throttled progress.
        result = await future
        done[0] += 1
        if done[0] % report_every == 0 or done[0] == total_games:
            pct = done[0] / total_games * 100
            print(f" {done[0]}/{total_games} games done ({pct:.1f}%)", end="\r", flush=True)
        return result

    # Workers receive the card pool once via the initializer, not per task.
    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        futures = [
            loop.run_in_executor(executor, _run_game_sync, args)
            for _, _, args in indexed_args
        ]
        results = await asyncio.gather(*[tracked(f) for f in futures])

    print("\nFinished")

    # Tally wins per ordered pair; a None winner means the game hit MAX_TURNS.
    wins: dict[tuple[int, int], int] = {}
    ties = 0
    for (i, j, _), winner in zip(indexed_args, results):
        key = (i, j)
        if key not in wins:
            wins[key] = 0
        if winner == PLAYER1_ID:
            wins[key] += 1
        elif winner is None:
            ties += 1

    print(f"Ties: {ties}")

    return wins
|
||||
|
||||
|
||||
def _sprt_check(wins: int, total: int, log_win: float, log_loss: float, log_B: float) -> bool:
|
||||
"""
|
||||
Return True when the SPRT has reached a decision for this matchup.
|
||||
|
||||
Tests H0: win_rate = 0.5 vs H1: win_rate = p_decisive (or 1-p_decisive).
|
||||
log_win = log(p_decisive / 0.5)
|
||||
log_loss = log((1 - p_decisive) / 0.5)
|
||||
|
||||
LLR drifts slowly for near-50% matchups and quickly for lopsided ones.
|
||||
Decided when LLR crosses ±log_B.
|
||||
"""
|
||||
llr = wins * log_win + (total - wins) * log_loss
|
||||
return llr >= log_B or llr <= -log_B
|
||||
|
||||
|
||||
async def run_tournament_adaptive(
    cards: list[Card],
    difficulties: list[int] | None = None,
    min_games: int = 5,
    max_games: int = 200,
    p_decisive: float = 0.65,
    alpha: float = 0.05,
) -> tuple[dict[tuple[int, int], int], dict[tuple[int, int], int]]:
    """
    Like run_tournament but allocates games adaptively.

    Each ordered pair (i, j) plays until SPRT decides one player is dominant
    (win rate ≥ p_decisive with confidence 1-alpha) or max_games is reached.
    Close matchups play more games; lopsided ones stop early.

    Returns (wins, played):
      wins[(i, j)] — how many games player i won as first player against j
      played[(i, j)] — how many games were played for that pair

    Each round, all currently-undecided pairs play one game in parallel across
    all CPU cores, preserving full parallelism while adapting per-pair budgets.
    """
    players = _all_players(difficulties)
    n = len(players)
    # Ordered pairs, including mirror matchups and self-play (i == j).
    all_pairs = [(i, j) for i in range(n) for j in range(n)]

    wins: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    played: dict[tuple[int, int], int] = {pair: 0 for pair in all_pairs}
    decided: set[tuple[int, int]] = set()

    # Precompute SPRT constants (H0: p=0.5, H1: p=p_decisive)
    log_B = math.log((1 - alpha) / alpha)
    log_win = math.log(p_decisive / 0.5)
    log_loss = math.log((1 - p_decisive) / 0.5)

    def make_args(i: int, j: int) -> tuple:
        # Picklable per-game arguments for _run_game_sync.
        p1, d1 = players[i]
        p2, d2 = players[j]
        return (d1, p1.value, d2, p2.value)

    n_workers = os.cpu_count() or 1
    loop = asyncio.get_running_loop()
    total_played = 0
    max_possible = len(all_pairs) * max_games

    print(
        f"Adaptive tournament: {n} players, {len(all_pairs)} pairs, "
        f"SPRT p_decisive={p_decisive} alpha={alpha}, "
        f"min={min_games} max={max_games} games/pair\n"
        f"Worst case: {max_possible:,} games across {n_workers} workers"
    )

    with ProcessPoolExecutor(
        max_workers=n_workers,
        initializer=_init_worker,
        initargs=(cards,),
    ) as executor:
        round_num = 0
        while True:
            # Pairs still needing games: undecided and under the max budget.
            pending = [
                pair for pair in all_pairs
                if pair not in decided and played[pair] < max_games
            ]
            if not pending:
                break

            round_num += 1
            # One game per pending pair; the whole batch runs in parallel.
            batch = [(i, j, make_args(i, j)) for (i, j) in pending]
            futures = [
                loop.run_in_executor(executor, _run_game_sync, args)
                for _, _, args in batch
            ]
            results = await asyncio.gather(*futures)

            newly_decided = 0
            for (i, j, _), winner in zip(batch, results):
                played[(i, j)] += 1
                if winner == PLAYER1_ID:
                    wins[(i, j)] += 1
                total_played += 1

                # Only run the SPRT once min_games is reached, so a tiny
                # sample can't trigger a premature decision.
                if (played[(i, j)] >= min_games
                        and _sprt_check(wins[(i, j)], played[(i, j)], log_win, log_loss, log_B)):
                    decided.add((i, j))
                    newly_decided += 1

            remaining = len(all_pairs) - len(decided)
            pct = total_played / max_possible * 100
            print(
                f" Round {round_num:3d}: {len(pending):5d} games, "
                f"+{newly_decided:4d} decided, "
                f"{remaining:5d} pairs left, "
                f"{total_played:,} total ({pct:.1f}% of worst case)",
                end="\r", flush=True,
            )

    savings = max_possible - total_played
    print(
        f"\nFinished: {total_played:,} games played "
        f"(saved {savings:,} vs fixed, "
        f"{savings / max_possible * 100:.1f}% reduction)"
    )
    print(
        f"Early decisions: {len(decided)}/{len(all_pairs)} pairs "
        f"({len(decided) / len(all_pairs) * 100:.1f}%)"
    )

    return wins, played
|
||||
|
||||
|
||||
def compute_bradley_terry(
|
||||
wins: dict[tuple[int, int], int],
|
||||
n: int,
|
||||
played: dict[tuple[int, int], int] | None = None,
|
||||
games_per_matchup: int | None = None,
|
||||
iterations: int = 1000,
|
||||
) -> list[float]:
|
||||
"""
|
||||
Compute Bradley-Terry strength parameters for all n players.
|
||||
|
||||
For each pair (i, j): w_ij wins for i, w_ji wins for j.
|
||||
Iteratively updates: strength[i] = sum_j(w_ij) / sum_j((w_ij+w_ji) / (s[i]+s[j]))
|
||||
|
||||
Returns a list of strength values indexed by player. Unlike Elo, this is
|
||||
path-independent and converges to a unique maximum-likelihood solution.
|
||||
"""
|
||||
w: list[list[int]] = [[0] * n for _ in range(n)]
|
||||
for (i, j), p1_wins in wins.items():
|
||||
g = played[(i, j)] if played is not None else games_per_matchup
|
||||
if g:
|
||||
w[i][j] += p1_wins
|
||||
w[j][i] += g - p1_wins
|
||||
|
||||
strength = [1.0] * n
|
||||
for _ in range(iterations):
|
||||
new_strength = [0.0] * n
|
||||
for i in range(n):
|
||||
wins_i = sum(w[i][j] for j in range(n) if j != i)
|
||||
denom = sum(
|
||||
(w[i][j] + w[j][i]) / (strength[i] + strength[j])
|
||||
for j in range(n)
|
||||
if j != i and (w[i][j] + w[j][i]) > 0
|
||||
)
|
||||
new_strength[i] = wins_i / denom if denom > 0 else strength[i]
|
||||
# Normalize so the mean stays at 1.0
|
||||
mean = sum(new_strength) / n
|
||||
strength = [s / mean for s in new_strength]
|
||||
|
||||
return strength
|
||||
|
||||
|
||||
def rank_players(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
) -> list[int]:
    """
    Rank player indices by Bradley-Terry strength. Returns indices sorted worst-to-best.

    Provide either `played` (adaptive tournament) or `games_per_matchup` (fixed).
    Raises ValueError if neither is supplied.
    """
    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")

    n = len(players)
    ratings = compute_bradley_terry(wins, n, played=played, games_per_matchup=games_per_matchup)
    indices = list(range(n))
    indices.sort(key=lambda i: ratings[i])
    return indices
|
||||
|
||||
|
||||
# Default on-disk location for persisted tournament results (JSON).
TOURNAMENT_RESULTS_PATH = os.path.join(os.path.dirname(__file__), "tournament_results.json")
|
||||
|
||||
|
||||
def save_tournament(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]],
    path: str = TOURNAMENT_RESULTS_PATH,
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
):
    """Persist tournament results as JSON (inverse of load_tournament).

    Tuple keys are flattened to "i,j" strings since JSON keys must be strings.
    """
    player_entries = [
        {"personality": p.value, "difficulty": d}
        for p, d in players
    ]
    win_entries = {f"{i},{j}": w for (i, j), w in wins.items()}

    data = {
        "players": player_entries,
        "wins": win_entries,
    }
    if played is not None:
        data["played"] = {f"{i},{j}": g for (i, j), g in played.items()}
    if games_per_matchup is not None:
        data["games_per_matchup"] = games_per_matchup

    with open(path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=2)
    print(f"Tournament results saved to {path}")
|
||||
|
||||
|
||||
def load_tournament(
    path: str = TOURNAMENT_RESULTS_PATH,
) -> tuple[
    dict[tuple[int, int], int],
    dict[tuple[int, int], int] | None,
    int | None,
    list[tuple[AIPersonality, int]],
]:
    """Returns (wins, played, games_per_matchup, players).

    `played` is None for legacy fixed-game files (use games_per_matchup instead).
    `games_per_matchup` is None for adaptive files (use played instead).
    """
    with open(path, "r", encoding="utf-8") as f:
        data = json.load(f)

    def parse_pair_dict(d: dict) -> dict[tuple[int, int], int]:
        # Reverse the "i,j" string-key flattening done by save_tournament.
        out: dict[tuple[int, int], int] = {}
        for key, value in d.items():
            left, _, right = key.partition(",")
            out[(int(left), int(right))] = value
        return out

    wins = parse_pair_dict(data["wins"])
    played = parse_pair_dict(data["played"]) if "played" in data else None
    games_per_matchup = data.get("games_per_matchup")
    players = [
        (AIPersonality(entry["personality"]), entry["difficulty"])
        for entry in data["players"]
    ]
    return wins, played, games_per_matchup, players
|
||||
|
||||
|
||||
def draw_grid(
    wins: dict[tuple[int, int], int],
    players: list[tuple[AIPersonality, int]] | None = None,
    output_path: str = "tournament_grid.png",
    played: dict[tuple[int, int], int] | None = None,
    games_per_matchup: int | None = None,
    ranked: list[int] | None = None,
):
    """
    Draw a heatmap grid of tournament results.

    Rows = first player
    Cols = second player
    Color = red if first player won more of their games in that cell
            green if second player won more
            × = one player swept all games in that cell

    Provide either `played` (adaptive) or `games_per_matchup` (fixed);
    raises ValueError if neither is given. Writes a PNG to `output_path`.
    """
    # Local imports keep matplotlib optional; the Agg backend renders
    # without a display.
    import matplotlib
    matplotlib.use("Agg")
    import matplotlib.pyplot as plt
    import matplotlib.colors as mcolors
    import numpy as np

    if played is None and games_per_matchup is None:
        raise ValueError("Provide either played or games_per_matchup")

    if players is None:
        players = _all_players()
    n = len(players)
    if ranked is None:
        ranked = rank_players(wins, players, played=played, games_per_matchup=games_per_matchup)

    labels = [_player_label(*players[i]) for i in ranked]

    def games(i: int, j: int) -> int:
        # Number of games recorded for ordered pair (i, j); 0 when unknown.
        return_value = played[(i, j)] if played is not None else games_per_matchup
        return return_value if return_value is not None else 0

    # Build value matrix: (p1_wins - p2_wins) / total_games ∈ [-1, 1]
    matrix = np.full((n, n), np.nan)
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            matrix[row, col] = (p1_wins - (g - p1_wins)) / g if g > 0 else 0.0

    cell_size = 0.22
    fig_size = n * cell_size + 3
    fig, ax = plt.subplots(figsize=(fig_size, fig_size))

    cmap = mcolors.LinearSegmentedColormap.from_list(
        "p1_p2", ["#90EE90", "#67A2E0", "#D74E4E"]  # pastel green → blue → red
    )
    norm = mcolors.Normalize(vmin=-1, vmax=1)

    img = ax.imshow(matrix, cmap=cmap, norm=norm, aspect="equal", interpolation="none")

    # × marks for sweeps. Fix: require g > 0 — previously a cell with zero
    # recorded games vacuously satisfied p1_wins == 0 and was marked a sweep.
    for row, i in enumerate(ranked):
        for col, j in enumerate(ranked):
            g = games(i, j)
            p1_wins = wins.get((i, j), 0)
            if g > 0 and (p1_wins == g or p1_wins == 0):
                ax.text(col, row, "×", ha="center", va="center",
                        fontsize=5, color="black", fontweight="bold", zorder=3)

    ax.set_xticks(range(n))
    ax.set_yticks(range(n))
    ax.set_xticklabels(labels, rotation=90, fontsize=4)
    ax.set_yticklabels(labels, fontsize=4)
    ax.xaxis.set_label_position("top")
    ax.xaxis.tick_top()

    ax.set_xlabel("Second player", labelpad=8, fontsize=8)
    ax.set_ylabel("First player", labelpad=8, fontsize=8)
    ax.set_title(
        "Tournament results — red: first player wins more, green: second player wins more",
        pad=14, fontsize=9,
    )

    plt.colorbar(img, ax=ax, fraction=0.015, pad=0.01,
                 label="(P1 wins - P2 wins) / games per cell")

    plt.tight_layout()
    plt.savefig(output_path, dpi=150, bbox_inches="tight")
    plt.close()
    print(f"Grid saved to {output_path}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Restrict the field to the top difficulty band.
    difficulties = list(range(8, 11))

    card_pool = get_simulation_cards()
    players = _all_players(difficulties)
    wins, played = asyncio.run(run_tournament_adaptive(
        card_pool,
        difficulties=difficulties,
        min_games=20,
        max_games=1000,
        p_decisive=0.65,
        alpha=0.05,
    ))
    save_tournament(wins, players=players, played=played)

    ratings = compute_bradley_terry(wins, len(players), played=played)
    ranked = sorted(range(len(players)), key=lambda i: ratings[i])  # worst-to-best
    draw_grid(wins, players=players, played=played, ranked=ranked)

    # Fix: these are Bradley-Terry strengths, not Elo points — label them
    # correctly so the output isn't misread on an Elo scale.
    print("\nFinal Bradley-Terry ratings (best to worst):")
    for rank, i in enumerate(reversed(ranked), 1):
        personality, difficulty = players[i]
        label = _player_label(personality, difficulty)
        print(f" {rank:2d}. {label:<12} {ratings[i]:.1f}")
|
||||
278
backend/ai/train_nn.py
Normal file
278
backend/ai/train_nn.py
Normal file
@@ -0,0 +1,278 @@
|
||||
import os
|
||||
import random
|
||||
import uuid
|
||||
from collections import deque
|
||||
|
||||
import numpy as np
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from game.card import compute_deck_type
|
||||
from ai.engine import AIPersonality, choose_cards, choose_plan
|
||||
from game.rules import PlayerState, GameState, action_play_card, action_sacrifice, action_end_turn
|
||||
from ai.simulate import get_simulation_cards, _make_instances, MAX_TURNS
|
||||
from ai.nn import NeuralNet, NeuralPlayer
|
||||
from ai.card_pick_nn import CardPickPlayer, N_CARD_FEATURES, CARD_PICK_WEIGHTS_PATH
|
||||
|
||||
# Weights file for the plan-selection network.
NN_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "nn_weights.json")

# Player ids used in every training episode.
P1 = "p1"
P2 = "p2"

# Scripted opponent personalities used during training.
# NOTE(review): ARBITRARY and JEBRASKA are excluded — presumably because
# they are random/novelty personalities that give a poor training signal;
# confirm against ai.engine.
FIXED_PERSONALITIES = [
    p for p in AIPersonality
    if p not in (
        AIPersonality.ARBITRARY,
        AIPersonality.JEBRASKA
    )
]
|
||||
|
||||
|
||||
def _build_player(pid: str, name: str, cards: list, difficulty: int, personality: AIPersonality,
                  deck_pool: dict | None = None) -> PlayerState:
    """Build a PlayerState for a scripted opponent, reusing pooled decks when available."""
    if deck_pool and personality in deck_pool:
        # Reuse a pre-built deck instead of drafting from scratch.
        deck = random.choice(deck_pool[personality])
    else:
        deck = choose_cards(cards, difficulty, personality)
    instances = _make_instances(deck)
    random.shuffle(instances)
    deck_type = compute_deck_type(deck) or "Balanced"
    return PlayerState(
        user_id=pid, username=name,
        deck_type=deck_type,
        deck=instances,
    )
|
||||
|
||||
|
||||
def _build_nn_player(pid: str, name: str, cards: list, difficulty: int,
                     card_pick_player: CardPickPlayer) -> PlayerState:
    """Build a PlayerState using the card-pick NN for deck selection."""
    # Higher difficulties unlock pricier cards; below 6 the cap is fixed at 6.
    if difficulty >= 6:
        max_card_cost = difficulty + 1
    else:
        max_card_cost = 6
    allowed = [c for c in cards if c.cost <= max_card_cost]
    if not allowed:
        # Degenerate pool (every card over the cap): fall back to everything.
        allowed = list(cards)
    deck = card_pick_player.choose_cards(allowed, difficulty)
    instances = _make_instances(deck)
    random.shuffle(instances)
    return PlayerState(
        user_id=pid, username=name,
        deck_type=compute_deck_type(deck) or "Balanced",
        deck=instances,
    )
|
||||
|
||||
|
||||
def run_episode(
    p1_state: PlayerState,
    p2_state: PlayerState,
    p1_ctrl,  # (player, opponent) -> MovePlan
    p2_ctrl,  # (player, opponent) -> MovePlan
) -> str | None:
    """Play one full game between two controllers.

    Returns winner_id (P1 or P2) or None on timeout (MAX_TURNS exceeded).
    """
    # Standard opening: both caps tick up; only P1 (first player) gets
    # energy and a full hand before turn 1.
    p1_state.increment_energy_cap()
    p2_state.increment_energy_cap()
    p1_state.refill_energy()
    p1_state.draw_to_full()

    state = GameState(
        game_id=str(uuid.uuid4()),
        players={P1: p1_state, P2: p2_state},
        player_order=[P1, P2],
        active_player_id=P1,
        phase="main",
        turn=1,
    )
    ctrls = {P1: p1_ctrl, P2: p2_ctrl}

    for _ in range(MAX_TURNS):
        if state.result:
            break
        active_id = state.active_player_id
        player = state.players[active_id]
        opponent = state.players[state.opponent_id(active_id)]

        plan = ctrls[active_id](player, opponent)

        # Sacrifices first so freed board slots can receive plays.
        for slot in plan.sacrifice_slots:
            if player.board[slot] is not None:
                action_sacrifice(state, slot)

        # Play in random order; skip any play invalidated by an earlier
        # action (card gone from hand, slot occupied, not enough energy).
        plays = list(plan.plays)
        random.shuffle(plays)
        for card, slot in plays:
            hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
            if hand_idx is None or player.board[slot] is not None or card.cost > player.energy:
                continue
            action_play_card(state, hand_idx, slot)

        action_end_turn(state)

    return state.result.winner_id if state.result else None
|
||||
|
||||
|
||||
def train(
    n_episodes: int = 50_000,
    self_play_start: int = 0,
    self_play_max_frac: float = 0.9,
    lr: float = 1e-3,
    opp_difficulty: int = 10,
    temperature: float = 1.0,
    batch_size: int = 500,
    save_every: int = 5_000,
    save_path: str = NN_WEIGHTS_PATH,
) -> NeuralNet:
    """Policy-gradient training loop for the plan net and the card-pick net.

    Each episode is one simulated game. Episodes are either self-play
    (both seats driven by the plan net) or versus a scripted opponent
    with a fixed personality; the self-play probability ramps linearly
    from 0 to ``self_play_max_frac`` starting at episode
    ``self_play_start``. Per-player gradients are baseline-adjusted,
    accumulated, and applied with Adam once ``batch_size`` contributions
    have been collected (separately for each net). Both nets are
    checkpointed every ``save_every`` episodes and again at the end.

    Returns the trained plan network (the card-pick net is persisted to
    CARD_PICK_WEIGHTS_PATH as a side effect).
    """
    cards = get_simulation_cards()

    # Pre-build a pool of opponent decks per personality to avoid rebuilding from scratch each episode.
    DECK_POOL_SIZE = 100
    opp_deck_pool: dict[AIPersonality, list] = {
        p: [choose_cards(cards, opp_difficulty, p) for _ in range(DECK_POOL_SIZE)]
        for p in FIXED_PERSONALITIES
    }

    # Resume the plan net from disk when a checkpoint exists.
    if os.path.exists(save_path):
        print(f"Resuming plan net from {save_path}")
        net = NeuralNet.load(save_path)
    else:
        print("Initializing new plan network")
        net = NeuralNet(seed=42)

    # The card-pick net trains alongside but persists to its own file.
    cp_path = CARD_PICK_WEIGHTS_PATH
    if os.path.exists(cp_path):
        print(f"Resuming card-pick net from {cp_path}")
        card_pick_net = NeuralNet.load(cp_path)
    else:
        print("Initializing new card-pick network")
        card_pick_net = NeuralNet(n_features=N_CARD_FEATURES, hidden=(32, 16), seed=43)

    recent_outcomes: deque[int] = deque(maxlen=1000)  # rolling window for win rate display
    baseline = 0.0  # EMA of recent outcomes; subtracted before each update
    baseline_alpha = 0.99  # decay — roughly a 100-episode window

    # Gradient accumulators for the plan net, averaged when flushed.
    batch_gw = [np.zeros_like(w) for w in net.weights]
    batch_gb = [np.zeros_like(b) for b in net.biases]
    batch_count = 0

    # Separate accumulators for the card-pick net.
    cp_batch_gw = [np.zeros_like(w) for w in card_pick_net.weights]
    cp_batch_gb = [np.zeros_like(b) for b in card_pick_net.biases]
    cp_batch_count = 0

    for episode in range(1, n_episodes + 1):
        # Ramp self-play fraction linearly from 0 to self_play_max_frac
        if episode >= self_play_start:
            progress = (episode - self_play_start) / max(1, n_episodes - self_play_start)
            self_play_prob = self_play_max_frac * progress
        else:
            self_play_prob = 0.0

        # Randomly decide who goes first (NN is always P1 for simplicity)
        nn_goes_first = random.random() < 0.5

        if random.random() < self_play_prob:
            # --- Self-play episode: both seats share the same weights
            # but keep independent trajectory buffers for grads. ---
            nn1 = NeuralPlayer(net, training=True, temperature=temperature)
            nn2 = NeuralPlayer(net, training=True, temperature=temperature)
            cp1 = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            cp2 = CardPickPlayer(card_pick_net, training=True, temperature=temperature)

            p1_state = _build_nn_player(P1, "NN1", cards, 10, cp1)
            p2_state = _build_nn_player(P2, "NN2", cards, 10, cp2)

            # NOTE(review): this swap seats the state built with id P1 at
            # the P2 position (and vice versa) while the controllers stay
            # put — confirm winner attribution below is consistent with
            # how run_episode/GameState derive winner_id.
            if not nn_goes_first:
                p1_state, p2_state = p2_state, p1_state

            winner = run_episode(p1_state, p2_state, nn1.choose_plan, nn2.choose_plan)
            p1_outcome = 1.0 if winner == P1 else -1.0
            baseline = baseline_alpha * baseline + (1 - baseline_alpha) * p1_outcome

            # NOTE(review): the P2 copy's advantage is (-p1_outcome - baseline);
            # if baseline tracks P1's outcome, the mirrored advantage would be
            # (-p1_outcome + baseline) — confirm the sign is intended.
            for player_grads in [nn1.compute_grads(p1_outcome - baseline),
                                 nn2.compute_grads(-p1_outcome - baseline)]:
                if player_grads is not None:
                    gw, gb = player_grads
                    for i in range(len(batch_gw)):
                        batch_gw[i] += gw[i]
                        batch_gb[i] += gb[i]
                    batch_count += 1

            for cp_grads in [cp1.compute_grads(p1_outcome - baseline),
                             cp2.compute_grads(-p1_outcome - baseline)]:
                if cp_grads is not None:
                    gw, gb = cp_grads
                    for i in range(len(cp_batch_gw)):
                        cp_batch_gw[i] += gw[i]
                        cp_batch_gb[i] += gb[i]
                    cp_batch_count += 1

        else:
            # --- Versus-scripted-opponent episode. ---
            opp_personality = random.choice(FIXED_PERSONALITIES)
            nn_player = NeuralPlayer(net, training=True, temperature=temperature)
            cp_player = CardPickPlayer(card_pick_net, training=True, temperature=temperature)
            # Bind personality/difficulty as defaults to avoid late-binding issues.
            opp_ctrl = lambda p, o, pers=opp_personality, diff=opp_difficulty: choose_plan(p, o, pers, diff)

            if nn_goes_first:
                nn_id = P1
                p1_state = _build_nn_player(P1, "NN", cards, 10, cp_player)
                p2_state = _build_player(P2, "OPP", cards, opp_difficulty, opp_personality, opp_deck_pool)
                winner = run_episode(p1_state, p2_state, nn_player.choose_plan, opp_ctrl)
            else:
                nn_id = P2
                p1_state = _build_player(P1, "OPP", cards, opp_difficulty, opp_personality, opp_deck_pool)
                p2_state = _build_nn_player(P2, "NN", cards, 10, cp_player)
                winner = run_episode(p1_state, p2_state, opp_ctrl, nn_player.choose_plan)

            nn_outcome = 1.0 if winner == nn_id else -1.0
            # NOTE(review): plan-net grads use the pre-update baseline here,
            # while the card-pick grads below use the post-update baseline
            # (the self-play branch updates the baseline first for both) —
            # confirm this asymmetry is intentional.
            player_grads = nn_player.compute_grads(nn_outcome - baseline)
            baseline = baseline_alpha * baseline + (1 - baseline_alpha) * nn_outcome

            if player_grads is not None:
                gw, gb = player_grads
                for i in range(len(batch_gw)):
                    batch_gw[i] += gw[i]
                    batch_gb[i] += gb[i]
                batch_count += 1

            cp_grads = cp_player.compute_grads(nn_outcome - baseline)
            if cp_grads is not None:
                gw, gb = cp_grads
                for i in range(len(cp_batch_gw)):
                    cp_batch_gw[i] += gw[i]
                    cp_batch_gb[i] += gb[i]
                cp_batch_count += 1

        # NOTE(review): nn_id is only assigned in the versus-opponent
        # branch; after a self-play episode this reuses (or, if the very
        # first episode is self-play, fails on) a stale nn_id — verify.
        recent_outcomes.append(1 if winner == nn_id else 0)

        # Flush the plan-net batch: average, Adam step, reset.
        if batch_count >= batch_size:
            for i in range(len(batch_gw)):
                batch_gw[i] /= batch_count
                batch_gb[i] /= batch_count
            net.adam_update(batch_gw, batch_gb, lr=lr)
            batch_gw = [np.zeros_like(w) for w in net.weights]
            batch_gb = [np.zeros_like(b) for b in net.biases]
            batch_count = 0

        # Flush the card-pick batch independently.
        if cp_batch_count >= batch_size:
            for i in range(len(cp_batch_gw)):
                cp_batch_gw[i] /= cp_batch_count
                cp_batch_gb[i] /= cp_batch_count
            card_pick_net.adam_update(cp_batch_gw, cp_batch_gb, lr=lr)
            cp_batch_gw = [np.zeros_like(w) for w in card_pick_net.weights]
            cp_batch_gb = [np.zeros_like(b) for b in card_pick_net.biases]
            cp_batch_count = 0

        # Progress display: full line every 1000 episodes, ticker otherwise.
        if episode % 1000 == 0 or episode == n_episodes:
            wr = sum(recent_outcomes) / len(recent_outcomes) if recent_outcomes else 0.0
            print(f"\r[{episode:>6}/{n_episodes}] win rate (last {len(recent_outcomes)}): {wr:.1%} "
                  f"self-play frac: {self_play_prob:.0%}", flush=True)
        else:
            print(f" {episode % 1000}/1000", end="\r", flush=True)

        # Periodic checkpoint of both nets.
        if episode % save_every == 0:
            net.save(save_path)
            card_pick_net.save(cp_path)
            print(f" → saved to {save_path} and {cp_path}")

    # Final save regardless of save_every alignment.
    net.save(save_path)
    card_pick_net.save(cp_path)
    wr = sum(recent_outcomes) / len(recent_outcomes) if recent_outcomes else 0.0
    print(f"Done. Final win rate (last {len(recent_outcomes)}): {wr:.1%}")
    return net
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Run a full training session with default hyperparameters when
    # this module is executed as a script.
    train()
|
||||
Reference in New Issue
Block a user