🐐
This commit is contained in:
@@ -13,19 +13,17 @@ logger = logging.getLogger("app")
|
||||
AI_USER_ID = "ai"
|
||||
|
||||
class AIPersonality(Enum):
|
||||
AGGRESSIVE = "aggressive" # Prefers high attack cards, plays aggressively
|
||||
DEFENSIVE = "defensive" # Prefers high defense cards, plays conservatively
|
||||
BALANCED = "balanced" # Mix of offense and defense
|
||||
GREEDY = "greedy" # Prioritizes high cost cards, willing to sacrifice
|
||||
SWARM = "swarm" # Prefers low cost cards, fills board quickly
|
||||
CONTROL = "control" # Focuses on board control and efficiency
|
||||
ARBITRARY = "arbitrary" # Just does whatever
|
||||
JEBRASKA = "jebraska" # Trained neural network plan scorer
|
||||
AGGRESSIVE = "aggressive"
|
||||
DEFENSIVE = "defensive"
|
||||
BALANCED = "balanced"
|
||||
GREEDY = "greedy" # prioritizes high cost cards, willing to sacrifice
|
||||
SWARM = "swarm"
|
||||
CONTROL = "control"
|
||||
ARBITRARY = "arbitrary"
|
||||
JEBRASKA = "jebraska" # trained neural network plan scorer
|
||||
|
||||
def get_random_personality() -> AIPersonality:
|
||||
"""Returns a random AI personality."""
|
||||
# return random.choice(list(AIPersonality))
|
||||
return AIPersonality.JEBRASKA
|
||||
return random.choice(list(AIPersonality))
|
||||
|
||||
def calculate_exact_cost(attack: int, defense: int) -> float:
|
||||
"""Calculate the exact cost before rounding (matches card.py formula)."""
|
||||
@@ -130,8 +128,6 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
|
||||
return selected
|
||||
|
||||
|
||||
# ==================== Turn planning ====================
|
||||
|
||||
@dataclass
|
||||
class MovePlan:
|
||||
sacrifice_slots: list[int]
|
||||
@@ -175,7 +171,6 @@ def _plans_for_sacrifice(player, opponent, sacrifice_slots):
|
||||
|
||||
|
||||
def generate_plans(player, opponent) -> list[MovePlan]:
|
||||
"""Generate diverse candidate move plans covering a range of strategies."""
|
||||
plans = []
|
||||
|
||||
# Sacrifice n board cards
|
||||
@@ -189,8 +184,6 @@ def generate_plans(player, opponent) -> list[MovePlan]:
|
||||
|
||||
return plans
|
||||
|
||||
# ==================== Turn execution ====================
|
||||
|
||||
def score_plans_batch(
|
||||
plans: list[MovePlan],
|
||||
player: PlayerState,
|
||||
@@ -205,7 +198,7 @@ def score_plans_batch(
|
||||
for c in player.hand
|
||||
}
|
||||
|
||||
# Build board-state arrays with one Python loop (unavoidable)
|
||||
# Build board-state arrays
|
||||
board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
|
||||
board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
|
||||
n_sac = np.zeros(n, dtype=np.float32)
|
||||
@@ -390,7 +383,6 @@ async def run_ai_turn(game_id: str):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# --- Generate and score candidate plans ---
|
||||
best_plan = choose_plan(player, opponent, personality, difficulty)
|
||||
|
||||
logger.info(
|
||||
@@ -398,7 +390,6 @@ async def run_ai_turn(game_id: str):
|
||||
f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
|
||||
)
|
||||
|
||||
# --- Execute sacrifices ---
|
||||
for slot in best_plan.sacrifice_slots:
|
||||
card_slot = player.board[slot]
|
||||
if card_slot is None:
|
||||
@@ -409,7 +400,6 @@ async def run_ai_turn(game_id: str):
|
||||
await send_state(state)
|
||||
await asyncio.sleep(0.35)
|
||||
|
||||
# --- Execute plays ---
|
||||
# Shuffle play order so the AI doesn't always fill slots left-to-right
|
||||
plays = list(best_plan.plays)
|
||||
random.shuffle(plays)
|
||||
|
||||
@@ -132,8 +132,6 @@ class NeuralNet:
|
||||
return net
|
||||
|
||||
|
||||
# ==================== Feature extraction ====================
|
||||
|
||||
def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
|
||||
"""
|
||||
Returns (n_plans, N_FEATURES) float32 array.
|
||||
@@ -143,7 +141,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
|
||||
|
||||
n = len(plans)
|
||||
|
||||
# ---- state (same for every plan) ----
|
||||
# state (same for every plan)
|
||||
state = np.array([
|
||||
player.life / STARTING_LIFE,
|
||||
opponent.life / STARTING_LIFE,
|
||||
@@ -155,7 +153,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
|
||||
len(opponent.deck) / _MAX_DECK,
|
||||
], dtype=np.float32)
|
||||
|
||||
# ---- current boards (same for every plan) ----
|
||||
# current boards (same for every plan)
|
||||
my_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
|
||||
opp_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
|
||||
for slot in range(BOARD_SIZE):
|
||||
@@ -170,7 +168,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
|
||||
opp_board[slot * 3 + 1] = c.defense / _MAX_DEF
|
||||
opp_board[slot * 3 + 2] = 1.0
|
||||
|
||||
# ---- per-plan features ----
|
||||
# per-plan features
|
||||
plan_part = np.zeros((n, 3 + BOARD_SIZE * 3), dtype=np.float32)
|
||||
for idx, plan in enumerate(plans):
|
||||
# simulate board result
|
||||
@@ -192,7 +190,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
|
||||
plan_part[idx, 3 + slot * 3 + 1] = c.defense / _MAX_DEF
|
||||
plan_part[idx, 3 + slot * 3 + 2] = 1.0
|
||||
|
||||
# ---- opponent deck type one-hot (same for every plan) ----
|
||||
# opponent deck type one-hot (same for every plan)
|
||||
opp_deck_oh = np.zeros(len(_DECK_TYPES), dtype=np.float32)
|
||||
opp_deck_oh[_DECK_TYPE_IDX.get(opponent.deck_type, 0)] = 1.0
|
||||
|
||||
@@ -204,8 +202,6 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
|
||||
return np.concatenate([state_t, my_board_t, opp_board_t, plan_part, opp_deck_t], axis=1)
|
||||
|
||||
|
||||
# ==================== Neural player ====================
|
||||
|
||||
class NeuralPlayer:
|
||||
"""
|
||||
Wraps a NeuralNet for use in game simulation.
|
||||
|
||||
@@ -21,8 +21,6 @@ SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_card
|
||||
SIMULATION_CARD_COUNT = 1000
|
||||
|
||||
|
||||
# ==================== Card pool ====================
|
||||
|
||||
def _card_to_dict(card: Card) -> dict:
|
||||
return {
|
||||
"name": card.name,
|
||||
@@ -69,8 +67,6 @@ def get_simulation_cards() -> list[Card]:
|
||||
return cards
|
||||
|
||||
|
||||
# ==================== Single game ====================
|
||||
|
||||
PLAYER1_ID = "p1"
|
||||
PLAYER2_ID = "p2"
|
||||
MAX_TURNS = 300 # safety cap to prevent infinite games
|
||||
@@ -176,7 +172,6 @@ def simulate_game(
|
||||
return None
|
||||
|
||||
|
||||
# ==================== Process-pool worker ====================
|
||||
# These must be module-level so they are picklable.
|
||||
|
||||
_worker_cards: list[Card] = []
|
||||
@@ -186,7 +181,6 @@ def _init_worker(cards: list[Card]) -> None:
|
||||
_worker_cards = cards
|
||||
|
||||
def _run_game_sync(args: tuple) -> str | None:
|
||||
"""Synchronous entry point for a worker process."""
|
||||
d1, p1_name, d2, p2_name = args
|
||||
return simulate_game(
|
||||
_worker_cards,
|
||||
@@ -195,8 +189,6 @@ def _run_game_sync(args: tuple) -> str | None:
|
||||
)
|
||||
|
||||
|
||||
# ==================== Tournament ====================
|
||||
|
||||
def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
|
||||
"""Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
|
||||
if difficulties is None:
|
||||
@@ -232,7 +224,6 @@ async def run_tournament(
|
||||
players = _all_players(difficulties)
|
||||
n = len(players)
|
||||
|
||||
# Build the flat list of (i, j, args) for every game
|
||||
indexed_args: list[tuple[int, int, tuple]] = []
|
||||
for i in range(n):
|
||||
p1_personality, p1_difficulty = players[i]
|
||||
|
||||
@@ -20,8 +20,6 @@ P2 = "p2"
|
||||
FIXED_PERSONALITIES = [p for p in AIPersonality if p != AIPersonality.ARBITRARY]
|
||||
|
||||
|
||||
# ==================== Game runner ====================
|
||||
|
||||
def _build_player(pid: str, name: str, cards: list, difficulty: int, personality: AIPersonality) -> PlayerState:
|
||||
deck = choose_cards(cards, difficulty, personality)
|
||||
instances = _make_instances(deck)
|
||||
@@ -82,8 +80,6 @@ def run_episode(
|
||||
return state.result.winner_id if state.result else None
|
||||
|
||||
|
||||
# ==================== Training loop ====================
|
||||
|
||||
def train(
|
||||
n_episodes: int = 20_000,
|
||||
self_play_start: int = 5_000,
|
||||
@@ -124,7 +120,6 @@ def train(
|
||||
nn_goes_first = random.random() < 0.5
|
||||
|
||||
if random.random() < self_play_prob:
|
||||
# ---- Self-play ----
|
||||
nn1 = NeuralPlayer(net, training=True, temperature=temperature)
|
||||
nn2 = NeuralPlayer(net, training=True, temperature=temperature)
|
||||
|
||||
@@ -148,7 +143,6 @@ def train(
|
||||
batch_count += 1
|
||||
|
||||
else:
|
||||
# ---- NN vs fixed opponent ----
|
||||
opp_personality = random.choice(FIXED_PERSONALITIES)
|
||||
nn_player = NeuralPlayer(net, training=True, temperature=temperature)
|
||||
opp_ctrl = lambda p, o, pers=opp_personality, diff=opp_difficulty: choose_plan(p, o, pers, diff)
|
||||
|
||||
Reference in New Issue
Block a user