🐐

2026-03-26 08:16:54 +01:00
parent ef4496aa5d
commit ec7dea2d72
5 changed files with 18 additions and 46 deletions
@@ -13,19 +13,17 @@ logger = logging.getLogger("app")
 AI_USER_ID = "ai"

 class AIPersonality(Enum):
-  AGGRESSIVE = "aggressive"   # Prefers high attack cards, plays aggressively
-  DEFENSIVE = "defensive"     # Prefers high defense cards, plays conservatively
-  BALANCED = "balanced"       # Mix of offense and defense
-  GREEDY = "greedy"           # Prioritizes high cost cards, willing to sacrifice
-  SWARM = "swarm"             # Prefers low cost cards, fills board quickly
-  CONTROL = "control"         # Focuses on board control and efficiency
-  ARBITRARY = "arbitrary"     # Just does whatever
-  JEBRASKA  = "jebraska"      # Trained neural network plan scorer
+  AGGRESSIVE = "aggressive"
+  DEFENSIVE = "defensive"
+  BALANCED = "balanced"
+  GREEDY = "greedy"           # prioritizes high cost cards, willing to sacrifice
+  SWARM = "swarm"
+  CONTROL = "control"
+  ARBITRARY = "arbitrary"
+  JEBRASKA  = "jebraska"      # trained neural network plan scorer

 def get_random_personality() -> AIPersonality:
-  """Returns a random AI personality."""
-  # return random.choice(list(AIPersonality))
-  return AIPersonality.JEBRASKA
+  return random.choice(list(AIPersonality))

 def calculate_exact_cost(attack: int, defense: int) -> float:
  """Calculate the exact cost before rounding (matches card.py formula)."""
@@ -130,8 +128,6 @@ def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality)
  return selected


-# ==================== Turn planning ====================
-
 @dataclass
 class MovePlan:
  sacrifice_slots: list[int]
@@ -175,7 +171,6 @@ def _plans_for_sacrifice(player, opponent, sacrifice_slots):


 def generate_plans(player, opponent) -> list[MovePlan]:
-  """Generate diverse candidate move plans covering a range of strategies."""
  plans = []

  # Sacrifice n board cards
@@ -189,8 +184,6 @@ def generate_plans(player, opponent) -> list[MovePlan]:

  return plans

-# ==================== Turn execution ====================
-
 def score_plans_batch(
  plans: list[MovePlan],
  player: PlayerState,
@@ -205,7 +198,7 @@ def score_plans_batch(
    for c in player.hand
  }

-  # Build board-state arrays with one Python loop (unavoidable)
+  # Build board-state arrays
  board_atk = np.zeros((n, BOARD_SIZE), dtype=np.float32)
  board_occ = np.zeros((n, BOARD_SIZE), dtype=np.bool_)
  n_sac     = np.zeros(n, dtype=np.float32)
@@ -390,7 +383,6 @@ async def run_ai_turn(game_id: str):
      except Exception:
        pass

-  # --- Generate and score candidate plans ---
  best_plan = choose_plan(player, opponent, personality, difficulty)

  logger.info(
@@ -398,7 +390,6 @@ async def run_ai_turn(game_id: str):
    f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
  )

-  # --- Execute sacrifices ---
  for slot in best_plan.sacrifice_slots:
    card_slot = player.board[slot]
    if card_slot is None:
@@ -409,7 +400,6 @@ async def run_ai_turn(game_id: str):
    await send_state(state)
    await asyncio.sleep(0.35)

-  # --- Execute plays ---
  # Shuffle play order so the AI doesn't always fill slots left-to-right
  plays = list(best_plan.plays)
  random.shuffle(plays)
@@ -132,8 +132,6 @@ class NeuralNet:
    return net


-# ==================== Feature extraction ====================
-
 def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
  """
  Returns (n_plans, N_FEATURES) float32 array.
@@ -143,7 +141,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:

  n = len(plans)

-  # ---- state (same for every plan) ----
+  # state (same for every plan)
  state = np.array([
    player.life    / STARTING_LIFE,
    opponent.life    / STARTING_LIFE,
@@ -155,7 +153,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
    len(opponent.deck) / _MAX_DECK,
  ], dtype=np.float32)

-  # ---- current boards (same for every plan) ----
+  # current boards (same for every plan)
  my_board  = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
  opp_board = np.zeros(BOARD_SIZE * 3, dtype=np.float32)
  for slot in range(BOARD_SIZE):
@@ -170,7 +168,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
      opp_board[slot * 3 + 1] = c.defense / _MAX_DEF
      opp_board[slot * 3 + 2] = 1.0

-  # ---- per-plan features ----
+  # per-plan features
  plan_part = np.zeros((n, 3 + BOARD_SIZE * 3), dtype=np.float32)
  for idx, plan in enumerate(plans):
    # simulate board result
@@ -192,7 +190,7 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
        plan_part[idx, 3 + slot * 3 + 1] = c.defense / _MAX_DEF
        plan_part[idx, 3 + slot * 3 + 2] = 1.0

-  # ---- opponent deck type one-hot (same for every plan) ----
+  # opponent deck type one-hot (same for every plan)
  opp_deck_oh = np.zeros(len(_DECK_TYPES), dtype=np.float32)
  opp_deck_oh[_DECK_TYPE_IDX.get(opponent.deck_type, 0)] = 1.0

@@ -204,8 +202,6 @@ def extract_plan_features(plans: list, player, opponent) -> np.ndarray:
  return np.concatenate([state_t, my_board_t, opp_board_t, plan_part, opp_deck_t], axis=1)


-# ==================== Neural player ====================
-
 class NeuralPlayer:
  """
  Wraps a NeuralNet for use in game simulation.
@@ -21,8 +21,6 @@ SIMULATION_CARDS_PATH = os.path.join(os.path.dirname(__file__), "simulation_card
 SIMULATION_CARD_COUNT = 1000


-# ==================== Card pool ====================
-
 def _card_to_dict(card: Card) -> dict:
  return {
    "name": card.name,
@@ -69,8 +67,6 @@ def get_simulation_cards() -> list[Card]:
  return cards


-# ==================== Single game ====================
-
 PLAYER1_ID = "p1"
 PLAYER2_ID = "p2"
 MAX_TURNS = 300  # safety cap to prevent infinite games
@@ -176,7 +172,6 @@ def simulate_game(
  return None


-# ==================== Process-pool worker ====================
 # These must be module-level so they are picklable.

 _worker_cards: list[Card] = []
@@ -186,7 +181,6 @@ def _init_worker(cards: list[Card]) -> None:
  _worker_cards = cards

 def _run_game_sync(args: tuple) -> str | None:
-  """Synchronous entry point for a worker process."""
  d1, p1_name, d2, p2_name = args
  return simulate_game(
    _worker_cards,
@@ -195,8 +189,6 @@ def _run_game_sync(args: tuple) -> str | None:
  )


-# ==================== Tournament ====================
-
 def _all_players(difficulties: list[int] | None = None) -> list[tuple[AIPersonality, int]]:
  """Return all (personality, difficulty) combinations for the given difficulties (default 1-10)."""
  if difficulties is None:
@@ -232,7 +224,6 @@ async def run_tournament(
  players = _all_players(difficulties)
  n = len(players)

-  # Build the flat list of (i, j, args) for every game
  indexed_args: list[tuple[int, int, tuple]] = []
  for i in range(n):
    p1_personality, p1_difficulty = players[i]
@@ -20,8 +20,6 @@ P2 = "p2"
 FIXED_PERSONALITIES = [p for p in AIPersonality if p != AIPersonality.ARBITRARY]


-# ==================== Game runner ====================
-
 def _build_player(pid: str, name: str, cards: list, difficulty: int, personality: AIPersonality) -> PlayerState:
  deck = choose_cards(cards, difficulty, personality)
  instances = _make_instances(deck)
@@ -82,8 +80,6 @@ def run_episode(
  return state.result.winner_id if state.result else None


-# ==================== Training loop ====================
-
 def train(
  n_episodes: int = 20_000,
  self_play_start: int = 5_000,
@@ -124,7 +120,6 @@ def train(
    nn_goes_first = random.random() < 0.5

    if random.random() < self_play_prob:
-      # ---- Self-play ----
      nn1 = NeuralPlayer(net, training=True, temperature=temperature)
      nn2 = NeuralPlayer(net, training=True, temperature=temperature)

@@ -148,7 +143,6 @@ def train(
          batch_count += 1

    else:
-      # ---- NN vs fixed opponent ----
      opp_personality = random.choice(FIXED_PERSONALITIES)
      nn_player = NeuralPlayer(net, training=True, temperature=temperature)
      opp_ctrl  = lambda p, o, pers=opp_personality, diff=opp_difficulty: choose_plan(p, o, pers, diff)