🐐

2026-03-19 22:34:02 +01:00
parent d1a39620a7
commit fa05447895
18 changed files with 796 additions and 369 deletions
--- a/backend/ai.py
+++ b/backend/ai.py
@@ -1,8 +1,13 @@
 import asyncio
 import random
+import logging
+from dataclasses import dataclass
 from enum import Enum
+from itertools import combinations
 from card import Card
-from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE
+from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE
+
+logger = logging.getLogger("app")

 AI_USER_ID = "ai"

@@ -21,244 +26,418 @@ def get_random_personality() -> AIPersonality:

 def calculate_exact_cost(attack: int, defense: int) -> float:
  """Calculate the exact cost before rounding (matches card.py formula)."""
-  return min(12.0, max(1.0, ((attack**2 + defense**2)**0.18) / 1.5))
+  return min(11.0, max(1.0, ((attack**2 + defense**2)**0.18) / 1.5))

-def get_power_curve_value(card: Card) -> float:
+def get_power_curve_value(card) -> float:
  """
-  Returns how much "above the power curve" a card is.
-  Positive values mean the card is better than expected for its cost.
+  Returns how much above the power curve a card is.
+  Positive values mean the card is a better-than-expected deal for its cost.
  """
  exact_cost = calculate_exact_cost(card.attack, card.defense)
  return exact_cost - card.cost

-def get_card_efficiency(card: Card) -> float:
-  """
-  Returns the total stats per cost ratio.
-  Higher is better (more stats for the cost).
-  """
-  if card.cost == 0:
-    return 0
-  return (card.attack + card.defense) / card.cost
-
-def score_card_for_personality(card: Card, personality: AIPersonality) -> float:
-  """
-  Score a card based on how well it fits the AI personality.
-  Higher scores are better fits.
-  """
-  if personality == AIPersonality.AGGRESSIVE:
-    # Prefer high attack, attack > defense
-    attack_bias = card.attack * 1.5
-    return attack_bias + (card.attack - card.defense) * 0.5
-
-  elif personality == AIPersonality.DEFENSIVE:
-    # Prefer high defense, defense > attack
-    defense_bias = card.defense * 1.5
-    return defense_bias + (card.defense - card.attack) * 0.5
-
-  elif personality == AIPersonality.BALANCED:
-    # Prefer balanced stats
-    stat_diff = abs(card.attack - card.defense)
-    balance_score = (card.attack + card.defense) - stat_diff * 0.3
-    return balance_score
-
-  elif personality == AIPersonality.GREEDY:
-    # Prefer high cost cards
-    return card.cost * 2 + (card.attack + card.defense) * 0.5
-
-  elif personality == AIPersonality.SWARM:
-    # Prefer low cost cards
-    low_cost_bonus = (13 - card.cost) * 1.5
-    return low_cost_bonus + (card.attack + card.defense) * 0.3
-
-  elif personality == AIPersonality.CONTROL:
-    # Prefer efficient cards (good stats per cost)
-    efficiency = get_card_efficiency(card)
-    total_stats = card.attack + card.defense
-    return efficiency * 5 + total_stats * 0.2
-
-  elif personality == AIPersonality.ARBITRARY:
-    # Does whatever
-    return random.random()*100
-
-  return card.attack + card.defense
-
-def energy_curve(difficulty: int, personality: AIPersonality) -> tuple[int, int, int]:
-  """Calculate a desired energy curve based on difficulty, personality, and a random factor"""
-
-  # First: cards with cost 1-3
-  # Second: cards with cost 4-6
-  # Third is inferred, and is cards with cost 7+
-  diff_low, diff_mid = [
-    (12, 8), # 1
-    (11, 9), # 2
-    (10, 9), # 3
-    ( 9,10), # 4
-    ( 9, 9), # 5
-    ( 9, 8), # 6
-    ( 8, 9), # 7
-    ( 7,10), # 8
-    ( 7, 9), # 9
-    ( 6, 9), # 10
-  ][difficulty - 1]
-
-  r1 = random.randint(0,20)
-  r2 = random.randint(0,20-r1)
-  pers_low, pers_mid = {
-    AIPersonality.AGGRESSIVE: ( 8,10),
-    AIPersonality.ARBITRARY:  (r1,r2),
-    AIPersonality.BALANCED:   ( 7,10),
-    AIPersonality.CONTROL:    ( 3, 8),
-    AIPersonality.DEFENSIVE:  ( 6, 8),
-    AIPersonality.GREEDY:     ( 3, 7),
-    AIPersonality.SWARM:      (15, 3),
-  }[personality]
-
-  # Blend difficulty (70%) and personality (30%) curves
-  blended_low = diff_low * 0.7 + pers_low * 0.3
-  blended_mid = diff_mid * 0.7 + pers_mid * 0.3
-
-  # Add small random variance (±1)
-  low = int(blended_low + random.uniform(-1, 1))
-  mid = int(blended_mid + random.uniform(-1, 1))
-
-  # Ensure low + mid doesn't exceed 20
-  if low + mid > 20:
-    # Scale down proportionally
-    total = low + mid
-    low = int((low / total) * 20)
-    mid = 20 - low
-    high = 0
-  else:
-    high = 20 - low - mid
-
-  # Apply difficulty constraints
-  if difficulty == 1:
-    # Difficulty 1: absolutely no high-cost cards
-    if high > 0:
-      # Redistribute high cards to low and mid
-      low += high // 2
-      mid += high - (high // 2)
-      high = 0
-
-  # Final bounds checking
-  low = max(0, min(20, low))
-  mid = max(0, min(20 - low, mid))
-  high = max(0, 20 - low - mid)
-
-  return (low, mid, high)

 def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
-  """
-  Choose 20 cards from available cards based on difficulty and personality.
+  BUDGET = 50

-  Difficulty (1-10) affects:
-  - Higher difficulty = prefers cards above the power curve
-  - Lower difficulty = prefers low-cost cards for early game playability
-  - Lower difficulty = avoids taking the ridiculously good high-cost cards
+  logger.info(f"Personality: {personality.value}")
+  logger.info(f"Difficulty: {difficulty}")
+  card_strings = [
+    f"{c.name} {c.cost}"
+    for c in sorted(cards, key=lambda x: x.cost)[::-1][:20]
+  ]
+  logger.info("Cards:\n"+("\n".join(card_strings)))

-  Personality affects which types of cards are preferred.
-  """
-  if len(cards) < 20:
-    return cards
+  # God cards (cost 7-11) are gated by difficulty. Below difficulty 6 they are excluded.
+  # Each level from 6 upward raises the cost cap by one (cap = difficulty + 1); at difficulty 10 all are allowed.
+  if difficulty >= 6:
+    max_card_cost = difficulty+1
+  else:
+    max_card_cost = 6

-  # Get target energy curve based on difficulty and personality
-  target_low, target_mid, target_high = energy_curve(difficulty, personality)
+  allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)

-  selected = []
-  remaining = list(cards)
+  def card_score(card: Card) -> float:
+    pcv = get_power_curve_value(card)
+    # Clamp pcv to [0, 1]; cards at or below the curve all score 0 here.
+    pcv_norm = max(0.0, min(1.0, pcv))

-  # Fill each cost bracket by distributing across individual cost levels
-  for cost_min, cost_max, target_count in [(1, 3, target_low), (4, 6, target_mid), (7, 12, target_high)]:
-    if target_count == 0:
+    cost_norm = card.cost / max_card_cost   # [0, 1]; higher = more expensive
+    total = card.attack + card.defense
+    atk_ratio = card.attack / total if total else 0.5
+
+    if personality == AIPersonality.AGGRESSIVE:
+      # Prefers high-attack cards; slight bias toward high cost for raw power
+      return 0.50 * atk_ratio + 0.30 * pcv_norm + 0.20 * cost_norm
+
+    if personality == AIPersonality.DEFENSIVE:
+      # Prefers high-defense cards; same cost bias
+      return 0.50 * (1.0 - atk_ratio) + 0.30 * pcv_norm + 0.20 * cost_norm
+
+    if personality == AIPersonality.GREEDY:
+      # Fills budget with the fewest, most expensive cards possible
+      return 0.70 * cost_norm + 0.30 * pcv_norm
+
+    if personality == AIPersonality.SWARM:
+      # Prefers cheap cards first, then attack-heavy ones, then power-curve value
+      return 0.45 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm
+
+    if personality == AIPersonality.CONTROL:
+      # Values efficiency above all: wants cards that are above the power curve,
+      # with a secondary preference for higher cost
+      return 0.70 * pcv_norm + 0.30 * cost_norm
+
+    if personality == AIPersonality.BALANCED:
+      # Blends everything: efficiency, cost spread, and a slight attack lean
+      return 0.40 * pcv_norm + 0.35 * cost_norm + 0.15 * atk_ratio + 0.10 * (1.0 - atk_ratio)
+
+    # ARBITRARY: mostly random at lower difficulties
+    return (0.05 * difficulty) * pcv_norm + (1 - (0.05 * difficulty)) * random.random()
+
+  # Higher difficulty -> less noise -> more optimal deck composition
+  noise = ((10 - difficulty) / 9.0) * 0.50
+
+  scored = sorted(
+    [(card_score(c) + random.gauss(0, noise), c) for c in allowed],
+    key=lambda x: x[0],
+    reverse=True,
+  )
+
+  # Minimum budget reserved for cheap (cost 1-3) cards to ensure early-game presence.
+  # Without cheap cards the AI will play nothing for the first several turns.
+  early_budget = {
+    AIPersonality.GREEDY:      4,
+    AIPersonality.SWARM:      12,
+    AIPersonality.AGGRESSIVE:  8,
+    AIPersonality.DEFENSIVE:  10,
+    AIPersonality.CONTROL:     8,
+    AIPersonality.BALANCED:   10,
+    AIPersonality.ARBITRARY:   8,
+  }[personality]
+
+  selected: list[Card] = []
+  total_cost = 0
+
+  # First pass: secure early-game cards
+  cheap_spent = 0
+  for _, card in scored:
+    if cheap_spent >= early_budget:
+      break
+    if card.cost > 3 or total_cost + card.cost > BUDGET:
      continue
-
-    bracket_cards = [c for c in remaining if cost_min <= c.cost <= cost_max]
-    if not bracket_cards:
-      continue
-
-    # Group cards by exact cost
-    by_cost = {}
-    for card in bracket_cards:
-      if card.cost not in by_cost:
-        by_cost[card.cost] = []
-      by_cost[card.cost].append(card)
-
-    # Distribute target_count across available costs
-    available_costs = sorted(by_cost.keys())
-    if not available_costs:
-      continue
-
-    # Calculate how many cards to take from each cost level
-    per_cost = max(1, target_count // len(available_costs))
-    remainder = target_count % len(available_costs)
-
-    for cost in available_costs:
-      cost_cards = by_cost[cost]
-      # Score cards at this specific cost level
-      cost_scores = []
-      for card in cost_cards:
-        # Base score from personality (but normalize by cost to avoid bias)
-        personality_score = score_card_for_personality(card, personality)
-        # Normalize: divide by cost to make 1-cost and 3-cost comparable
-        # Then multiply by average cost in bracket for scaling
-        avg_bracket_cost = (cost_min + cost_max) / 2
-        normalized_score = (personality_score / max(1, card.cost)) * avg_bracket_cost
-
-        # Power curve bonus
-        power_curve = get_power_curve_value(card)
-        difficulty_factor = (difficulty - 5.5) / 4.5
-        power_curve_score = power_curve * difficulty_factor * 5
-
-        # For low difficulties, heavily penalize high-cost cards with good stats
-        if difficulty <= 4 and card.cost >= 7:
-          power_penalty = max(0, power_curve) * -10
-          normalized_score += power_penalty
-
-        total_score = normalized_score + power_curve_score
-        cost_scores.append((card, total_score))
-
-      # Sort and take best from this cost level
-      cost_scores.sort(key=lambda x: x[1], reverse=True)
-      # Take per_cost, plus 1 extra if this is one of the remainder slots
-      to_take = per_cost
-      if remainder > 0:
-        to_take += 1
-        remainder -= 1
-      to_take = min(to_take, len(cost_scores))
-
-      for i in range(to_take):
-        card = cost_scores[i][0]
-        selected.append(card)
-        remaining.remove(card)
-        if len(selected) >= 20:
-          break
-
-      if len(selected) >= 20:
-        break
-
-  # Fill remaining slots with best available cards
-  # This handles cases where brackets didn't have enough cards
-  while len(selected) < 20 and remaining:
-    remaining_scores = []
-    for card in remaining:
-      personality_score = score_card_for_personality(card, personality)
-      power_curve = get_power_curve_value(card)
-      difficulty_factor = (difficulty - 5.5) / 4.5
-      power_curve_score = power_curve * difficulty_factor * 5
-
-      # For remaining slots, add a slight preference for lower cost cards
-      # to ensure we have early-game plays
-      cost_penalty = (card.cost - 4) * 0.5  # Neutral at 4, penalty for higher
-
-      total_score = personality_score + power_curve_score - cost_penalty
-      remaining_scores.append((card, total_score))
-
-    remaining_scores.sort(key=lambda x: x[1], reverse=True)
-    card = remaining_scores[0][0]
    selected.append(card)
-    remaining.remove(card)
+    total_cost += card.cost
+    cheap_spent += card.cost

-  return selected[:20]
+  # Second pass: fill remaining budget greedily by score
+  taken = {id(c) for c in selected}
+  for _, card in scored:
+    if total_cost >= BUDGET:
+      break
+    if id(card) in taken or total_cost + card.cost > BUDGET:
+      continue
+    selected.append(card)
+    total_cost += card.cost
+
+
+  card_strings = [
+    f"{c.name} {c.cost}"
+    for c in sorted(selected, key=lambda x: x.cost)
+  ]
+  logger.info("Selected:\n"+("\n".join(card_strings)))
+
+  return selected
+
+
+# ==================== Turn planning ====================
+
+@dataclass
+class MovePlan:
+  sacrifice_slots: list[int]
+  plays: list[tuple]   # (CardInstance, board_slot: int)
+  label: str = ""
+
+
+def _affordable_subsets(hand, energy, start=0):
+  """Yield every subset of cards from hand whose total cost fits within energy."""
+  yield []
+  for i in range(start, len(hand)):
+    card = hand[i]
+    if card.cost <= energy:
+      for rest in _affordable_subsets(hand, energy - card.cost, i + 1):
+        yield [card] + rest
+
+
+def _plans_for_sacrifice(player, opponent, sacrifice_slots):
+  """Generate one plan per affordable card subset for a given sacrifice set."""
+  board = list(player.board)
+  energy = player.energy
+
+  for slot in sacrifice_slots:
+    if board[slot] is not None:
+      board[slot] = None
+      energy += 1
+
+  hand = list(player.hand)
+  empty_slots = [i for i, c in enumerate(board) if c is None]
+  en_board = opponent.board
+
+  # For scoring: open enemy slots first so the simulation reflects
+  # direct-damage potential accurately.
+  scoring_slots = (
+    [s for s in empty_slots if en_board[s] is None] +
+    [s for s in empty_slots if en_board[s] is not None]
+  )
+
+  return [
+    MovePlan(
+      sacrifice_slots=list(sacrifice_slots),
+      plays=list(zip(cards, scoring_slots)),
+      label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
+    )
+    for cards in _affordable_subsets(hand, energy)
+  ]
+
+
+def generate_plans(player, opponent) -> list[MovePlan]:
+  """Enumerate candidate plans: each sacrifice subset paired with each affordable hand subset, plus idle."""
+  plans = []
+
+  # Sacrifice n board cards
+  occupied = [s for s in range(BOARD_SIZE) if player.board[s] is not None]
+  for n in range(len(occupied) + 1):
+    for slots in combinations(occupied, n):
+      plans += _plans_for_sacrifice(player, opponent, list(slots))
+
+  # Idle: do nothing
+  plans.append(MovePlan(sacrifice_slots=[], plays=[], label="idle"))
+
+  return plans
+
+
+def score_plan(plan: MovePlan, player, opponent, personality: AIPersonality) -> float:
+  """
+  Score a plan from ~0.0 to ~1.0 based on the projected board state after
+  executing it. Higher is better.
+  """
+  # Simulate board after sacrifices + plays
+  board = list(player.board)
+  energy = player.energy
+  for slot in plan.sacrifice_slots:
+    if board[slot] is not None:
+      board[slot] = None
+      energy += 1
+  for card, slot in plan.plays:
+    board[slot] = card
+
+  en_board = opponent.board
+  enemy_occupied = sum(1 for c in en_board if c is not None)
+
+  # --- Combat metrics ---
+  direct_damage = 0      # AI attacks going straight to opponent life
+  board_damage = 0       # AI attacks hitting enemy cards
+  blocking_slots = 0     # Slots where AI blocks an enemy card
+  cards_destroyed = 0    # Enemy cards the AI would destroy this turn
+  unblocked_incoming = 0 # Enemy attacks that go straight to AI life
+  cards_on_board = 0
+
+  for slot in range(BOARD_SIZE):
+    my = board[slot]
+    en = en_board[slot]
+    if my:
+      cards_on_board += 1
+    if my and en is None:
+      direct_damage += my.attack
+    if my and en:
+      board_damage += my.attack
+      blocking_slots += 1
+      if my.attack >= en.defense:
+        cards_destroyed += 1
+    if not my and en:
+      unblocked_incoming += en.attack
+
+  # --- Normalize to [0, 1] ---
+  # How threatening is the attack relative to what remains of opponent's life?
+  atk_score = min(1.0, direct_damage / max(opponent.life, 1))
+
+  # What fraction of enemy slots are blocked?
+  block_score = (blocking_slots / enemy_occupied) if enemy_occupied > 0 else 1.0
+
+  # What fraction of all slots are filled?
+  cover_score = cards_on_board / BOARD_SIZE
+
+  # What fraction of enemy cards are destroyed?
+  destroy_score = (cards_destroyed / enemy_occupied) if enemy_occupied > 0 else 0.0
+
+  # How safe is the AI from unblocked hits relative to its own life?
+  threat_score = 1.0 - min(1.0, unblocked_incoming / max(player.life, 1))
+
+  # How many cards compared to the enemy? NOTE(review): own side adds blocking_slots, not cards_on_board - confirm.
+  opponent_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
+  my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
+  attrition_score = my_cards_left/(my_cards_left + opponent_cards_left)
+
+  # Net value: cost of cards played minus cost of cards sacrificed.
+  n_sac = len(plan.sacrifice_slots)
+  sac_value = sum(player.board[s].cost for s in plan.sacrifice_slots if player.board[s] is not None)
+  play_value = sum(c.cost for c, _ in plan.plays)
+  net_value = play_value - sac_value
+  net_value_norm = max(0.0, min(1.0, (net_value + 10) / 20))
+
+  # Sacrifice penalty. Applied as a flat deduction after personality scoring.
+  sacrifice_penalty = 0.0
+  if n_sac > 0:
+    # Penalty 1: wasted energy. Each sacrifice gives +1 energy; if that energy
+    # goes unspent it was pointless. Weighted heavily.
+    energy_leftover = player.energy + n_sac - play_value
+    wasted_sac_energy = max(0, min(n_sac, energy_leftover))
+    wasted_penalty = wasted_sac_energy / n_sac
+
+    # Penalty 2: low-value swap. Each sacrifice should at minimum unlock a card
+    # that costs more than the one removed (net_value >= n_sac means each
+    # sacrifice bought at least one extra cost point). Anything less is a bad trade.
+    swap_penalty = max(0.0, min(1.0, (n_sac - net_value) / max(n_sac, 1)))
+
+    sacrifice_penalty = 0.65 * wasted_penalty + 0.35 * swap_penalty
+
+  # Power curve value of the cards played (are they good value for their cost?)
+  if plan.plays:
+    pcv_scores = [max(0.0, min(1.0, get_power_curve_value(c))) for c, _ in plan.plays]
+    pcv_score = sum(pcv_scores) / len(pcv_scores)
+  else:
+    pcv_score = 0.5
+
+  # --- Personality weights ---
+  if personality == AIPersonality.AGGRESSIVE:
+    # Maximize direct damage
+    score = (
+      0.40 * atk_score +
+      0.10 * block_score +
+      0.10 * cover_score +
+      0.10 * net_value_norm +
+      0.15 * destroy_score +
+      0.05 * attrition_score +
+      0.05 * pcv_score +
+      0.05 * threat_score
+    )
+
+  elif personality == AIPersonality.DEFENSIVE:
+    # Block everything
+    score = (
+      0.05 * atk_score +
+      0.35 * block_score +
+      0.20 * cover_score +
+      0.05 * net_value_norm +
+      0.05 * destroy_score +
+      0.10 * attrition_score +
+      0.05 * pcv_score +
+      0.15 * threat_score
+    )
+
+  elif personality == AIPersonality.SWARM:
+    # Fill the board and press with direct damage
+    score = (
+      0.25 * atk_score +
+      0.10 * block_score +
+      0.35 * cover_score +
+      0.05 * net_value_norm +
+      0.05 * destroy_score +
+      0.10 * attrition_score +
+      0.05 * pcv_score +
+      0.05 * threat_score
+    )
+
+  elif personality == AIPersonality.GREEDY:
+    # High-value card plays, willing to sacrifice weak cards for strong ones
+    score = (
+      0.20 * atk_score +
+      0.05 * block_score +
+      0.10 * cover_score +
+      0.40 * net_value_norm +
+      0.05 * destroy_score +
+      0.05 * attrition_score +
+      0.10 * pcv_score +
+      0.05 * threat_score
+    )
+
+  elif personality == AIPersonality.CONTROL:
+    # Efficiency
+    score = (
+      0.10 * atk_score +
+      0.05 * block_score +
+      0.05 * cover_score +
+      0.20 * net_value_norm +
+      0.05 * destroy_score +
+      0.10 * attrition_score +
+      0.40 * pcv_score +
+      0.05 * threat_score
+    )
+
+  elif personality == AIPersonality.BALANCED:
+    score = (
+      0.10 * atk_score +
+      0.15 * block_score +
+      0.10 * cover_score +
+      0.10 * net_value_norm +
+      0.10 * destroy_score +
+      0.10 * attrition_score +
+      0.15 * pcv_score +
+      0.10 * threat_score
+    )
+
+  else:  # ARBITRARY
+    score = (
+      0.60 * random.random() +
+      0.05 * atk_score +
+      0.05 * block_score +
+      0.05 * cover_score +
+      0.05 * net_value_norm +
+      0.05 * destroy_score +
+      0.05 * attrition_score +
+      0.05 * pcv_score +
+      0.05 * threat_score
+    )
+
+  # --- Context adjustments ---
+
+  # Lethal takes priority regardless of personality
+  if direct_damage >= opponent.life:
+    score = max(score, 0.95)
+
+  if unblocked_incoming >= player.life:
+    score = min(score, 0.05)
+
+  # Against god-card decks: cover all slots so their big cards can't attack freely
+  if opponent.deck_type in ("God Card", "Pantheon"):
+    score = min(1.0, score + 0.08 * cover_score)
+
+  # Against aggro/rush: need to block more urgently
+  if opponent.deck_type in ("Aggro", "Rush"):
+    score = min(1.0, score + 0.06 * block_score + 0.04 * threat_score)
+
+  # Against wall decks: direct damage matters more than destroying cards
+  if opponent.deck_type == "Wall":
+    score = min(1.0, score + 0.06 * atk_score)
+
+  # Press the advantage when opponent is low on life
+  if opponent.life < STARTING_LIFE * 0.3:
+    score = min(1.0, score + 0.06 * atk_score)
+
+  # Prioritize survival when low on life
+  if player.life < STARTING_LIFE * 0.3:
+    score = min(1.0, score + 0.06 * threat_score + 0.04 * block_score)
+
+  # Opponent running low on cards: keep a card on board for attrition win condition
+  if opponent_cards_left <= 5 and cards_on_board > 0:
+    score = min(1.0, score + 0.05)
+
+  # Apply sacrifice penalty last so it can override all other considerations.
+  score = max(0.0, score - sacrifice_penalty)
+
+  return score
+
+
+# ==================== Turn execution ====================

 async def run_ai_turn(game_id: str):
  from game_manager import (
@@ -281,46 +460,78 @@ async def run_ai_turn(game_id: str):
  await asyncio.sleep(calculate_combat_animation_time(state.last_combat_events))

  player = state.players[AI_USER_ID]
+  opponent = state.players[human_id]
+  difficulty = state.ai_difficulty
+  personality = (
+    AIPersonality(state.ai_personality)
+    if state.ai_personality
+    else AIPersonality.BALANCED
+  )

  ws = connections[game_id].get(human_id)
-  async def send_state(state):
+
+  async def send_state(s):
    if ws:
      try:
-        await ws.send_json({
-          "type": "state",
-          "state": serialize_state(state, human_id),
-        })
+        await ws.send_json({"type": "state", "state": serialize_state(s, human_id)})
      except Exception:
        pass

-  most_expensive_in_hand = max((c.cost for c in player.hand), default=0)
-  if player.energy < most_expensive_in_hand:
-    for slot in range(BOARD_SIZE):
-      slot_card = player.board[slot]
-      if slot_card is not None and player.energy + slot_card.cost <= most_expensive_in_hand:
-        if ws:
-          try:
-            await ws.send_json({
-              "type": "sacrifice_animation",
-              "instance_id": slot_card.instance_id,
-            })
-          except Exception:
-            pass
-        await asyncio.sleep(0.65)
-        action_sacrifice(state, slot)
-        await send_state(state)
-        await asyncio.sleep(0.35)
+  async def send_sacrifice_anim(instance_id):
+    if ws:
+      try:
+        await ws.send_json({"type": "sacrifice_animation", "instance_id": instance_id})
+      except Exception:
+        pass

-  play_order = list(range(BOARD_SIZE))
-  random.shuffle(play_order)
-  for slot in play_order:
+  # --- Generate and score candidate plans ---
+  plans = generate_plans(player, opponent)
+
+  if difficulty <= 2:
+    # Actively bad: picks the lowest-scoring plan after adding noise
+    scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, 0.15*difficulty), p)
+              for p in plans]
+    best_plan = min(scored, key=lambda x: x[0])[1]
+  elif difficulty == 3:
+    # Fully random
+    best_plan = random.choice(plans)
+  else:
+    noise = max(0.0, ((8 - difficulty) / 6.0) * 0.30)
+    scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, noise), p)
+              for p in plans]
+    best_plan = max(scored, key=lambda x: x[0])[1]
+
+  logger.info(
+    f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} plans={len(plans)} " +
+    f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
+  )
+
+  # --- Execute sacrifices ---
+  for slot in best_plan.sacrifice_slots:
+    card_slot = player.board[slot]
+    if card_slot is None:
+      continue
+    await send_sacrifice_anim(card_slot.instance_id)
+    await asyncio.sleep(0.65)
+    action_sacrifice(state, slot)
+    await send_state(state)
+    await asyncio.sleep(0.35)
+
+  # --- Execute plays ---
+  # Shuffle execution order so cards aren't always placed in the same sequence; each card keeps its planned slot
+  plays = list(best_plan.plays)
+  random.shuffle(plays)
+
+  for card, slot in plays:
+    # Re-look up hand index each time (hand shrinks as cards are played)
+    hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
+    if hand_idx is None:
+      continue
    if player.board[slot] is not None:
      continue
-    affordable = [i for i, c in enumerate(player.hand) if c.cost <= player.energy]
-    if not affordable:
-      break
-    best = max(affordable, key=lambda i: player.hand[i].cost)
-    action_play_card(state, best, slot)
+    if card.cost > player.energy:
+      continue
+    action_play_card(state, hand_idx, slot)
    await send_state(state)
    await asyncio.sleep(0.5)