wiki-tcg/backend/ai.py

import asyncio
import random
import logging
from dataclasses import dataclass
from enum import Enum
from itertools import combinations
from card import Card
from game import action_play_card, action_sacrifice, action_end_turn, BOARD_SIZE, STARTING_LIFE

logger = logging.getLogger("app")

AI_USER_ID = "ai"

class AIPersonality(Enum):
  AGGRESSIVE = "aggressive"   # Prefers high attack cards, plays aggressively
  DEFENSIVE = "defensive"     # Prefers high defense cards, plays conservatively
  BALANCED = "balanced"       # Mix of offense and defense
  GREEDY = "greedy"           # Prioritizes high cost cards, willing to sacrifice
  SWARM = "swarm"             # Prefers low cost cards, fills board quickly
  CONTROL = "control"         # Focuses on board control and efficiency
  ARBITRARY = "arbitrary"     # Just does whatever

def get_random_personality() -> AIPersonality:
  """Returns a random AI personality."""
  return random.choice(list(AIPersonality))

def calculate_exact_cost(attack: int, defense: int) -> float:
  """Calculate the exact cost before rounding (matches card.py formula)."""
  return min(11.0, max(1.0, ((attack**2 + defense**2)**0.18) / 1.5))

def get_power_curve_value(card) -> float:
  """
  Returns how much above the power curve a card is.
  Positive values mean the card is a better-than-expected deal for its cost.
  """
  exact_cost = calculate_exact_cost(card.attack, card.defense)
  return exact_cost - card.cost


def choose_cards(cards: list[Card], difficulty: int, personality: AIPersonality) -> list[Card]:
  BUDGET = 50

  logger.info(f"Personality: {personality.value}")
  logger.info(f"Difficulty: {difficulty}")
  card_strings = [
    f"{c.name} {c.cost}"
    for c in sorted(cards, key=lambda x: x.cost)[::-1][:20]
  ]
  logger.info("Cards:\n"+("\n".join(card_strings)))

  # God cards (cost 7-11) are gated by difficulty. Below difficulty 7 they are excluded.
  # Each level from 7 upward unlocks a higher cost tier; at difficulty 10 all are allowed.
  if difficulty >= 6:
    max_card_cost = difficulty+1
  else:
    max_card_cost = 6

  allowed = [c for c in cards if c.cost <= max_card_cost] or list(cards)

  def card_score(card: Card) -> float:
    pcv = get_power_curve_value(card)
    # Normalize pcv to [0, 1].
    pcv_norm = max(0.0, min(1.0, pcv))

    cost_norm = card.cost / max_card_cost   # [0, 1]; higher = more expensive
    total = card.attack + card.defense
    atk_ratio = card.attack / total if total else 0.5

    if personality == AIPersonality.AGGRESSIVE:
      # Prefers high-attack cards; slight bias toward high cost for raw power
      return 0.50 * atk_ratio + 0.30 * pcv_norm + 0.20 * cost_norm

    if personality == AIPersonality.DEFENSIVE:
      # Prefers high-defense cards; same cost bias
      return 0.50 * (1.0 - atk_ratio) + 0.30 * pcv_norm + 0.20 * cost_norm

    if personality == AIPersonality.GREEDY:
      # Fills budget with the fewest, most expensive cards possible
      return 0.70 * cost_norm + 0.30 * pcv_norm

    if personality == AIPersonality.SWARM:
      # Cheap cards
      return 0.45 * (1.0 - cost_norm) + 0.35 * atk_ratio + 0.20 * pcv_norm

    if personality == AIPersonality.CONTROL:
      # Values efficiency above all: wants cards that are above the power curve,
      # with a secondary preference for higher cost
      return 0.70 * pcv_norm + 0.30 * cost_norm

    if personality == AIPersonality.BALANCED:
      # Blends everything: efficiency, cost spread, and a slight attack lean
      return 0.40 * pcv_norm + 0.35 * cost_norm + 0.15 * atk_ratio + 0.10 * (1.0 - atk_ratio)

    # ARBITRARY: mostly random at lower difficulties
    return (0.05 * difficulty) * pcv_norm + (1 - (0.05 * difficulty)) * random.random()

  # Higher difficulty -> less noise -> more optimal deck composition
  noise = ((10 - difficulty) / 9.0) * 0.50

  scored = sorted(
    [(card_score(c) + random.gauss(0, noise), c) for c in allowed],
    key=lambda x: x[0],
    reverse=True,
  )

  # Minimum budget reserved for cheap (cost 1-3) cards to ensure early-game presence.
  # Without cheap cards the AI will play nothing for the first several turns.
  early_budget = {
    AIPersonality.GREEDY:      4,
    AIPersonality.SWARM:      12,
    AIPersonality.AGGRESSIVE:  8,
    AIPersonality.DEFENSIVE:  10,
    AIPersonality.CONTROL:     8,
    AIPersonality.BALANCED:   10,
    AIPersonality.ARBITRARY:   8,
  }[personality]

  selected: list[Card] = []
  total_cost = 0

  # First pass: secure early-game cards
  cheap_spent = 0
  for _, card in scored:
    if cheap_spent >= early_budget:
      break
    if card.cost > 3 or total_cost + card.cost > BUDGET:
      continue
    selected.append(card)
    total_cost += card.cost
    cheap_spent += card.cost

  # Second pass: fill remaining budget greedily by score
  taken = {id(c) for c in selected}
  for _, card in scored:
    if total_cost >= BUDGET:
      break
    if id(card) in taken or total_cost + card.cost > BUDGET:
      continue
    selected.append(card)
    total_cost += card.cost


  card_strings = [
    f"{c.name} {c.cost}"
    for c in sorted(selected, key=lambda x: x.cost)
  ]
  logger.info("Selected:\n"+("\n".join(card_strings)))

  return selected


# ==================== Turn planning ====================

@dataclass
class MovePlan:
  sacrifice_slots: list[int]
  plays: list[tuple]   # (CardInstance, board_slot: int)
  label: str = ""


def _affordable_subsets(hand, energy, start=0):
  """Yield every subset of cards from hand whose total cost fits within energy."""
  yield []
  for i in range(start, len(hand)):
    card = hand[i]
    if card.cost <= energy:
      for rest in _affordable_subsets(hand, energy - card.cost, i + 1):
        yield [card] + rest


def _plans_for_sacrifice(player, opponent, sacrifice_slots):
  """Generate one plan per affordable card subset for a given sacrifice set."""
  board = list(player.board)
  energy = player.energy

  for slot in sacrifice_slots:
    if board[slot] is not None:
      board[slot] = None
      energy += 1

  hand = list(player.hand)
  empty_slots = [i for i, c in enumerate(board) if c is None]
  en_board = opponent.board

  # For scoring: open enemy slots first so the simulation reflects
  # direct-damage potential accurately.
  scoring_slots = (
    [s for s in empty_slots if en_board[s] is None] +
    [s for s in empty_slots if en_board[s] is not None]
  )

  return [
    MovePlan(
      sacrifice_slots=list(sacrifice_slots),
      plays=list(zip(cards, scoring_slots)),
      label=f"sac{len(sacrifice_slots)}_play{len(cards)}",
    )
    for cards in _affordable_subsets(hand, energy)
  ]


def generate_plans(player, opponent) -> list[MovePlan]:
  """Generate diverse candidate move plans covering a range of strategies."""
  plans = []

  # Sacrifice n board cards
  occupied = [s for s in range(BOARD_SIZE) if player.board[s] is not None]
  for n in range(len(occupied) + 1):
    for slots in combinations(occupied, n):
      plans += _plans_for_sacrifice(player, opponent, list(slots))

  # Idle: do nothing
  plans.append(MovePlan(sacrifice_slots=[], plays=[], label="idle"))

  return plans


def score_plan(plan: MovePlan, player, opponent, personality: AIPersonality) -> float:
  """
  Score a plan from ~0.0 to ~1.0 based on the projected board state after
  executing it. Higher is better.
  """
  # Simulate board after sacrifices + plays
  board = list(player.board)
  energy = player.energy
  for slot in plan.sacrifice_slots:
    if board[slot] is not None:
      board[slot] = None
      energy += 1
  for card, slot in plan.plays:
    board[slot] = card

  en_board = opponent.board
  enemy_occupied = sum(1 for c in en_board if c is not None)

  # --- Combat metrics ---
  direct_damage = 0      # AI attacks going straight to opponent life
  board_damage = 0       # AI attacks hitting enemy cards
  blocking_slots = 0     # Slots where AI blocks an enemy card
  cards_destroyed = 0    # Enemy cards the AI would destroy this turn
  unblocked_incoming = 0 # Enemy attacks that go straight to AI life
  cards_on_board = 0

  for slot in range(BOARD_SIZE):
    my = board[slot]
    en = en_board[slot]
    if my:
      cards_on_board += 1
    if my and en is None:
      direct_damage += my.attack
    if my and en:
      board_damage += my.attack
      blocking_slots += 1
      if my.attack >= en.defense:
        cards_destroyed += 1
    if not my and en:
      unblocked_incoming += en.attack

  # --- Normalize to [0, 1] ---
  # How threatening is the attack relative to what remains of opponent's life?
  atk_score = min(1.0, direct_damage / max(opponent.life, 1))

  # What fraction of enemy slots are blocked?
  block_score = (blocking_slots / enemy_occupied) if enemy_occupied > 0 else 1.0

  # What fraction of all slots are filled?
  cover_score = cards_on_board / BOARD_SIZE

  # What fraction of enemy cards do are destroyed?
  destroy_score = (cards_destroyed / enemy_occupied) if enemy_occupied > 0 else 0.0

  # How safe is the AI from unblocked hits relative to its own life?
  threat_score = 1.0 - min(1.0, unblocked_incoming / max(player.life, 1))

  # How many cards compared to the enemy?
  opponent_cards_left = len(opponent.deck) + len(opponent.hand) + enemy_occupied
  my_cards_left = len(player.deck) + len(player.hand) + blocking_slots
  attrition_score = my_cards_left/(my_cards_left + opponent_cards_left)

  # Net value: cost of cards played minus cost of cards sacrificed.
  n_sac = len(plan.sacrifice_slots)
  sac_value = sum(player.board[s].cost for s in plan.sacrifice_slots if player.board[s] is not None)
  play_value = sum(c.cost for c, _ in plan.plays)
  net_value = play_value - sac_value
  net_value_norm = max(0.0, min(1.0, (net_value + 10) / 20))

  # Sacrifice penalty. Applied as a flat deduction after personality scoring.
  sacrifice_penalty = 0.0
  if n_sac > 0:
    # Penalty 1: wasted energy. Each sacrifice gives +1 energy; if that energy
    # goes unspent it was pointless. Weighted heavily.
    energy_leftover = player.energy + n_sac - play_value
    wasted_sac_energy = max(0, min(n_sac, energy_leftover))
    wasted_penalty = wasted_sac_energy / n_sac

    # Penalty 2: low-value swap. Each sacrifice should at minimum unlock a card
    # that costs more than the one removed (net_value > n_sac means each
    # sacrifice bought at least one extra cost point). Anything less is a bad trade.
    swap_penalty = max(0.0, min(1.0, (n_sac - net_value) / max(n_sac, 1)))

    sacrifice_penalty = 0.65 * wasted_penalty + 0.35 * swap_penalty

  # Power curve value of the cards played (are they good value for their cost?)
  if plan.plays:
    pcv_scores = [max(0.0, min(1.0, get_power_curve_value(c))) for c, _ in plan.plays]
    pcv_score = sum(pcv_scores) / len(pcv_scores)
  else:
    pcv_score = 0.5

  # --- Personality weights ---
  if personality == AIPersonality.AGGRESSIVE:
    # Maximize direct damage
    score = (
      0.40 * atk_score +
      0.10 * block_score +
      0.10 * cover_score +
      0.10 * net_value_norm +
      0.15 * destroy_score +
      0.05 * attrition_score +
      0.05 * pcv_score +
      0.05 * threat_score
    )

  elif personality == AIPersonality.DEFENSIVE:
    # Block everything
    score = (
      0.05 * atk_score +
      0.35 * block_score +
      0.20 * cover_score +
      0.05 * net_value_norm +
      0.05 * destroy_score +
      0.10 * attrition_score +
      0.05 * pcv_score +
      0.15 * threat_score
    )

  elif personality == AIPersonality.SWARM:
    # Fill the board and press with direct damage
    score = (
      0.25 * atk_score +
      0.10 * block_score +
      0.35 * cover_score +
      0.05 * net_value_norm +
      0.05 * destroy_score +
      0.10 * attrition_score +
      0.05 * pcv_score +
      0.05 * threat_score
    )

  elif personality == AIPersonality.GREEDY:
    # High-value card plays, willing to sacrifice weak cards for strong ones
    score = (
      0.20 * atk_score +
      0.05 * block_score +
      0.10 * cover_score +
      0.40 * net_value_norm +
      0.05 * destroy_score +
      0.05 * attrition_score +
      0.10 * pcv_score +
      0.05 * threat_score
    )

  elif personality == AIPersonality.CONTROL:
    # Efficiency
    score = (
      0.10 * atk_score +
      0.05 * block_score +
      0.05 * cover_score +
      0.20 * net_value_norm +
      0.05 * destroy_score +
      0.10 * attrition_score +
      0.40 * pcv_score +
      0.05 * threat_score
    )

  elif personality == AIPersonality.BALANCED:
    score = (
      0.10 * atk_score +
      0.15 * block_score +
      0.10 * cover_score +
      0.10 * net_value_norm +
      0.10 * destroy_score +
      0.10 * attrition_score +
      0.15 * pcv_score +
      0.10 * threat_score
    )

  else:  # ARBITRARY
    score = (
      0.60 * random.random() +
      0.05 * atk_score +
      0.05 * block_score +
      0.05 * cover_score +
      0.05 * net_value_norm +
      0.05 * destroy_score +
      0.05 * attrition_score +
      0.05 * pcv_score +
      0.05 * threat_score
    )

  # --- Context adjustments ---

  # Lethal takes priority regardless of personality
  if direct_damage >= opponent.life:
    score = max(score, 0.95)

  if unblocked_incoming >= player.life:
    score = min(score, 0.05)

  # Against god-card decks: cover all slots so their big cards can't attack freely
  if opponent.deck_type in ("God Card", "Pantheon"):
    score = min(1.0, score + 0.08 * cover_score)

  # Against aggro/rush: need to block more urgently
  if opponent.deck_type in ("Aggro", "Rush"):
    score = min(1.0, score + 0.06 * block_score + 0.04 * threat_score)

  # Against wall decks: direct damage matters more than destroying cards
  if opponent.deck_type == "Wall":
    score = min(1.0, score + 0.06 * atk_score)

  # Press the advantage when opponent is low on life
  if opponent.life < STARTING_LIFE * 0.3:
    score = min(1.0, score + 0.06 * atk_score)

  # Prioritize survival when low on life
  if player.life < STARTING_LIFE * 0.3:
    score = min(1.0, score + 0.06 * threat_score + 0.04 * block_score)

  # Opponent running low on cards: keep a card on board for attrition win condition
  if opponent_cards_left <= 5 and cards_on_board > 0:
    score = min(1.0, score + 0.05)

  # Apply sacrifice penalty last so it can override all other considerations.
  score = max(0.0, score - sacrifice_penalty)

  return score


# ==================== Turn execution ====================

async def run_ai_turn(game_id: str):
  from game_manager import (
    active_games, connections, active_deck_ids,
    serialize_state, record_game_result, calculate_combat_animation_time
  )

  state = active_games.get(game_id)
  if not state or state.result:
    return
  if state.active_player_id != AI_USER_ID:
    return

  human_id = state.opponent_id(AI_USER_ID)
  waited = 0
  while not connections[game_id].get(human_id) and waited < 10:
    await asyncio.sleep(0.5)
    waited += 0.5

  await asyncio.sleep(calculate_combat_animation_time(state.last_combat_events))

  player = state.players[AI_USER_ID]
  opponent = state.players[human_id]
  difficulty = state.ai_difficulty
  personality = (
    AIPersonality(state.ai_personality)
    if state.ai_personality
    else AIPersonality.BALANCED
  )

  ws = connections[game_id].get(human_id)

  async def send_state(s):
    if ws:
      try:
        await ws.send_json({"type": "state", "state": serialize_state(s, human_id)})
      except Exception:
        pass

  async def send_sacrifice_anim(instance_id):
    if ws:
      try:
        await ws.send_json({"type": "sacrifice_animation", "instance_id": instance_id})
      except Exception:
        pass

  # --- Generate and score candidate plans ---
  plans = generate_plans(player, opponent)

  if difficulty <= 2:
    # Actively bad
    scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, 0.15*difficulty), p)
              for p in plans]
    best_plan = min(scored, key=lambda x: x[0])[1]
  elif difficulty == 3:
    # Fully random
    best_plan = random.choice(plans)
  else:
    noise = max(0.0, ((8 - difficulty) / 6.0) * 0.30)
    scored = [(score_plan(p, player, opponent, personality) + random.gauss(0, noise), p)
              for p in plans]
    best_plan = max(scored, key=lambda x: x[0])[1]

  logger.info(
    f"AI turn: d={difficulty} p={personality.value} plan={best_plan.label} plans={len(plans)} " +
    f"sac={best_plan.sacrifice_slots} plays={[c.name for c, _ in best_plan.plays]}"
  )

  # --- Execute sacrifices ---
  for slot in best_plan.sacrifice_slots:
    card_slot = player.board[slot]
    if card_slot is None:
      continue
    await send_sacrifice_anim(card_slot.instance_id)
    await asyncio.sleep(0.65)
    action_sacrifice(state, slot)
    await send_state(state)
    await asyncio.sleep(0.35)

  # --- Execute plays ---
  # Shuffle play order so the AI doesn't always fill slots left-to-right
  plays = list(best_plan.plays)
  random.shuffle(plays)

  for card, slot in plays:
    # Re-look up hand index each time (hand shrinks as cards are played)
    hand_idx = next((i for i, c in enumerate(player.hand) if c is card), None)
    if hand_idx is None:
      continue
    if player.board[slot] is not None:
      continue
    if card.cost > player.energy:
      continue
    action_play_card(state, hand_idx, slot)
    await send_state(state)
    await asyncio.sleep(0.5)

  action_end_turn(state)
  await send_state(state)

  if state.result:
    from database import SessionLocal
    db = SessionLocal()
    try:
      record_game_result(state, db)
      if ws:
        await ws.send_json({
          "type": "state",
          "state": serialize_state(state, human_id),
        })
    finally:
      db.close()
    active_deck_ids.pop(human_id, None)
    active_deck_ids.pop(AI_USER_ID, None)
    active_games.pop(game_id, None)
    connections.pop(game_id, None)
    return

  if state.active_player_id == AI_USER_ID:
    asyncio.create_task(run_ai_turn(game_id))