🐐
This commit is contained in:
176
backend/ai/card_pick_nn.py
Normal file
176
backend/ai/card_pick_nn.py
Normal file
@@ -0,0 +1,176 @@
|
||||
import os
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ai.nn import NeuralNet, _softmax
|
||||
|
||||
# Separate weights file so this NN trains independently from the plan NN.
|
||||
CARD_PICK_WEIGHTS_PATH = os.path.join(os.path.dirname(__file__), "card_pick_weights.json")
|
||||
|
||||
N_CARD_FEATURES = 15
|
||||
|
||||
# Normalization constants — chosen to cover the realistic stat range for generated cards.
|
||||
_MAX_ATK = 50.0
|
||||
_MAX_DEF = 100.0
|
||||
|
||||
|
||||
def _precompute_static_features(allowed: list) -> np.ndarray:
    """
    Build the (n, 7) float32 matrix of per-card static features for the whole
    pool in one vectorized pass.  Called once per choose_cards() invocation.

    Columns: attack (normalized), defense (normalized), cost (normalized),
    rarity (normalized), attack share of total stats, clipped cost-value
    delta, and type id (normalized).
    """
    pool_size = len(allowed)
    attack = np.fromiter((c.attack for c in allowed), dtype=np.float32, count=pool_size)
    defense = np.fromiter((c.defense for c in allowed), dtype=np.float32, count=pool_size)
    cost = np.fromiter((c.cost for c in allowed), dtype=np.float32, count=pool_size)
    rarity = np.fromiter((c.card_rarity.value for c in allowed), dtype=np.float32, count=pool_size)
    type_id = np.fromiter((c.card_type.value for c in allowed), dtype=np.float32, count=pool_size)

    # "Fair" cost implied by raw stats, bounded to the 1..10 cost scale.
    fair_cost = np.clip(((attack**2 + defense**2) ** 0.18) / 1.5, 1.0, 10.0)
    stat_total = attack + defense
    # Attack's share of total stats; 0.5 for degenerate all-zero cards.
    attack_share = np.where(stat_total > 0, attack / stat_total, 0.5)
    # How much the card under-prices its stats, clipped to [0, 1].
    value_delta = np.clip(fair_cost - cost, 0.0, 1.0)

    columns = (
        attack / _MAX_ATK,
        defense / _MAX_DEF,
        cost / 10.0,
        rarity / 5.0,
        attack_share,
        value_delta,
        type_id / 9.0,
    )
    features = np.empty((pool_size, 7), dtype=np.float32)
    for col, values in enumerate(columns):
        features[:, col] = values
    return features
|
||||
|
||||
|
||||
class CardPickPlayer:
    """
    Uses a NeuralNet to sequentially select cards from a pool until the cost
    budget is exhausted. API mirrors NeuralPlayer so training code stays uniform.

    In training mode: samples stochastically (softmax) and records the
    trajectory for a REINFORCE update after the game ends.
    In inference mode: picks the highest-scoring affordable card at each step.

    Performance design:
    - Static per-card features (7) are computed once via vectorized numpy.
    - Context features (8) use running totals updated by O(1) increments.
    - Picked cards are tracked with a boolean mask; no list.remove() calls.
    - Each pick step does one small forward pass over the affordable subset only.
    """

    def __init__(self, net: NeuralNet, training: bool = False, temperature: float = 1.0):
        # net: scoring network; must expose forward(), backward(), weights, biases.
        self.net = net
        # training: True -> sample via softmax and record trajectory; False -> greedy argmax.
        self.training = training
        # temperature: softmax sharpness used ONLY when sampling during training.
        self.temperature = temperature
        # One entry per pick step: (candidate feature matrix, index chosen within it).
        self.trajectory: list[tuple[np.ndarray, int]] = []  # (features_matrix, chosen_idx)

    def choose_cards(self, allowed: list, difficulty: int) -> list:
        """
        allowed: pre-filtered list of Card objects (cost ≤ max_card_cost already applied).
        Returns the selected deck as a list of Cards.
        """
        # Total cost budget for a deck; loop below stops when nothing affordable remains.
        BUDGET = 50
        n = len(allowed)

        static = _precompute_static_features(allowed)  # (n, 7) — computed once
        costs = np.array([c.cost for c in allowed], dtype=np.float32)
        # Boolean mask of already-selected cards; avoids O(n) list.remove().
        picked = np.zeros(n, dtype=bool)

        budget_remaining = BUDGET
        selected: list = []

        # Running totals for context features — incremented O(1) per pick.
        n_picked = 0
        sum_atk = 0.0
        sum_def = 0.0
        sum_cost = 0.0
        n_cheap = 0  # cost ≤ 3
        n_high = 0  # cost ≥ 6

        diff_norm = difficulty / 10.0

        while True:
            # Candidates: not yet picked AND affordable with the remaining budget.
            mask = (~picked) & (costs <= budget_remaining)
            if not mask.any():
                break

            idxs = np.where(mask)[0]

            # Context row — same for every candidate this step, broadcast via tile.
            if n_picked > 0:
                ctx = np.array([
                    n_picked / 30.0,
                    budget_remaining / 50.0,
                    sum_atk / n_picked / _MAX_ATK,
                    sum_def / n_picked / _MAX_DEF,
                    sum_cost / n_picked / 10.0,
                    n_cheap / n_picked,
                    n_high / n_picked,
                    diff_norm,
                ], dtype=np.float32)
            else:
                # First pick: averages are undefined (would divide by zero), so zero-fill.
                ctx = np.array([
                    0.0, budget_remaining / 50.0, 0.0, 0.0, 0.0, 0.0, 0.0, diff_norm,
                ], dtype=np.float32)

            # (k, 15) matrix: 7 static columns + 8 context columns per affordable card.
            features = np.concatenate(
                [static[idxs], np.tile(ctx, (len(idxs), 1))],
                axis=1,
            )
            scores = self.net.forward(features)

            if self.training:
                probs = _softmax((scores / self.temperature).astype(np.float64))
                # Guard against underflow-to-zero entries, then renormalize so the
                # distribution sums to exactly 1 as np.random.choice requires.
                probs = np.clip(probs, 1e-10, None)
                probs /= probs.sum()
                local_idx = int(np.random.choice(len(idxs), p=probs))
                self.trajectory.append((features, local_idx))
            else:
                local_idx = int(np.argmax(scores))

            # Map the within-candidates index back to the full pool index.
            global_idx = idxs[local_idx]
            card = allowed[global_idx]
            picked[global_idx] = True
            selected.append(card)

            # Incremental context update — O(1).
            budget_remaining -= card.cost
            n_picked += 1
            sum_atk += card.attack
            sum_def += card.defense
            sum_cost += card.cost
            if card.cost <= 3: n_cheap += 1
            if card.cost >= 6: n_high += 1

        return selected

    def compute_grads(self, outcome: float) -> tuple[list, list] | None:
        """
        REINFORCE gradients averaged over the pick trajectory.
        outcome: centered reward (win/loss minus baseline).
        Returns (grads_w, grads_b), or None if no picks were made.
        """
        if not self.trajectory:
            return None

        # Accumulators shaped like the net's parameters.
        acc_gw = [np.zeros_like(w) for w in self.net.weights]
        acc_gb = [np.zeros_like(b) for b in self.net.biases]

        for features, chosen_idx in self.trajectory:
            # Re-run the forward pass so backward() sees this step's activations.
            # NOTE(review): assumes net.backward consumes state cached by the
            # immediately preceding forward() — confirm against ai.nn.
            scores = self.net.forward(features)
            # NOTE(review): sampling in choose_cards used temperature-scaled
            # softmax, but gradients here use the unscaled softmax — confirm
            # this asymmetry is intentional.
            probs = _softmax(scores.astype(np.float64)).astype(np.float32)
            # ∇ log π(chosen) w.r.t. scores = onehot(chosen) - probs, scaled by reward.
            upstream = -probs.copy()
            upstream[chosen_idx] += 1.0
            upstream *= outcome
            gw, gb = self.net.backward(upstream)
            for i in range(len(acc_gw)):
                acc_gw[i] += gw[i]
                acc_gb[i] += gb[i]

        # Average over the trajectory so gradient scale is deck-length independent.
        n = len(self.trajectory)
        for i in range(len(acc_gw)):
            acc_gw[i] /= n
            acc_gb[i] /= n

        # Trajectory is consumed; clear so the player can be reused next game.
        self.trajectory.clear()
        return acc_gw, acc_gb
|
||||
Reference in New Issue
Block a user