This commit is contained in:
Nikolaj
2026-03-16 14:01:22 +01:00
parent 5d54d1cf7b
commit 65b719334f
19 changed files with 848 additions and 96 deletions

View File

@@ -1,3 +1,4 @@
import logging
from math import sqrt, cbrt
from enum import Enum
from typing import NamedTuple
@@ -5,6 +6,8 @@ from urllib.parse import quote
from datetime import datetime, timedelta
from time import sleep
logger = logging.getLogger("app")
class CardType(Enum):
other = 0
person = 1
@@ -15,6 +18,7 @@ class CardType(Enum):
group = 6
science_thing = 7
vehicle = 8
business = 9
class CardRarity(Enum):
common = 0
@@ -94,6 +98,7 @@ WIKIDATA_INSTANCE_TYPE_MAP = {
"Q482994": CardType.artwork, # album
"Q134556": CardType.artwork, # single
"Q169930": CardType.artwork, # EP
"Q196600": CardType.artwork, # media franchise
"Q202866": CardType.artwork, # animated film
"Q734698": CardType.artwork, # collectible card game
"Q506240": CardType.artwork, # television film
@@ -108,9 +113,11 @@ WIKIDATA_INSTANCE_TYPE_MAP = {
"Q47461344": CardType.artwork, # written work
"Q71631512": CardType.artwork, # tabletop role-playing game supplement
"Q21198342": CardType.artwork, # manga series
"Q58483083": CardType.artwork, # dramatico-musical work
"Q24634210": CardType.artwork, # podcast show
"Q105543609": CardType.artwork, # musical work / composition
"Q106499608": CardType.artwork, # literary reading
"Q117467246": CardType.artwork, # animated television series
"Q515": CardType.location, # city
"Q8502": CardType.location, # mountain
@@ -137,31 +144,50 @@ WIKIDATA_INSTANCE_TYPE_MAP = {
"Q7278": CardType.group, # political party
"Q476028": CardType.group, # association football club
"Q732717": CardType.group, # law enforcement agency
"Q215380": CardType.group, # musical group
"Q176799": CardType.group, # military unit
"Q178790": CardType.group, # labor union
"Q2367225": CardType.group, # university and college sports club
"Q4801149": CardType.group, # artillery brigade
"Q9248092": CardType.group, # infantry division
"Q7210356": CardType.group, # political organization
"Q5419137": CardType.group, # veterans' organization
"Q12973014": CardType.group, # sports team
"Q11446438": CardType.group, # female idol group
"Q135408445": CardType.group, # men's national association football team
"Q7187": CardType.science_thing, # gene
"Q8054": CardType.science_thing, # protein
"Q65943": CardType.science_thing, # theorem
"Q12140": CardType.science_thing, # medication
"Q11276": CardType.science_thing, # globular cluster
"Q898273": CardType.science_thing, # protein domain
"Q168845": CardType.science_thing, # star cluster
"Q1840368": CardType.science_thing, # cloud type
"Q113145171": CardType.science_thing, # type of chemical entity
"Q1420": CardType.vehicle, # car
"Q11446": CardType.vehicle, # ship
"Q43193": CardType.vehicle, # truck
"Q25956": CardType.vehicle, # space station
"Q39804": CardType.vehicle, # cruise ship
"Q811704": CardType.vehicle, # rolling stock class
"Q673687": CardType.vehicle, # racing automobile
"Q174736": CardType.vehicle, # destroyer
"Q484000": CardType.vehicle, # unmanned aerial vehicle
"Q559026": CardType.vehicle, # ship class
"Q830335": CardType.vehicle, # protected cruiser
"Q928235": CardType.vehicle, # sloop-of-war
"Q391022": CardType.vehicle, # research vessel
"Q1185562": CardType.vehicle, # light aircraft carrier
"Q7233751": CardType.vehicle, # post ship
"Q3231690": CardType.vehicle, # automobile model
"Q1428357": CardType.vehicle, # submarine class
"Q1499623": CardType.vehicle, # destroyer escort
"Q4818021": CardType.vehicle, # attack submarine
"Q4830453": CardType.business, # business
}
import asyncio
@@ -185,13 +211,15 @@ async def _get_random_summary_async(client: httpx.AsyncClient) -> dict:
headers=HEADERS,
follow_redirects=False,
)
except httpx.ReadTimeout:
except:
return {}
if not response.is_success:
print("Error in request:")
print(response.status_code)
print(response.text)
logger.error(
"Error in request:" +
str(response.status_code) +
response.text
)
return {}
return response.json()
@@ -212,22 +240,28 @@ async def _get_page_summary_async(client: httpx.AsyncClient, title: str) -> dict
headers=HEADERS,
follow_redirects=False,
)
except httpx.ReadTimeout:
except:
return {}
if not response.is_success:
print("Error in request:")
print(response.status_code)
print(response.text)
logger.error(
"Error in request:" +
str(response.status_code) +
response.text
)
return {}
return response.json()
async def _infer_card_type_async(client: httpx.AsyncClient, entity_id: str) -> tuple[CardType, str, int]:
response = await client.get(
"https://www.wikidata.org/wiki/Special:EntityData/" + entity_id + ".json",
headers=HEADERS
)
try:
response = await client.get(
"https://www.wikidata.org/wiki/Special:EntityData/" + entity_id + ".json",
headers=HEADERS
)
except:
return CardType.other, "", 0
if not response.is_success:
return CardType.other, "", 0
@@ -253,10 +287,13 @@ async def _infer_card_type_async(client: httpx.AsyncClient, entity_id: str) -> t
async def _get_wikirank_score(client: httpx.AsyncClient, title: str) -> float | None:
"""Returns a quality score from 0-100, or None if unavailable."""
response = await client.get(
f"https://api.wikirank.net/api.php?name={quote(title, safe="")}&lang=en",
headers=HEADERS
)
try:
response = await client.get(
f"https://api.wikirank.net/api.php?name={quote(title, safe='')}&lang=en",
headers=HEADERS
)
except:
return None
if not response.is_success:
return None
data = response.json()
@@ -307,7 +344,7 @@ async def _get_monthly_pageviews(client: httpx.AsyncClient, title: str) -> int |
return None
items = response.json().get("items", [])
return items[0]["views"] if items else None
except httpx.ReadError:
except Exception:
return None
def _pageviews_to_defense(views: int | None) -> int:
@@ -340,8 +377,8 @@ async def _get_card_async(client: httpx.AsyncClient, page_title: str|None = None
rarity = _score_to_rarity(score)
multiplier = RARITY_MULTIPLIER[rarity]
attack = int(language_count*1.5*multiplier**2)
defense = int(_pageviews_to_defense(views)*max(multiplier,(multiplier**2)/2))
attack = min(2500,int(((language_count*1.5)**1.2)*multiplier**2))
defense = min(2500,int(_pageviews_to_defense(views)*max(multiplier,(multiplier**2)/2)))
return Card(
name=summary["title"],
@@ -353,13 +390,15 @@ async def _get_card_async(client: httpx.AsyncClient, page_title: str|None = None
text=text,
attack=attack,
defense=defense,
cost=min(12,max(1,int(cbrt(attack+defense)/1.5)))
cost=min(12,max(1,int(((attack**2+defense**2)**0.18)/1.5)))
)
async def _get_cards_async(size: int) -> list[Card]:
    """Generate up to ``size`` cards concurrently over one shared HTTP client.

    ``size`` calls to ``_get_card_async`` (with no page title) are scheduled
    together through ``asyncio.gather`` on a single ``httpx.AsyncClient``
    that follows redirects.

    Args:
        size: Number of card requests to issue.

    Returns:
        The cards that were produced. Results equal to ``None`` are dropped,
        so the list may be shorter than ``size``.
    """
    # Lazy %-style arguments: the message is only formatted if DEBUG is on.
    logger.debug("Generating %s cards", size)
    async with httpx.AsyncClient(follow_redirects=True) as client:
        cards = await asyncio.gather(
            *(_get_card_async(client) for _ in range(size))
        )
    # The client is no longer needed once every request has completed, so the
    # filtering happens after the connection pool is closed.
    return [card for card in cards if card is not None]
async def _get_specific_card_async(title: str) -> Card|None:
async with httpx.AsyncClient(follow_redirects=True) as client:
@@ -367,33 +406,29 @@ async def _get_specific_card_async(title: str) -> Card|None:
# Sync entrypoints
def generate_cards(size: int) -> list[Card]:
    """Blocking entry point that generates up to ``size`` cards.

    Runs ``_get_cards_async(size)`` to completion on a fresh event loop via
    ``asyncio.run``. Because ``asyncio.run`` refuses to start inside an
    already running loop, call this only from synchronous code.

    Args:
        size: Number of cards to request.

    Returns:
        Whatever ``_get_cards_async`` returns: a list of cards.
    """
    # A single asyncio.run call issues all requests at once; there is no
    # batching or sleeping between requests at this level.
    return asyncio.run(_get_cards_async(size))
def generate_card(title: str) -> Card|None:
    """Synchronously build the card for the page named ``title``.

    Drives ``_get_specific_card_async(title)`` to completion with
    ``asyncio.run`` and hands back its result unchanged (annotated as a
    ``Card`` or ``None``).
    """
    card = asyncio.run(_get_specific_card_async(title))
    return card
# for card in generate_cards(5):
# print(card)
# rarities = []
# cards = []
# for i in range(20):
# print(i)
# cards += generate_cards(10)
# sleep(3)
# costs = []
# from collections import Counter
# for card in generate_cards(1000):
# rarities.append(card.card_rarity)
# for card in cards:
# costs.append((card.card_rarity,card.cost))
# if card.card_rarity == CardRarity.legendary:
# print(card)
# print(Counter(rarities))
# print(Counter(costs))
# for card in generate_cards(100):
# if card.card_type == CardType.other: