from __future__ import annotations

import asyncio
import random
import time
from urllib.parse import urlparse

from common_logging import get_logger

# Module-level logger, obtained from the project's common_logging helper.
logger = get_logger(__name__)

_CATEGORY_RATE: dict[int, int] = {1: 8, 2: 8, 3: 8, 4: 6, 5: 10, 6: 12, 7: 10, 8: 12, 9: 8}
_DEFAULT_RATE = 10

class _TokenBucket:

    def __init__(self, rate_per_minute: int):
        self.rate = rate_per_minute
        self._timestamps: list[float] = []
        self._lock = asyncio.Lock()

    async def acquire(self):
        async with self._lock:
            now = time.monotonic()
            window = 60.0
            self._timestamps = [t for t in self._timestamps if now - t < window]
            if len(self._timestamps) >= self.rate:
                oldest = self._timestamps[0]
                wait = window - (now - oldest) + 0.05
                if wait > 0:
                    logger.debug(f'令牌桶满，等待 {wait:.2f}s')
                    await asyncio.sleep(wait)
                    now = time.monotonic()
                    self._timestamps = [t for t in self._timestamps if now - t < window]
            self._timestamps.append(time.monotonic())
# Module-wide registry of limiters keyed by (domain, category_id); entries are
# created on first use by _get_bucket and are never removed in this module.
_buckets: dict[tuple, _TokenBucket] = {}
# Guards lookups/insertions into _buckets (taken inside _get_bucket).
_buckets_lock = asyncio.Lock()

async def _get_bucket(domain: str, category_id: int) -> _TokenBucket:
    """Return the shared limiter for ``(domain, category_id)``, creating it on first use.

    A newly created bucket takes its rate from ``_CATEGORY_RATE`` for the
    category, falling back to ``_DEFAULT_RATE`` for unknown category ids.
    """
    bucket_key = (domain, category_id)
    async with _buckets_lock:
        bucket = _buckets.get(bucket_key)
        if bucket is None:
            per_minute = _CATEGORY_RATE.get(category_id, _DEFAULT_RATE)
            bucket = _TokenBucket(per_minute)
            _buckets[bucket_key] = bucket
        return bucket
_UA_PROFILES = [{'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'lang': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7', 'platform': 'Win32'}, {'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 'lang': 'zh-CN,zh;q=0.9,en;q=0.8', 'platform': 'Win32'}, {'ua': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'lang': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7', 'platform': 'MacIntel'}, {'ua': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 'lang': 'zh-CN,zh;q=0.9', 'platform': 'MacIntel'}, {'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0', 'lang': 'zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2', 'platform': 'Win32'}, {'ua': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:121.0) Gecko/20100101 Firefox/121.0', 'lang': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3', 'platform': 'MacIntel'}, {'ua': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0', 'lang': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6', 'platform': 'Win32'}, {'ua': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36', 'lang': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7', 'platform': 'Linux x86_64'}]
_REFERERS = {1: 'https://fgk.chinatax.gov.cn/zcfgk/c100009/listflfg_fg.html', 2: 'https://fgk.chinatax.gov.cn/zcfgk/c100010/listflfg_fg.html', 3: 'https://fgk.chinatax.gov.cn/zcfgk/c102440/listflfg.html', 4: 'https://fgk.chinatax.gov.cn/zcfgk/c100011/list_guizhang.html', 5: 'https://fgk.chinatax.gov.cn/zcfgk/c102416/listflfg.html', 6: 'https://fgk.chinatax.gov.cn/zcfgk/c100012/listflfg.html', 7: 'https://fgk.chinatax.gov.cn/zcfgk/c100013/listflfg.html', 8: 'https://fgk.chinatax.gov.cn/zcfgk/c102424/listflfg.html', 9: 'https://fgk.chinatax.gov.cn/zcfgk/c100015/list_zcjd.html'}
_DEFAULT_REFERER = 'https://fgk.chinatax.gov.cn/'

class AntiBlockingStrategy:
    """Request pacing and disguise helper for a crawler.

    Combines three things: rate limiting through the module's shared
    per-(domain, category) buckets, a randomized sleep before each request,
    and randomized browser-like headers / proxy selection.
    """

    # Flat list of every User-Agent string available in _UA_PROFILES.
    USER_AGENTS = [profile['ua'] for profile in _UA_PROFILES]

    def __init__(
        self,
        delay_min: float=1.0,
        delay_max: float=5.0,
        category_id: int=0,
        proxy_list: list[str] | None=None,
        domain: str='fgk.chinatax.gov.cn',
        max_requests_per_minute: int | None=None,
    ):
        """Store the pacing configuration.

        Args:
            delay_min: Lower clamp, in seconds, for the random delay.
            delay_max: Upper clamp, in seconds, for the random delay.
            category_id: Category used to pick the bucket, rate and Referer.
            proxy_list: Proxies to choose from; ``None`` or empty means none.
            domain: Domain used for rate limiting when no URL is supplied.
            max_requests_per_minute: Optional rate override; a falsy value
                means "use the per-category rate".
        """
        self.delay_min = delay_min
        self.delay_max = delay_max
        self.category_id = category_id
        self.proxy_list = proxy_list if proxy_list else []
        self.domain = domain

        # Delay is drawn from a normal distribution centred on the midpoint,
        # with a quarter of the range as its standard deviation.
        self._delay_mu = (delay_min + delay_max) / 2
        self._delay_sigma = (delay_max - delay_min) / 4

        self._override_rate = max_requests_per_minute
        # Not read anywhere inside this class.
        self.request_times: list[float] = []
        if max_requests_per_minute:
            self.max_requests_per_minute = max_requests_per_minute
        else:
            self.max_requests_per_minute = _CATEGORY_RATE.get(category_id, _DEFAULT_RATE)

    async def before_request(self, url: str=''):
        """Throttle, then sleep a random delay; call before issuing a request.

        The bucket is chosen by the domain of ``url`` (or ``self.domain`` when
        ``url`` is empty) together with ``self.category_id``. If a rate
        override was configured it is written onto that bucket, which comes
        from the module-level registry.
        """
        target_domain = self.domain if not url else _extract_domain(url)
        bucket = await _get_bucket(target_domain, self.category_id)
        override = self._override_rate
        if override and bucket.rate != override:
            bucket.rate = override
        await bucket.acquire()
        await self._apply_normal_delay()

    def get_headers(self, category_id: int | None=None) -> dict:
        """Build browser-like request headers from a randomly chosen profile.

        Args:
            category_id: Category whose Referer to use; ``None`` falls back to
                the instance's ``category_id``.

        Returns:
            A new dict of header names to values.
        """
        chosen = random.choice(_UA_PROFILES)
        if category_id is not None:
            effective_category = category_id
        else:
            effective_category = self.category_id
        return {
            'User-Agent': chosen['ua'],
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': chosen['lang'],
            'Accept-Encoding': 'gzip, deflate, br',
            'Referer': _REFERERS.get(effective_category, _DEFAULT_REFERER),
            'Connection': 'keep-alive',
            'Upgrade-Insecure-Requests': '1',
            'Cache-Control': 'max-age=0',
            'Sec-Fetch-Dest': 'document',
            'Sec-Fetch-Mode': 'navigate',
            'Sec-Fetch-Site': 'same-origin',
        }

    def get_proxy(self) -> str | None:
        """Return a randomly chosen proxy, or ``None`` when none are configured."""
        return random.choice(self.proxy_list) if self.proxy_list else None

    async def apply_delay(self):
        """Sleep for one randomized delay (public wrapper around the private helper)."""
        await self._apply_normal_delay()

    async def check_rate_limit(self):
        """Do nothing; rate limiting is performed in ``before_request``."""
        return None

    async def _apply_normal_delay(self):
        """Sleep for a normally distributed delay clamped to [delay_min, delay_max]."""
        sampled = random.gauss(self._delay_mu, self._delay_sigma)
        capped = min(self.delay_max, sampled)
        delay = max(self.delay_min, capped)
        logger.debug(f'延迟 {delay:.2f}s')
        await asyncio.sleep(delay)

def _extract_domain(url: str) -> str:
    try:
        return urlparse(url).netloc or url
    except Exception:
        return url
