import hashlib
from abc import ABC, abstractmethod
from collections.abc import AsyncIterator
from dataclasses import dataclass, field

from common_logging import get_logger

logger = get_logger(__name__)


@dataclass
class DocumentItem:
    url: str
    title: str
    date: str | None = None
    extra: dict = field(default_factory=dict)

@dataclass
class ParsedDocument:
    source_url: str
    title: str
    doc_type: str
    region_code: str
    content_markdown: str
    content_text: str
    content_hash: str
    content_html: str | None = None
    doc_number: str | None = None
    issuing_authority: str | None = None
    issue_date: str | None = None
    effective_date: str | None = None
    doc_status: str | None = None
    attachments: list | None = None
    fetch_strategy: str = 'normal'
    supersedes: list | None = None
    references: list | None = None
    qa_question: str | None = None
    qa_answer: str | None = None
    interpretation_form: str | None = None
    inline_images: list | None = None
    inline_videos: list | None = None

    @staticmethod
    def compute_hash(text: str) -> str:
        return hashlib.sha256(text.encode()).hexdigest()

class BaseSourceAdapter(ABC):
    """Abstract base for adapters that list and parse documents from a source.

    Subclasses must implement ``list_documents``, ``parse_document`` and
    ``get_total_pages``. ``warmup`` is an optional hook and
    ``list_all_documents`` is a ready-made paginating iterator built on the
    abstract methods.
    """

    def __init__(self, source, engine):
        """Store *source* and *engine* unchanged for use by subclasses."""
        self.engine = engine
        self.source = source

    @abstractmethod
    async def list_documents(self, page: int = 1) -> list[DocumentItem]:
        """Return the document items found on listing page *page*."""

    @abstractmethod
    async def parse_document(self, item: DocumentItem) -> ParsedDocument | None:
        """Parse *item* into a ``ParsedDocument``, or return ``None``."""

    @abstractmethod
    async def get_total_pages(self) -> int:
        """Return the number of listing pages available."""

    async def warmup(self, client) -> None:  # noqa: B027
        """Optional hook; the base implementation does nothing."""

    async def list_all_documents(self, start_page: int | None = None, end_page: int | None = None) -> AsyncIterator[DocumentItem]:
        """Yield every item from a range of listing pages, in page order.

        Args:
            start_page: First page to fetch. Any falsy value (``None`` or
                ``0``) means page 1.
            end_page: Last page to fetch (inclusive), capped at the value of
                ``get_total_pages()``. Any falsy value (``None`` or ``0``)
                means the last available page.

        Yields:
            Each item returned by ``list_documents`` for the selected pages.
            Iteration stops early after the first page that returns no items.
        """
        total_pages = await self.get_total_pages()

        first_page = start_page if start_page else 1
        if end_page:
            last_page = min(end_page, total_pages)
        else:
            last_page = total_pages

        for page_number in range(first_page, last_page + 1):
            batch = await self.list_documents(page_number)
            for document in batch:
                yield document
            # An empty page is treated as the end of the listing.
            if not batch:
                break
