from __future__ import annotations

import re
from collections.abc import Iterable
from dataclasses import dataclass, field
from datetime import date
from typing import Any

from sqlalchemy.orm import Session, load_only

from app.models.tax_data import TaxDocument
from app.services.tax_data_processor.relation_identity import (

    clear_duplicate_external_superseded_by,
    is_same_legal_reference,
    normalize_legal_doc_number,
    normalize_relation_title,
    relation_identity_key,
)

from common_logging import get_logger

# Module-level logger obtained from the project's logging helper.
logger = get_logger(__name__)

# doc_type values that is_regulation_like() accepts.
REGULATION_DOC_TYPES = {'regulation', 'local_policy', 'normative'}
# Level that legal_relation_level() returns for regulation-like documents other
# than 'regulation' documents whose category_id is an int in 1..8.
LOCAL_RELATION_LEVEL = 20
# Status labels searched for in the source HTML, mapped to internal status codes
# (see source_status_from_html()).
SOURCE_STATUS_MAP = {'全文有效': 'effective', '全文废止': 'obsolete', '全文失效': 'obsolete', '部分废止': 'partially_obsolete', '部分失效': 'partially_obsolete', '已修订': 'amended', '部分修订': 'amended'}
# Captures the inner content of a <span> whose class contains "xg" that follows
# the opening of a <p> whose class contains "arc_date"; re.S lets ".*?" cross
# line breaks.
SOURCE_STATUS_RE = re.compile('<p[^>]+class=["\\\'][^"\\\']*arc_date[^"\\\']*["\\\'][^>]*>.*?<span[^>]+class=["\\\'][^"\\\']*xg[^"\\\']*["\\\'][^>]*>(.*?)</span>', re.S)
# Relative strength of each relation type: a higher rank replaces a lower one
# when entries are deduplicated and when reverse candidates are compared.
RELATION_RANK = {'amends': 1, 'partially_supersedes': 2, 'supersedes': 3}
# Ordering of document statuses; _status_from_relation() only proposes a status
# whose rank is not lower than the target's current one.
STATUS_RANK = {None: 0, '': 0, 'effective': 1, 'amended': 2, 'partially_obsolete': 3, 'obsolete': 4}
# Status proposed for the target document of each relation type.
STATUS_BY_RELATION = {'amends': 'amended', 'partially_supersedes': 'partially_obsolete', 'supersedes': 'obsolete'}
# Matches a title ending in "修正案", optionally followed by a parenthesised
# suffix of 1-20 characters; the "title" group is the text before "修正案".
_TITLE_AMENDMENT_RE = re.compile('^(?P<title>.+?)修正案(?:[（(][^）)]{1,20}[）)])?$')
# Patterns that capture a title quoted in 《…》 or 〈…〉 after a "关于修改/修订"
# phrase or after a "关于…修订…" phrase.
_TITLE_AMEND_PATTERNS = (re.compile('关于(?:修改|修订)[《〈](?P<title>[^》〉\\n]{2,100})[》〉]'), re.compile('关于[^。\\n]{0,80}修订(?:后|后的|发布|印发)?[^《〈。\\n]{0,20}[《〈](?P<title>[^》〉\\n]{2,100})[》〉]'))

@dataclass(frozen=True)
class RelationTarget:
    """Immutable snapshot of the document fields used to resolve relations.

    Instances are built by ``RelationResolver.__init__`` from ``TaxDocument``
    rows, one per document.
    """

    id: int
    title: str
    doc_number: str | None
    source_url: str | None
    category_id: int
    issue_date: date | None
    doc_type: str | None
    # Filled by RelationResolver from the document's ``source_status``
    # attribute or, failing that, from source_status_from_html(content_html).
    source_status: str | None = None

@dataclass
class RelationBuildStats:
    """Counters gathered while relations are built and applied.

    Every field except ``samples`` is an integer counter; ``samples`` is a
    list of short descriptive strings appended by callers.
    """

    documents_processed: int = 0
    documents_changed: int = 0
    supersedes_self_removed: int = 0
    references_self_removed: int = 0
    references_removed_by_supersedes: int = 0
    supersedes_deduped: int = 0
    references_deduped: int = 0
    supersedes_resolved: int = 0
    references_resolved: int = 0
    supersedes_unresolved: int = 0
    references_unresolved: int = 0
    supersedes_demoted_to_references: int = 0
    references_invalid_target_removed: int = 0
    non_regulation_documents_skipped: int = 0
    reverse_links_changed: int = 0
    invalid_reverse_links_cleared: int = 0
    statuses_changed: int = 0
    status_review_needed: int = 0
    duplicate_external_cleared: int = 0
    errors: int = 0
    samples: list[str] = field(default_factory=list)

    def add(self, other: RelationBuildStats) -> None:
        """Merge *other* into this instance in place.

        Counters are summed field by field and ``other.samples`` is appended
        to ``self.samples``.
        """
        for counter_name in self.__dataclass_fields__:
            if counter_name != 'samples':
                merged = getattr(self, counter_name) + getattr(other, counter_name)
                setattr(self, counter_name, merged)
                continue
            self.samples.extend(other.samples)

    def as_dict(self) -> dict[str, Any]:
        """Return all counters as a dict, with ``samples`` as the last key."""
        result: dict[str, Any] = {}
        for field_name in self.__dataclass_fields__:
            if field_name != 'samples':
                result[field_name] = getattr(self, field_name)
        result['samples'] = self.samples
        return result

@dataclass(frozen=True)
class ReverseCandidate:
    """A proposed reverse link: ``source_id`` applies ``relation`` to ``target_id``.

    Built by ``_reverse_candidates`` and ranked by ``_candidate_sort_key``.
    """

    source_id: int
    target_id: int
    relation: str
    # Issue date of the source document; used as a tie-breaker when ranking.
    source_issue_date: date | None

class RelationResolver:
    """In-memory index that resolves relation mentions to known documents.

    Documents are indexed by id, stripped source URL, document-number keys
    and normalized title. The document-number and title indexes store
    ``None`` for a key claimed by more than one document, so ambiguous keys
    never resolve.
    """

    def __init__(self, docs: Iterable[TaxDocument]):
        """Build every index from *docs*."""
        self.docs_by_id: dict[int, RelationTarget] = {}
        self.by_source_url: dict[str, RelationTarget] = {}
        self.by_doc_number: dict[str, RelationTarget | None] = {}
        self.by_title: dict[str, RelationTarget | None] = {}
        self.title_targets: list[tuple[str, RelationTarget]] = []
        for doc in docs:
            target = RelationTarget(
                id=doc.id,
                title=doc.title,
                doc_number=doc.doc_number,
                source_url=doc.source_url,
                category_id=doc.category_id,
                issue_date=doc.issue_date,
                doc_type=doc.doc_type,
                # Prefer an explicit attribute; otherwise parse the HTML.
                source_status=getattr(doc, 'source_status', None) or source_status_from_html(getattr(doc, 'content_html', None)),
            )
            self.docs_by_id[target.id] = target
            if target.source_url:
                self.by_source_url[target.source_url.strip()] = target
            for number_key in self._doc_number_keys(target.doc_number):
                self._add_unique(self.by_doc_number, number_key, target)
            normalized_title = normalize_relation_title(target.title)
            if not normalized_title:
                continue
            self.title_targets.append((normalized_title, target))
            self._add_unique(self.by_title, normalized_title, target)

    @classmethod
    def from_db(cls, db: Session) -> RelationResolver:
        """Create a resolver from all ``TaxDocument`` rows, ordered by id."""
        columns = (
            TaxDocument.id,
            TaxDocument.title,
            TaxDocument.doc_number,
            TaxDocument.source_url,
            TaxDocument.category_id,
            TaxDocument.issue_date,
            TaxDocument.doc_type,
            TaxDocument.content_html,
        )
        rows = db.query(TaxDocument).options(load_only(*columns)).order_by(TaxDocument.id).all()
        return cls(rows)

    def lookup(self, doc_number: str | None=None, title: str | None=None, source_url: str | None=None) -> RelationTarget | None:
        """Resolve a mention to a target, or return ``None``.

        The source URL is tried first, then the document number (treated as a
        title when it starts with ``《`` or ``〈``), then the title.
        """
        if source_url:
            url_hit = self.by_source_url.get(source_url.strip())
            if url_hit:
                return url_hit
        raw_number = (doc_number or '').strip()
        if raw_number.startswith(('《', '〈')):
            # A bracket-quoted "number" is really a title.
            quoted_hit = self.by_title.get(normalize_relation_title(raw_number))
            if quoted_hit:
                return quoted_hit
        else:
            for number_key in self._doc_number_keys(raw_number):
                number_hit = self.by_doc_number.get(number_key)
                if number_hit:
                    return number_hit
        normalized_title = normalize_relation_title(title)
        if normalized_title:
            title_hit = self.by_title.get(normalized_title)
            if title_hit:
                return title_hit
        return None

    def lookup_containing_title(self, title: str | None, source_doc: TaxDocument) -> RelationTarget | None:
        """Find a regulation-like document whose title contains, or is contained in, *title*.

        Normalized titles shorter than four characters are rejected, and the
        source document itself is never returned.
        """
        wanted = normalize_relation_title(title)
        if len(wanted) < 4:
            return None
        matches: list[RelationTarget] = []
        for indexed_title, target in self.title_targets:
            if target.id == source_doc.id or not is_regulation_like(target):
                continue
            if wanted in indexed_title or indexed_title in wanted:
                matches.append(target)
        if matches:
            return self._best_title_candidate(matches, source_doc)
        return None

    @staticmethod
    def _best_title_candidate(candidates: list[RelationTarget], source_doc: TaxDocument) -> RelationTarget:
        """Pick the best match from a non-empty list of *candidates*.

        When the source has an issue date, the latest candidate issued on or
        before it wins; failing that, the earliest dated candidate. Without
        usable dates the latest candidate overall wins. At each step
        candidates in the source's category are preferred when any exist.
        """
        source_date = getattr(source_doc, 'issue_date', None)
        source_category = getattr(source_doc, 'category_id', None)

        def narrow(pool: list[RelationTarget]) -> list[RelationTarget]:
            in_category = [item for item in pool if item.category_id == source_category]
            return in_category if in_category else pool

        def latest_key(item: RelationTarget) -> tuple[date, int]:
            return (item.issue_date or date.min, item.id)

        def earliest_key(item: RelationTarget) -> tuple[date, int]:
            return (item.issue_date or date.max, item.id)

        if source_date:
            dated = [item for item in candidates if item.issue_date]
            not_after = [item for item in dated if item.issue_date <= source_date]
            if not_after:
                return max(narrow(not_after), key=latest_key)
            if dated:
                return min(narrow(dated), key=earliest_key)
        return max(narrow(candidates), key=latest_key)

    def is_self_reference(self, source_doc: TaxDocument, doc_number: str | None=None, title: str | None=None, source_url: str | None=None, target: RelationTarget | None=None) -> bool:
        """Return True when the mention points back at *source_doc* itself.

        The resolved target id, source URL, normalized document number and
        normalized title are compared in that order.
        """
        if target and target.id == source_doc.id:
            return True
        if source_url and source_doc.source_url and source_url.strip() == source_doc.source_url.strip():
            return True
        raw_number = (doc_number or '').strip()
        is_quoted_title = raw_number.startswith(('《', '〈'))
        own_number = normalize_legal_doc_number(source_doc.doc_number)
        if raw_number and not is_quoted_title and own_number and normalize_legal_doc_number(raw_number) == own_number:
            return True
        own_title = normalize_relation_title(source_doc.title)
        mentioned_title = normalize_relation_title(title or (raw_number if is_quoted_title else None))
        return bool(mentioned_title and own_title and mentioned_title == own_title)

    @staticmethod
    def _doc_number_keys(doc_number: str | None) -> set[str]:
        """Return the non-empty lookup keys derived from *doc_number*.

        Keys are the stripped text, the text without ASCII or ideographic
        spaces, the normalized number and, for presidential orders, a
        short-prefix variant.
        """
        if not doc_number:
            return set()
        stripped = doc_number.strip()
        compact = stripped.replace(' ', '').replace('\u3000', '')
        normalized = normalize_legal_doc_number(stripped)
        keys = {stripped, compact, normalized}
        if normalized.startswith('中华人民共和国主席令'):
            short_form = normalized.replace('中华人民共和国主席令', '主席令', 1)
            keys.add(short_form)
        return {key for key in keys if key}

    @staticmethod
    def _add_unique(index: dict[str, RelationTarget | None], key: str, target: RelationTarget) -> None:
        """Store *target* under *key*, marking the key ambiguous on conflict.

        A key that two different document ids claim is set to ``None`` and
        stays ``None`` afterwards.
        """
        if key not in index:
            index[key] = target
            return
        current = index[key]
        if current is None:
            # Already ambiguous; later claims cannot restore it.
            return
        if current and current.id != target.id:
            index[key] = None
            return
        index[key] = target

def build_relations_for_document(doc: TaxDocument, resolver: RelationResolver, extractor) -> tuple[list[dict[str, Any]] | None, list[dict[str, Any]] | None, RelationBuildStats]:
    """Extract and normalize the relations of a single document.

    Non-regulation documents are skipped and yield ``(None, None, stats)``.
    Otherwise the extractor runs on the document's markdown, supersedes
    entries inferred from the title are appended, and the combined result
    goes through ``normalize_extracted_relations``.
    """
    if not is_regulation_like(doc):
        skipped = RelationBuildStats(documents_processed=1, non_regulation_documents_skipped=1)
        return (None, None, skipped)
    extracted = extractor.extract(doc.content_markdown or '')
    inferred = _title_inferred_supersedes(doc, resolver)
    if inferred:
        # Build a new dict so the extractor's own result is left untouched.
        merged_supersedes = list(extracted.get('supersedes') or [])
        merged_supersedes.extend(inferred)
        extracted = {**extracted, 'supersedes': merged_supersedes}
    return normalize_extracted_relations(doc, extracted, resolver)

def _admit_reference_entry(doc: TaxDocument, entry: dict[str, Any], resolver: RelationResolver, supersedes_keys: set[str], references_by_key: dict[str, dict[str, Any]], stats: RelationBuildStats) -> bool:
    """Filter one reference candidate and store it; return True only if newly stored.

    Updates the ``references_invalid_target_removed``,
    ``references_removed_by_supersedes`` and (via ``_add_reference_entry``)
    ``references_deduped`` counters. Resolved/unresolved counting is left to
    the caller.
    """
    if not can_keep_reference(doc, entry.get('doc_id'), resolver):
        stats.references_invalid_target_removed += 1
        return False
    if _entry_keys(entry) & supersedes_keys:
        # The same target is already recorded as superseded/amended.
        stats.references_removed_by_supersedes += 1
        return False
    return _add_reference_entry(references_by_key, entry, stats)


def normalize_extracted_relations(doc: TaxDocument, rel: dict[str, Any], resolver: RelationResolver) -> tuple[list[dict[str, Any]] | None, list[dict[str, Any]] | None, RelationBuildStats]:
    """Resolve, filter and deduplicate raw extracted relations for *doc*.

    Args:
        doc: The source document the relations were extracted from.
        rel: Extractor output. The keys read are ``supersedes``,
            ``references``, ``reference_urls`` and ``doc_num_to_title``; a
            missing or ``None`` value is treated as empty.
        resolver: Index used to resolve mentions to known documents.

    Returns:
        ``(supersedes, references, stats)``. Each list is ``None`` when
        empty. Both are ``None`` when *doc* is not regulation-like.
    """
    stats = RelationBuildStats(documents_processed=1)
    if not is_regulation_like(doc):
        stats.non_regulation_documents_skipped = 1
        return (None, None, stats)
    # ``or {}`` mirrors the ``or []`` guards below: an explicit None must not crash.
    doc_num_to_title = _expand_doc_num_to_title(rel.get('doc_num_to_title') or {})

    supersedes_by_key: dict[str, dict[str, Any]] = {}
    demoted_references: list[dict[str, Any]] = []
    for raw_item in rel.get('supersedes', []) or []:
        if not isinstance(raw_item, dict):
            continue
        raw_number = (raw_item.get('doc_number') or '').strip()
        raw_title = raw_item.get('title') or doc_num_to_title.get(_doc_num_title_key(raw_number))
        source_url = raw_item.get('source_url')
        relation = raw_item.get('relation') or 'supersedes'
        if relation not in RELATION_RANK:
            relation = 'supersedes'
        target = resolver.lookup(raw_number, raw_title, source_url)
        if resolver.is_self_reference(doc, raw_number, raw_title, source_url, target):
            stats.supersedes_self_removed += 1
            continue
        if not can_apply_legal_relation(doc, target, relation):
            # Not a valid legal relation: keep it as a plain reference candidate.
            stats.supersedes_demoted_to_references += 1
            demoted_references.append(_relation_entry(raw_number, raw_title, target, source_url=source_url))
            continue
        entry = _relation_entry(raw_number, raw_title, target, relation=relation, source_url=source_url)
        key = _primary_key(entry)
        if not key:
            continue
        _add_supersedes_entry(supersedes_by_key, key, entry, stats)
        if target:
            stats.supersedes_resolved += 1
        else:
            stats.supersedes_unresolved += 1
    supersedes = list(supersedes_by_key.values())
    supersedes_keys = _all_relation_keys(supersedes)

    references_by_key: dict[str, dict[str, Any]] = {}
    for entry in demoted_references:
        if _admit_reference_entry(doc, entry, resolver, supersedes_keys, references_by_key, stats):
            if entry.get('doc_id'):
                stats.references_resolved += 1
            else:
                stats.references_unresolved += 1

    for url in rel.get('reference_urls', []) or []:
        target = resolver.lookup(source_url=url)
        if not target:
            continue
        if resolver.is_self_reference(doc, source_url=url, target=target):
            stats.references_self_removed += 1
            continue
        entry = _relation_entry(target.doc_number or '', target.title, target, source_url=url)
        # Count only entries that were actually stored, as the other two
        # reference loops do; a deduplicated URL must not count as resolved.
        if _admit_reference_entry(doc, entry, resolver, supersedes_keys, references_by_key, stats):
            stats.references_resolved += 1

    for raw_item in rel.get('references', []) or []:
        if not isinstance(raw_item, dict):
            continue
        raw_number = (raw_item.get('doc_number') or '').strip()
        raw_title = raw_item.get('title') or doc_num_to_title.get(_doc_num_title_key(raw_number))
        source_url = raw_item.get('source_url')
        target = resolver.lookup(raw_number, raw_title, source_url)
        if resolver.is_self_reference(doc, raw_number, raw_title, source_url, target):
            stats.references_self_removed += 1
            continue
        entry = _relation_entry(raw_number, raw_title, target, source_url=source_url)
        if _admit_reference_entry(doc, entry, resolver, supersedes_keys, references_by_key, stats):
            if target:
                stats.references_resolved += 1
            else:
                stats.references_unresolved += 1

    references = list(references_by_key.values())
    return (supersedes or None, references or None, stats)

def rebuild_document_relations(db: Session, batch_size: int=200, dry_run: bool=False, logger=None, limit: int=0) -> RelationBuildStats:
    """Recompute supersedes/references for stored documents and apply reverse links.

    Documents with non-null ``content_markdown`` are processed in id order,
    ``batch_size`` at a time. Changed relations are written back and
    committed after each batch unless ``dry_run`` is set. Afterwards
    ``apply_reverse_relation_updates`` runs over the relations of all
    documents known to the resolver. Documents not processed in this run
    contribute their stored ``supersedes`` value.

    Args:
        db: SQLAlchemy session.
        batch_size: Number of documents loaded per batch.
        dry_run: When true, nothing is assigned or committed; only statistics
            are collected.
        logger: Optional logger for progress and per-document failures.
        limit: When non-zero, process at most this many documents.

    Returns:
        Aggregated statistics for the whole run.
    """
    from app.services.tax_data_processor.parsers.relationship_extractor import RelationshipExtractor

    extractor = RelationshipExtractor()
    resolver = RelationResolver.from_db(db)
    stats = RelationBuildStats()
    relations_by_doc_id: dict[int, list[dict[str, Any]] | None] = {}
    query = db.query(TaxDocument).options(load_only(TaxDocument.id, TaxDocument.title, TaxDocument.doc_number, TaxDocument.source_url, TaxDocument.category_id, TaxDocument.issue_date, TaxDocument.doc_type, TaxDocument.content_markdown, TaxDocument.supersedes, TaxDocument.references)).filter(TaxDocument.content_markdown.isnot(None)).order_by(TaxDocument.id)
    total = query.count()
    if limit:
        total = min(total, limit)
    processed = 0
    offset = 0
    while processed < total:
        current_limit = min(batch_size, total - processed)
        batch = query.offset(offset).limit(current_limit).all()
        if not batch:
            break
        for doc in batch:
            try:
                supersedes, references, doc_stats = build_relations_for_document(doc, resolver, extractor)
                stats.add(doc_stats)
                relations_by_doc_id[doc.id] = supersedes
                if _json_relation_value(doc.supersedes) != _json_relation_value(supersedes) or _json_relation_value(doc.references) != _json_relation_value(references):
                    stats.documents_changed += 1
                    if len(stats.samples) < 20:
                        # A missing title must not abort the update after
                        # the change has already been counted.
                        title_preview = (doc.title or '')[:80]
                        stats.samples.append(f'id={doc.id} title={title_preview}')
                    if not dry_run:
                        doc.supersedes = supersedes
                        doc.references = references
            except Exception as exc:
                # Best effort: one bad document must not stop the whole run.
                stats.errors += 1
                if logger:
                    logger.warning(f'[build_relations] doc_id={doc.id} 处理异常: {exc}')
            processed += 1
        if not dry_run:
            db.commit()
        offset += len(batch)
        if logger:
            logger.info(f'[build_relations] 进度: {processed}/{total} changed={stats.documents_changed}')

    # Documents that were not (successfully) processed keep their stored
    # supersedes value. Fetch those in chunks instead of one query per id.
    missing_ids = [target_id for target_id in resolver.docs_by_id if target_id not in relations_by_doc_id]
    chunk_size = max(batch_size, 1)
    for start in range(0, len(missing_ids), chunk_size):
        chunk = missing_ids[start:start + chunk_size]
        rows = db.query(TaxDocument.id, TaxDocument.supersedes).filter(TaxDocument.id.in_(chunk)).all()
        stored_by_id = {row_id: stored for row_id, stored in rows}
        for target_id in chunk:
            existing = stored_by_id.get(target_id)
            relations_by_doc_id[target_id] = existing if isinstance(existing, list) else None

    reverse_stats = apply_reverse_relation_updates(db, resolver, relations_by_doc_id, dry_run=dry_run)
    stats.add(reverse_stats)
    if not dry_run:
        db.commit()
    return stats

def apply_reverse_relation_updates(db: Session, resolver: RelationResolver, relations_by_doc_id: dict[int, list[dict[str, Any]] | None], dry_run: bool=False) -> RelationBuildStats:
    """Synchronise ``superseded_by_doc_id`` and ``doc_status`` with the forward relations.

    Existing reverse links that no candidate backs and that are not a valid
    legal relation are cleared. Each winning candidate then sets the
    target's reverse link, clears duplicated external superseded-by data and
    may raise the target's status. With ``dry_run`` only counters change.
    """
    stats = RelationBuildStats()
    candidates = _reverse_candidates(resolver, relations_by_doc_id)
    linked_docs = db.query(TaxDocument).filter(TaxDocument.superseded_by_doc_id.isnot(None)).all()

    # Every document touched below: candidate targets and sources, plus both
    # ends of each existing reverse link.
    wanted_ids = set(candidates)
    wanted_ids.update(candidate.source_id for candidate in candidates.values())
    for linked in linked_docs:
        wanted_ids.add(linked.id)
        if linked.superseded_by_doc_id:
            wanted_ids.add(linked.superseded_by_doc_id)
    docs_by_id = {}
    if wanted_ids:
        for loaded in db.query(TaxDocument).filter(TaxDocument.id.in_(wanted_ids)).all():
            docs_by_id[loaded.id] = loaded

    for target_doc in linked_docs:
        if target_doc.id in candidates:
            # Handled by the candidate pass below.
            continue
        source_doc = docs_by_id.get(target_doc.superseded_by_doc_id)
        if source_doc and can_apply_legal_relation(source_doc, target_doc):
            continue
        stats.invalid_reverse_links_cleared += 1
        if target_doc.doc_status in ('obsolete', 'partially_obsolete', 'amended'):
            # The status may have come from the link being removed.
            stats.status_review_needed += 1
        if not dry_run:
            target_doc.superseded_by_doc_id = None

    for target_id, candidate in candidates.items():
        target_doc = docs_by_id.get(target_id)
        source_doc = docs_by_id.get(candidate.source_id)
        if not (target_doc and source_doc):
            continue
        if target_doc.superseded_by_doc_id != candidate.source_id:
            stats.reverse_links_changed += 1
            if not dry_run:
                target_doc.superseded_by_doc_id = candidate.source_id
        if is_same_legal_reference(target_doc.superseded_by_doc_number, target_doc.superseded_by_title, source_doc.doc_number, source_doc.title):
            stats.duplicate_external_cleared += 1
            if not dry_run:
                clear_duplicate_external_superseded_by(target_doc, source_doc)
        new_status = _status_from_relation(candidate.relation, target_doc)
        if new_status and new_status != target_doc.doc_status:
            stats.statuses_changed += 1
            if not dry_run:
                target_doc.doc_status = new_status
    return stats

def _reverse_candidates(resolver: RelationResolver, relations_by_doc_id: dict[int, list[dict[str, Any]] | None]) -> dict[int, ReverseCandidate]:
    """Choose, for every target document, the strongest source relation pointing at it.

    Items without a ``doc_id``, self-links, unknown documents and relations
    rejected by ``can_apply_legal_relation`` are ignored. Competing
    candidates are ranked with ``_candidate_sort_key``.
    """
    best: dict[int, ReverseCandidate] = {}
    for source_id, supersedes in relations_by_doc_id.items():
        source = resolver.docs_by_id.get(source_id)
        if not source or not isinstance(supersedes, list):
            continue
        for item in supersedes:
            if not isinstance(item, dict):
                continue
            target_id = item.get('doc_id')
            if not target_id or target_id == source_id:
                continue
            target = resolver.docs_by_id.get(target_id)
            relation = item.get('relation') or 'supersedes'
            if not target or not can_apply_legal_relation(source, target, relation):
                continue
            challenger = ReverseCandidate(
                source_id=source_id,
                target_id=target_id,
                relation=relation,
                source_issue_date=source.issue_date,
            )
            incumbent = best.get(target_id)
            if incumbent and not (_candidate_sort_key(challenger) > _candidate_sort_key(incumbent)):
                continue
            best[target_id] = challenger
    return best

def _candidate_sort_key(candidate: ReverseCandidate) -> tuple[int, date, int]:
    """Return the ranking key of *candidate*: relation rank, issue date, source id.

    Unknown relations rank 0 and a missing issue date sorts as ``date.min``.
    """
    relation_rank = RELATION_RANK.get(candidate.relation, 0)
    issued = candidate.source_issue_date or date.min
    return (relation_rank, issued, candidate.source_id)

def is_regulation_like(doc: Any) -> bool:
    """Return True when ``doc.doc_type`` is one of ``REGULATION_DOC_TYPES``.

    An object without a ``doc_type`` attribute is treated as having ``None``.
    """
    doc_type = getattr(doc, 'doc_type', None)
    return doc_type in REGULATION_DOC_TYPES

def legal_relation_level(doc: Any) -> int | None:
    """Return the relation level of *doc*, or ``None`` if it is not regulation-like.

    A ``'regulation'`` document whose ``category_id`` is an int in 1..8 uses
    that id as its level; every other regulation-like document gets
    ``LOCAL_RELATION_LEVEL``.
    """
    if not is_regulation_like(doc):
        return None
    if getattr(doc, 'doc_type', None) != 'regulation':
        return LOCAL_RELATION_LEVEL
    category_id = getattr(doc, 'category_id', None)
    if isinstance(category_id, int) and 1 <= category_id <= 8:
        return category_id
    return LOCAL_RELATION_LEVEL

def source_status_from_html(content_html: str | None) -> str | None:
    """Extract a status code from the status marker in *content_html*.

    Returns ``None`` when the HTML is empty, the marker is not found, or the
    marker text contains none of the labels in ``SOURCE_STATUS_MAP``.
    """
    if not content_html:
        return None
    found = SOURCE_STATUS_RE.search(content_html)
    if found is None:
        return None
    # Strip nested tags from the captured span content before matching labels.
    status_text = re.sub('<[^>]+>', '', found.group(1)).strip()
    return next((status for label, status in SOURCE_STATUS_MAP.items() if label in status_text), None)

def can_apply_legal_relation(source: Any, target: Any | None, relation: str | None=None) -> bool:
    """Decide whether *source* may apply *relation* (default ``'supersedes'``) to *target*.

    The source must be regulation-like. An unresolved target (``None``) is
    accepted. A resolved target must be regulation-like and its level must
    not be lower (numerically) than the source's. For every relation except
    ``'amends'`` the target must also not be marked ``'effective'`` and must
    not be issued after the source.
    """
    if not is_regulation_like(source):
        return False
    effective_relation = relation or 'supersedes'
    if target is None:
        return True
    if not is_regulation_like(target):
        return False
    if effective_relation != 'amends':
        target_status = getattr(target, 'source_status', None)
        if target_status is None:
            target_status = source_status_from_html(getattr(target, 'content_html', None))
        if target_status == 'effective':
            return False
        if source.issue_date and target.issue_date and source.issue_date < target.issue_date:
            return False
    source_level = legal_relation_level(source)
    target_level = legal_relation_level(target)
    if source_level is None or target_level is None:
        return False
    return not source_level > target_level

def can_keep_reference(source: Any, target_id: int | None, resolver: RelationResolver) -> bool:
    """Return True when a reference to *target_id* may be kept.

    An unresolved reference (falsy *target_id*) is always kept; a resolved
    one only if the resolver knows the target and it is regulation-like.
    *source* is not consulted.
    """
    if not target_id:
        return True
    target = resolver.docs_by_id.get(target_id)
    if not target:
        return False
    return bool(is_regulation_like(target))

def _status_from_relation(relation: str, target_doc: TaxDocument) -> str | None:
    """Return the status *relation* implies for *target_doc*, or ``None``.

    ``None`` is returned for non-regulation targets, for relations without a
    mapped status, and when the implied status ranks below the current one.
    """
    if target_doc.doc_type not in REGULATION_DOC_TYPES:
        return None
    proposed = STATUS_BY_RELATION.get(relation)
    if not proposed:
        return None
    current_rank = STATUS_RANK.get(target_doc.doc_status, 0)
    if STATUS_RANK.get(proposed, 0) < current_rank:
        # Never downgrade to a weaker status.
        return None
    return proposed

def _relation_entry(raw_number: str, raw_title: str | None, target: RelationTarget | None, relation: str | None=None, source_url: str | None=None) -> dict[str, Any]:
    """Build a relation dict, preferring the resolved target's data over the raw mention.

    ``relation`` and ``source_url`` keys are only present when truthy.
    """
    if target:
        entry: dict[str, Any] = {
            'doc_number': target.doc_number or raw_number,
            'doc_id': target.id,
            'title': target.title,
        }
    else:
        entry = {'doc_number': raw_number, 'doc_id': None, 'title': raw_title}
    for optional_key, optional_value in (('relation', relation), ('source_url', source_url)):
        if optional_value:
            entry[optional_key] = optional_value
    return entry

def _title_inferred_supersedes(doc: TaxDocument, resolver: RelationResolver) -> list[dict[str, Any]]:
    """Infer ``amends`` entries from the document's own title.

    Titles matched by ``_TITLE_AMENDMENT_RE`` or ``_TITLE_AMEND_PATTERNS``
    yield candidate titles. A candidate is kept only when it resolves to a
    different known document, and each normalized title is emitted once.
    """
    title = (doc.title or '').strip()
    if not title:
        return []

    candidate_titles: list[str] = []
    amendment = _TITLE_AMENDMENT_RE.match(title)
    if amendment:
        candidate_titles.append(amendment.group('title'))
    for pattern in _TITLE_AMEND_PATTERNS:
        for found in pattern.finditer(title):
            candidate_titles.append(found.group('title'))

    own_title_key = normalize_relation_title(title)
    emitted: set[str] = set()
    entries: list[dict[str, Any]] = []
    for candidate_title in candidate_titles:
        candidate_key = normalize_relation_title(candidate_title)
        if not candidate_key or candidate_key == own_title_key or candidate_key in emitted:
            continue
        # Exact title match first, then containment matching.
        target = resolver.lookup(title=candidate_title) or resolver.lookup_containing_title(candidate_title, doc)
        if not target or target.id == doc.id:
            continue
        emitted.add(candidate_key)
        cleaned = candidate_title.strip()
        entry = {'doc_number': f'《{cleaned}》', 'title': cleaned, 'relation': 'amends'}
        if target.source_url:
            entry['source_url'] = target.source_url
        entries.append(entry)
    return entries

def _add_supersedes_entry(entries_by_key: dict[str, dict[str, Any]], key: str, entry: dict[str, Any], stats: RelationBuildStats) -> None:
    """Store *entry* under *key*, keeping the higher-ranked relation on collision.

    A collision increments ``stats.supersedes_deduped``; the existing entry
    is replaced only when the new relation ranks strictly higher.
    """
    existing = entries_by_key.get(key)
    if existing:
        stats.supersedes_deduped += 1
        new_rank = RELATION_RANK.get(entry.get('relation'), 0)
        old_rank = RELATION_RANK.get(existing.get('relation'), 0)
        if new_rank > old_rank:
            entries_by_key[key] = entry
        return
    entries_by_key[key] = entry

def _add_reference_entry(entries_by_key: dict[str, dict[str, Any]], entry: dict[str, Any], stats: RelationBuildStats) -> bool:
    """Store *entry* under its primary key; return True only if it was newly added.

    Entries without a key are dropped silently; a repeated key increments
    ``stats.references_deduped``.
    """
    primary = _primary_key(entry)
    if primary and primary not in entries_by_key:
        entries_by_key[primary] = entry
        return True
    if primary:
        stats.references_deduped += 1
    return False

def _entry_keys(entry: dict[str, Any]) -> set[str]:
    """Return every non-empty identity key of *entry*.

    The set holds the combined key built from all fields plus the individual
    document-number, title and source-URL keys.
    """
    doc_number = entry.get('doc_number')
    title = entry.get('title')
    source_url = entry.get('source_url')
    candidates = [
        relation_identity_key(doc_id=entry.get('doc_id'), doc_number=doc_number, title=title, source_url=source_url),
        relation_identity_key(doc_number=doc_number),
        relation_identity_key(title=title),
        relation_identity_key(source_url=source_url),
    ]
    return {candidate for candidate in candidates if candidate}

def _all_relation_keys(entries: Iterable[dict[str, Any]]) -> set[str]:
    """Return the union of ``_entry_keys`` over all *entries*."""
    return set().union(*(_entry_keys(entry) for entry in entries))

def _primary_key(entry: dict[str, Any]) -> str:
    """Return the key used to deduplicate *entry*, or ``''`` if it has none.

    The doc-id key wins when it is among the entry's keys; otherwise the
    smallest key in sort order is used.
    """
    keys = _entry_keys(entry)
    id_key = relation_identity_key(doc_id=entry.get('doc_id'))
    if id_key in keys:
        return id_key
    if not keys:
        return ''
    return min(keys)

def _json_relation_value(value: Any) -> list[dict[str, Any]] | None:
    """Return *value* if it is a non-empty list, else ``None``.

    This makes ``None``, ``[]`` and non-list values compare equal.
    """
    if isinstance(value, list) and value:
        return value
    return None

def _expand_doc_num_to_title(mapping: dict[str, str]) -> dict[str, str]:
    """Index each title under both the compact and the normalized document number.

    The compact key comes from ``_doc_num_title_key``; the normalized key is
    added only when ``normalize_legal_doc_number`` returns a truthy value.
    """
    expanded: dict[str, str] = {}
    for raw_number, title in mapping.items():
        compact_key = _doc_num_title_key(raw_number)
        expanded[compact_key] = title
        normalized_key = normalize_legal_doc_number(raw_number)
        if not normalized_key:
            continue
        expanded[normalized_key] = title
    return expanded

def _doc_num_title_key(doc_number: str | None) -> str:
    """Return *doc_number* with ASCII and ideographic spaces removed (``''`` for falsy input)."""
    if not doc_number:
        return ''
    # Drop U+0020 and U+3000 in one pass.
    return doc_number.translate({0x20: None, 0x3000: None})
