Source code for animedex.backends.ann.models

"""Rich ANN Encyclopedia dataclasses.

ANN's public encyclopedia API is XML-only. The high-level backend
parses XML into a generic node tree first, then validates the
backend-specific rich models below. A 200 response carrying
``<warning>...`` is an empty-result signal and is preserved on the
rich response models instead of being raised as an error.
"""

from __future__ import annotations

from datetime import date
from typing import Any, Dict, List, Optional

from pydantic import Field

from animedex.models.anime import Anime, AnimeTitle
from animedex.models.character import Character, Staff
from animedex.models.common import BackendRichModel, SourceTag
from animedex.render.xml import ATTRS_KEY, CHILDREN_BY_TAG_KEY, CHILDREN_KEY, TAG_KEY, TAIL_KEY, TEXT_KEY


class AnnXmlNode(BackendRichModel):
    """Lossless XML element in the shape emitted by :mod:`animedex.render.xml`.

    Every field is populated through a pydantic alias taken from the
    matching ``*_KEY`` constant of the XML adapter, so an adapter dict
    can be validated directly (see :meth:`from_adapter`).
    """

    # Element name.
    tag: str = Field(alias=TAG_KEY)
    # XML attributes of the element.
    attrs: Dict[str, str] = Field(default_factory=dict, alias=ATTRS_KEY)
    # Text / tail payloads of the element, when present.
    text: Optional[str] = Field(default=None, alias=TEXT_KEY)
    tail: Optional[str] = Field(default=None, alias=TAIL_KEY)
    # Child nodes, plus an index of child nodes grouped by tag name.
    children: List["AnnXmlNode"] = Field(default_factory=list, alias=CHILDREN_KEY)
    children_by_tag: Dict[str, List["AnnXmlNode"]] = Field(default_factory=dict, alias=CHILDREN_BY_TAG_KEY)

    @classmethod
    def from_adapter(cls, node: Dict[str, Any]) -> "AnnXmlNode":
        """Validate a generic XML adapter dict into an :class:`AnnXmlNode`.

        :param node: Mapping in the adapter shape (keyed by the ``*_KEY`` aliases).
        :return: The validated node tree.
        """
        return cls.model_validate(node)

    def by_tag(self, tag: str) -> List["AnnXmlNode"]:
        """Look up children named ``tag`` in the grouped index.

        :param tag: Child element name to look up.
        :return: A new list of matching nodes; empty when the tag is
            absent from the index or maps to an empty/``None`` entry.
        """
        matches = self.children_by_tag.get(tag)
        if not matches:
            return []
        # Copy so callers cannot mutate the index through the result.
        return list(matches)

    def first_text(self, tag: str) -> Optional[str]:
        """Return the text of the first child named ``tag``.

        :param tag: Child element name to look up.
        :return: That child's ``text`` (which may itself be ``None``),
            or ``None`` when there is no such child.
        """
        matches = self.by_tag(tag)
        if not matches:
            return None
        return matches[0].text
class AnnInfo(BackendRichModel):
    """A single ANN ``<info>`` element.

    The same shape is reused for episode ``<title>`` rows (see
    :class:`AnnEpisode`).
    """

    # Attributes of the element; ``type`` carries the ANN info category.
    attrs: Dict[str, str] = {}
    # Text payload of the element, if any.
    text: Optional[str] = None
    # Nested child nodes, kept as generic XML nodes.
    children: List[AnnXmlNode] = []

    @property
    def type(self) -> Optional[str]:
        """ANN info type (for example ``"Main title"``), or ``None`` if unset."""
        info_type = self.attrs.get("type")
        return info_type
class AnnPersonRef(BackendRichModel):
    """Reference to an ANN person, as found on staff or cast rows."""

    # ANN person identifier, when the row carried one.
    id: Optional[str] = None
    # Display name of the person.
    name: Optional[str] = None

    def to_common_staff(self, source_tag: Optional[SourceTag], occupations: Optional[List[str]] = None) -> Staff:
        """Project this reference onto :class:`~animedex.models.character.Staff`.

        :param source_tag: Provenance to attach; a fresh default ANN tag
            is created when this is falsy.
        :param occupations: Occupation labels for the person; treated as
            an empty list when falsy.
        :return: The common staff record.
        """
        # Prefer the ANN id, fall back to the name, then to a literal marker.
        identity = self.id or self.name or "unknown"
        display_name = self.name or ""
        roles = occupations or []
        return Staff(
            id=f"ann:person:{identity}",
            name=display_name,
            occupations=roles,
            source=source_tag or _default_src(),
        )
class AnnCompanyRef(BackendRichModel):
    """Reference to an ANN company, as found on credit rows."""

    # ANN company identifier, when the row carried one.
    id: Optional[str] = None
    # Display name of the company.
    name: Optional[str] = None
class AnnStaff(BackendRichModel):
    """A single ANN ``<staff>`` row: a task performed by a person."""

    # Attributes of the ``<staff>`` element.
    attrs: Dict[str, str] = {}
    # Task label for the credit, if present.
    task: Optional[str] = None
    # The credited person, if present.
    person: Optional[AnnPersonRef] = None

    def to_common(self, source_tag: Optional[SourceTag] = None) -> Staff:
        """Project this staff credit onto the common staff shape.

        :param source_tag: Provenance to attach; forwarded to
            :meth:`AnnPersonRef.to_common_staff`.
        :return: The common staff record, with the task (when set) as
            the sole occupation.
        """
        # A row without a person still yields a record via an empty reference.
        ref = self.person or AnnPersonRef()
        if self.task:
            occupations = [self.task]
        else:
            occupations = []
        return ref.to_common_staff(source_tag, occupations)
class AnnCast(BackendRichModel):
    """A single ANN ``<cast>`` row: a role played by a person."""

    # Attributes of the ``<cast>`` element (``lang`` is read by ``to_common``).
    attrs: Dict[str, str] = {}
    # Role (character) name, if present.
    role: Optional[str] = None
    # The credited performer, if present.
    person: Optional[AnnPersonRef] = None

    def to_common(self, source_tag: Optional[SourceTag] = None) -> Character:
        """Project this cast row onto a :class:`Character` record.

        :param source_tag: Provenance to attach; a fresh default ANN tag
            is created when this is falsy.
        :return: A character whose id and name derive from the role name
            and whose ``role`` field carries the row's ``lang`` attribute.
        """
        id_suffix = self.role or "unknown"
        display_name = self.role or ""
        return Character(
            id=f"ann:character:{id_suffix}",
            name=display_name,
            role=self.attrs.get("lang"),
            source=source_tag or _default_src(),
        )
class AnnCredit(BackendRichModel):
    """A single ANN ``<credit>`` row: a task performed by a company."""

    # Attributes of the ``<credit>`` element.
    attrs: Dict[str, str] = {}
    # Task label for the credit, if present.
    task: Optional[str] = None
    # The credited company, if present.
    company: Optional[AnnCompanyRef] = None
class AnnEpisode(BackendRichModel):
    """A single ANN ``<episode>`` row."""

    # Attributes of the ``<episode>`` element.
    attrs: Dict[str, str] = {}
    # Episode ``<title>`` children, stored in the ``AnnInfo`` shape.
    titles: List[AnnInfo] = []
class AnnRelation(BackendRichModel):
    """A link to a related ANN encyclopedia entry."""

    # Direction of the relation; ``anime_from_node`` emits "prev" or "next".
    direction: str
    # Attributes of the originating ``related-prev`` / ``related-next`` element.
    attrs: Dict[str, str] = {}
class AnnAnime(BackendRichModel):
    """A single ANN ``<anime>`` encyclopedia entry."""

    # Values copied from the attributes of the ``<anime>`` element.
    id: str
    gid: Optional[str] = None
    type: Optional[str] = None
    name: Optional[str] = None
    precision: Optional[str] = None
    generated_on: Optional[str] = None
    # Typed child rows of the entry.
    info: List[AnnInfo] = []
    staff: List[AnnStaff] = []
    cast: List[AnnCast] = []
    credits: List[AnnCredit] = []
    episodes: List[AnnEpisode] = []
    reviews: List[AnnLink] = []
    releases: List[AnnLink] = []
    news: List[AnnLink] = []
    relations: List[AnnRelation] = []
    # The untouched XML node this entry was built from.
    raw: AnnXmlNode
    # Provenance of the response this entry came from.
    source_tag: Optional[SourceTag] = None

    def info_by_type(self, type_name: str) -> List[AnnInfo]:
        """Collect the ``<info>`` rows whose type equals ``type_name``.

        :param type_name: ANN info type string to match exactly.
        :return: Matching rows in document order (possibly empty).
        """
        matches = []
        for row in self.info:
            if row.type == type_name:
                matches.append(row)
        return matches

    def first_info_text(self, type_name: str) -> Optional[str]:
        """Return the text of the first ``<info>`` row of the given type.

        :param type_name: ANN info type string to match exactly.
        :return: The row's text (which may be ``None``), or ``None``
            when no row matches.
        """
        matches = self.info_by_type(type_name)
        if not matches:
            return None
        return matches[0].text

    def to_common(self) -> Anime:
        """Project this ANN entry onto :class:`~animedex.models.anime.Anime`.

        :return: The common anime record; the main title fills both the
            romaji and English title slots.
        """
        # Title: explicit main-title row first, then the element's name.
        main_title = self.first_info_text("Main title") or self.name or ""

        # Alternative titles become synonyms; the first one tagged JA is
        # additionally used as the native title.
        native_title = None
        alt_titles = []
        for alt in self.info_by_type("Alternative title"):
            if alt.text:
                alt_titles.append(alt.text)
            if native_title is None and alt.attrs.get("lang") == "JA":
                native_title = alt.text

        genre_names = [row.text for row in self.info_by_type("Genres") if row.text]
        theme_names = [row.text for row in self.info_by_type("Themes") if row.text]

        # Cover image: ``src`` attribute of the first picture row, if any.
        pictures = self.info_by_type("Picture")
        if pictures:
            cover = pictures[0].attrs.get("src")
        else:
            cover = None

        # Only credits that name a company contribute a studio.
        studio_names = [
            credit.company.name for credit in self.credits if credit.company and credit.company.name
        ]

        return Anime(
            id=f"ann:{self.id}",
            title=AnimeTitle(romaji=main_title, english=main_title, native=native_title),
            episodes=_parse_optional_int(self.first_info_text("Number of episodes")),
            studios=studio_names,
            description=self.first_info_text("Plot Summary"),
            genres=genre_names,
            tags=theme_names,
            format=_normalise_format(self.type),
            aired_from=_parse_vintage_start(self.first_info_text("Vintage")),
            cover_image_url=cover,
            age_rating=self.first_info_text("Objectionable content"),
            title_synonyms=alt_titles,
            ids={"ann": self.id},
            source=self.source_tag or _default_src(),
        )
class AnnAnimeResponse(BackendRichModel):
    """Parsed ANN ``api.xml`` response: anime entries plus any warnings."""

    # Non-empty ``<warning>`` texts found on the response root.
    warnings: List[str] = []
    # One entry per ``<anime>`` child of the root.
    anime: List[AnnAnime] = []
    # The untouched root XML node.
    raw: AnnXmlNode
    # Provenance of the response.
    source_tag: Optional[SourceTag] = None
class AnnReportItem(BackendRichModel):
    """A single ``<item>`` row of a ``reports.xml`` response."""

    # Child tag name -> child text, as collected by ``report_from_root``.
    fields: Dict[str, Any] = {}
    # The untouched ``<item>`` XML node.
    raw: AnnXmlNode
    # Provenance of the response this row came from.
    source_tag: Optional[SourceTag] = None
class AnnReport(BackendRichModel):
    """Parsed ANN ``reports.xml`` response."""

    # Attributes of the ``<report>`` root element.
    attrs: Dict[str, str] = {}
    # Tag -> text pairs gathered from the children of ``<args>`` nodes.
    args: Dict[str, str] = {}
    # One entry per ``<item>`` child of the root.
    items: List[AnnReportItem] = []
    # Non-empty ``<warning>`` texts found on the root.
    warnings: List[str] = []
    # The untouched root XML node.
    raw: AnnXmlNode
    # Provenance of the response.
    source_tag: Optional[SourceTag] = None
def _default_src() -> "SourceTag":
    """Construct a fallback :class:`SourceTag` for direct model use.

    :return: A tag for the ``"ann"`` backend stamped with the current UTC time.
    """
    from datetime import datetime, timezone

    return SourceTag(backend="ann", fetched_at=datetime.now(timezone.utc))


def _parse_optional_int(value: Optional[str]) -> Optional[int]:
    """Convert ``value`` to ``int``, returning ``None`` when that is not possible.

    :param value: Text to convert, or ``None``.
    :return: The parsed integer, or ``None`` for ``None`` / non-integer text.
    """
    if value is None:
        return None
    try:
        return int(value)
    except (TypeError, ValueError):
        return None


def _parse_partial_date(text: str) -> Optional[date]:
    """Parse ``YYYY``, ``YYYY-MM`` or ``YYYY-MM-DD`` into a :class:`date`.

    Missing month / day components default to ``1``.

    :param text: Candidate date text.
    :return: The parsed date, or ``None`` when ``text`` is not a valid
        date in one of the three accepted shapes.
    """
    parts = text.split("-")
    if len(parts) > 3:
        return None
    try:
        numbers = [int(part) for part in parts]
        # Pad a year-only or year-month value out to a full (y, m, d) triple.
        numbers += [1] * (3 - len(numbers))
        return date(*numbers)
    except (TypeError, ValueError):
        return None


def _parse_vintage_start(value: Optional[str]) -> Optional[date]:
    """Extract the start date from an ANN "Vintage" string.

    Only the part before ``" to "`` is considered, so a range such as
    ``"1999-10-06 to 2000-01-26"`` yields its first date. The start may
    be a year, a year-month or a full date; missing components default
    to ``1``. A trailing annotation after the date (for example
    ``"2001-07-20 (Japan)"``) is tolerated: when the whole start segment
    does not parse, its first whitespace-delimited token is tried.

    :param value: Raw vintage text, or ``None``.
    :return: The start date, or ``None`` when none can be parsed.
    """
    if not value:
        return None
    head = value.split(" to ", 1)[0].strip()

    # Try the whole segment first so every input that parsed before the
    # annotation fallback existed still yields exactly the same result.
    parsed = _parse_partial_date(head)
    if parsed is not None:
        return parsed

    tokens = head.split()
    if tokens and tokens[0] != head:
        return _parse_partial_date(tokens[0])
    return None


def _normalise_format(value: Optional[str]) -> Optional[str]:
    """Map an ANN type string onto a known upper-case format code.

    :param value: Raw type string, or ``None``.
    :return: The upper-cased value (spaces replaced by underscores) when
        it is one of the recognised codes, otherwise ``None``.
    """
    if not value:
        return None
    norm = value.upper().replace(" ", "_")
    if norm in {"TV", "MOVIE", "OVA", "ONA", "SPECIAL", "MUSIC"}:
        return norm
    return None


def _info_from_node(node: "AnnXmlNode") -> "AnnInfo":
    """Build an :class:`AnnInfo` from a node's attrs, text and children."""
    return AnnInfo(attrs=node.attrs, text=node.text, children=node.children)


def _link_from_node(node: "AnnXmlNode") -> "AnnLink":
    """Build an :class:`AnnLink` from a node's attrs and text."""
    return AnnLink(attrs=node.attrs, text=node.text)


def _person_from_node(node: Optional["AnnXmlNode"]) -> Optional["AnnPersonRef"]:
    """Build an :class:`AnnPersonRef` from a node, or ``None`` for no node."""
    if node is None:
        return None
    return AnnPersonRef(id=node.attrs.get("id"), name=node.text)


def _company_from_node(node: Optional["AnnXmlNode"]) -> Optional["AnnCompanyRef"]:
    """Build an :class:`AnnCompanyRef` from a node, or ``None`` for no node."""
    if node is None:
        return None
    return AnnCompanyRef(id=node.attrs.get("id"), name=node.text)


def _first(nodes: List["AnnXmlNode"]) -> Optional["AnnXmlNode"]:
    """Return the first element of ``nodes``, or ``None`` when it is empty."""
    return nodes[0] if nodes else None
def anime_from_node(node: AnnXmlNode, source_tag: SourceTag) -> AnnAnime:
    """Build an :class:`AnnAnime` from an adapted XML ``anime`` node.

    :param node: The ``<anime>`` element as a generic XML node.
    :param source_tag: Provenance to attach to the entry.
    :return: The validated entry, keeping ``node`` as its ``raw`` payload.
    """
    info_rows = []
    for row in node.by_tag("info"):
        info_rows.append(_info_from_node(row))

    staff_rows = []
    for row in node.by_tag("staff"):
        person = _person_from_node(_first(row.by_tag("person")))
        staff_rows.append(AnnStaff(attrs=row.attrs, task=row.first_text("task"), person=person))

    cast_rows = []
    for row in node.by_tag("cast"):
        person = _person_from_node(_first(row.by_tag("person")))
        cast_rows.append(AnnCast(attrs=row.attrs, role=row.first_text("role"), person=person))

    credit_rows = []
    for row in node.by_tag("credit"):
        company = _company_from_node(_first(row.by_tag("company")))
        credit_rows.append(AnnCredit(attrs=row.attrs, task=row.first_text("task"), company=company))

    episode_rows = []
    for row in node.by_tag("episode"):
        titles = [_info_from_node(title) for title in row.by_tag("title")]
        episode_rows.append(AnnEpisode(attrs=row.attrs, titles=titles))

    # Predecessors are listed before successors.
    relation_rows = [AnnRelation(direction="prev", attrs=row.attrs) for row in node.by_tag("related-prev")]
    relation_rows.extend(AnnRelation(direction="next", attrs=row.attrs) for row in node.by_tag("related-next"))

    element_attrs = node.attrs
    payload = {
        "id": element_attrs.get("id"),
        "gid": element_attrs.get("gid"),
        "type": element_attrs.get("type"),
        "name": element_attrs.get("name"),
        "precision": element_attrs.get("precision"),
        "generated_on": element_attrs.get("generated-on"),
        "info": info_rows,
        "staff": staff_rows,
        "cast": cast_rows,
        "credits": credit_rows,
        "episodes": episode_rows,
        "reviews": [_link_from_node(row) for row in node.by_tag("review")],
        "releases": [_link_from_node(row) for row in node.by_tag("release")],
        "news": [_link_from_node(row) for row in node.by_tag("news")],
        "relations": relation_rows,
        "raw": node,
        "source_tag": source_tag,
    }
    # Validation (rather than direct construction) enforces the required ``id``.
    return AnnAnime.model_validate(payload)
def anime_response_from_root(root: AnnXmlNode, source_tag: SourceTag) -> AnnAnimeResponse:
    """Build an :class:`AnnAnimeResponse` from an ANN ``<ann>`` root.

    :param root: The response root as a generic XML node.
    :param source_tag: Provenance to attach to the response and its entries.
    :return: The response, with non-empty ``<warning>`` texts preserved
        alongside the parsed ``<anime>`` entries.
    """
    warning_texts = []
    for warning_node in root.by_tag("warning"):
        if warning_node.text:
            warning_texts.append(warning_node.text)

    entries = []
    for anime_node in root.by_tag("anime"):
        entries.append(anime_from_node(anime_node, source_tag))

    return AnnAnimeResponse(
        warnings=warning_texts,
        anime=entries,
        raw=root,
        source_tag=source_tag,
    )
def report_from_root(root: AnnXmlNode, source_tag: SourceTag) -> AnnReport:
    """Build an :class:`AnnReport` from a ``<report>`` root.

    :param root: The report root as a generic XML node.
    :param source_tag: Provenance to attach to the report and its items.
    :return: The report with its args, items and non-empty warnings.
    """
    # Flatten every child of every <args> node; a later duplicate tag
    # overwrites an earlier one, and missing text becomes "".
    report_args = {
        arg.tag: arg.text or ""
        for args_node in root.by_tag("args")
        for arg in args_node.children
    }

    report_items = [
        AnnReportItem(
            fields={column.tag: column.text for column in item_node.children},
            raw=item_node,
            source_tag=source_tag,
        )
        for item_node in root.by_tag("item")
    ]

    warning_texts = [node.text for node in root.by_tag("warning") if node.text]

    return AnnReport(
        attrs=root.attrs,
        args=report_args,
        items=report_items,
        warnings=warning_texts,
        raw=root,
        source_tag=source_tag,
    )
def selftest() -> bool:
    """Smoke-test the ANN rich models and the warning path.

    Builds a root that carries only a ``<warning>`` and checks that the
    warning survives parsing, then checks that a minimal
    :class:`AnnAnime` projects to the expected common id.

    :return: ``True`` when every check passes.
    :raises AssertionError: If a check fails.
    """
    from datetime import datetime, timezone

    def _warning_node() -> dict:
        # A fresh dict per call, so the two places that embed the
        # warning never share one object.
        return {"_tag": "warning", "_attrs": {}, "_text": "no result", "_children": [], "_children_by_tag": {}}

    tag = SourceTag(backend="ann", fetched_at=datetime.now(timezone.utc))
    root_payload = {
        "_tag": "ann",
        "_attrs": {},
        "_children": [_warning_node()],
        "_children_by_tag": {"warning": [_warning_node()]},
    }
    root = AnnXmlNode.from_adapter(root_payload)

    parsed = anime_response_from_root(root, tag)
    assert parsed.warnings == ["no result"]

    main_title = AnnInfo(attrs={"type": "Main title", "lang": "EN"}, text="Angel Links")
    entry = AnnAnime(
        id="1",
        name="Angel Links",
        type="TV",
        info=[main_title],
        raw=root,
        source_tag=tag,
    )
    assert entry.to_common().id == "ann:1"
    return True