# Source code for tibiapy.parsers.character

"""Models related to the Tibia.com character page."""
from __future__ import annotations

import logging
import re
from collections import OrderedDict
from typing import TYPE_CHECKING, Callable, Optional

from tibiapy.builders import CharacterBuilder
from tibiapy.enums import Sex, Vocation
from tibiapy.errors import InvalidContentError
from tibiapy.models import (
    AccountBadge,
    AccountInformation,
    Achievement,
    Character,
    CharacterHouse,
    Death,
    DeathParticipant,
    GuildMembership,
    OtherCharacter,
)
from tibiapy.utils import (
    clean_text,
    get_rows,
    parse_integer,
    parse_link_info,
    parse_popup,
    parse_tibia_date,
    parse_tibia_datetime,
    parse_tibiacom_content,
    split_list,
    try_enum,
)

if TYPE_CHECKING:
    import bs4

# Extracts the character's name and its scheduled deletion date from the name field.
deleted_regexp = re.compile(r"([^,]+), will be deleted at (.*)")
# Extracts the death's level and killers.
death_regexp = re.compile(r"Level (?P<level>\d+) by (?P<killers>.*)\.</td>")
# From the killers list, filters out the assists.
death_assisted = re.compile(r"(?P<killers>.+)\.<br/>Assisted by (?P<assists>.+)")
# From a killer entry, extracts the summoned creature
death_summon = re.compile(r"(?P<summon>an? .+) of (?P<name>[^<]+)")
# Matches complete anchor (<a>) tags that contain text.
link_search = re.compile(r"<a[^>]+>[^<]+</a>")
# Extracts the contents of a tag
link_content = re.compile(r">([^<]+)<")

# Extracts the text that follows "paid until" in a house entry.
house_regexp = re.compile(r"paid until (.*)")

# Splits a title entry into the title's name and the number of unlocked titles.
title_regexp = re.compile(r"(.*)\((\d+) titles? unlocked\)")
# Captures the two single-quoted arguments that follow `$(this),` in a popup call.
badge_popup_regexp = re.compile(r"\$\(this\),\s+'([^']+)',\s+'([^']+)',")

# Marker text that accompanies the name of a traded character.
traded_label = "(traded)"

__all__ = (
    "CharacterParser",
)

logger = logging.getLogger(__name__)


class CharacterParser:
    """A parser for characters from Tibia.com."""

    @classmethod
    def from_content(cls, content: str) -> Optional[Character]:
        """Create an instance of the class from the html content of the character's page.

        Parameters
        ----------
        content:
            The HTML content of the page.

        Returns
        -------
            The character contained in the page, or None if the character doesn't exist

        Raises
        ------
        InvalidContentError
            If content is not the HTML of a character's page.
        """
        parsed_content = parse_tibiacom_content(content)
        tables = cls._parse_tables(parsed_content)
        if not tables:
            message_table = parsed_content.select_one("div.TableContainer")
            if message_table and "Could not find character" in message_table.text:
                return None

        if "Character Information" not in tables:
            raise InvalidContentError("content does not contain a tibia.com character information page.")

        builder = CharacterBuilder()
        # Maps every known table title to the method that consumes its rows.
        table_parsers = {
            "Character Information": cls._parse_character_information,
            "Account Badges": cls._parse_account_badges,
            "Account Achievements": cls._parse_achievements,
            "Account Information": cls._parse_account_information,
            "Character Deaths": cls._parse_deaths,
            "Characters": cls._parse_other_characters,
        }
        for title, rows in tables.items():
            if parser := table_parsers.get(title):
                parser(builder, rows)

        return builder.build()

    @classmethod
    def _parse_account_information(cls, builder: CharacterBuilder, rows: list[bs4.Tag]) -> None:
        """Parse the character's account information.

        Every two-cell row is stored as a ``field -> value`` pair, with the field
        name normalized to lowercase snake case without colons.
        """
        acc_info = {}
        for row in rows:
            cols = [ele.text.strip() for ele in row.select("td")]
            # Rows that are not a label/value pair cannot be unpacked; skip them.
            if len(cols) != 2:
                continue

            field, value = cols
            field = clean_text(field).replace(" ", "_").replace(":", "").lower()
            acc_info[field] = clean_text(value)

        created = parse_tibia_datetime(acc_info["created"])
        loyalty_title = acc_info.get("loyalty_title")
        if loyalty_title == "(no title)":
            loyalty_title = None

        builder.account_information(AccountInformation(
            created=created,
            loyalty_title=loyalty_title,
            position=acc_info.get("position"),
        ))

    @classmethod
    def _parse_achievements(cls, builder: CharacterBuilder, rows: list[bs4.Tag]) -> None:
        """Parse the character's displayed achievements."""
        for row in rows:
            cols = row.select("td")
            if len(cols) != 2:
                continue

            grade_column, name_column = cols
            builder.add_achievement(Achievement(
                name=name_column.text.strip(),
                # The grade is the number of grade symbols found in the first cell's markup.
                grade=str(grade_column).count("achievement-grade-symbol"),
                # An image inside the name cell marks the achievement as secret.
                is_secret=name_column.select_one("img") is not None,
            ))

    @classmethod
    def _parse_account_badges(cls, builder: CharacterBuilder, rows: list[bs4.Tag]) -> None:
        """Parse the character's displayed badges.

        Only the first row is read; every ``td > span`` in it is treated as a badge.
        """
        if not rows:
            return

        for column in rows[0].select("td > span"):
            popup_span = column.select_one("span.HelperDivIndicator")
            popup = parse_popup(popup_span["onmouseover"])
            icon_image = column.select_one("img")
            builder.add_account_badge(AccountBadge(
                name=popup[0],
                icon_url=icon_image["src"],
                description=popup[1].text,
            ))

    @classmethod
    def _parse_character_information(cls, builder: CharacterBuilder, rows: list[bs4.Tag]) -> None:
        """Parse the character's basic information and applies the found values.

        Each handler receives the raw value cell and its cleaned text. Fields
        without a handler are logged at debug level and ignored.
        """
        field_actions: dict[str, Callable[[bs4.Tag, str], None]] = {
            "name": lambda rv, v: cls._parse_name_field(builder, v),
            "title": lambda rv, v: cls._parse_titles(builder, v),
            "former names": lambda rv, v: builder.former_names([fn.strip() for fn in v.split(",")]),
            "former world": lambda rv, v: builder.former_world(v),
            "sex": lambda rv, v: builder.sex(try_enum(Sex, v)),
            "vocation": lambda rv, v: builder.vocation(try_enum(Vocation, v)),
            "level": lambda rv, v: builder.level(parse_integer(v)),
            "achievement points": lambda rv, v: builder.achievement_points(parse_integer(v)),
            "world": lambda rv, v: builder.world(v),
            "residence": lambda rv, v: builder.residence(v),
            "last login": lambda rv, v: builder.last_login(
                None if "never logged" in v.lower() else parse_tibia_datetime(v),
            ),
            "position": lambda rv, v: builder.position(v),
            "comment": lambda rv, v: builder.comment(v),
            "account status": lambda rv, v: builder.is_premium("premium" in v.lower()),
            "married to": lambda rv, v: builder.married_to(v),
            "house": lambda rv, v: cls._parse_house_column(builder, rv),
            "guild membership": lambda rv, v: cls._parse_guild_column(builder, rv),
        }
        for row in rows:
            cells = row.select("td")
            # Rows that are not a label/value pair cannot be unpacked; skip them.
            if len(cells) != 2:
                continue

            raw_field, raw_value = cells
            field = clean_text(raw_field).replace(":", "").lower()
            action = field_actions.get(field)
            if action is None:
                logger.debug("Unhandled character information field found: %s", field)
                continue

            action(raw_value, clean_text(raw_value))

    @classmethod
    def _parse_name_field(cls, builder: CharacterBuilder, value: str) -> None:
        """Parse the name field, handling the deletion notice and the traded label."""
        if m := deleted_regexp.match(value):
            value = m.group(1)
            builder.deletion_date(parse_tibia_datetime(m.group(2)))

        if traded_label in value:
            value = value.replace(traded_label, "").strip()
            builder.traded(True)

        builder.name(value)

    @classmethod
    def _parse_titles(cls, builder: CharacterBuilder, value: str) -> None:
        """Parse the selected title and the number of unlocked titles.

        Nothing is set when the value does not match the expected format.
        """
        if not (m := title_regexp.match(value)):
            return

        name = m.group(1).strip()
        # The literal text "None" stands for no selected title.
        builder.title(None if name == "None" else name)
        builder.unlocked_titles(int(m.group(2)))

    @classmethod
    def _parse_house_column(cls, builder: CharacterBuilder, column: bs4.Tag) -> None:
        """Parse a house cell and add the house to the builder.

        The cell is skipped, with a debug log entry, when it has no
        "paid until" text or no link to read the house's details from.
        """
        house_text = clean_text(column)
        paid_match = house_regexp.search(house_text)
        house_link_tag = column.select_one("a")
        if paid_match is None or house_link_tag is None:
            logger.debug("Could not parse house column: %s", house_text)
            return

        house_link = parse_link_info(house_link_tag)
        query = house_link["query"]
        builder.add_house(
            CharacterHouse(
                id=int(query["houseid"]),
                name=house_link["text"],
                town=query["town"],
                paid_until=parse_tibia_date(paid_match.group(1)),
                world=query["world"],
            ),
        )

    @classmethod
    def _parse_guild_column(cls, builder: CharacterBuilder, column: bs4.Tag) -> None:
        """Parse the guild membership cell into the guild's name and the character's rank."""
        guild_link = column.select_one("a")
        # The rank is whatever precedes "of the" in the cell's text.
        rank = clean_text(column).split("of the")[0]
        builder.guild_membership(GuildMembership(name=clean_text(guild_link), rank=rank.strip()))

    @classmethod
    def _parse_deaths(cls, builder: CharacterBuilder, rows: list[bs4.Tag]) -> None:
        """Parse the character's recent deaths.

        Parsing stops at the first row that does not have exactly two cells, and
        the death list is flagged as truncated. Rows whose description does not
        match the expected format are ignored.
        """
        for row in rows:
            cols = row.select("td")
            if len(cols) != 2:
                builder.deaths_truncated(True)
                break

            date_column, desc_column = cols
            death_time = parse_tibia_datetime(date_column.text)
            if not (death_info := death_regexp.search(str(desc_column))):
                continue

            level = int(death_info.group("level"))
            killers_desc = death_info.group("killers")
            assists_name_list = []
            # Check if the killers list contains assists
            if assist_match := death_assisted.search(killers_desc):
                # Filter out assists
                killers_desc = assist_match.group("killers")
                # Every link in the assists part is one assisting participant.
                assists_name_list = link_search.findall(assist_match.group("assists"))

            killers_name_list = split_list(killers_desc)
            builder.add_death(Death(
                level=level,
                killers=[cls._parse_participant(k) for k in killers_name_list],
                assists=[cls._parse_participant(k) for k in assists_name_list],
                time=death_time,
            ))

    @classmethod
    def _parse_participant(cls, killer: str) -> DeathParticipant:
        """Parse a participant's information from their raw HTML string."""
        name = clean_text(killer)
        player = False
        traded = False
        summon = None
        if traded_label in killer:
            name = name.replace(traded_label, "").strip()
            traded = True
            player = True

        # If the killer contains a link, it is a player.
        if "href" in killer:
            player = True
            # Keep the cleaned text as the name if the link's content cannot be extracted.
            if link_match := link_content.search(killer):
                name = clean_text(link_match.group(1))

        # Check if it contains a summon.
        if summon_match := death_summon.search(name):
            summon = clean_text(summon_match.group("summon"))
            name = clean_text(summon_match.group("name"))

        return DeathParticipant(name=name, is_player=player, summon=summon, is_traded=traded)

    @classmethod
    def _parse_other_characters(cls, builder: CharacterBuilder, rows: list[bs4.Tag]) -> None:
        """Parse the character's other visible characters.

        The first row is not read (presumably the column headers - confirm), and
        rows without exactly four cells are ignored.
        """
        for row in rows[1:]:
            cols_raw = row.select("td")
            cols = [ele.text.strip() for ele in cols_raw]
            if len(cols) != 4:
                continue

            numbered_name, world, status, _ = cols
            # Drop the first space-separated token (presumably the list number - confirm).
            name = " ".join(clean_text(numbered_name).split(" ")[1:])
            traded = traded_label in name
            if traded:
                name = name.replace(traded_label, "").strip()

            main_img = cols_raw[0].select_one("img")
            # ``get`` avoids a KeyError on images that have no title attribute.
            main = bool(main_img) and main_img.get("title") == "Main Character"
            builder.add_other_character(OtherCharacter(
                name=name,
                world=world,
                is_online="online" in status,
                is_deleted="deleted" in status,
                is_main=main,
                position="CipSoft Member" if "CipSoft Member" in status else None,
                is_traded=traded,
            ))

    @classmethod
    def _parse_tables(cls, parsed_content: bs4.BeautifulSoup) -> dict[str, list[bs4.Tag]]:
        """Parse the tables contained in a character's page and returns a mapping of their titles and rows.

        Tables inside a ``TableContainer`` take their title from its caption and
        keep all their rows. Other tables take their title from their first cell,
        and their first row is left out of the result.
        """
        output = OrderedDict()
        for table in parsed_content.select('table[width="100%"]'):
            if container := table.find_parent("div", {"class": "TableContainer"}):
                title_tag = container.select_one("div.CaptionContainer")
                offset = 0
            else:
                title_tag = table.select_one("td")
                offset = 1

            # A table without a title cannot be mapped; skipping it lets unrelated
            # pages be reported through the caller's missing-table check.
            if title_tag is None:
                continue

            output[title_tag.text.strip()] = get_rows(table)[offset:]

        return output