# Source code for tibiapy.forum

import datetime
import re
from typing import List, Optional

import bs4

from tibiapy import abc, errors, GuildMembership
from tibiapy.enums import ThreadStatus, Vocation
from tibiapy.utils import convert_line_breaks, get_tibia_url, parse_tibia_datetime, parse_tibia_forum_datetime, \
    parse_tibiacom_content, split_list, try_enum

# Names exported by a star-import of this module.
__all__ = (
    'CMPost',
    'CMPostArchive',
    'ForumAnnouncement',
    'ForumBoard',
    'ForumEmoticon',
    'ForumPost',
    'ForumThread',
    'LastPost',
    'ListedAnnouncement',
    'ListedBoard',
    'ListedThread',
    'ForumAuthor',
)

# Patterns that capture the numeric value of a query string parameter found in forum links.
section_id_regex = re.compile(r'sectionid=(\d+)')
board_id_regex = re.compile(r'boardid=(\d+)')
post_id_regex = re.compile(r'postid=(\d+)')
thread_id_regex = re.compile(r'threadid=(\d+)')
announcement_id_regex = re.compile(r'announcementid=(\d+)')
page_number_regex = re.compile(r'pagenumber=(\d+)')
# Captures the timezone abbreviation that follows "times are".
# NOTE(review): the pattern requires the literal "CES", so it matches "CEST" but can never match "CET".
timezone_regex = re.compile(r'times are (CEST?)')
# Captures the file name of a GIF image. The dot is escaped so only a literal ".gif" extension is accepted;
# ``\w`` already covers the underscore, so no explicit character class is needed.
filename_regex = re.compile(r'(\w+\.gif)')
# Matches a parenthesised "(Pages ...)" fragment so it can be stripped from a text.
pages_regex = re.compile(r'\(Pages[^)]+\)')

# Captures world, vocation and level from the text block describing a character.
author_info_regex = re.compile(r'Inhabitant of (\w+)\nVocation: ([\w\s]+)\nLevel: (\d+)')
# Captures the number following "Posts: ".
author_posts_regex = re.compile(r'Posts: (\d+)')
# Splits "<rank> of the <guild>" into its rank and guild parts.
guild_regexp = re.compile(r'([\s\w()]+)\sof the\s(.+)')
# Splits "<name> (<title>)" into the name and the text between parentheses.
guild_title_regexp = re.compile(r'([^(]+)\s\(([^)]+)\)')
# Captures timestamps formatted as "DD.MM.YYYY HH:MM:SS".
post_dates_regex = re.compile(r'(\d{2}\.\d{2}\.\d{4}\s\d{2}:\d{2}:\d{2})')
# Captures the name that follows "Edited by" and precedes " on <date>".
edited_by_regex = re.compile(r'Edited by (.*) on \d{2}')

# Text that separates a post's content from the signature that follows it.
signature_separator = "________________"


class CMPost(abc.BasePost, abc.Serializable):
    """A single entry of the CM Post Archive.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    post_id: :class:`int`
        The ID of the post.
    date: :class:`datetime.datetime`
        The date when the post was made.
    board: :class:`str`
        The name of the board where the post was made.
    thread_title: :class:`str`
        The title of the thread where the post is.
    """

    __slots__ = ("post_id", "date", "board", "thread_title")

    def __init__(self, **kwargs):
        # Every attribute is optional, missing ones are left as ``None``.
        read = kwargs.get
        self.post_id: int = read("post_id")
        self.date: datetime.datetime = read("date")
        self.board: str = read("board")
        self.thread_title: str = read("thread_title")

    def __repr__(self):
        class_name = self.__class__.__name__
        return (f"<{class_name} post_id={self.post_id} date={self.date!r} "
                f"thread_title={self.thread_title!r} board={self.board}>")
class CMPostArchive(abc.Serializable):
    """Represents the CM Post Archive.

    The CM Post Archive is a collection of posts made in the forum by community managers.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    start_date: :class:`datetime.date`
        The start date of the displayed posts.
    end_date: :class:`datetime.date`
        The end date of the displayed posts.
    page: :class:`int`
        The currently displayed page.
    total_pages: :class:`int`
        The number of pages available.
    results_count: :class:`int`
        The total number of results available in the selected date range.
    posts: :class:`list` of :class:`CMPost`
        The list of posts for the selected range.
    """

    __slots__ = (
        "start_date",
        "end_date",
        "page",
        "total_pages",
        "results_count",
        "posts",
    )

    def __init__(self, **kwargs):
        self.start_date: datetime.date = kwargs.get("start_date")
        self.end_date: datetime.date = kwargs.get("end_date")
        self.page: int = kwargs.get("page", 1)
        self.total_pages: int = kwargs.get("total_pages", 1)
        self.results_count: int = kwargs.get("results_count", 0)
        self.posts: List[CMPost] = kwargs.get("posts", [])

    def __repr__(self):
        return f"<{self.__class__.__name__} start_date={self.start_date!r} end_date={self.end_date!r} " \
               f"result_count={self.results_count} page={self.page} total_pages={self.total_pages}>"

    # region Properties
    @property
    def url(self):
        """:class:`str`: The URL of the CM Post Archive with the current parameters."""
        return self.get_url(self.start_date, self.end_date, self.page)

    @property
    def previous_page_url(self):
        """:class:`str`: The URL to the previous page of the current CM Post Archive results, if there's any."""
        return self.get_page_url(self.page - 1) if self.page > 1 else None

    @property
    def next_page_url(self):
        """:class:`str`: The URL to the next page of the current CM Post Archive results, if there's any."""
        return self.get_page_url(self.page + 1) if self.page < self.total_pages else None
    # endregion

    # region Public Methods
    def get_page_url(self, page):
        """Gets the URL of the CM Post Archive at a specific page, with the current date parameters.

        Parameters
        ----------
        page: :class:`int`
            The desired page.

        Returns
        -------
        :class:`str`
            The URL to the desired page.

        Raises
        ------
        ValueError
            If ``page`` is lower than 1.
        """
        if page <= 0:
            raise ValueError("page must be 1 or greater")
        return self.get_url(self.start_date, self.end_date, page)

    @classmethod
    def get_url(cls, start_date, end_date, page=1):
        """Gets the URL to the CM Post Archive for the given date range.

        Parameters
        ----------
        start_date: :class:`datetime.date`
            The start date to display.
        end_date: :class:`datetime.date`
            The end date to display.
        page: :class:`int`
            The desired page to display.

        Returns
        -------
        :class:`str`
            The URL to the CM Post Archive

        Raises
        ------
        TypeError:
            Either of the dates is not an instance of :class:`datetime.date`
        ValueError:
            If ``start_date`` is more recent than ``end_date`` or ``page`` is lower than 1.
        """
        if not isinstance(start_date, datetime.date):
            raise TypeError(f"start_date: expected datetime.date instance, {type(start_date)} found.")
        if not isinstance(end_date, datetime.date):
            # Report the offending argument, not ``start_date``.
            raise TypeError(f"end_date: expected datetime.date instance, {type(end_date)} found.")
        if end_date < start_date:
            raise ValueError("start_date can't be more recent than end_date.")
        if page < 1:
            raise ValueError("page must be 1 or greater.")
        return get_tibia_url("forum", "forum", action="cm_post_archive", startday=start_date.day,
                             startmonth=start_date.month, startyear=start_date.year, endday=end_date.day,
                             endmonth=end_date.month, endyear=end_date.year, currentpage=page)

    @classmethod
    def from_content(cls, content):
        """Parses the content of the CM Post Archive page from Tibia.com

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the CM Post Archive in Tibia.com

        Returns
        -------
        :class:`CMPostArchive`
            The CM Post archive found in the page.

        Raises
        ------
        InvalidContent
            If content is not the HTML content of the CM Post Archive in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)
        form = parsed_content.find("form")
        try:
            # The form is expected to hold exactly six selectors: month, day and year for each end of the range.
            start_month_selector, start_day_selector, start_year_selector, \
                end_month_selector, end_day_selector, end_year_selector = form.find_all("select")
            start_date = cls._get_selected_date(start_month_selector, start_day_selector, start_year_selector)
            end_date = cls._get_selected_date(end_month_selector, end_day_selector, end_year_selector)
        except (AttributeError, KeyError, TypeError, ValueError) as e:
            # AttributeError: no form found. ValueError: unexpected number of selectors.
            # TypeError/KeyError: a selector without options or an option without a value.
            raise errors.InvalidContent("content does not belong to the CM Post Archive in Tibia.com", e) from e
        cm_archive = cls(start_date=start_date, end_date=end_date)
        table = parsed_content.find("table", attrs={"class": "Table3"})
        if not table:
            return cm_archive
        inner_table_container = table.find("div", attrs={"class": "InnerTableContainer"})
        inner_table = inner_table_container.find("table")
        # Only direct rows of the inner table are kept, rows of nested tables are discarded.
        inner_table_rows = [row for row in inner_table.find_all("tr") if row.parent == inner_table]
        table_content = inner_table_container.find("table", attrs={"class": "TableContent"})
        _header_row, *rows = table_content.find_all("tr")
        for row in rows:
            date_column, board_thread_column, link_column, *_ = row.find_all("td")
            date = parse_tibia_datetime(date_column.text.replace("\xa0", " "))
            # Board and thread title share a column, each on its own line.
            convert_line_breaks(board_thread_column)
            board, thread = board_thread_column.text.splitlines()
            post_link_url = link_column.find("a")["href"]
            post_id = int(post_id_regex.search(post_link_url).group(1))
            cm_archive.posts.append(CMPost(date=date, board=board, thread_title=thread, post_id=post_id))
        if not cm_archive.posts:
            return cm_archive
        pages_column, results_column = inner_table_rows[-1].find_all("div")
        page_links = pages_column.find_all("a")
        listed_pages = [int(p.text) for p in page_links]
        if listed_pages:
            # The current page is the only one that is not a link, so it is the first number missing in the list.
            cm_archive.page = next((x for x in range(1, listed_pages[-1] + 1) if x not in listed_pages), 0)
            cm_archive.total_pages = max(int(page_links[-1].text), cm_archive.page)
            if not cm_archive.page:
                # No gaps in the listed pages means the current page comes right after the last link.
                cm_archive.total_pages += 1
                cm_archive.page = cm_archive.total_pages
        cm_archive.results_count = int(results_column.text.split(":")[-1])
        return cm_archive
    # endregion

    # region Private Methods
    @classmethod
    def _get_selected_date(cls, month_selector, day_selector, year_selector):
        """Gets the date made from the selected options in the selectors.

        If a selector has no option marked as selected, its first option is used.

        Parameters
        ----------
        month_selector: :class:`bs4.Tag`
            The month selector.
        day_selector: :class:`bs4.Tag`
            The day selector.
        year_selector: :class:`bs4.Tag`
            The year selector.

        Returns
        -------
        :class:`datetime.date`
            The selected date, or :obj:`None` if the selected values do not make a valid date.
        """
        selected_month = month_selector.find("option", {"selected": True}) or month_selector.find("option")
        selected_day = day_selector.find("option", {"selected": True}) or day_selector.find("option")
        selected_year = year_selector.find("option", {"selected": True}) or year_selector.find("option")
        try:
            return datetime.date(year=int(selected_year["value"]), month=int(selected_month["value"]),
                                 day=int(selected_day["value"]))
        except ValueError:
            return None
    # endregion
class ForumAnnouncement(abc.BaseAnnouncement, abc.Serializable):
    """Represents a forum announcement.

    These are a special kind of thread that are shown at the top of boards.
    They cannot be replied to and they show no view counts.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    announcement_id: :class:`int`
        The id of the announcement.
    board: :class:`str`
        The board this thread belongs to.
    section: :class:`str`
        The board section this thread belongs to.
    board_id: :class:`int`
        The internal id of the board the post is in.
    section_id: :class:`int`
        The internal id of the section the post is in.
    author: :class:`ForumAuthor`
        The author of the announcement.
    title: :class:`str`
        The title of the announcement.
    content: :class:`str`
        The HTML content of the announcement.
    start_date: :class:`datetime.datetime`
        The starting date of the announcement.
    end_date: :class:`datetime.datetime`
        The end date of the announcement.
    """

    __slots__ = (
        "announcement_id",
        "board",
        "board_id",
        "section",
        "section_id",
        "author",
        "title",
        "content",
        "start_date",
        "end_date",
    )

    def __init__(self, **kwargs):
        self.title: str = kwargs.get("title")
        self.announcement_id: int = kwargs.get("announcement_id", 0)
        self.board: str = kwargs.get("board")
        self.board_id: int = kwargs.get("board_id", 0)
        self.section: str = kwargs.get("section")
        self.section_id: int = kwargs.get("section_id", 0)
        self.author: ForumAuthor = kwargs.get("author")
        self.content: str = kwargs.get("content")
        self.start_date: datetime.datetime = kwargs.get("start_date")
        self.end_date: datetime.datetime = kwargs.get("end_date")

    def __repr__(self):
        # The value shown is the announcement's id, so it is labeled as such.
        return f"<{self.__class__.__name__} title={self.title!r} announcement_id={self.announcement_id!r}>"

    @classmethod
    def from_content(cls, content, announcement_id=0):
        """Parses the content of an announcement's page from Tibia.com

        Parameters
        ----------
        content: :class:`str`
            The HTML content of an announcement in Tibia.com
        announcement_id: :class:`int`
            The id of the announcement. Since there is no way to obtain the id from the page,
            the id may be passed to assign it.

        Returns
        -------
        :class:`ForumAnnouncement`
            The announcement contained in the page or :obj:`None` if not found.

        Raises
        ------
        InvalidContent
            If content is not the HTML content of an announcement page in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table", attrs={"width": "100%"})
        root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
        if not root_tables:
            error_table = parsed_content.find("table", attrs={"class": "Table1"})
            if error_table and "not be found" in error_table.text:
                return None
            raise errors.InvalidContent("content is not a Tibia.com forum announcement.")
        try:
            forum_info_table, posts_table, footer_table = root_tables
        except ValueError as e:
            # An unexpected number of tables means this is some other kind of page.
            raise errors.InvalidContent("content is not a Tibia.com forum announcement.", e) from e
        section_link, board_link, *_ = forum_info_table.find_all("a")
        section = section_link.text
        section_id = int(section_id_regex.search(section_link["href"]).group(1))
        board = board_link.text
        board_id = int(board_id_regex.search(board_link["href"]).group(1))
        announcement = cls(section=section, section_id=section_id, board=board, board_id=board_id,
                           announcement_id=announcement_id)

        # Timestamps carry no timezone, the footer tells which one is in use.
        # Only "CEST" (UTC+2) needs a two hour offset. Anything else, including a notice the pattern
        # does not recognise, is treated as CET (UTC+1) instead of failing.
        timezone_match = timezone_regex.search(footer_table.text)
        offset = 2 if timezone_match and timezone_match.group(1) == "CEST" else 1

        announcement_container = posts_table.find("td", attrs={"class": "CipPost"})
        character_info_container = announcement_container.find("div", attrs={"class": "PostCharacterText"})
        announcement.author = ForumAuthor._parse_author_table(character_info_container)

        post_container = posts_table.find("div", attrs={"class": "PostText"})
        title_tag = post_container.find("b")
        announcement.title = title_tag.text
        dates_container = post_container.find("font")
        dates = post_dates_regex.findall(dates_container.text)
        # Everything before the first horizontal rule is the header (title and dates), the rest is the content.
        announcement_content = post_container.encode_contents().decode()
        _, announcement_content = announcement_content.split("<hr/>", 1)
        announcement.content = announcement_content
        announcement.start_date, announcement.end_date = (parse_tibia_forum_datetime(date, offset) for date in dates)
        return announcement
class ForumAuthor(abc.BaseCharacter, abc.Serializable):
    """Represents a post's author.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    name: :class:`str`
        The name of the character, author of the post.
    level: :class:`int`
        The level of the character.
    world: :class:`str`
        The world the character belongs to.
    position: :class:`str`
        The character's position, if any.
    title: :class:`str`
        The character's selected title, if any.
    vocation: :class:`Vocation`
        The vocation of the character.
    guild: :class:`GuildMembership`
        The guild the author belongs to, if any.
    posts: :class:`int`
        The number of posts this character has made.
    deleted: :class:`bool`
        Whether the author is deleted or not.
    """

    __slots__ = (
        "name",
        "level",
        "world",
        "vocation",
        "title",
        "position",
        "guild",
        "posts",
        "deleted",
    )

    def __init__(self, name, **kwargs):
        self.name: str = name
        self.level: int = kwargs.get("level", 2)
        self.world: str = kwargs.get("world")
        self.vocation: Vocation = try_enum(Vocation, kwargs.get("vocation"))
        self.title: Optional[str] = kwargs.get("title")
        self.position: Optional[str] = kwargs.get("position")
        self.guild: Optional[GuildMembership] = kwargs.get("guild")
        self.posts: int = kwargs.get("posts", 0)
        self.deleted: bool = kwargs.get("deleted", False)

    def __repr__(self):
        return f"<{self.__class__.__name__} name={self.name!r} level={self.level} world={self.world!r} " \
               f"vocation={self.vocation!r}>"

    @classmethod
    def _parse_author_table(cls, character_info_container):
        """Parses the table containing the author's information.

        Parameters
        ----------
        character_info_container: :class:`bs4.Tag`
            The container with the character's information.

        Returns
        -------
        :class:`ForumAuthor`
            The author's information. If the container has no link, the author is flagged as deleted
            and only its name is set.
        """
        # First link belongs to character
        char_link = character_info_container.find("a")
        if not char_link:
            return cls(name=character_info_container.text, deleted=True)
        author = cls(char_link.text)

        position_info = character_info_container.find("font", attrs={"class": "ff_smallinfo"})
        # Position and titles are shown the same way. If we have two, the title is first and then the position.
        # However, if the character only has one of them, there's no way to know which is it unless we validate
        # against possible types.
        # The parent check skips a matching tag nested deeper in the container.
        if position_info and position_info.parent == character_info_container:
            convert_line_breaks(position_info)
            positions = ("Tutor", "Community Manager", "Customer Support", "Programmer", "Game Content Designer",
                         "Tester")
            for _title in position_info.text.splitlines():
                if not _title:
                    continue
                if _title in positions:
                    author.position = _title
                else:
                    author.title = _title

        char_info = character_info_container.find("font", attrs={"class": "ff_infotext"})
        # Must be looked up before line breaks are converted, as in the original order of operations.
        guild_info = char_info.find("font", attrs={"class": "ff_smallinfo"})
        convert_line_breaks(char_info)
        char_info_text = char_info.text
        info_match = author_info_regex.search(char_info_text)
        if info_match:
            author.world = info_match.group(1)
            author.vocation = try_enum(Vocation, info_match.group(2))
            author.level = int(info_match.group(3))
        if guild_info:
            guild_match = guild_regexp.search(guild_info.text)
            # A guild line in an unexpected format is ignored instead of raising AttributeError.
            if guild_match:
                guild_name = guild_match.group(2)
                title = None
                # The member's title, if any, is appended to the guild's name between parentheses.
                title_match = guild_title_regexp.search(guild_name)
                if title_match:
                    guild_name = title_match.group(1)
                    title = title_match.group(2)
                author.guild = GuildMembership(name=guild_name, rank=guild_match.group(1), title=title)
        posts_match = author_posts_regex.search(char_info_text)
        if posts_match:
            author.posts = int(posts_match.group(1))
        return author
class ForumBoard(abc.BaseBoard, abc.Serializable):
    """Represents a forum's board.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    name: :class:`str`
        The name of the board.
    section: :class:`str`
        The section of the board.
    board_id: :class:`int`
        The internal id of the board.
    page: :class:`int`
        The current page being viewed.
    total_pages: :class:`int`
        The number of pages the board has for the current display range.
    age: :class:`int`
        The maximum age of the displayed threads, in days.

        -1 means all threads will be shown.
    announcements: list of :class:`ListedAnnouncement`
        The list of announcements currently visible.
    threads: list of :class:`ListedThread`
        The list of threads currently visible.
    """

    __slots__ = (
        "name",
        "section",
        "board_id",
        "page",
        "total_pages",
        "age",
        "announcements",
        "threads",
    )

    def __init__(self, **kwargs):
        self.name: str = kwargs.get("name")
        self.section: str = kwargs.get("section")
        self.board_id: int = kwargs.get("board_id", 0)
        self.page: int = kwargs.get("page", 1)
        self.total_pages: int = kwargs.get("total_pages", 1)
        self.age: int = kwargs.get("age", 30)
        self.announcements: List[ListedAnnouncement] = kwargs.get("announcements", [])
        self.threads: List[ListedThread] = kwargs.get("threads", [])

    def __repr__(self):
        return f"<{self.__class__.__name__} name={self.name!r} section={self.section!r}>"

    # region Properties
    @property
    def url(self):
        """:class:`str`: The URL of this board."""
        return self.get_url(self.board_id, self.page, self.age)

    @property
    def previous_page_url(self):
        """:class:`str`: The URL to the previous page of the board, if there's any."""
        return self.get_page_url(self.page - 1) if self.page > 1 else None

    @property
    def next_page_url(self):
        """:class:`str`: The URL to the next page of the board, if there's any."""
        return self.get_page_url(self.page + 1) if self.page < self.total_pages else None
    # endregion

    # region Public Methods
    def get_page_url(self, page):
        """Gets the URL to a given page of the board.

        Parameters
        ----------
        page: :class:`int`
            The desired page.

        Returns
        -------
        :class:`str`
            The URL to the desired page.

        Raises
        ------
        ValueError
            If ``page`` is lower than 1.
        """
        if page <= 0:
            raise ValueError("page must be 1 or greater")
        return self.get_url(self.board_id, page, self.age)

    @classmethod
    def from_content(cls, content):
        """Parses the board's HTML content from Tibia.com.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the board.

        Returns
        -------
        :class:`ForumBoard`
            The forum board contained.

        Raises
        ------
        InvalidContent
            Content is not a board in Tibia.com
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table")
        try:
            # At least five tables are required, any extra ones are ignored.
            header_table, time_selector_table, threads_table, timezone_table, boardjump_table, *_ = tables
        except ValueError as e:
            raise errors.InvalidContent("content is not a forum board", e) from e
        header_text = header_table.text.strip()
        section, name = split_list(header_text, "|", "|")
        board = cls(name=name, section=section)
        thread_rows = threads_table.find_all("tr")

        age_selector = time_selector_table.find("select")
        if not age_selector:
            # Without an age selector only the name and section are returned.
            return board
        selected_age = age_selector.find("option", {"selected": True})
        if selected_age:
            board.age = int(selected_age["value"])

        board_selector = boardjump_table.find("select")
        selected_board = board_selector.find("option", {"selected": True})
        board.board_id = int(selected_board["value"])

        page_info = threads_table.find("td", attrs={"class": "ff_info"})
        if page_info:
            current_page_text = page_info.find("span")
            page_links = page_info.find_all("a")
            if current_page_text:
                board.page = int(current_page_text.text)
                if page_links:
                    # The current page is not a link, so it might be higher than the last linked page.
                    last_linked_page = int(page_number_regex.search(page_links[-1]["href"]).group(1))
                    board.total_pages = max(board.page, last_linked_page)
                else:
                    # No links to other pages, the current page is the only known one.
                    board.total_pages = board.page

        # The first row is skipped, only rows with exactly seven columns are parsed as entries.
        for thread_row in thread_rows[1:]:
            columns = thread_row.find_all("td")
            if len(columns) != 7:
                continue
            entry = cls._parse_thread_row(columns)
            if isinstance(entry, ListedThread):
                board.threads.append(entry)
                cip_border = thread_row.find("div", attrs={"class": "CipBorder"})
                if cip_border:
                    entry.golden_frame = True
            elif isinstance(entry, ListedAnnouncement):
                board.announcements.append(entry)
        return board
    # endregion

    # region Private Methods
    @classmethod
    def _parse_thread_row(cls, columns):
        """Parses the thread row, containing a single thread or announcement.

        Parameters
        ----------
        columns: :class:`bs4.ResultSet`
            The list of columns the thread contains.

        Returns
        -------
        :class:`ListedThread` or :class:`ListedAnnouncement`
            The entry found in the row, or :obj:`None` if the title column contains no links.
        """
        # First Column: Thread's status
        status = None
        status_icon = None
        status_img = columns[0].find("img")
        if status_img:
            status_icon = status_img["src"]
            filename = filename_regex.search(status_icon).group(1)
            status = ThreadStatus.from_icon(filename)

        # Second column: Thread's emoticon
        emoticon = None
        emoticon_img = columns[1].find("img")
        if emoticon_img and emoticon_img.get("alt"):
            emoticon = ForumEmoticon(emoticon_img["alt"], emoticon_img["src"])

        # Third Column: Thread's title and number of total_pages
        pages = 1
        thread_column = columns[2]
        title = thread_column.text.strip()
        try:
            thread_link, *page_links = thread_column.find_all("a")
        except ValueError:
            # No links at all, this row holds neither a thread nor an announcement.
            return None
        if page_links:
            last_page_link = page_links[-1]
            pages = int(page_number_regex.search(last_page_link["href"]).group(1))
            # The links to the pages are part of the column's text, so they are removed from the title.
            title = pages_regex.sub("", title).strip()
        thread_id_match = thread_id_regex.search(thread_link["href"])

        # Fourth Column: Thread starter
        thread_starter = columns[3].text.strip()

        if thread_id_match:
            thread_id = int(thread_id_match.group(1))
            # Fifth Column: Number of replies
            replies = int(columns[4].text)
            # Sixth Column: Number of views
            views = int(columns[5].text)
            # Seventh Column: Last post information
            last_post = LastPost._parse_column(columns[6])
            entry = ListedThread(title=title, thread_id=thread_id, thread_starter=thread_starter, replies=replies,
                                 views=views, last_post=last_post, emoticon=emoticon, status=status, pages=pages,
                                 status_icon=status_icon)
        else:
            # Links without a thread id belong to announcements.
            title = title.replace("Announcement: ", "")
            announcement_id = int(announcement_id_regex.search(thread_link["href"]).group(1))
            entry = ListedAnnouncement(title=title, announcement_id=announcement_id,
                                       announcement_author=thread_starter)
        return entry
    # endregion
class ForumEmoticon(abc.Serializable):
    """An emoticon used in the forum.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    name: :class:`str`
        The emoticon's name.
    url: :class:`str`
        The URL to the emoticon's image.
    """

    __slots__ = ("name", "url")

    def __init__(self, name, url):
        self.name, self.url = name, url

    def __repr__(self):
        class_name = self.__class__.__name__
        return f"<{class_name} name={self.name!r} url={self.url!r}>"
class ForumPost(abc.BasePost, abc.Serializable):
    """Represents a forum post.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    author: :class:`ForumAuthor`
        The author of the post.
    emoticon: :class:`ForumEmoticon`
        The emoticon selected for the post.
    title: :class:`str`, optional
        The title of the post.
    content: :class:`str`
        The content of the post.
    signature: :class:`str`
        The signature of the post.
    post_id: :class:`int`
        The id of the post.
    posted_date: :class:`datetime.datetime`
        The date when the post was made.
    edited_date: :class:`datetime.datetime`, optional
        The date when the post was last edited, if applicable.
    edited_by: :class:`str`, optional
        The character that edited the post.

        This is usually the same author, but in some occasions staff members edit the posts of others.
    golden_frame: :class:`bool`
        Whether the post has a golden frame or not.
    """

    # Each attribute is listed only once.
    __slots__ = (
        "author",
        "emoticon",
        "title",
        "content",
        "signature",
        "post_id",
        "posted_date",
        "edited_date",
        "edited_by",
        "golden_frame",
    )

    def __init__(self, **kwargs):
        self.author = kwargs.get("author")
        self.emoticon = kwargs.get("emoticon")
        self.title = kwargs.get("title")
        self.content = kwargs.get("content")
        self.signature = kwargs.get("signature")
        self.post_id = kwargs.get("post_id")
        # Defaults to ``False`` rather than ``None`` so the attribute is always a boolean.
        self.golden_frame = kwargs.get("golden_frame", False)
        self.posted_date = kwargs.get("posted_date")
        self.edited_date = kwargs.get("edited_date")
        self.edited_by = kwargs.get("edited_by")

    def __repr__(self):
        return f"<{self.__class__.__name__} title={self.title!r} post_id={self.post_id}>"
class ForumThread(abc.BaseThread, abc.Serializable):
    """Represents a forum thread.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    title: :class:`str`
        The title of the thread.
    thread_id: :class:`int`
        The thread's number.
    board: :class:`str`
        The board this thread belongs to.
    section: :class:`str`
        The board section this thread belongs to.
    previous_topic_number: :class:`int`
        The number of the previous topic.
    next_topic_number: :class:`int`
        The number of the next topic.
    page: :class:`int`
        The page being viewed.
    total_pages: :class:`int`
        The number of pages this thread has.
    posts: list of :class:`ForumPost`
        The list of posts the thread has.
    golden_frame: :class:`bool`
        Whether the thread has a golden frame or not.

        In the Proposals board, a golden frame means the thread has a reply by a staff member.
    anchored_post: :class:`ForumPost`
        The post where the page is anchored to, if any.

        When a post is fetched directly, the thread that contains it is displayed, anchored to the specific post.
    """

    __slots__ = (
        "title",
        "thread_id",
        "board",
        "section",
        "previous_topic_number",
        "next_topic_number",
        "page",
        "total_pages",
        "golden_frame",
        "anchored_post",
        "posts",
    )

    def __init__(self, **kwargs):
        self.title: str = kwargs.get("title")
        self.thread_id: int = kwargs.get("thread_id", 0)
        self.board: str = kwargs.get("board")
        self.section: str = kwargs.get("section")
        self.previous_topic_number: int = kwargs.get("previous_topic_number", 0)
        self.next_topic_number: int = kwargs.get("next_topic_number", 0)
        self.page: int = kwargs.get("page", 1)
        self.total_pages: int = kwargs.get("total_pages", 1)
        self.posts: List[ForumPost] = kwargs.get("posts", [])
        self.golden_frame: bool = kwargs.get("golden_frame", False)
        # Honors the keyword argument like every other attribute, still ``None`` when not provided.
        self.anchored_post: Optional[ForumPost] = kwargs.get("anchored_post")

    def __repr__(self):
        return f"<{self.__class__.__name__} title={self.title!r} board={self.board!r} section={self.section!r}>"

    # region Properties
    @property
    def url(self):
        """:class:`str`: The URL of this thread and current page."""
        return self.get_url(self.thread_id, self.page)

    @property
    def previous_page_url(self):
        """:class:`str`: The URL to the previous page of the thread, if there's any."""
        return self.get_page_url(self.page - 1) if self.page > 1 else None

    @property
    def next_page_url(self):
        """:class:`str`: The URL to the next page of the thread, if there's any."""
        return self.get_page_url(self.page + 1) if self.page < self.total_pages else None

    @property
    def previous_thread_url(self):
        """:class:`str`: The URL to the previous topic of the board, if there's any."""
        return self.get_url(self.previous_topic_number) if self.previous_topic_number else None

    @property
    def next_thread_url(self):
        """:class:`str`: The URL to the next topic of the board, if there's any."""
        return self.get_url(self.next_topic_number) if self.next_topic_number else None
    # endregion

    # region Public Methods
    def get_page_url(self, page):
        """Gets the URL to a given page of the thread.

        Parameters
        ----------
        page: :class:`int`
            The desired page.

        Returns
        -------
        :class:`str`
            The URL to the desired page.

        Raises
        ------
        ValueError
            If ``page`` is lower than 1.
        """
        if page <= 0:
            raise ValueError("page must be 1 or greater")
        return self.get_url(self.thread_id, page)

    @classmethod
    def from_content(cls, content):
        """Creates an instance of the class from the html content of the thread's page.

        Parameters
        ----------
        content: :class:`str`
            The HTML content of the page.

        Returns
        -------
        :class:`ForumThread`
            The thread contained in the page, or None if the thread doesn't exist

        Raises
        ------
        InvalidContent
            If content is not the HTML of a thread's page.
        """
        parsed_content = parse_tibiacom_content(content)
        tables = parsed_content.find_all("table")
        root_tables = [t for t in tables if "BoxContent" in t.parent.attrs.get("class", [])]
        if not root_tables:
            error_table = parsed_content.find("table", attrs={"class": "Table1"})
            if error_table and "not found" in error_table.text:
                return None
            raise errors.InvalidContent("content is not a Tibia.com forum thread.")
        try:
            if len(root_tables) == 4:
                forum_info_table, title_table, posts_table, footer_table = root_tables
            else:
                # Without a posts table, only the thread's general information can be read.
                forum_info_table, title_table, footer_table = root_tables
                posts_table = None
        except ValueError as e:
            raise errors.InvalidContent("content is not a Tibia.com forum thread.", e) from e

        header_text = forum_info_table.text
        section, board, *_ = split_list(header_text, "|", "|")
        thread = cls(section=section, board=board)
        thread.title = title_table.text.strip()
        golden_frame = title_table.find("div", attrs={"class": "CipPost"})
        thread.golden_frame = golden_frame is not None

        # Timestamps carry no timezone, the footer tells which one is in use.
        # Only "CEST" (UTC+2) needs a two hour offset. Anything else, including a notice the pattern
        # does not recognise, is treated as CET (UTC+1) instead of failing.
        timezone_match = timezone_regex.search(footer_table.text)
        offset = 2 if timezone_match and timezone_match.group(1) == "CEST" else 1

        time_page_column, navigation_column = footer_table.find_all("td", attrs={"class": "ff_white"})
        page_links = time_page_column.find_all("a")
        if page_links:
            last_link = page_links[-1]["href"]
            thread.page = int(footer_table.find("span").text)
            # The current page is not a link, so it might be higher than the last linked page.
            thread.total_pages = max(int(page_number_regex.search(last_link).group(1)), thread.page)

        navigation_links = navigation_column.find_all("a")
        if len(navigation_links) == 2:
            prev_link, next_link = navigation_links
            thread.previous_topic_number = int(thread_id_regex.search(prev_link["href"]).group(1))
            thread.next_topic_number = int(thread_id_regex.search(next_link["href"]).group(1))
        elif navigation_links:
            # A single link can be either of them, its text tells which one.
            only_link = navigation_links[0]
            topic_number = int(thread_id_regex.search(only_link["href"]).group(1))
            if "Previous" in only_link.text:
                thread.previous_topic_number = topic_number
            else:
                thread.next_topic_number = topic_number
        # With no navigation links at all, both topic numbers keep their default value.

        if posts_table is not None:
            # The first container holds the thread's information, the rest are the posts.
            thread_info_table, *post_tables = posts_table.find_all("div", attrs={"class": "ForumPost"})
            inner_info_table = thread_info_table.find("table")
            thread_num_col, _thread_pages_col, _thread_navigation_col = inner_info_table.find_all("td")
            thread.thread_id = int(thread_num_col.text.replace("Thread #", ""))
            for post_table in post_tables:
                thread.posts.append(cls._parse_post_table(post_table, offset))
        return thread
    # endregion

    # region Private Methods
    @classmethod
    def _parse_post_table(cls, post_table, offset=1):
        """Parses the table containing a single post, extracting its information.

        Parameters
        ----------
        post_table: :class:`bs4.Tag`
            The parsed HTML content of the table.
            The signature's tag, if found, is detached from it while parsing.
        offset: :class:`int`
            The UTC offset used for the timestamps.
            Since the timestamps found in the post contain no timezone information,
            the offset is extracted from another section and passed here to adjust them accordingly.

        Returns
        -------
        :class:`ForumPost`
            The post contained in the table.
        """
        golden_frame = post_table.find("div", attrs={"class": "CipBorderTop"})
        character_info_container = post_table.find("div", attrs={"class": "PostCharacterText"})
        post_author = ForumAuthor._parse_author_table(character_info_container)
        content_container = post_table.find("div", attrs={"class": "PostText"})

        signature = None
        signature_container = post_table.find("td", attrs={"class": "ff_pagetext"})
        if signature_container:
            signature = signature_container.encode_contents().decode()
            # Detached so a separator inside the signature can't be mistaken for the one that precedes it.
            signature_container.extract()

        content = content_container.encode_contents().decode()
        if signature_separator in content:
            # Only the last separator delimits the signature, earlier ones are part of the post's content.
            # Splitting once from the right also avoids failing when the separator shows up more than once.
            content = content.rsplit(signature_separator, 1)[0]

        # The title's line, which might be empty, is separated from the content by two line breaks.
        title_raw, content = content.split("<br/><br/>", 1)
        title = None
        emoticon = None
        if title_raw:
            title_html = bs4.BeautifulSoup(title_raw, 'lxml')
            emoticon_img = title_html.find("img")
            if emoticon_img:
                emoticon = ForumEmoticon(emoticon_img["alt"], emoticon_img["src"])
            title_tag = title_html.find("b")
            if title_tag:
                title = title_tag.text

        post_details = post_table.find('div', attrs={"class": "PostDetails"})
        # The first date is when the post was made, a second date means the post was edited.
        dates = post_dates_regex.findall(post_details.text)
        edited_date = None
        edited_by = None
        posted_date = parse_tibia_forum_datetime(dates[0], offset)
        if len(dates) > 1:
            edited_date = parse_tibia_forum_datetime(dates[1], offset)
            edited_by = edited_by_regex.search(post_details.text).group(1)

        additional_box = post_table.find('div', attrs={"class": "AdditionalBox"})
        post_id = int(additional_box.text.replace("Post #", ""))
        return ForumPost(author=post_author, content=content, signature=signature, posted_date=posted_date,
                         edited_date=edited_date, edited_by=edited_by, post_id=post_id, title=title,
                         emoticon=emoticon, golden_frame=golden_frame is not None)
    # endregion
class LastPost(abc.BasePost, abc.Serializable):
    """Represents the last post made in a forum board or thread.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    author: :class:`str`
        The name of the character that made the last post.
    post_id: :class:`int`
        The internal id of the post.
    date: :class:`datetime.datetime`
        The date when the last post was made.
    deleted: :class:`bool`
        Whether the last post's author is a character that is already deleted.
    """

    __slots__ = (
        "author",
        "post_id",
        "date",
        "deleted",
    )

    def __init__(self, author, post_id, date, *, deleted=False):
        self.author: str = author
        self.post_id: int = post_id
        self.date: datetime.datetime = date
        self.deleted: bool = deleted

    def __repr__(self):
        return (
            f"<{self.__class__.__name__} author={self.author!r} post_id={self.post_id} "
            f"date={self.date!r}>"
        )

    @property
    def author_url(self):
        """:class:`str`: The URL to the author's character information page."""
        return abc.BaseCharacter.get_url(self.author)

    @classmethod
    def _parse_column(cls, last_post_column, offset=1):
        """Parse the column holding the last post information and build a :class:`LastPost` from it.

        Parameters
        ----------
        last_post_column: :class:`bs4.Tag`
            The column containing the last post.
        offset: :class:`int`
            Since the timestamps have no offset information, it may be passed to fill it out.

        Returns
        -------
        Optional[:class:`LastPost`]
            The last post described in the column, or ``None`` if the column has no last post info.
        """
        info_box = last_post_column.find("div", attrs={"class": "LastPostInfo"})
        if info_box is None:
            return None

        # The post id is taken from the permalink's query string.
        post_url = info_box.find("a")['href']
        post_id = int(post_id_regex.search(post_url).group(1))

        # Non-breaking spaces are normalized before the date is parsed.
        raw_date = info_box.text.replace("\xa0", " ").strip()
        posted_on = parse_tibia_forum_datetime(raw_date, offset)

        author_tag = last_post_column.find("font")
        # An author name that is not wrapped in a link is reported as a deleted character.
        is_deleted = author_tag.find("a") is None
        author_name = author_tag.text.replace("by", "", 1).replace("\xa0", " ").strip()
        return cls(author_name, post_id, posted_on, deleted=is_deleted)
class ListedAnnouncement(abc.BaseAnnouncement, abc.Serializable):
    """An announcement entry as shown in a forum board's listing.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    title: :class:`str`
        The title of the announcement.
    announcement_id: :class:`int`
        The internal id of the announcement.
    announcement_author: :class:`str`
        The character that made the announcement.
    """

    __slots__ = (
        "title",
        "announcement_id",
        "announcement_author",
    )

    def __init__(self, **kwargs):
        # Every attribute is optional and defaults to ``None`` when its keyword is omitted.
        fetch = kwargs.get
        self.title: str = fetch("title")
        self.announcement_id: int = fetch("announcement_id")
        self.announcement_author: str = fetch("announcement_author")

    def __repr__(self):
        template = (
            "<{0.__class__.__name__} title={0.title!r} announcement_id={0.announcement_id} "
            "announcement_author={0.announcement_author!r}>"
        )
        return template.format(self)
class ListedBoard(abc.BaseBoard, abc.Serializable):
    """Represents a board in the list of boards.

    This is the board information available when viewing a section (e.g. World, Trade, Community)

    .. versionadded:: 3.0.0

    Attributes
    ----------
    name: :class:`str`
        The name of the board.
    board_id: :class:`int`
        The board's internal id.
    description: :class:`str`
        The description of the board.
    posts: :class:`int`
        The number of posts in this board.
    threads: :class:`int`
        The number of threads in this board.
    last_post: :class:`LastPost`
        The information of the last post made in this board.
    """

    def __init__(self, **kwargs):
        self.name: str = kwargs.get("name")
        self.board_id: int = kwargs.get("board_id")
        self.description: str = kwargs.get("description")
        self.posts: int = kwargs.get("posts")
        self.threads: int = kwargs.get("threads")
        self.last_post: Optional[LastPost] = kwargs.get("last_post")

    __slots__ = (
        "name",
        "board_id",
        "description",
        "posts",
        "threads",
        "last_post",
    )

    def __repr__(self):
        return f"<{self.__class__.__name__} name={self.name!r} board_id={self.board_id} posts={self.posts} " \
               f"threads={self.threads} description={self.description!r}>"

    # region Public Methods
    @classmethod
    def list_from_content(cls, content):
        """Parse the content of a board list from Tibia.com into a list of boards.

        Parameters
        ----------
        content: :class:`str`
            The raw HTML response from the board list.

        Returns
        -------
        :class:`list` of :class:`ListedBoard`
            The boards found in the page. Rows that lack the expected columns are skipped.

        Raises
        ------
        InvalidContent
            Content is not a board list in Tibia.com
        """
        try:
            parsed_content = parse_tibiacom_content(content)
            tables = parsed_content.find_all("table", attrs={"width": "100%"})
            # Exactly three matching tables are expected; any other count raises ValueError below.
            _, board_list_table, timezone_table = tables
            # The first row of the board table is discarded.
            _, *board_rows = board_list_table.find_all("tr")
            # Timestamps carry no timezone, so the UTC offset is derived from the page's timezone
            # notice: CEST is UTC+2, otherwise CET (UTC+1) is used. The lookup is None-safe because
            # ``timezone_regex`` may fail to match a CET notice, which must not crash the parser.
            timezone_match = timezone_regex.search(timezone_table.text)
            timezone = timezone_match.group(1) if timezone_match else None
            offset = 2 if timezone == "CEST" else 1
            boards = []
            # The last three rows are not parsed as boards (presumably footer rows; confirm).
            for board_row in board_rows[:-3]:
                try:
                    board = cls._parse_board_row(board_row, offset)
                except IndexError:
                    # Rows with fewer columns than a board row are ignored.
                    continue
                else:
                    boards.append(board)
            return boards
        except ValueError as e:
            raise errors.InvalidContent("content does not belong to a forum section.", e) from e
    # endregion

    # region Private Methods
    @classmethod
    def _parse_board_row(cls, board_row, offset=1):
        """Parse a row containing a board and extract its information.

        Parameters
        ----------
        board_row: :class:`bs4.Tag`
            The row's parsed content.
        offset: :class:`int`
            Since the displayed dates do not contain timezone information, it is necessary to extract
            the used timezone from somewhere else and pass it to this method to adjust them accordingly.

        Returns
        -------
        :class:`ListedBoard`
            The board contained in this row.
        """
        columns = board_row.find_all("td")
        # Second Column: Name and description
        name_column = columns[1]
        board_link_tag = name_column.find("a")
        description_tag = name_column.find("font")
        description = description_tag.text
        name = board_link_tag.text
        link = board_link_tag['href']
        board_id = int(board_id_regex.search(link).group(1))
        # Third Column: Post count
        posts_column = columns[2]
        posts = int(posts_column.text)
        # Fourth Column: Thread count
        threads_column = columns[3]
        threads = int(threads_column.text)
        # Fifth Column: Last post information
        last_post_column = columns[4]
        last_post = LastPost._parse_column(last_post_column, offset)
        return cls(name=name, board_id=board_id, description=description, posts=posts, threads=threads,
                   last_post=last_post)
    # endregion
class ListedThread(abc.BaseThread, abc.Serializable):
    """A thread entry as shown in a forum board's thread list.

    .. versionadded:: 3.0.0

    Attributes
    ----------
    title: :class:`str`
        The title of the thread.
    thread_id: :class:`int`
        The internal id of the thread.
    thread_starter: :class:`str`
        The character that started the thread.
    replies: :class:`int`
        The number of replies.
    views: :class:`int`
        The number of views.
    last_post: :class:`LastPost`
        The information of the last post made in this thread.
    status: :class:`ThreadStatus`
        The status of the thread.
    status_icon: :class:`str`
        The URL of the icon displayed as status.
    emoticon: :class:`ForumEmoticon`
        The emoticon used for the thread.
    pages: :class:`int`
        The number of pages the thread has.
    golden_frame: :class:`bool`
        Whether the thread has a gold frame or not.

        In the Proposals board, the gold frame indicates that a staff member has replied in the thread.
    """

    __slots__ = (
        "title",
        "thread_id",
        "thread_starter",
        "replies",
        "views",
        "last_post",
        "status",
        "status_icon",
        "emoticon",
        "pages",
        "golden_frame",
    )

    def __init__(self, **kwargs):
        fetch = kwargs.get
        self.title: str = fetch("title")
        self.thread_id: int = fetch("thread_id")
        self.thread_starter: str = fetch("thread_starter")
        self.replies: int = fetch("replies")
        self.views: int = fetch("views")
        self.last_post: Optional[LastPost] = fetch("last_post")
        self.status: ThreadStatus = fetch("status")
        self.status_icon: str = fetch("status_icon")
        # NOTE(review): "icon" is not listed in ``__slots__``; this assignment presumably relies on
        # a base class providing an instance ``__dict__`` - confirm.
        self.icon = fetch("icon")
        self.emoticon: ForumEmoticon = fetch("emoticon")
        # The page count is supplied through the ``total_pages`` keyword and defaults to 1.
        self.pages: int = fetch("total_pages", 1)
        self.golden_frame: bool = fetch("golden_frame", False)

    def __repr__(self):
        return (
            f"<{self.__class__.__name__} title={self.title!r} thread_id={self.thread_id} "
            f"thread_starter={self.thread_starter!r} replies={self.replies} views={self.views}>"
        )