"""These are functions used thorough the module that may not be intended for public use."""
import datetime
import functools
import itertools
import re
import urllib.parse
import warnings
from collections import OrderedDict
from typing import Dict, Optional, Tuple, Type, TypeVar, Union
import bs4
# Matches an optionally fractional number (captured in group 1), followed by at most one
# whitespace character and any amount of trailing "k" characters, anchored at the end of the string.
TIBIA_CASH_PATTERN = re.compile(r'(\d*\.?\d*)\s?k*$')
def convert_line_breaks(element):
    """Replace every ``<br>`` tag found inside an element with a newline character.

    The element is modified in place and nothing is returned.

    Parameters
    ----------
    element: :class:`bs4.Tag`
        The element whose line break tags will be replaced.
    """
    line_breaks = element.find_all("br")
    for tag in line_breaks:
        tag.replace_with("\n")
def get_tibia_url(section, subtopic=None, *args, anchor=None, test=False, **kwargs):
    """Build a URL to Tibia.com with the given parameters.

    Parameters
    ----------
    section: :class:`str`
        The desired section (e.g. community, abouttibia, manual, library)
    subtopic: :class:`str`, optional
        The desired subtopic (e.g. characters, guilds, houses, etc)
    anchor: :class:`str`
        A link anchor to add to the link.
    args:
        A list of key-value pairs to add as query parameters.
        This allows passing multiple parameters with the same name.
        They are always placed after the keyword parameters.
    kwargs:
        Additional parameters to pass to the url as query parameters (e.g name, world, houseid, etc).
        Parameters with a value of :obj:`None` are omitted.
    test: :class:`bool`
        Whether to use the test website or not.

    Returns
    -------
    :class:`str`
        The generated Tibia.com URL.

    Examples
    --------
    >>> get_tibia_url("community", "houses", page="view", houseid=55302, world="Gladera")
    'https://www.tibia.com/community/?subtopic=houses&page=view&houseid=55302&world=Gladera'

    You can also build a dictionary and pass it like:

    >>> params = {'world': "Gladera"}
    >>> get_tibia_url("community", "worlds", **params)
    'https://www.tibia.com/community/?subtopic=worlds&world=Gladera'

    Repeated parameters can be passed as positional pairs:

    >>> get_tibia_url("community", "worlds", ("worldtypes[]", 0), ("worldtypes[]", 3))
    'https://www.tibia.com/community/?subtopic=worlds&worldtypes%5B%5D=0&worldtypes%5B%5D=3'
    """
    host = "www.test.tibia.com" if test else "www.tibia.com"
    # All parameters are gathered as pairs and encoded in a single pass, so no dangling "&"
    # is produced when only positional pairs are provided.
    query = []
    if subtopic:
        query.append(("subtopic", subtopic))
    for key, value in kwargs.items():
        if value is None:
            continue
        if isinstance(value, str):
            # Keyword string values are percent-encoded from their ISO-8859-1 bytes.
            value = value.encode('iso-8859-1')
        query.append((key, value))
    query.extend(args)
    url = f"https://{host}/{section}/?{urllib.parse.urlencode(query)}"
    if anchor:
        url += f"#{anchor}"
    return url
def parse_integer(number: str, default: Optional[int] = 0):
    """Convert a numeric string into an integer, discarding commas and periods.

    Parameters
    ----------
    number: :class:`str`
        The string holding the number. Surrounding whitespace is ignored.
    default: :class:`int`
        The value returned when the string cannot be read as an integer.
        It is ``0`` unless specified otherwise.

    Returns
    -------
    :class:`int`
        The parsed integer, or ``default`` if ``number`` is :obj:`None` or not numeric.
    """
    if number is None:
        return default
    digits = number.strip().replace(",", "").replace(".", "")
    try:
        return int(digits)
    except ValueError:
        return default
def parse_link_info(link_tag):
    """Extract the text, target URL and query parameters of a link tag.

    Parameters
    ----------
    link_tag: :class:`bs4.Tag`
        The link tag object. It must have a ``href`` attribute.

    Returns
    -------
    :class:`dict`:
        A dictionary with the keys ``text`` (the stripped link text), ``url`` (the ``href`` value)
        and ``query`` (a dictionary with the query parameters, empty if the URL has none).

    Examples
    --------
    >>> # <a href="https://www.tibia.com/community/?subtopic=houses&page=view&houseid=55302&world=Gladera">House</>
    >>> parse_link_info(link_tag)
    {
        "text": "House",
        "url": "https://www.tibia.com/community/?subtopic=houses&page=view&houseid=55302&world=Gladera"
        "query": {
            "subtopic": "houses",
            "page": "view",
            "houseid": "55302",
            "world": "Gladera"
        }
    }

    Parameters that appear more than once are returned as a list, under the name used in the URL.
    Parameters with a blank value are omitted.

    >>> # <a href="https://example.com/?world=&beprotection=-1&worldtypes[]=0&worldtypes[]=3">Link</a>
    >>> parse_link_info(link_tag)
    {
        "text": "Link",
        "url": "https://example.com/?world=&beprotection=-1&worldtypes[]=0&worldtypes[]=3"
        "query": {
            "beprotection": "-1",
            "worldtypes[]": ["0", "3"]
        }
    }
    """
    target = link_tag["href"]
    label = link_tag.text.strip()
    query = {}
    raw_query = urllib.parse.urlparse(target).query
    if raw_query:
        for name, values in urllib.parse.parse_qs(raw_query).items():
            # Single values are unwrapped; repeated parameters keep their list form.
            query[name] = values[0] if len(values) == 1 else values
    return {"text": label, "url": target, "query": query}
def parse_tibia_datetime(datetime_str) -> Optional[datetime.datetime]:
    """Parse date and time from the format used in Tibia.com.

    Accepted format:

    - ``MMM DD YYYY, HH:mm:ss ZZZ``, e.g. ``Dec 10 2018, 21:53:37 CET``.
    - ``MMM DD YYYY, HH:mm ZZZ``, e.g. ``Dec 10 2018, 21:53 CET``.

    Only the ``CET`` and ``CEST`` timezones are recognized.
    Non-breaking spaces, either as the raw character or as the ``&#xa0;`` entity,
    are treated as regular spaces.

    Parameters
    -------------
    datetime_str: :class:`str`
        The date and time as represented in Tibia.com

    Returns
    -----------
    :class:`datetime.datetime`, optional
        The represented datetime, in UTC (timezone aware), or :obj:`None` if the value
        could not be parsed or uses an unknown timezone.
    """
    # UTC offset, in hours, of each supported timezone abbreviation.
    utc_offsets = {"CET": 1, "CEST": 2}
    try:
        cleaned = datetime_str.replace(",", "")
        # Normalize non-breaking spaces explicitly, whether they arrive escaped or not.
        for nbsp in ("&#xa0;", "\xa0"):
            cleaned = cleaned.replace(nbsp, " ")
        cleaned = cleaned.strip()
        # The timezone is the trailing 3 or 4 characters; strptime does not support CET/CEST,
        # so it is separated from the rest of the string.
        tz = cleaned[-4:].strip()
        if tz not in utc_offsets:
            return None
        local_part = cleaned[:-4].strip()
        try:
            local_time = datetime.datetime.strptime(local_part, "%b %d %Y %H:%M:%S")
        except ValueError:
            # Some timestamps do not include seconds.
            local_time = datetime.datetime.strptime(local_part, "%b %d %Y %H:%M")
        # Subtract the offset to convert the local time into UTC.
        utc_time = local_time - datetime.timedelta(hours=utc_offsets[tz])
        return utc_time.replace(tzinfo=datetime.timezone.utc)
    except (ValueError, AttributeError):
        return None
def parse_tibia_date(date_str) -> Optional[datetime.date]:
    """Parse a date written in the short format used in Tibia.com.

    Accepted format:

    - ``MMM DD YYYY``, e.g. ``Jul 23 2015``

    Parameters
    -----------
    date_str: :class:`str`
        The date as represented in Tibia.com. Surrounding whitespace is ignored.

    Returns
    -----------
    :class:`datetime.date`, optional
        The represented date, or :obj:`None` if the value could not be parsed.
    """
    try:
        parsed = datetime.datetime.strptime(date_str.strip(), "%b %d %Y")
    except (ValueError, AttributeError):
        return None
    return parsed.date()
def parse_tibia_forum_datetime(datetime_str, utc_offset=1):
    """Parse a timestamp in the format used in the Tibia.com forums.

    Accepted format:

    - ``DD.MM.YYYY HH:mm:ss``, e.g. ``23.07.2015 21:30:30``

    Parameters
    ----------
    datetime_str: :class:`str`
        The string containing the date and time. Surrounding whitespace is ignored.
    utc_offset: :class:`int`
        The UTC offset, in hours, of the timestamp.
        The timestamps carry no timezone information, so it has to be supplied separately.
        By default CET (+1) is considered.

    Returns
    -------
    :class:`datetime`
        The represented datetime, in UTC (timezone aware).

    Raises
    ------
    ValueError
        If the string does not match the expected format.
    """
    local_time = datetime.datetime.strptime(datetime_str.strip(), "%d.%m.%Y %H:%M:%S")
    # Removing the offset turns the local time into UTC.
    offset = datetime.timedelta(hours=utc_offset)
    return (local_time - offset).replace(tzinfo=datetime.timezone.utc)
def parse_tibia_full_date(date_str) -> Optional[datetime.date]:
    """Parse a date written in the long format used in Tibia.com.

    Accepted format:

    - ``MMMM DD, YYYY``, e.g. ``July 23, 2015``

    Parameters
    -----------
    date_str: :class:`str`
        The date as represented in Tibia.com. Surrounding whitespace is ignored.

    Returns
    -----------
    :class:`datetime.date`, optional
        The represented date, or :obj:`None` if the value could not be parsed.
    """
    try:
        parsed = datetime.datetime.strptime(date_str.strip(), "%B %d, %Y")
    except (ValueError, AttributeError):
        return None
    return parsed.date()
def parse_number_words(text_num):
    """Convert a number written in English words into an integer.

    Hyphens are treated as spaces, and the word ``and`` is ignored.

    Parameters
    ----------
    text_num: :class:`str`
        The text representation of a number, e.g. ``twenty-one``.

    Returns
    -------
    :class:`int`
        The number represented by the string, or ``0`` if any word is not recognized.
    """
    units = (
        "zero", "one", "two", "three", "four", "five", "six", "seven", "eight",
        "nine", "ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen",
        "sixteen", "seventeen", "eighteen", "nineteen",
    )
    tens = ("", "", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety")
    scales = ("hundred", "thousand", "million", "billion", "trillion")

    # Every known word maps to a (multiplier, addend) pair.
    numwords = {"and": (1, 0)}
    numwords.update((word, (1, value)) for value, word in enumerate(units))
    numwords.update((word, (1, position * 10)) for position, word in enumerate(tens))
    # "hundred" multiplies by 10^2, the following scales by 10^3, 10^6, ...
    numwords.update((word, (10 ** (power * 3 or 2), 0)) for power, word in enumerate(scales))

    total = 0
    pending = 0
    for token in text_num.replace("-", " ").split():
        entry = numwords.get(token)
        if entry is None:
            return 0
        multiplier, addend = entry
        pending = pending * multiplier + addend
        if multiplier > 100:
            # Scales above a hundred close the current group.
            total += pending
            pending = 0
    return total + pending
def try_datetime(obj) -> Optional[datetime.datetime]:
    """Try to turn an object into a datetime.

    When the format of the value is known, using the corresponding parsing function is recommended.
    This is meant to be used in constructors.

    Parameters
    ----------
    obj: :class:`str`, :class:`dict`, :class:`datetime.datetime`
        The object to convert.

    Returns
    -------
    :class:`datetime.datetime`, optional
        The same object if it is already a datetime or :obj:`None`,
        otherwise the result of :func:`parse_tibia_datetime`.
    """
    if obj is None or isinstance(obj, datetime.datetime):
        # Nothing to convert, the value is passed through untouched.
        return obj
    return parse_tibia_datetime(obj)
def try_date(obj) -> Optional[datetime.date]:
    """Try to turn an object into a date.

    When the format of the value is known, using the corresponding parsing function is recommended.
    This is meant to be used in constructors.

    Parameters
    ----------
    obj: :class:`str`, :class:`datetime.datetime`, :class:`datetime.date`
        The object to convert.

    Returns
    -------
    :class:`datetime.date`, optional
        The date portion of a datetime, the same object if it is already a date, or the result
        of parsing the value with :func:`parse_tibia_date` and then :func:`parse_tibia_full_date`.
        :obj:`None` is returned as is.
    """
    if obj is None:
        return None
    # datetime is a subclass of date, so it has to be checked first.
    if isinstance(obj, datetime.datetime):
        return obj.date()
    if isinstance(obj, datetime.date):
        return obj
    short_format = parse_tibia_date(obj)
    return short_format if short_format is not None else parse_tibia_full_date(obj)
def parse_tibiacom_content(content, *, html_class="BoxContent", tag="div", builder="lxml"):
    """Build a BeautifulSoup object out of HTML content from Tibia.com.

    Unless the ``html5lib`` builder is used, only the elements matching ``tag`` and ``html_class`` are parsed.
    The first occurrence of ``ISO-8859-1`` in the content is replaced with ``utf-8`` before parsing.

    Parameters
    ----------
    content: :class:`str`
        The raw HTML content from Tibia.com
    html_class: :class:`str`
        The HTML class of the parsed element. The default value is ``BoxContent``.
    tag: :class:`str`
        The HTML tag to select. The default value is ``div``.
    builder: :class:`str`
        The builder to use. The default value is ``lxml``.

    Returns
    -------
    :class:`bs4.BeautifulSoup`, optional
        The parsed content.
    """
    if builder == "html5lib":
        strainer = None
    else:
        strainer = bs4.SoupStrainer(tag, class_=html_class)
    markup = content.replace('ISO-8859-1', 'utf-8', 1)
    return bs4.BeautifulSoup(markup, builder, parse_only=strainer)
def parse_tibiacom_tables(parsed_content) -> Dict[str, bs4.Tag]:
    """Parse tables from Tibia.com into a mapping by the tables title.

    This is used for the table style used in Tibia.com, where a table is wrapped in a container with a title.
    Containers lacking either the title element or the table are skipped.

    Parameters
    ----------
    parsed_content: :class:`bs4.BeautifulSoup`
        The content to find the tables in.

    Returns
    -------
    :class:`dict`
        A dictionary mapping the container titles and the contained table.
    """
    tables = {}
    for container in parsed_content.find_all("div", attrs={"class": "TableContainer"}):
        title_tag = container.find("div", attrs={"class": "Text"})
        table = container.find("table", attrs={"class": "TableContent"})
        # A container without a title cannot be keyed, so it is skipped instead of
        # failing with an AttributeError when reading the title's text.
        if not table or title_tag is None:
            continue
        tables[title_tag.text.strip()] = table
    return tables
T = TypeVar('T')
D = TypeVar('D')


def try_enum(cls: Type[T], val, default: D = None) -> Union[T, D]:
    """Try to convert a value into a member of an enum.

    The value is first looked up by enum value. If that fails, it is looked up by member name,
    with strings converted to upper case beforehand.

    Parameters
    ----------
    cls: :class:`Enum`
        The enum to convert to.
    val:
        The value to try to convert to Enum
    default: optional
        The value to return if no enum value is found.

    Returns
    -------
    obj:
        The enum member if found, otherwise ``default``.
    """
    if isinstance(val, cls):
        return val
    try:
        return cls(val)
    except ValueError:
        pass
    # Not a valid value, so the member name is tried instead.
    name = val.upper() if isinstance(val, str) else val
    try:
        return cls._member_map_[name]
    except KeyError:
        return default
def parse_tibia_money(argument):
    """Parse a string that may contain 'k' as thousand suffix.

    Each trailing ``k`` multiplies the value by a thousand, e.g. ``1.5kk`` is ``1500000``.
    Commas are ignored, and any remaining fraction is truncated.

    Parameters
    ----------
    argument: :class:`str`
        A numeric string using k as a suffix for thousands.

    Returns
    -------
    :class:`int`:
        The value represented by the string.

    Raises
    ------
    ValueError
        If the string does not represent a numeric value.
    """
    try:
        return int(argument)
    except ValueError:
        pass
    normalized = argument.replace(",", "").strip().lower()
    m = TIBIA_CASH_PATTERN.match(normalized)
    if not m or not m.group(1):
        raise ValueError("not a numeric value")
    whole, _, fraction = m.group(1).partition(".")
    if not whole and not fraction:
        # A lone period is matched by the pattern but holds no digits.
        raise ValueError("not a numeric value")
    multiplier = 1000 ** normalized.count("k")
    # Integer arithmetic is used on purpose: multiplying a float and truncating afterwards
    # loses a unit on values such as "1.005k" (1004.9999999999999 -> 1004).
    value = int(whole or "0") * multiplier
    if fraction:
        value += int(fraction) * multiplier // 10 ** len(fraction)
    return value
def split_list(items, separator=",", last_separator=" and "):
    """Turn a string enumerating elements into a list of those elements.

    Parameters
    ----------
    items: :class:`str`
        A string listing elements, e.g. ``a, b and c``.
    separator: :class:`str`
        The separator between each item. A comma by default.
    last_separator: :class:`str`
        The separator used for the last item. ' and ' by default.

    Returns
    -------
    :class:`list` of :class:`str`
        A list containing each one of the items, without surrounding whitespace.
        :obj:`None` is returned if ``items`` is :obj:`None`.
    """
    if items is None:
        return None
    parts = items.split(separator)
    # Only the final occurrence of the last separator in the trailing chunk splits it.
    *leading, final = parts[-1].split(last_separator)
    if leading:
        parts[-1] = last_separator.join(leading)
        parts.append(final)
    return [part.strip() for part in parts]
def _recursive_strip(value): # pragma: no cover
if isinstance(value, dict):
return {k: _recursive_strip(v) for k, v in value.items()}
if isinstance(value, list):
return [_recursive_strip(i) for i in value]
if isinstance(value, str):
return value.strip()
return value
def deprecated(instead=None):  # pragma: no cover
    """Build a decorator that emits a :class:`DeprecationWarning` every time the function is called.

    Parameters
    ----------
    instead: :class:`str`, optional
        The name of the replacement to suggest in the warning message.
    """
    def actual_decorator(func):
        @functools.wraps(func)
        def decorated(*args, **kwargs):
            if instead:
                fmt = "{0.__name__} is deprecated, use {1} instead."
            else:
                fmt = '{0.__name__} is deprecated.'
            message = fmt.format(func, instead)
            # The filter is switched to 'always' so the warning is emitted on every call,
            # and set to 'default' right after.
            warnings.simplefilter('always', DeprecationWarning)
            warnings.warn(message, stacklevel=3, category=DeprecationWarning)
            warnings.simplefilter('default', DeprecationWarning)
            return func(*args, **kwargs)
        return decorated
    return actual_decorator
# Captures the number that follows "Results: ", which may include comma separators.
results_pattern = re.compile(r'Results: ([\d,]+)')
# Captures the numeric value of a "page=" parameter.
page_pattern = re.compile(r'page=(\d+)')