"""
Author: PH01L
Email: phoil@osrsbox.com
Website: https://www.osrsbox.com
Description:
Various methods to help clean OSRS Wiki wikitext entries.
Copyright (c) 2021, PH01L
###############################################################################
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <https://www.gnu.org/licenses/>.
###############################################################################
"""
import re
import dateparser
def clean_wikitext(value: str) -> str:
"""Generic infobox property cleaner.
This helper method is a generic cleaner for all infobox template properties.
The value is string cast, stripped of new line characters, then any square
brackets (wikitext links) are stripped, then anything in trailing brackets,
then any HTML line breaks are removed.
:param value: Template value extracted in raw wikitext format.
:return value: Cleaned template value.
"""
value = str(value)
value = value.strip()
value = re.sub(r'[\[\]]+', '', value) # Removes all "[" and "]"
value = re.sub(r' \([^()]*\)', '', value) # Removes " (anything)"
value = re.sub(r'', '', value) # Removes ""
value = re.sub(r'
bool:
"""Convert the members property to a boolean.
:param value: Template value extracted from raw wikitext.
:return value: Template value converted into a boolean.
"""
if value.lower() in ["true", "yes"]:
return True
else:
return False
def release_date(value: str) -> str:
    """Convert the release date entry to an ISO 8601 date str.

    From the wiki, the usual date format is: dd Month YYYY
    But it will have wikitext markup: [[31 October]] [[2005]]
    Returned value is ISO 8601 date string: YYYY-MM-DD

    :param value: Template value extracted from raw wikitext.
    :return value: A cleaned release date in ISO 8601 date format, or None
        when the value is empty or cannot be parsed as a date.
    """
    if not value:
        return None

    # Drop the wikitext link brackets so only the plain date text remains.
    date_text = value.replace("[", "").replace("]", "")

    try:
        return dateparser.parse(date_text).date().isoformat()
    except (ValueError, AttributeError):
        # Calling .date() on a None parse result raises AttributeError;
        # both failure modes are reported to the caller as "no date".
        return None
def hitpoints(value: str) -> int:
    """Turn the raw hitpoints entry into an integer.

    :param value: Template value extracted from raw wikitext.
    :return value: The hitpoints as an integer, or None when the entry is
        empty or is not a plain integer literal.
    """
    parsed = None
    if value:
        try:
            parsed = int(value)
        except ValueError:
            # Non-numeric text: leave the result as None.
            pass
    return parsed
def max_hit(value: str) -> int:
    """Turn the raw max_hit entry into an integer.

    Only the text before the first space or comma is considered, so an
    entry listing several values yields the first one.

    :param value: Template value extracted from raw wikitext.
    :return value: The leading max hit as an integer, or None when the
        entry is empty or its leading token is not an integer.
    """
    if not value:
        return None

    # Keep the leading token; everything after the first separator is ignored.
    leading_token, *_ = re.split("[ ,]", value)

    try:
        return int(leading_token)
    except ValueError:
        return None
def attack_type(value: str) -> list:
    """Extract the known attack types mentioned in the attack type entry.

    The entry is lower-cased and stripped of square brackets, then searched
    for each known attack type as a substring. Matches are returned in a
    fixed order (the order of the lookup tuple), not in order of appearance.

    :param value: The extracted raw wiki text.
    :return value: A list of the attack type strings found; empty when the
        entry is None, empty, or mentions no known type.
    """
    if value is None or value == "":
        return []

    searchable = value.lower().replace("[", "").replace("]", "")

    known_types = (
        "melee",
        "slash",
        "crush",
        "stab",
        "ranged",
        "magic",
        "typeless",
        "dragonfire",
    )
    return [name for name in known_types if name in searchable]
def attack_speed(value: str) -> int:
    """Turn the raw attack_speed entry into an integer.

    :param value: Template value extracted from raw wikitext.
    :return value: The attack speed as an integer, or None when the entry
        is empty or is not a plain integer literal.
    """
    if value:
        try:
            return int(value)
        except ValueError:
            # Fall through to the shared "no usable value" result.
            pass
    return None
def aggressive(value: str) -> bool:
    """Convert the aggressive property to a boolean.

    The value is first passed through ``clean_wikitext``. The result is
    considered true when its first space-separated word is "true" or "yes"
    (case-insensitive), so both "Yes" and "Yes when provoked" count.

    :param value: Template value extracted from raw wikitext.
    :return value: A cleaned aggressive value as a boolean.
    """
    value = clean_wikitext(value)

    # Inspect only the first word so a trailing qualifier cannot hide a
    # leading yes/true. A value that is exactly "yes"/"true" is simply the
    # single-word case of this check. Note that ``lower`` must be *called*:
    # comparing the bound method itself never matches a string.
    first_word = value.split(" ")[0].lower()
    return first_word in ("true", "yes")
def poisonous(value: str) -> bool:
    """Convert the poisonous property to a boolean.

    An empty or missing value is treated as not poisonous. Otherwise the
    value is passed through ``clean_wikitext`` and considered true when its
    first space-separated word is "true" or "yes" (case-insensitive), so
    both "Yes" and "Yes with some qualifier" count.

    :param value: Template value extracted from raw wikitext.
    :return value: A cleaned poisonous value as a boolean.
    """
    if not value:
        return False

    value = clean_wikitext(value)

    # Inspect only the first word so a trailing qualifier cannot hide a
    # leading yes/true. A value that is exactly "yes"/"true" is simply the
    # single-word case of this check. Note that ``lower`` must be *called*:
    # comparing the bound method itself never matches a string.
    first_word = value.split(" ")[0].lower()
    return first_word in ("true", "yes")
def venomous(value: str) -> bool:
    """Report whether the entry mentions venom.

    :param value: Template value extracted from raw wikitext.
    :return value: True when the entry is non-empty and contains the text
        "venom" (case-insensitive); False otherwise.
    """
    # Short-circuit on empty/None so .lower() is only called on real text.
    return bool(value) and "venom" in value.lower()
def immune_poison(value: str) -> bool:
    """Interpret the immune_poison entry as a boolean.

    :param value: Template value extracted from raw wikitext.
    :return value: True only when the entry is exactly "true" or "yes"
        (case-insensitive); False for anything else, including empty input.
    """
    if value:
        return value.lower() in ("true", "yes")
    return False
def immune_venom(value: str) -> bool:
    """Interpret the immune_venom entry as a boolean.

    :param value: Template value extracted from raw wikitext.
    :return value: True only when the entry is exactly "true" or "yes"
        (case-insensitive); False for anything else, including empty input.
    """
    affirmative = ("true", "yes")
    return value.lower() in affirmative if value else False
def attributes(value: str) -> list:
    """Convert the attributes text entry to a list.

    The entry is lower-cased and searched for each known monster attribute
    as a substring. Matches are returned in a fixed order (the order of the
    lookup tuple), not in order of appearance in the text.

    :param value: Template value extracted from raw wikitext.
    :return value: A list of the attribute strings found; empty when the
        entry is None, empty, or mentions no known attribute.
    """
    if value is None or value == "":
        return []

    searchable = value.lower()

    # Known monster attributes, in the order they are reported.
    known_attributes = (
        "demon",
        "dragon",
        "fiery",
        "golem",
        "kalphite",
        "leafy",
        "penance",
        "shade",
        "spectral",
        "undead",
        "vampyre",
        "xerician",
    )
    return [name for name in known_attributes if name in searchable]
def category(value: str) -> str:
"""Convert the category text entry from an OSRS Wiki infobox.
:param value: The extracted raw wiki text.
:return: A cleaned categories property value.
"""
category_list = list()
if value is None or value == "" or value.lower() == "no" or "