""" Author: PH01L Email: phoil@osrsbox.com Website: https://www.osrsbox.com Description: Various methods to help clean OSRS Wiki wikitext entries. Copyright (c) 2021, PH01L ############################################################################### This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . ############################################################################### """ import re import dateparser def clean_wikitext(value: str) -> str: """Generic infobox property cleaner. This helper method is a generic cleaner for all infobox template properties. The value is string cast, stipped of new line characters, then any square brackets (wikitext links) are stripped, then anything in trailing brackets, then any HTML line breaks are removed. :param value: Template value extracted in raw wikitext format. :return value: Cleaned template value. """ value = str(value) value = value.strip() value = re.sub(r'[\[\]]+', '', value) # Removes all "[" and "]" value = re.sub(r' \([^()]*\)', '', value) # Removes " (anything)" value = re.sub(r'', '', value) # Removes "" value = re.sub(r' bool: """Convert the members property to a boolean. :param value: Template value extracted from raw wikitext. :return value: Template value converted into a boolean. """ if value.lower() in ["true", "yes"]: return True else: return False def release_date(value: str) -> str: """Convert the release date entry to an ISO 8601 date str. From the wiki, the usual date format is: dd Month YYYY But it will have wikitext markup: [[31 October]] [[2005]] Returned value is ISO 8601 date string: YYYY-MM-DD :param value: Template value extracted from raw wikitext. :return value: A cleaned release date in ISO 8601 date format. """ if not value: return None value = value.replace("[", "").replace("]", "") print(value) try: return dateparser.parse(value).date().isoformat() except (ValueError, AttributeError): return None def hitpoints(value: str) -> int: """Convert the hitpoints entry to an integer. :param value: Template value extracted from raw wikitext. :return value: A cleaned hitpoints value as an integer. """ if not value: return None try: return int(value) except ValueError: return None def max_hit(value: str) -> int: """Convert the max_hit entry to an integer. :param value: Template value extracted from raw wikitext. :return value: A cleaned max_hit value as an integer. """ if not value: return None value = re.split("[ ,]", value)[0] try: return int(value) except ValueError: return None def attack_type(value: str) -> list: """Convert the attack type entry to a list of strings. :param value: The extracted raw wiki text. :return value: A cleaned attack_type value as a list of strings. """ value_list = [] if value is None or value == "": return value_list value = value.lower() value = value.replace("[", "").replace("]", "") # Check for specific attack type strings... if "melee" in value: value_list.append("melee") if "slash" in value: value_list.append("slash") if "crush" in value: value_list.append("crush") if "stab" in value: value_list.append("stab") if "ranged" in value: value_list.append("ranged") if "magic" in value: value_list.append("magic") if "typeless" in value: value_list.append("typeless") if "dragonfire" in value: value_list.append("dragonfire") return value_list def attack_speed(value: str) -> int: """Convert the attack_speed entry to an integer. :param value: Template value extracted from raw wikitext. :return value: A cleaned attack_speed value as an integer. """ if not value: return None try: return int(value) except ValueError: return None def aggressive(value: str) -> bool: """Convert the aggressive property to a boolean. :param value: Template value extracted from raw wikitext. :return value: A cleaned aggressive value as a boolean. """ value = clean_wikitext(value) if value.lower() in ["true", "yes"]: return True elif value.split(" ")[0].lower in ["true", "yes"]: return True else: return False def poisonous(value: str) -> bool: """Convert the poisonous property to a boolean. :param value: Template value extracted from raw wikitext. :return value: A cleaned poisonous value as a boolean. """ if not value: return False value = clean_wikitext(value) if value.lower() in ["true", "yes"]: return True elif value.split(" ")[0].lower in ["true", "yes"]: return True else: return False def venomous(value: str) -> bool: """Convert the venomous property to a boolean. :param value: Template value extracted from raw wikitext. :return value: A cleaned venomous value as a boolean. """ if not value: return False if "venom" in value.lower(): return True else: return False def immune_poison(value: str) -> bool: """Convert the immune_poison property to a boolean. :param value: Template value extracted from raw wikitext. :return value: A cleaned immune_poison value as a boolean. """ if not value: return False if value.lower() in ["true", "yes"]: return True else: return False def immune_venom(value: str) -> bool: """Convert the immune_venom property to a boolean. :param value: Template value extracted from raw wikitext. :return value: A cleaned immune_venom value as a boolean. """ if not value: return False if value.lower() in ["true", "yes"]: return True else: return False def attributes(value: str) -> str: """Convert the attributes text entry to a list. :param value: Template value extracted from raw wikitext. :return value: A cleaned attributes value as a list. """ attributes_list = list() if value is None or value == "": return attributes_list value = value.lower() # Check for specific melee attack types if "demon" in value: attributes_list.append("demon") if "dragon" in value: attributes_list.append("dragon") if "fiery" in value: attributes_list.append("fiery") if "golem" in value: attributes_list.append("golem") if "kalphite" in value: attributes_list.append("kalphite") if "leafy" in value: attributes_list.append("leafy") if "penance" in value: attributes_list.append("penance") if "shade" in value: attributes_list.append("shade") if "spectral" in value: attributes_list.append("spectral") if "undead" in value: attributes_list.append("undead") if "vampyre" in value: attributes_list.append("vampyre") if "xerician" in value: attributes_list.append("xerician") return attributes_list def category(value: str) -> str: """Convert the category text entry from an OSRS Wiki infobox. :param value: The extracted raw wiki text. :return: A cleaned categories property value. """ category_list = list() if value is None or value == "" or value.lower() == "no" or "