osrsbox-db/builders/items/infobox_cleaner.py

495 lines
15 KiB
Python

"""
Author: PH01L
Email: phoil@osrsbox.com
Website: https://www.osrsbox.com
Description:
Various methods to help clean OSRS Wiki wikitext entries.
Copyright (c) 2021, PH01L
###############################################################################
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
###############################################################################
"""
import re
from datetime import datetime
import dateparser
# Source: runelite/runelite-wiki-scraper
WEIGHT_REDUCTION_EXTRACTOR = re.compile(
r"(?i)'''(?:In )?inventory:?''':? ([0-9.-]+) kg<br ?\/?> *'''Equipped:?''':? ([0-9.-]+)")
unequipable = [
286, # Orange goblin mail
287, # Blue goblin mail,
288, # Goblin mail
740, # Blue hat (Legends' Quest)
713, # Clue scroll
818, # Poisoned dart(p)
1235, # Poisoned dagger(p)
2422, # Blue partyhat (Draynor Bank Robbery)
2513, # Dragon chainbody (My Arm's Big Adventure)
4178, # Abyssal whip (My Arm's Big Adventure)
4180, # Dragon platelegs (My Arm's Big Adventure)
4181, # Mouth grip
4213, # New crystal bow
4235, # New crystal shield
5056, # Dwarven battleaxe
5057, # Dwarven battleaxe
5058, # Dwarven battleaxe
5059, # Dwarven battleaxe
5062, # Left boot
5063, # Right boot
5064, # Exquisite boots
5067, # Exquisite clothes
5684, # Poison dagger(p+)
5702, # Poison dagger(p++)
6788, # Torn robe
6789, # Torn robe
6818, # Bow-sword
6864, # Marionette handle
6893, # Leather boots (Mage Training Arena)
6894, # Adamant kiteshield (Mage Training Arena)
6895, # Adamant med helm (Mage Training Arena)
6897, # Rune longsword (Mage Training Arena)
6967, # Dragon med helm (My Arm's Big Adventure)
7804, # Zaros mjolnir
8871, # Crate with zanik [TODO: no attack speed]
8856, # Defensive shield [TODO: no weapon type]
9054, # Red goblin mail
9055, # Black goblin mail
9056, # Yellow goblin mail
9057, # Green goblin mail
9058, # Purple goblin mail
9059, # Pink goblin mail
9665, # Torch
9702, # Stick
9906, # Ghost buster 500
9907, # Ghost buster 500
9908, # Ghost buster 500
9909, # Ghost buster 500
9910, # Ghost buster 500
9911, # Ghost buster 500
9912, # Ghost buster 500
10840, # A jester stick
11165, # Phoenix crossbow
11167, # Phoenix crossbow
11700, # Bronze arrow (Barbarian Assault)
11701, # Iron arrow (Barbarian Assault)
11702, # Steel arrow (Barbarian Assault)
11703, # Mithril arrow (Barbarian Assault)
22664, # Scythe of vitur (JMod item)
22665, # Armadyl godsword (JMod item)
22666, # Rubber chicken (JMod item)
10556, # Attacker icon
22346, # Attacker icon
22347, # Attacker icon
22348, # Attacker icon
22349, # Attacker icon
22721, # Attacker icon
22722, # Attacker icon
22723, # Attacker icon
22729, # Attacker icon
22730, # Attacker icon
23460, # Attacker icon
23461, # Attacker icon
23462, # Attacker icon
23463, # Attacker icon
23464, # Attacker icon
23465, # Attacker icon
10558, # Defender icon
22340, # Defender icon
22341, # Defender icon
22342, # Defender icon
22343, # Defender icon
22344, # Defender icon
22345, # Defender icon
22725, # Defender icon
22726, # Defender icon
22727, # Defender icon
22728, # Defender icon
23466, # Defender icon
23467, # Defender icon
23468, # Defender icon
23469, # Defender icon
23470, # Defender icon
10557, # Collector icon
22312, # Collector icon
22313, # Collector icon
22314, # Collector icon
22315, # Collector icon
22337, # Collector icon
22338, # Collector icon
22339, # Collector icon
22724, # Collector icon
23471, # Collector icon
23472, # Collector icon
23473, # Collector icon
23474, # Collector icon
23475, # Collector icon
23476, # Collector icon
23477, # Collector icon
10559, # Healer icon
10567, # Healer icon
20802, # Healer icon
22308, # Healer icon
22309, # Healer icon
22310, # Healer icon
22311, # Healer icon
23478, # Healer icon
23479, # Healer icon
23480, # Healer icon
23481, # Healer icon
23482, # Healer icon
23483, # Healer icon
23484, # Healer icon
23485, # Healer icon
23486, # Healer icon
13538, # Shayzien supply gloves (1)
13539, # Shayzien supply boots (1)
13540, # Shayzien supply helm (1)
13541, # Shayzien supply greaves (1)
13542, # Shayzien supply platebody (1)
13543, # Shayzien supply gloves (2)
13544, # Shayzien supply boots (2)
13545, # Shayzien supply helm (2)
13546, # Shayzien supply greaves (2)
13547, # Shayzien supply platebody (2)
13548, # Shayzien supply gloves (3)
13549, # Shayzien supply boots (3)
13550, # Shayzien supply helm (3)
13551, # Shayzien supply greaves (3)
13552, # Shayzien supply platebody (3)
13553, # Shayzien supply gloves (4)
13554, # Shayzien supply boots (4)
13555, # Shayzien supply helm (4)
13556, # Shayzien supply greaves (4)
13557, # Shayzien supply platebody (4)
13558, # Shayzien supply gloves (5)
13559, # Shayzien supply boots (5)
13560, # Shayzien supply helm (5)
13561, # Shayzien supply greaves (5)
13562, # Shayzien supply platebody (5)
21428, # Wilderness cape (Wilderness Wars)
21429, # Wilderness cape (Wilderness Wars)
21430, # Wilderness cape (Wilderness Wars)
21431, # Wilderness cape (Wilderness Wars)
21432, # Wilderness cape (Wilderness Wars)
21433, # Wilderness champion amulet
21434, # Wilderness cape (Wilderness Wars, Champion)
21435, # Wilderness cape (Wilderness Wars, Champion)
21436, # Wilderness cape (Wilderness Wars, Champion)
21437, # Wilderness cape (Wilderness Wars, Champion)
21438, # Wilderness cape (Wilderness Wars, Champion)
22812, # Dragon knife (animation item)
22814, # Dragon knife (animation item)
25212, # Blue icon
25213, # Blue icon
25214, # Blue icon
25215, # Blue icon
25216, # Blue icon
25217, # Blue icon
25218, # Blue icon
25219, # Blue icon
25220, # Blue icon
25221, # Blue icon
25222, # Blue icon
25223, # Blue icon
25224, # Blue icon
25225, # Blue icon
25226, # Blue icon
25227, # Blue icon
25228, # Red icon
25229, # Red icon
25230, # Red icon
25231, # Red icon
25232, # Red icon
25233, # Red icon
25234, # Red icon
25235, # Red icon
25236, # Red icon
25237, # Red icon
25238, # Red icon
25239, # Red icon
25240, # Red icon
25241, # Red icon
25242, # Red icon
25243, # Red icon
25987, # Tumeken's heka
25989, # Tumeken's heka (uncharged)
]
def clean_wikitext(value: str) -> str:
"""Generic infobox property cleaner.
This helper method is a generic cleaner for all infobox template properties.
The value is string cast, stipped of new line characters, then any square
brackets (wikitext links) are stripped, then anything in trailing brackets,
then any HTML line breaks are removed.
:param value: Template value extracted in raw wikitext format.
:return value: Template value with square brackets stripped.
"""
value = str(value)
value = value.strip()
value = re.sub(r'[\[\]]+', '', value) # Removes all "[" and "]"
value = re.sub(r' \([^()]*\)', '', value) # Removes " (anything)"
value = re.sub(r'<!--(.*?)-->', '', value) # Removes "<!--anything-->"
value = re.sub(r'<br(.*)', '', value) # Removes "<br"
return value
def caller(value: str, prop: str):
"""Calls specific function based on property name.
Since there is a dict loop method to reduce code duplication,
there needs to be a way to call the function that matches the
property. This helps call the function and return the cleaned
values.
:param value: Template value extracted in raw wikitext format.
:param prop: The template property being processed.
:return value: Cleaned template value.
"""
value = globals()[prop](value)
return value
def weight(value: str, item_id: int) -> float:
"""Convert the weight entry from a OSRS Wiki infobox to a float.
:param value: Template value extracted in raw wikitext format.
:param item_id: The item ID number.
:return weight: The weight of an item.
"""
if value is None or value == "":
return None
item_id = int(item_id)
# Handle weight reducing items as ID changes when equipped
if item_id in [10073]:
# Spotted cape
return -2.2
elif item_id in [89, 10554, 10074]:
# Boots of lightness, Penance gloves, Spottier cape
return -4.5
elif item_id in [13342, 13340, 13341]:
# Max cape, Agility cape, Agility cape (t)
return -4.0
elif item_id in [11851, 13580, 13592, 13604, 13616, 13628, 13668, 21063]:
# Graceful hood variants
return -3.0
elif item_id in [11853, 13582, 13594, 13606, 13618, 13630, 13670, 21066]:
# Graceful cape variants
return -4.0
elif item_id in [11855, 13584, 13596, 13608, 13620, 13632, 13672, 21069]:
# Graceful top variants
return -5.0
elif item_id in [11857, 13586, 13598, 13610, 13622, 13634, 13674, 21072]:
# Graceful legs variants
return -6.0
elif item_id in [11859, 13588, 13600, 13612, 13624, 13636, 13676, 21075]:
# Graceful gloves variants
return -3.0
elif item_id in [11861, 13590, 13602, 13614, 13626, 13638, 13678, 21078]:
# Graceful boots variants
return -4.0
if value.endswith("kg"):
value = value[:-2].strip()
else:
value = str(value).strip()
# The weight property is usally quite clean...
# Try float cast the value, and return
try:
return float(value)
except ValueError:
pass
# Handle when there is inventory/equipable weights
# Source: runelite/runelite-wiki-scraper
reducer = WEIGHT_REDUCTION_EXTRACTOR.match(value)
if reducer:
# Get inventory weight (as worn wieght handled above)
# Group 1 is inventory, 2 is equipped
value = reducer.group(1)
value = float(value)
return value
return None
def quest(value: str) -> bool:
"""Convert the quest entry from an OSRS Wiki infobox to a boolean.
:param value: The extracted raw wiki text.
:return quest: A boolean to identify if an item is associated with a quest.
"""
if value is None:
return False
quest = str(value).strip().lower()
if quest in ["yes"]:
return True
elif "[[" in quest:
return True
else:
return False
def release_date(value: str) -> str:
"""Convert the release date entry to ISO 8601 date format.
:param value: The extracted raw wiki text.
:return release_date: A cleaned release date of an item.
"""
release_date = clean_wikitext(value)
if release_date == "":
return None
try:
release_date = datetime.strptime(release_date, "%d %B %Y")
return release_date.date().isoformat()
except ValueError:
pass
try:
release_date = dateparser.parse(release_date)
release_date = release_date.date().isoformat()
except (ValueError, TypeError, AttributeError):
return None
def tradeable(value: str) -> bool:
"""Convert the tradeable entry from an OSRS Wiki infobox to a boolean.
:param value: The extracted raw wiki text.
:return quest: A boolean to identify if an item is tradeable.
"""
# Clean a quest value
tradeable = str(value).strip().lower()
if tradeable in ["yes"]:
return True
else:
return False
def examine(value: str, name: str) -> str:
"""Convert the examine text entry from an OSRS Wiki infobox.
:param value: The extracted raw wiki text.
:param name: The name of the item being processed.
:return tradeable: A cleaned tradeable property of an item.
"""
examine = str(value)
examine = examine.strip()
# Apart from "Clue scroll (master)" and "Clue scroll (beginner)"
# clues have variable examine text
clue_scrolls = ["Clue scroll (easy)",
"Clue scroll (medium)",
"Clue scroll (hard)",
"Clue scroll (elite)"]
if name in clue_scrolls:
return "A clue!"
# Medium clue keys are also variable (but not other key variants)
# Set to first value available
if name == "Key (medium)":
return "A key to unlock a treasure chest."
# Ghrim's book
# Example: ''Managing Thine Kingdom for Noobes'' by A. Ghrim.
if name == "Ghrim's book":
examine = examine.replace("''", "")
return examine
# Pet smoke devil
# Example: <nowiki>*cough*</nowiki>
if name == "Pet smoke devil":
return "*cough*"
# Fix for quest related examine texts (mostly for keys)
examine = re.sub(r' \([^()]*\)', '', examine)
# Remove sic
examine = examine.replace("{{sic}}", "")
return examine
def stats(value: str) -> int:
"""Convert a item stat value to an integer.
:param value: Template value extracted from raw wikitext.
:return value: A cleaned stat value as an int.
"""
try:
return int(value)
except ValueError:
return 0
def slot(value: str) -> str:
"""Convert a slot value to a cleaned string.
:param value: Template value extracted from raw wikitext.
:return value: A cleaned slot value as an str.
"""
if "\n" in value: # TODO: Remove when comment fixed in fish sack barrel
value = value.split("\n")[0]
try:
return value.lower()
except ValueError:
return ""
def speed(value: str) -> int:
"""Convert a item weapon speed value to an integer.
:param value: Template value extracted from raw wikitext.
:return value: A cleaned speed value as an int.
"""
try:
return int(value)
except ValueError:
return None
def weapon_type(value: str) -> str:
"""Convert a item weapon type value to a clean string.
:param value: Template value extracted from raw wikitext.
:return value: A cleaned weapon type value.
"""
if not value:
return None
value = value.replace("{", "").replace("}", "")
value = value.replace(" ", "_")
value = value.lower()
try:
value = value.split("|")[1]
except IndexError:
return None
return value