""" Author: PH01L Email: phoil@osrsbox.com Website: https://www.osrsbox.com Copyright (c) 2019, PH01L ############################################################################### This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see . ############################################################################### """ from base64 import decode import json import logging from pprint import pprint import aiofiles from pathlib import Path from typing import TypeVar import config LOG = logging.getLogger(__name__) SelfWikiPageText = TypeVar("SelfWikiPageText", bound="WikiPageText") class WikiPageText: """This class handles extraction of wiki text using an OSRS Wiki API query. :param base_url: The OSRS Wiki URL used for API queries. :param page_title: OSRS Wiki page titles used for API query. """ def __init__(self, base_url: str, page_title: str): self.base_url = base_url self.page_title = page_title self.wiki_text = None async def extract_page_wiki_text(self, session) -> SelfWikiPageText: """Extract wiki text from OSRS Wiki for a provided page name. This function uses the class attributes as input to query the OSRS Wiki API and extract the wiki text for a specific page. The page to query is determined by the page title. """ request = { "action": "parse", "prop": "wikitext", "format": "json", "page": self.page_title } pprint(f"Processing: {self.page_title}") # Perform HTTP GET request async with session.get(self.base_url, headers=config.custom_agent, params=request) as resp: try: data = await resp.json() except json.decoder.JSONDecodeError: pprint(f"Didn't get anything useful: {await resp.text()}") try: # Try to extract the wiki text from the HTTP response wiki_text = data["parse"]["wikitext"]["*"] except KeyError: # Set to None if wiki text extraction failed wiki_text = None self.wiki_text = wiki_text return self async def async_export_wiki_text_to_json(self, out_file_name: str): """Export all extracted wiki text to a JSON file. Querying the OSRS Wiki constantly is a bad approach. This function writes any extracted wiki text to a file to save re-querying the API. This function attempts to overwrite pre-existing wiki text entry in a file, where the key is the page title, and the value is the wiki text. :param out_file_name: The file name to save wiki text to. """ # Create dictionary for export json_data = {self.page_title: str(self.wiki_text)} out_file_name = Path(out_file_name) pprint(f"{json_data}") # Write dictionary to JSON file if not out_file_name.exists(): async with aiofiles.open(out_file_name, mode='w') as out_file: await out_file.write(json.dumps(json_data, indent=4)) else: async with aiofiles.open(out_file_name) as feeds_json: contents = await feeds_json.read() try: feeds = json.loads(contents) except json.decoder.json.JSONDecodeError: pprint(f"{self.page_title} - {self.wiki_text}") feeds[self.page_title] = str(self.wiki_text) async with open(out_file_name, mode='w') as out_file: await out_file.write(json.dumps(feeds, indent=4)) def export_wiki_text_to_json(self, out_file_name: str): """Export all extracted wiki text to a JSON file. Querying the OSRS Wiki constantly is a bad approach. This function writes any extracted wiki text to a file to save re-querying the API. This function attempts to overwrite pre-existing wiki text entry in a file, where the key is the page title, and the value is the wiki text. :param out_file_name: The file name to save wiki text to. """ # Create dictionary for export json_data = {self.page_title: str(self.wiki_text)} out_file_name = Path(out_file_name) pprint(f"Writing {self.page_title}") # Write dictionary to JSON file if not out_file_name.exists(): with open(out_file_name, mode='w') as out_file: out_file.write(json.dumps(json_data, indent=4)) else: with open(out_file_name) as feeds_json: feeds = json.load(feeds_json) feeds[self.page_title] = str(self.wiki_text) with open(out_file_name, mode='w') as out_file: out_file.write(json.dumps(feeds, indent=4))