Created
March 19, 2017 10:22
-
-
Save ErikGartner/8022223b9ee4dcfc66f4f5f343396af0 to your computer and use it in GitHub Desktop.
Whitewolf wikia dumper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
# Root URL of the wiki. Request paths are appended directly to it (no
# separator is inserted), so the trailing slash must be kept.
BASE_URL = 'http://whitewolf.wikia.com/' | |
def scrape_artifacts(offset=''):
    """Fetch one page of the wiki's article list.

    Issues a GET request to the ``api/v1/Articles/List`` path under
    ``BASE_URL`` with ``expand=1`` and the given ``offset`` as query
    parameters.

    Args:
        offset: Pagination offset sent to the API; the empty string is
            used for the first request.

    Returns:
        A ``(items, next_offset)`` tuple. ``items`` is the ``'items'``
        entry of the JSON response; ``next_offset`` is its ``'offset'``
        entry, or ``False`` when the response carries none. If the HTTP
        status code is anything other than 200, ``(False, False)`` is
        returned instead.
    """
    url = '{}{}'.format(BASE_URL, 'api/v1/Articles/List')
    response = requests.get(url, params={'expand': 1, 'offset': offset})
    if response.status_code != 200:
        return (False, False)

    payload = response.json()
    # A response without an 'offset' key is reported to the caller as False.
    if 'offset' in payload:
        next_offset = payload['offset']
    else:
        next_offset = False
    return (payload['items'], next_offset)
def scrape_all():
    """Collect the article list by following pagination offsets.

    Calls ``scrape_artifacts`` repeatedly, starting with an empty offset
    and feeding each returned offset into the next call. The loop ends
    when the returned offset is ``False`` or when the returned items are
    ``False`` (which ``scrape_artifacts`` uses to signal a failed request).
    A progress line is printed after every page.

    Returns:
        A list with the items of every page fetched. If a request fails
        part-way through, the items gathered up to that point are returned.
    """
    offset = ''
    full_db = []
    while offset is not False:
        items, offset = scrape_artifacts(offset)
        if items is False:
            break
        full_db.extend(items)
        # A page can come back with an empty item list; indexing items[-1]
        # would then raise IndexError and lose everything collected so far.
        last_title = items[-1]['title'] if items else None
        print('Scraping: offset: {}, size: {}, last: {}'.format(offset,
                                                                len(full_db),
                                                                last_title))
    return full_db
def build_index():
    """Scrape the article list and save it as ``article_index.json``.

    The list returned by ``scrape_all`` is serialized as JSON with a
    two-space indent. The file is opened in write mode in the current
    working directory, so existing content is overwritten.
    """
    articles = scrape_all()
    with open('article_index.json', 'w') as index_file:
        json.dump(obj=articles, fp=index_file, indent=2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment