Skip to content

Instantly share code, notes, and snippets.

@spoonerf
Created November 23, 2021 11:59
Show Gist options
  • Save spoonerf/9b35c4dec0691577a759d218a78b2786 to your computer and use it in GitHub Desktop.
Save spoonerf/9b35c4dec0691577a759d218a78b2786 to your computer and use it in GitHub Desktop.
import wikipedia
import pandas as pd
import tempfile
import zipfile
import os
import json
from owid import walden
# Map every distinct species binomial in the Living Planet public CSV to the
# first result returned by a Wikipedia search, then save the mapping as JSON.

# Look up the raw "living_planet" dataset in the walden catalog.
raw_dataset = walden.Catalog().find_one("living_planet")

# Extract the archive into a scratch directory. Both the zip handle and the
# directory are released as soon as the CSV has been loaded into memory.
with tempfile.TemporaryDirectory(prefix="etl-") as tmp_dir:
    with zipfile.ZipFile(raw_dataset.local_path) as archive:
        archive.extractall(tmp_dir)
    df = pd.read_csv(
        os.path.join(tmp_dir, "Public data set/LPR2020data_public.csv"),
        encoding="latin-1",
    )

# Distinct binomials with underscores turned into spaces. Missing values are
# dropped so that NaN is never sent to the search API or used as a dict key.
# regex=False makes the literal replacement explicit across pandas versions.
binom = (
    df["Binomial"]
    .dropna()
    .drop_duplicates()
    .str.replace("_", " ", regex=False)
)

binom_dict = {}
for b in binom:
    # At most one result is requested; joining the returned list gives that
    # single string, or an empty string when the search returns nothing.
    bw = wikipedia.search(b, results=1)
    binom_dict[b] = "".join(bw)

with open("species_binomial_wiki.json", "w", encoding="utf-8") as fp:
    json.dump(binom_dict, fp, indent=2)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment