Skip to content

Instantly share code, notes, and snippets.

@gullyn
Created January 18, 2021 12:50
Show Gist options
  • Save gullyn/7f5bff96d462e953f9d2e96bdfaa6ad2 to your computer and use it in GitHub Desktop.
Save gullyn/7f5bff96d462e953f9d2e96bdfaa6ad2 to your computer and use it in GitHub Desktop.
code
import requests, json, re
def main():
    """Print Wikipedia page lengths per state and CSS fill rules for a map.

    Reads three comma-separated files from the current working directory:

    * ``states.txt`` - field 0 is the page title passed to ``page_length``;
      field 1 is the key used to look the state up in the two tables below.
    * ``state_codes.csv`` - field 0 -> field 1, both lower-cased on load.
      The value is emitted as a CSS class selector.
    * ``pops.csv`` - field 0 (lower-cased) -> field 1 parsed as ``int``.

    For every row of ``states.txt`` it prints ``"<Name>: <length>"``, then
    prints one ``.<code> {fill: <color>;}`` rule per state, where the color
    comes from ``get_color`` applied to length divided by population.

    Raises:
        KeyError: if a state key is missing from ``pops.csv`` or
            ``state_codes.csv``.
        ValueError: if a population field is not an integer.
    """

    def read_rows(path):
        # Close the file deterministically, and skip blank lines: a trailing
        # newline would otherwise yield a one-field row and an IndexError.
        with open(path, "r") as handle:
            return [
                line.split(",")
                for line in handle.read().split("\n")
                if line.strip()
            ]

    states = read_rows("states.txt")
    state_codes = {
        row[0].lower(): row[1].lower() for row in read_rows("state_codes.csv")
    }
    pops = {row[0].lower(): int(row[1]) for row in read_rows("pops.csv")}

    # Characters of Wikipedia HTML per inhabitant, keyed by page title.
    lengths = {}
    for state in states:
        length = page_length(state[0], True)
        lengths[state[0]] = length / pops[state[1]]
        print(f"{state[1].title()}: {length}")

    # Ratio at (or above) which get_color reaches its darkest shade.
    max_rat = 1.2
    styling = "".join(
        f".{state_codes[state[1]]} {{fill: {get_color(lengths[state[0]], max_rat)};}}\n"
        for state in states
    )
    print(styling)
def page_length(title, recursive=False):
    """Return the length, in characters, of a Wikipedia page's parsed HTML.

    The page is fetched through the English Wikipedia ``action=parse`` API
    and the length of the ``parse.text["*"]`` string is measured.

    Args:
        title: Page title. It is interpolated into the query string as-is,
            so it must already be safe to embed in a URL.
        recursive: When true, also add the length of every page returned by
            ``get_links`` for this page. Those pages are fetched
            non-recursively, so the traversal is exactly one level deep.

    Returns:
        The total number of characters as an ``int``.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: if the JSON response has no ``parse``/``text``/``*`` entry
            (presumably what the API returns for an unknown page - verify).
    """
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(
        f"https://en.wikipedia.org/w/api.php?action=parse&page={title}&format=json",
        timeout=30,
    )
    # Fail loudly on HTTP errors instead of trying to parse an error body.
    response.raise_for_status()
    content = response.json()["parse"]["text"]["*"]

    sum_len = len(content)
    if not recursive:
        return sum_len
    return sum_len + sum(page_length(link) for link in get_links(content, title))
def get_links(content, title):
    """Extract "Main article(s):" link targets that relate to ``title``.

    Args:
        content: HTML text to scan.
        title: Page title; only its first space-separated word is used, as a
            case-insensitive substring filter on the link targets.

    Returns:
        A list of ``/wiki/`` path targets (the part after ``/wiki/``) with
        any ``#fragment`` removed, in the order they were found.

    NOTE: the pattern has two capture groups, and a repeated group keeps only
    its last repetition, so each match yields at most two targets.
    """
    pattern = re.compile(
        r"Main articles?: (?:<a href=\"\/wiki\/(.+?)\".+?>.+?</a> ?a?n?d? ?)?(?:<a href=\"\/wiki\/(.+?)\".+?>.+?</a> ?a?n?d? ?){1,}"
    )
    keyword = title.split(" ")[0].lower()
    return [
        target.split("#")[0]
        for groups in pattern.findall(content)
        for target in groups
        # Empty strings are groups that did not participate in the match.
        if len(target) > 0 and keyword in target.lower()
    ]
def get_color(length, max_rat):
    """Map a value to a CSS ``rgba(...)`` string from white to dark blue.

    Args:
        length: The value to colour.
        max_rat: The value at which the darkest shade is reached.

    Returns:
        ``"rgba(R, G, B, 1)"`` where red and green share one value that falls
        from 255 to 0 and blue falls from 255 to 127 as ``length / max_rat``
        goes from 0 to 1. Ratios outside [0, 1] are clamped.
    """
    fraction = length / max_rat
    # Clamp into [0, 1] so out-of-range inputs get the extreme shades.
    if fraction > 1:
        fraction = 1
    elif fraction < 0:
        fraction = 0

    red_green = round(255 - fraction * 255)
    blue = round(255 - fraction * 128)
    return f"rgba({red_green}, {red_green}, {blue}, 1)"
# Run the report only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment