Created
January 18, 2021 12:50
-
-
Save gullyn/7f5bff96d462e953f9d2e96bdfaa6ad2 to your computer and use it in GitHub Desktop.
code
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests, json, re | |
def _read_csv_rows(path):
    """Return the comma-split fields of every non-blank line in ``path``."""
    # The context manager closes the file; skipping blank lines keeps a
    # trailing newline from producing an empty row that fails on row[1].
    with open(path, "r") as handle:
        return [line.split(",") for line in handle.read().splitlines() if line.strip()]


def main():
    """Print per-state Wikipedia page lengths and a fill rule for each state.

    Reads three comma-separated files from the working directory:

    * ``states.txt`` -- column 0 is the page title passed to ``page_length``,
      column 1 is the key used to look the state up in the other two files.
    * ``state_codes.csv`` -- key (lower-cased) -> code (lower-cased), used as
      the selector name in the emitted rules.
    * ``pops.csv`` -- key (lower-cased) -> integer, presumably the state's
      population; each page length is divided by it.

    For every state it prints ``"<Name>: <length>"``, then prints one
    ``.<code> {fill: <color>;}`` line per state.
    """
    states = _read_csv_rows("states.txt")
    state_codes = {
        row[0].lower(): row[1].lower() for row in _read_csv_rows("state_codes.csv")
    }
    pops = {row[0].lower(): int(row[1]) for row in _read_csv_rows("pops.csv")}

    # Length-per-pops ratio for each page title.
    lengths = {}
    for state in states:
        length = page_length(state[0], True)
        lengths[state[0]] = length / pops[state[1]]
        print(f"{state[1].title()}: {length}")

    # Ratio at (and above) which get_color returns its darkest color.
    max_rat = 1.2
    styling = "".join(
        f".{state_codes[state[1]]} {{fill: {get_color(lengths[state[0]], max_rat)};}}\n"
        for state in states
    )
    print(styling)
def page_length(title, recursive=False):
    """Return the length of a Wikipedia page's rendered HTML.

    Fetches ``title`` through the English Wikipedia ``action=parse`` API and
    measures the length of the returned HTML text.

    Args:
        title: Page title as it should appear in the API query string.
        recursive: When true, also add the (non-recursive) length of every
            page returned by ``get_links`` for this page.

    Returns:
        The number of characters of HTML, summed over the page and, if
        ``recursive`` is set, its linked "Main article" pages.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status.
        KeyError: If the API response has no ``parse`` section.
    """
    # The title is interpolated as-is: recursive calls pass href fragments
    # from get_links, which are presumably already percent-encoded, so
    # routing them through ``params=`` would encode them a second time.
    response = requests.get(
        f"https://en.wikipedia.org/w/api.php?action=parse&page={title}&format=json",
        timeout=30,  # without a timeout a stalled connection blocks forever
    )
    # Fail on HTTP errors here rather than on an unparsable body below.
    response.raise_for_status()
    content = response.json()["parse"]["text"]["*"]

    sum_len = len(content)
    if not recursive:
        return sum_len

    # Linked pages are measured one level deep only (recursive stays False).
    return sum_len + sum(page_length(link) for link in get_links(content, title))
def get_links(content, title):
    """Return the wiki link targets listed in "Main article(s):" notes.

    Scans ``content`` (page HTML) for ``Main article:`` / ``Main articles:``
    followed by a run of ``<a href="/wiki/...">`` links separated by commas,
    spaces and/or "and". Every link in the run is considered, however many
    there are.

    Args:
        content: HTML text to search.
        title: Title of the page being scanned. Only its first
            space-separated word (case-insensitive) is used: a link is kept
            only if its target contains that word.

    Returns:
        A list of link targets (the part of the href after ``/wiki/``) with
        any ``#fragment`` removed, in the order they appear in ``content``.
    """
    # One whole note: the label plus one or more consecutive links. Matching
    # the run first and extracting targets second avoids relying on a
    # repeated capture group, which only remembers its last repetition.
    hatnote_pattern = (
        r'Main articles?: '
        r'(?:<a href="/wiki/[^"]+"[^>]*>.*?</a>(?:,? ?(?:and)? ?))+'
    )
    target_pattern = r'<a href="/wiki/([^"]+)"'

    keyword = title.split(" ")[0].lower()
    links = []
    for hatnote in re.finditer(hatnote_pattern, content):
        for target in re.findall(target_pattern, hatnote.group(0)):
            if keyword in target.lower():
                # Drop a section anchor so the whole page is referenced.
                links.append(target.split("#")[0])
    return links
def get_color(length, max_rat):
    """Map ``length`` onto a white-to-dark-blue ``rgba(...)`` color string.

    ``length / max_rat`` is clamped to the range 0..1. At 0 the result is
    white (255, 255, 255); at 1 the red and green channels reach 0 and the
    blue channel reaches 127.

    Args:
        length: Value to colorize.
        max_rat: Value of ``length`` at which the color is darkest.

    Returns:
        A string of the form ``"rgba(R, G, B, 1)"`` with R equal to G.
    """
    saturation = length / max_rat
    if saturation > 1:
        saturation = 1
    elif saturation < 0:
        saturation = 0

    # Red and green fade all the way out; blue only drops by half.
    blue = round(255 - 128 * saturation)
    red_green = round(255 - 255 * saturation)
    return f"rgba({red_green}, {red_green}, {blue}, 1)"
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment