Created
January 17, 2021 12:48
-
-
Save gullyn/991dbd5e74738d28a1ce6f918eaa6dab to your computer and use it in GitHub Desktop.
Wikipedia page lengths
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests, json, re | |
def main():
    """Print each state's Wikipedia page length and a CSS fill rule per state.

    Reads two comma-separated files from the current working directory:

    * ``states.txt`` -- each row's first field is passed to ``page_length``
      as the page title; the second field is printed (title-cased) and used
      as the lookup key into the state-code table.
    * ``state_codes.csv`` -- the first field (lower-cased) is the key and the
      second field (lower-cased) is the value used as the CSS class name.

    For every state a ``"<Name>: <length>"`` line is printed, followed at the
    end by the accumulated stylesheet of ``.<code> {fill: <color>;}`` rules.

    Raises:
        KeyError: if a state's second field has no entry in the code table.
    """
    # Context managers close the files deterministically; blank rows (for
    # example the one produced by a trailing newline) are skipped so they
    # cannot trigger an IndexError further down.
    with open("states.txt", "r") as states_file:
        states = [
            row.split(",")
            for row in states_file.read().splitlines()
            if row.strip()
        ]

    state_codes = {}
    with open("state_codes.csv", "r") as codes_file:
        for row in codes_file.read().splitlines():
            if not row.strip():
                continue
            fields = row.split(",")
            state_codes[fields[0].lower()] = fields[1].lower()

    rules = []
    for state in states:
        length = page_length(state[0], True)
        print(f"{state[1].title()}: {length}")
        rules.append(f".{state_codes[state[1]]} {{fill: {get_color(length)};}}\n")

    # Each rule already ends with a newline, so joining with "" reproduces the
    # same text the original string concatenation built.
    print("".join(rules))
def page_length(title, recursive=False):
    """Return the length of a Wikipedia page's rendered HTML.

    The page is fetched through the English Wikipedia ``action=parse`` API
    and measured as ``len()`` of the HTML string stored under
    ``parse -> text -> *`` in the JSON response.

    Args:
        title: Page title, interpolated as-is into the request URL.
        recursive: When true, the lengths of the pages returned by
            ``get_links`` for this page are added as well. Those pages are
            fetched non-recursively, so the traversal is one level deep.

    Returns:
        The summed length as an int.

    Raises:
        requests.RequestException: on timeout, connection failure, or an
            HTTP error status.
        KeyError: if the decoded response has no ``parse``/``text``/``*``
            entry.
    """
    # Without a timeout requests may wait indefinitely on a stalled
    # connection; fail instead of hanging the whole run.
    req = requests.get(
        f"https://en.wikipedia.org/w/api.php?action=parse&page={title}&format=json",
        timeout=30,
    )
    # Surface HTTP errors explicitly rather than trying to decode an error
    # page as JSON.
    req.raise_for_status()

    content = json.loads(req.content)["parse"]["text"]["*"]
    sum_len = len(content)
    if not recursive:
        return sum_len
    return sum_len + sum(page_length(link) for link in get_links(content))
def get_links(content):
    """Collect the ``/wiki/`` targets named in "Main article(s):" notes.

    Args:
        content: HTML text to scan.

    Returns:
        A list of link targets in match order, each cut off at the first
        ``#`` so fragment identifiers are dropped. Empty capture groups are
        ignored.

    NOTE: the pattern has two capture groups and the second one sits inside
    a repeated group, so ``re.findall`` reports only the last repetition for
    it. A single match therefore contributes at most two targets.
    """
    pattern = r"Main articles?: (?:<a href=\"\/wiki\/(.+?)\".+?>.+?</a> ?a?n?d? ?)?(?:<a href=\"\/wiki\/(.+?)\".+?>.+?</a> ?a?n?d? ?){1,}"
    return [
        target.split("#")[0]
        for groups in re.findall(pattern, content)
        for target in groups
        if target
    ]
def get_color(length, max_length=5800883):
    """Map a length onto a white-to-dark-red ``rgba()`` CSS color string.

    Args:
        length: Value to colorize.
        max_length: Length that maps to the darkest color. Defaults to
            5800883, the constant this function previously hard-coded.

    Returns:
        ``"rgba(R, GB, GB, 1)"`` where R falls from 255 to 127 and the shared
        green/blue value falls from 255 to 0 as ``length / max_length`` rises
        from 0 to 1. Ratios outside that range are clamped.

    Raises:
        ValueError: if ``max_length`` is not positive.
    """
    if max_length <= 0:
        raise ValueError("max_length must be positive")

    # Clamp so out-of-range lengths still produce a valid color.
    ratio = max(min(length / max_length, 1), 0)
    red = round(255 - ratio * 128)
    green_blue = round(255 - ratio * 255)
    return f"rgba({red}, {green_blue}, {green_blue}, 1)"
# Run the report only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
ALABAMA | AL | |
---|---|---|
ALASKA | AK | |
ARIZONA | AZ | |
ARKANSAS | AR | |
CALIFORNIA | CA | |
COLORADO | CO | |
CONNECTICUT | CT | |
DELAWARE | DE | |
DISTRICT OF COLUMBIA | DC | |
FLORIDA | FL | |
GEORGIA | GA | |
HAWAII | HI | |
IDAHO | ID | |
ILLINOIS | IL | |
INDIANA | IN | |
IOWA | IA | |
KANSAS | KS | |
KENTUCKY | KY | |
LOUISIANA | LA | |
MAINE | ME | |
MARYLAND | MD | |
MASSACHUSETTS | MA | |
MICHIGAN | MI | |
MINNESOTA | MN | |
MISSISSIPPI | MS | |
MISSOURI | MO | |
MONTANA | MT | |
NEBRASKA | NE | |
NEVADA | NV | |
NEW HAMPSHIRE | NH | |
NEW JERSEY | NJ | |
NEW MEXICO | NM | |
NEW YORK | NY | |
NORTH CAROLINA | NC | |
NORTH DAKOTA | ND | |
OHIO | OH | |
OKLAHOMA | OK | |
OREGON | OR | |
PENNSYLVANIA | PA | |
RHODE ISLAND | RI | |
SOUTH CAROLINA | SC | |
SOUTH DAKOTA | SD | |
TENNESSEE | TN | |
TEXAS | TX | |
UTAH | UT | |
VERMONT | VT | |
VIRGINIA | VA | |
WASHINGTON | WA | |
WEST VIRGINIA | WV | |
WISCONSIN | WI | |
WYOMING | WY |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
California,california | |
Texas,texas | |
Florida,florida | |
New_York_(state),new york | |
Pennsylvania,pennsylvania | |
Illinois,illinois | |
Ohio,ohio | |
Georgia_(U.S._state),georgia | |
North_Carolina,north carolina | |
Michigan,michigan | |
New_Jersey,new jersey | |
Virginia,virginia | |
Washington_(state),washington | |
Arizona,arizona | |
Massachusetts,massachusetts | |
Tennessee,tennessee | |
Indiana,indiana | |
Missouri,missouri | |
Maryland,maryland | |
Wisconsin,wisconsin | |
Colorado,colorado | |
Minnesota,minnesota | |
South_Carolina,south carolina | |
Alabama,alabama | |
Louisiana,louisiana | |
Kentucky,kentucky | |
Oregon,oregon | |
Oklahoma,oklahoma | |
Connecticut,connecticut | |
Utah,utah | |
Iowa,iowa | |
Nevada,nevada | |
Arkansas,arkansas | |
Mississippi,mississippi | |
Kansas,kansas | |
New_Mexico,new mexico | |
Nebraska,nebraska | |
Idaho,idaho | |
West_Virginia,west virginia | |
Hawaii,hawaii | |
New_Hampshire,new hampshire | |
Maine,maine | |
Montana,montana | |
Rhode_Island,rhode island | |
Delaware,delaware | |
South_Dakota,south dakota | |
North_Dakota,north dakota | |
Alaska,alaska | |
Vermont,vermont | |
Wyoming,wyoming |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment