Last active
November 10, 2017 20:17
-
-
Save maxhawkins/7128380393aef64b5f66dbea92944431 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import html | |
import random | |
import requests | |
import sys | |
def get_abstract(abstract_id):
    '''Fetch one document summary by id from PubMed's esummary JSON API.

    abstract_id: the PubMed id, as an int or a string of digits.

    Returns the summary item (the entry keyed by the id inside the
    response's 'result' object), or None when the API reports
    "cannot get document summary" for that id.

    Raises RuntimeError for any other error reported on the item, and
    whatever `requests` raises for HTTP failures or timeouts.
    '''
    # The response keys its results by the id as a string.
    abstract_id = str(abstract_id)
    url = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esummary.fcgi'
    params = {
        'db': 'pubmed',
        'retmode': 'json',
        'rettype': 'abstract',
        'id': abstract_id,
    }
    # Without a timeout a stalled connection would block forever.
    resp = requests.get(url, params=params, timeout=30)
    resp.raise_for_status()
    item = resp.json()['result'][abstract_id]

    error = item.get('error')
    if error is None:
        return item
    if error == "cannot get document summary":
        # The API's way of saying this id has no record; not a failure.
        return None
    # StandardError does not exist in Python 3; RuntimeError is the
    # closest built-in for "the remote service reported a problem".
    raise RuntimeError(error)
def find_latest_abstract(first=29121706, last=35000000):
    '''Binary-search for the highest abstract id that currently exists.

    first: an id assumed to exist (lower bound of the search).
    last: an id assumed NOT to exist yet (upper bound of the search).

    Returns the largest id in the range for which get_abstract() returned
    a record during the search. The search treats "exists" as monotonic
    in the id, so a missing id below the newest one can pull the result
    down.

    Raises RuntimeError if the abstract at `last` already exists, because
    then the upper bound is too low to bracket the newest id.
    NOTE(review): the default upper bound was chosen in 2017; confirm it
    still exceeds the newest id, or pass a larger `last`.
    '''
    if get_abstract(last):
        # StandardError is not defined in Python 3; use RuntimeError.
        raise RuntimeError(
            'find_latest_abstract: far-future abstract %d unexpectedly exists' % last)
    while first <= last:
        # Integer division keeps the midpoint exact (no float round-trip).
        midpoint = (first + last) // 2
        if get_abstract(midpoint):
            first = midpoint + 1
        else:
            last = midpoint - 1
    # `first` ends one past the last id that was found to exist.
    return first - 1
def get_random_abstract(min_id, max_id):
    '''Return a randomly chosen abstract whose id lies in [min_id, max_id].

    Ids are drawn uniformly (both ends inclusive) and looked up with
    get_abstract(); draws that yield no record are discarded and a new id
    is tried, so the loop only ends once a lookup returns a truthy result.
    '''
    found = None
    while not found:
        candidate = random.randint(min_id, max_id)
        found = get_abstract(candidate)
    return found
def compose_tweet(abstract):
    '''Build tweet text from an abstract: its title, a blank line, its URL.

    abstract: a mapping with a 'title' (HTML entities are unescaped) and
    a 'uid' (appended to the PubMed URL).

    The result is limited to 280 characters as measured by len(); a title
    that would not fit is cut short and ends with an ellipsis character.
    '''
    max_chars = 280
    separator = "\n\n"
    marker = "…"

    text = html.unescape(abstract['title'])
    link = 'https://www.ncbi.nlm.nih.gov/pubmed/%s' % abstract['uid']

    # Characters left for the title once the separator and link are placed.
    budget = max_chars - len(link) - len(separator)
    if len(text) <= budget:
        return text + separator + link

    # Reserve room for the marker so the shortened title fills the budget.
    clipped = text[:budget - len(marker)]
    return ''.join((clipped, marker, separator, link))
def main():
    '''Print a tweet for one randomly selected PubMed abstract.

    On a Python 2 interpreter this prints a notice and returns without
    doing anything else.
    '''
    if sys.version_info[0] < 3:
        print('python 3 is required to run this script')
        return

    # Lower bound: a paper from roughly 10 years ago.
    oldest = 25635855
    # Upper bound: the most recent paper the search can locate.
    newest = find_latest_abstract()

    print(compose_tweet(get_random_abstract(oldest, newest)))
# Run main() only when executed as a script, not when imported.
if '__main__' == __name__:
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment