Created
October 21, 2021 22:39
-
-
Save neubig/d078c62a2e2cbbe153006087d42ae036 to your computer and use it in GitHub Desktop.
Comparing citations between EMNLP 2020 and EMNLP 2020 findings
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import sys | |
import time | |
# Delay between API requests, in milliseconds. It is doubled every time the
# server answers with HTTP 429 and is never lowered again, so later calls
# keep using the slower pace.
sleep_time = 20


def query_api(url, session, max_retries=10):
    """Fetch ``url`` through ``session`` and return the decoded JSON body.

    The function sleeps ``sleep_time`` milliseconds before the first request.
    Each HTTP 429 response doubles the module-level ``sleep_time``, waits
    that long, and retries, up to ``max_retries`` times.

    Args:
        url: Address to request.
        session: Object whose ``get(url)`` returns a response exposing
            ``status_code`` and ``json()``.
        max_retries: Upper bound on retries after a 429 response, so a
            persistent rate limit cannot loop forever.

    Returns:
        The decoded JSON payload, or ``None`` when the final status is not
        200 (including still being rate-limited after all retries) or the
        body is not valid JSON. A warning is written to stderr in each of
        those cases.
    """
    global sleep_time
    time.sleep(sleep_time / 1000.0)
    r = session.get(url)
    retries = 0
    while r.status_code == 429 and retries < max_retries:
        retries += 1
        sleep_time *= 2
        print(
            f'WARNING: Hit rate limit. Increasing sleep to {sleep_time} ms',
            file=sys.stderr,
        )
        time.sleep(sleep_time / 1000.0)
        r = session.get(url)
    if r.status_code != 200:
        print(f'WARNING: Could not access url {url}', file=sys.stderr)
        return None
    try:
        return r.json()
    except ValueError:
        # JSON decoding errors are ValueError subclasses; treat an
        # undecodable body like any other failed lookup.
        print(f'WARNING: Invalid JSON returned by url {url}', file=sys.stderr)
        return None
# Build the shared HTTP session and attach the API key, which is taken from
# the first line of s2key.txt, as the x-api-key header on every request.
session = requests.Session()
with open('s2key.txt', 'r') as f:
    s2_key = next(f).strip()
session.headers['x-api-key'] = s2_key
def print_all_citations(conf, num_papers):
    """Print one tab-separated "ACL id, citation count" line per paper.

    Paper ids are built as ``{conf}.{n}`` for ``n`` from 1 to ``num_papers``
    inclusive and looked up through ``query_api`` using the module-level
    ``session``. The citation count is the length of the ``'citations'``
    entry in the returned data.

    Args:
        conf: Prefix of the ACL id, e.g. ``'2020.findings-emnlp'``.
        num_papers: Highest paper number to query.

    Papers for which ``query_api`` returns ``None`` are skipped silently
    here (``query_api`` has already warned on stderr).
    """
    for pid in range(1, num_papers + 1):
        aclid = f'{conf}.{pid}'
        s2url = f'https://api.semanticscholar.org/v1/paper/ACL:{aclid}'
        paper_data = query_api(s2url, session)
        if paper_data is None:
            continue
        citations = len(paper_data['citations'])
        print(f'{aclid}\t{citations}')
# Select which EMNLP 2020 proceedings to crawl. The main-conference call is
# left disabled, so only the Findings volume is queried on this run.
# print_all_citations(conf='2020.emnlp-main', num_papers=752)
print_all_citations(conf='2020.findings-emnlp', num_papers=447)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import matplotlib | |
import numpy as np | |
import os | |
from matplotlib import pyplot as plt | |
import sys | |
# Fill colours for the data series, used in order and reused cyclically when
# there are more series than colours.
bar_colors = ["#7293CB", "#E1974C", "#84BA5B", "#D35E60", "#808585", "#9067A7", "#AB6857", "#CCC210"]


def make_bar_chart(datas,
                   output_directory, output_fig_file, bar_names, output_fig_format='png',
                   errs=None, title=None, xlabel=None, xticklabels=None, ylabel=None):
    """Draw a grouped bar chart and save it to disk.

    Args:
        datas: Sequence of data series; one bar group position is created
            per element of ``datas[0]``, so every series is expected to have
            that same length.
        output_directory: Directory for the image; created if missing.
        output_fig_file: File name without extension.
        bar_names: Legend labels, one per series.
        output_fig_format: Image format, also used as the file extension.
        errs: Optional per-series error-bar values, indexed like ``datas``.
        title: Optional chart title.
        xlabel: Optional x-axis label.
        xticklabels: Optional labels for the bar groups; when omitted the
            x axis is hidden.
        ylabel: Optional y-axis label.
    """
    fig, ax = plt.subplots()
    ind = np.arange(len(datas[0]))
    width = 0.7 / len(datas)
    bars = []
    for i, data in enumerate(datas):
        # "is not None" keeps this safe when errs is a NumPy array, where
        # "!= None" would compare element-wise.
        err = errs[i] if errs is not None else None
        color = bar_colors[i % len(bar_colors)]
        bars.append(ax.bar(ind + i * width, data, width, color=color, bottom=0, yerr=err))
    # Set axis/title labels
    if title is not None:
        ax.set_title(title)
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if xticklabels is not None:
        # Bars are centred at ind + i*width, so the middle of a group of n
        # bars sits at ind + width*(n-1)/2 (equal to width/2 for two series).
        ax.set_xticks(ind + width * (len(datas) - 1) / 2)
        ax.set_xticklabels(xticklabels)
        plt.xticks(rotation=70)
    else:
        ax.xaxis.set_visible(False)
    ax.legend(bars, bar_names)
    ax.autoscale_view()
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_directory, exist_ok=True)
    out_file = os.path.join(output_directory, f'{output_fig_file}.{output_fig_format}')
    fig.savefig(out_file, format=output_fig_format, bbox_inches='tight')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
# Inclusive upper bounds of the citation-count buckets; a paper is counted in
# the first bucket whose bound is >= its citation count. Counts above the
# last bound fall into no bucket and are left out of the chart.
histogram_buckets = [0,1,2,5,10,20,50,100,200,500,1000,10000]
# conf_data[0] collects ids containing 'emnlp-main', conf_data[1] all others.
conf_data = [[0 for _ in histogram_buckets] for _ in range(2)]
for line in sys.stdin:
    fields = line.split()
    if not fields:
        # Tolerate blank lines (e.g. a trailing newline in concatenated input).
        continue
    pid, cites = fields
    cites = int(cites)
    whichconf = 0 if ('emnlp-main' in pid) else 1
    for bid, bval in enumerate(histogram_buckets):
        if cites <= bval:
            conf_data[whichconf][bid] += 1
            break
# Convert counts to per-conference ratios. A conference with no counted
# papers yields all zeros instead of raising ZeroDivisionError.
norm_data = []
for counts in conf_data:
    total = sum(counts)
    norm_data.append([count / total if total else 0.0 for count in counts])
make_bar_chart(norm_data,'.','cites_diff', ['EMNLP 2020', 'EMNLP 2020 Findings'],xticklabels=[f'<={x}' for x in histogram_buckets], ylabel='ratio of papers')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment