Created
October 21, 2012 21:18
-
-
Save tingletech/3928539 to your computer and use it in GitHub Desktop.
Calculating the credible interval for user survey
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
porting R code to python | |
http://stats.stackexchange.com/questions/39319/bayesian-user-survey-with-a-credible-interval | |
""" | |
# pip install numpy | |
from numpy.random.mtrand import dirichlet | |
from numpy import array, sum, percentile | |
categories = [ | |
'k-12 teacher or librarian', | |
'k-12 student', | |
'college or graduate student', | |
'faculty or academic researcher', | |
'archivist or librarian', | |
'genealogist or family researcher', | |
'other' | |
] | |
observations = [65, 71, 532, 307, 369, 234, 584] | |
# just checking | |
assert len(categories) == len(observations) | |
# run 1000 simulations | |
simulations = 1000 | |
# set up a standard library array to hold the simulation results | |
results = [] | |
# monte carlo method | |
for n in xrange(simulations): | |
results.append( dirichlet([x+1 for x in observations]) ) | |
# convert results to a numpy array for better indexing/sums | |
results = array(results) | |
# calculate estimates | |
estimates = sum(results, axis=0) / simulations | |
# calculate credible interval and print results | |
for k in xrange(len(observations)): | |
q025 = "{:.0%}".format(percentile(results[:,k], 2.5)) | |
q975 = "{:.0%}".format(percentile(results[:,k], 97.5)) | |
percent = "{:.0%}".format(estimates[k]) | |
print categories[k], q025, percent, q975 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment