Skip to content

Instantly share code, notes, and snippets.

@luispedro
Created April 17, 2020 23:53
Show Gist options
  • Save luispedro/bbacfa6928dd3f142aba64bfa4bd3334 to your computer and use it in GitHub Desktop.
Save luispedro/bbacfa6928dd3f142aba64bfa4bd3334 to your computer and use it in GitHub Desktop.
import pymc3 as pm
from scipy import stats
import numpy as np
NR_TESTS = 3330
POSITIVES = 50
PRE_NEG = 401
PRE_NEG_POS = 2
PRE_PLUS = 37+75+85
PRE_PLUS_POS = 25 + 75 + 78
print("# Pre-Bayesian")
print("{}\t{}".format("Spec","p-value"))
for sp in np.linspace(.983, .995, 10):
print("{}\t{}".format(sp, 1-stats.binom(NR_TESTS, 1 - sp).cdf(POSITIVES)))
print("\n# Semi-Bayesian")
sp_dist = stats.beta(PRE_NEG - PRE_NEG_POS + 1, PRE_NEG_POS + 1)
cs = []
for _ in range(10_000):
sp = sp_dist.rvs()
cs.append(
stats.binom(NR_TESTS, 1 - sp).rvs())
cs = np.array(cs)
print("Empirical p-value for null: {}".format((cs >= 50).mean()))
print("\n# The full Bayesian")
with pm.Model() as model:
# Beta(1,1) priors for all the fractions
sp = pm.Beta('sp', 1,1)
sn = pm.Beta('sn', 1,1)
prev = pm.Beta('prev', 1, 1)
# These were the test evaluations
pm.Binomial('pre', PRE_NEG, 1 - sp, observed=PRE_NEG_POS)
pm.Binomial('pre_sn', PRE_PLUS, sn, observed=PRE_PLUS_POS)
# Total positives in the test set:
pos = pm.Binomial('pos', NR_TESTS, prev)
# Some are going to be true positives (at most POSITIVES):
tp = pm.Bound(pm.Binomial, upper=POSITIVES)('tp', pos, sn)
# We observe the sum of fp + tp, but it's not possible to directly express
# it that way in pymc, but it is possible to express it in this equivalent
# way:
p = pm.Binomial('fp', NR_TESTS - pos, 1 - sp, observed=(POSITIVES - tp))
t = pm.sample(10_000)
print("P(pos == 0): {}".format(np.mean(t.pos==0)))
t_prev = t.prev.copy()
for name, arr in [
('prevalence', t.prev),
('positives', t.pos)]:
arr = arr.copy()
arr.sort()
print("Credible interval for {}: {} -- {}".format(
name,
arr[len(arr)//20],
arr[-len(arr)//20]))
# OUTPUT
# # Pre-Bayesian
# Spec p-value
# 0.983 0.7916561584607545
# 0.9843333333333333 0.5837802613783531
# 0.9856666666666667 0.3359586144295388
# 0.987 0.13589251160874583
# 0.9883333333333333 0.03419905470535767
# 0.9896666666666667 0.004607969061608097
# 0.991 0.000272379260707889
# 0.9923333333333333 5.3119956933134205e-06
# 0.9936666666666667 2.1987185627736494e-08
# 0.995 9.120371124993198e-12
#
# # Semi-Bayesian
# Empirical p-value for null: 0.0732
#
# # The full Bayesian
# P(pos == 0): 0.0
# Credible interval for prevalence: 0.004510854444494588 -- 0.016812474319518215
# Credible interval for positives: 15 -- 51
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment