Skip to content

Instantly share code, notes, and snippets.

@endolith
Forked from kjordahl/bootstrap.ipynb
Created November 6, 2019 18:01
Show Gist options
  • Save endolith/43056072940974b233841b3ca0e32c08 to your computer and use it in GitHub Desktop.
Save endolith/43056072940974b233841b3ca0e32c08 to your computer and use it in GitHub Desktop.
Gaussian process bootstrap plots in iPython notebook
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
from sklearn.gaussian_process import GaussianProcess
# <codecell>
def pdense(x, y, sigma, M=1000):
""" Plot probability density of y with known stddev sigma
"""
assert len(x) == len(y) and len(x) == len(sigma)
N = len(x)
# TODO: better y ranging
ymin, ymax = min(y - 2 * sigma), max(y + 2 * sigma)
yy = np.linspace(ymin, ymax, M)
a = [np.exp(-((Y - yy) / s) ** 2) / s for Y, s in zip(y, sigma)]
A = np.array(a)
A = A.reshape(N, M)
plt.imshow(-A.T, cmap='gray', aspect='auto',
origin='lower', extent=(min(x)[0], max(x)[0], ymin, ymax))
plt.title('Density plot')
# <codecell>
def gpr(seed=0, N=20, M=1000, sigma=1.0):
""" from scikits.learn demo
"""
np.random.seed(seed)
def f(x):
"""The function to predict."""
return x * np.sin(x)
X = np.linspace(0.1, 9.9, 20)
X = np.atleast_2d(X).T
y = f(X).ravel()
y = np.random.normal(y, sigma)
x = np.atleast_2d(np.linspace(0, 10, M)).T
nugget = (sigma / y) ** 2
gp = GaussianProcess(corr='squared_exponential', theta0=1e-1,
thetaL=1e-1, thetaU=1.0,
nugget=nugget,
random_start=100)
gp.fit(X, y)
y2, MSE = gp.predict(x, eval_MSE=True)
s2 = np.sqrt(MSE)
return X, y, x, y2, s2
# <codecell>
X, y, x, y2, s2 = gpr(seed=0)
plt.figure(1)
pdense(x, y2, s2, M=1000)
plt.plot(X, y, 'r.')
plt.plot(x, y2, 'b:')
a = plt.gca()
a.set_ylim(-10, 15)
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.show()
# <codecell>
# Run the experiment many times
N = 200
Y = np.nan * np.ones((N, len(x)))
s = np.nan * np.ones((N, len(x)))
print 'Running trial',
for i in xrange(N):
X, y, x, y2, s2 = gpr(seed=i)
Y[i,:] = y2
s[i,:] = s2
print i,
print '\nDone!'
plt.plot(x, Y.T, 'b', alpha=0.15)
a = plt.gca()
a.set_ylim(-10, 15)
plt.title('Bootstrap spaghetti plot')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.show()
# <codecell>
ymin, ymax = -10, 15
bin_width = 0.15
y_bins = np.arange(ymin, ymax, bin_width)
H = np.zeros((len(y_bins)-1, len(x)))
m = np.zeros(x.shape)
for i in xrange(len(x)):
h, e = np.histogram(Y[:,i], bins=np.arange(ymin, ymax, bin_width), density=True)
H[:,i] = h
m[i] = np.median(Y[:,i])
hb = ndimage.gaussian_filter(H, sigma=1)
plt.imshow(-hb, cmap='gray', aspect='auto', origin='lower',
extent=(min(x)[0], max(x)[0], ymin, ymax))
plt.plot(x, m, 'b:')
a = plt.gca()
a.set_ylim(-10, 15)
plt.title('Bootstrap density plot')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
# <codecell>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment