Skip to content

Instantly share code, notes, and snippets.

@kjordahl
Created October 24, 2012 18:20
Show Gist options
  • Save kjordahl/3947841 to your computer and use it in GitHub Desktop.
Save kjordahl/3947841 to your computer and use it in GitHub Desktop.
Gaussian process bootstrap plots in iPython notebook
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# -*- coding: utf-8 -*-
# <nbformat>3.0</nbformat>
# <codecell>
import numpy as np
import matplotlib.pyplot as plt
from scipy import ndimage
from sklearn.gaussian_process import GaussianProcess
# <codecell>
def pdense(x, y, sigma, M=1000):
""" Plot probability density of y with known stddev sigma
"""
assert len(x) == len(y) and len(x) == len(sigma)
N = len(x)
# TODO: better y ranging
ymin, ymax = min(y - 2 * sigma), max(y + 2 * sigma)
yy = np.linspace(ymin, ymax, M)
a = [np.exp(-((Y - yy) / s) ** 2) / s for Y, s in zip(y, sigma)]
A = np.array(a)
A = A.reshape(N, M)
plt.imshow(-A.T, cmap='gray', aspect='auto',
origin='lower', extent=(min(x)[0], max(x)[0], ymin, ymax))
plt.title('Density plot')
# <codecell>
def gpr(seed=0, N=20, M=1000, sigma=1.0):
""" from scikits.learn demo
"""
np.random.seed(seed)
def f(x):
"""The function to predict."""
return x * np.sin(x)
X = np.linspace(0.1, 9.9, 20)
X = np.atleast_2d(X).T
y = f(X).ravel()
y = np.random.normal(y, sigma)
x = np.atleast_2d(np.linspace(0, 10, M)).T
nugget = (sigma / y) ** 2
gp = GaussianProcess(corr='squared_exponential', theta0=1e-1,
thetaL=1e-1, thetaU=1.0,
nugget=nugget,
random_start=100)
gp.fit(X, y)
y2, MSE = gp.predict(x, eval_MSE=True)
s2 = np.sqrt(MSE)
return X, y, x, y2, s2
# <codecell>
X, y, x, y2, s2 = gpr(seed=0)
plt.figure(1)
pdense(x, y2, s2, M=1000)
plt.plot(X, y, 'r.')
plt.plot(x, y2, 'b:')
a = plt.gca()
a.set_ylim(-10, 15)
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.show()
# <codecell>
# Run the experiment many times
N = 200
Y = np.nan * np.ones((N, len(x)))
s = np.nan * np.ones((N, len(x)))
print 'Running trial',
for i in xrange(N):
X, y, x, y2, s2 = gpr(seed=i)
Y[i,:] = y2
s[i,:] = s2
print i,
print '\nDone!'
plt.plot(x, Y.T, 'b', alpha=0.15)
a = plt.gca()
a.set_ylim(-10, 15)
plt.title('Bootstrap spaghetti plot')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
plt.show()
# <codecell>
ymin, ymax = -10, 15
bin_width = 0.15
y_bins = np.arange(ymin, ymax, bin_width)
H = np.zeros((len(y_bins)-1, len(x)))
m = np.zeros(x.shape)
for i in xrange(len(x)):
h, e = np.histogram(Y[:,i], bins=np.arange(ymin, ymax, bin_width), density=True)
H[:,i] = h
m[i] = np.median(Y[:,i])
hb = ndimage.gaussian_filter(H, sigma=1)
plt.imshow(-hb, cmap='gray', aspect='auto', origin='lower',
extent=(min(x)[0], max(x)[0], ymin, ymax))
plt.plot(x, m, 'b:')
a = plt.gca()
a.set_ylim(-10, 15)
plt.title('Bootstrap density plot')
plt.xlabel('$x$')
plt.ylabel('$f(x)$')
# <codecell>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment