Skip to content

Instantly share code, notes, and snippets.

@lhenkelm
Last active February 1, 2022 16:10
Show Gist options
  • Save lhenkelm/6d787e3395742a263ea66d2e868a2739 to your computer and use it in GitHub Desktop.
Save lhenkelm/6d787e3395742a263ea66d2e868a2739 to your computer and use it in GitHub Desktop.
random sampling for cabinetry's yield_stdev, but using batches of parameters instead of a list comprehension looping over individual examples
import numpy as np
import pyhf
import shapepolynomial
def random_sampling(
    model: pyhf.pdf.Model,
    parameters: np.ndarray,
    uncertainty: np.ndarray,
    corr_mat: np.ndarray,
    *,
    # allow setting batch size and number of toys independently
    # since performance drops if arrays are too large for available memory
    num_batches: int = 2,
    batch_size: int = 25_000,
    seed: int = 1,
    **model_kwargs,
):
    """Estimate per-bin yield standard deviations by random sampling.

    Draws ``num_batches * batch_size`` parameter points from a multivariate
    normal centered on ``parameters`` and accumulates the mean and
    (population) variance of the resulting expected yields one batch at a
    time, so that only a single batch of toys is ever held in memory.

    Args:
        model: pyhf model whose expected yields are sampled.
        parameters: central parameter values (mean of the sampling normal).
        uncertainty: parameter uncertainties; assumed to be a 2-D diagonal
            matrix of standard deviations so that
            ``uncertainty @ corr_mat @ uncertainty.T`` is the covariance
            matrix (NOTE(review): a 1-D vector would collapse this product
            to a scalar — confirm against the caller).
        corr_mat: parameter correlation matrix.
        num_batches: number of sampling batches.
        batch_size: toys per batch; also used as the pyhf model batch size.
        seed: seed for the random generator (default 1 reproduces the
            previous hard-coded behavior).
        **model_kwargs: forwarded to ``pyhf.Model`` when building the
            batched model.

    Returns:
        Tuple ``(yerr_boot, None)`` where ``yerr_boot`` holds the per-bin
        standard deviations; the second entry is a placeholder for the
        (unused) total uncertainty per channel.
    """
    # other details to forward here?
    model_kwargs.setdefault('modifier_settings', model.config.modifier_settings)
    model_kwargs.setdefault('poi_name', model.config.poi_name)
    # required for my specific example
    model_kwargs.setdefault('validate', False)
    model_kwargs.setdefault('modifier_set', shapepolynomial.modifier_set())
    batched_model = pyhf.Model(model.spec, batch_size=batch_size, **model_kwargs)
    rng = np.random.default_rng(seed)
    # accumulate mean/variance batch by batch to avoid large lists/arrays
    running_mean = 0.0
    running_variance = 0.0
    cumulated_size = 0
    for _ in range(num_batches):
        par_b = rng.multivariate_normal(
            parameters, uncertainty @ corr_mat @ uncertainty.T, size=batch_size
        )
        y_b = batched_model.expected_data(par_b, include_auxdata=False)
        mean = np.mean(y_b, axis=0)
        variance = np.var(y_b, axis=0)  # population variance (ddof=0)
        running_mean, running_variance, cumulated_size = _merge_mean_and_variance(
            running_mean, running_variance, cumulated_size,
            mean, variance, batch_size,
        )
    yerr_boot = np.sqrt(running_variance)
    # don't bother with unused total uncertainty per channel for now
    return yerr_boot, None


def _merge_mean_and_variance(mean_a, var_a, n_a, mean_b, var_b, n_b):
    """Merge (population) mean/variance of two disjoint samples.

    Parallel combination step of Chan et al.'s variance algorithm, exact for
    ddof=0 variances.  Fixes the original update, which dropped the
    ``1 / n_total`` normalisation on the cross term (inflating the merged
    variance by a factor of order ``batch_size``) and mixed ddof=1 combine
    weights with ddof=0 per-batch variances.

    Returns:
        Tuple ``(merged_mean, merged_variance, n_total)``.
    """
    n_total = n_a + n_b
    delta = mean_b - mean_a
    merged_mean = mean_a + delta * n_b / n_total
    merged_var = (
        (n_a * var_a + n_b * var_b) / n_total
        + delta ** 2 * n_a * n_b / n_total ** 2
    )
    return merged_mean, merged_var, n_total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment