Skip to content

Instantly share code, notes, and snippets.

@lhenkelm
Last active February 1, 2022 16:10
Show Gist options
  • Save lhenkelm/6d787e3395742a263ea66d2e868a2739 to your computer and use it in GitHub Desktop.
Save lhenkelm/6d787e3395742a263ea66d2e868a2739 to your computer and use it in GitHub Desktop.
random sampling for cabinetry's yield_stdev, but using batches of parameters instead of a list comprehension looping over individual examples
import numpy as np
import pyhf
import shapepolynomial
def random_sampling(
    model: pyhf.pdf.Model,
    parameters: np.ndarray,
    uncertainty: np.ndarray,
    corr_mat: np.ndarray,
    *,
    # allow setting batch size and number of toys independently
    # since performance drops if arrays are too large for available memory
    num_batches: int = 2,
    batch_size: int = 25_000,
    seed: int = 1,
    **model_kwargs,
):
    """Estimate per-bin yield standard deviations by random sampling.

    Draws ``num_batches * batch_size`` parameter points from a multivariate
    normal centered on ``parameters`` and accumulates the mean and
    (population) variance of the resulting expected yields one batch at a
    time, so that only a single batch of toys is ever held in memory.

    Args:
        model: pyhf model whose expected yields are sampled.
        parameters: central parameter values (mean of the sampling normal).
        uncertainty: parameter uncertainties; assumed to be a 2-D diagonal
            matrix of standard deviations so that
            ``uncertainty @ corr_mat @ uncertainty.T`` is the covariance
            matrix (NOTE(review): a 1-D vector would collapse this product
            to a scalar — confirm against the caller).
        corr_mat: parameter correlation matrix.
        num_batches: number of sampling batches.
        batch_size: toys per batch; also used as the pyhf model batch size.
        seed: seed for the random generator (default 1 reproduces the
            previous hard-coded behavior).
        **model_kwargs: forwarded to ``pyhf.Model`` when building the
            batched model.

    Returns:
        Tuple ``(yerr_boot, None)`` where ``yerr_boot`` holds the per-bin
        standard deviations; the second entry is a placeholder for the
        (unused) total uncertainty per channel.
    """
    # other details to forward here?
    model_kwargs.setdefault('modifier_settings', model.config.modifier_settings)
    model_kwargs.setdefault('poi_name', model.config.poi_name)
    # required for my specific example
    model_kwargs.setdefault('validate', False)
    model_kwargs.setdefault('modifier_set', shapepolynomial.modifier_set())
    batched_model = pyhf.Model(model.spec, batch_size=batch_size, **model_kwargs)
    rng = np.random.default_rng(seed)
    # accumulate mean/variance batch by batch to avoid large lists/arrays
    running_mean = 0.0
    running_variance = 0.0
    cumulated_size = 0
    for _ in range(num_batches):
        par_b = rng.multivariate_normal(
            parameters, uncertainty @ corr_mat @ uncertainty.T, size=batch_size
        )
        y_b = batched_model.expected_data(par_b, include_auxdata=False)
        mean = np.mean(y_b, axis=0)
        variance = np.var(y_b, axis=0)  # population variance (ddof=0)
        running_mean, running_variance, cumulated_size = _merge_mean_and_variance(
            running_mean, running_variance, cumulated_size,
            mean, variance, batch_size,
        )
    yerr_boot = np.sqrt(running_variance)
    # don't bother with unused total uncertainty per channel for now
    return yerr_boot, None


def _merge_mean_and_variance(mean_a, var_a, n_a, mean_b, var_b, n_b):
    """Merge (population) mean/variance of two disjoint samples.

    Parallel combination step of Chan et al.'s variance algorithm, exact for
    ddof=0 variances.  Fixes the original update, which dropped the
    ``1 / n_total`` normalisation on the cross term (inflating the merged
    variance by a factor of order ``batch_size``) and mixed ddof=1 combine
    weights with ddof=0 per-batch variances.

    Returns:
        Tuple ``(merged_mean, merged_variance, n_total)``.
    """
    n_total = n_a + n_b
    delta = mean_b - mean_a
    merged_mean = mean_a + delta * n_b / n_total
    merged_var = (
        (n_a * var_a + n_b * var_b) / n_total
        + delta ** 2 * n_a * n_b / n_total ** 2
    )
    return merged_mean, merged_var, n_total
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment