gustavla/gist:9499068

## gistfile1.py
# Author: Nelle Varoquaux, Andrew Tulloch

# Uses the pool adjacent violators algorithm (PAVA), with the
# enhancement of searching for the longest decreasing subsequence to
# pool at each step.

import numpy as np
cimport numpy as np
cimport cython

ctypedef np.float64_t DOUBLE

np.import_array()


@cython.boundscheck(False)
@cython.wraparound(False)
@cython.cdivision(True)
def _isotonic_regression(np.ndarray[DOUBLE, ndim=1] y,
                         np.ndarray[DOUBLE, ndim=1] weight,
                         np.ndarray[DOUBLE, ndim=1] solution):
    cdef:
        DOUBLE numerator, denominator
        Py_ssize_t i, pooled, n, k, j

        DOUBLE[:] y_mv = y
        DOUBLE[:] weight_mv = weight
        DOUBLE[:] solution_mv = solution

    n = y.shape[0]
    # The algorithm proceeds by iteratively updating the solution
    # array.

    # TODO - should we just pass in a pre-copied solution
    # array and mutate that?
    for i in range(n):
        solution_mv[i] = y_mv[i]

    if n <= 1:
        return solution

    with nogil:
        n -= 1
        pooled = 1
        while pooled:
            # repeat until there are no more adjacent violators.
            i = 0
            pooled = 0
            while i < n:
                k = i
                while k < n and solution_mv[k] >= solution_mv[k + 1]:
                    k += 1
                if solution_mv[i] != solution_mv[k]:
                    # solution[i:k + 1] is a decreasing subsequence, so
                    # replace each point in the subsequence with the
                    # weighted average of the subsequence.

                    # TODO: explore replacing each subsequence with a
                    # _single_ weighted point, and reconstruct the whole
                    # sequence from the sequence of collapsed points.
                    # Theoretically should reduce running time, though
                    # initial experiments weren't promising.
                    numerator = 0.0
                    denominator = 0.0
                    for j in range(i, k + 1):
                        numerator += solution_mv[j] * weight_mv[j]
                        denominator += weight_mv[j]
                    for j in range(i, k + 1):
                        solution_mv[j] = numerator / denominator
                    pooled = 1
                i = k + 1

    return solution
	# Author: Nelle Varoquaux, Andrew Tulloch

	# Uses the pool adjacent violators algorithm (PAVA), with the
	# enhancement of searching for the longest decreasing subsequence to
	# pool at each step.

	import numpy as np
	cimport numpy as np
	cimport cython

	ctypedef np.float64_t DOUBLE

	np.import_array()


	@cython.boundscheck(False)
	@cython.wraparound(False)
	@cython.cdivision(True)
	def _isotonic_regression(np.ndarray[DOUBLE, ndim=1] y,
	np.ndarray[DOUBLE, ndim=1] weight,
	np.ndarray[DOUBLE, ndim=1] solution):
	cdef:
	DOUBLE numerator, denominator
	Py_ssize_t i, pooled, n, k, j

	DOUBLE[:] y_mv = y
	DOUBLE[:] weight_mv = weight
	DOUBLE[:] solution_mv = solution

	n = y.shape[0]
	# The algorithm proceeds by iteratively updating the solution
	# array.

	# TODO - should we just pass in a pre-copied solution
	# array and mutate that?
	for i in range(n):
	solution_mv[i] = y_mv[i]

	if n <= 1:
	return solution

	with nogil:
	n -= 1
	pooled = 1
	while pooled:
	# repeat until there are no more adjacent violators.
	i = 0
	pooled = 0
	while i < n:
	k = i
	while k < n and solution_mv[k] >= solution_mv[k + 1]:
	k += 1
	if solution_mv[i] != solution_mv[k]:
	# solution[i:k + 1] is a decreasing subsequence, so
	# replace each point in the subsequence with the
	# weighted average of the subsequence.

	# TODO: explore replacing each subsequence with a
	# _single_ weighted point, and reconstruct the whole
	# sequence from the sequence of collapsed points.
	# Theoretically should reduce running time, though
	# initial experiments weren't promising.
	numerator = 0.0
	denominator = 0.0
	for j in range(i, k + 1):
	numerator += solution_mv[j] * weight_mv[j]
	denominator += weight_mv[j]
	for j in range(i, k + 1):
	solution_mv[j] = numerator / denominator
	pooled = 1
	i = k + 1

	return solution