Skip to content

Instantly share code, notes, and snippets.

@kadereub
Last active October 5, 2024 20:33
Show Gist options
  • Save kadereub/9eae9cff356bb62cdbd672931e8e5ec4 to your computer and use it in GitHub Desktop.
Save kadereub/9eae9cff356bb62cdbd672931e8e5ec4 to your computer and use it in GitHub Desktop.
A numba implementation of numpy polfit
# Load relevant libraries
import numpy as np
import numba as nb
import matplotlib.pyplot as plt
# Goal is to implement a numba compatible polyfit (note does not include error handling)
# Define Functions Using Numba
# Idea here is to solve ax = b, using least squares, where a represents our coefficients e.g. x**2, x, constants
@nb.njit
def _coeff_mat(x, deg):
mat_ = np.zeros(shape=(x.shape[0],deg + 1))
const = np.ones_like(x)
mat_[:,0] = const
mat_[:, 1] = x
if deg > 1:
for n in range(2, deg + 1):
mat_[:, n] = x**n
return mat_
@nb.jit
def _fit_x(a, b):
# linalg solves ax = b
det_ = np.linalg.lstsq(a, b)[0]
return det_
@nb.jit
def fit_poly(x, y, deg):
a = _coeff_mat(x, deg)
p = _fit_x(a, y)
# Reverse order so p[0] is coefficient of highest order
return p[::-1]
@nb.jit
def eval_polynomial(P, x):
'''
Compute polynomial P(x) where P is a vector of coefficients, highest
order coefficient at P[0]. Uses Horner's Method.
'''
result = 0
for coeff in P:
result = x * result + coeff
return result
# Create Dummy Data and use existing numpy polyfit as test
x = np.linspace(0, 2, 20)
y = np.cos(x) + 0.3*np.random.rand(20)
p = np.poly1d(np.polyfit(x, y, 3))
t = np.linspace(0, 2, 200)
plt.plot(x, y, 'o', t, p(t), '-')
# Now plot using the Numba (amazing) functions
p_coeffs = fit_poly(x, y, deg=3)
plt.plot(x, y, 'o', t, eval_polynomial(p_coeffs, t), '-')
# Great Success...
# References
# 1. Numpy least squares docs -> https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.lstsq.html#numpy.linalg.lstsq
# 2. Numba linear alegbra supported funcs -> https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#linear-algebra
# 3. SO Post -> https://stackoverflow.com/questions/56181712/fitting-a-quadratic-function-in-python-without-numpy-polyfit
@MothNik
Copy link

MothNik commented Nov 7, 2020

Only the first run takes so long to start. After that, the function completes 100.000 runs in less than 5 seconds.

@anicusan
Copy link

For people preferring the newer numpy.polynomial.Polynomial format, here are the equivalent fitting, differentiation and evaluation functions:

import numpy as np
import numba as nb


@nb.jit
def polyfit(x, y, deg):
    '''Fit polynomial of order `deg` against x, y data points.'''
    mat = np.zeros(shape = (x.shape[0], deg + 1))
    mat[:, 0] = np.ones_like(x)
    for n in range(1, deg + 1):
        mat[:, n] = x**n

    p = np.linalg.lstsq(mat, y)[0]
    return p


@nb.jit
def polyder(p):
    '''Differentiate polynomial p.'''
    d = np.zeros(shape = (p.shape[0] - 1))
    for n in range(d.shape[0]):
        d[n] = (n + 1) * p[n + 1]
    return d


@nb.jit
def polyval(p, x):
    '''Evaluate polynomial p(x) using Horner's Method.

    New numpy.polynomial.Polynomial format:
        p[0] + p[1] * x + p[2] * x^2 + ...
    '''
    result = np.zeros_like(x)
    for coeff in p[::-1]:
        result = x * result + coeff
    return result


class NBPolynomial(np.polynomial.Polynomial):
    '''Create numpy.polynomial.Polynomial-compatible subclass using Numba
    accelerated fitting, differentiation and evaluation methods.'''

    def __init__(self, coef, domain = None, window = None):
        # `domain` and `window` are ignored; kept for class compatibility
        coef = np.array(coef, dtype = float)
        np.polynomial.Polynomial.__init__(self, coef, domain, window)

    @staticmethod
    def fit(x, y, deg, **kwargs):
        # Other `kwargs` are ignored; kept for class compatibility
        return NBPolynomial(polyfit(x, y, deg))

    def __call__(self, x):
        return polyval(self.coef, x)

@MothNik
Copy link

MothNik commented Nov 13, 2021

It's been a while and right now I currently have to rely a lot on polynomials. Thus I found that the functions fail from time to time whereas NumPy's polyfit never does. So, I had a look at NumPy's source code and I tripped over something definitely required here (even though this is somewhat of a loss in performance). NumPy stabilizes the Least Squares solution process by scaling the x-matrix of the lstsq-function, so that each of its columns has a Euclidean norm of 1. This avoids an SVD on a matrix with columns holding extremely small and extremely large values at the same time. After I changed the code to the following, it was consistent with polyfit except for some small numerical deviations at large polynomial degrees (> 15, polyfit already warns about being poorly conditioned then):

# Define Functions Using Numba
# Idea here is to solve ax = b, using least squares, where a represents our coefficients e.g. x**2, x, constants
@nb.njit
def _coeff_mat(x, deg):

    mat_ = np.ones(shape=(x.shape[0],deg + 1))
    mat_[:, 1] = x

    if deg > 1:
        for n in range(2, deg + 1):
            # here, the pow()-function was turned into multiplication, which gave some speedup for me (up to factor 2 for small degrees, ...
            # ... which is the normal application case)
            mat_[:, n] = mat_[:, n-1] * x

    # evaluation of the norm of each column by means of a loop
    scale_vect = np.empty((deg + 1, ))
    for n in range(0, deg + 1):
        
        # evaluation of the column's norm (stored for later processing)
        col_norm = np.linalg.norm(mat_[:, n])
        scale_vect[n] = col_norm
        
        # scaling the column to unit-length
        mat_[:, n] /= col_norm

    return mat_, scale_vect
    
@nb.jit
def _fit_x(a, b, scales):
    # linalg solves ax = b
    det_ = np.linalg.lstsq(a, b)[0]
    # due to the stabilization, the coefficients have the wrong scale, which is corrected now
    det_ /= scales

    return det_

@nb.jit
def fit_poly(x, y, deg):
    a, scales_ = _coeff_mat(x, deg)
    p = _fit_x(a, y, scales_)
    # Reverse order so p[0] is coefficient of highest order
    return p[::-1]

When I try this on the following example, both results coincide, which is not the case without stabilization (example was chosen arbitratily. The fit is not beautiful):

np.random.seed(0)
x = np.linspace(0, 1000, 1001)
y = np.sin(x / 50) + np.random.normal(loc = 0, scale = 0.1, size = x.shape)
print(fit_poly(x, y, 15))
print(np.polyfit(x, y, 15))

# yields 1) Numba WITH stabilization
# [ 1.06803123e-38 -7.86670125e-35  2.58893126e-31 -5.02381370e-28
#   6.39379654e-25 -5.62434018e-22  3.51838791e-19 -1.59040278e-16 
#   5.23155398e-14 -1.24417040e-11  2.06119644e-09 -2.15733857e-07
#   1.18192402e-05 -3.40815767e-04  1.89457941e-02  9.61349262e-02]
# 2) NumPy
# [ 1.06803112e-38 -7.86670037e-35  2.58893094e-31 -5.02381303e-28
#   6.39379559e-25 -5.62433924e-22  3.51838725e-19 -1.59040244e-16
#   5.23155274e-14 -1.24417007e-11  2.06119583e-09 -2.15733782e-07
#   1.18192344e-05 -3.40815496e-04  1.89457868e-02  9.61350739e-02]
# 3) Numba WITHOUT stabilization (just set 'col_norm' in '_coeff_mat' to 1 to reproduce, result is totally off)
# [-4.88325058e-42  1.33998617e-38 -1.21894982e-35  3.67359507e-33
#   1.71351341e-35  5.02633212e-38  1.18746039e-40  2.47266327e-43
#   4.74518526e-46  8.61203036e-49  1.42582088e-51  6.84227766e-49
#   6.48478703e-46 -1.41888554e-41 -1.44615344e-37  1.86939409e-65]

Unfortunately, the speedup is thus almost fully lost and NumPy is almost equal to the Numba-version for me. But there is one big pro of using Numba - which I'm very grateful for, thank you! - and this is that your functions can be called by other Numba-functions, which helped me speedup my code so much.

@mszczuj
Copy link

mszczuj commented Feb 16, 2022

I've attached my piece of code with explicit types referred, gives a 4x speedup over numpy polyfit

@numba.njit("f8[:,:](f8[:], i8)")
def _coeff_mat(x, deg):
    mat_ = np.zeros(shape=(x.shape[0],deg + 1), dtype=np.float64)
    const = np.ones_like(x)
    mat_[:,0] = const
    mat_[:, 1] = x
    if deg > 1:
        for n in range(2, deg + 1):
            mat_[:, n] = x**n
    return mat_

@numba.njit("f8[:](f8[:,:], f8[:])")
def _fit_x(a, b):
    # linalg solves ax = b
    det_ = np.linalg.lstsq(a, b)[0]
    return det_

@numba.njit("f8[:](f8[:], f8[:], i8)")
def fit_poly(x, y, deg):
    a = _coeff_mat(x, deg)
    p = _fit_x(a, y)
    # Reverse order so p[0] is coefficient of highest order
    return p[::-1]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment