-
-
Save kadereub/9eae9cff356bb62cdbd672931e8e5ec4 to your computer and use it in GitHub Desktop.
# Load relevant libraries | |
import numpy as np | |
import numba as nb | |
import matplotlib.pyplot as plt | |
# Goal is to implement a numba compatible polyfit (note does not include error handling) | |
# Define Functions Using Numba | |
# Idea here is to solve ax = b, using least squares, where a represents our coefficients e.g. x**2, x, constants | |
@nb.njit | |
def _coeff_mat(x, deg): | |
mat_ = np.zeros(shape=(x.shape[0],deg + 1)) | |
const = np.ones_like(x) | |
mat_[:,0] = const | |
mat_[:, 1] = x | |
if deg > 1: | |
for n in range(2, deg + 1): | |
mat_[:, n] = x**n | |
return mat_ | |
@nb.jit | |
def _fit_x(a, b): | |
# linalg solves ax = b | |
det_ = np.linalg.lstsq(a, b)[0] | |
return det_ | |
@nb.jit | |
def fit_poly(x, y, deg): | |
a = _coeff_mat(x, deg) | |
p = _fit_x(a, y) | |
# Reverse order so p[0] is coefficient of highest order | |
return p[::-1] | |
@nb.jit | |
def eval_polynomial(P, x): | |
''' | |
Compute polynomial P(x) where P is a vector of coefficients, highest | |
order coefficient at P[0]. Uses Horner's Method. | |
''' | |
result = 0 | |
for coeff in P: | |
result = x * result + coeff | |
return result | |
# Create Dummy Data and use existing numpy polyfit as test | |
x = np.linspace(0, 2, 20) | |
y = np.cos(x) + 0.3*np.random.rand(20) | |
p = np.poly1d(np.polyfit(x, y, 3)) | |
t = np.linspace(0, 2, 200) | |
plt.plot(x, y, 'o', t, p(t), '-') | |
# Now plot using the Numba (amazing) functions | |
p_coeffs = fit_poly(x, y, deg=3) | |
plt.plot(x, y, 'o', t, eval_polynomial(p_coeffs, t), '-') | |
# Great Success... | |
# References | |
# 1. Numpy least squares docs -> https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.lstsq.html#numpy.linalg.lstsq | |
# 2. Numba linear alegbra supported funcs -> https://numba.pydata.org/numba-doc/dev/reference/numpysupported.html#linear-algebra | |
# 3. SO Post -> https://stackoverflow.com/questions/56181712/fitting-a-quadratic-function-in-python-without-numpy-polyfit |
For people preferring the newer numpy.polynomial.Polynomial
format, here are the equivalent fitting, differentiation and evaluation functions:
import numpy as np
import numba as nb
@nb.jit
def polyfit(x, y, deg):
'''Fit polynomial of order `deg` against x, y data points.'''
mat = np.zeros(shape = (x.shape[0], deg + 1))
mat[:, 0] = np.ones_like(x)
for n in range(1, deg + 1):
mat[:, n] = x**n
p = np.linalg.lstsq(mat, y)[0]
return p
@nb.jit
def polyder(p):
'''Differentiate polynomial p.'''
d = np.zeros(shape = (p.shape[0] - 1))
for n in range(d.shape[0]):
d[n] = (n + 1) * p[n + 1]
return d
@nb.jit
def polyval(p, x):
'''Evaluate polynomial p(x) using Horner's Method.
New numpy.polynomial.Polynomial format:
p[0] + p[1] * x + p[2] * x^2 + ...
'''
result = np.zeros_like(x)
for coeff in p[::-1]:
result = x * result + coeff
return result
class NBPolynomial(np.polynomial.Polynomial):
'''Create numpy.polynomial.Polynomial-compatible subclass using Numba
accelerated fitting, differentiation and evaluation methods.'''
def __init__(self, coef, domain = None, window = None):
# `domain` and `window` are ignored; kept for class compatibility
coef = np.array(coef, dtype = float)
np.polynomial.Polynomial.__init__(self, coef, domain, window)
@staticmethod
def fit(x, y, deg, **kwargs):
# Other `kwargs` are ignored; kept for class compatibility
return NBPolynomial(polyfit(x, y, deg))
def __call__(self, x):
return polyval(self.coef, x)
It's been a while and right now I currently have to rely a lot on polynomials. Thus I found that the functions fail from time to time whereas NumPy's polyfit
never does. So, I had a look at NumPy's source code and I tripped over something definitely required here (even though this is somewhat of a loss in performance). NumPy stabilizes the Least Squares solution process by scaling the x-matrix of the lstsq
-function, so that each of its columns has a Euclidean norm of 1. This avoids an SVD on a matrix with columns holding extremely small and extremely large values at the same time. After I changed the code to the following, it was consistent with polyfit
except for some small numerical deviations at large polynomial degrees (> 15, polyfit
already warns about being poorly conditioned then):
# Define Functions Using Numba
# Idea here is to solve ax = b, using least squares, where a represents our coefficients e.g. x**2, x, constants
@nb.njit
def _coeff_mat(x, deg):
mat_ = np.ones(shape=(x.shape[0],deg + 1))
mat_[:, 1] = x
if deg > 1:
for n in range(2, deg + 1):
# here, the pow()-function was turned into multiplication, which gave some speedup for me (up to factor 2 for small degrees, ...
# ... which is the normal application case)
mat_[:, n] = mat_[:, n-1] * x
# evaluation of the norm of each column by means of a loop
scale_vect = np.empty((deg + 1, ))
for n in range(0, deg + 1):
# evaluation of the column's norm (stored for later processing)
col_norm = np.linalg.norm(mat_[:, n])
scale_vect[n] = col_norm
# scaling the column to unit-length
mat_[:, n] /= col_norm
return mat_, scale_vect
@nb.jit
def _fit_x(a, b, scales):
# linalg solves ax = b
det_ = np.linalg.lstsq(a, b)[0]
# due to the stabilization, the coefficients have the wrong scale, which is corrected now
det_ /= scales
return det_
@nb.jit
def fit_poly(x, y, deg):
a, scales_ = _coeff_mat(x, deg)
p = _fit_x(a, y, scales_)
# Reverse order so p[0] is coefficient of highest order
return p[::-1]
When I try this on the following example, both results coincide, which is not the case without stabilization (example was chosen arbitratily. The fit is not beautiful):
np.random.seed(0)
x = np.linspace(0, 1000, 1001)
y = np.sin(x / 50) + np.random.normal(loc = 0, scale = 0.1, size = x.shape)
print(fit_poly(x, y, 15))
print(np.polyfit(x, y, 15))
# yields 1) Numba WITH stabilization
# [ 1.06803123e-38 -7.86670125e-35 2.58893126e-31 -5.02381370e-28
# 6.39379654e-25 -5.62434018e-22 3.51838791e-19 -1.59040278e-16
# 5.23155398e-14 -1.24417040e-11 2.06119644e-09 -2.15733857e-07
# 1.18192402e-05 -3.40815767e-04 1.89457941e-02 9.61349262e-02]
# 2) NumPy
# [ 1.06803112e-38 -7.86670037e-35 2.58893094e-31 -5.02381303e-28
# 6.39379559e-25 -5.62433924e-22 3.51838725e-19 -1.59040244e-16
# 5.23155274e-14 -1.24417007e-11 2.06119583e-09 -2.15733782e-07
# 1.18192344e-05 -3.40815496e-04 1.89457868e-02 9.61350739e-02]
# 3) Numba WITHOUT stabilization (just set 'col_norm' in '_coeff_mat' to 1 to reproduce, result is totally off)
# [-4.88325058e-42 1.33998617e-38 -1.21894982e-35 3.67359507e-33
# 1.71351341e-35 5.02633212e-38 1.18746039e-40 2.47266327e-43
# 4.74518526e-46 8.61203036e-49 1.42582088e-51 6.84227766e-49
# 6.48478703e-46 -1.41888554e-41 -1.44615344e-37 1.86939409e-65]
Unfortunately, the speedup is thus almost fully lost and NumPy is almost equal to the Numba-version for me. But there is one big pro of using Numba - which I'm very grateful for, thank you! - and this is that your functions can be called by other Numba-functions, which helped me speedup my code so much.
I've attached my piece of code with explicit types referred, gives a 4x speedup over numpy polyfit
@numba.njit("f8[:,:](f8[:], i8)")
def _coeff_mat(x, deg):
mat_ = np.zeros(shape=(x.shape[0],deg + 1), dtype=np.float64)
const = np.ones_like(x)
mat_[:,0] = const
mat_[:, 1] = x
if deg > 1:
for n in range(2, deg + 1):
mat_[:, n] = x**n
return mat_
@numba.njit("f8[:](f8[:,:], f8[:])")
def _fit_x(a, b):
# linalg solves ax = b
det_ = np.linalg.lstsq(a, b)[0]
return det_
@numba.njit("f8[:](f8[:], f8[:], i8)")
def fit_poly(x, y, deg):
a = _coeff_mat(x, deg)
p = _fit_x(a, y)
# Reverse order so p[0] is coefficient of highest order
return p[::-1]
Only the first run takes so long to start. After that, the function completes 100.000 runs in less than 5 seconds.