Fully vectorize numpy polyfit

Overview

I'm having performance issues using polyfit because it cannot accept broadcast arrays. I know from this post that dependent data ycan be multidimensional if you use numpy.polynomial.polynomial.polyfit. However, measurement xcannot be multidimensional. Anyway, around?

Motivation

I need to calculate the rate of change of some data. To fit the experiment, I want to use the following method: take the data yand x, since the short sections of the data correspond to polynomials, and then use the set coefficient as an estimate of the rate of change.

Illustration

import numpy as np
import matplotlib.pyplot as plt

n = 100
x = np.linspace(0, 10, n)
y = np.sin(x)

window_length = 10
ydot = [np.polyfit(x[j:j+window_length], y[j:j+window_length], 1)[0] 
                                  for j in range(n - window_length)]
x_mids = [x[j+window_length/2] for j in range(n - window_length)]

plt.plot(x, y)
plt.plot(x_mids, ydot)

plt.show()

enter image description here

- ( ), - ( ).

, :

window_length = 10
vert_idx_list = np.arange(0, len(x) - window_length, 1)
hori_idx_list = np.arange(window_length)
A, B = np.meshgrid(hori_idx_list, vert_idx_list)
idx_array = A + B 

x_array = x[idx_array]
y_array = y[idx_array]

(n-window_length, window_length). , polyfit axis, , .

- , ?

+4
3

polyfit , :

y = [X].a

y - , [X] - , a - .

1- 1- . , "", 2x2, [X]. , :

>>> n = 10
>>> x = np.random.random(n)
>>> y = np.random.random(n)
>>> np.polyfit(x, y, 1)[0]
-0.29207474654700277
>>> (n*(x*y).sum() - x.sum()*y.sum()) / (n*(x*x).sum() - x.sum()*x.sum())
-0.29207474654700216

, , , - 1D

def sliding_fitted_slope(x, y, win):
    x = np.concatenate(([0], x))
    y = np.concatenate(([0], y))
    Sx = np.cumsum(x)
    Sy = np.cumsum(y)
    Sx2 = np.cumsum(x*x)
    Sxy = np.cumsum(x*y)

    Sx = Sx[win:] - Sx[:-win]
    Sy = Sy[win:] - Sy[:-win]
    Sx2 = Sx2[win:] - Sx2[:-win]
    Sxy = Sxy[win:] - Sxy[:-win]

    return (win*Sxy - Sx*Sy) / (win*Sx2 - Sx*Sx)

( , 1):

>>> np.allclose(sliding_fitted_slope(x, y, window_length),
                [np.polyfit(x[j:j+window_length], y[j:j+window_length], 1)[0]
                 for j in range(n - window_length + 1)])
True

%timeit sliding_fitted_slope(x, y, window_length)
10000 loops, best of 3: 34.5 us per loop

%%timeit
[np.polyfit(x[j:j+window_length], y[j:j+window_length], 1)[0]
 for j in range(n - window_length + 1)]
100 loops, best of 3: 10.1 ms per loop

300 .

+3

, , 20 . :

ydot = np.polynomial.polynomial.polyfit(x_array[0], y_array.T, 1)[-1]

, np.polyfit . np.polynomial.polynomial.polyfit (, -1 0).

, x (x_array[0]). , , x, , . , , x.

, !

+3

Using an alternative method to calculate the rate of change can be a solution both to increase speed and to increase accuracy.

n = 1000
x = np.linspace(0, 10, n)
y = np.sin(x)


def timingPolyfit(x,y):
    window_length = 10
    vert_idx_list = np.arange(0, len(x) - window_length, 1)
    hori_idx_list = np.arange(window_length)
    A, B = np.meshgrid(hori_idx_list, vert_idx_list)
    idx_array = A + B 

    x_array = x[idx_array]
    y_array = y[idx_array]

    ydot = np.polynomial.polynomial.polyfit(x_array[0], y_array.T, 1)[-1]
    x_mids = [x[j+window_length/2] for j in range(n - window_length)]

    return ydot, x_mids

def timingSimple(x,y):
    dy = (y[2:] - y[:-2])/2
    dx =  x[1] - x[0]
    dydx = dy/dx
    return dydx, x[1:-1]

y1, x1 = timingPolyfit(x,y)
y2, x2 = timingSimple(x,y)

polyfitError = np.abs(y1 - np.cos(x1))
simpleError = np.abs(y2 - np.cos(x2))

print("polyfit average error: {:.2e}".format(np.average(polyfitError)))
print("simple average error: {:.2e}".format(np.average(simpleError)))

result = %timeit -o timingPolyfit(x,y)
result2 = %timeit -o timingSimple(x,y)

print("simple is {0} times faster".format(result.best / result2.best))

polyfit average error: 3.09e-03 
simple average error: 1.09e-05 
100 loops, best of 3: 3.2 ms per loop 
100000 loops, best of 3: 9.46 µs per loop 
simple is 337.995634151131 times faster 

Relative error: Relative error

Results: Results-closeup

+1
source

All Articles