Is Cython code 3-4 times slower than Python / Numpy code?

I am trying to convert Python / Numpy code to Cython code for speed. However, Cython is MUCH slower (3-4 times) than Python / Numpy code. Am I using Cython correctly? Am I passing myc_rb_etc () arguments correctly in my Cython code? How about when I call the integration function? Thanks in advance for your help. Here is my Python / Numpy code:

from pylab import * import pylab as pl from numpy import * import numpy as np from scipy import integrate def myc_rb_e2f(y,t,k,d): M = y[0] E = y[1] CD = y[2] CE = y[3] R = y[4] RP = y[5] RE = y[6] S = 0.01 if t > 300: S = 5.0 #if t > 400 #S = 0.01 t1 = k[0]*S/(k[7]+S); t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E)); t3 = k[5]*M/(k[14]+M); t4 = k[11]*CD*RE/(k[16]+RE); t5 = k[12]*CE*RE/(k[17]+RE); t6 = k[2]*M/(k[14]+M); t7 = k[3]*S/(k[7]+S); t8 = k[6]*E/(k[15]+E); t9 = k[13]*RP/(k[18]+RP); t10 = k[9]*CD*R/(k[16]+R); t11 = k[10]*CE*R/(k[17]+R); dM = t1-d[0]*M dE = t2+t3+t4+t5-k[8]*R*Ed[1]*E dCD = t6+t7-d[2]*CD dCE = t8-d[3]*CE dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R dRP = t10+t11+t4+t5-t9-d[5]*RP dRE = k[8]*R*E-t4-t5-d[6]*RE dy = [dM,dE,dCD,dCE,dR,dRP,dRE] return dy t = np.zeros(10000) t = np.linspace(0.,3000.,10000.) # Initial concentrations of [M,E,CD,CE,R,RP,RE] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25]) E_simulated = np.zeros([10000,5000]) E_avg = np.zeros([10000]) k = np.zeros([19]) d = np.zeros([7]) for i in range (0,5000): k[0] = 1.+0.1*randn(1) k[1] = 0.15+0.05*randn(1) k[2] = 0.2+0.05*randn(1) k[3] = 0.2+0.05*randn(1) k[4] = 0.35+0.05*randn(1) k[5] = 0.001+0.0001*randn(1) k[6] = 0.5+0.05*randn(1) k[7] = 0.3+0.05*randn(1) k[8] = 30.+5.*randn(1) k[9] = 18.+3.*randn(1) k[10] = 18.+3.*randn(1) k[11] = 18.+3.*randn(1) k[12] = 18.+3.*randn(1) k[13] = 3.6+0.5*randn(1) k[14] = 0.15+0.05*randn(1) k[15] = 0.15+0.05*randn(1) k[16] = 0.92+0.1*randn(1) k[17] = 0.92+0.1*randn(1) k[18] = 0.01+0.001*randn(1) d[0] = 0.7+0.05*randn(1) d[1] = 0.25+0.025*randn(1) d[2] = 1.5+0.05*randn(1) d[3] = 1.5+0.05*randn(1) d[4] = 0.06+0.01*randn(1) d[5] = 0.06+0.01*randn(1) d[6] = 0.03+0.005*randn(1) r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d)) E_simulated[:,i] = r[:,1] for i in range(0,10000): E_avg[i] = sum(E_simulated[i,:])/5000. pl.plot(t,E_avg,'-ro') pl.show() 

Here is the code converted to Cython:

 cimport numpy as np import numpy as np from numpy import * import pylab as pl from pylab import * from scipy import integrate def myc_rb_e2f(y,t,k,d): cdef double M = y[0] cdef double E = y[1] cdef double CD = y[2] cdef double CE = y[3] cdef double R = y[4] cdef double RP = y[5] cdef double RE = y[6] cdef double S = 0.01 if t > 300.0: S = 5.0 #if t > 400 #S = 0.01 cdef double t1 = k[0]*S/(k[7]+S) cdef double t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E)) cdef double t3 = k[5]*M/(k[14]+M) cdef double t4 = k[11]*CD*RE/(k[16]+RE) cdef double t5 = k[12]*CE*RE/(k[17]+RE) cdef double t6 = k[2]*M/(k[14]+M) cdef double t7 = k[3]*S/(k[7]+S) cdef double t8 = k[6]*E/(k[15]+E) cdef double t9 = k[13]*RP/(k[18]+RP) cdef double t10 = k[9]*CD*R/(k[16]+R) cdef double t11 = k[10]*CE*R/(k[17]+R) cdef double dM = t1-d[0]*M cdef double dE = t2+t3+t4+t5-k[8]*R*Ed[1]*E cdef double dCD = t6+t7-d[2]*CD cdef double dCE = t8-d[3]*CE cdef double dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R cdef double dRP = t10+t11+t4+t5-t9-d[5]*RP cdef double dRE = k[8]*R*E-t4-t5-d[6]*RE dy = [dM,dE,dCD,dCE,dR,dRP,dRE] return dy def main(): cdef np.ndarray[double,ndim=1] t = np.zeros(10000) t = np.linspace(0.,3000.,10000.) # Initial concentrations of [M,E,CD,CE,R,RP,RE] cdef np.ndarray[double,ndim=1] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25]) cdef np.ndarray[double,ndim=2] E_simulated = np.zeros([10000,5000]) cdef np.ndarray[double,ndim=2] r = np.zeros([10000,7]) cdef np.ndarray[double,ndim=1] E_avg = np.zeros([10000]) cdef np.ndarray[double,ndim=1] k = np.zeros([19]) cdef np.ndarray[double,ndim=1] d = np.zeros([7]) cdef int i for i in range (0,5000): k[0] = 1.+0.1*randn(1) k[1] = 0.15+0.05*randn(1) k[2] = 0.2+0.05*randn(1) k[3] = 0.2+0.05*randn(1) k[4] = 0.35+0.05*randn(1) k[5] = 0.001+0.0001*randn(1) k[6] = 0.5+0.05*randn(1) k[7] = 0.3+0.05*randn(1) k[8] = 30.+5.*randn(1) k[9] = 18.+3.*randn(1) k[10] = 18.+3.*randn(1) k[11] = 18.+3.*randn(1) k[12] = 18.+3.*randn(1) k[13] = 3.6+0.5*randn(1) k[14] = 0.15+0.05*randn(1) k[15] = 0.15+0.05*randn(1) k[16] = 0.92+0.1*randn(1) k[17] = 0.92+0.1*randn(1) k[18] = 0.01+0.001*randn(1) d[0] = 0.7+0.05*randn(1) d[1] = 0.25+0.025*randn(1) d[2] = 1.5+0.05*randn(1) d[3] = 1.5+0.05*randn(1) d[4] = 0.06+0.01*randn(1) d[5] = 0.06+0.01*randn(1) d[6] = 0.03+0.005*randn(1) r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d)) E_simulated[:,i] = r[:,1] for i in range(0,10000): E_avg[i] = sum(E_simulated[i,:])/5000. pl.plot(t,E_avg,'-ro') pl.show() 

Here are a few pstats from cProfile of my Python / Numpy code:

ncalls tottime percall cumtime percall

5000 82.505 0.017 236.760 0.047 {scipy.integrate._odepack.odeint}

1 1.504 1.504 238.949 238.949 myc_rb_e2f.py:1(<module>)

5000 0.025 0.000 236.855 0.047 C:\Python27\lib\site-packages\scipy\integrate\odepack.py:18(odeint)

12291237 154.255 0.000 154.255 0.000 myc_rb_e2f.py:7(myc_rb_e2f)

+6
source share
2 answers

Modify the function definition to include parameter types:

 def myc_rb_e2f(np.ndarray[double,ndim=1]y, double t, np.ndarray[double, ndim=1] k, np.ndarray[double, ndim=1] d): 

This will improve runtime by about 3 times compared to the numpy implementation and 6-7 times compared to your initial cython implementation. Just FYI, I lowered the number of iterations, so I didn't have to wait long for it to finish testing. It should increase with the desired number of iterations.

 [ pkerp@plastilin so]$ time python run_numpy.py real 0m47.572s user 0m45.702s sys 0m0.049s [ pkerp@plastilin so]$ time python run_cython1.py real 1m14.851s user 1m12.308s sys 0m0.135s [ pkerp@plastilin so]$ time python run_cython2.py real 0m15.774s user 0m14.115s sys 0m0.105s 

Edit:

In addition, you do not need to create a new array every time you want to return results from myc_rb_e2f . You can simply declare an array of results in main , pass it on every call, and then populate it. This will save you a lot of unnecessary highlighting. This is more than double the previous best run time:

 [ pkerp@plastilin so]$ time python run_cython3.py real 0m6.165s user 0m4.818s sys 0m0.152s 

And the code:

 cimport numpy as np import numpy as np from numpy import * import pylab as pl from pylab import * from scipy import integrate def myc_rb_e2f(np.ndarray[double,ndim=1]y, double t, np.ndarray[double, ndim=1] k, np.ndarray[double, ndim=1] d, np.ndarray[double, ndim=1] res): cdef double S = 0.01 if t > 300.0: S = 5.0 #if t > 400 #S = 0.01 cdef double t1 = k[0]*S/(k[7]+S) cdef double t2 = k[1]*(y[0]/(k[14]+y[0]))*(y[1]/(k[15]+y[1])) cdef double t3 = k[5]*y[0]/(k[14]+y[0]) cdef double t4 = k[11]*y[2]*y[6]/(k[16]+y[6]) cdef double t5 = k[12]*y[3]*y[6]/(k[17]+y[6]) cdef double t6 = k[2]*y[0]/(k[14]+y[0]) cdef double t7 = k[3]*S/(k[7]+S) cdef double t8 = k[6]*y[1]/(k[15]+y[1]) cdef double t9 = k[13]*y[5]/(k[18]+y[5]) cdef double t10 = k[9]*y[2]*y[4]/(k[16]+y[4]) cdef double t11 = k[10]*y[3]*y[4]/(k[17]+y[4]) cdef double dM = t1-d[0]*y[0] cdef double dE = t2+t3+t4+t5-k[8]*y[4]*y[1]-d[1]*y[1] cdef double dCD = t6+t7-d[2]*y[2] cdef double dCE = t8-d[3]*y[3] cdef double dR = k[4]+t9-k[8]*y[4]*y[1]-t10-t11-d[4]*y[4] cdef double dRP = t10+t11+t4+t5-t9-d[5]*y[5] cdef double dRE = k[8]*y[4]*y[1]-t4-t5-d[6]*y[6] res[0] = dM res[1] = dE res[2] = dCD res[3] = dCE res[4] = dR res[5] = dRP res[6] = dRE return res def main(): cdef np.ndarray[double,ndim=1] t = np.zeros(467) cdef np.ndarray[double,ndim=1] results = np.zeros(7) t = np.linspace(0.,3000.,467.) # Initial concentrations of [M,E,CD,CE,R,RP,RE] cdef np.ndarray[double,ndim=1] y0 = np.array([0.,0.,0.,0.,0.4,0.,0.25]) cdef np.ndarray[double,ndim=2] E_simulated = np.zeros([467,554]) cdef np.ndarray[double,ndim=2] r = np.zeros([467,7]) cdef np.ndarray[double,ndim=1] E_avg = np.zeros([467]) cdef np.ndarray[double,ndim=1] k = np.zeros([19]) cdef np.ndarray[double,ndim=1] d = np.zeros([7]) cdef int i for i in range (0,554): k[0] = 1.+0.1*randn(1) k[1] = 0.15+0.05*randn(1) k[2] = 0.2+0.05*randn(1) k[3] = 0.2+0.05*randn(1) k[4] = 0.35+0.05*randn(1) k[5] = 0.001+0.0001*randn(1) k[6] = 0.5+0.05*randn(1) k[7] = 0.3+0.05*randn(1) k[8] = 30.+5.*randn(1) k[9] = 18.+3.*randn(1) k[10] = 18.+3.*randn(1) k[11] = 18.+3.*randn(1) k[12] = 18.+3.*randn(1) k[13] = 3.6+0.5*randn(1) k[14] = 0.15+0.05*randn(1) k[15] = 0.15+0.05*randn(1) k[16] = 0.92+0.1*randn(1) k[17] = 0.92+0.1*randn(1) k[18] = 0.01+0.001*randn(1) d[0] = 0.7+0.05*randn(1) d[1] = 0.25+0.025*randn(1) d[2] = 1.5+0.05*randn(1) d[3] = 1.5+0.05*randn(1) d[4] = 0.06+0.01*randn(1) d[5] = 0.06+0.01*randn(1) d[6] = 0.03+0.005*randn(1) r = integrate.odeint(myc_rb_e2f,y0,t,args=(k,d,results)) E_simulated[:,i] = r[:,1] for i in range(0,467): E_avg[i] = sum(E_simulated[i,:])/554. #pl.plot(t,E_avg,'-ro') #pl.show() if __name__ == "__main__": main() 
+10
source

disclaimer: I am one of the main developers of the mentioned tool

As an alternative, extracting the myc_rb_e2f function into a single file and compiling with pythran gives x5 acceleration on my side.

Added dedicated Python code (type signature only):

 #pythran export myc_rb_e2f(float[], float, float[], float[]) def myc_rb_e2f(y,t,k,d): M = y[0] E = y[1] CD = y[2] CE = y[3] R = y[4] RP = y[5] RE = y[6] S = 0.01 if t > 300: S = 5.0 #if t > 400 #S = 0.01 t1 = k[0]*S/(k[7]+S); t2 = k[1]*(M/(k[14]+M))*(E/(k[15]+E)); t3 = k[5]*M/(k[14]+M); t4 = k[11]*CD*RE/(k[16]+RE); t5 = k[12]*CE*RE/(k[17]+RE); t6 = k[2]*M/(k[14]+M); t7 = k[3]*S/(k[7]+S); t8 = k[6]*E/(k[15]+E); t9 = k[13]*RP/(k[18]+RP); t10 = k[9]*CD*R/(k[16]+R); t11 = k[10]*CE*R/(k[17]+R); dM = t1-d[0]*M dE = t2+t3+t4+t5-k[8]*R*Ed[1]*E dCD = t6+t7-d[2]*CD dCE = t8-d[3]*CE dR = k[4]+t9-k[8]*R*E-t10-t11-d[4]*R dRP = t10+t11+t4+t5-t9-d[5]*RP dRE = k[8]*R*E-t4-t5-d[6]*RE dy = [dM,dE,dCD,dCE,dR,dRP,dRE] return dy 

compiled with:

 > pythran myc.py 

And it is imported using from myc import myc_rb_e2f instead of defining a function.

The source code runs at 103.46s on my laptop, the one that calls the routine compiled by pythran starts at 22.23s .

+1
source

All Articles