We have the numily get_pos_neg_bitwise vector function, which uses the mask = [132 20 192] and df.shape (500e3, 4), which we want to speed up with numba.
from numba import jit import numpy as np from time import time def get_pos_neg_bitwise(df, mask): """ In [1]: print mask [132 20 192] In [1]: print df [[ 1 162 97 41] [ 0 136 135 171] ..., [ 0 245 30 73]] """ check = (np.bitwise_and(mask, df[:, 1:]) == mask).all(axis=1) pos = (df[:, 0] == 1) & check neg = (df[:, 0] == 0) & check pos = np.nonzero(pos)[0] neg = np.nonzero(neg)[0] return (pos, neg)
Using the advice from @morningsun, we made this version of numba:
@jit(nopython=True) def numba_get_pos_neg_bitwise(df, mask): posneg = np.zeros((df.shape[0], 2)) for idx in range(df.shape[0]): vandmask = np.bitwise_and(df[idx, 1:], mask) # numba fail with # if np.all(vandmask == mask): vandm_equal_m = 1 for i, val in enumerate(vandmask): if val != mask[i]: vandm_equal_m = 0 break if vandm_equal_m == 1: if df[idx, 0] == 1: posneg[idx, 0] = 1 else: posneg[idx, 1] = 1 pos = list(np.nonzero(posneg[:, 0])[0]) neg = list(np.nonzero(posneg[:, 1])[0]) return (pos, neg)
But it is still 3 times slower than numpy one (~ 0.06s Vs ~ 0.02s).
if __name__ == '__main__': df = np.array(np.random.randint(256, size=(int(500e3), 4))) df[:, 0] = np.random.randint(2, size=(1, df.shape[0])) # set target to 0 or 1 mask = np.array([132, 20, 192]) start = time() pos, neg = get_pos_neg_bitwise(df, mask) msg = '==> pos, neg made; p={}, n={} in [{:.4} s] numpy' print msg.format(len(pos), len(neg), time() - start) start = time() msg = '==> pos, neg made; p={}, n={} in [{:.4} s] numba' pos, neg = numba_get_pos_neg_bitwise(df, mask) print msg.format(len(pos), len(neg), time() - start) start = time() pos, neg = numba_get_pos_neg_bitwise(df, mask) print msg.format(len(pos), len(neg), time() - start)
Did I miss something?
In [1]: %run numba_test2.py ==> pos, neg made; p=3852, n=3957 in [0.02306 s] numpy ==> pos, neg made; p=3852, n=3957 in [0.3492 s] numba ==> pos, neg made; p=3852, n=3957 in [0.06425 s] numba In [1]: