You can use a list comprehension with `concat` and `all`:
import numpy as np
import pandas as pd

# Fixed seed so the sample frame (and the output shown below) is reproducible.
np.random.seed(123)

# Each label column is drawn as 0/1 integers, converted to a categorical and
# then relabelled positionally (0 -> first label, 1 -> second label).
# ``rename_categories`` is used because assigning to ``Series.cat.categories``
# is no longer supported (the setter was removed in pandas 2.0).
x = pd.Series(np.random.randint(0, 2, 10), dtype='category')
x = x.cat.rename_categories(['no', 'yes'])

y = pd.Series(np.random.randint(0, 2, 10), dtype='category')
y = y.cat.rename_categories(['no', 'yes'])

z = pd.Series(np.random.randint(0, 2, 10), dtype='category')
z = z.cat.rename_categories(['male', 'female'])

# Ages stay numeric but are stored as a categorical column as well.
a = pd.Series(np.random.randint(20, 60, 10), dtype='category')

data = pd.DataFrame({'risk': x, 'smoking': y, 'sex': z, 'age': a})
print(data)
# Output as originally recorded (column order may differ between pandas
# versions):
#    age risk     sex smoking
# 0   24   no    male     yes
# 1   23  yes    male     yes
# 2   22   no  female      no
# 3   40   no  female     yes
# 4   59   no  female      no
# 5   22   no    male     yes
# 6   40   no  female      no
# 7   27  yes    male     yes
# 8   55  yes    male     yes
# 9   48   no    male      no
# Column name -> required value; a row is selected only if it matches
# every pair.
tmp = {'risk': 'no', 'smoking': 'yes', 'sex': 'female'}

# Build one boolean Series per condition, line them up side by side as
# columns, then reduce row-wise: True only where all conditions hold.
conditions = [data[col].eq(val) for col, val in tmp.items()]
mask = pd.concat(conditions, axis=1).all(axis=1)
print(mask)
# 0    False
# 1    False
# 2    False
# 3     True
# 4    False
# 5    False
# 6    False
# 7    False
# 8    False
# 9    False
# dtype: bool

# Boolean indexing keeps only the rows where the mask is True.
df1 = data[mask]
print(df1)
# Output as originally recorded (column order may differ between pandas
# versions):
#    age risk     sex smoking
# 3   40   no  female     yes
# The (column, value) pairs that the comprehension iterates over.
L = list(tmp.items())
print(L)
# Output as originally recorded (pair order follows the dict's iteration
# order, which may differ between Python versions):
# [('smoking', 'yes'), ('sex', 'female'), ('risk', 'no')]

# The intermediate boolean frame: one column per condition, before the
# row-wise ``all(axis=1)`` reduction is applied.
L = pd.concat([data[col].eq(val) for col, val in tmp.items()], axis=1)
print(L)
#   smoking    sex   risk
# 0    True  False   True
# 1    True  False  False
# 2   False   True   True
# 3    True   True   True
# 4   False   True   True
# 5    True  False   True
# 6   False   True   True
# 7    True  False  False
# 8    True  False  False
# 9   False  False   True
Timings:
The timings below use a DataFrame with `len(data)` = 1M (one million rows).
# Benchmark setup: the same four-column frame as the small example, but
# with N rows.
N = 1000000
np.random.seed(123)

# ``rename_categories`` relabels the 0/1 categories positionally; assigning
# to ``Series.cat.categories`` is no longer supported (the setter was removed
# in pandas 2.0).
x = pd.Series(np.random.randint(0, 2, N), dtype='category')
x = x.cat.rename_categories(['no', 'yes'])

y = pd.Series(np.random.randint(0, 2, N), dtype='category')
y = y.cat.rename_categories(['no', 'yes'])

z = pd.Series(np.random.randint(0, 2, N), dtype='category')
z = z.cat.rename_categories(['male', 'female'])

a = pd.Series(np.random.randint(20, 60, N), dtype='category')

data = pd.DataFrame({'risk': x, 'smoking': y, 'sex': z, 'age': a})
# [1000000 rows x 4 columns]
print(data)

# Column name -> required value used by all three approaches timed below.
tmp = {'risk': 'no', 'smoking': 'yes', 'sex': 'female'}

# IPython timing session, kept verbatim as originally recorded (these lines
# use the ``%timeit`` magic and are not plain Python):
#
# 1) boolean mask built with concat + all
# In [133]: %timeit (data[pd.concat([data[x[0]].eq(x[1]) for x in tmp.items()], axis=1).all(axis=1)])
# 10 loops, best of 3: 89.1 ms per loop
#
# 2) DataFrame.query with a generated expression string
# In [134]: %timeit (data.query(' and '.join(["{} == '{}'".format(k,v) for k,v in tmp.items()])))
# 1 loop, best of 3: 237 ms per loop
#
# 3) merge against a one-row frame built from the dict
# In [135]: %timeit (pd.merge(pd.DataFrame(tmp, index =[0]), data.reset_index()).set_index('index'))
# 1 loop, best of 3: 256 ms per loop