You can read the CSV in pieces. Since pd.read_csv returns an iterator when the chunksize parameter is specified, you can use itertools.takewhile to read only as many chunks as you need without reading the entire file.
import itertools as IT
import pandas as pd


def filter_while_below(filename, limit=10, column='B', chunksize=10 ** 5):
    """Read *filename* in chunks, keeping only rows where ``column`` < ``limit``.

    Assumes the CSV has a header row and is sorted ascending on ``column``,
    so reading can stop as soon as a chunk starts at or above ``limit`` —
    the rest of the file is never read.

    Parameters
    ----------
    filename : str or path-like
        Path of the CSV file to read.
    limit : numeric, default 10
        Exclusive upper bound on ``column``.
    column : str, default 'B'
        Name of the (sorted) column to filter on.
    chunksize : int, default 10**5
        Number of rows per chunk passed to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The concatenated rows with ``column`` strictly below ``limit``.
    """
    # NOTE(review): the original snippet passed header=None, which makes the
    # column labels integers and would make chunk['B'] raise KeyError; here
    # the header row is read from the file so label lookup is consistent.
    chunks = pd.read_csv(filename, chunksize=chunksize)
    # Take every chunk whose FIRST value is still below the limit. This
    # includes the boundary chunk that straddles the cutoff; testing the
    # LAST value (as the original did) would make takewhile drop that chunk
    # entirely, silently losing its leading in-range rows.
    chunks = IT.takewhile(lambda chunk: chunk[column].iloc[0] < limit, chunks)
    df = pd.concat(chunks)
    # Trim the out-of-range tail of the boundary chunk.
    return df.loc[df[column] < limit]
Alternatively, to avoid needing the final df.loc[mask] step to trim unwanted rows from the last chunk, a cleaner solution is to define a custom generator:
import pandas as pd


def valid(chunks, column='B', limit=10):
    """Yield chunks whose ``column`` values are all below ``limit``.

    Assumes the stream of chunks is sorted ascending on ``column``. Chunks
    that are entirely in range are yielded unchanged; the first chunk that
    contains an out-of-range value (the boundary chunk) is yielded with only
    its in-range rows, and iteration stops — later chunks are never read.

    Parameters
    ----------
    chunks : iterable of pandas.DataFrame
        Typically the iterator returned by ``pd.read_csv(..., chunksize=n)``.
    column : str, default 'B'
        Name of the (sorted) column to filter on.
    limit : numeric, default 10
        Exclusive upper bound on ``column``.
    """
    for chunk in chunks:
        mask = chunk[column] < limit
        if mask.all():
            yield chunk
        else:
            # Boundary chunk: keep only the in-range rows, then stop so no
            # further chunks are pulled from the file.
            yield chunk.loc[mask]
            break


def read_filtered(filename, column='B', limit=10, chunksize=10 ** 5):
    """Read *filename* in chunks and return rows where ``column`` < ``limit``.

    Same contract as :func:`valid`: the CSV must have a header row and be
    sorted ascending on ``column``.
    """
    # NOTE(review): the original snippet passed header=None, which makes the
    # column labels integers and would make chunk['B'] raise KeyError; here
    # the header row is read from the file so label lookup is consistent.
    chunks = pd.read_csv(filename, chunksize=chunksize)
    return pd.concat(valid(chunks, column=column, limit=limit))
source share