If performance matters to you, make sure you time it:
"""Benchmark ``pandas.read_csv`` date-parsing options on two date formats.

For each format the script times four variants: no date parsing,
``parse_dates`` alone, ``parse_dates`` with ``infer_datetime_format=True``,
and ``parse_dates`` with an explicit ``strptime``-based ``date_parser``.
"""
import sys
import timeit

import pandas as pd

# timeit configuration: every statement is executed `numbers` times per
# round, for `repeat` rounds; the fastest round is the one reported.
repeat = 3
numbers = 100

# Setup code run by timeit before each round. The CSV text is embedded with
# repr() so it stays on a single source line: timeit re-indents multi-line
# setup code, which would otherwise leak leading spaces into the CSV rows.
_SETUP_TEMPLATE = """\
import datetime
import io

import pandas as pd

data = io.StringIO({header!r} + {rows!r} * {copies})
"""

_HEADER = "ProductCode,Date\n"

_US_ROWS = (
    "x1,07/29/15\n"
    "x2,07/29/15\n"
    "x3,07/29/15\n"
    "x4,07/30/15\n"
    "x5,07/29/15\n"
    "x6,07/29/15\n"
    "x7,07/29/15\n"
    "y7,08/05/15\n"
    "x8,08/05/15\n"
    "z3,08/05/15\n"
)

_ISO_ROWS = (
    "x1,2016-10-15 00:00:43\n"
    "x2,2016-10-15 00:00:56\n"
    "x3,2016-10-15 00:00:56\n"
    "x4,2016-10-15 00:00:12\n"
    "x5,2016-10-15 00:00:34\n"
    "x6,2016-10-15 00:00:55\n"
    "x7,2016-10-15 00:00:06\n"
    "y7,2016-10-15 00:00:01\n"
    "x8,2016-10-15 00:00:00\n"
    "z3,2016-10-15 00:00:02\n"
)

# 1,000 rows of %m/%d/%y dates and 10,000 rows of %Y-%m-%d %H:%M:%S dates.
US_DATE_SETUP = _SETUP_TEMPLATE.format(
    header=_HEADER, rows=_US_ROWS, copies=100)
ISO_DATE_SETUP = _SETUP_TEMPLATE.format(
    header=_HEADER, rows=_ISO_ROWS, copies=1000)

# Fallback setup code used by time() when the caller does not pass one.
setup = US_DATE_SETUP


def time(statement, _setup=None):
    """Time *statement* with timeit, print the best round and return it.

    Args:
        statement: Source code of the statement to benchmark.
        _setup: Setup source code run before each round; when omitted (or
            empty) the module-level ``setup`` string is used.

    Returns:
        The smallest total time, in seconds, over ``repeat`` rounds of
        ``numbers`` executions each.
    """
    timer = timeit.Timer(statement, setup=_setup or setup)
    best = min(timer.repeat(repeat, numbers))
    print(best)
    return best


def _benchmark(date_format, setup_code):
    """Print a heading and time the four read_csv variants for one format."""
    print("Format %s" % date_format)
    # Stdlib datetime (imported by the setup code) replaces the removed
    # `pd.datetime` alias, so this statement also runs on current pandas.
    custom_parser = (
        'pd.read_csv(data, parse_dates=["Date"],'
        ' date_parser=lambda x: datetime.datetime.strptime(x, "%s"));'
        ' data.seek(0)'
    ) % date_format
    # Every statement rewinds the shared buffer so the next run re-reads it.
    # NOTE: `infer_datetime_format` and `date_parser` are deprecated as of
    # pandas 2.0; they are kept because they are what this script measures.
    statements = (
        'pd.read_csv(data); data.seek(0)',
        'pd.read_csv(data, parse_dates=["Date"]); data.seek(0)',
        'pd.read_csv(data, parse_dates=["Date"],'
        ' infer_datetime_format=True); data.seek(0)',
        custom_parser,
    )
    for statement in statements:
        time(statement, setup_code)


def main():
    """Print interpreter and pandas versions, then run both benchmarks."""
    print('Python %s on %s' % (sys.version, sys.platform))
    print('Pandas version %s' % pd.__version__)
    _benchmark("%m/%d/%y", US_DATE_SETUP)
    _benchmark("%Y-%m-%d %H:%M:%S", ISO_DATE_SETUP)


if __name__ == "__main__":
    main()
prints:
Python 3.7.1 (v3.7.1:260ec2c36a, Oct 20 2018, 03:13:28) [Clang 6.0 (clang-600.0.57)] on darwin Pandas version 0.23.4 Format %m/%d/%y 0.19123052499999993 8.20691274 8.143124389 1.2384357139999977 Format %Y-%m-%d %H:%M:%S 0.5238807110000039 0.9202787830000005 0.9832778819999959 12.002349824999996
So, with an ISO 8601-formatted date (%Y-%m-%d %H:%M:%S apparently counts as ISO 8601; I guess the T can be dropped and replaced with a space), you should not specify infer_datetime_format (which apparently makes no difference for the more common formats either), and passing in your own parser just cripples performance. On the other hand, date_parser does make a real difference with less standard date formats. As usual, be sure to time before you optimize.