Capturing the Pandas Bad Lines Warning

Is there a way in Pandas to capture the warning generated by setting error_bad_lines = False and warn_bad_lines = True? For example, the following script:

# Minimal reproduction from the question (stated environment: Python 2.7, pandas 0.16).
import pandas as pd
from StringIO import StringIO
# In-memory CSV with a three-column header; the row "6,7,8,9" has four fields.
# The leading spaces on the continuation lines are part of the data.
data = StringIO("""a,b,c
                   1,2,3
                   4,5,6
                   6,7,8,9
                   1,2,5
                   3,4,5""")
# According to the question, this call emits the "Skipping line 4" message
# quoted below it.
pd.read_csv(data, warn_bad_lines=True, error_bad_lines=False)

gives a warning:

Skipping line 4: expected 3 fields, saw 4

I would like to store this output in a string so that I can eventually write it to a log file in order to keep track of the skipped entries.

I tried to use the warnings module, but this “warning” does not appear to be a warning in the traditional sense. I am using Python 2.7 and Pandas 0.16.

Any help would be greatly appreciated.

+4
source share
1 answer

As far as I can tell, capturing this message is not directly supported in pandas.
source1, source2

Possible workarounds:

1. Check the row lengths yourself with the csv module

import pandas as pd
import csv

# warn_bad_lines / error_bad_lines are the flags used in the question
# (pandas 0.16); newer pandas releases replace them with on_bad_lines.
df = pd.read_csv('data.csv', warn_bad_lines=True, error_bad_lines=False)


def report_bad_rows(path, expected):
    """Print every row of the CSV at *path* whose field count != *expected*.

    For each mismatching row the field count is printed first, followed by
    the row itself as returned by csv.reader (a list of strings).
    """
    with open(path) as csv_file:
        for row in csv.reader(csv_file, delimiter=','):
            if len(row) != expected:
                # Single-argument print(...) produces the same output on
                # Python 2 and Python 3, unlike the bare print statement.
                print("Length of row is: %r" % len(row))
                print(row)


#compare length of rows by recommended value:
RECOMMENDED = 3
report_bad_rows('data.csv', RECOMMENDED)

#compare length of rows by length of columns in df
lencols = len(df.columns)
print(lencols)
report_bad_rows('data.csv', lencols)

2. Redirect sys.stdout and sys.stderr to a log file

import pandas as pd
import os
import sys

class RedirectStdStreams(object):
    def __init__(self, stdout=None, stderr=None):
        self._stdout = stdout or sys.stdout
        self._stderr = stderr or sys.stderr

    def __enter__(self):
        self.old_stdout, self.old_stderr = sys.stdout, sys.stderr
        self.old_stdout.flush(); self.old_stderr.flush()
        sys.stdout, sys.stderr = self._stdout, self._stderr

    def __exit__(self, exc_type, exc_value, traceback):
        self._stdout.flush(); self._stderr.flush()
        sys.stdout = self.old_stdout
        sys.stderr = self.old_stderr


if __name__ == '__main__':

    # Open the log inside a with-block so the file is closed (and its
    # buffered contents written out) even if read_csv raises.
    with open('log.txt', 'w') as log_file:
        #replaces sys.stdout, sys.stderr, see http://stackoverflow.com/a/6796752/2901002
        with RedirectStdStreams(stdout=log_file, stderr=log_file):
            df = pd.read_csv('data.csv', warn_bad_lines=True, error_bad_lines=False)
+3

All Articles