My approach uses a regex, since it gives more control over the pattern being matched.
File contents
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
Main_data1;a;b;c;dss;e;1
Main_data2;aa;bb;sdc;d;e;2
Main_data3;aaa;bbb;ccce;d;e;3
Main_data4;aaaa;bbbb;cc;d;e;4
Main_data5;aaaaa;bbbbb;cccc;d;e;523233
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
Main_data1;a;b;c;dss;e;1
Main_data2;aa;bb;sdc;d;e;2
Main_data3;aaa;bbb;ccce;d;e;3
Main_data4;aaaa;bbbb;cc;d;e;4
Main_data5;aaaaa;bbbbb;cccc;d;e;523233
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ******** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
Main_data1;a;b;c;dss;e;1
Main_data2;aa;bb;sdc;d;e;2
Main_data3;aaa;bbb;ccce;d;e;3
Main_data4;aaaa;bbbb;cc;d;e;4
Main_data5;aaaaa;bbbbb;cccc;d;e;523233
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ******** SOME JUNK DATA ****
**** SOME JUNK DATA ****
**** SOME JUNK DATA ****
The code:
import re
# Matches a "Main_data..." line and captures the last run of digits that
# directly follows a ';' (the greedy '.*' pushes the capture as far right
# as possible).
pattern = r'Main_data.*(?<=;)([0-9]{1,})'
_main_data_re = re.compile(pattern, re.IGNORECASE | re.MULTILINE)


def extract_groups(lines):
    """Group the trailing numbers of consecutive ``Main_data`` lines.

    Args:
        lines: An iterable of text lines (for example an open text file).

    Returns:
        A list of lists of strings. Each inner list holds the captured
        numbers of one uninterrupted run of matching lines, in file
        order. Non-matching lines only act as separators between runs.
    """
    groups = []
    current = []
    for line in lines:
        match = _main_data_re.search(line)
        if match:
            current.append(match.group(1))
        elif current:
            # A non-matching line closes the run collected so far.
            groups.append(current)
            current = []
    # Runs are collected directly (instead of joining everything and
    # re-splitting on surrounding commas) so that a run touching the
    # first or the last line of the input is kept in full.
    if current:
        groups.append(current)
    return groups


if __name__ == "__main__":
    # Text mode so the str pattern can be applied to each line; the
    # context manager guarantees the file handle is closed.
    with open(r"C:\text.txt") as fl:
        outpt = extract_groups(fl)
    print(outpt)
Output
[['1', '2', '3', '4', '523233'], ['1', '2', '3', '4', '523233'], ['1', '2', '3', '4', '523233']]
source
share