For xlsx, I like the solution posted earlier at https://stackoverflow.com/a/166189/, which uses only modules from the standard library.
def xlsx(fname):
    """Read the first worksheet of an .xlsx workbook using only the stdlib.

    The workbook is opened as a zip archive and ``xl/worksheets/sheet1.xml``
    is streamed with ``iterparse``.

    Args:
        fname: Path (or file-like object) accepted by ``zipfile.ZipFile``.

    Returns:
        A list with one dict per ``<row>`` element, in document order. Each
        dict maps the column letters of a cell reference (``"A"``, ``"AA"``,
        ...) to the cell's value as text. Cells of type ``"s"`` are resolved
        through the shared-string table; a cell without a ``<v>`` child maps
        to ``''``.

    Raises:
        KeyError: if ``xl/worksheets/sheet1.xml`` is missing from the archive
            or a cell has no ``r`` attribute.
        IndexError: if a cell refers to a shared string that does not exist.
    """
    import zipfile
    from xml.etree.ElementTree import iterparse

    strings = []
    rows = []
    # Context managers make sure the archive and its member streams are
    # closed even when parsing fails part-way through.
    with zipfile.ZipFile(fname) as z:
        # Workbooks that contain no text cells may omit the shared-string
        # part entirely; an empty table is enough in that case.
        if 'xl/sharedStrings.xml' in z.namelist():
            with z.open('xl/sharedStrings.xml') as f:
                for _, el in iterparse(f):
                    # Build exactly one entry per <si> so that indices stay
                    # aligned with the cell references. Rich text splits a
                    # string into several <r><t> runs, and phonetic <rPh><t>
                    # text is not part of the value, so counting every <t>
                    # would shift all following indices.
                    if not el.tag.endswith('}si'):
                        continue
                    parts = []
                    for child in el:
                        if child.tag.endswith('}t'):
                            parts.append(child.text or '')
                        elif child.tag.endswith('}r'):
                            parts.extend(
                                run.text or ''
                                for run in child
                                if run.tag.endswith('}t')
                            )
                    strings.append(''.join(parts))

        row = {}
        value = ''
        with z.open('xl/worksheets/sheet1.xml') as f:
            # iterparse reports "end" events, so a cell's <v> child is seen
            # before the enclosing <c>, and every <c> before its <row>.
            for _, el in iterparse(f):
                tag = el.tag
                if tag.endswith('}v'):
                    value = el.text
                elif tag.endswith('}c'):
                    if el.attrib.get('t') == 's':
                        value = strings[int(value)]
                    # "AB12" -> "AB": drop the trailing row number.
                    row[el.attrib['r'].rstrip('0123456789')] = value
                    value = ''
                elif tag.endswith('}row'):
                    rows.append(row)
                    row = {}
    return rows
Improvements added: loading contents by sheet name, using re to extract the column letters from a cell reference, and checking whether shared strings are used.
def xlsx(fname,sheet): import zipfile from xml.etree.ElementTree import iterparse import re z = zipfile.ZipFile(fname) if 'xl/sharedStrings.xml' in z.namelist():
Hans de Ridder Oct 28 '18 at 11:53 2018-10-28 11:53
source share