C # 2.0 The fastest way to parse an Excel spreadsheet

Possible duplicate:
Reading Excel files with C #

What is the fastest way to read large datasets from excel from Csharp. Sample code will be great.,

+4
source share
2 answers

In our desktop environment, I achieved the best combination of performance, flexibility, and stability using Excel through COM.

Access to Excel is always through one stream.

I use late binding (in VB.Net) to make my version of the application independent.

The rest of the application is developed in C #, only this part and some other small parts are in VB, because they are easier in VB.Net.

Dim workBook As Object = GetObject(fileName) Dim workSheet As Object = workBook.WorkSheets.Item(WorkSheetNr) Dim range As Object = workSheet.Cells.Item(1, 1) Dim range2 As Object = range.CurrentRegion Dim rrow As Integer = range2.Row ' For XL97, first convert to integer. XL97 will generate an error ' Dim rcolumn As Integer = range2.Column Dim top As Object = workSheet.Cells.Item(rrow, rcolumn) Dim bottom As Object = top.Offset(range2.Rows.Count - 1, range2.Columns.Count - 1) range = workSheet.Range(top, bottom) Dim values As Object(,) values = range.Value 

Here you have a two-dimensional array containing values ​​from Excel. The last statement gets data from Excel to .Net.

Since the limits on the size of the Excel worksheet, they cannot be very large, so memory should not be a problem.

We conducted several performance tests on several systems. It is optimized to make as few (out-of-process) COM calls as possible.

This method provided us with better performance, especially because the data is located directly in the array, and access to this data is faster than through a data set.

In this solution, Excel starts slowly. But if you need to process several files immediately after each other, the cost of running Excel is only done once.

Also, I would not use this solution in a server environment.

+2
source
 public class ExcelHeaderValues { public static string CUSIP = "CUSIP"; public static string ORIG = "ORIG"; public static string PRICE = "PRICE"; public static int COLUMNCOUNT = 3; } public class ExcelParser { private ArrayList collOutput = null; string sSheetName = String.Empty; string[] strValidColumn; int validRowCnt = 0; string sColumnPositions = String.Empty; OleDbCommand ExcelCommand; OleDbDataAdapter ExcelAdapter; OleDbConnection ExcelConnection = null; DataSet dsSheet = null; string path = string.Empty; string identifier = string.Empty; public ExcelParser() { collOutput = new ArrayList(); } public void Extract() { bool headermatch = false; string strCusip = string.Empty, strOrig = string.Empty, strPrice = string.Empty, strData = string.Empty; string strCusipPos = string.Empty, strPricePos = string.Empty, strOrigPos = string.Empty; string strColumnHeader = String.Empty; int reqColcount = 0; string[] strTemp; bool bTemp = false; bool validRow = false; DataTable schemaTable = GetSchemaTable(); validRowCnt = 0; foreach (DataRow dr in schemaTable.Rows) { if (dsSheet != null) { dsSheet.Reset(); dsSheet = null; } strCusipPos = string.Empty; strOrigPos = string.Empty; strPricePos = string.Empty; if (isValidSheet(dr)) { sColumnPositions = string.Empty; validRowCnt = 0; foreach (DataRow dataRow in dsSheet.Tables[0].Rows) { sColumnPositions = string.Empty; if (headermatch == false) { sColumnPositions = string.Empty; foreach (DataColumn column in dsSheet.Tables[0].Columns) { strColumnHeader = dataRow[column.ColumnName].ToString().ToUpper().Trim(); strColumnHeader = strColumnHeader.ToUpper(); if (strColumnHeader == ExcelHeaderValues.ORIG.ToUpper() || strColumnHeader == ExcelHeaderValues.CUSIP.ToUpper() || strColumnHeader == ExcelHeaderValues.PRICE.ToUpper()) { bTemp = true; validRow = true; reqColcount = ExcelHeaderValues.COLUMNCOUNT; } if (bTemp) { bTemp = false; sColumnPositions += column.ColumnName + "^" + strColumnHeader + ";"; } } strValidColumn = sColumnPositions.Trim().Split(';'); if (validRow == true && reqColcount == strValidColumn.Length - 1) { headermatch = true; break; } validRowCnt++; } } if (headermatch == true) { try { if (dsSheet.Tables[0].Rows.Count > 0) { if (strValidColumn.Length > 0) { for (int i = 0; i < strValidColumn.Length - 1; i++) { if (strValidColumn[i].ToUpper().Contains("CUSIP")) { strTemp = strValidColumn[i].ToString().Split('^'); strCusipPos = strTemp[0].ToString(); strTemp = null; } if (strValidColumn[i].ToUpper().Contains("PRICE")) { strTemp = strValidColumn[i].ToString().Split('^'); strPricePos = strTemp[0].ToString(); strTemp = null; } if (strValidColumn[i].ToUpper().Contains("ORIG")) { strTemp = strValidColumn[i].ToString().Split('^'); strOrigPos = strTemp[0].ToString(); strTemp = null; } } } for (int iData = validRowCnt; iData < dsSheet.Tables[0].Rows.Count; iData++) { if (strCusipPos.Trim() != "") strCusip = dsSheet.Tables[0].Rows[iData][strCusipPos].ToString().Trim(); if (strOrigPos.Trim() != "") strOrig = dsSheet.Tables[0].Rows[iData][strOrigPos].ToString().Trim(); if (strPricePos.Trim() != "") strPrice = dsSheet.Tables[0].Rows[iData][strPricePos].ToString().Trim().ToUpper(); strData = ""; if (strCusip.Length == 9 && strCusip != "" && strPrice != "" && strOrig != "" && !strPrice.ToUpper().Contains("SOLD")) strData = strCusip + "|" + Convert.ToDouble(strOrig) * 1000000 + "|" + strPrice + "|||||"; if (strData != null && strData != "") collOutput.Add(strData); strCusip = string.Empty; strOrig = string.Empty; strPrice = string.Empty; strData = string.Empty; } } } catch (Exception ex) { throw ex; } } headermatch = false; sColumnPositions = string.Empty; strColumnHeader = string.Empty; } } } private bool isValidSheet(DataRow dr) { bool isValidSheet = false; sSheetName = dr[2].ToString().ToUpper(); if (!(sSheetName.Contains("$_")) && !(sSheetName.Contains("$'_")) && (!sSheetName.Contains("Print_Titles".ToUpper())) && (dr[3].ToString() == "TABLE" && ((!sSheetName.Contains("Print_Area".ToUpper())))) && !(sSheetName.ToUpper() == "DLOFFERLOOKUP")) { if (sSheetName.Trim().ToUpper() != "Disclaimer$".ToUpper() && sSheetName.Trim().ToUpper() != "Summary$".ToUpper() && sSheetName.Trim().ToUpper() != "FORMULAS$" ) { string sQry = string.Empty; sQry = "SELECT * FROM [" + sSheetName + "]"; ExcelCommand = new OleDbCommand(sQry, ExcelConnection); dsSheet = new DataSet(); ExcelAdapter = new OleDbDataAdapter(ExcelCommand); isValidSheet = false; try { ExcelAdapter.Fill(dsSheet, sSheetName); isValidSheet = true; } catch (Exception ex) { isValidSheet = false; throw new Exception(ex.Message.ToString()); } finally { ExcelAdapter = null; ExcelCommand = null; } } } return isValidSheet; } private DataTable GetSchemaTable() { DataTable dt = null; string connectionString = String.Empty; connectionString = GetConnectionString(); ExcelConnection = new OleDbConnection(connectionString); try { ExcelConnection.Open(); dt = ExcelConnection.GetOleDbSchemaTable(OleDbSchemaGuid.Tables, new object[] { null, null, null, "TABLE" }); } catch (Exception ex) { throw ex; } return dt; } private string GetConnectionString() { string connStr = String.Empty; try { if (path.ToLower().Contains(".xlsx")) { connStr = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='Excel 12.0;HDR=No;IMEX=1;'"; } else if (path.ToLower().Contains(".xlsm")) { connStr = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='Excel 12.0 Macro;HDR=No;IMEX=1;'"; } else if (path.ToLower().Contains(".xls")) { connStr = "provider=Microsoft.Jet.OLEDB.4.0;Data Source='" + path + "';Extended Properties='Excel 8.0;HDR=No;IMEX=1;'"; } else {connStr = "Provider=Microsoft.ACE.OLEDB.12.0;" + "Data Source='" + path + "';" + "Extended Properties='HTML Import;IMEX=1;HDR=No;'"; } } catch (Exception ex) { throw ex; } return connStr; } } 
+2
source

All Articles