Reading restricted rows from an Excel file loaded in IIS

I have an ASP.NET MVC application hosted on IIS. It has a form through which users upload Excel files containing 50k+ rows. I read the Excel file with the following C# code.

/// <summary>
/// Loads every row of the worksheet named "Sheet1" from an Excel file into a <see cref="DataTable"/>.
/// </summary>
/// <param name="fileName">Path of the Excel workbook to read.</param>
/// <returns>The first table produced by <c>SELECT * FROM [Sheet1$]</c>.</returns>
public DataTable GetExcelDataTable(string fileName)
{
    // Path.GetExtension returns the extension INCLUDING its leading dot (".xls"),
    // so the comparison has to be made against ".xls"; it is also case-insensitive
    // so that "BOOK.XLS" is recognised.
    bool isLegacyXls = string.Equals(
        Path.GetExtension(fileName), ".xls", System.StringComparison.OrdinalIgnoreCase);

    // NOTE(review): the Jet 4.0 provider is, as far as I know, only available to
    // 32-bit processes - confirm the bitness of the IIS application pool.
    string connectionString = isLegacyXls
        ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName)
        : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);

    // Both disposables live in using blocks so the connection is released
    // even when Fill throws.
    using (var conn = new OleDbConnection(connectionString))
    using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn))
    {
        var ds = new DataSet();
        adapter.Fill(ds);
        return ds.Tables[0];
    }
}

The problem is that it reads only up to 30 thousand lines, but not the entire excel file.

Interestingly, I can read (with the same code) all the lines if I run the mvc application with visual studio, but, again, never from the IIS site (IIS also on my machine).

Any ideas why this is happening?

+8
c # asp.net-mvc excel iis oledb
source share
5 answers

With this approach there is no need to install Excel on the target machine:

  // Workbook produced by the last successful InitializeWorkbook call;
  // it is left untouched when loading fails.
  NPOI.SS.UserModel.IWorkbook hssfworkbook;

  /// <summary>
  /// Opens the Excel file at <paramref name="path"/> and stores it in <c>hssfworkbook</c>.
  /// Files ending in ".xlsx" (any casing) are loaded as <c>XSSFWorkbook</c>,
  /// everything else as <c>HSSFWorkbook</c>.
  /// </summary>
  /// <param name="path">Path of the workbook to load.</param>
  /// <returns><c>true</c> when the workbook was loaded; <c>false</c> when any exception occurred.</returns>
  bool InitializeWorkbook(string path)
  {
      try
      {
          // FileAccess.Read avoids taking a write lock on the file. The stream is
          // disposed as soon as the workbook object has been built, in BOTH branches,
          // so no file handle stays open after this method returns.
          using (FileStream file = new FileStream(path, FileMode.Open, FileAccess.Read))
          {
              // Ordinal, case-insensitive comparison: the extension is not linguistic text.
              if (path.EndsWith(".xlsx", System.StringComparison.OrdinalIgnoreCase))
              {
                  hssfworkbook = new XSSFWorkbook(file);
              }
              else
              {
                  hssfworkbook = new HSSFWorkbook(file);
              }
          }

          return true;
      }
      catch
      {
          // Deliberate best-effort contract: every failure (missing file, null path,
          // corrupt workbook, ...) is reported to the caller as false.
          return false;
      }
  }

Then convert the first sheet into a DataTable as follows:

 /// <summary>
 /// Copies the first sheet of a workbook into a <see cref="DataTable"/>.
 /// The first row supplies the column names; up to <paramref name="rowCount"/>
 /// of the rows after it are copied as data, each cell converted with <c>ToString()</c>.
 /// </summary>
 /// <param name="hssfworkbook">Workbook to read (the sheet at index 0 is used).</param>
 /// <param name="rowCount">Maximum number of data rows to copy.</param>
 /// <returns>The populated table; it has no columns when the sheet has no rows.</returns>
 public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount)
 {
     NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
     System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
     DataTable dt = new DataTable();

     // Header row: one column per cell position; a missing cell yields an auto-named column.
     if (rows.MoveNext())
     {
         // IRow is the interface shared by the .xls and .xlsx row types,
         // so no per-format cast (or dynamic dispatch) is needed.
         NPOI.SS.UserModel.IRow header = (NPOI.SS.UserModel.IRow)rows.Current;
         for (int i = 0; i < header.LastCellNum; i++)
         {
             ICell cell = header.GetCell(i);
             dt.Columns.Add(cell != null ? cell.ToString() : string.Empty);
         }
     }

     int cnt = 0;
     while (cnt < rowCount && rows.MoveNext())
     {
         cnt++;
         NPOI.SS.UserModel.IRow row = (NPOI.SS.UserModel.IRow)rows.Current;

         // A data row may be wider than the header row; add unnamed columns
         // up front so that no cell is lost and no index is out of range.
         while (dt.Columns.Count < row.LastCellNum)
         {
             dt.Columns.Add();
         }

         DataRow dr = dt.NewRow();
         for (int i = 0; i < row.LastCellNum; i++)
         {
             ICell cell = row.GetCell(i);

             // Cell i goes to column i, so every value sits under the header taken
             // from the same cell position (including the very first cell).
             if (cell == null)
             {
                 dr[i] = System.DBNull.Value;
             }
             else
             {
                 dr[i] = cell.ToString();
             }
         }

         dt.Rows.Add(dr);
     }

     return dt;
 }

or

  /// <summary>
  /// Copies the first sheet of a workbook into a <see cref="DataTable"/>, choosing the
  /// stored value by cell type. The first row supplies the column names; up to
  /// <paramref name="rowCount"/> of the rows after it are copied as data.
  /// </summary>
  /// <param name="hssfworkbook">Workbook to read (the sheet at index 0 is used).</param>
  /// <param name="rowCount">Maximum number of data rows to copy.</param>
  /// <returns>The populated table; it has no columns when the sheet has no rows.</returns>
  public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int rowCount)
  {
      NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
      System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
      DataTable dt = new DataTable();

      // Header row: one column per cell position; a missing cell yields an auto-named column.
      if (rows.MoveNext())
      {
          // IRow is the interface shared by the .xls and .xlsx row types.
          NPOI.SS.UserModel.IRow header = (NPOI.SS.UserModel.IRow)rows.Current;
          for (int i = 0; i < header.LastCellNum; i++)
          {
              ICell cell = header.GetCell(i);
              dt.Columns.Add(cell != null ? cell.ToString() : string.Empty);
          }
      }

      int cnt = 0;
      while (cnt < rowCount && rows.MoveNext())
      {
          cnt++;
          NPOI.SS.UserModel.IRow row = (NPOI.SS.UserModel.IRow)rows.Current;

          // A data row may be wider than the header row; add unnamed columns
          // up front so that no cell is lost and no index is out of range.
          while (dt.Columns.Count < row.LastCellNum)
          {
              dt.Columns.Add();
          }

          DataRow dr = dt.NewRow();
          for (int i = 0; i < row.LastCellNum; i++)
          {
              ICell cell = row.GetCell(i);

              // Cell i goes to column i, so every value sits under the header taken
              // from the same cell position (including the very first cell).
              if (cell == null)
              {
                  dr[i] = System.DBNull.Value;
                  continue;
              }

              switch (cell.CellType)
              {
                  case CellType.Blank:
                      // Blank cells are marked with a literal placeholder.
                      dr[i] = "[null]";
                      break;
                  case CellType.Boolean:
                      dr[i] = cell.BooleanCellValue;
                      break;
                  case CellType.Numeric:
                      dr[i] = cell.ToString();
                      break;
                  case CellType.String:
                      dr[i] = cell.StringCellValue;
                      break;
                  case CellType.Error:
                      dr[i] = cell.ErrorCellValue;
                      break;
                  case CellType.Formula:
                  default:
                      // Formulas are stored as their text, prefixed with '='.
                      dr[i] = "=" + cell.CellFormula;
                      break;
              }
          }

          dt.Rows.Add(dr);
      }

      return dt;
  }

or:

 /// <summary>
 /// Copies one page ("segment") of the first sheet of a workbook into a <see cref="DataTable"/>.
 /// The first row supplies the column names; then <c>(segment - 1) * rowCount</c> rows are
 /// skipped and up to <paramref name="rowCount"/> rows are copied as data.
 /// </summary>
 /// <param name="hssfworkbook">Workbook to read (the sheet at index 0 is used).</param>
 /// <param name="segment">1-based page number; 1 starts right after the header row.</param>
 /// <param name="rowCount">Page size, i.e. the maximum number of data rows to copy.</param>
 /// <returns>The populated table; it has no rows when the segment lies past the end of the sheet.</returns>
 public DataTable GetExcelDataTable(NPOI.SS.UserModel.IWorkbook hssfworkbook, int segment, int rowCount)
 {
     NPOI.SS.UserModel.ISheet sheet = hssfworkbook.GetSheetAt(0);
     System.Collections.IEnumerator rows = sheet.GetRowEnumerator();
     DataTable dt = new DataTable();

     // Header row: one column per cell position; a missing cell yields an auto-named column.
     if (rows.MoveNext())
     {
         // IRow is the interface shared by the .xls and .xlsx row types.
         NPOI.SS.UserModel.IRow header = (NPOI.SS.UserModel.IRow)rows.Current;
         for (int i = 0; i < header.LastCellNum; i++)
         {
             ICell cell = header.GetCell(i);
             dt.Columns.Add(cell != null ? cell.ToString() : string.Empty);
         }
     }

     // Advance past the rows that belong to earlier segments.
     for (int skipped = 0; skipped < (segment - 1) * rowCount; skipped++)
     {
         if (!rows.MoveNext())
         {
             break;
         }
     }

     int cnt = 0;
     while (cnt < rowCount && rows.MoveNext())
     {
         cnt++;
         NPOI.SS.UserModel.IRow row = (NPOI.SS.UserModel.IRow)rows.Current;

         // A data row may be wider than the header row; add unnamed columns
         // up front so that no cell is lost and no index is out of range.
         while (dt.Columns.Count < row.LastCellNum)
         {
             dt.Columns.Add();
         }

         DataRow dr = dt.NewRow();
         for (int i = 0; i < row.LastCellNum; i++)
         {
             ICell cell = row.GetCell(i);

             // Cell i goes to column i, so every value sits under the header taken
             // from the same cell position (including the very first cell).
             if (cell == null)
             {
                 dr[i] = System.DBNull.Value;
                 continue;
             }

             switch (cell.CellType)
             {
                 case CellType.Blank:
                     // Blank cells are marked with a literal placeholder.
                     dr[i] = "[null]";
                     break;
                 case CellType.Boolean:
                     dr[i] = cell.BooleanCellValue;
                     break;
                 case CellType.Numeric:
                     dr[i] = cell.ToString();
                     break;
                 case CellType.String:
                     dr[i] = cell.StringCellValue;
                     break;
                 case CellType.Error:
                     dr[i] = cell.ErrorCellValue;
                     break;
                 case CellType.Formula:
                 default:
                     // Formulas are stored as their text, prefixed with '='.
                     dr[i] = "=" + cell.CellFormula;
                     break;
             }
         }

         dt.Rows.Add(dr);
     }

     return dt;
 }
+5
source share

Can you post some specifications of your server? Is it a virtual machine, or is it hosted in the cloud? I have successfully used:

  • Koogra: https://sourceforge.net/projects/koogra/
  • NPOI: http://npoi.codeplex.com/

    to read .xls files, but if you can limit your files to .xlsx, I would use ClosedXML. I have read a lot of huge 50K+ row files with ClosedXML on a powerful VM in Azure without any problems. I have a feeling that you are hitting a per-user resource limit on the server: once the process reaches its usage quota, the task is terminated.

+4
source share

This problem can be solved by reading the data in two parts, such as 25K + 25k = 50K. You just need to update your select query as:

 SELECT TOP 25000 * FROM [Sheet1$] 
+4
source share

I created a small sample on my side using SELECT TOP along with ORDER BY, after which you can get the results:

Check the code:

  /// <summary>
  /// Reads the "Name" and "Surname" columns of the "Sheet1" worksheet in two queries:
  /// the first 25000 rows ordered by Name ascending and the first 25000 rows ordered
  /// by Name descending.
  /// </summary>
  /// <param name="fileName">Path of the Excel workbook to read.</param>
  /// <returns>
  /// A <see cref="DataSet"/> holding one table per query that returned at least one row.
  /// </returns>
  /// <remarks>
  /// The two halves only add up to the whole sheet when it holds exactly 50000 rows:
  /// with fewer rows the tables overlap, with more rows the middle of the sheet is not read.
  /// </remarks>
  public DataSet GetExcelDataTable(string fileName)
  {
      // Path.GetExtension returns the extension INCLUDING its leading dot (".xls"),
      // so the comparison has to be made against ".xls"; it is also case-insensitive
      // so that "BOOK.XLS" is recognised.
      bool isLegacyXls = string.Equals(
          Path.GetExtension(fileName), ".xls", System.StringComparison.OrdinalIgnoreCase);

      // NOTE(review): the Jet 4.0 provider is, as far as I know, only available to
      // 32-bit processes - confirm the bitness of the IIS application pool.
      string connectionString = isLegacyXls
          ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName)
          : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);

      DataTable data = new DataTable();
      DataTable data2 = new DataTable();
      var ds = new DataSet();

      // The connection is shared by both adapters and disposed once both fills
      // are done, or as soon as one of them throws.
      using (var conn = new OleDbConnection(connectionString))
      {
          using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name asc", conn))
          {
              adapter.Fill(data);
          }

          using (var adapter = new OleDbDataAdapter("SELECT TOP 25000 Name, Surname FROM [Sheet1$] ORDER BY Name desc", conn))
          {
              adapter.Fill(data2);
          }
      }

      // Empty result sets are not added to the returned DataSet.
      if (data.Rows.Count > 0)
      {
          ds.Tables.Add(data);
      }

      if (data2.Rows.Count > 0)
      {
          ds.Tables.Add(data2);
      }

      return ds;
  }
+2
source share

You can try to populate your DataTable by reading the rows chunk by chunk instead of doing a single read.

The beauty of this approach is that you are not limited to 50 thousand records; it adapts to the actual size of your data.

This code works on my machine (Win10 X64, VS2010 Express):

  /// <summary>
  /// Loads the "Sheet1" worksheet of an Excel file into a <see cref="DataTable"/>,
  /// filling the table in fixed-size chunks instead of with one single Fill call.
  /// </summary>
  /// <param name="fileName">Path of the Excel workbook to read.</param>
  /// <returns>The table containing every record returned by the chunked fills.</returns>
  public DataTable GetExcelDataTable(string fileName)
  {
      // Path.GetExtension returns the extension INCLUDING its leading dot (".xls"),
      // so the comparison has to be made against ".xls"; it is also case-insensitive
      // so that "BOOK.XLS" is recognised.
      bool isLegacyXls = string.Equals(
          Path.GetExtension(fileName), ".xls", System.StringComparison.OrdinalIgnoreCase);

      // NOTE(review): the Jet 4.0 provider is, as far as I know, only available to
      // 32-bit processes - confirm the bitness of the IIS application pool.
      string connectionString = isLegacyXls
          ? string.Format("Provider=Microsoft.Jet.OLEDB.4.0;Data source={0}; Extended Properties=Excel 8.0;", fileName)
          : string.Format("Provider=Microsoft.ACE.OLEDB.12.0;Data Source={0}; Extended Properties=Excel 12.0;", fileName);

      // Both disposables live in using blocks so the connection is released
      // even when a Fill call throws.
      using (var conn = new OleDbConnection(connectionString))
      using (var adapter = new OleDbDataAdapter("SELECT * FROM [Sheet1$]", conn))
      {
          var dt = new DataTable();
          const int recordStep = 6789; // records requested per Fill call
          int recordCur = 0;           // zero-based index of the next record to read
          int recordRead;              // records actually added by the last Fill call

          // Keep requesting the next chunk until a call adds no more records.
          do
          {
              recordRead = adapter.Fill(recordCur, recordStep, dt);
              recordCur += recordRead;
          }
          while (recordRead > 0);

          return dt;
      }
  }
+2
source share

All Articles