C # is a faster alternative to Convert.ToSingle ()

I am working on a program that reads millions of floating point numbers from a text file. This program runs inside the game that I am developing, so I need it to be fast (I am loading the obj file). So far, downloading a relatively small file takes about a minute (without preliminary compilation) due to the slow speed of Convert.ToSingle (). Is there a faster way to do this?

EDIT: Here is the code I use to parse the obj file

http://pastebin.com/TfgEge9J

using System; using System.IO; using System.Collections.Generic; using OpenTK.Math; using System.Drawing; using PlatformLib; public class ObjMeshLoader { public static StreamReader[] LoadMeshes(string fileName) { StreamReader mreader = new StreamReader(PlatformLib.Platform.openFile(fileName)); MemoryStream current = null; List<MemoryStream> mstreams = new List<MemoryStream>(); StreamWriter mwriter = null; if (!mreader.ReadLine().Contains("#")) { mreader.BaseStream.Close(); throw new Exception("Invalid header"); } while (!mreader.EndOfStream) { string cmd = mreader.ReadLine(); string line = cmd; line = line.Trim(splitCharacters); line = line.Replace(" ", " "); string[] parameters = line.Split(splitCharacters); if (parameters[0] == "mtllib") { loadMaterials(parameters[1]); } if (parameters[0] == "o") { if (mwriter != null) { mwriter.Flush(); current.Position = 0; } current = new MemoryStream(); mwriter = new StreamWriter(current); mwriter.WriteLine(parameters[1]); mstreams.Add(current); } else { if (mwriter != null) { mwriter.WriteLine(cmd); mwriter.Flush(); } } } mwriter.Flush(); current.Position = 0; List<StreamReader> readers = new List<StreamReader>(); foreach (MemoryStream e in mstreams) { e.Position = 0; StreamReader sreader = new StreamReader(e); readers.Add(sreader); } return readers.ToArray(); } public static bool Load(ObjMesh mesh, string fileName) { try { using (StreamReader streamReader = new StreamReader(Platform.openFile(fileName))) { Load(mesh, streamReader); streamReader.Close(); return true; } } catch { return false; } } public static bool Load2(ObjMesh mesh, StreamReader streamReader, ObjMesh prevmesh) { if (prevmesh != null) { //mesh.Vertices = prevmesh.Vertices; } try { //streamReader.BaseStream.Position = 0; Load(mesh, streamReader); streamReader.Close(); #if DEBUG Console.WriteLine("Loaded "+mesh.Triangles.Length.ToString()+" triangles and"+mesh.Quads.Length.ToString()+" quadrilaterals parsed, with a grand total of "+mesh.Vertices.Length.ToString()+" vertices."); #endif return true; } catch (Exception er) { Console.WriteLine(er); return false; } } static char[] splitCharacters = new char[] { ' ' }; static List<Vector3> vertices; static List<Vector3> normals; static List<Vector2> texCoords; static Dictionary<ObjMesh.ObjVertex, int> objVerticesIndexDictionary; static List<ObjMesh.ObjVertex> objVertices; static List<ObjMesh.ObjTriangle> objTriangles; static List<ObjMesh.ObjQuad> objQuads; static Dictionary<string, Bitmap> materials = new Dictionary<string, Bitmap>(); static void loadMaterials(string path) { StreamReader mreader = new StreamReader(Platform.openFile(path)); string current = ""; bool isfound = false; while (!mreader.EndOfStream) { string line = mreader.ReadLine(); line = line.Trim(splitCharacters); line = line.Replace(" ", " "); string[] parameters = line.Split(splitCharacters); if (parameters[0] == "newmtl") { if (materials.ContainsKey(parameters[1])) { isfound = true; } else { current = parameters[1]; } } if (parameters[0] == "map_Kd") { if (!isfound) { string filename = ""; for (int i = 1; i < parameters.Length; i++) { filename += parameters[i]; } string searcher = "\\" + "\\"; filename.Replace(searcher, "\\"); Bitmap mymap = new Bitmap(filename); materials.Add(current, mymap); isfound = false; } } } } static float parsefloat(string val) { return Convert.ToSingle(val); } int remaining = 0; static string GetLine(string text, ref int pos) { string retval = text.Substring(pos, text.IndexOf(Environment.NewLine, pos)); pos = text.IndexOf(Environment.NewLine, pos); return retval; } static void Load(ObjMesh mesh, StreamReader textReader) { //try { //vertices = null; //objVertices = null; if (vertices == null) { vertices = new List<Vector3>(); } if (normals == null) { normals = new List<Vector3>(); } if (texCoords == null) { texCoords = new List<Vector2>(); } if (objVerticesIndexDictionary == null) { objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>(); } if (objVertices == null) { objVertices = new List<ObjMesh.ObjVertex>(); } objTriangles = new List<ObjMesh.ObjTriangle>(); objQuads = new List<ObjMesh.ObjQuad>(); mesh.vertexPositionOffset = vertices.Count; string line; string alltext = textReader.ReadToEnd(); int pos = 0; while ((line = GetLine(alltext, pos)) != null) { if (line.Length < 2) { break; } //line = line.Trim(splitCharacters); //line = line.Replace(" ", " "); string[] parameters = line.Split(splitCharacters); switch (parameters[0]) { case "usemtl": //Material specification try { mesh.Material = materials[parameters[1]]; } catch (KeyNotFoundException) { Console.WriteLine("WARNING: Texture parse failure: " + parameters[1]); } break; case "p": // Point break; case "v": // Vertex float x = parsefloat(parameters[1]); float y = parsefloat(parameters[2]); float z = parsefloat(parameters[3]); vertices.Add(new Vector3(x, y, z)); break; case "vt": // TexCoord float u = parsefloat(parameters[1]); float v = parsefloat(parameters[2]); texCoords.Add(new Vector2(u, v)); break; case "vn": // Normal float nx = parsefloat(parameters[1]); float ny = parsefloat(parameters[2]); float nz = parsefloat(parameters[3]); normals.Add(new Vector3(nx, ny, nz)); break; case "f": switch (parameters.Length) { case 4: ObjMesh.ObjTriangle objTriangle = new ObjMesh.ObjTriangle(); objTriangle.Index0 = ParseFaceParameter(parameters[1]); objTriangle.Index1 = ParseFaceParameter(parameters[2]); objTriangle.Index2 = ParseFaceParameter(parameters[3]); objTriangles.Add(objTriangle); break; case 5: ObjMesh.ObjQuad objQuad = new ObjMesh.ObjQuad(); objQuad.Index0 = ParseFaceParameter(parameters[1]); objQuad.Index1 = ParseFaceParameter(parameters[2]); objQuad.Index2 = ParseFaceParameter(parameters[3]); objQuad.Index3 = ParseFaceParameter(parameters[4]); objQuads.Add(objQuad); break; } break; } } //}catch(Exception er) { // Console.WriteLine(er); // Console.WriteLine("Successfully recovered. Bounds/Collision checking may fail though"); //} mesh.Vertices = objVertices.ToArray(); mesh.Triangles = objTriangles.ToArray(); mesh.Quads = objQuads.ToArray(); textReader.BaseStream.Close(); } public static void Clear() { objVerticesIndexDictionary = null; vertices = null; normals = null; texCoords = null; objVertices = null; objTriangles = null; objQuads = null; } static char[] faceParamaterSplitter = new char[] { '/' }; static int ParseFaceParameter(string faceParameter) { Vector3 vertex = new Vector3(); Vector2 texCoord = new Vector2(); Vector3 normal = new Vector3(); string[] parameters = faceParameter.Split(faceParamaterSplitter); int vertexIndex = Convert.ToInt32(parameters[0]); if (vertexIndex < 0) vertexIndex = vertices.Count + vertexIndex; else vertexIndex = vertexIndex - 1; //Hmm. This seems to be broken. try { vertex = vertices[vertexIndex]; } catch (Exception) { throw new Exception("Vertex recognition failure at " + vertexIndex.ToString()); } if (parameters.Length > 1) { int texCoordIndex = Convert.ToInt32(parameters[1]); if (texCoordIndex < 0) texCoordIndex = texCoords.Count + texCoordIndex; else texCoordIndex = texCoordIndex - 1; try { texCoord = texCoords[texCoordIndex]; } catch (Exception) { Console.WriteLine("ERR: Vertex " + vertexIndex + " not found. "); throw new DllNotFoundException(vertexIndex.ToString()); } } if (parameters.Length > 2) { int normalIndex = Convert.ToInt32(parameters[2]); if (normalIndex < 0) normalIndex = normals.Count + normalIndex; else normalIndex = normalIndex - 1; normal = normals[normalIndex]; } return FindOrAddObjVertex(ref vertex, ref texCoord, ref normal); } static int FindOrAddObjVertex(ref Vector3 vertex, ref Vector2 texCoord, ref Vector3 normal) { ObjMesh.ObjVertex newObjVertex = new ObjMesh.ObjVertex(); newObjVertex.Vertex = vertex; newObjVertex.TexCoord = texCoord; newObjVertex.Normal = normal; int index; if (objVerticesIndexDictionary.TryGetValue(newObjVertex, out index)) { return index; } else { objVertices.Add(newObjVertex); objVerticesIndexDictionary[newObjVertex] = objVertices.Count - 1; return objVertices.Count - 1; } } } 
+4
source share
5 answers

Based on your description and the code you posted, I bet your problem is not with reading, parsing, or how you add things to your collections. The most likely problem is that your ObjMesh.Objvertex structure ObjMesh.Objvertex not override GetHashCode . (I assume that you are using code similar to http://www.opentk.com/files/ObjMesh.cs .

If you do not override GetHashCode , then your objVerticesIndexDictionary will be very similar to a linear list. This will be due to the performance issue you are facing.

I suggest you learn a good GetHashCode method for your ObjMesh.Objvertex class.

See Why is ValueType.GetHashCode () implemented as it is? for information about the default implementation of GetHashCode for value types and why it is not suitable for use in a hash table or dictionary.

+5
source

Edit 3: The problem is not parsing.

This is with how you read the file . If you read it correctly, it will be faster; however, your reading seems unusually slow. My initial suspicion was that it was due to redundant allocations, but it looks like there might be other problems with your code, as this does not explain the whole slowdown.

However, here the code snippet that I made completely avoids all object distributions:

 static void Main(string[] args) { long counter = 0; var sw = Stopwatch.StartNew(); var sb = new StringBuilder(); var text = File.ReadAllText("spacestation.obj"); for (int i = 0; i < text.Length; i++) { int start = i; while (i < text.Length && (char.IsDigit(text[i]) || text[i] == '-' || text[i] == '.')) { i++; } if (i > start) { sb.Append(text, start, i - start); //Copy data to the buffer float value = Parse(sb); //Parse the data sb.Remove(0, sb.Length); //Clear the buffer counter++; } } sw.Stop(); Console.WriteLine("{0:N0}", sw.Elapsed.TotalSeconds); //Only a few ms } 

with this parser:

 const int MIN_POW_10 = -16, int MAX_POW_10 = 16, NUM_POWS_10 = MAX_POW_10 - MIN_POW_10 + 1; static readonly float[] pow10 = GenerateLookupTable(); static float[] GenerateLookupTable() { var result = new float[(-MIN_POW_10 + MAX_POW_10) * 10]; for (int i = 0; i < result.Length; i++) result[i] = (float)((i / NUM_POWS_10) * Math.Pow(10, i % NUM_POWS_10 + MIN_POW_10)); return result; } static float Parse(StringBuilder str) { float result = 0; bool negate = false; int len = str.Length; int decimalIndex = str.Length; for (int i = len - 1; i >= 0; i--) if (str[i] == '.') { decimalIndex = i; break; } int offset = -MIN_POW_10 + decimalIndex; for (int i = 0; i < decimalIndex; i++) if (i != decimalIndex && str[i] != '-') result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i - 1]; else if (str[i] == '-') negate = true; for (int i = decimalIndex + 1; i < len; i++) if (i != decimalIndex) result += pow10[(str[i] - '0') * NUM_POWS_10 + offset - i]; if (negate) result = -result; return result; } 

this happens in a fraction of a second .

Of course, this analyzer is poorly tested and has these current limitations (and more):

  • Do not try to parse more digits (decimal and integer) than provided in the array.

  • No error handling.

  • Checks only decimal places , not exponents! that is, he can analyze 1234.56 , but not 1.23456E3 .

  • Does not care about globalization / localization. Your file is in only one format, so it makes no sense to take care of this because you are probably using English to store it.

It doesn't seem like you need a lot of kink, but look at your code and try to figure out the bottleneck. This does not seem to be reading or parsing.

+2
source

Have you measured that the speed problem is really caused by Convert.ToSingle ?

In the code you included, I see that you are creating lists and dictionaries like this:

 normals = new List<Vector3>(); texCoords = new List<Vector2>(); objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>(); 

And then when you read the file, you add one item at a time to the collection. One possible optimization would be to save the total number of normals, texCoords, indexes, and everything at the beginning of the file, and then initialize these collections with these numbers. This will preallocate the buffers used by collections, so adding items to them will be pretty quick.

So, creating a collection should look like this:

 // These values should be stored at the beginning of the file int totalNormals = Convert.ToInt32(textReader.ReadLine()); int totalTexCoords = Convert.ToInt32(textReader.ReadLine()); int totalIndexes = Convert.ToInt32(textReader.ReadLine()); normals = new List<Vector3>(totalNormals); texCoords = new List<Vector2>(totalTexCoords); objVerticesIndexDictionary = new Dictionary<ObjMesh.ObjVertex, int>(totalIndexes); 

See List of <T> Constructor (Int32) and Dictionary <TKey, TValue> Constructor (Int32) .

+2
source

This related question is for C ++, but definitely worth a read.

To read as quickly as possible, you probably want to map the file in memory and then parse it with some custom floating point parser, especially if you know that the numbers are always in a specific format (i.e. you are the one that generates the input files primarily).

0
source

I tested .Net parsing once, and the fastest function to parse text was the old VB Val () function. You can pull the appropriate components from Microsoft.VisualBasic.Conversion Val (string)

 Converting String to numbers Comparison of relative test times (ms / 100000 conversions) Double Single Integer Int(w/ decimal point) 14 13 6 16 Val(Str) 14 14 6 16 Cxx(Val(Str)) eg, CSng(Val(str)) 22 21 17 e! Convert.To(str) 23 21 16 e! XX.Parse(str) eg Single.Parse() 30 31 31 32 Cxx(str) Val: fastest, part of VisualBasic dll, skips non-numeric, ConvertTo and Parse: slower, part of core, exception on bad format (including decimal point) Cxx: slowest (for strings), part of core, consistent times across formats 
0
source

All Articles