5x Performance with Parallel.For ... on a dual-core processor?

I was doing experimental calculations for fun when I came across an interesting result:

Completed 1024x1024 pixels with 700 points in... For Loop (Inline): 19636ms For Loop: 12612ms Parallel.For Loop: 3835ms 

This is not what I expected.

System: Windows 7 64, i3 2120 [dual-core, 4 threads], Visual Studio 2010.

Build: optimization enabled, release mode [without debugger], 32 bits.

Of secondary interest is the disappointing 64-bit performance. While its ratios are more in line with what I would expect, it achieves this by being slower across the board.

 Completed 1024x1024 pixels with 700 points in... For Loop (Inline): 23409ms For Loop: 24373ms Parallel.For Loop: 6839ms 

The calculation is simple: for indices x and y, find the nearest vector 3 and save it in a 2D array.

The question, if you dare, is to try to explain why the inline for loop is so slow. Bonus points for explaining the poor performance of the 64-bit version.

 using System;
 using System.Diagnostics;
 using System.Threading.Tasks;

 namespace TextureFromPoints
 {
     /// <summary>
     /// Benchmark: for every (x, y) cell of a textureSize x textureSize grid, find the
     /// nearest of numPoints random points and store it in a 2D array. The same work is
     /// timed three ways: with the search written directly in Main, with a plain loop
     /// calling <see cref="Computation"/>, and with Parallel.For calling it.
     /// All three timed sections are intentionally kept inside Main.
     /// </summary>
     class Program
     {
         const int numPoints = 700;
         const int textureSize = 1024;
         static Random rnd = new Random();

         /// <summary>
         /// Runs the three timed variants, prints the elapsed times, verifies that all
         /// three result arrays match, then waits for Enter and repeats forever.
         /// </summary>
         static void Main(string[] args)
         {
             while (true)
             {
                 Console.WriteLine("Starting");
                 Console.WriteLine();

                 // Fresh random point cloud for every run.
                 var pointCloud = new Vector3[numPoints];
                 for (int i = 0; i < numPoints; i++)
                     pointCloud[i] = new Vector3(textureSize);

                 var result1 = new Vector3[textureSize, textureSize];
                 var result2 = new Vector3[textureSize, textureSize];
                 var result3 = new Vector3[textureSize, textureSize];

                 // Variant 1: nearest-point search written directly in the loop body.
                 var sw1 = Stopwatch.StartNew();
                 for (int x = 0; x < textureSize; x++)
                     for (int y = 0; y < textureSize; y++)
                     {
                         var targetPos = new Vector3(x, y, 0);
                         var nearestV3 = pointCloud[0];
                         var nearestV3Distance = nearestV3.DistanceToPoint(targetPos);
                         for (int i = 1; i < numPoints; i++)
                         {
                             var currentV3 = pointCloud[i];
                             var currentV3Distance = currentV3.DistanceToPoint(targetPos);
                             if (currentV3Distance < nearestV3Distance)
                             {
                                 nearestV3 = currentV3;
                                 nearestV3Distance = currentV3Distance;
                             }
                         }
                         result1[x, y] = nearestV3;
                     }
                 sw1.Stop();

                 // Variant 2: same search, but through a method call per cell.
                 var sw2 = Stopwatch.StartNew();
                 for (int x = 0; x < textureSize; x++)
                     for (int y = 0; y < textureSize; y++)
                         Computation(pointCloud, result2, x, y);
                 sw2.Stop();

                 // Variant 3: outer loop over x handed to Parallel.For; each x fills its own row.
                 var sw3 = Stopwatch.StartNew();
                 Parallel.For(0, textureSize, x =>
                 {
                     for (int y = 0; y < textureSize; y++)
                         Computation(pointCloud, result3, x, y);
                 });
                 sw3.Stop();

                 Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                 Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", sw1.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "For Loop", sw2.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", sw3.ElapsedMilliseconds);
                 Console.WriteLine();
                 Console.Write("Verifying Data: ");
                 Console.WriteLine(CheckResults(result1, result2) && CheckResults(result1, result3) ? "Valid" : "Error");
                 Console.WriteLine();
                 Console.WriteLine();
                 Console.ReadLine();
             }
         }

         /// <summary>
         /// Returns true when every cell of <paramref name="lhs"/> equals the matching
         /// cell of <paramref name="rhs"/> over the textureSize x textureSize range.
         /// </summary>
         private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
         {
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     if (!lhs[x, y].Equals(rhs[x, y]))
                         return false;
             return true;
         }

         /// <summary>
         /// Finds the point in <paramref name="pointCloud"/> with the smallest squared
         /// distance to (x, y, 0) and stores it in <paramref name="result"/>[x, y].
         /// Ties keep the earliest point because the comparison is strict.
         /// </summary>
         private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
         {
             var targetPos = new Vector3(x, y, 0);
             var nearestV3 = pointCloud[0];
             var nearestV3Distance = nearestV3.DistanceToPoint(targetPos);
             for (int i = 1; i < numPoints; i++)
             {
                 var currentV3 = pointCloud[i];
                 var currentV3Distance = currentV3.DistanceToPoint(targetPos);
                 if (currentV3Distance < nearestV3Distance)
                 {
                     nearestV3 = currentV3;
                     nearestV3Distance = currentV3Distance;
                 }
             }
             result[x, y] = nearestV3;
         }

         /// <summary>Three-component float vector.</summary>
         struct Vector3
         {
             public float x;
             public float y;
             public float z;

             public Vector3(float x, float y, float z)
             {
                 this.x = x;
                 this.y = y;
                 this.z = z;
             }

             /// <summary>
             /// Creates a random vector; each component is rnd.NextDouble() scaled by
             /// <paramref name="randomDistance"/>.
             /// </summary>
             public Vector3(float randomDistance)
             {
                 this.x = (float)rnd.NextDouble() * randomDistance;
                 this.y = (float)rnd.NextDouble() * randomDistance;
                 this.z = (float)rnd.NextDouble() * randomDistance;
             }

             /// <summary>Component-wise difference a - b.</summary>
             public static Vector3 operator -(Vector3 a, Vector3 b)
             {
                 // Restored member access: the page had "ax - bx", which does not compile.
                 return new Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
             }

             /// <summary>Squared length (no square root is taken).</summary>
             public float sqrMagnitude()
             {
                 return x * x + y * y + z * z;
             }

             /// <summary>
             /// Squared distance from this vector to <paramref name="point"/>; despite the
             /// name, no square root is taken.
             /// </summary>
             public float DistanceToPoint(Vector3 point)
             {
                 return (this - point).sqrMagnitude();
             }
         }
     }
 }

UPDATE: Thanks to the efforts of Drew Marsh, we now have this super-optimized version that manually inlines all of the V3 operations.

 using System;
 using System.Diagnostics;
 using System.Threading.Tasks;

 namespace TextureFromPoints
 {
     // Revised benchmark: each timed variant lives in its own method, and the
     // Vector3 subtraction / squared-length math is written out by hand at every
     // use site instead of going through Vector3 methods or operators.
     class RevisedProgram
     {
         const int numPoints = 700;
         const int textureSize = 1024;
         static Random rnd = new Random();

         // Builds a random point cloud, runs the three timed variants, prints the
         // elapsed times, verifies that all three results match, then waits for
         // Enter and repeats forever.
         static void Main(string[] args)
         {
             while (true)
             {
                 Console.WriteLine("Starting REVISED");
                 Console.WriteLine();
                 var pointCloud = new Vector3[numPoints];
                 for (int i = 0; i < numPoints; i++)
                     pointCloud[i] = new Vector3(textureSize);
                 var result1 = new Vector3[textureSize, textureSize];
                 var result2 = new Vector3[textureSize, textureSize];
                 var result3 = new Vector3[textureSize, textureSize];
                 var sw1 = Inline(pointCloud, result1);
                 var sw2 = NotInline(pointCloud, result2);
                 var sw3 = Parallelized(pointCloud, result3);
                 Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                 Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", sw1.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "For Loop", sw2.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", sw3.ElapsedMilliseconds);
                 Console.WriteLine();
                 Console.Write("Verifying Data: ");
                 Console.WriteLine(CheckResults(result1, result2) && CheckResults(result1, result3) ? "Valid" : "Error");
                 Console.WriteLine();
                 Console.WriteLine();
                 Console.ReadLine();
             }
         }

         // Times the Parallel.For variant: the outer loop over x is parallelised and
         // each x iteration fills result3[x, 0..textureSize-1] via Computation.
         // Returns the stopped Stopwatch.
         private static Stopwatch Parallelized(Vector3[] pointCloud, Vector3[,] result3)
         {
             var sw3 = Stopwatch.StartNew();
             Parallel.For(0, textureSize, x =>
             {
                 for (int y = 0; y < textureSize; y++)
                     Computation(pointCloud, result3, x, y);
             });
             sw3.Stop();
             return sw3;
         }

         // Times the sequential variant that calls Computation once per cell.
         // Returns the stopped Stopwatch.
         private static Stopwatch NotInline(Vector3[] pointCloud, Vector3[,] result2)
         {
             var sw2 = Stopwatch.StartNew();
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     Computation(pointCloud, result2, x, y);
             sw2.Stop();
             return sw2;
         }

         // Times the sequential variant with the nearest-point search written
         // directly in the loop body (same statements as Computation).
         // Returns the stopped Stopwatch.
         private static Stopwatch Inline(Vector3[] pointCloud, Vector3[,] result1)
         {
             var sw1 = Stopwatch.StartNew();
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                 {
                     var targetPos = new Vector3(x, y, 0);
                     var nearestV3 = pointCloud[0];
                     // Hand-written difference and squared length for the first point.
                     Vector3 temp1 = new Vector3(nearestV3.x - targetPos.x, nearestV3.y - targetPos.y, nearestV3.z - targetPos.z);
                     var nearestV3Distance = temp1.x * temp1.x + temp1.y * temp1.y + temp1.z * temp1.z;
                     for (int i = 1; i < numPoints; i++)
                     {
                         var currentV3 = pointCloud[i];
                         Vector3 temp2 = new Vector3(currentV3.x - targetPos.x, currentV3.y - targetPos.y, currentV3.z - targetPos.z);
                         var currentV3Distance = temp2.x * temp2.x + temp2.y * temp2.y + temp2.z * temp2.z;
                         // Strict comparison: on a tie the earlier point is kept.
                         if (currentV3Distance < nearestV3Distance)
                         {
                             nearestV3 = currentV3;
                             nearestV3Distance = currentV3Distance;
                         }
                     }
                     result1[x, y] = nearestV3;
                 }
             sw1.Stop();
             return sw1;
         }

         // Returns true when every cell of lhs equals the matching cell of rhs over
         // the textureSize x textureSize range.
         private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
         {
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     if (!lhs[x, y].Equals(rhs[x, y]))
                         return false;
             return true;
         }

         // Finds the point in pointCloud with the smallest squared distance to
         // (x, y, 0) and stores it in result[x, y].
         private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
         {
             var targetPos = new Vector3(x, y, 0);
             var nearestV3 = pointCloud[0];
             Vector3 temp1 = new Vector3(nearestV3.x - targetPos.x, nearestV3.y - targetPos.y, nearestV3.z - targetPos.z);
             var nearestV3Distance = temp1.x * temp1.x + temp1.y * temp1.y + temp1.z * temp1.z;
             for (int i = 1; i < numPoints; i++)
             {
                 var currentV3 = pointCloud[i];
                 Vector3 temp2 = new Vector3(currentV3.x - targetPos.x, currentV3.y - targetPos.y, currentV3.z - targetPos.z);
                 var currentV3Distance = temp2.x * temp2.x + temp2.y * temp2.y + temp2.z * temp2.z;
                 if (currentV3Distance < nearestV3Distance)
                 {
                     nearestV3 = currentV3;
                     nearestV3Distance = currentV3Distance;
                 }
             }
             result[x, y] = nearestV3;
         }

         // Three-component float vector; this version carries data only.
         struct Vector3
         {
             public float x;
             public float y;
             public float z;

             public Vector3(float x, float y, float z)
             {
                 this.x = x;
                 this.y = y;
                 this.z = z;
             }

             // Random vector: each component is rnd.NextDouble() scaled by randomDistance.
             public Vector3(float randomDistance)
             {
                 this.x = (float)rnd.NextDouble() * randomDistance;
                 this.y = (float)rnd.NextDouble() * randomDistance;
                 this.z = (float)rnd.NextDouble() * randomDistance;
             }
         }
     }
 }

And it gives the following results:

x86

 Completed 1024x1024 pixels with 700 points in... For Loop (Inline): 3820ms For Loop: 3962ms Parallel.For Loop: 1681ms 

x64

 Completed 1024x1024 pixels with 700 points in... For Loop (Inline): 10978ms For Loop: 10924ms Parallel.For Loop: 3073ms 

So the good news is that we can significantly increase the performance of this code - and get the single-threaded version running at a speed that is at least somewhat in line with its parallel cousin.

The bad news is that this seems to mean avoiding x64 entirely and manually inlining all of the math.

At this point, I am very disappointed in the performance of the compilers - I expected that they would be much better.

Conclusion

This is scary and sad... and until we know why, we can only make an educated guess that it is caused by a stupid compiler(s). Going from 24 s to 3.8 s just by switching the compiler target from x64 to x86 and doing some manual inlining is not what I expected. However, I finished the proof of concept that I was writing, and thanks to a simple spatial hash I can compute a 1024 by 1024 image with 70,000 "points" in 0.7 s - roughly 340,000% faster than my original x64 scenario, and without threading or inlining. So I have accepted an answer - the immediate need is gone, although I am still looking into the problem.

The code is available here and here - it generates a nice Voronoi diagram as a side effect :P

+7
source share
4 answers

All data from 8 core i7, win7, x64

It's surprising that you get 5x. One problem with the test as you've written it is that you put all three approaches in your Main method, which forces the compiler to generate gobbledygook (and keep it in sync) to meet the needs of the closure used in the Parallel.For, and that gets in the way of the inline version. If you break the work out as follows, you will see significantly better performance in all three implementations... for x86 at least:

Before x86:

 For Loop (Inline): 24313ms For Loop: 25236ms Parallel.For Loop: 3840ms 

After x86:

 For Loop (Inline): 13007ms For Loop: 13013ms Parallel.For Loop: 2208ms 

So, looking at my x86 Parallel.For results, you can see that it scales by about 5.9x, and each version is much faster when isolated.

It is also interesting to note that the same change yields absolutely no gain on x64. In fact, it consistently came out slightly slower on every run in 2 of the 3 tests.

Before x64:

 For Loop (Inline): 24222ms For Loop: 25197ms Parallel.For Loop: 3810ms 

After x64

 For Loop (Inline): 25302ms For Loop: 25209ms Parallel.For Loop: 3821ms 

I have no direct answer as to why x64 would be so bad, other than that people consistently come up with code that makes the x64 JIT look bad, so maybe someone else can chime in on that.

That said, there is one more thing you might want to consider in an implementation like this: cache-line invalidation. There's a great MSDN article written by @StephenToub that explains what it is. TL;DR: because all of your data is stored in a single array, different cores with their own local (L2) caches will be modifying parts of that array, and they need to synchronize with any other cores whose cache lines overlap. If the sections that different cores are working on are too close together, you will get many of these synchronizations, which can eat into your parallel gains. The article describes a technique in which you allocate extra space in the working array, enough to separate the sections containing the data you are actually going to work on, so that when the cores work on their data they do not invalidate each other's cache lines. This may be part of why the speedup over the plain for loop is not closer to 8x. I would bet that if you put in the work to eliminate any cache-line invalidation, you could squeeze another 10%+ out of it. Just remember that there is always overhead in setting up and coordinating parallel work, so you will never get 100% perfect scaling.

Here is the revised version of your program, with each approach factored out into its own method:

 using System;
 using System.Diagnostics;
 using System.Threading.Tasks;

 namespace TextureFromPoints
 {
     /// <summary>
     /// Revised benchmark: the same nearest-point computation as the original
     /// program, but with each timed variant moved out of Main into its own method.
     /// Distance math still goes through the Vector3 operator and methods.
     /// </summary>
     class RevisedProgram
     {
         const int numPoints = 700;
         const int textureSize = 1024;
         static Random rnd = new Random();

         /// <summary>
         /// Builds a random point cloud, runs the three timed variants, prints the
         /// elapsed times, verifies that all three results match, then waits for
         /// Enter and repeats forever.
         /// </summary>
         static void Main(string[] args)
         {
             while (true)
             {
                 Console.WriteLine("Starting REVISED");
                 Console.WriteLine();
                 var pointCloud = new Vector3[numPoints];
                 for (int i = 0; i < numPoints; i++)
                     pointCloud[i] = new Vector3(textureSize);
                 var result1 = new Vector3[textureSize, textureSize];
                 var result2 = new Vector3[textureSize, textureSize];
                 var result3 = new Vector3[textureSize, textureSize];
                 var sw1 = Inline(pointCloud, result1);
                 var sw2 = NotInline(pointCloud, result2);
                 var sw3 = Parallelized(pointCloud, result3);
                 Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                 Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", sw1.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "For Loop", sw2.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", sw3.ElapsedMilliseconds);
                 Console.WriteLine();
                 Console.Write("Verifying Data: ");
                 Console.WriteLine(CheckResults(result1, result2) && CheckResults(result1, result3) ? "Valid" : "Error");
                 Console.WriteLine();
                 Console.WriteLine();
                 Console.ReadLine();
             }
         }

         /// <summary>
         /// Times the Parallel.For variant: the outer loop over x is parallelised and
         /// each x iteration fills its own row of <paramref name="result3"/>.
         /// </summary>
         /// <returns>The stopped Stopwatch.</returns>
         private static Stopwatch Parallelized(Vector3[] pointCloud, Vector3[,] result3)
         {
             var sw3 = Stopwatch.StartNew();
             Parallel.For(0, textureSize, x =>
             {
                 for (int y = 0; y < textureSize; y++)
                     Computation(pointCloud, result3, x, y);
             });
             sw3.Stop();
             return sw3;
         }

         /// <summary>Times the sequential variant that calls Computation once per cell.</summary>
         /// <returns>The stopped Stopwatch.</returns>
         private static Stopwatch NotInline(Vector3[] pointCloud, Vector3[,] result2)
         {
             var sw2 = Stopwatch.StartNew();
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     Computation(pointCloud, result2, x, y);
             sw2.Stop();
             return sw2;
         }

         /// <summary>
         /// Times the sequential variant with the nearest-point search written
         /// directly in the loop body (same statements as Computation).
         /// </summary>
         /// <returns>The stopped Stopwatch.</returns>
         private static Stopwatch Inline(Vector3[] pointCloud, Vector3[,] result1)
         {
             var sw1 = Stopwatch.StartNew();
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                 {
                     var targetPos = new Vector3(x, y, 0);
                     var nearestV3 = pointCloud[0];
                     var nearestV3Distance = nearestV3.DistanceToPoint(targetPos);
                     for (int i = 1; i < numPoints; i++)
                     {
                         var currentV3 = pointCloud[i];
                         var currentV3Distance = currentV3.DistanceToPoint(targetPos);
                         // Strict comparison: on a tie the earlier point is kept.
                         if (currentV3Distance < nearestV3Distance)
                         {
                             nearestV3 = currentV3;
                             nearestV3Distance = currentV3Distance;
                         }
                     }
                     result1[x, y] = nearestV3;
                 }
             sw1.Stop();
             return sw1;
         }

         /// <summary>
         /// Returns true when every cell of <paramref name="lhs"/> equals the matching
         /// cell of <paramref name="rhs"/> over the textureSize x textureSize range.
         /// </summary>
         private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
         {
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     if (!lhs[x, y].Equals(rhs[x, y]))
                         return false;
             return true;
         }

         /// <summary>
         /// Finds the point in <paramref name="pointCloud"/> with the smallest squared
         /// distance to (x, y, 0) and stores it in <paramref name="result"/>[x, y].
         /// </summary>
         private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
         {
             var targetPos = new Vector3(x, y, 0);
             var nearestV3 = pointCloud[0];
             var nearestV3Distance = nearestV3.DistanceToPoint(targetPos);
             for (int i = 1; i < numPoints; i++)
             {
                 var currentV3 = pointCloud[i];
                 var currentV3Distance = currentV3.DistanceToPoint(targetPos);
                 if (currentV3Distance < nearestV3Distance)
                 {
                     nearestV3 = currentV3;
                     nearestV3Distance = currentV3Distance;
                 }
             }
             result[x, y] = nearestV3;
         }

         /// <summary>Three-component float vector.</summary>
         struct Vector3
         {
             public float x;
             public float y;
             public float z;

             public Vector3(float x, float y, float z)
             {
                 this.x = x;
                 this.y = y;
                 this.z = z;
             }

             /// <summary>
             /// Creates a random vector; each component is rnd.NextDouble() scaled by
             /// <paramref name="randomDistance"/>.
             /// </summary>
             public Vector3(float randomDistance)
             {
                 this.x = (float)rnd.NextDouble() * randomDistance;
                 this.y = (float)rnd.NextDouble() * randomDistance;
                 this.z = (float)rnd.NextDouble() * randomDistance;
             }

             /// <summary>Component-wise difference a - b.</summary>
             public static Vector3 operator -(Vector3 a, Vector3 b)
             {
                 // Restored member access: the page had "ax - bx", which does not compile.
                 return new Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
             }

             /// <summary>Squared length (no square root is taken).</summary>
             public float sqrMagnitude()
             {
                 return x * x + y * y + z * z;
             }

             /// <summary>
             /// Squared distance from this vector to <paramref name="point"/>; despite the
             /// name, no square root is taken.
             /// </summary>
             public float DistanceToPoint(Vector3 point)
             {
                 return (this - point).sqrMagnitude();
             }
         }
     }
 }

Update:

Based on Feng Yuan's observation that the methods were not being inlined by the x64 JIT, you can change the program to perform the calculations inline and get better performance from the x64 version than the x86 version. This clearly sucks, but it's the kind of thing I have seen the x64 JIT botch before. Here are the new numbers:

After inlining, x64:

 For Loop (Inline): 19032ms For Loop: 19209ms Parallel.For Loop: 3015ms 

Inlined version of the code:

 using System;
 using System.Diagnostics;
 using System.Threading.Tasks;

 namespace TextureFromPoints
 {
     /// <summary>
     /// Revised benchmark with the squared-length math written out by hand at each
     /// use site. Only the Vector3 subtraction still goes through an operator.
     /// </summary>
     class RevisedProgram
     {
         const int numPoints = 700;
         const int textureSize = 1024;
         static Random rnd = new Random();

         /// <summary>
         /// Builds a random point cloud, runs the three timed variants, prints the
         /// elapsed times, verifies that all three results match, then waits for
         /// Enter and repeats forever.
         /// </summary>
         static void Main(string[] args)
         {
             while (true)
             {
                 Console.WriteLine("Starting REVISED");
                 Console.WriteLine();
                 var pointCloud = new Vector3[numPoints];
                 for (int i = 0; i < numPoints; i++)
                     pointCloud[i] = new Vector3(textureSize);
                 var result1 = new Vector3[textureSize, textureSize];
                 var result2 = new Vector3[textureSize, textureSize];
                 var result3 = new Vector3[textureSize, textureSize];
                 var sw1 = Inline(pointCloud, result1);
                 var sw2 = NotInline(pointCloud, result2);
                 var sw3 = Parallelized(pointCloud, result3);
                 Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints);
                 Console.WriteLine("{0}: {1}ms", "For Loop (Inline)", sw1.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "For Loop", sw2.ElapsedMilliseconds);
                 Console.WriteLine("{0}: {1}ms", "Parallel.For Loop", sw3.ElapsedMilliseconds);
                 Console.WriteLine();
                 Console.Write("Verifying Data: ");
                 Console.WriteLine(CheckResults(result1, result2) && CheckResults(result1, result3) ? "Valid" : "Error");
                 Console.WriteLine();
                 Console.WriteLine();
                 Console.ReadLine();
             }
         }

         /// <summary>
         /// Times the Parallel.For variant: the outer loop over x is parallelised and
         /// each x iteration fills its own row of <paramref name="result3"/>.
         /// </summary>
         /// <returns>The stopped Stopwatch.</returns>
         private static Stopwatch Parallelized(Vector3[] pointCloud, Vector3[,] result3)
         {
             var sw3 = Stopwatch.StartNew();
             Parallel.For(0, textureSize, x =>
             {
                 for (int y = 0; y < textureSize; y++)
                     Computation(pointCloud, result3, x, y);
             });
             sw3.Stop();
             return sw3;
         }

         /// <summary>Times the sequential variant that calls Computation once per cell.</summary>
         /// <returns>The stopped Stopwatch.</returns>
         private static Stopwatch NotInline(Vector3[] pointCloud, Vector3[,] result2)
         {
             var sw2 = Stopwatch.StartNew();
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     Computation(pointCloud, result2, x, y);
             sw2.Stop();
             return sw2;
         }

         /// <summary>
         /// Times the sequential variant with the nearest-point search written
         /// directly in the loop body (same statements as Computation).
         /// </summary>
         /// <returns>The stopped Stopwatch.</returns>
         private static Stopwatch Inline(Vector3[] pointCloud, Vector3[,] result1)
         {
             var sw1 = Stopwatch.StartNew();
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                 {
                     var targetPos = new Vector3(x, y, 0);
                     var nearestV3 = pointCloud[0];
                     // Squared length is computed by hand instead of via a Vector3 method.
                     Vector3 temp1 = nearestV3 - targetPos;
                     var nearestV3Distance = temp1.x * temp1.x + temp1.y * temp1.y + temp1.z * temp1.z;
                     for (int i = 1; i < numPoints; i++)
                     {
                         var currentV3 = pointCloud[i];
                         Vector3 temp2 = currentV3 - targetPos;
                         var currentV3Distance = temp2.x * temp2.x + temp2.y * temp2.y + temp2.z * temp2.z;
                         // Strict comparison: on a tie the earlier point is kept.
                         if (currentV3Distance < nearestV3Distance)
                         {
                             nearestV3 = currentV3;
                             nearestV3Distance = currentV3Distance;
                         }
                     }
                     result1[x, y] = nearestV3;
                 }
             sw1.Stop();
             return sw1;
         }

         /// <summary>
         /// Returns true when every cell of <paramref name="lhs"/> equals the matching
         /// cell of <paramref name="rhs"/> over the textureSize x textureSize range.
         /// </summary>
         private static bool CheckResults(Vector3[,] lhs, Vector3[,] rhs)
         {
             for (int x = 0; x < textureSize; x++)
                 for (int y = 0; y < textureSize; y++)
                     if (!lhs[x, y].Equals(rhs[x, y]))
                         return false;
             return true;
         }

         /// <summary>
         /// Finds the point in <paramref name="pointCloud"/> with the smallest squared
         /// distance to (x, y, 0) and stores it in <paramref name="result"/>[x, y].
         /// </summary>
         private static void Computation(Vector3[] pointCloud, Vector3[,] result, int x, int y)
         {
             var targetPos = new Vector3(x, y, 0);
             var nearestV3 = pointCloud[0];
             Vector3 temp1 = nearestV3 - targetPos;
             var nearestV3Distance = temp1.x * temp1.x + temp1.y * temp1.y + temp1.z * temp1.z;
             for (int i = 1; i < numPoints; i++)
             {
                 var currentV3 = pointCloud[i];
                 Vector3 temp2 = currentV3 - targetPos;
                 var currentV3Distance = temp2.x * temp2.x + temp2.y * temp2.y + temp2.z * temp2.z;
                 if (currentV3Distance < nearestV3Distance)
                 {
                     nearestV3 = currentV3;
                     nearestV3Distance = currentV3Distance;
                 }
             }
             result[x, y] = nearestV3;
         }

         /// <summary>
         /// Squared distance between <paramref name="vector"/> and <paramref name="point"/>.
         /// Not called by the timed paths in this listing, which spell the math out instead.
         /// </summary>
         private static float DistanceToPoint(Vector3 vector, Vector3 point)
         {
             Vector3 final = vector - point;
             return final.x * final.x + final.y * final.y + final.z * final.z;
         }

         /// <summary>Three-component float vector.</summary>
         struct Vector3
         {
             public float x;
             public float y;
             public float z;

             public Vector3(float x, float y, float z)
             {
                 this.x = x;
                 this.y = y;
                 this.z = z;
             }

             /// <summary>
             /// Creates a random vector; each component is rnd.NextDouble() scaled by
             /// <paramref name="randomDistance"/>.
             /// </summary>
             public Vector3(float randomDistance)
             {
                 this.x = (float)rnd.NextDouble() * randomDistance;
                 this.y = (float)rnd.NextDouble() * randomDistance;
                 this.z = (float)rnd.NextDouble() * randomDistance;
             }

             /// <summary>Component-wise difference a - b.</summary>
             public static Vector3 operator -(Vector3 a, Vector3 b)
             {
                 // Restored member access: the page had "ax - bx", which does not compile.
                 return new Vector3(a.x - b.x, a.y - b.y, a.z - b.z);
             }
         }
     }
 }
+7
source

The structure is still 12 bytes on a 64-bit system.

64-bit is slower because DistanceToPoint is not being inlined:

  2 0 [ 0] TextureFromPoints.Program+Vector3.DistanceToPoint(Vector3) 23 0 [ 0] Texture!TextureFromPoints.Program+Vector3.DistanceToPoint(Vector3) 22 0 [ 1] Texture!TextureFromPoints.Program+Vector3.op_Subtraction(Vector3, Vector3) 30 22 [ 0] Texture!TextureFromPoints.Program+Vector3.DistanceToPoint(Vector3) 10 0 [ 1] Texture!TextureFromPoints.Program+Vector3.sqrMagnitude() 33 32 [ 0] Texture!TextureFromPoints.Program+Vector3.DistanceToPoint(Vector3) 

On a 32-bit system, only sqrMagnitude is an actual function call; DistanceToPoint and op_Subtraction are inlined.

+3
source

I suspect the 64-bit performance is related to alignment. Your Vector3 is a 12-byte structure; each one will occupy 12 bytes in a 32-bit environment, but will be padded to 16 bytes in a 64-bit environment. If this means your arrays are 33% larger, you can expect 33% more cache misses.

My suspicion was completely false. After sleeping, I tried the following:

 // Prints the byte distance between two adjacent V3 array elements, followed by
 // the size of IntPtr in the current process.
 class Program
 {
     // Three floats, mirroring the Vector3 layout under discussion.
     private struct V3
     {
         public float x;
         public float y;
         public float z;
     }

     // Pins elements 0 and 1 of a two-element V3 array and returns the difference
     // between their addresses, in bytes.
     private static unsafe long GetDistance()
     {
         V3[] pair = new V3[2];
         fixed (V3* first = &pair[0])
         fixed (V3* second = &pair[1])
         {
             byte* lowAddress = (byte*)first;
             byte* highAddress = (byte*)second;
             long stride = highAddress - lowAddress;
             return stride;
         }
     }

     static unsafe void Main()
     {
         long elementStride = GetDistance();
         Console.WriteLine(elementStride);

         int pointerSize = sizeof(IntPtr);
         Console.WriteLine(pointerSize);
     }
 }

32-bit system:

 12 4 

64-bit system:

 12 8 
+1
source

I know what to do! Write it in F#!

 Completed 1024x1024 pixels with 700 points in... Sync: 4393ms Parallel: 2409ms 

It's faster and shorter... not bad for something I whipped up in a few hours with almost no knowledge of the language.

 module Program

 open System
 open System.IO
 open System.Linq
 open System.Threading.Tasks

 /// Nearest-point benchmark: for every (x, y) cell of a textureSize x textureSize
 /// grid, find the closest of numPoints random 2D points. Times a sequential and a
 /// Parallel.For implementation, prints both, then waits for Enter.
 let main() =
     let numPoints = 700
     let textureSize = 1024
     let rnd = new Random()
     let randomPos() = (single (rnd.NextDouble()*(double textureSize)))
     let pointCloud = Array.init numPoints (fun _ -> (randomPos(), randomPos()))

     /// Squared distance from the integer cell (sourceX, sourceY) to `point`.
     let distanceToPoint(sourceX :int ,sourceY : int, point ) =
         let x = (single sourceX) - fst point
         let y = (single sourceY) - snd point
         x*x + y*y

     /// Sequential version. Returns the filled grid, like parallelCalc does.
     let syncCalc() =
         let resultData = Array2D.zeroCreate<single*single> textureSize textureSize
         for x in 0..(textureSize-1) do
             for y in 0..(textureSize-1) do
                 let mutable closestPoint = pointCloud.[0]
                 let mutable closestDistance = distanceToPoint(x,y, closestPoint)
                 for p in 1..(numPoints-1) do
                     let point = pointCloud.[p]
                     // Fix: measure the candidate `point`. The original measured
                     // closestPoint again, so the comparison below never succeeded
                     // and every cell ended up holding pointCloud.[0].
                     let distance = distanceToPoint(x,y, point)
                     if (distance < closestDistance) then
                         closestPoint <- point
                         closestDistance <- distance
                 resultData.[x,y] <- closestPoint
         resultData

     (*let asyncCalc() =
         let resultData = Array2D.zeroCreate<single*single> textureSize textureSize
         let z =
             Async.Parallel [
                 for x in 0..(textureSize-1) ->
                     async {
                         for y in 0..(textureSize-1) do
                             let closestPoint = ref pointCloud.[0]
                             let closestDistance = ref (distanceToPoint(x,y, !closestPoint))
                             for p in 1..(numPoints-1) do
                                 let point = pointCloud.[p]
                                 let distance = distanceToPoint(x,y, point)
                                 if (distance < !closestDistance) then
                                     closestPoint := point
                                     closestDistance := distance
                             resultData.[x,y] <- !closestPoint
                     } ]
             |> Async.RunSynchronously
         resultData*)

     /// Parallel version: the outer loop over x runs through Parallel.For and each
     /// x iteration fills its own row of the grid.
     let parallelCalc() =
         let resultData = Array2D.zeroCreate<single*single> textureSize textureSize
         let z =
             Parallel.For(0, textureSize, fun x ->
                 for y in 0..(textureSize-1) do
                     let closestPoint = ref pointCloud.[0]
                     let closestDistance = ref (distanceToPoint(x,y, !closestPoint))
                     for p in 1..(numPoints-1) do
                         let point = pointCloud.[p]
                         // Same fix as in syncCalc: compare against the candidate point.
                         let distance = distanceToPoint(x,y, point)
                         if (distance < !closestDistance) then
                             closestPoint := point
                             closestDistance := distance
                     resultData.[x,y] <- !closestPoint)
         resultData

     // The timing notes below are the author's figures, taken before the
     // candidate-distance fix above.
     //4.2s
     let sw1 = System.Diagnostics.Stopwatch.StartNew()
     let r1 = syncCalc()
     sw1.Stop()
     //Slow!
     //let sw2 = System.Diagnostics.Stopwatch.StartNew();
     //let r2 = asyncCalc()
     //sw2.Stop()
     //2.25s
     let sw3 = System.Diagnostics.Stopwatch.StartNew()
     let r3 = parallelCalc()
     sw3.Stop()
     Console.WriteLine("Completed {0}x{0} pixels with {1} points in...", textureSize, numPoints)
     Console.WriteLine("Sync: {0}ms", sw1.ElapsedMilliseconds)
     //Console.WriteLine("ASync: {0}ms", sw2.ElapsedMilliseconds)
     Console.WriteLine("Parallel: {0}ms", sw3.ElapsedMilliseconds)
     Console.ReadLine() |> ignore

 while true do main()
0
source

All Articles