In LINQ, why are subsequent calls to IEnumerable.Intersect so much faster?

While looking at this C# question about the similarity of two arrays, it was noted that the initial LINQ call was significantly slower than subsequent calls. What is being cached that makes such a difference? I am interested in when we can expect this type of behavior (perhaps it happens here simply because the same lists are used over and over again).

    // Micro-benchmark that prints, for each of `cycles` iterations, the Stopwatch
    // tick count and the number of common elements for two approaches:
    //   Test 1 - LINQ's a.Intersect(b)
    //   Test 2 - a hand-written merge-style walk over both lists
    // then waits for Enter before exiting.
    static void Main(string[] args)
    {
        // Two fixed integer lists; both literals are written in ascending order.
        var a = new List<int>() { 7, 17, 21, 29, 30, 33, 40, 42, 51, 53, 60, 63, 66, 68, 70, 84, 85, 91, 101, 102, 104, 108, 109, 112, 115, 116, 118, 125, 132, 137, 139, 142, 155, 163, 164, 172, 174, 176, 179, 184, 185, 186, 187, 188, 189, 192, 197, 206, 209, 234, 240, 244, 249, 250, 252, 253, 254, 261, 263, 270, 275, 277, 290, 292, 293, 304, 308, 310, 314, 316, 319, 321, 322, 325, 326, 327, 331, 332, 333, 340, 367, 371, 374, 403, 411, 422, 427, 436, 440, 443, 444, 446, 448, 449, 450, 452, 455, 459, 467, 470, 487, 488, 489, 492, 494, 502, 503, 505, 513, 514, 522, 523, 528, 532, 534, 535, 545, 547, 548, 553, 555, 556, 565, 568, 570, 577, 581, 593, 595, 596, 598, 599, 606, 608, 613, 615, 630, 638, 648, 661, 663, 665, 669, 673, 679, 681, 685, 687, 690, 697, 702, 705, 708, 710, 716, 719, 724, 725, 727, 728, 732, 733, 739, 744, 760, 762, 775, 781, 787, 788, 790, 795, 797, 802, 806, 808, 811, 818, 821, 822, 829, 835, 845, 848, 851, 859, 864, 866, 868, 875, 881, 898, 899, 906, 909, 912, 913, 915, 916, 920, 926, 929, 930, 933, 937, 945, 946, 949, 954, 957, 960, 968, 975, 980, 985, 987, 989, 995 };
        var b = new List<int>() { 14, 20, 22, 23, 32, 36, 40, 48, 63, 65, 67, 71, 83, 87, 90, 100, 104, 109, 111, 127, 128, 137, 139, 141, 143, 148, 152, 153, 157, 158, 161, 163, 166, 187, 192, 198, 210, 211, 217, 220, 221, 232, 233, 236, 251, 252, 254, 256, 257, 272, 273, 277, 278, 283, 292, 304, 305, 307, 321, 333, 336, 341, 342, 344, 349, 355, 356, 359, 366, 373, 379, 386, 387, 392, 394, 396, 401, 409, 412, 433, 437, 441, 445, 447, 452, 465, 471, 476, 479, 483, 511, 514, 516, 521, 523, 531, 544, 548, 551, 554, 559, 562, 566, 567, 571, 572, 574, 576, 586, 592, 593, 597, 600, 602, 615, 627, 631, 636, 644, 650, 655, 657, 660, 667, 670, 680, 691, 697, 699, 703, 704, 706, 707, 716, 742, 748, 751, 754, 766, 770, 779, 785, 788, 790, 802, 803, 806, 811, 812, 815, 816, 821, 824, 828, 841, 848, 853, 863, 866, 870, 872, 875, 879, 880, 882, 883, 885, 886, 887, 888, 892, 894, 902, 905, 909, 912, 913, 914, 916, 920, 922, 925, 926, 928, 930, 935, 936, 938, 942, 945, 952, 954, 955, 957, 959, 960, 961, 963, 970, 974, 976, 979, 987 };
        var s = new System.Diagnostics.Stopwatch();
        const int cycles = 10;

        // Test 1: LINQ Intersect.
        for (int i = 0; i < cycles; i++)
        {
            // Only the Intersect call itself sits between Start and Stop. Intersect
            // uses deferred execution, so this measures building the query object;
            // the intersection is actually computed by z.Count() below, which runs
            // after the stopwatch has already been stopped.
            s.Start();
            var z= a.Intersect(b);
            s.Stop();
            Console.WriteLine("Test 1-{0}: {1} {2}", i, s.ElapsedTicks, z.Count());
            s.Reset(); // zero the elapsed time so each iteration is reported on its own
            a[0]=i;//simple attempt to make sure entire result isn't cached
        }

        // Test 2: hand-written intersection that advances through a and b together.
        // The walk only ever moves forward in b, so it presumably relies on both
        // lists being sorted ascending (the literals above are).
        for (int i = 0; i < cycles; i++)
        {
            // The result list is allocated before the stopwatch starts, so the
            // allocation is not part of the measured time.
            var z1 = new List<int>(a.Count);
            s.Start();
            int j = 0;      // current position in b
            int b1 = b[j];  // current value from b
            foreach (var a1 in a)
            {
                // Advance through b until its current value exceeds a1,
                // recording the value whenever the two are equal.
                while (b1 <= a1)
                {
                    if (b1 == a1)
                        z1.Add(b[j]);
                    j++;
                    // This break leaves only the while loop; the foreach goes on to
                    // visit the rest of a with b1 still holding the last value of b.
                    // NOTE(review): if a later a1 equalled that stale b1, b[j] above
                    // would be indexed with j == b.Count. That does not happen with
                    // this data - confirm before reusing the loop on other inputs.
                    if (j >= b.Count)
                        break;
                    b1 = b[j];
                }
            }
            s.Stop();
            Console.WriteLine("Test 2-{0}: {1} {2}", i, s.ElapsedTicks, z1.Count);
            s.Reset(); // zero the elapsed time so each iteration is reported on its own
            a[0]=i;//simple attempt to make sure entire result isn't cached
        }

        // Keep the console window open until the user presses Enter.
        Console.Write("Press Enter to quit");
        Console.ReadLine();
    }
}

As requested, here is some example output:

Test 1-0: 2900 45
Test 1-1: 2 45
Test 1-2: 0 45
Test 1-3: 1 45

(The plain loop version, Test 2, shows only a slight difference between successive runs.)

Note: after changing the timed call to a.Intersect(b).ToArray(); instead of just a.Intersect(b);, as suggested by @kerem, the results are:

Test 1-0: 13656 45
Test 1-1: 113 45
Test 1-2: 76 45
Test 1-3: 64 45
Test 1-4: 90 45 
...
+4
source share
5 answers

, :

  • , .
  • , , CPU.
  • , , CPU.
+3

LINQ . , .

 s.Start();
 z= a.Intersect(b);
 s.Stop();

 s.Start();
 z= a.Intersect(b).ToArray();
 s.Stop();

.

a.Intersect(b) , a b. a b , .

+2

JIT compilation of System.Linq.Enumerable.

(). Intersect ( ()); System.Diagnostics.Stopwatch(). Stop(); .

+1

Enumerable.Intersect . HashSet. HashSet. HashSet. HashSet . HashSet, HashSet. .

, , Enumerable.Intersect , , , , "" .

+1

You enumerate the result of Intersect() only when you call Count(); that is when the intersection is actually computed. The part you have timed only creates an enumerable object that represents the future computation of the intersection.

In addition to the JIT-compilation penalty noted by others, the first call to Intersect() may be the first use of a type from System.Core.dll, so you may also be seeing the time it takes to load the IL code into memory.

+1
source

All Articles