Aggregation function - F# vs C# performance

I have a function that I use a lot, so its performance should be as good as possible. It receives data from Excel and then sums, averages or counts parts of the data, depending on whether each row falls within a given period and whether it is a peak hour (Mo-Fr 8-20).

The data is usually about 30,000 rows and 2 columns (hourly date, value). An important feature of the data is that the date column is chronologically ordered.

I have three implementations. The first is C# with extension methods (dead slow, and I'm not going to show it unless someone is interested).

Then I have an F# implementation:

/// Returns true when the OLE Automation date `dts` falls on a weekday
/// (Monday-Friday) with an hour component from 8 up to, but not including, 20.
let ispeak dts =
    let stamp = DateTime.FromOADate dts
    let isWeekend =
        stamp.DayOfWeek = DayOfWeek.Saturday || stamp.DayOfWeek = DayOfWeek.Sunday
    not isWeekend && stamp.Hour >= 8 && stamp.Hour < 20

/// True when `a` lies in the half-open interval [std, edd + 1): the start
/// bound is inclusive and the end bound is pushed out by 1.0.
let internal isbetween a std edd =
    std <= a && a < edd + 1.

/// Aggregates the `data2` values whose matching `data` date lies in [std, edd + 1).
/// `pob` selects the hours: "Peak", "Offpeak", or any other string for all hours.
/// `sac` selects the result: 0 = average, 2 = count (as a float), anything else = sum.
/// Note: with `sac = 0` and no matching rows, `Seq.averageBy` raises ArgumentException.
[<ExcelFunction(Name="aggrF")>]
let aggrF (data:float[]) (data2:float[]) std edd pob sac =
    let newd =
        [0 .. (Array.length data) - 1]
        |> List.map (fun i -> (data.[i], data2.[i]))
        |> Seq.filter (fun (date, _) ->
            let dateInRange = isbetween date std edd
            match pob with
            | "Peak" -> ispeak date && dateInRange
            | "Offpeak" -> not(ispeak date) && dateInRange
            | _ -> dateInRange)
    // This `match` has to sit in the same column as `let newd`; a shallower
    // indent falls outside the function body under the offside rule.
    match sac with
    | 0 -> newd |> Seq.averageBy (fun (_, value) -> value)
    | 2 -> newd |> Seq.sumBy (fun _ -> 1.0)
    | _ -> newd |> Seq.sumBy (fun (_, value) -> value)

I see two problems with this:

  • I need to prepare the data, since both the date and the value are double[]
  • I do not use the fact that the dates are chronologically ordered, so I iterate over rows unnecessarily.

The C# implementation:

        public static bool ispeak(double dats)
    {
        // `dats` is an OLE Automation date. Peak means Monday-Friday with the hour in 8..19.
        var stamp = System.DateTime.FromOADate(dats);
        bool weekend = stamp.DayOfWeek == DayOfWeek.Saturday || stamp.DayOfWeek == DayOfWeek.Sunday;
        return !weekend && stamp.Hour >= 8 && stamp.Hour <= 19;
    }

    [ExcelFunction(Description = "Aggregates HFC/EG into average or sum over period, start date inclusive, end date exclusive")]
    public static double aggrI(double[] dts, double[] vals, double std, double edd, string pob, double sumavg)
    {
        // Resolve the requested hour filter once, before the loop.
        // If pob matches none of the three names, no row is accumulated.
        bool wantPeak = pob.Equals("Peak", StringComparison.OrdinalIgnoreCase);
        bool wantOffpeak = pob.Equals("Offpeak", StringComparison.OrdinalIgnoreCase);
        bool wantBase = pob.Equals("Base", StringComparison.OrdinalIgnoreCase);

        double total = 0;
        int matched = 0;
        bool enteredWindow = false;

        for (int i = 0; i < vals.Length; ++i)
        {
            double stamp = dts[i];
            bool inWindow = stamp >= std && stamp < edd + 1;

            if (!inWindow)
            {
                // Once rows inside the window have been seen, the first row
                // outside it ends the scan.
                if (enteredWindow)
                {
                    break;
                }
                continue;
            }

            enteredWindow = true;

            bool selected = (wantPeak && ispeak(stamp))
                || (wantOffpeak && !ispeak(stamp))
                || wantBase;
            if (selected)
            {
                total += vals[i];
                ++matched;
            }
        }

        // sumavg: 0 -> average, 2 -> count, anything else -> sum.
        if (sumavg == 0)
        {
            return total / matched;
        }
        return sumavg == 2 ? matched : total;
    }

( , - , ), .

:

  • f # Seq ?

  • f #?

  • - ? !

:

1/1/13-31/12/15 ( 30 000 ) . 150 , , 100 - 15000 :

csharp ( string.compare )

1,36

fsharp

1,55

Tomas array fsharp

1m40secs

fsharp

2m20secs

, , , ...

, , , , array.map .., , , # f #

+4
2

:

[0 .. (Array.length data) - 1]
    |> List.map (fun i -> (data.[i], data2.[i])) 
    |> Seq.filter (fun (date, _) -> 

, , , :

/// Aggregates the `data2` values whose matching `data` date lies in [std, edd + 1).
/// `pob` selects the hours: "Peak", "Offpeak", or any other string for all hours.
/// `sac` selects the result: 0 = average, 2 = count (as a float), anything else = sum.
/// Note: with `sac = 0` and no matching rows, `List.average` raises ArgumentException.
let aggrF (data:float[]) (data2:float[]) std edd pob sac =
    // Pick the hour predicate once instead of matching on `pob` for every row.
    let isValidTime =
        match pob with
        | "Peak" -> ispeak
        | "Offpeak" -> ispeak >> not
        | _ -> fun _ -> true

    // Keep only the values; the dates are not needed after filtering.
    let selected =
        [ for i in 0 .. Array.length data - 1 do
            let date = data.[i]
            if isbetween date std edd && isValidTime date then
                yield data2.[i] ]

    match sac with
    | 0 -> List.average selected
    // Every branch must yield a float, so the count is converted explicitly.
    | 2 -> float (List.length selected)
    | _ -> List.sum selected

:

/// Single-pass aggregation over `data` (dates) and `data2` (values).
/// Scanning stops at the first date that is >= edd + 1. Rows with a date
/// >= std that pass the `pob` hour filter are accumulated.
/// `sac`: 0 = average, 2 = count (as a float), anything else = sum.
let aggrF (data:float[]) (data2:float[]) std edd pob sac =
    let accept =
        match pob with
        | "Peak" -> ispeak
        | "Offpeak" -> ispeak >> not
        | _ -> fun _ -> true

    let upper = edd + 1.0
    let rowCount = Array.length data

    // Turns the running totals into the result requested by `sac`.
    let finish (total: float) (hits: int) =
        match sac with
        | 0 -> total / float hits
        | 2 -> float hits
        | _ -> total

    // Tail-recursive scan; every recursive call is in tail position.
    let rec scan idx total hits =
        if idx >= rowCount then
            finish total hits
        else
            let stamp = data.[idx]
            if stamp >= upper then
                finish total hits
            elif stamp >= std && accept stamp then
                scan (idx + 1) (total + data2.[idx]) (hits + 1)
            else
                scan (idx + 1) total hits

    scan 0 0.0 0
+5

Array List Seq 3-4 . , - Array.zip , Array.filter.

, , ( ). , Array.zip Array.map, , , ( ).

/// Array-based aggregation: pairs each date in `data` with its value in
/// `data2`, keeps the pairs inside [std, edd + 1) that pass the `pob` hour
/// filter, then reduces them according to `sac`
/// (0 = average, 2 = count as a float, anything else = sum).
let aggrF (data:float[]) (data2:float[]) std edd pob sac =
    let hourOk =
        match pob with
        | "Peak" -> ispeak
        | "Offpeak" -> ispeak >> not
        | _ -> fun _ -> true

    let kept =
        Array.zip data data2
        |> Array.filter (fun (date, _) -> hourOk date && isbetween date std edd)

    match sac with
    | 0 -> Array.averageBy snd kept
    | 2 -> float (Array.length kept)
    | _ -> Array.sumBy snd kept

isbetween - , inline, :

/// Inlined range test: true when std <= r < edd + 1.
let inline isbetween r std edd =
    std <= r && r < edd + 1.

, ( F # Interactive):

// F# Interactive directive: toggles timing output for the evaluations that follow.
#time 
// Two arrays of 1,000,000 floats; Array.init passes each index to `float`,
// giving 0.0, 1.0, 2.0, ...
let d1 = Array.init 1000000 float
let d2 = Array.init 1000000 float
// "Test" is neither "Peak" nor "Offpeak", so no hour filter applies;
// 0 is the `sac` value that aggrF matches first (the average case).
aggrF d1 d2 0.0 1000000.0 "Test" 0

~ 600 , 160 200 . 520 .

, BlueMountain Capital, / F #, . , , BlueMountain GitHub. ( , ):

// NOTE(review): `Series`, `Series.filter` and `Series.mean` are not defined in this
// file - presumably they come from a time-series library (looks like Deedle; confirm).
// `times` and `values` are assumed to be in scope already.
let ts = Series(times, values)
// Slice the keys std .. edd, keep the entries whose key is not a peak hour, then take the mean.
ts.[std .. edd] |> Series.filter (fun k _ -> not (ispeak k)) |> Series.mean

, , : -).

+7
source

All Articles