Getting a list of lists without using List<List<string>> in F#

I have this function here:

/// Splits the lines of a file into groups of consecutive non-empty lines.
/// Empty lines act as separators and are never part of a group.
///
/// allLines: the lines of the file, in order.
/// Returns a List<List<string>> holding one inner list per group, in input order.
/// Blank lines at the start of the input, and runs of several blank lines,
/// do not produce empty groups.
let ProcessFile (allLines: string list) =
    let list = new List<List<string>>()
    let rec SplitFile (input: string list) =
        // Drop separator lines first, so a group is only ever started on a
        // non-empty line (otherwise leading blanks would add an empty group).
        let input = Seq.toList (input.SkipWhile(fun x -> x = ""))
        // IsEmpty is O(1); Length would walk the whole remaining list each step.
        if not input.IsEmpty then
            list.Add(new List<string>(input.TakeWhile(fun x -> x <> "")))
            SplitFile (Seq.toList (input.SkipWhile(fun x -> x <> "")))
    SplitFile allLines
    list

It takes the contents of a file as a list of lines and treats each group of lines, separated by empty lines, as a separate list, giving me a list of lists.

My question is: is there a better way to do this, perhaps with a yield, that will give me something like a string list list instead of having to use new List<List<string>>? Because this does not seem particularly neat to me.

+4
source share
4 answers

A more idiomatic solution might be:

 /// Lazily splits a list of strings into runs of consecutive non-empty strings.
 /// Empty strings separate the runs and are not included in any of them.
 /// Each run is yielded as a sequence taken from the input list.
 let processFile xs =
     // Counts the non-empty strings at the front of `rest` (added to `count`)
     // and returns that count together with the list that follows the run;
     // the empty string that ended the run, if any, is dropped.
     let rec nonEmpties count rest =
         match rest with
         | [] -> count, []
         | "" :: tail -> count, tail
         | _ :: tail -> nonEmpties (count + 1) tail
     // Skips separators, then yields the next run and continues after it.
     let rec loop remaining =
         seq {
             match remaining with
             | [] -> ()
             | "" :: tail -> yield! loop tail
             | _ ->
                 let count, afterRun = nonEmpties 0 remaining
                 yield Seq.take count remaining
                 yield! loop afterRun
         }
     loop xs

where the nested nonEmpties function counts how many non-empty elements are at the front of the given list and returns both the count and the tail of the list after the last non-empty element, and the loop function skips empty elements and yields sequences of non-empty elements.

Some interesting features of this solution:

  • It is fully tail recursive, so it can process arbitrarily long sequences of non-empty lines and sequences of sequences of non-empty lines.

  • It avoids copying by referring back to the input list.

On a test input of 1000 sequences of 1000 lines, this solution is 8 times faster than barley, and 50% faster than Thomas.

Here is an even faster solution that starts by converting the input list into an array and then acts on the array indices:

 /// Lazily splits a sequence of strings into runs of consecutive non-empty
 /// strings. The input is copied into an array once; each run is then yielded
 /// as a sub-array. Empty strings separate the runs and are not included.
 let processFile xs =
     let lines = Array.ofSeq xs
     // Returns the first index at or after `index` that is either the end of
     // the array or holds an empty string.
     let rec nonEmpties index =
         if index <> lines.Length && lines.[index] <> "" then
             nonEmpties (index + 1)
         else
             index
     // Walks the array from `start`, skipping separators and yielding runs.
     let rec loop start =
         seq {
             if start < lines.Length then
                 if lines.[start] = "" then
                     yield! loop (start + 1)
                 else
                     let stop = nonEmpties start
                     yield Array.sub lines start (stop - start)
                     yield! loop stop
         }
     loop 0

In a test input of 1000 sequences of 1000 lines, this solution is 34 times faster than barley, and 6 times faster than Thomas.

+4
source

Your code is readable to me, but using TakeWhile and SkipWhile recursively is rather inefficient. Here is a simple functional recursive solution:

 /// Splits a list of lines into groups of consecutive non-empty lines,
 /// using empty lines as separators. Returns the groups in input order.
 let ProcessFile (allLines: string list) =
     // Pushes the group collected in 'current' (stored in reverse) onto
     // 'groups'; an empty 'current' leaves 'groups' unchanged.
     let flush groups current =
         match current with
         | [] -> groups
         | _ -> List.rev current :: groups
     // Walks 'input' while keeping the finished 'groups' (newest first) and
     // the lines of the group being built in 'current' (newest first).
     let rec SplitFile input groups current =
         match input with
         // End of input: close the last group and restore input order.
         | [] -> List.rev (flush groups current)
         // Separator: close the current group, if any, and start a fresh one.
         | "" :: rest -> SplitFile rest (flush groups current) []
         // Ordinary line: add it to the group being built.
         | line :: rest -> SplitFile rest groups (line :: current)
     SplitFile allLines [] []

 // Example: evaluates to [["a"; "b"]; ["c"]; ["d"]]
 ProcessFile ["a"; "b"; ""; ""; "c"; ""; "d"]

Essentially the same code can be written using seq { ... } as follows. We still need to keep the lines of the current group in an accumulator (current), but we now return the groups lazily using yield and yield! as we iterate over the input:

 /// Lazily splits a list of lines into groups of consecutive non-empty lines,
 /// using empty lines as separators. Each group is yielded as a list, in
 /// input order, as soon as its closing separator (or the end) is reached.
 let ProcessFile (allLines: string list) =
     // 'current' holds the lines of the group being built, newest first.
     let rec SplitFile input current =
         seq {
             match input, current with
             // End of input with nothing pending.
             | [], [] -> ()
             // End of input: emit the pending group.
             | [], _ -> yield List.rev current
             // Separator with nothing pending: just skip it.
             | "" :: rest, [] -> yield! SplitFile rest []
             // Separator closing a group: emit it and start a fresh one.
             | "" :: rest, _ ->
                 yield List.rev current
                 yield! SplitFile rest []
             // Ordinary line: add it to the group being built.
             | line :: rest, _ -> yield! SplitFile rest (line :: current)
         }
     SplitFile allLines []
+2
source

Personally, I like a one-liner:

 let source = ["a"; "b"; ""; ""; "c"; ""; "d"]

 source // can be any enumerable or seq
 // Tag every line with a group index; each empty line bumps the index.
 |> Seq.scan (fun (groupIndex, _) line -> (if line = "" then groupIndex + 1 else groupIndex), line) (0, "")
 // Remove the empty entries (this also drops the (0, "") seed emitted by Seq.scan).
 |> Seq.filter (fun (_, line) -> line <> "")
 // Group by the index.
 |> Seq.groupBy fst
 // Keep only the lines of each group, discarding the index.
 |> Seq.map (fun (_, tagged) -> tagged |> Seq.map snd |> List.ofSeq)
 // Turn the result back into a list.
 |> List.ofSeq

The biggest problem here is that Seq.groupBy will read the entire list into memory, but you are doing that anyway. There are groupBy implementations that only consider adjacent entries; one of those would be enough here, and it would let you read the file as a Seq instead (for example, using File.ReadLines rather than File.ReadAllLines).

0
source

How about using a plain old List.fold?

 /// Splits a list of lines into groups of consecutive non-empty lines using
 /// List.fold; empty lines act as separators.
 ///
 /// lines: the input lines, in order.
 /// Returns the groups in input order, each with its lines in input order.
 /// Leading, trailing and repeated empty lines never produce an empty group.
 let processFile lines =
     // The accumulator is the list of groups, newest first. The head group is
     // the one being built and is kept in reverse; an empty head is a
     // placeholder meaning "a separator was just seen".
     let step acc l =
         match acc, l with
         | [], "" -> acc                               // empty line before the first group
         | [], _ -> [ [ l ] ]                          // start the first group
         | [] :: _, "" -> acc                          // consecutive empty lines
         | xs :: xss, "" -> [] :: List.rev xs :: xss   // close the group, open a placeholder
         | xs :: xss, _ -> (l :: xs) :: xss            // continue the current group
     match List.fold step [] lines with
     | [] -> []
     // Input ended on a separator: drop the unused placeholder.
     | [] :: finished -> List.rev finished
     // The last group was never closed by a separator, so it is still reversed.
     | last :: finished -> List.rev (List.rev last :: finished)
0
source

All Articles