Ruby: lookup performance in an array of hashes

I am currently facing this question. For example, I have this array of hashes:

# Sample records: each hash has an :id plus :start_date and :end_date
# strings in YYYY-MM-DD form.
data = [
  { id: 1, start_date: "2015-01-02", end_date: "2015-01-05" },
  { id: 2, start_date: "2015-01-06", end_date: "2015-01-07" },
  { id: 3, start_date: "2015-01-10", end_date: "2015-01-20" }
]

So I want to find the exact hash whose range from start_date to end_date contains "2015-01-04".

From the documentation, I found that there are 3 ways to do it:

1) Use select

# Collect every record whose date range contains "2015-01-04"; the result is
# always an Array.
finding_hash = data.select do |record|
  record[:start_date] <= "2015-01-04" && record[:end_date] >= "2015-01-04"
end

finding_hash will be an array of the matching hashes. In my case I can guarantee that there will always be only one hash matching the condition, so after the select I have to call finding_hash.first to get the hash I want.

2) Use find

# Take the first record whose date range contains "2015-01-04"; nil when no
# record matches.
finding_hash = data.find do |record|
  record[:start_date] <= "2015-01-04" && record[:end_date] >= "2015-01-04"
end

With this approach, finding_hash is directly the hash I need.

3) Traditional loop

# Walk the records in order and return the first one whose date range
# contains "2015-01-04". `return` already leaves the loop, so no `break`
# is needed after it.
data.each do |t|
  return t if t[:start_date] <= "2015-01-04" && t[:end_date] >= "2015-01-04"
end

So which one is the fastest way to do this? I need performance because my data is pretty big!

Thank you and sorry for my bad english!

+4
4

You can measure this with a benchmark:

require 'benchmark'

# How many times each lookup strategy is repeated.
n = 1_000_000

data = [
  { id: 1, start_date: "2015-01-02", end_date: "2015-01-05" },
  { id: 2, start_date: "2015-01-06", end_date: "2015-01-07" },
  { id: 3, start_date: "2015-01-10", end_date: "2015-01-20" }
]

# Three unlabeled reports, printed in this order: select, find, each + break.
Benchmark.bm do |bench|
  # 1) select: visits every element and builds an Array of matches.
  bench.report do
    n.times do
      data.select { |row| row[:start_date] <= "2015-01-04" && row[:end_date] >= "2015-01-04" }
    end
  end

  # 2) find: stops at the first matching element.
  bench.report do
    n.times do
      data.find { |row| row[:start_date] <= "2015-01-04" && row[:end_date] >= "2015-01-04" }
    end
  end

  # 3) each with an explicit break on the first match.
  bench.report do
    n.times do
      finding_hash = {}
      data.each do |row|
        next unless row[:start_date] <= "2015-01-04" && row[:end_date] >= "2015-01-04"

        finding_hash = row
        break
      end
    end
  end
end

Result:

       user     system      total        real
   1.490000   0.020000   1.510000 (  1.533589)
   1.070000   0.010000   1.080000 (  1.096578)
   1.000000   0.010000   1.010000 (  1.011021)

n .

+2

select, find and detect come from Enumerable, while Array has its own find_index. In the benchmark below it comes out roughly 20% faster:

# Array#find_index returns nil when no hash matches; guard the lookup so a
# miss yields nil instead of raising TypeError from data[nil].
index = data.find_index { |h| h[:start_date] <= "2015-01-04" && h[:end_date] >= "2015-01-04" }
x = index && data[index]

:

require 'benchmark'

# How many times each lookup strategy is repeated.
n = 1_000_000

data = [
  {:id => 1,:start_date => "2015-01-02",:end_date => "2015-01-05"},
  {:id => 2,:start_date => "2015-01-06",:end_date => "2015-01-07"},
  {:id => 3,:start_date => "2015-01-10",:end_date => "2015-01-20"}
]

# Times four ways of locating the hash whose date range contains "2015-01-04".
Benchmark.bm do |x|
  x.report 'Enumerable#select' do
    n.times do
      data.select do |h|
        h[:start_date] <= "2015-01-04" && h[:end_date] >= "2015-01-04"
      end
    end
  end

  x.report 'Enumerable#detect' do
    n.times do
      data.detect do |h|
        h[:start_date] <= "2015-01-04" && h[:end_date] >= "2015-01-04"
      end
    end
  end

  x.report 'Enumerable#each  ' do
    n.times do
      finding_hash = {}
      data.each do |t|
        if t[:start_date] <= "2015-01-04" && t[:end_date] >= "2015-01-04"
          finding_hash = t
          break t
        end
      end
    end
  end

  x.report 'Array#find_index ' do
    n.times do
      index = data.find_index { |h| h[:start_date] <= "2015-01-04" && h[:end_date] >= "2015-01-04" }
      # Use a dedicated local: assigning to `x` here would overwrite the
      # Benchmark report object captured from the enclosing block.
      found = data[index]
    end
  end
end

Results:

Enumerable#select  1.000000   0.010000   1.010000 (  1.002282)
Enumerable#detect  0.790000   0.000000   0.790000 (  0.797319)
Enumerable#each    0.620000   0.000000   0.620000 (  0.627272)
Array#find_index   0.520000   0.000000   0.520000 (  0.515691)
+2

v3 :

# Returns an Array with every hash in @data whose date range contains
# "2015-01-04" (an empty Array when nothing matches).
def v1
  @data.select do |entry|
    entry[:start_date] <= "2015-01-04" && entry[:end_date] >= "2015-01-04"
  end
end

# Returns the first hash in @data whose date range contains "2015-01-04",
# or nil when nothing matches.
def v2
  @data.find do |entry|
    entry[:start_date] <= "2015-01-04" && entry[:end_date] >= "2015-01-04"
  end
end

# Returns the first hash in @data whose date range contains "2015-01-04",
# or nil when nothing matches (the same contract as v2).
def v3
  @data.each do |t|
    # `return` exits both the loop and the method, so no `break` is needed.
    return t if t[:start_date] <= "2015-01-04" && t[:end_date] >= "2015-01-04"
  end
  # Without this, a miss would return @data itself (the value of `each`).
  nil
end

select is the slowest, because it always scans the whole array. find stops at the first match, and so does v3.

find v3 . .

t = Time.now; 10000.times{ v1 }; Time.now - t
=> 0.014131

t = Time.now; 10000.times{ v2 }; Time.now - t
=> 0.013138

t = Time.now; 10000.times{ v3 }; Time.now - t
=> 0.008799

- , .

If the real data is too large, you can run it on a subset of the data to get a better answer.

By the way, you can rewrite v3 as:

# Evaluates to the first record whose date range contains "2015-01-04";
# `break t` makes `each` return that record.
data.each do |t|
  in_range = t[:start_date] <= "2015-01-04" && t[:end_date] >= "2015-01-04"
  break t if in_range
end

FWIW, working on an array will be very cumbersome and slow. You can save it to the database and run the query. For a large dataset, this is likely to be at least 2 orders of magnitude faster.

+1
source

All of these options have O(n) complexity. If your ranges do not overlap, you can use Array#bsearch, which has O(log n) complexity. You must sort the ranges first.

# Binary search needs the ranges ordered by start date, and only works when
# the ranges do not overlap.
target = "2015-01-04"
sorted = data.sort_by { |x| x[:start_date] }

# Find-any mode of Array#bsearch: the block returns 0 when `x` contains the
# target, a negative number when the target lies before `x` (search left),
# and a positive number when it lies after `x` (search right).
sorted.bsearch do |x|
  if target < x[:start_date]
    -1
  elsif target > x[:end_date]
    1
  else
    0
  end
end
+1
source

All Articles