Find consecutive substring indices

Given the search string and the result string (which is guaranteed to contain all the letters of the search string, not case sensitive, in order), how can I most efficiently get an array of ranges representing the indices in the result string corresponding to the letters in the search string?

Desired output:

substrings( "word", "Microsoft Office Word 2007" )
#=> [ 17..20 ]

substrings( "word", "Network Setup Wizard" )
#=> [ 3..5, 19..19 ]
#=> [ 3..4, 18..19 ]   # Alternative, acceptable, less-desirable output

substrings( "word", "Watch Network Daemon" )
#=> [ 0..0, 10..11, 14..14 ]

This is for an autocomplete search box. Here's a screenshot of a tool similar to Quicksilver that underlines the letters I'm searching for. Note that, unlike my ideal output above, the tool in this screenshot does not prefer longer single matches.
Screenshot of Colibri underlining letters in search results

Test results

, @tokland regex , StringScanner, :

               user     system      total        real
phrogz1    0.889000   0.062000   0.951000 (  0.944000)
phrogz2    0.920000   0.047000   0.967000 (  0.977000)
tokland    1.030000   0.000000   1.030000 (  1.035000)

:

require 'benchmark'

# Benchmark harness: times each candidate implementation (looked up by name
# and invoked via `send`, so the methods must already be defined) against two
# groups of result strings, each paired with its own list of search terms.
office_titles = ["Microsoft Office Word 2007", "Network Setup Wizard", "Watch Network Daemon"]
foo_titles    = ["FooBar", "Foo Bar", "For the Love of Big Cars"]

# Maps a list of result strings to the search terms that are run against it.
cases = {
  office_titles => %w[w wo wor word],
  foo_titles    => %w[f fo foo foobar fb fbr]
}

Benchmark.bmbm do |bench|
  %w[phrogz1 phrogz2 tokland].each do |impl|
    bench.report(impl) do
      cases.each do |masters, terms|
        # Every (result string, term) pair, result strings in the outer position.
        masters.product(terms).each do |master, term|
          2000.times { send(impl, term, master) }
        end
      end
    end
  end
end
+5
5

, ?

# irb session: build a case-insensitive regex with one capture group per search
# letter, lazily skipping whatever lies between consecutive letters.
# NOTE(review): the letters are interpolated without Regexp.escape, so regex
# metacharacters in `s` would be treated as pattern syntax.
>> s = "word"
>> re = /#{s.chars.map{|c| "(#{c})" }.join(".*?")}/i # => /(w).*?(o).*?(r).*?(d)/i
>> match = "Watch Network Daemon".match(re)
=> #<MatchData "Watch Network D" 1:"W" 2:"o" 3:"r" 4:"D">
# match.begin(idx) is the index in the matched string where capture group idx starts.
>> 1.upto(s.length).map { |idx| match.begin(idx) }
=> [0, 10, 11, 14]

( , , ).

+3

Ruby Abbrev . , , :

require 'abbrev'
require 'pp'

# Hash mapping each unambiguous abbreviation of 'ruby' (and the word itself)
# to the full word.
abbr = Abbrev::abbrev(['ruby'])
>> {"rub"=>"ruby", "ru"=>"ruby", "r"=>"ruby", "ruby"=>"ruby"}

, . , .

.

, , :

# One case-insensitive, literal (escaped) pattern per abbreviation. The keys are
# sorted in reverse so that longer abbreviations come before their own prefixes
# in the resulting alternation.
abbreviation_patterns = abbr.keys.sort.reverse.map do |k|
  Regexp.new(Regexp.escape(k), Regexp::IGNORECASE)
end
regexps = Regexp.union(abbreviation_patterns)

, , , ?, * ., , , .

:

/(?i-mx:ruby)|(?i-mx:rub)|(?i-mx:ru)|(?i-mx:r)/

Regexp match .

union "ORs", , . , .

, .


: . , , :

require 'abbrev'
require 'pp'

# Collect [start, length] pairs for every place in target_str where one of the
# abbreviations of 'ruby' matches, ignoring case.
abbr = Abbrev.abbrev(['ruby'])

# Reverse-sorted keys put longer abbreviations ahead of their prefixes in the union.
abbreviation_patterns = abbr.keys.sort.reverse.map do |k|
  Regexp.new(Regexp.escape(k), Regexp::IGNORECASE)
end
regexps = Regexp.union(abbreviation_patterns)

target_str = 'Ruby rocks, rub-a-dub-dub, RU there?'
offsets = []
str_offset = 0

# Keep searching until no further match exists at or after str_offset.
while (match_results = regexps.match(target_str, str_offset))
  s, e = match_results.offset(0)
  offsets << [s, e - s]
  # Resume one character past the start of this match (not past its end).
  str_offset = s + 1
end

pp offsets

>> [[0, 4], [5, 1], [12, 3], [27, 2], [33, 1]]

If you would rather have ranges, replace `offsets << [s, e - s]` with `offsets << [s .. e]`, which gives:

>> [[0..4], [5..6], [12..15], [27..29], [33..34]]
+2

, , .

# Finds, left to right and ignoring case, the first occurrence of each letter of
# search_str in result_str and groups adjacent hit indices into ranges.
#
# @param search_str [String] letters to look for, in order
# @param result_str [String] string to search in
# @return [Array<Range>] inclusive index ranges into result_str; holds only the
#   letters that were found if result_str runs out before search_str does
def substrings( search_str, result_str )
  remaining = search_str.downcase.chars
  wanted = remaining.shift
  ranges = []

  result_str.downcase.each_char.with_index do |char, idx|
    break if wanted.nil?          # every search letter has been placed
    next unless char == wanted

    if ranges.empty? || idx != ranges.last.last + 1
      ranges << (idx..idx)        # start a new run
    else
      ranges[-1] = (ranges.last.first..idx)  # extend the current run by one
    end
    wanted = remaining.shift
  end

  ranges
end

substrings( "word", "Microsoft Office Word 2007" ) #=> [17..20]
substrings( "word", "Network Setup Wizard" )       #=> [3..5, 19..19]
substrings( "word", "Watch Network Daemon" )       #=> [0..0, 10..11, 14..14]

              user     system      total        real
phrogz1   1.120000   0.000000   1.120000 (  1.123083)
cary      0.550000   0.000000   0.550000 (  0.550728)
+2

, - , , , - , , . , , , @tokland.

0

:

require 'strscan'
# Locates each character of +search+ in +master+, in order and ignoring case,
# and returns the hit positions grouped into ranges of consecutive indices.
#
# Each search character is matched literally (regex metacharacters are escaped),
# and positions are character indices, not byte offsets, so multibyte text in
# +master+ does not shift the results.
#
# @param search [String] characters to look for, in order
# @param master [String] string to search in
# @return [Array<Range>, nil] inclusive index ranges into +master+; an empty
#   array when +search+ is empty; nil when some character cannot be found
def substrings( search, master )
  scan   = StringScanner.new(master)
  ranges = []
  init   = nil  # first index of the run currently being built
  prev   = nil  # index of the most recent hit

  search.each_char do |c|
    literal = Regexp.new(Regexp.escape(c), Regexp::IGNORECASE)
    return nil unless scan.scan_until(literal)

    # The scan pointer sits just past the matched character.
    last = scan.charpos - 1
    if init.nil?
      init = last
    elsif last - prev > 1
      # Gap since the previous hit: close the finished run and start a new one.
      ranges << (init..prev)
      init = last
    end
    prev = last
  end

  ranges << (init..prev) if init
  ranges
end

, ( @tokland):

require 'strscan'
# Locates each character of +search+ in +master+, in order and ignoring case,
# and returns the hit positions grouped into ranges of consecutive indices.
#
# Each search character is matched literally (regex metacharacters are escaped),
# and positions are character indices, not byte offsets.
#
# @param search [String] characters to look for, in order
# @param master [String] string to search in
# @return [Array<Range>, nil] inclusive index ranges into +master+, or nil when
#   some character of +search+ cannot be found
def substrings( search, master )
  s = StringScanner.new(master)
  search.chars.map do |c|
    return nil unless s.scan_until(Regexp.new(Regexp.escape(c), Regexp::IGNORECASE))
    # The scan pointer sits just past the matched character.
    s.charpos - 1
  end.to_ranges
end

class Array
  # Collapses runs of successive elements (each equal to the previous one's
  # +succ+) into inclusive ranges.
  #
  #   [1, 2, 3, 7, 9, 10].to_ranges #=> [1..3, 7..7, 9..10]
  #
  # @return [Array<Range>] one range per run; empty for an empty array
  def to_ranges
    return [] if empty?
    [].tap do |ranges|
      init = first  # start of the run currently being built
      last = nil    # previously visited element; nil before the first one
      each do |o|
        if last && o != last.succ
          # Run broken: emit it and start a new one at the current element.
          ranges << (init..last)
          init = o
        end
        last = o
      end
      ranges << (init..last)
    end
  end
end
0

All Articles