The fastest way to compare a string with an array of strings

I have an array, say:

   // Sample data: the keywords to look for and the sentence to scan.
   var myArray = [
     "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
     "personal", "sports", "esportes", "health", "saúde", "academia"
   ];
   var myString = "I went to the park with my garmin watch";

What is a quick way to check if my string has any of the words in myArray?

Below is my code, but I'm not sure if this is the best way ...

   /**
    * Counts how many of the given keywords occur in a phrase.
    *
    * The phrase is lower-cased before comparison; the keywords are compared
    * as given, so they are expected to be lower-case already.
    *
    * @param {string[]} arKeywords - Keywords to look for (literal substrings).
    * @param {string} [frase] - Phrase to scan; null/undefined yields 0.
    * @returns {number} Number of keywords found in the phrase.
    */
   function score(arKeywords, frase) {
     // `== null` covers both null and undefined.
     if (frase == null) {
       return 0;
     }
     const sentencaMin = frase.toLowerCase();
     let indice = 0;
     // `let i` keeps the counter local; an undeclared `i` leaks a global and
     // throws a ReferenceError in strict mode / ES modules.
     for (let i = 0; i < arKeywords.length; i++) {
       // includes() also reports a keyword found at position 0 (the previous
       // `search(...) > 0` test skipped it) and treats the keyword as literal
       // text rather than as a regular-expression pattern.
       if (sentencaMin.includes(arKeywords[i])) {
         indice++;
       }
     }
     return indice;
   }

Please help me. This function will be run on a LOT of lines!

Thanks everyone :)

+4
source share
7 answers

Based on this sentence, from the question:

What is a quick way to check if my string has *any* of the words in myArray?

(Emphasis mine.)

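I would suggest the following, which tests whether "some" of the words in the supplied string are present in the supplied array. This – in theory – stops comparing as soon as one word matches, so the remaining words are never checked: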

// Sample data: the keyword list to look for and the sentence to scan.
var myArray = [
  "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
  "personal", "sports", "esportes", "health", "saúde", "academia"
];
var myString = "I went to the park with my garmin watch";

/**
 * Reports whether at least one word of a string is listed in an array.
 *
 * The comparison is exact (case-sensitive, whole token).
 *
 * @param {string} needles - Text whose words are looked up.
 * @param {string[]} haystack - List of words to look the tokens up in.
 * @returns {boolean} true if some word was found, false otherwise.
 */
function anyInArray(needles, haystack) {
  // Split where one or more whitespace characters ('\s+') sit between two
  // word-boundaries ('\b'), giving the individual words of the text.
  const words = needles.split(/\b\s+\b/);

  // Stop at the first word that is present in the list.
  for (const word of words) {
    if (haystack.includes(word)) {
      return true;
    }
  }

  // No word was found.
  return false;
}

// Logs whether any word of myString is listed in myArray.
console.log(anyInArray(myString, myArray));

// Keywords to search for.
var myArray = [
  "ibira",
  "garmin",
  "hide",
  "park",
  "parque",
  "corrida",
  "trote",
  "personal",
  "sports",
  "esportes",
  "health",
  "saúde",
  "academia"
];
// Sentence to scan for those keywords.
var myString = "I went to the park with my garmin watch";

/**
 * Checks whether any whitespace-separated word of `needles` is an element
 * of `haystack` (exact, case-sensitive comparison).
 *
 * @param {string} needles - Text to split into words.
 * @param {string[]} haystack - List the words are looked up in.
 * @returns {boolean} true when at least one word is listed.
 */
function anyInArray(needles, haystack) {
  var isListed = function (needle) {
    return haystack.indexOf(needle) !== -1;
  };
  var words = needles.split(/\b\s+\b/);
  return words.some(isListed);
}

// Logs the boolean result of the lookup for the sample data.
console.log(anyInArray(myString, myArray));

JS Fiddle demo.

:

+3

What is a quick way to check if my string has any of the words in myArray?

Compile myArray into a regular expression and test myString against it - see the answer by FizzyTea.

If you do not want to use a regular expression, you can combine String.includes() with Array.some():

 // Sample keywords and the sentence to scan.
 var myArray = [
   "ibira", "garmin", "hide", "park", "parque", "corrida", "trote",
   "personal", "sports", "esportes", "health", "saúde", "academia"
 ];
 var myString = "I went to the park with my garmin watch";

 // Logs true if at least one keyword occurs as a substring of the sentence;
 // some() stops at the first hit.
 console.log(
   myArray.some(function (keyword) {
     return myString.includes(keyword);
   })
 );

The approaches are benchmarked here: https://jsfiddle.net/usq9zs61/5/

Timings for 100000 iterations in Chrome 48/Firefox 46, on Ubuntu:

  • compiledregextest (FizzyTea): 16.046ms/21.84ms
  • someincludes (this answer): 76.707ms/62.55ms
  • compiledregexmatch (FizzyTea): 104.682ms/170.58ms
  • someset (Bergi): 488.474ms/749.46ms
  • splitregexsome (the split + some answer): 529.529ms/677.20ms
  • filterset (Bergi): 742.857ms/875.86ms
  • ahocorasick (ordi): 1790.654ms/1642.19ms

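Aho-Corasick, as proposed by orid, has the best asymptotic complexity, but the JavaScript implementation tested here was the slowest in this benchmark for such a small myArray.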

+3

Use a precompiled RegExp:

// Build the pattern once, outside the loop: every keyword is wrapped in
// word-boundaries (\b) and the alternatives are joined with |.
// The flags must be passed as the string 'gi'; a bare `gi` is an undeclared
// identifier and throws a ReferenceError.
var re = RegExp('\\b' + myArray.join('\\b|\\b') + '\\b', 'gi');
var i, matches;
for(i=0; i<lotsOfStrings.length; i+=1){
    // note that this retrieves the total number
    // of matches, not unique matches, which may
    // not be what you want
    // (match() returns null, not an empty array, when nothing matches)
    matches = lotsOfStrings[i].match(re);
    // do something with matches
}

Note that the RegExp is built only once, before the loop, and then reused for every string.

If you only need to know whether a string contains any of the keywords:

// Build the pattern once, outside the loop. The flags must be a string.
// Only 'i' is used here: a regex with the 'g' flag remembers lastIndex
// between test() calls, so reusing it on different strings can report
// false for a string that does contain a keyword.
var re = RegExp('\\b' + myArray.join('\\b|\\b') + '\\b', 'i');
var i, matched;
for(i=0; i<lotsOfStrings.length; i+=1){
    // true if the string contains at least one keyword as a whole word
    matched = re.test(lotsOfStrings[i]);
    // do something with matched
}
+1

Demo: https://jsbin.com/fiqegu/1/edit?js,console

// Keep only the space-separated words of myString that are listed in myArray.
var result = myString
  .split(' ')
  .filter((word) => myArray.indexOf(word) !== -1);

If you only need the number of matching words, take the .length of the filtered array:

// Count the space-separated words of myString that are listed in myArray.
var result = myString.split(' ').reduce(function (count, word) {
  return myArray.indexOf(word) > -1 ? count + 1 : count;
}, 0);
0

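You can build a single case-insensitive regular expression from the keywords and match the phrase against it.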

My regexp uses \b so that only whole words match: park matches, but spark does not.

// Keywords to look for, and the phrase to test them against.
var myArray = [
  "ibira", "garmin", "hide", "park",
  "parque", "corrida", "trote", "personal",
  "sports", "esportes", "health", "saúde",
  "academia"
];
var myString = "I went to the park with my garmin watch";


/**
 * Tests whether a phrase contains at least one keyword as a whole word,
 * ignoring case.
 *
 * @param {string[]} arKeywords - Keywords to look for (matched literally).
 * @param {string} [frase] - Phrase to test.
 * @returns {boolean|number} 0 when the phrase is null/undefined, otherwise
 *   true if a keyword was found and false if not.
 */
function score(arKeywords, frase) {
  // `== null` covers both null and undefined.
  if (frase == null) {
    return 0;
  }
  // Escape regex metacharacters so keywords are matched as plain text, and
  // drop empty keywords.
  const alternatives = arKeywords
    .map((keyword) => String(keyword).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .filter((keyword) => keyword.length > 0);
  // Without this guard an empty list would build \b()\b, which matches at
  // any word boundary and reports a hit for almost every phrase.
  if (alternatives.length === 0) {
    return false;
  }
  const re = new RegExp('\\b(' + alternatives.join('|') + ')\\b', 'i');
  return re.test(frase);
}

// Logs the result of score() for the sample keywords and phrase.
console.log(score(myArray, myString));
0

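Joining the keywords with | builds an alternation; with the g flag, match() returns every occurrence, so the length of the result is the score: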

/**
 * Counts the whole-word, case-insensitive occurrences of the keywords in
 * a text. Every occurrence counts, so a keyword that appears twice adds 2.
 *
 * @param {string[]} myArray - Keywords to look for (matched literally).
 * @param {string} [text] - Text to scan; null/undefined yields 0.
 * @returns {number} Total number of keyword occurrences.
 */
function score(myArray, text) {
  // Nothing to scan: avoid calling match() on null/undefined.
  if (text == null) {
    return 0;
  }
  // Escape regex metacharacters so keywords are matched as plain text, and
  // drop empty keywords.
  const alternatives = myArray
    .map((keyword) => String(keyword).replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .filter((keyword) => keyword.length > 0);
  // Without this guard an empty list would build \b()\b, whose empty
  // matches at every word boundary would be counted as hits.
  if (alternatives.length === 0) {
    return 0;
  }
  const regex = new RegExp('\\b(' + alternatives.join('|') + ')\\b', 'gi');
  // match() returns null, not an empty array, when nothing matches.
  const matches = text.match(regex);
  return matches ? matches.length : 0;
}

Usage:

// Sample keyword list and sentence used in the calls below.
var myArray = [
  "ibira", "garmin", "hide", "park", "parque",
  "corrida", "trote", "personal", "sports", "esportes",
  "health", "saúde", "academia"
];
var myString = "I went to the park with my garmin watch";

// Example calls; the trailing comments give the expected return values.
score(myArray, myString); // 2
score(myArray, 'Ibira is doing sports in the Park'); // 3

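Note that every occurrence is counted, so a keyword from myArray that appears twice in the text is counted twice.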

0

The most efficient solution to this problem is probably the Aho-Corasick algorithm, which searches in O(length of the string being searched) after building the initial DAG from the list of strings in O(sum of the lengths of the strings in the list).

0
source

All Articles