The fastest / most efficient way to compare two Javascript string arrays

Hi, I was wondering if anyone could offer any advice on the fastest / most efficient way to compare two string arrays in JavaScript.

I am developing a kind of tag cloud based on user input - the input being a written piece of text, such as a blog article or the like.

I therefore have an array that holds the words which should not be included - "is", "a", and so on.

I am currently doing the following:

Remove all punctuation from the input string, tokenize it, compare each word with the exclude array, and remove any duplicates.

The comparison is performed by looping over each element in the exclude array for each word in the input text - this feels like brute force, and it makes the browser struggle on arrays of more than a few hundred words.

I should also mention that my exclusion list has about 300 items.

Any help would be really appreciated.

Thanks

+5
source share
5 answers

I'm not sure about the whole approach, but instead of building a huge array and then iterating over it, why not put the words in as "keys" of a "map" object to simplify the comparison?

eg.

// Lookup table of excluded words: each word is a key, so a membership test
// is a single property read instead of a scan over a list.
// The object is created without a prototype so that inherited names such as
// "constructor" or "toString" are never mistaken for excluded words.
var excludes = Object.create(null);//object
//set keys into the "map"
excludes['bad'] = true;
excludes['words'] = true;
excludes['exclude'] = true;
excludes['all'] = true;
excludes['these'] = true;

Then when you want to compare ... just do

// Classify each candidate word by looking it up in the `excludes` map
// (an object whose keys are the excluded words).
var wordsToTest = ['these','are','all','my','words','to','check','for'];
var checkWord;
for(var i=0;i<wordsToTest.length;i++){
  checkWord = wordsToTest[i];
  // Own-property check so that inherited keys (e.g. "constructor") are never
  // treated as excluded words; the value must also be truthy.
  if(Object.prototype.hasOwnProperty.call(excludes, checkWord) && excludes[checkWord]){
    //bad word, ignore...
  } else {
    //good word... do something with it
  }
}

allows these words through ['are','my','to','check','for']

+5
source

, . , , .

+2

( , JS , , Google ;]). af booleans, . .

0

Building on scunliffe's answer, but keeping the exclusion list in an array:

// Excluded words kept as a plain list (array) of strings.
var excludes = [
  'bad',
  'words',
  'exclude',
  'all',
  'these'
];

Then add a helper method to arrays that checks whether a given value is present:

// Adds Array.prototype.hasValue(value).
// Returns true when at least one element of the array is strictly equal (===)
// to `value`, and false otherwise (including for an empty array).
// The method is defined as non-enumerable so that it does not show up in
// `for...in` loops over arrays.
Object.defineProperty(Array.prototype, 'hasValue', {
  value: function(value) {
    for (var i=0; i<this.length; i++) {
      if (this[i] === value) {
        return true;
      }
    }
    return false;
  },
  writable: true,
  configurable: true,
  enumerable: false
});

Then test the words like this:

// Walk the candidate words and log every one that is not in `excludes`
// (membership is decided by the array's hasValue method).
var wordsToTest = ['these','are','all','my','words','to','check','for'];
var checkWord;
var i = 0;
while (i < wordsToTest.length) {
  checkWord = wordsToTest[i];
  if (!excludes.hasValue(checkWord)) {
    // not excluded: report it
    console.log( checkWord );
  }
  i++;
}

Output:

['are','my','to','check','for']
0

// Removes HTML tags, punctuation and a list of unwanted words from a text
// using two regular-expression replacements.
// All variables are declared explicitly; assigning to undeclared names
// throws a ReferenceError in strict mode and in ES modules.
var text = 'This is a text that contains the words to delete. It has some <b>HTML</b> code in it, and punctuation!';
var deleteWords = ['is', 'a', 'that', 'the', 'to', 'this', 'it', 'in', 'and', 'has'];

// clear punctuation and HTML code: every tag and every non-word character
// is replaced by a single space
var onlyWordsReg = /<[^>]*>|\W/g;
var onlyWordsText = text.replace(onlyWordsReg, ' ');

// one alternation of all unwanted words, matched as whole words (\b) and
// case-insensitively, so "This" and "It" are removed as well
var reg = new RegExp('\\b(?:' + deleteWords.join('|') + ')\\b', 'ig');
var cleanText = onlyWordsText.replace(reg, '');

// tokenize after this
0

All Articles