Valid RegEx according to online testing tools, not getting any matches when reading a file in a browser

I developed this regex with several online tools and with the help of the community:

https://regex101.com/r/hJ4pD5/1

(\s[A-Z]\.).+?(?=(\s[A-Z]\.)|(\W?(Answer:)\W?)) 

The goal is to extract all the answer alternatives for each question. According to regexr and regex101, this is a valid JavaScript regular expression that works well with the test data (pastebin):

 1. Question goes here: A. Answer one B. Answer two C. Answer three D. Not indented Answer Answer: B is correct 

Expected matches should be:

"A. Answer one", "B. Answer two", "C. Answer three", "D. Not indented Answer"

But when I use it in my code, it does not work: no matches are found.

(try it with pastebin data)

 /**
  * Created by Schwusch on 01/08/2016.
  */
 $(document).ready(start);

 // Raw text chunks of the loaded file, one per question (split on '---').
 var questionsRaw;
 // Parsed questions: { question, alternatives, answer }.
 var questionsFormatted = [];
 // Index into questionsFormatted of the question currently shown.
 var questionIndex = 0;

 /**
  * 'change' handler for the file input: reads the first selected file as
  * text and hands its contents to displayContents().
  *
  * @param {Event} e - change event from the file input
  */
 function readSingleFile(e) {
     var file = e.target.files[0];
     if (!file) {
         return;
     }
     var reader = new FileReader();
     reader.onload = function(e) {
         var contents = e.target.result;
         displayContents(contents);
     };
     reader.readAsText(file);
 }

 /* REGEX MAGIC -------------------------------------------------*/
 /**
  * Splits the file contents into questions, extracts the question text,
  * the alternatives and the answer of each one, then renders the current
  * question and its alternatives.
  *
  * @param {string} contents - full text of the loaded file
  */
 function displayContents(contents) {
     questionsRaw = contents.split('---');
     $.each(questionsRaw, function(index, question ) {
         // Everything after "Answer:" is the answer; everything before the
         // first "A." is the question text.
         var answer = question.split("Answer:")[1];
         var splittedQuestion = question.split("A.")[0];
         // NOTE: this is the expression the question asks about; apart from
         // restoring the [A-Z] character classes it is kept exactly as posted.
         var alternatives = question.match(/(\s[A-Z]\.).+?(?=(\s[A-Z]\.)|(\W?(Answer:)\W?))/g);
         questionsFormatted.push({
             question: splittedQuestion,
             alternatives: alternatives,
             answer: answer
         });
     });
     /* END REGEX MAGIC -------------------------------------------------*/
     var element = document.getElementById('file-content');
     element.innerHTML = questionsFormatted[questionIndex].question;
     for (var i = 0; i < questionsFormatted[questionIndex].alternatives.length ; i++) {
         $('#alternatives').append('<button type="button" class="list-group-item">' + questionsFormatted[questionIndex].alternatives[i] + '</button>');
     }
 }

 /**
  * Wires up the page once the DOM is ready: the file input, the arrow-key
  * shortcuts and the label that shows the chosen file name.
  */
 function start() {
     document.getElementById('file-input')
         .addEventListener('change', readSingleFile, false);

     $(window).keydown(function(e) {
         e = e || event;
         switch(e.keyCode) {
             case 37: // left
                 previousQuestion();
                 return false;
             case 38: // up
                 showQuestion();
                 return false;
             case 39: // right
                 nextQuestion();
                 return false;
             case 40: // down
                 showAnswer();
                 return false;
         }
     });

     $(document).on('change', ':file', function() {
         var input = $(this),
             numFiles = input.get(0).files ? input.get(0).files.length : 1,
             // Strip the directory part so only the file name remains.
             label = input.val().replace(/\\/g, '/').replace(/.*\//, '');
         input.trigger('fileselect', [numFiles, label]);
     });

     $(':file').on('fileselect', function(event, numFiles, label) {
         var element = document.getElementById('filechoose');
         element.innerHTML = label;
     });
 }

 /** Shows the answer of the current question in the content area. */
 function showAnswer() {
     var element = document.getElementById('file-content');
     element.innerHTML = questionsFormatted[questionIndex].answer;
 }

 /** Shows the text of the current question in the content area. */
 function showQuestion() {
     var element = document.getElementById('file-content');
     element.innerHTML = questionsFormatted[questionIndex].question;
 }

 /** Advances to the next question (wrapping to the first) and re-renders. */
 function nextQuestion() {
     if (questionIndex < questionsFormatted.length - 1) questionIndex++ ;
     else questionIndex = 0;

     var element = document.getElementById('file-content');
     element.innerHTML = questionsFormatted[questionIndex].question;
     $( ".list-group-item" ).remove();
     for (var i = 0; i < questionsFormatted[questionIndex].alternatives.length ; i++) {
         $('#alternatives').append('<button type="button" class="list-group-item">' + questionsFormatted[questionIndex].alternatives[i] + '</button>');
     }
 }

 /** Goes back to the previous question (wrapping to the last) and re-renders. */
 function previousQuestion() {
     if (questionIndex > 0) questionIndex-- ;
     else questionIndex = questionsFormatted.length - 1;

     var element = document.getElementById('file-content');
     element.innerHTML = questionsFormatted[questionIndex].question;
     $( ".list-group-item" ).remove();
     for (var i = 0; i < questionsFormatted[questionIndex].alternatives.length ; i++) {
         $('#alternatives').append('<button type="button" class="list-group-item">' + questionsFormatted[questionIndex].alternatives[i] + '</button>');
     }
 }
 <!DOCTYPE html>
 <html lang="en">
 <head>
     <meta charset="UTF-8">
     <title>Question tool</title>
     <script src="https://code.jquery.com/jquery-3.1.0.js" integrity="sha256-slogkvB1K3VOkzAI8QITxV3VzpOnkeNVsKvtkYLMjfk=" crossorigin="anonymous"></script>
     <!-- Latest compiled and minified CSS -->
     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap.min.css" integrity="sha384-BVYiiSIFeK1dGmJRAkycuHAHRg32OmUcww7on3RYdg4Va+PmSTsz/K68vbdEjh4u" crossorigin="anonymous">
     <!-- Optional theme -->
     <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/bootstrap/3.3.7/css/bootstrap-theme.min.css" integrity="sha384-rHyoN1iRsVXV4nD0JutlnGaslCJuC7uwjduW9SVrLvRYooPp2bWYgmgJQIXwl/Sp" crossorigin="anonymous">
     <script src="script.js"></script>
     <style>
         /* Move down content */
         body {
             padding-top: 20px;
             padding-bottom: 20px;
         }
     </style>
 </head>
 <body>
 <div>
     <div class="container">
         <div class="jumbotron">
             <h3>Question Tool</h3>
             <!-- File picker: the real input is hidden and the label acts as the button -->
             <label class="btn btn-default btn-file" id="filechoose">
                 Choose File <input type="file" id="file-input" style="display: none;"/>
             </label>
             <!-- Navigation buttons; each calls a function defined in script.js -->
             <div class="btn-group btn-group-justified" role="group" aria-label="...">
                 <div class="btn-group" role="group">
                     <button type="button" class="btn btn-lg btn-primary" onclick="showAnswer()" role="button">
                         <span class="glyphicon glyphicon-arrow-down" aria-hidden="true"></span>Show Answer
                     </button>
                 </div>
                 <div class="btn-group" role="group">
                     <button type="button" class="btn btn-lg btn-success" onclick="showQuestion()" role="button">
                         <span class="glyphicon glyphicon-arrow-up" aria-hidden="true"></span>Show Question
                     </button>
                 </div>
                 <div class="btn-group" role="group">
                     <button type="button" class="btn btn-lg btn-danger" onclick="previousQuestion()" role="button">
                         <span class="glyphicon glyphicon-arrow-left" aria-hidden="true"></span>Previous Question
                     </button>
                 </div>
                 <div class="btn-group" role="group">
                     <button type="button" class="btn btn-lg btn-info" onclick="nextQuestion()" role="button">
                         <span class="glyphicon glyphicon-arrow-right" aria-hidden="true"></span>Next Question
                     </button>
                 </div>
             </div>
             <!-- Question / answer text is written here by script.js -->
             <div id="file-content" class="well"></div>
             <!-- Alternative buttons are appended here by script.js -->
             <div id="alternatives" class="list-group">
             </div>
         </div>
     </div>
 </div>
 </body>
 </html>

Why does it work in online testers, but not in a browser?

+5
source share
1 answer

The reason it doesn't work for you is that, unlike the text you used for testing on regex101.com, the file you load uses \r\n as its line ending, not just \n.

Add to this that by default the . metacharacter does not match \r, and that JavaScript does not support the s modifier, which could change this behavior, and you end up with fewer matches or none at all.

More specifically: the .+? part of the regular expression stops matching characters when it encounters \r. The engine first tries the look-ahead there and finds that it can match \r with \s or \W, but the \n that follows matches neither [A-Z] nor the A of Answer:. So it backtracks and tries to continue with the .+? part, but that also fails, because . cannot match \r, as described above. The matching process then starts over from the beginning of the regular expression to look for a potential next match, and it fails again and again for the same reasons.

To fix this, change two things:

  • Add + after the \s in the look-ahead, so that it matches not only \n but also the preceding \r.

  • Change \W? to \W*, so that it can also match the preceding \r.

This should work:

 /(\s[A-Z]\.).+?(?=(\s+[A-Z]\.)|(\W*(Answer:)\W?))/g 

Although this solves it, I would also suggest simplifying this regex:

 /\s[A-Z]\..+?(?=\s+[A-Z]\.|\W*Answer:)/g 

Notably, the \W? at the end serves little purpose: it either matches a \W character or it doesn't, and in both cases the match is accepted.

+5
source

All Articles