Splitting a string only when the delimiter is not enclosed in quotation marks

I need to write a split function in JavaScript that splits a string into an array on commas, but only on commas that are not enclosed in quotation marks ( ' or " ).

Here are three examples and how the result (array) should be:

 "peanut, butter, jelly" -> ["peanut", "butter", "jelly"]
 "peanut, 'butter, bread', 'jelly'" -> ["peanut", "butter, bread", "jelly"]
 'peanut, "butter, bread", "jelly"' -> ["peanut", 'butter, bread', "jelly"]

I cannot use the built-in JavaScript split method because it also splits on delimiters that are enclosed in quotation marks.

How can I do this, perhaps with a regular expression?


As for the context: I will use it to split the arguments contained in the third element of the third argument passed to a filter function created with the jQuery $.expr[':'] extension. That parameter is usually named meta; it is an array containing specific information about the filter.

In any case, the third element of this array is a string containing the parameters that are passed with the filter; and since the parameters are in string format, I should be able to split them correctly for parsing.

+4
source share
4 answers

What you are asking for is essentially a JavaScript CSV parser. Search Google for "JavaScript CSV parser" and you will get many hits, many of which include a complete script. See also "Javascript for Parsing CSV Data".

+3
source
 // Extract every run of lowercase letters or every complete quoted string.
 // The backreference \1 forces the closing quote to be the same character
 // as the opening one, so commas inside quotes never split a fragment.
 var str = 'text, foo, "haha, dude", bar';
 var fragments = str.match(/[a-z]+|(['"]).*?\1/g);
 // Result: ["text", "foo", "\"haha, dude\"", "bar"]

Even better (supports escaped " or ' characters inside quoted strings):

 // Sample input with an escaped double quote inside a quoted field. The
 // backslash is doubled in the literal so that it survives into the string
 // (a lone \" inside a single-quoted literal is just a plain ").
 var str = 'text_123 space, foo, "text, here\\", dude", bar, \'one, two\', blob';

 // Alternative 1: an unquoted field of one or more characters that neither
 //                starts nor ends with a space and contains no quote/comma.
 // Alternative 2: a quoted field. (?!\1) stops at the first unescaped quote
 //                of the same kind as the opening one; \\. consumes an
 //                escaped character such as \" or \'.
 var fragments = str.match(/[^"', ](?:[^"',]*[^"', ])?|(["'])(?:(?!\1)[^\\]|\\.)*\1/g);

 // Result:
 //   0: text_123 space
 //   1: foo
 //   2: "text, here\", dude"
 //   3: bar
 //   4: 'one, two'
 //   5: blob
+1
source

Well, I already have a jackhammer of a solution written (the general code was written for something else), so, just for kicks...

 /**
  * A small rule-based lexer.
  *
  * Rules are (regex, lexeme) pairs registered with addRule(). lex() scans a
  * string and, at each position, picks the rule whose match starts earliest
  * (ties broken by the longest match).
  *
  * Optional constructor flags (any order):
  *   Lexer.USE_NEW              - call lexeme functions with `new`.
  *   Lexer.SET_INDEX            - tag produced lexemes under Lexer.DEFAULT_INDEX.
  *   Lexer.SET_INDEX (propName) - tag produced lexemes under `propName`.
  */
 function Lexer () {
   this.setIndex = false;
   this.useNew = false;
   for (var i = 0; i < arguments.length; ++i) {
     var arg = arguments [i];
     if (arg === Lexer.USE_NEW) {
       this.useNew = true;
     } else if (arg === Lexer.SET_INDEX) {
       this.setIndex = Lexer.DEFAULT_INDEX;
     } else if (arg instanceof Lexer.SET_INDEX) {
       this.setIndex = arg.indexProp;
     }
   }
   this.rules = [];
   this.errorLexeme = null;
 }

 // Sentinel: a rule mapped to NULL_LEXEME matches input but emits nothing.
 Lexer.NULL_LEXEME = {};
 // Ready-made marker object that can be passed to setErrorLexeme().
 Lexer.ERROR_LEXEME = {
   toString: function () {
     return "[object Lexer.ERROR_LEXEME]";
   }
 };
 Lexer.DEFAULT_INDEX = "index";
 Lexer.USE_NEW = {};

 /**
  * Constructor flag carrying the property name used to tag lexemes.
  * Works with or without `new`.
  *
  * @param {string} [indexProp] property name; defaults to Lexer.DEFAULT_INDEX.
  */
 Lexer.SET_INDEX = function (indexProp) {
   // Refer to the constructor by name: arguments.callee is unavailable in
   // strict mode, and `new f.apply (...)` would try to construct `apply`.
   if (!(this instanceof Lexer.SET_INDEX)) {
     return new Lexer.SET_INDEX (indexProp);
   }
   if (indexProp === undefined) {
     indexProp = Lexer.DEFAULT_INDEX;
   }
   this.indexProp = indexProp;
 };

 (function () {
   // New.apply (Ctor, args) behaves like `new Ctor (args[0], args[1], ...)`.
   // One trampoline is generated and cached per argument count; the generated
   // source depends only on that count, never on argument values.
   var New = (function () {
     var fs = [];
     return function () {
       var f = fs [arguments.length];
       if (f) {
         return f.apply (this, arguments);
       }
       var argStrs = [];
       for (var i = 0; i < arguments.length; ++i) {
         argStrs.push ("a[" + i + "]");
       }
       f = new Function ("var a=arguments;return new this(" + argStrs.join () + ");");
       if (arguments.length < 100) {
         fs [arguments.length] = f;
       }
       return f.apply (this, arguments);
     };
   }) ();

   // RegExp boolean property -> flag letter. A flag is copied only when the
   // property is truthy, so entries unknown to an engine are simply skipped.
   var flagMap = [
       ["global", "g"]
     , ["ignoreCase", "i"]
     , ["multiline", "m"]
     , ["dotAll", "s"]
     , ["unicode", "u"]
     , ["unicodeSets", "v"]
     , ["hasIndices", "d"]
     , ["sticky", "y"]
   ];

   // Rebuild the flag string of `regex` from its boolean properties.
   function getFlags (regex) {
     var flags = "";
     for (var i = 0; i < flagMap.length; ++i) {
       if (regex [flagMap [i] [0]]) {
         flags += flagMap [i] [1];
       }
     }
     return flags;
   }

   // Predicate factory: not (x) (y) is true when y is not identical to x.
   function not (x) {
     return function (y) {
       return x !== y;
     };
   }

   // A rule always holds a global regex so that lastIndex controls where
   // exec () starts searching; non-global regexes are re-created with "g".
   function Rule (regex, lexeme) {
     if (!regex.global) {
       var flags = "g" + getFlags (regex);
       regex = new RegExp (regex.source, flags);
     }
     this.regex = regex;
     this.lexeme = lexeme;
   }

   Lexer.prototype = {
       constructor: Lexer

       /**
        * Register a rule.
        * @param {RegExp} regex pattern to match.
        * @param {*} lexeme value to emit, or a function producing it.
        */
     , addRule: function (regex, lexeme) {
         var rule = new Rule (regex, lexeme);
         this.rules.push (rule);
       }

       /**
        * Set the lexeme emitted for input that no rule matches. When set,
        * lexing stops at the first position from which nothing matches.
        */
     , setErrorLexeme: function (lexeme) {
         this.errorLexeme = lexeme;
       }

       /**
        * Turn a rule's lexeme into an output value. Non-functions are returned
        * as-is; functions receive the match, its capture groups, the match
        * index and the input string (called with `new` when USE_NEW was given).
        */
     , runLexeme: function (lexeme, exec) {
         if (typeof lexeme !== "function") {
           return lexeme;
         }
         var args = exec.concat (exec.index, exec.input);
         if (this.useNew) {
           return New.apply (lexeme, args);
         }
         return lexeme.apply (null, args);
       }

       /**
        * Tokenize `str`.
        * @param {string} str input text.
        * @returns {Array} emitted lexemes, with Lexer.NULL_LEXEME removed.
        */
     , lex: function (str) {
         var index = 0;
         var lexemes = [];
         // Captured here because `this` is not the lexer inside emit ().
         var indexProp = this.setIndex;

         // Append a lexeme; when index tagging is on, record the scan
         // position on it. Primitives cannot carry properties (assigning one
         // throws in strict mode) and the NULL_LEXEME sentinel is shared and
         // filtered out anyway, so neither is tagged.
         function emit (lexeme) {
           var isTaggable = lexeme !== null
             && (typeof lexeme === "object" || typeof lexeme === "function")
             && lexeme !== Lexer.NULL_LEXEME;
           if (indexProp && isTaggable) {
             lexeme [indexProp] = index;
           }
           lexemes.push (lexeme);
         }

         while (index < str.length) {
           var bestExec = null;
           var bestRule = null;
           for (var i = 0; i < this.rules.length; ++i) {
             var rule = this.rules [i];
             rule.regex.lastIndex = index;
             var exec = rule.regex.exec (str);
             if (!exec) {
               continue;
             }
             // Earliest match wins; on a tie the longest match wins.
             var doUpdate = !bestExec
               || (exec.index < bestExec.index)
               || (exec.index === bestExec.index && exec [0].length > bestExec [0].length);
             if (doUpdate) {
               bestExec = exec;
               bestRule = rule;
             }
           }

           if (!bestExec) {
             if (this.errorLexeme) {
               emit (this.errorLexeme);
               break;
             }
             // No error lexeme configured: skip one character and retry.
             ++index;
             continue;
           }

           // Text skipped before the match is reported as one error lexeme.
           if (this.errorLexeme && index !== bestExec.index) {
             emit (this.errorLexeme);
           }
           emit (this.runLexeme (bestRule.lexeme, bestExec));

           // Resume after the match; always advance by at least one character
           // so that a zero-length match cannot loop forever.
           index = Math.max (bestExec.index + bestExec [0].length, index + 1);
         }
         return lexemes.filter (not (Lexer.NULL_LEXEME));
       }
   };
 }) ();

 // Fallback for engines without Array.prototype.filter.
 if (!Array.prototype.filter) {
   Array.prototype.filter = function (fun) {
     var len = this.length >>> 0;
     var res = [];
     var thisp = arguments [1];
     for (var i = 0; i < len; ++i) {
       if (i in this) {
         var val = this [i];
         if (fun.call (thisp, val, i, this)) {
           res.push (val);
         }
       }
     }
     return res;
   };
 }

Now, to use the code for your problem:

 /**
  * Strip leading and trailing whitespace from a string.
  * @param {string} str
  * @returns {string}
  */
 function trim (str) {
   return str.replace (/^\s+/, "").replace (/\s+$/, "");
 }

 /**
  * Trim a field and, when it is wrapped in one pair of matching quotes
  * (' or "), remove that pair so that 'butter, bread' becomes butter, bread.
  * @param {string} str raw matched field.
  * @returns {string}
  */
 function unquote (str) {
   var field = trim (str);
   var first = field.charAt (0);
   var isQuoted = field.length >= 2
     && (first === '"' || first === "'")
     && field.charAt (field.length - 1) === first;
   return isQuoted ? field.slice (1, -1) : field;
 }

 var splitter = new Lexer ();
 splitter.setErrorLexeme (Lexer.ERROR_LEXEME);
 // A field containing a double-quoted part (commas allowed inside the quotes).
 splitter.addRule (/[^,"]*"[^"]*"[^,"]*/g, unquote);
 // A field containing a single-quoted part.
 splitter.addRule (/[^,']*'[^']*'[^,']*/g, unquote);
 // A plain, unquoted field.
 splitter.addRule (/[^,"']+/g, trim);
 // The delimiter itself produces no output.
 splitter.addRule (/,/g, Lexer.NULL_LEXEME);

 var strs = [
     "peanut, butter, jelly"
   , "peanut, 'butter, bread', 'jelly'"
   , 'peanut, "butter, bread", "jelly"'
 ];

 // NOTE: I'm lazy here, so I'm using Array.prototype.map,
 // which isn't supported in all browsers.
 var splitStrs = strs.map (function (str) {
   return splitter.lex (str);
 });
 // Each of the three inputs yields three fields; the quoted ones come back
 // without their quotes, e.g. ["peanut", "butter, bread", "jelly"].
+1
source

If you can control the input so that the whole string is enclosed in double quotes ", every element is enclosed in single quotation marks ', and no element can CONTAIN a single quote, then you can split by , ' If you CANNOT control the input, then using a regular expression to sort / filter / split the input will be about as useful as using a regular expression to match XHTML (see "RegEx match open tags except XHTML self-contained tags").

-1
source

All Articles