Write a simple parser.
Solving this problem with a regex is the wrong approach. It is the same problem as matching balanced brackets, which is hard to do with a regular expression. In general, regular expressions are not suitable for nested expressions.
Try something like this:
// Pseudo-code skeleton: split the source on the template delimiters and walk
// the pieces in order. The capture group in the regex makes split() keep the
// "{{" and "}}" delimiters as their own entries in the result.
var parts = src.split(/(\{\{|\}\})/);

// Index loop rather than for...in: for...in visits property names (as
// strings), not just array positions.
for (var i = 0; i < parts.length; i++) {
  if (parts[i] === '{{') {
    // starting new (sub) template
  } else if (parts[i] === '}}') {
    // ending (sub) template
  } else {
    // content (or outside)
  }
}
This is just pseudo-code, as I am in a hurry right now; I will update it with working code later.
UPDATE (August 9, 2011)
// Parser states.
var NO_TPL = 0,  // outside any tpl - ignoring...
    IN_TPL = 1,  // inside tpl
    IN_LIST = 3; // inside list of arguments

/**
 * Parses wiki-style template markup into plain objects.
 *
 * Each top-level `{{id | key= value | ...}}` becomes an object `{id: ..., key: value}`.
 * A value may be plain text (stored as a trimmed string), a `[[a|b|c]]` list
 * (stored as an array of trimmed strings) or a nested `{{...}}` template
 * (stored as a nested object). Text outside of any template is ignored.
 *
 * @param {string} src - the wiki source text
 * @returns {Array} the top-level templates, in order of appearance; this is
 *     always the top-level array, even when a template is left unterminated
 * @throws {string} "invalid" when a parameter name inside a template is not
 *     followed by "="
 */
function parseWiki(src) {
  // The capture group makes split() keep the delimiters as separate tokens.
  var tokens = src.split(/(\{\{|\}\}|\||=|\[\[|\]\])/),
      i = -1,
      end = tokens.length - 1,
      token,
      next,
      pushedBack,        // a token that was read ahead and must be delivered again
      state = NO_TPL,
      root = [],         // top-level result; returned regardless of nesting depth
      work = root,       // container currently being filled
      workChain = [],    // enclosing containers, innermost last
      stateChain = [];   // states of the enclosing containers, innermost last

  // get next non empty token (a pushed-back token is delivered first)
  function getNext() {
    var result = pushedBack;
    pushedBack = undefined;
    while (!result && i < end) {
      result = tokens[++i].trim();
    }
    return result;
  }

  // go into tpl / list of arguments
  function goDown(newState, newWork, newWorkKey) {
    stateChain.push(state);
    workChain.push(work);

    if (newWorkKey) {
      work[newWorkKey] = newWork;
    } else {
      work.push(newWork);
    }

    work = newWork;
    state = newState;
  }

  // jump up from tpl / list of arguments
  function goUp() {
    work = workChain.pop();
    state = stateChain.pop();
  }

  // state machine
  while ((token = getNext())) {
    switch (state) {

      case IN_TPL:
        switch (token) {
          case '}}':
            goUp();
            break;
          case '|':
            break;
          default:
            // token is a parameter name; it must be followed by "="
            next = getNext();
            if (next !== '=') throw "invalid";

            next = getNext();
            if (next === '[[') {
              goDown(IN_LIST, [], token);
            } else if (next === '{{') {
              goDown(IN_TPL, {id: getNext()}, token);
            } else if (!next || next === '|' || next === '}}') {
              // Empty value: the token read ahead is a delimiter (or the end
              // of input), so store '' and hand the delimiter back instead
              // of swallowing it as the value.
              work[token] = '';
              pushedBack = next;
            } else {
              work[token] = next;
            }
        }
        break;

      case IN_LIST:
        switch (token) {
          case ']]':
            goUp();
            break;
          case '|':
            break;
          default:
            work.push(token);
        }
        break;

      case NO_TPL:
        if (token === '{{') {
          next = getNext();
          goDown(IN_TPL, {id: next});
        }
        break;
    }
  }

  // Return the root rather than `work`: on unterminated input `work` would
  // still point at an inner template or list.
  return root;
}
UNIT TESTS
// Jasmine spec for parseWiki, written as a table: every row names one
// expectation, the wiki source fed to the parser and the structure that
// parseWiki must return for it.
describe('wikiTpl', function() {
  var cases = [
    {
      title: 'should do empty tpl',
      input: '{{name}}',
      expected: [{id: 'name'}]
    },
    {
      title: 'should ignore text outside from tpl',
      input: ' abc {{name}} x y',
      expected: [{id: 'name'}]
    },
    {
      title: 'should do simple param',
      input: '{{tpl | p1= 2}}',
      expected: [{id: 'tpl', p1: '2'}]
    },
    {
      title: 'should do list of arguments',
      input: '{{name | a= [[1|two]]}}',
      expected: [{id: 'name', a: ['1', 'two']}]
    },
    {
      title: 'should do param after list',
      input: '{{name | a= [[1|two|3]] | p2= true}}',
      expected: [{id: 'name', a: ['1', 'two', '3'], p2: 'true'}]
    },
    {
      title: 'should do more tpls',
      input: '{{first | a= [[1|two|3]] }} odd test {{second | b= 2}}',
      expected: [{id: 'first', a: ['1', 'two', '3']}, {id: 'second', b: '2'}]
    },
    {
      title: 'should allow nested tpl',
      input: '{{name | a= {{nested | p1= 1}} }}',
      expected: [{id: 'name', a: {id: 'nested', p1: '1'}}]
    }
  ];

  // Register one spec per row, in table order.
  cases.forEach(function(testCase) {
    it(testCase.title, function() {
      expect(parseWiki(testCase.input)).toEqual(testCase.expected);
    });
  });
});
Note: I use Jasmine syntax for these unit tests. You can easily run them using AngularJS, which contains the entire test environment - check it out at http://angularjs.org