Regex to match a MediaWiki template and its parameters

I am writing a simple JavaScript snippet that adds a specific parameter to a specific template in the article that is currently being edited.

Wikipedia templates are structured in the following format:

{{Template name|unnamed parameter|named parameter=some value|another parameter=[[target article|article name]]|parameter={{another template|another template parameter}}}} 

A single template can also span several lines, for example:

 {{Template
 |name=John
 |surname=Smith
 |pob=[[London|London, UK]]
 }}

For more information, please view http://en.wikipedia.org/wiki/Help:Template

So, first, I would like to match the whole template. I came up with a partial solution, which is:

 // Lazily match from "{{template name" up to the first "}}" that sits at the end of a line
 // (the `m` flag makes `$` match at line ends), so a nested template's "}}" can end the match early.
 document.editform.wpTextbox1.value.match(/\{\{template name((.|\n)*?)\}\}$/gmis) 

However, the problem is that it only matches the text from the opening braces up to the closing braces of the first nested template (see the first example).

In addition, I would like to get the template's parameters as an array, with the values in a specific order. For example: Array(value of the pob parameter, value of the name parameter, value of the surname parameter, value of the pod parameter (empty in this case, because it is not set)).

I would use this to clear custom formatting in some articles and add some new options.

Thanks!

+4
source share
1 answer

Write a simple parser.

Solving this problem with a regex is the wrong approach. It is the same problem as matching balanced brackets, which is hard to do with a regular expression. Regular expressions are not suitable for nested expressions in general.

Try something like this:

 // Tokenize the source; the capturing group keeps the "{{" and "}}" delimiters
 // in the resulting array, so every element is either a delimiter or plain text.
 var parts = src.split(/(\{\{|\}\})/);

 // Walk the tokens by index (for...in would iterate keys, not a guaranteed ordered list).
 for (var i = 0; i < parts.length; i++) {
   if (parts[i] === '{{') {
     // starting new (sub) template
   } else if (parts[i] === '}}') {
     // ending (sub) template
   } else {
     // content (or outside)
   }
 }

This is just pseudocode, as I am in a hurry right now; I will update it to working code later ...

UPDATE (August 9, 2011)

 // Parser states.
 var NO_TPL = 0,  // outside any tpl - ignoring...
     IN_TPL = 1,  // inside tpl
     IN_LIST = 3; // inside list of arguments

 /**
  * Parse the templates found in a piece of wiki text.
  *
  * Text outside of templates is ignored. Each top-level template becomes one
  * object in the returned array:
  *   - `id` holds the template name;
  *   - a named parameter `key=value` becomes the property `key`;
  *   - an unnamed parameter is stored under its 1-based position ("1", "2", ...),
  *     counted separately for every (nested) template;
  *   - a `[[a|b]]` value becomes the array `['a', 'b']`;
  *   - a `{{nested|...}}` value becomes a nested template object;
  *   - a parameter with nothing after `=` gets the empty string.
  * All names and values are trimmed.
  *
  * @param {string} src wiki text to parse
  * @returns {Array} top-level templates in source order; this is always the
  *     top-level array, even when a template is left unterminated
  * @throws {string} the string "invalid" when a parameter mixes plain text with
  *     other tokens in a way this parser cannot represent (e.g. `a [[b]]`)
  */
 function parseWiki(src) {
   // The capturing group keeps the delimiters as tokens of their own.
   var tokens = src.split(/(\{\{|\}\}|\||=|\[\[|\]\])/),
       i = -1,
       end = tokens.length - 1,
       token,
       next,
       state = NO_TPL,
       root = [],          // top-level result, returned no matter how parsing ends
       work = root,        // container currently being filled
       argIndex = 0,       // unnamed parameters seen so far in the current template
       workChain = [],
       stateChain = [],
       argIndexChain = [];

   // get next non empty token ('' once the input is exhausted)
   function getNext() {
     var value = '';
     while (!value && i < end) {
       value = tokens[++i].trim();
     }
     return value;
   }

   // go into tpl / list of arguments
   function goDown(newState, newWork, newWorkKey) {
     stateChain.push(state);
     workChain.push(work);
     argIndexChain.push(argIndex);
     if (newWorkKey) {
       work[newWorkKey] = newWork;
     } else {
       work.push(newWork);
     }
     work = newWork;
     state = newState;
     argIndex = 0; // positional numbering restarts inside every nested container
   }

   // jump up from tpl / list of arguments
   function goUp() {
     work = workChain.pop();
     state = stateChain.pop();
     argIndex = argIndexChain.pop();
   }

   // store a parameter whose value starts with the token `value`
   function setParam(key, value) {
     if (value === '[[') {
       goDown(IN_LIST, [], key);
     } else if (value === '{{') {
       goDown(IN_TPL, {id: getNext()}, key);
     } else if (value === '|' || value === '}}') {
       // Empty value: the delimiter belongs to the template, not to the
       // parameter, so push it back for the main loop to handle.
       i -= 1;
       work[key] = '';
     } else {
       work[key] = value;
     }
   }

   // state machine
   while ((token = getNext())) {
     switch (state) {
       case IN_TPL:
         if (token === '}}') {
           goUp();
         } else if (token === '|') {
           // parameter separator - nothing to do
         } else if (token === '[[' || token === '{{') {
           // unnamed parameter whose value is a list or a nested template
           argIndex += 1;
           setParam(String(argIndex), token);
         } else {
           next = getNext();
           if (next === '=') {
             // named parameter
             setParam(token, getNext());
           } else if (token !== '=' && token !== ']]' &&
                      (!next || next === '|' || next === '}}')) {
             // unnamed parameter made of plain text
             if (next) {
               i -= 1; // push the delimiter back for the main loop
             }
             argIndex += 1;
             work[String(argIndex)] = token;
           } else {
             // NOTE: a plain string is thrown on purpose, to stay compatible
             // with callers that compare the caught value with "invalid".
             throw "invalid";
           }
         }
         break;

       case IN_LIST:
         if (token === ']]') {
           goUp();
         } else if (token !== '|') {
           work.push(token);
         }
         break;

       case NO_TPL:
         if (token === '{{') {
           next = getNext();
           goDown(IN_TPL, {id: next});
         }
         break;
     }
   }

   return root;
 }

UNIT TESTS

 // Jasmine spec for parseWiki: every row below becomes one `it` that feeds
 // `input` to the parser and compares the result with `expected`.
 describe('wikiTpl', function() {
   var cases = [
     {
       title: 'should do empty tpl',
       input: '{{name}}',
       expected: [{id: 'name'}]
     },
     {
       title: 'should ignore text outside from tpl',
       input: ' abc {{name}} x y',
       expected: [{id: 'name'}]
     },
     {
       title: 'should do simple param',
       input: '{{tpl | p1= 2}}',
       expected: [{id: 'tpl', p1: '2'}]
     },
     {
       title: 'should do list of arguments',
       input: '{{name | a= [[1|two]]}}',
       expected: [{id: 'name', a: ['1', 'two']}]
     },
     {
       title: 'should do param after list',
       input: '{{name | a= [[1|two|3]] | p2= true}}',
       expected: [{id: 'name', a: ['1', 'two', '3'], p2: 'true'}]
     },
     {
       title: 'should do more tpls',
       input: '{{first | a= [[1|two|3]] }} odd test {{second | b= 2}}',
       expected: [{id: 'first', a: ['1', 'two', '3']}, {id: 'second', b: '2'}]
     },
     {
       title: 'should allow nested tpl',
       input: '{{name | a= {{nested | p1= 1}} }}',
       expected: [{id: 'name', a: {id: 'nested', p1: '1'}}]
     }
   ];

   // Register the specs in table order.
   cases.forEach(function(spec) {
     it(spec.title, function() {
       expect(parseWiki(spec.input)).toEqual(spec.expected);
     });
   });
 });

Note. I use Jasmine syntax for these unit tests. You can easily run it using AngularJS, which contains the entire test environment - check it out at http://angularjs.org

+7
source

All Articles