Generating Spirit parser expressions from a variadic list of alternative parser expressions

I am looking for the simplest way to implement a variadic function that takes a list of boost::spirit::qi rules and expands the list into an expression of the form: rule1 | rule2 | rule3 | .... Assume that the rules do not synthesize an attribute. Your kind help is greatly appreciated.

// Spirit.Qi provides the parsers; the Phoenix operators are used in semantic actions.
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <string>
#include <iostream>
#include <boost/spirit/include/phoenix_operator.hpp>

namespace qi = boost::spirit::qi;
namespace ph = boost::phoenix;
namespace ascii = boost::spirit::ascii;

using boost::spirit::qi::phrase_parse;
using boost::spirit::qi::ascii::space;
using boost::spirit::iso8859_1::char_;

// Rule over std::string iterators that uses an ASCII whitespace skipper and
// declares no attribute signature.
typedef qi::rule<std::string::const_iterator,ascii::space_type> mrule_t;
// Rule over std::string iterators without a skipper, declared with the
// signature std::string().
typedef qi::rule< std::string::const_iterator,std::string() > wrule_t;

 //How to deduce expandBitwise() return type ? template<typename T> T expandBitwise(T& t) { return t.rule_; } template<typename T,typename ...Tail> T expandBitwise(T& t,Tail& ...tail) { return t.rule_ | expandBitwise(tail...); } struct TStruct { mrule_t rule_; template<typename T,typename R> TStruct( T& rVar,const std::string&name, R& rule ) : rule_( qi::lit( name ) >> rule[ ph::ref( rVar )=qi::_1 ] ) {} }; template<typename T,typename ...Tail> void mparse(const std::string& line,T& t,Tail& ...tail) { std::string::const_iterator f,l; f=line.begin(); l=line.end(); 

  // I would like to expand the rules here ... //if(phrase_parse(f,l,expandBitwise(t,tail...),space ) && f==l) if( phrase_parse(f, l, t.rule_, space ) && f==l ) std::cout<<"Parsed:"<<line<<std::endl; else std::cout<<"Syntax error:"<<line<<std::endl; } int main() { wrule_t rword=+~space; std::string par1,par2,par3,par4; TStruct r1( par1,"-a", rword ); TStruct r2( par2,"-b", rword ); TStruct r3( par3,"-c", rword ); TStruct r4( par4,"-d", rword ); mparse("abc 8.81" ,r1,r2,r3,r4); mparse("-a atoken" ,r1,r2,r3,r4); mparse("-b btoken" ,r1,r2,r3,r4); mparse("-c ctoken" ,r1,r2,r3,r4); mparse("-d dtoken" ,r1,r2,r3,r4); return 0; } 
+4
source share
2 answers

Thanks for the quick tip! I just tried your code and, unless I am doing something wrong, I get this output: Syntax error:abc 8.81 Parsed:-a atoken Syntax error:-b btoken Syntax error:-c ctoken Syntax error:-d dtoken - G. Civardi 2 hours ago

OK, so I couldn't leave it alone :/

It turns out that Undefined Behavior was involved, due to the way the parser expressions were passed to expandBitwise and copied: Boost Proto expression templates are not designed to be copied, since they may contain references to temporaries, which expire at the end of their containing full-expression.

For more information, see Zero to 60 mph in 2 seconds!

After a long (long) time spent tweaking with rule_.alias() and boost::proto::deep_copy, I arrived at the following solution (which, by the way, does not need a helper function at all):

 // Parses `line` against the alternative of the rule_ members of all objects
 // in `tail` and prints the outcome.
 template<typename ...Tail>
 void mparse(const std::string& line,Tail& ...tail)
 {
     // Fold a tuple of references to every rule_ into a single alternative
     // parser. The fold starts from qi::eps(false); in the functor, arg1 is
     // the accumulated state and arg2 the current element (Boost.Fusion fold
     // convention). Every intermediate `|` expression is passed through
     // deepcopy_ before it becomes the next state.
     auto parser = boost::fusion::fold(
             boost::tie(ph::bind(&TStruct::rule_, arg1)(tail)...),
             qi::eps(false),
             deepcopy_(arg2 | arg1)
         );

     auto f=begin(line), l=end(line);

     if( qi::phrase_parse(f, l, parser, ascii::space ) )
         std::cout << "Parsed:" << line << std::endl;
     else
         std::cout << "Syntax error:" << line << std::endl;

     // Leftover input is reported separately from the parse result.
     if (f!=l)
         std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n";
 }

The protection against UB is the call to deepcopy_(), which is a trivial polymorphic callable adaptor for boost::proto::deep_copy:

 // Function object that forwards its argument to boost::proto::deep_copy.
 struct DeepCopy
 {
     // Return-type helper: the type produced by deep-copying an expression
     // of type E.
     template<typename E>
     struct result { typedef typename boost::proto::result_of::deep_copy<E>::type type; };

     template<typename E>
     typename result<E>::type
     operator()(E const& expr) const {
         return boost::proto::deep_copy(expr);
     }
 };

 // Phoenix function wrapper around DeepCopy, so that the copy can be
 // requested from inside a Phoenix expression.
 static const ph::function<DeepCopy> deepcopy_;

With this code, lo and behold, the output becomes:

 Syntax error:abc 8.81 Remaining unparsed: 'abc 8.81' Parsed:-a atoken Parsed:-b btoken Parsed:-c ctoken Parsed:-d dtoken Bye 

As a bonus, the code now lets you use Spirit's built-in debugging facilities (uncomment the `debug(rule_);` line in the full code below):

 <-d> <try>abc 8.81</try> <fail/> </-d> <-c> <try>abc 8.81</try> <fail/> </-c> <-b> <try>abc 8.81</try> <fail/> </-b> <-a> <try>abc 8.81</try> <fail/> </-a> Syntax error:abc 8.81 Remaining unparsed: 'abc 8.81' 

Tested with

  • Boost 1_54_0
  • GCC 4.7.2, 4.8.x, Clang 3.2
  • Note the #defines, which are significant.

FULL CODE

 #define BOOST_RESULT_OF_USE_DECLTYPE #define BOOST_SPIRIT_USE_PHOENIX_V3 #include <boost/fusion/adapted/boost_tuple.hpp> #include <boost/fusion/include/fold.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/phoenix.hpp> namespace qi = boost::spirit::qi; namespace ph = boost::phoenix; namespace ascii = boost::spirit::ascii; using namespace ph::arg_names; typedef qi::rule<std::string::const_iterator,ascii::space_type> mrule_t; typedef qi::rule<std::string::const_iterator,std::string() > wrule_t; struct TStruct { mrule_t rule_; template<typename T,typename R> TStruct( T& rVar,const std::string&name, R& rule ) : rule_( qi::lit(name) >> rule[ ph::ref(rVar) = qi::_1 ] ) { rule_.name(name); // debug(rule_); } }; struct DeepCopy { template<typename E> struct result { typedef typename boost::proto::result_of::deep_copy<E>::type type; }; template<typename E> typename result<E>::type operator()(E const& expr) const { return boost::proto::deep_copy(expr); } }; static const ph::function<DeepCopy> deepcopy_; template<typename ...Tail> void mparse(const std::string& line,Tail& ...tail) { auto parser = boost::fusion::fold( boost::tie(ph::bind(&TStruct::rule_, arg1)(tail)...), qi::eps(false), deepcopy_(arg2 | arg1) ); auto f=begin(line), l=end(line); if( qi::phrase_parse(f, l, parser, ascii::space ) ) std::cout << "Parsed:" << line << std::endl; else std::cout << "Syntax error:" << line << std::endl; if (f!=l) std::cout << "Remaining unparsed: '" << std::string(f,l) << "'\n"; } int main() { wrule_t rword=+~ascii::space; std::string par1,par2,par3,par4; TStruct r1( par1, "-a", rword ); TStruct r2( par2, "-b", rword ); TStruct r3( par3, "-c", rword ); TStruct r4( par4, "-d", rword ); mparse("abc 8.81" ,r1,r2,r3,r4); mparse("-a atoken" ,r1,r2,r3,r4); mparse("-b btoken" ,r1,r2,r3,r4); mparse("-c ctoken" ,r1,r2,r3,r4); mparse("-d dtoken" ,r1,r2,r3,r4); std::cout << "Bye\n"; } 
+4
source

You accidentally returned the TStruct type from the expandBitwise helper. Fix it like this:

 // Base case: a single wrapper object; returns its rule_ member, with the
 // return type taken from the declared type of rule_.
 template<typename T>
 auto expandBitwise(T const& t) -> decltype(t.rule_)
 {
     return t.rule_;
 }

 // Recursive case: rule_ of the first object combined with `|` with the
 // expansion of the remaining objects. The return type is again the declared
 // type of rule_, so the `|` expression is converted to that type on return.
 // NOTE(review): the other answer on this page reports undefined behavior
 // from the way expressions are passed to expandBitwise and copied - confirm
 // before reusing this version.
 template<typename T,typename ...Tail>
 auto expandBitwise(T const& t,Tail const&... tail) -> decltype(t.rule_)
 {
     return t.rule_ | expandBitwise(tail...);
 }

If you also want to expose attributes, the rules for deducing the return type become more involved. Basically, what you are doing is duplicating a part of Spirit's EDSL.


Let them exchange stories ...

Clippy: It looks like you're trying to write a command line parser. Would you like help with that?

The DSL machinery for your option parser could be implemented more systematically by creating a new Proto domain and actually creating terminals. That approach rather appeals to me at the moment.

Alternatively, you can approach it from a completely different angle using the Nabialek Trick. This happens to be an approach I was playing with just a few weeks ago, and I will share the design I came up with: https://gist.github.com/sehe/2a556a8231606406fe36#file-test-cpp

The important part is that the grammar is “fixed”:

 // A list of optional `argument`s separated by '\0' characters.
 start    = -argument % '\0';
 // One or more characters that do not match `nul` (presumably the '\0'
 // parser - confirm against the full grammar), collected as a string and
 // reported on std::cerr as ignored.
 unparsed = as_string [ +~nul ] [ std::cerr << phx::val("ignoring unparsed argument: '") << _1 << "'\n" ];
 // `>>` binds tighter than `|`, so the optional `-unparsed` tail belongs to
 // the "--" branch only.
 argument = ('-' >> +shortopt) | ("--" >> longopt) >> -unparsed | unparsed;

The trick is in:

 // Match an option name via the symbol table, store the value associated
 // with it in the rule local _a, then invoke that value as a parser through
 // lazy().
 shortopt = shortNames [_a = _1] >> lazy(_a);
 longopt  = longNames  [_a = _1] >> lazy(_a);

Where shortNames and longNames are qi::symbols parser tables built dynamically from the variadic list of CliOptions and CliFlags (I pass them as a tuple because I wanted to save the result inside the CliOption structure).

qi::lazy(_a) calls the parser that was stored in the symbol table.

As a bonus, my CliOptions parser has a function to generate usage information. The builders for the parse expressions, as well as for the usage information, are extensible.

 int main(int argc, char* argv[]) { using CliParsing::make_option; typedef std::string::const_iterator It; auto config = std::make_tuple( make_option('a', "absolutely", "absolutely"), make_option('b', "borked" , "borked") , make_option('c', "completion", "completion"), make_option('d', "debug", "turn on debugging"), make_option('e', "", "no long name") , //make_option('f', "flungeons" , "flungeons") , //make_option('g', "goofing" , "") , //make_option('m', "monitor", "monitoring level"), make_option('t', "testing" , "testing flags"), make_option('\0',"file" , "with a filename (no short name)"), make_option('y', "assume-yes", "always assume yes"), make_option('v', "verbose", "increase verbosity level"), make_option('i', "increment", "stepsize to increment with", 5) ); CliParsing::OptionGrammar<It> parser(config); using namespace phx::arg_names; const auto cmdline = std::accumulate(argv+1, argv+argc, std::string(), arg1 + arg2 + '\0'); bool ok = qi::parse(begin(cmdline), end(cmdline), parser); std::cout << "Parse success " << std::boolalpha << ok << "\n"; std::cout << parser.getUsage(); return ok? 0 : 255; } 

When called with some random arguments, e.g. -i 3 --completion -t --file=SOME.TXT -b huh?, it prints:

 short form option --increment parsed ignoring unparsed argument: '3' long form switch --completion parsed short form switch --testing parsed long form switch --file parsed ignoring unparsed argument: '=SOME.TXT' short form switch --borked parsed ignoring unparsed argument: 'huh?' Parse success true --absolutely (-a) absolutely (flag) --borked (-b) borked (flag) --completion (-c) completion (flag) --debug (-d) turn on debugging (flag) -e no long name (flag) --testing (-t) testing flags (flag) --file with a filename (no short name) (flag) --assume-yes (-y) always assume yes (flag) --verbose (-v) increase verbosity level (flag) --increment (-i) stepsize to increment with (option with value; default '5') 

As you can see, not everything has been implemented yet (in particular, -- to mark the end of the list of options).

+2
source

All Articles