Boost.Spirit Lex -> Qi parser: getting the "undocumented" on_success mechanism to work

Edit: I removed the lexer because it does not integrate cleanly with Qi and just obfuscates the grammar (see here).


on_success is poorly documented, and I am trying to hook it up to my parser. The examples that use on_success only cover parsers built on Qi alone, i.e. without Lex.

This is how I am trying to introduce the construct:

 using namespace qi::labels; qi::on_success(event_entry_,std::cout << _val << _1); 

But it does not compile. I suspect Lex is the problem. Can someone tell me what I am doing wrong and, secondly, which placeholders are available, what their types are, and what they represent (since they are not documented)?

The full file is as follows:

 #include <boost/spirit/include/phoenix_core.hpp>
#include <boost/spirit/home/phoenix/bind/bind_member_variable.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/none.hpp>
#include <boost/cstdint.hpp>
#include <boost/fusion/include/adapt_struct.hpp>
#include <string>
#include <exception>
#include <vector>

namespace lex = boost::spirit::lex;
namespace px = boost::phoenix;
namespace qi = boost::spirit::qi;
namespace ascii = boost::spirit::ascii;

// Lexer definition: one token_def per punctuation mark, keyword and value
// token. Several tokens carry a semantic action that traces the match to
// std::cout.
template <typename Lexer>
struct tokens : lex::lexer<Lexer>
{
    tokens()
      : left_curly("\"{\""),
        right_curly("\"}\""),
        left_paren("\"(\""),
        right_paren("\")\""),
        colon(":"),
        scolon(";"),
        namespace_("(?i:namespace)"),
        event("(?i:event)"),
        optional("(?i:optional)"),
        required("(?i:required)"),
        repeated("(?i:repeated)"),
        t_int_4("(?i:int4)"),
        t_int_8("(?i:int8)"),
        t_string("(?i:string)"),
        ordinal("\\d+"),
        identifier("\\w+")
    {
        using boost::spirit::lex::_val;

        // Tokens registered in the default lexer state.
        this->self =
              left_curly   [ std::cout << px::val("lpar") << std::endl]
            | right_curly  [ std::cout << px::val("rpar") << std::endl]
            | left_paren
            | right_paren
            | colon        [ std::cout << px::val("colon") << std::endl]
            | scolon
            | namespace_   [ std::cout << px::val("kw namesapce") << std::endl]
            | event        [ std::cout << px::val("kw event") << std::endl]
            | optional     [ std::cout << px::val("optional ") << "-->" << _val << "<--" << std::endl]
            | required     [ std::cout << px::val("required") << std::endl]
            | repeated
            | t_int_4
            | t_int_8
            | t_string
            | ordinal      [ std::cout << px::val("val ordinal (") << _val << ")" << std::endl]
            | identifier   [std::cout << px::val("val identifier(") << _val << ")" << std::endl];

        // Whitespace is registered in its own lexer state, "WS"; main() hands
        // that state to the parser as the skipper.
        this->self("WS") = lex::token_def<>("[ \\t\\n]+");
    }

    // Tokens declared with lex::omit.
    lex::token_def<lex::omit> left_curly, right_curly, colon, scolon,repeated, left_paren, right_paren;
    lex::token_def<lex::omit> namespace_, event, optional, required,t_int_4, t_int_8, t_string;
    // Tokens declared with a value type.
    lex::token_def<boost::uint32_t> ordinal;
    lex::token_def<> identifier;
};

enum event_entry_qualifier
{
    ENTRY_OPTIONAL,
    ENTRY_REQUIRED,
    ENTRY_REPEATED
};

enum entry_type
{
    RBL_INT4,
    RBL_INT8,
    RBL_STRING,
    RBL_EVENT
};

// Attribute of the oid_ rule (ordinal, colon, identifier).
struct oid
{
    boost::uint32_t ordinal;
    std::string name;
};

BOOST_FUSION_ADAPT_STRUCT
(
    oid,
    (boost::uint32_t, ordinal)
    (std::string, name)
)

// Attribute of the type_descriptor_ rule: a type id plus a string member
// that the grammar fills either with "" or with an identifier token.
struct type_descriptor
{
    entry_type type_id;
    std::string referenced_event;
};

BOOST_FUSION_ADAPT_STRUCT
(
    type_descriptor,
    (entry_type, type_id)
    (std::string, referenced_event)
)

// Attribute of the event_entry_ rule.
struct event_entry
{
    event_entry_qualifier qualifier;
    oid identifier;
    type_descriptor descriptor;
};

BOOST_FUSION_ADAPT_STRUCT
(
    event_entry,
    (event_entry_qualifier, qualifier)
    (oid, identifier)
    (type_descriptor, descriptor)
)

// Attribute of the start rule: an oid followed by a list of entries.
struct event_descriptor
{
    oid identifier;
    std::vector<event_entry> event_entries;
};

BOOST_FUSION_ADAPT_STRUCT
(
    event_descriptor,
    (oid, identifier)
    (std::vector<event_entry>, event_entries)
)

// Qi grammar over the token stream; the skipper is a lexer state
// (qi::in_state_skipper<Lexer>).
template <typename Iterator, typename Lexer>
struct grammar : qi::grammar<Iterator,event_descriptor(), qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    grammar(TokenDef const& tok)
      : grammar::base_type(event_descriptor_)
    {
        using qi::_val;

        //start = event;
        event_descriptor_ =
               tok.event
            >> oid_
            >> tok.left_curly
            >> *(event_entry_)
            >> tok.right_curly;

        event_entry_ =
               event_qualifier
            >> oid_
            >> type_descriptor_
            >> tok.scolon;

        event_qualifier =
              tok.optional [ _val = ENTRY_OPTIONAL]
            | tok.required [ _val = ENTRY_REQUIRED]
            | tok.repeated [ _val = ENTRY_REPEATED];

        oid_ = tok.ordinal >> tok.colon >> tok.identifier;

        type_descriptor_ =
            (( atomic_type >> qi::attr(""))
            | ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren));

        atomic_type =
              tok.t_int_4  [ _val = RBL_INT4]
            | tok.t_int_8  [ _val = RBL_INT8]
            | tok.t_string [ _val = RBL_STRING];

        event_type = tok.event [_val = RBL_EVENT];

        // The on_success registration that the question reports as failing
        // to compile.
        using namespace qi::labels;
        qi::on_success(event_entry_,std::cout << _val << _1);
    }

    qi::rule<Iterator> start;
    qi::rule<Iterator, event_descriptor(), qi::in_state_skipper<Lexer> > event_descriptor_;
    qi::rule<Iterator, event_entry(), qi::in_state_skipper<Lexer> > event_entry_;
    qi::rule<Iterator, event_entry_qualifier()> event_qualifier;
    qi::rule<Iterator, entry_type()> atomic_type;
    qi::rule<Iterator, entry_type()> event_type;
    qi::rule<Iterator, type_descriptor(),qi::in_state_skipper<Lexer> > type_descriptor_;
    qi::rule<Iterator, oid()> oid_;
};

// Sample input handed to the parser in main().
std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 123:hassan int4; } ";

int main()
{
    typedef lex::lexertl::token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type;
    typedef lex::lexertl::actor_lexer<token_type> lexer_type;
    typedef tokens<lexer_type>::iterator_type iterator_type;

    tokens<lexer_type> token_lexer;
    grammar<iterator_type,tokens<lexer_type>::lexer_def> event_grammar(token_lexer);

    // The lexer's begin() is given a named iterator variable rather than a temporary.
    std::string::iterator input_pos = test.begin();
    iterator_type token_begin = token_lexer.begin(input_pos, test.end());
    iterator_type token_end = token_lexer.end();

    // Tokens from the "WS" lexer state are used as the skipper.
    const bool ok = qi::phrase_parse(token_begin, token_end, event_grammar,
                                     qi::in_state("WS")[token_lexer.self]);
    if (!ok)
    {
        std::cout << "parsing failed" << std::endl;
    }
}
+5
c ++ parsing lexer boost-spirit
source share
1 answer

Looking at the header files, I think the meaning of the placeholders is:

 _1 = Iterator position when the rule was tried. _2 = Iterator to the end of the input. _3 = Iterator position right after the rule has been successfully matched. 

(Since I'm not sure if the lines above are clear, here is a small example with your input)

                                  rule being tried
                          _________________________________
                         ´                                 `
  [EVENT][1][:][sihan][{][OPTIONAL][123][:][hassan][int4][;][OPTIONAL][321][:][hassan2][int4][;][}]
                         _1                                 _3                                     _2

As GManNickG mentions in the comments, these are lexer iterators, and you cannot easily get the source string with them. The conjure2 example combines the use of a lexer with on_error / on_success. To do so, it uses a special token type, position_token. This token always has access to the pair of iterators into the source string associated with it (a normal token loses this information when lex::omit is used). position_token has some interesting methods: matched() returns an iterator_range<OriginalIterator>, and begin() and end() return the corresponding iterators.

In the code below, I decided to create a phoenix::function that takes two lexer iterators (called with _1 and _3) and returns a string that covers the distance between them (using std::string(begin_iter->begin(), end_iter->begin()) ).

One problem I found was that having the whitespace in a different lexer state caused the iterators returned by position_token to be invalid. To solve this, I put everything in the same state and simply used lex::_pass = lex::pass_flags::pass_ignore on the whitespace token.

The last (minor) problem is that if you want to use std::cout << _val , you need to define operator<< for the types you are interested in.

PS: I always use BOOST_SPIRIT_USE_PHOENIX_V3, which requires every Spirit/Phoenix header to be included from boost/spirit/include/... If for any reason you need or want to use V2, you will have to change the phoenix::function. I also did not use an old-style for loop, so if you cannot use C++11 you will have to change the definition of operator<< for event_descriptor.


 #define BOOST_SPIRIT_USE_PHOENIX_V3 // #define BOOST_SPIRIT_DEBUG #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/phoenix_core.hpp> #include <boost/spirit/include/phoenix_bind.hpp> //CHANGED #include <boost/spirit/include/lex_lexertl.hpp> #include <boost/spirit/include/lex_lexertl_position_token.hpp> //ADDED #include <boost/none.hpp> #include <boost/cstdint.hpp> #include <boost/fusion/include/adapt_struct.hpp> #include <string> #include <exception> #include <vector> namespace lex = boost::spirit::lex; namespace px = boost::phoenix; namespace qi = boost::spirit::qi; namespace ascii = boost::spirit::ascii; template <typename Lexer> struct tokens : lex::lexer<Lexer> { tokens() : left_curly("\"{\""), right_curly("\"}\""), left_paren("\"(\""), right_paren("\")\""), colon(":"), scolon(";"), namespace_("(?i:namespace)"), event("(?i:event)"), optional("(?i:optional)"), required("(?i:required)"), repeated("(?i:repeated)"), t_int_4("(?i:int4)"), t_int_8("(?i:int8)"), t_string("(?i:string)"), ordinal("\\d+"), identifier("\\w+") { using boost::spirit::lex::_val; this->self = left_curly //[ std::cout << px::val("lpar") << std::endl] | right_curly //[ std::cout << px::val("rpar") << std::endl] | left_paren | right_paren | colon //[ std::cout << px::val("colon") << std::endl] | scolon | namespace_ // [ std::cout << px::val("kw namesapce") << std::endl] | event // [ std::cout << px::val("kw event") << std::endl] | optional //[ std::cout << px::val("optional ") << "-->" << _val << "<--" << std::endl] | required //[ std::cout << px::val("required") << std::endl] | repeated | t_int_4 | t_int_8 | t_string | ordinal //[ std::cout << px::val("val ordinal (") << _val << ")" << std::endl] | identifier //[std::cout << px::val("val identifier(") << _val << ")" << std::endl] | lex::token_def<>("[ \\t\\n]+") [lex::_pass = lex::pass_flags::pass_ignore] //CHANGED ; } lex::token_def<lex::omit> left_curly, right_curly, left_paren, right_paren, colon, scolon; 
lex::token_def<lex::omit> namespace_, event, optional, required, repeated, t_int_4, t_int_8, t_string; lex::token_def<boost::uint32_t> ordinal; lex::token_def<> identifier; }; enum event_entry_qualifier { ENTRY_OPTIONAL, ENTRY_REQUIRED, ENTRY_REPEATED }; enum entry_type { RBL_INT4, RBL_INT8, RBL_STRING, RBL_EVENT }; struct oid { boost::uint32_t ordinal; std::string name; }; BOOST_FUSION_ADAPT_STRUCT ( oid, (boost::uint32_t, ordinal) (std::string, name) ) std::ostream& operator<<(std::ostream& os, const oid& val) //ADDED { return os << val.ordinal << "-" << val.name; } struct type_descriptor { entry_type type_id; std::string referenced_event; }; BOOST_FUSION_ADAPT_STRUCT ( type_descriptor, (entry_type, type_id) (std::string, referenced_event) ) std::ostream& operator<<(std::ostream& os, const type_descriptor& val) //ADDED { return os << val.type_id << "-" << val.referenced_event; } struct event_entry { event_entry_qualifier qualifier; oid identifier; type_descriptor descriptor; }; BOOST_FUSION_ADAPT_STRUCT ( event_entry, (event_entry_qualifier, qualifier) (oid, identifier) (type_descriptor, descriptor) ) std::ostream& operator<<(std::ostream& os, const event_entry& val) //ADDED { return os << val.qualifier << "-" << val.identifier << "-" << val.descriptor; } struct event_descriptor { oid identifier; std::vector<event_entry> event_entries; }; BOOST_FUSION_ADAPT_STRUCT ( event_descriptor, (oid, identifier) (std::vector<event_entry>, event_entries) ) std::ostream& operator<<(std::ostream& os, const event_descriptor& val) //ADDED { os << val.identifier << "["; for(const auto& entry: val.event_entries) //C++11 os << entry; os << "]"; return os; } struct build_string_impl //ADDED { template <typename Sig> struct result; template <typename This, typename Iter1, typename Iter2> struct result<This(Iter1,Iter2)> { typedef std::string type; }; template <typename Iter1, typename Iter2> std::string operator()(Iter1 begin, Iter2 end) const { return 
std::string(begin->begin(),end->begin()); } }; px::function<build_string_impl> build_string; template <typename Iterator, typename Lexer> struct grammar : qi::grammar<Iterator,event_descriptor() > { template <typename TokenDef> grammar(TokenDef const& tok) : grammar::base_type(event_descriptor_) { using qi::_val; //start = event; event_descriptor_ = tok.event >> oid_ >> tok.left_curly >> *(event_entry_) >> tok.right_curly; event_entry_ = event_qualifier >> oid_ >> type_descriptor_ >> tok.scolon; event_qualifier = tok.optional [ _val = ENTRY_OPTIONAL] | tok.required [ _val = ENTRY_REQUIRED] | tok.repeated [ _val = ENTRY_REPEATED]; oid_ = tok.ordinal >> tok.colon >> tok.identifier; type_descriptor_ = (( atomic_type >> qi::attr("")) | ( event_type >> tok.left_paren >> tok.identifier >> tok.right_paren)); atomic_type = tok.t_int_4 [ _val = RBL_INT4] | tok.t_int_8 [ _val = RBL_INT8] | tok.t_string [ _val = RBL_STRING]; event_type = tok.event [_val = RBL_EVENT]; using namespace qi::labels; qi::on_success(event_entry_,std::cout << _val << " " << build_string(_1,_3) << std::endl); //CHANGED // BOOST_SPIRIT_DEBUG_NODES( (event_descriptor_)(event_entry_)(event_qualifier)(oid_)(type_descriptor_)(atomic_type)(event_type) ); } qi::rule<Iterator> start; qi::rule<Iterator, event_descriptor()> event_descriptor_; qi::rule<Iterator, event_entry()> event_entry_; qi::rule<Iterator, event_entry_qualifier()> event_qualifier; qi::rule<Iterator, entry_type()> atomic_type; qi::rule<Iterator, entry_type()> event_type; qi::rule<Iterator, type_descriptor()> type_descriptor_; qi::rule<Iterator, oid()> oid_; }; std::string test = " EVENT 1:sihan { OPTIONAL 123:hassan int4; OPTIONAL 321:hassan2 int4; } "; int main() { typedef lex::lexertl::position_token<std::string::iterator, boost::mpl::vector<boost::uint32_t, std::string> > token_type; //CHANGED typedef lex::lexertl::actor_lexer<token_type> lexer_type; typedef tokens<lexer_type>::iterator_type iterator_type; tokens<lexer_type> token_lexer; 
grammar<iterator_type,tokens<lexer_type>::lexer_def> grammar(token_lexer); std::string::iterator it = test.begin(); iterator_type first = token_lexer.begin(it, test.end()); iterator_type last = token_lexer.end(); bool r; r = qi::parse(first, last, grammar); //CHANGED if(r) ; else { std::cout << "parsing failed" << std::endl; } } 
+4
source share

All Articles