Parsing multiple named sets of other named sets

Question

Parsing multiple named sets of other named sets

So, I want to write ... well ... a not-so-simple parser with boost::spirit::qi. I know only the bare basics of Boost.Spirit, having gotten acquainted with it for the first time over the last couple of hours.

Basically I need to parse this:

# comment # other comment set "Myset A" { figure "AF 1" { i 0 0 0 i 1 2 5 i 1 1 1 f 3.1 45.11 5.3 i 3 1 5 f 1.1 2.33 5.166 } figure "AF 2" { i 25 5 1 i 3 1 3 } } # comment set "Myset B" { figure "BF 1" { f 23.1 4.3 5.11 } } set "Myset C" { include "Myset A" # includes all figures from Myset A figure "CF" { i 1 1 1 f 3.11 5.33 3 } }

Into this:

 // Target data model that the parser is expected to fill from the input file.

 // One "i x y z" line: a point with integer coordinates.
 struct int_point { int x, y, z; };

 // One "f x y z" line: a point with floating-point coordinates.
 struct float_point { float x, y, z; };

 // A named figure holding its integer and float points in separate containers.
 struct figure
 {
     std::string name;
     std::vector<int_point> int_points;
     std::vector<float_point> float_points;
 };

 // A named set of figures.
 struct figure_set
 {
     std::string name;
     std::vector<figure> figures; // the terminating ';' was missing here
 };

 std::vector<figure_set> figure_sets; // fill with the data of the input

Now, obviously, asking someone to write this for me would be too much, but can you give some tips on what to read and how to structure the grammar and parsers for this task?

And also ... it is possible that boost::spirit is not the best library I could use for this task. If so, which one is?

EDIT: Here is how far I have come. But I'm still not sure how to do this: http://liveworkspace.org/code/212c31dfc0b6fbdf6c462d8d931c0e9f

I can read one number, but I have no idea how to parse a set of numbers.

+6

c++ parsing boost-spirit boost-spirit-qi

Borislav Stanimirov Oct 13 '12 at 17:00

source share

1 answer

sehe · Accepted Answer · 2012-10-13T23:45:44+0000

Here is my take on it

I believe the rule that would have been the blocker for you is

 // Rule for one figure block: the "figure" keyword, a name, then a
 // brace-delimited list of point lines. Semantic actions route each parsed
 // piece into a specific element of the rule's attribute (_val).
 figure = eps >> "figure"
     >> name [ at_c<0>(_val) = _1 ]                        // parsed name -> element 0
     >> '{'
     >> *(
             ipoints [ push_back(at_c<1>(_val), _1) ]      // each ipoints match is appended to element 1
           | fpoints [ push_back(at_c<2>(_val), _1) ]      // each fpoints match is appended to element 2
         )
     >> '}';

This is actually a symptom of parsing the intermixed i and f lines into separate containers.

Below is an alternative.

Here is my full code: test.cpp

 //#define BOOST_SPIRIT_DEBUG // before including Spirit #define BOOST_SPIRIT_USE_PHOENIX_V3 #include <boost/fusion/adapted.hpp> #include <boost/spirit/include/qi.hpp> #include <boost/spirit/include/karma.hpp> #include <boost/spirit/include/phoenix.hpp> #include <boost/spirit/include/phoenix_fusion.hpp> #include <fstream> namespace Format { struct int_point { int x, y, z; }; struct float_point { float x, y, z; }; struct figure { std::string name; std::vector<int_point> int_points; std::vector<float_point> float_points; friend std::ostream& operator<<(std::ostream& os, figure const& o); }; struct figure_set { std::string name; std::set<std::string> includes; std::vector<figure> figures; friend std::ostream& operator<<(std::ostream& os, figure_set const& o); }; typedef std::vector<figure_set> file_data; } BOOST_FUSION_ADAPT_STRUCT(Format::int_point, (int, x)(int, y)(int, z)) BOOST_FUSION_ADAPT_STRUCT(Format::float_point, (float, x)(float, y)(float, z)) BOOST_FUSION_ADAPT_STRUCT(Format::figure, (std::string, name) (std::vector<Format::int_point>, int_points) (std::vector<Format::float_point>, float_points)) BOOST_FUSION_ADAPT_STRUCT(Format::figure_set, (std::string, name) (std::set<std::string>, includes) (std::vector<Format::figure>, figures)) namespace Format { std::ostream& operator<<(std::ostream& os, figure const& o) { using namespace boost::spirit::karma; return os << format_delimited( "\n figure" << no_delimit [ '"' << string << '"' ] << "\n {" << *("\ni" << int_ << int_ << int_) << *("\nf" << float_ << float_ << float_) << "\n }" , ' ', o); } std::ostream& operator<<(std::ostream& os, figure_set const& o) { using namespace boost::spirit::karma; return os << format_delimited( "\nset" << no_delimit [ '"' << string << '"' ] << "\n{" << *("\n include " << no_delimit [ '"' << string << '"' ]) << *stream << "\n}" , ' ', o); } } namespace /*anon*/ { namespace phx=boost::phoenix; namespace qi =boost::spirit::qi; template <typename Iterator> struct skipper : public 
qi::grammar<Iterator> { skipper() : skipper::base_type(start, "skipper") { using namespace qi; comment = '#' >> *(char_ - eol) >> (eol|eoi); start = comment | qi::space; BOOST_SPIRIT_DEBUG_NODE(start); BOOST_SPIRIT_DEBUG_NODE(comment); } private: qi::rule<Iterator> start, comment; }; template <typename Iterator> struct parser : public qi::grammar<Iterator, Format::file_data(), skipper<Iterator> > { parser() : parser::base_type(start, "parser") { using namespace qi; using phx::push_back; using phx::at_c; name = eps >> lexeme [ '"' >> *~char_('"') >> '"' ]; include = eps >> "include" >> name; ipoints = eps >> "i" >> int_ >> int_ >> int_; fpoints = eps >> "f" >> float_ >> float_ >> float_; figure = eps >> "figure" >> name [ at_c<0>(_val) = _1 ] >> '{' >> *( ipoints [ push_back(at_c<1>(_val), _1) ] | fpoints [ push_back(at_c<2>(_val), _1) ] ) >> '}'; set = eps >> "set" >> name >> '{' >> *include >> *figure >> '}'; start = *set; } private: qi::rule<Iterator, std::string() , skipper<Iterator> > name, include; qi::rule<Iterator, Format::int_point() , skipper<Iterator> > ipoints; qi::rule<Iterator, Format::float_point(), skipper<Iterator> > fpoints; qi::rule<Iterator, Format::figure() , skipper<Iterator> > figure; qi::rule<Iterator, Format::figure_set() , skipper<Iterator> > set; qi::rule<Iterator, Format::file_data() , skipper<Iterator> > start; }; } namespace Parser { bool parsefile(const std::string& spec, Format::file_data& data) { std::ifstream in(spec.c_str()); in.unsetf(std::ios::skipws); std::string v; v.reserve(4096); v.insert(v.end(), std::istreambuf_iterator<char>(in.rdbuf()), std::istreambuf_iterator<char>()); if (!in) return false; typedef char const * iterator_type; iterator_type first = &v[0]; iterator_type last = first+v.size(); try { parser<iterator_type> p; skipper<iterator_type> s; bool r = qi::phrase_parse(first, last, p, s, data); r = r && (first == last); if (!r) std::cerr << spec << ": parsing failed at: \"" << std::string(first, last) << "\"\n"; 
return r; } catch (const qi::expectation_failure<char const *>& e) { std::cerr << "FIXME: expected " << e.what_ << ", got '" << std::string(e.first, e.last) << "'" << std::endl; return false; } } } int main() { Format::file_data data; bool ok = Parser::parsefile("input.txt", data); std::cerr << "Parse " << (ok?"success":"failed") << std::endl; std::cout << "# figure sets exported automatically by karma\n\n"; for (auto& set : data) std::cout << set; }

It prints the parsed data as a verification: output.txt

 Parse success # figure sets exported automatically by karma set "Myset A" { figure "AF 1" { i 0 0 0 i 1 2 5 i 1 1 1 i 3 1 5 f 3.1 45.11 5.3 f 1.1 2.33 5.166 } figure "AF 2" { i 25 5 1 i 3 1 3 } } set "Myset B" { figure "BF 1" { f 23.1 4.3 5.11 } } set "Myset C" { include "Myset A" figure "CF" { i 1 1 1 f 3.11 5.33 3.0 } }

You will notice that

the order of the point lines changes (all int_points precede all float_points)
the numbers are also formatted slightly differently, e.g. 3.0 instead of 3 in the last line, to show that the type is float.
you "forgot" (?) to include the includes in your question

Alternative

Here is an alternative that keeps the actual point lines in their original order:

 // A point line is either an int_point or a float_point.
 typedef boost::variant<int_point, float_point> if_point;

 // Figure with a single container for both kinds of point.
 struct figure
 {
     std::string name;
     std::vector<if_point> if_points;
 }; // the terminating ';' was missing here

Now the rules are simple:

 // Parser rules for the alternative: no semantic actions are attached to any rule.
 name    = eps >> lexeme [ '"' >> *~char_('"') >> '"' ];   // double quote, any run of non-quote chars, double quote
 include = eps >> "include" >> name;
 ipoints = eps >> "i" >> int_ >> int_ >> int_;             // "i" followed by three ints
 fpoints = eps >> "f" >> float_ >> float_ >> float_;       // "f" followed by three floats
 figure  = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';
 set     = eps >> "set" >> name >> '{' >> *include >> *figure >> '}';   // all includes must precede the figures
 start   = *set;

Note the elegance of

 // figure rule without semantic actions: ipoints and fpoints are plain
 // alternatives inside one Kleene star.
 figure = eps >> "figure" >> name >> '{' >> *(ipoints | fpoints) >> '}';

And the output remains in the exact order of input: output.txt

Once again, the full demo code (github only): test.cpp

Bonus Update

Finally, I wrote my first proper Karma grammar to output the results:

 // Karma generator rules, named like the parser rules they correspond to.
 name    = no_delimit ['"' << string << '"'];   // no_delimit: no delimiter emitted between the quotes and the text
 include = "include" << name;
 ipoints = "\ni" << int_ << int_ << int_;
 fpoints = "\nf" << float_ << float_ << float_;
 figure  = "figure" << name << "\n {" << *(ipoints | fpoints) << "\n }";
 set     = "set" << name << "\n{" << *("\n " << include) << *("\n " << figure) << "\n}";
 // Header comment line first, then the sets separated by eol.
 start   = "# figure sets exported automatically by karma\n\n" << set % eol;

It was significantly more convenient than I expected. Check it out in the latest version of the fully updated gist : test.hpp

Parsing multiple named sets of other named sets

Below is an alternative.

Alternative

Bonus Update

More articles: