I am trying to use lexer states to do context specific parsing, but it seems that different lexer states do cross-pollinate. Here is a very basic example
#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>
#include <boost/spirit/include/phoenix_operator.hpp>
#include <boost/spirit/include/phoenix_container.hpp>
#include <iostream>
#include <string>
using namespace boost::spirit;
template <typename Lexer>
struct strip_comments_tokens : lex::lexer<Lexer>
{
strip_comments_tokens()
: strip_comments_tokens::base_type(lex::match_flags::match_default)
{
ccomment = "\\/\\*";
endcomment = ".*\\*\\/";
hello = "hello";
this->self.add
(ccomment)
(hello);
this->self("COMMENT").add
(endcomment);
}
lex::token_def<> ccomment, endcomment;
lex::token_def<std::string> hello;
};
template <typename Iterator>
struct strip_comments_grammar : qi::grammar<Iterator>
{
template <typename TokenDef>
strip_comments_grammar(TokenDef const& tok)
: strip_comments_grammar::base_type(start)
{
start = *( tok.ccomment
>> qi::in_state("COMMENT")
[
tok.endcomment
]
| tok.hello [ std::cout << _1 ]
);
}
qi::rule<Iterator> start;
};
int main(int argc, char* argv[])
{
typedef std::string::iterator base_iterator_type;
typedef
lex::lexertl::lexer<lex::lexertl::token<base_iterator_type> >
lexer_type;
typedef strip_comments_tokens<lexer_type>::iterator_type iterator_type;
strip_comments_tokens<lexer_type> strip_comments; // Our lexer
strip_comments_grammar<iterator_type> g (strip_comments); // Our parser
std::string str("hello/*hello*/hello");
base_iterator_type first = str.begin();
bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g);
return 0;
}
I would expect the input
"hello/*hello*/hello"
to be tokenized as hello ccomment endcomment hello. But what happens is the input gets tokenized as hello ccomment hello, so the grammar stops working. If you change the input to
"hello/*anything else*/hello"
everything works as expected.
Any ideas?
You never modify the state of the lexer. So it's always in the
"INITIAL"state.Setting the lexer state should be done in the Lexer stage (there's no reliable way to feedback from the parser stage, in my experience and after much experimentation).
So you need to upgrade to
actor_lexerand attach semantic actions to the token_defs added to the lexer tables:And
That said, I suppose it's much easier to just skip the tokens altogether. If you really want to know how to use Lexer states for skipping see:
I'd suggest the Simplify And Profit approach using
lex::_pass = lex::pass_flags::pass_ignorethough:Here's my take:
Live On Coliru