I'm building a parser with Sedlex and Menhir, in which I have a function definition as follows:
(* Lexer *)
/* Characters with codes 160 through 255 (decimal escapes). */
let non_ascii = [%sedlex.regexp? '\160' .. '\255'];
/* Either the `unicode` regexp (defined elsewhere) or a backslash followed by
   one character that is NOT carriage return, line feed, '\012' (decimal 12,
   form feed) or a `hex_digit` (defined elsewhere). */
let escape = [%sedlex.regexp?
unicode | ('\\', Compl('\r' | '\n' | '\012' | hex_digit))
];
/* Characters accepted here: underscore, ASCII letters, '$', `non_ascii`, or an
   `escape`. Presumably the first character of the `ident` regexp, which is
   not shown here -- confirm against its definition. */
let ident_start = [%sedlex.regexp?
'_' | 'a' .. 'z' | 'A' .. 'Z' | '$' | non_ascii | escape
];
/* Same alphabet as `ident_start` without '$', plus ASCII digits and '-'.
   Presumably the continuation characters of `ident` -- confirm. */
let ident_char = [%sedlex.regexp?
'_' | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | non_ascii | escape];
/* Reads the next token from the sedlex buffer `buf`.

   Whitespace is consumed and dropped by recursing, so no token is ever
   produced for it. As a consequence the token stream alone carries no
   record of whether whitespace separated two tokens: assuming `ident` does
   not itself match '(', both `func(` and `func (` come out as IDENT followed
   by LEFT_PAREN.

   `white_space`, `ident`, `number` and `get_dimension` are defined elsewhere. */
let rec get_next_token = buf => {
switch%sedlex (buf) {
/* Skip whitespace: lex again from the current position. */
| white_space => get_next_token(buf)
| eof => EOF
| ';' => SEMI_COLON
| '}' => RIGHT_BRACE
| '{' => LEFT_BRACE
| ':' => COLON
| '(' => LEFT_PAREN
| ')' => RIGHT_PAREN
| '[' => LEFT_BRACKET
| ']' => RIGHT_BRACKET
| '%' => PERCENTAGE
| '&' => AMPERSAND
/* The matched text becomes the IDENT payload. */
| ident => IDENT(Sedlexing.latin1(buf))
/* The matched number text and the buffer are handed to `get_dimension`,
   which produces the token. */
| number => get_dimension(Sedlexing.latin1(buf), buf)
/* No rule matched: this fails with an assertion error rather than a
   dedicated lexing error. */
| _ => assert(false)
};
}
/* Runs the Menhir entry point `parser` over the sedlex buffer `buf`.

   The parser is driven through
   `MenhirLib.Convert.Simplified.traditional2revised`, which is fed one
   (token, start, end) triple at a time by a supplier built on
   `get_next_token_with_location`. The most recently supplied triple is
   remembered so it can be attached to the error.

   A `LexingError` is re-raised unchanged; any other exception raised while
   parsing is replaced by `ParseError` carrying the last supplied triple. */
let parse = (buf, parser) => {
let most_recent = ref((Parser.EOF, Lexing.dummy_pos, Lexing.dummy_pos));
let supply_token = () => {
let triple = get_next_token_with_location(buf);
most_recent := triple;
triple;
};
try(MenhirLib.Convert.Simplified.traditional2revised(parser, supply_token)) {
| LexingError(_) as lexing_failure => raise(lexing_failure)
| _ => raise(ParseError(most_recent^))
};
};
(* Parser *)
(* Tokens used by the rule below. IDENT carries a string payload; the two
   parenthesis tokens carry no payload. *)
%token <string> IDENT
%token LEFT_PAREN
%token RIGHT_PAREN
(* A function call: an IDENT, then LEFT_PAREN, then zero or more [exprs]
   (Menhir's standard [list]), then RIGHT_PAREN.

   Produces [Texp_function] from two pairs: the identifier with its location,
   and the argument list with its location, both built by
   [Lex_buffer.make_loc] from the start and end positions.

   The rule matches on the token sequence only: it does not compare the end
   position of the IDENT with the start position of LEFT_PAREN, so it cannot
   tell whether anything separated them in the input. *)
function_expr:
| i = IDENT; LEFT_PAREN; xs = list(exprs); RIGHT_PAREN {
Texp_function (
(i, Lex_buffer.make_loc $startpos(i) $endpos(i)),
(xs, Lex_buffer.make_loc $startpos(xs) $endpos(xs))
)
}
I also have a simple ident definition:
| i = IDENT {Texp_ident i, Lex_buffer.make_loc $startpos(i) $endpos(i) }
Functions can't have a space between the ident and the LEFT_PAREN. How can I define that?
I want the input `and func(1, 2, 3)` to produce a list of expressions such as `[Texp_ident "and"; Texp_function("func", [...])]`, but it actually produces `[Texp_function("and", ["func"; ...])]`, since the parser doesn't care about the space between the ident and the LEFT_PAREN. How can I fix that?