OCaml lexing and parsing with whitespace sensitivity

236 Views Asked by At

I'm building a parser with Sedlex and Menhir, where I have a function definition as follows:



(* Lexer *)

/* Matches a single character whose code is in the range 160..255
   (the `\ddd` character escapes below are decimal). Used by `ident_start`
   and `ident_char` to admit non-ASCII characters in identifiers. */
let non_ascii = [%sedlex.regexp? '\160' .. '\255'];

/* Matches an escape sequence: either `unicode`, or a backslash followed by
   one character that is NOT a carriage return, a line feed, character
   code 12 ('\012', form feed) or a `hex_digit`.
   `unicode` and `hex_digit` are defined outside this excerpt. */
let escape = [%sedlex.regexp?
  unicode | ('\\', Compl('\r' | '\n' | '\012' | hex_digit))
];

/* Matches what may begin an identifier: an underscore, an ASCII letter,
   a dollar sign, a `non_ascii` character, or an `escape` sequence.
   Digits and '-' are not accepted here (compare `ident_char`). */
let ident_start = [%sedlex.regexp?
  '_' | 'a' .. 'z' | 'A' .. 'Z' | '$' | non_ascii | escape
];

/* Matches what may continue an identifier: an underscore, an ASCII letter,
   an ASCII digit, a hyphen, a `non_ascii` character, or an `escape` sequence.
   Note that '$' is accepted by `ident_start` but not here. */
let ident_char = [%sedlex.regexp?
  '_' | 'a' .. 'z' | 'A' .. 'Z' | '0' .. '9' | '-' | non_ascii | escape];

/* Reads the next token from the Sedlex buffer `buf`.
 *
 * Whitespace never yields a token: it is consumed and the function calls
 * itself again. As a consequence, the token values returned here carry no
 * trace of whether whitespace preceded them (e.g. `f(` and `f (` both give
 * IDENT followed by LEFT_PAREN).
 *
 * `ident` and `number` are regexps defined outside this excerpt, as is
 * `get_dimension`.
 *
 * NOTE(review): input matching none of the rules reaches `assert(false)`,
 * which raises Assert_failure rather than a lexer-specific error -- confirm
 * that this is intended.
 */
let rec get_next_token = buf => {
  switch%sedlex (buf) {
  /* Skip whitespace and lex again. */
  | white_space => get_next_token(buf)
  | eof => EOF
  | ';' => SEMI_COLON
  | '}' => RIGHT_BRACE
  | '{' => LEFT_BRACE
  | ':' => COLON
  | '(' => LEFT_PAREN
  | ')' => RIGHT_PAREN
  | '[' => LEFT_BRACKET
  | ']' => RIGHT_BRACKET
  | '%' => PERCENTAGE
  | '&' => AMPERSAND
  /* The matched lexeme, decoded as Latin-1, becomes the IDENT payload. */
  | ident => IDENT(Sedlexing.latin1(buf))
  /* The number lexeme and the buffer are both handed to `get_dimension`,
     which produces the token. */
  | number => get_dimension(Sedlexing.latin1(buf), buf)
  /* No rule matched. */
  | _ => assert(false)
  };
}

/* Runs the Menhir entry point `parser` over the Sedlex buffer `buf`.
 *
 * `parser` is a parser in Menhir's traditional API; it is adapted with
 * `MenhirLib.Convert.Simplified.traditional2revised` so that it can be fed
 * from a token-supplier closure.
 *
 * The most recently supplied token (with its start and end positions) is
 * remembered so that it can be attached to the error raised on failure:
 *   - a `LexingError` is re-raised unchanged;
 *   - any other exception is replaced by `ParseError` carrying that token.
 */
let parse = (buf, parser) => {
  /* Placeholder until the first real token has been read. */
  let last_token = ref((Parser.EOF, Lexing.dummy_pos, Lexing.dummy_pos));

  /* Token supplier: fetch, remember, then hand the token to the parser. */
  let next_token = () => {
    let token = get_next_token_with_location(buf);
    last_token := token;
    token;
  };

  switch (MenhirLib.Convert.Simplified.traditional2revised(parser, next_token)) {
  | result => result
  | exception (LexingError(_) as e) => raise(e)
  | exception _ => raise(ParseError(last_token^))
  };
};



(* Parser *)

(* IDENT carries the identifier text as a string; the lexer builds it from
   the matched lexeme. *)
%token <string> IDENT
(* Emitted by the lexer for '(' and ')' respectively. *)
%token LEFT_PAREN
%token RIGHT_PAREN

(* A function call: an IDENT token, then LEFT_PAREN, then zero or more
   [exprs], then RIGHT_PAREN. Produces [Texp_function] from two pairs: the
   name with the result of [Lex_buffer.make_loc] on its start/end positions,
   and the argument list with the result of [Lex_buffer.make_loc] on its
   start/end positions.
   NOTE(review): this rule is defined purely on tokens, so it cannot tell
   whether whitespace separated IDENT from LEFT_PAREN in the source text;
   enforcing adjacency presumably has to happen in the lexer -- confirm. *)
function_expr:
  | i = IDENT; LEFT_PAREN; xs = list(exprs); RIGHT_PAREN {
    Texp_function (
      (i, Lex_buffer.make_loc $startpos(i) $endpos(i)),
      (xs, Lex_buffer.make_loc $startpos(xs) $endpos(xs))
    )
  }

I also have a simple ident definition:


| i = IDENT {Texp_ident i, Lex_buffer.make_loc $startpos(i) $endpos(i) } 

Functions can't have a space between the ident and the LEFT_PAREN. How can I express that?

I want `and func(1, 2, 3)` to produce the list of expressions `[Texp_ident "and"; Texp_function("func", [...])]`, but it actually produces `[Texp_function("and", ["func"; ...])]`, since the parser doesn't care about the space between the ident and the LEFT_PAREN. How can I fix that?

0

There are 0 best solutions below