I am writing a parser for a compiler in simple C and I'm trying to understand why I have
yacc: 22 shift/reduce conflicts, 15 reduce/reduce conflicts.
lex file
letter [a-zA-Z]
Hexdigits [A-F|0-9]
OctalDigit [0-7]
digit [0-9]
digitMZ [1-9]
newline (\n)
integerNum "0"|{digitMZ}{digit}*
HexNumber ("0X"|"0x"){Hexdigits}+
OctalNumber ["0"]{OctalDigit}+
BinaryNumber ["0"|"1"]+"b"
identifier {letter}({letter}|{digit}|"_")*
char "'"."'"
wordString \"[^\"]*\"
Comment "/%"[^"%/"]+"%/"
StrDec "["{integerNum}"]"
%{
%}
%option yylineno
%%
"string"" "*{StrDec} { return STRING_DECLERATION;}
boolean { return BOOLEAN; }
else { return ELSE; }
if { return IF; }
integer { return INTEGER; }
procedure { return PROCEDURE; }
char { return CHAR; }
string { return STRING; }
var { return VAR; }
intptr { return INTPTR; }
charptr { return CHARPTR; }
return { return RETURN; }
void { return VOID; }
while { return WHILE; }
"isprint()" { return IS_PRINT; }
true|false { return BOOLCONSTANT; }
":" { return COLON; }
"+" { return PLUS; }
"&" { return ADDRESS; }
"^" { return DEREFERENCE; }
"-" { return MINUS; }
"*" { return MULTIPLICATION; }
"&&" { return AND; }
"/" { return DIVISION; }
"<=" { return LESSEQUAL; }
"<" { return LESS; }
">" { return GREATER; }
">=" { return GREATEREQUAL; }
"==" { return EQUAL; }
"!=" { return NOTEQUAL; }
"=" { return ASSIGN; }
";" { return SEMICOLON; }
"," { return COMMA; }
"(" { return LEFTPAREN; }
")" { return RIGHTPAREN; }
"[" { return LEFTBRACKET; }
"]" { return RIGHTBRACKET; }
"{" { return LEFTBRACE; }
"}" { return RIGHTBRACE; }
"||" { return OR; }
"!" { return NOT; }
" "+ { }
"|" {return ABS_LENGTH;}
" "+ {}
{newline} { }
{Comment} {}
{wordString} {return WORDSTRING; }
{integerNum} {return INTEGER_CONST; }
{char} {return CHAR_IDENTIFIER; }
{identifier} {return IDENTIFIER; }
{HexNumber} {return HEX_NUMBER; }
{OctalNumber} {return OCTAL_NUMBER; }
{BinaryNumber} {return BINARY_NUMBER; }
. {return ERROR; }
%%
yacc file
%{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define YYDEBUG 1
#define YYSTYPE struct node*
int yyerror(char*);
int yylex();
typedef struct node
{
char * rtoken;
struct node * left;
struct node * right;
char * ltoken;
}node;
node * mknode(char * ltoken, node * left, node * right,char * rtoken);
void printtree(node * tree);
char * getLT(node *);
extern char *yytext;
%}
%token STRING_DECLERATION
%token STRING
%token WORDSTRING
%token BOOLEAN
%token BREAK
%token ELSE
%token IF
%token IMPLEMENTS
%token INTEGER
%token PROCEDURE
%token CHAR
%token VAR
%token INTPTR
%token CHARPTR
%token RETURN
%token VOID
%token WHILE
%token IS_PRINT
%token BOOLCONSTANT
%token PLUS
%token COLON
%token ADDRESS
%token DEREFERENCE
%token MINUS
%token MULTIPLICATION
%token AND
%token DIVISION
%token LESS
%token LESSEQUAL
%token GREATER
%token GREATEREQUAL
%token EQUAL
%token NOTEQUAL
%token ASSIGN
%token SEMICOLON
%token COMMA
%token LEFTPAREN
%token RIGHTPAREN
%token LEFTBRACKET
%token RIGHTBRACKET
%token LEFTBRACE
%token RIGHTBRACE
%token OR
%token NOT
%token ABS_LENGTH
%token CHAR_IDENTIFIER
%token HEX_NUMBER
%token OCTAL_NUMBER
%token BINARY_NUMBER
%token ERROR
%token INTEGER_CONST
%token IDENTIFIER
%%
start: s{printtree($1);}
;
s: expr {$$=$1;}
|expr s {node *yu = mknode("",$1,NULL,"\n");$$= mknode("",yu,$2,"");}
;
expr: function {$$=$1;}
|assignment SEMICOLON {$$=$1;}
|decleration SEMICOLON {$$=$1;}
|condition {$$=$1;}
|BRACES {$$=$1;}
;
function: PROCEDURE iden LEFTPAREN argListOpt RIGHTPAREN RETURN identifiertype compondStmt
{node *z =mknode("RETURN:",$7,NULL,"\n");node *o =mknode(getLT($2),$4,z,""); $$ = mknode("(PROCEDURE",o,$8,"\n)\n");}
|PROCEDURE iden LEFTPAREN RIGHTPAREN RETURN identifiertype compondStmt
{node *z2 = mknode("",NULL,NULL,"");node *z1 =mknode("RETURN:",$6,NULL,"\n");
node *o =mknode(getLT($2),z2,z1,""); $$ = mknode("(PROCEDURE",o,$7,"\n)\n");}
;
argListOpt: multidentifier COLON identifiertype SEMICOLON argListOpt
{node *fg =mknode("",$1,$3,""); $$ = mknode("recive:",fg,$5,"");}
|multidentifier COLON identifiertype{$$ = mknode("recive:",$1,$3,"");}
;
compondStmt: BRACES{$$=$1;}
|SEMICOLON{$$ = mknode("",NULL,NULL,"");}
;
functionCall: iden LEFTPAREN sendArgs RIGHTPAREN{node *Sgl = mknode("(",$3,NULL,")");$$= mknode("",$1,Sgl,"");}
|iden LEFTPAREN RIGHTPAREN {node *Sgg = mknode("(",NULL,NULL,")");$$= mknode("",$1,Sgg,"");}
;
sendArgs: return_val {$$=$1;}
|sendArgs COMMA return_val {node *SA = mknode("",$1,NULL,",");$$= mknode("",SA,$3,"");}
;
BRACES: LEFTBRACE All RIGHTBRACE {$$ = mknode("\n{BRACES\n",$2,NULL,"\n}");}
|LEFTBRACE RIGHTBRACE{$$ = mknode("\n{BRACES\n",NULL,NULL,"\n}");}
|LEFTBRACE BRACES RIGHTBRACE{$$ = mknode("\n{BRACES\n",$2,NULL,"\n}");}
;
All: RETURN return_val SEMICOLON{$$ = mknode("RETURN",$2,NULL,"\n");}
|All RETURN return_val SEMICOLON{node * K = mknode("RETURN",$2,NULL,"");$$ = mknode("",$1,K,"\n");}
|All function{$$ = mknode("",$1,$2,"\n");}
|function {$$=$1;}
|assignment {$$ = mknode("(",$1,NULL,")\n");}
|assignment SEMICOLON {$$ = mknode("(",$1,NULL,")\n");}
|decleration SEMICOLON{$$=$1;}
|decleration{$$=$1;}
|All decleration SEMICOLON {$$ = mknode("",$1,$2,"\n");}
|All assignment SEMICOLON {$$ = mknode("",$1,$2,"\n");}
|condition{$$=$1;}
|All condition{$$ = mknode("",$1,$2,"\n");}
|All BRACES {$$ = mknode("",$1,$2,"\n");}
|BRACES All {$$ = mknode("",$1,$2,"\n");}
;
return_val: int_num{$$=$1;}
|iden {$$=$1;}
|wstring{$$ = $1;}
|exp{$$=$1;}
|char_id{$$ = mknode(yytext,NULL,NULL,"");}
|bool{$$=$1;}
|wordlength{$$=$1;}
|absolute{$$=$1;}
|identifierabs{$$=$1;}
;
decleration: VAR multidentifier COLON identifiertype {$$ = mknode("",$2,$4,"\n");}
;
multidentifier: iden {$$=$1;}
| multidentifier COMMA iden{node *er = mknode("",$1,NULL,",");$$= mknode("",er,$3,"");}
;
bool: BOOLCONSTANT{$$ = mknode(yytext,NULL,NULL,"");}
;
assignment : iden ASSIGN functionCall {node *pop = mknode("=",$1,NULL,"");$$ = mknode("\t(",pop,$3,")");}
| iden ASSIGN exp {node *rop = mknode("=",$1,NULL,"");$$ = mknode("\t(",rop,$3,")");}
| iden ASSIGN bool{node *nop = mknode("=",$1,NULL,"");$$ = mknode("\t(",nop,$3,")");}
| iden ASSIGN pointeroperator iden
{node *gh= mknode("",$3,$4,"");node *e1 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e1,gh,")");}
| iden ASSIGN iden {node *e2 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e2,$3,")");}
| iden ASSIGN wstring {node *e3 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e3,$3,")");}
| iden ASSIGN char_id{node *e4 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e4,$3,")");}
| iden ASSIGN "null"{node *e5 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e5,NULL,"NULL)");}
;
char_id: CHAR_IDENTIFIER {$$ = mknode(yytext,NULL,NULL,"");}
;
exp: int_num {$$=$1;}
|exp methoperator exp {node *y = mknode(getLT($2),$1,NULL,"");$$ = mknode("(",y,$3,")");}
|LEFTPAREN exp methoperator exp RIGHTPAREN {node *y = mknode(getLT($3),$2,NULL,"");$$ = mknode("(",y,$4,")");}/**/
|iden {$$=$1;}
|LEFTPAREN exp RIGHTPAREN {$$ = mknode("(",$2,NULL,")");}/**/
;
condition: whileCond{$$=$1;}
|ifCond{$$=$1;}
;
ifCond: IF LEFTPAREN innercond RIGHTPAREN BRACES{$$ = mknode("(COND \n",$3,$5,"\n)\n");}
|IF LEFTPAREN innercond RIGHTPAREN BRACES ELSE BRACES{node *yk= mknode("ELSE",$7,NULL,"");
node *j= mknode("",$5,yk,"");$$ = mknode("(COND \n",$3,j,"\n)\n");}
;
whileCond: WHILE LEFTPAREN innercond RIGHTPAREN BRACES {$$ = mknode("(LOOP \n",$3,$5,"\n)\n");}
;
wstring: WORDSTRING{$$ = mknode(yytext,NULL,NULL,"");}
;
condexp: exp signoperator exp {$$ = mknode(getLT($2),$1,$3,"");}
|wstring IsEqual wstring {$$ = mknode(getLT($2),$1,$3,"");}
|iden signoperator exp{$$ = mknode(getLT($2),$1,$3,"");}
|bool{$$=$1;}
|iden IsEqual wstring {$$ = mknode(getLT($2),$1,$3,"");}
|wstring IsEqual iden{$$ = mknode(getLT($2),$1,$3,"");}
|char_id IsEqual char_id {$$ = mknode(getLT($2),$1,$3,"");}
|iden IsEqual char_id {$$ = mknode(getLT($2),$1,$3,"");}
|char_id IsEqual iden{$$ = mknode(getLT($2),$1,$3,"");}
|exp signoperator wordlength {$$ = mknode(getLT($2),$1,$3,"");}
|wordlength signoperator exp {$$ = mknode(getLT($2),$1,$3,"");}
|exp signoperator absolute{$$ = mknode(getLT($2),$1,$3,"");}
|absolute signoperator exp{$$ = mknode(getLT($2),$1,$3,"");}
|wordlength signoperator absolute{$$ = mknode(getLT($2),$1,$3,"");}
|absolute signoperator wordlength {$$ = mknode(getLT($2),$1,$3,"");}
|identifierabs signoperator exp {$$ = mknode(getLT($2),$1,$3,"");}
|identifierabs signoperator wordlength{$$ = mknode(getLT($2),$1,$3,"");}
|identifierabs signoperator absolute{$$ = mknode(getLT($2),$1,$3,"");}
|identifierabs signoperator identifierabs{$$ = mknode(getLT($2),$1,$3,"");}
;
wordlength: ABS_LENGTH wstring ABS_LENGTH {$$ = mknode("|",$2,NULL,"|");}
;
absolute : ABS_LENGTH int_num ABS_LENGTH{$$ = mknode("|",$2,NULL,"|");}
;
identifierabs: ABS_LENGTH iden ABS_LENGTH {$$ = mknode("|",$2,NULL,"|");}
;
innercond: condexp {$$=mknode("\t(",$1,NULL,")");}
|LEFTPAREN condexp RIGHTPAREN{$$ = mknode("(",$2,NULL,")");}
|LEFTPAREN condexp RIGHTPAREN condoperator innercond{node *t = mknode("",$4,$5,"");$$ = mknode("(",$2,t,")");}
|condexp condoperator innercond
{node *lolo = mknode("(",$1,NULL,"");node *Sr = mknode(getLT($2),lolo,NULL,")");$$= mknode(" ",Sr,$3,"");}
|LEFTPAREN condexp condoperator innercond RIGHTPAREN
{node *lili = mknode("(",$2,NULL,"");node *Sr = mknode(getLT($3),lili,NULL,",");$$= mknode(" ",Sr,$4,")");}
;
methoperator: PLUS {$$ = mknode(yytext,NULL,NULL,"");}
|MINUS {$$ = mknode(yytext,NULL,NULL,"");}
|MULTIPLICATION {$$ = mknode(yytext,NULL,NULL,"");}
|DIVISION {$$ = mknode(yytext,NULL,NULL,"");}
;
pointeroperator:ADDRESS {$$ = mknode(yytext,NULL,NULL,"");}
|DEREFERENCE {$$ = mknode(yytext,NULL,NULL,"");}
;
identifiertype: STRING_DECLERATION {$$ = mknode(yytext,NULL,NULL,"");}
|BOOLEAN {$$ = mknode(yytext,NULL,NULL,"");}
|INTEGER {$$ = mknode(yytext,NULL,NULL,"");}
|CHAR {$$ = mknode(yytext,NULL,NULL,"");}
|INTPTR {$$ = mknode(yytext,NULL,NULL,"");}
|CHARPTR {$$ = mknode(yytext,NULL,NULL,"");}
|int_num{$$=$1;}
;
iden : IDENTIFIER {$$ = mknode(yytext,NULL,NULL,"");}
;
signoperator: LESS {$$ = mknode(yytext,NULL,NULL,"");}
|LESSEQUAL {$$ = mknode(yytext,NULL,NULL,"");}
|GREATER {$$ = mknode(yytext,NULL,NULL,"");}
|GREATEREQUAL {$$ = mknode(yytext,NULL,NULL,"");}
|IsEqual {$$=$1;}
;
IsEqual: EQUAL {$$ = mknode(yytext,NULL,NULL,"");}
|NOTEQUAL {$$ = mknode(yytext,NULL,NULL,"");}
;
condoperator: AND {$$ = mknode(yytext,NULL,NULL,"");}
|OR {$$ = mknode(yytext,NULL,NULL,"");}
;
int_num: INTEGER_CONST {$$ = mknode(yytext,NULL,NULL,"");}
|BINARY_NUMBER {$$ = mknode(yytext,NULL,NULL,"");}
|OCTAL_NUMBER {$$ = mknode(yytext,NULL,NULL,"");}
|HEX_NUMBER {$$ = mknode(yytext,NULL,NULL,"");}
;
%%
#include "lex.yy.c"
int main()
{
return yyparse();
}
node* mknode(char * ltoken, node * left, node * right,char * rtoken)
{
node * newnode = (node*)malloc(sizeof(node));
char * newstr = (char*)malloc(sizeof(ltoken)+1);
strcpy(newstr, ltoken);
char * newstrr = (char*)malloc(sizeof(rtoken)+1);
strcpy(newstrr, rtoken);
newnode->left = left;
newnode->right = right;
newnode->ltoken = newstr;
newnode->rtoken = newstrr;
return newnode;
}
void printtree(node * tree)
{
printf("%s ", tree->ltoken);
if (tree->left)
printtree(tree->left);
if (tree->right)
printtree(tree->right);
printf("%s ", tree->rtoken);
}
int yyerror(char* s)
{
extern int yylineno;
printf("yacc error: %s at line %d\n",s,yylineno); return 0;
}
char * getLT(node * s)
{
return s->ltoken;
}
i run debug and i understand to know why all these conflicts but i'm not really understand what it means.
that's my debug output (I added link since it's long document)
Conflicts are not errors, they basically mean that the parser generator can't decide which rule to apply at certain stages. A shift/reduce conflict is when you can either finish a rule (reduce) or consume the next token (shift) and continue from that state. A typical shift/reduce problem is for the if-else statement:
Which if statement does the else branch belong to? Look at this:
If the parser decides to shift, then the else branch will be part of the inner statement but if it reduces, the outer one will have it. Usually shift-reduce conflicts are handled by shifting instead of reducing.
Now a reduce-reduce conflict is something that occurs when you have a fatal error in your grammar because 2 rules apply to the same input. For example:
So look at your grammar and find those reduce-reduce conflicts!
A thing you should consider if you are just learning parsing, try to roll your own parser first (like a top-down recursive-descent parser)! It's not that bad, the most complex algorithm you will use is probably a shunting-yard algorithm for mathematical expressions. Plus you'll benefit from the better error handling and custom grammar that cannot be parsed with parser generators (without hacking).