shift/reduce conflict in simple C grammar

208 Views Asked by At

I am writing a parser for a compiler in simple C and I'm trying to understand why I have
yacc: 22 shift/reduce conflicts, 15 reduce/reduce conflicts.

lex file

 letter      [a-zA-Z]
Hexdigits   [A-F|0-9]
OctalDigit  [0-7]
digit       [0-9]
digitMZ     [1-9]
newline     (\n)
integerNum  "0"|{digitMZ}{digit}*
HexNumber   ("0X"|"0x"){Hexdigits}+
OctalNumber ["0"]{OctalDigit}+
BinaryNumber ["0"|"1"]+"b"
identifier  {letter}({letter}|{digit}|"_")*
char        "'"."'"
wordString  \"[^\"]*\"
Comment     "/%"[^"%/"]+"%/"
StrDec      "["{integerNum}"]"

%{

%}

%option yylineno

%%
"string"" "*{StrDec} { return STRING_DECLERATION;}
boolean     { return BOOLEAN; } 
else        { return ELSE; }
if          { return IF; }
integer     { return INTEGER; }
procedure   { return PROCEDURE; }
char        { return CHAR; }
string      { return STRING; }
var         { return VAR; }
intptr      { return INTPTR; }
charptr     { return CHARPTR; }
return      { return RETURN; }
void        { return VOID; }
while       { return WHILE; }
"isprint()" { return IS_PRINT; }

true|false  { return BOOLCONSTANT; } 
":"         { return COLON; }
"+"         { return PLUS; }
"&"         { return ADDRESS; }
"^"         { return DEREFERENCE; }
"-"         { return MINUS; }              
"*"         { return MULTIPLICATION; }
"&&"        { return AND; }
"/"         { return DIVISION; }
"<="        { return LESSEQUAL; }
"<"         { return LESS; }
">"         { return GREATER; }
">="        { return GREATEREQUAL; }
"=="        { return EQUAL; }
"!="        { return NOTEQUAL; }
"="         { return ASSIGN; }
";"         { return SEMICOLON; }
","         { return COMMA; }
"("         { return LEFTPAREN; }
")"         { return RIGHTPAREN; }
"["         { return LEFTBRACKET; }
"]"         { return RIGHTBRACKET; }
"{"         { return LEFTBRACE; }
"}"         { return RIGHTBRACE; }
"||"        { return OR; }
"!"         { return NOT; }
" "+        { }
"|"     {return ABS_LENGTH;}
"   "+  {}

{newline}       { }
{Comment}   {}
{wordString}    {return WORDSTRING; }
{integerNum}    {return INTEGER_CONST; }
{char}          {return CHAR_IDENTIFIER; }
{identifier}    {return IDENTIFIER; }
{HexNumber}     {return HEX_NUMBER; }
{OctalNumber}   {return OCTAL_NUMBER; }
{BinaryNumber}  {return BINARY_NUMBER; }
.               {return ERROR; }

%%

yacc file

    %{
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define YYDEBUG 1
#define YYSTYPE struct node*
int yyerror(char*);
int yylex();
typedef struct node
{
    char * rtoken;
    struct node * left;
    struct node * right;
    char * ltoken;
}node;

node * mknode(char * ltoken, node * left, node * right,char * rtoken);
void printtree(node * tree);
char * getLT(node *);

extern char *yytext;



%}

%token STRING_DECLERATION
%token STRING 
%token WORDSTRING
%token BOOLEAN
%token BREAK
%token ELSE
%token IF
%token IMPLEMENTS
%token INTEGER
%token PROCEDURE
%token CHAR
%token VAR
%token INTPTR
%token CHARPTR
%token RETURN
%token VOID
%token WHILE
%token IS_PRINT
%token BOOLCONSTANT
%token PLUS
%token COLON
%token ADDRESS
%token DEREFERENCE
%token MINUS
%token MULTIPLICATION
%token AND
%token DIVISION
%token LESS
%token LESSEQUAL
%token GREATER
%token GREATEREQUAL
%token EQUAL 
%token NOTEQUAL
%token ASSIGN
%token SEMICOLON
%token COMMA
%token LEFTPAREN
%token RIGHTPAREN
%token LEFTBRACKET
%token RIGHTBRACKET
%token LEFTBRACE
%token RIGHTBRACE
%token OR
%token NOT
%token ABS_LENGTH
%token CHAR_IDENTIFIER
%token HEX_NUMBER
%token OCTAL_NUMBER
%token BINARY_NUMBER
%token ERROR
%token INTEGER_CONST
%token IDENTIFIER
%% 

start: s{printtree($1);}
        ;

s:      expr {$$=$1;}
        |expr s {node *yu = mknode("",$1,NULL,"\n");$$= mknode("",yu,$2,"");}
        ;

expr:       function {$$=$1;}
        |assignment SEMICOLON {$$=$1;}
        |decleration SEMICOLON   {$$=$1;}
        |condition  {$$=$1;}
        |BRACES {$$=$1;}

        ;

function:   PROCEDURE iden LEFTPAREN argListOpt RIGHTPAREN RETURN identifiertype compondStmt
        {node *z =mknode("RETURN:",$7,NULL,"\n");node *o =mknode(getLT($2),$4,z,""); $$ = mknode("(PROCEDURE",o,$8,"\n)\n");}
        |PROCEDURE iden LEFTPAREN RIGHTPAREN RETURN identifiertype compondStmt
        {node *z2 = mknode("",NULL,NULL,"");node *z1 =mknode("RETURN:",$6,NULL,"\n");
        node *o =mknode(getLT($2),z2,z1,""); $$ = mknode("(PROCEDURE",o,$7,"\n)\n");}   
        ;

argListOpt: multidentifier COLON identifiertype SEMICOLON argListOpt
        {node *fg =mknode("",$1,$3,""); $$ = mknode("recive:",fg,$5,"");}
        |multidentifier COLON identifiertype{$$ = mknode("recive:",$1,$3,"");}
        ;

compondStmt:    BRACES{$$=$1;}
        |SEMICOLON{$$ = mknode("",NULL,NULL,"");}
        ;

functionCall:   iden LEFTPAREN sendArgs RIGHTPAREN{node *Sgl = mknode("(",$3,NULL,")");$$= mknode("",$1,Sgl,"");}
        |iden LEFTPAREN RIGHTPAREN {node *Sgg = mknode("(",NULL,NULL,")");$$= mknode("",$1,Sgg,"");}
        ;

sendArgs:   return_val {$$=$1;}
        |sendArgs COMMA return_val {node *SA = mknode("",$1,NULL,",");$$= mknode("",SA,$3,"");}
        ;


BRACES:     LEFTBRACE All RIGHTBRACE    {$$ = mknode("\n{BRACES\n",$2,NULL,"\n}");}
        |LEFTBRACE RIGHTBRACE{$$ = mknode("\n{BRACES\n",NULL,NULL,"\n}");}
        |LEFTBRACE BRACES RIGHTBRACE{$$ = mknode("\n{BRACES\n",$2,NULL,"\n}");}
        ;


All:        RETURN return_val SEMICOLON{$$ = mknode("RETURN",$2,NULL,"\n");}
        |All RETURN return_val SEMICOLON{node * K = mknode("RETURN",$2,NULL,"");$$ = mknode("",$1,K,"\n");}
        |All function{$$ = mknode("",$1,$2,"\n");}
        |function {$$=$1;}
        |assignment {$$ = mknode("(",$1,NULL,")\n");}
        |assignment SEMICOLON   {$$ = mknode("(",$1,NULL,")\n");}
        |decleration SEMICOLON{$$=$1;}
        |decleration{$$=$1;}
        |All decleration SEMICOLON  {$$ = mknode("",$1,$2,"\n");}
        |All assignment SEMICOLON {$$ = mknode("",$1,$2,"\n");}
        |condition{$$=$1;}
        |All condition{$$ = mknode("",$1,$2,"\n");}
        |All BRACES {$$ = mknode("",$1,$2,"\n");}
        |BRACES All {$$ = mknode("",$1,$2,"\n");}
        ;

return_val: int_num{$$=$1;}
        |iden {$$=$1;}
        |wstring{$$ = $1;}
        |exp{$$=$1;}
        |char_id{$$ = mknode(yytext,NULL,NULL,"");}
        |bool{$$=$1;}
        |wordlength{$$=$1;}
        |absolute{$$=$1;}
        |identifierabs{$$=$1;}
        ;

decleration:    VAR multidentifier COLON identifiertype {$$ = mknode("",$2,$4,"\n");}
        ;

multidentifier: iden {$$=$1;}
        | multidentifier COMMA iden{node *er = mknode("",$1,NULL,",");$$= mknode("",er,$3,"");}
        ;

bool: BOOLCONSTANT{$$ = mknode(yytext,NULL,NULL,"");}
        ;

assignment :    iden ASSIGN functionCall {node *pop = mknode("=",$1,NULL,"");$$ = mknode("\t(",pop,$3,")");}
        | iden ASSIGN exp {node *rop = mknode("=",$1,NULL,"");$$ = mknode("\t(",rop,$3,")");}
        | iden ASSIGN bool{node *nop = mknode("=",$1,NULL,"");$$ = mknode("\t(",nop,$3,")");}
        | iden ASSIGN pointeroperator iden
        {node *gh= mknode("",$3,$4,"");node *e1 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e1,gh,")");}
        | iden ASSIGN iden {node *e2 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e2,$3,")");}
        | iden ASSIGN wstring {node *e3 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e3,$3,")");}
        | iden ASSIGN char_id{node *e4 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e4,$3,")");}
        | iden ASSIGN "null"{node *e5 = mknode("=",$1,NULL,"");$$ = mknode("\t(",e5,NULL,"NULL)");}
        ;
char_id: CHAR_IDENTIFIER {$$ = mknode(yytext,NULL,NULL,"");}
        ;

exp:        int_num {$$=$1;}  
            |exp methoperator exp  {node *y = mknode(getLT($2),$1,NULL,"");$$ = mknode("(",y,$3,")");}
            |LEFTPAREN exp methoperator exp RIGHTPAREN {node *y = mknode(getLT($3),$2,NULL,"");$$ = mknode("(",y,$4,")");}/**/
        |iden {$$=$1;}
        |LEFTPAREN exp RIGHTPAREN {$$ = mknode("(",$2,NULL,")");}/**/
            ;

condition:  whileCond{$$=$1;}
        |ifCond{$$=$1;}
        ;

ifCond:     IF LEFTPAREN innercond RIGHTPAREN BRACES{$$ = mknode("(COND \n",$3,$5,"\n)\n");}
        |IF LEFTPAREN innercond RIGHTPAREN BRACES ELSE BRACES{node *yk= mknode("ELSE",$7,NULL,"");
        node *j= mknode("",$5,yk,"");$$ = mknode("(COND \n",$3,j,"\n)\n");}
        ;

whileCond:  WHILE LEFTPAREN innercond RIGHTPAREN BRACES {$$ = mknode("(LOOP \n",$3,$5,"\n)\n");}    
        ;

wstring: WORDSTRING{$$ = mknode(yytext,NULL,NULL,"");}
        ;
condexp:    exp signoperator exp {$$ = mknode(getLT($2),$1,$3,"");}
        |wstring IsEqual wstring {$$ = mknode(getLT($2),$1,$3,"");}
        |iden signoperator exp{$$ = mknode(getLT($2),$1,$3,"");}
        |bool{$$=$1;}
        |iden IsEqual wstring {$$ = mknode(getLT($2),$1,$3,"");}
        |wstring IsEqual iden{$$ = mknode(getLT($2),$1,$3,"");}
        |char_id IsEqual char_id {$$ = mknode(getLT($2),$1,$3,"");}
        |iden IsEqual char_id {$$ = mknode(getLT($2),$1,$3,"");}
        |char_id IsEqual iden{$$ = mknode(getLT($2),$1,$3,"");}
        |exp signoperator wordlength {$$ = mknode(getLT($2),$1,$3,"");}
        |wordlength signoperator exp {$$ = mknode(getLT($2),$1,$3,"");}
        |exp signoperator absolute{$$ = mknode(getLT($2),$1,$3,"");}    
        |absolute signoperator exp{$$ = mknode(getLT($2),$1,$3,"");}
        |wordlength signoperator absolute{$$ = mknode(getLT($2),$1,$3,"");} 
        |absolute signoperator wordlength {$$ = mknode(getLT($2),$1,$3,"");}
        |identifierabs signoperator exp {$$ = mknode(getLT($2),$1,$3,"");}
        |identifierabs signoperator wordlength{$$ = mknode(getLT($2),$1,$3,"");}
        |identifierabs signoperator absolute{$$ = mknode(getLT($2),$1,$3,"");}
        |identifierabs signoperator identifierabs{$$ = mknode(getLT($2),$1,$3,"");}
        ;


wordlength: ABS_LENGTH wstring ABS_LENGTH {$$ = mknode("|",$2,NULL,"|");}
        ;

absolute :  ABS_LENGTH int_num ABS_LENGTH{$$ = mknode("|",$2,NULL,"|");}    
        ;

identifierabs:  ABS_LENGTH iden ABS_LENGTH {$$ = mknode("|",$2,NULL,"|");}
        ;

innercond:  condexp {$$=mknode("\t(",$1,NULL,")");}
        |LEFTPAREN condexp RIGHTPAREN{$$ = mknode("(",$2,NULL,")");}
        |LEFTPAREN condexp RIGHTPAREN condoperator innercond{node *t  = mknode("",$4,$5,"");$$ = mknode("(",$2,t,")");}
        |condexp condoperator innercond
        {node *lolo = mknode("(",$1,NULL,"");node *Sr = mknode(getLT($2),lolo,NULL,")");$$= mknode(" ",Sr,$3,"");}
        |LEFTPAREN condexp condoperator innercond RIGHTPAREN
        {node *lili = mknode("(",$2,NULL,"");node *Sr = mknode(getLT($3),lili,NULL,",");$$= mknode(" ",Sr,$4,")");}
        ;


methoperator:   PLUS    {$$ = mknode(yytext,NULL,NULL,"");}
        |MINUS  {$$ = mknode(yytext,NULL,NULL,"");}
        |MULTIPLICATION     {$$ = mknode(yytext,NULL,NULL,"");}
        |DIVISION   {$$ = mknode(yytext,NULL,NULL,"");}
        ;

pointeroperator:ADDRESS {$$ = mknode(yytext,NULL,NULL,"");} 
        |DEREFERENCE    {$$ = mknode(yytext,NULL,NULL,"");}
        ;
identifiertype: STRING_DECLERATION  {$$ = mknode(yytext,NULL,NULL,"");}
        |BOOLEAN    {$$ = mknode(yytext,NULL,NULL,"");}
        |INTEGER    {$$ = mknode(yytext,NULL,NULL,"");}
        |CHAR   {$$ = mknode(yytext,NULL,NULL,"");}
        |INTPTR     {$$ = mknode(yytext,NULL,NULL,"");}
        |CHARPTR    {$$ = mknode(yytext,NULL,NULL,"");}
        |int_num{$$=$1;}
        ;

iden :      IDENTIFIER {$$ = mknode(yytext,NULL,NULL,"");}
        ;
signoperator:   LESS    {$$ = mknode(yytext,NULL,NULL,"");}
        |LESSEQUAL  {$$ = mknode(yytext,NULL,NULL,"");}
        |GREATER    {$$ = mknode(yytext,NULL,NULL,"");}
        |GREATEREQUAL   {$$ = mknode(yytext,NULL,NULL,"");}
        |IsEqual    {$$=$1;}
        ;

IsEqual:    EQUAL   {$$ = mknode(yytext,NULL,NULL,"");} 
        |NOTEQUAL   {$$ = mknode(yytext,NULL,NULL,"");}
        ;

condoperator:   AND {$$ = mknode(yytext,NULL,NULL,"");}
        |OR {$$ = mknode(yytext,NULL,NULL,"");}
        ;

int_num:    INTEGER_CONST   {$$ = mknode(yytext,NULL,NULL,"");}
        |BINARY_NUMBER  {$$ = mknode(yytext,NULL,NULL,"");}
        |OCTAL_NUMBER   {$$ = mknode(yytext,NULL,NULL,"");}
        |HEX_NUMBER {$$ = mknode(yytext,NULL,NULL,"");}
        ;

%%

#include "lex.yy.c"
int main()
{
return yyparse();
}
node* mknode(char * ltoken, node * left, node * right,char * rtoken)
{
node * newnode = (node*)malloc(sizeof(node));
char * newstr = (char*)malloc(sizeof(ltoken)+1);
strcpy(newstr, ltoken);
char * newstrr = (char*)malloc(sizeof(rtoken)+1);
strcpy(newstrr, rtoken);
newnode->left = left;
newnode->right = right;
newnode->ltoken = newstr;
newnode->rtoken = newstrr;
return newnode;
}

void printtree(node * tree)
{
printf("%s ", tree->ltoken);
if (tree->left)
    printtree(tree->left);
if (tree->right)
    printtree(tree->right);
printf("%s ", tree->rtoken);
}
int yyerror(char* s)
{
extern int yylineno;
printf("yacc error: %s at line %d\n",s,yylineno); return 0;
}
char * getLT(node * s)
{
return s->ltoken;
}

i run debug and i understand to know why all these conflicts but i'm not really understand what it means.

that's my debug output (I added link since it's long document)

debug output

1

There are 1 best solutions below

0
On

Conflicts are not errors, they basically mean that the parser generator can't decide which rule to apply at certain stages. A shift/reduce conflict is when you can either finish a rule (reduce) or consume the next token (shift) and continue from that state. A typical shift/reduce problem is for the if-else statement:

if condition
    if another condition
        # Do something...
    else
        # Do something else

Which if statement does the else branch belong to? Look at this:

IF <condition> <statement> IF <condition> <statement> ELSE <statement>
                                  The parser is here ^

If the parser decides to shift, then the else branch will be part of the inner statement but if it reduces, the outer one will have it. Usually shift-reduce conflicts are handled by shifting instead of reducing.

Now a reduce-reduce conflict is something that occurs when you have a fatal error in your grammar because 2 rules apply to the same input. For example:

sequence:
       %empty         { printf ("empty sequence\n"); }
     | maybeword
     | sequence word  { printf ("added word %s\n", $2); }
     ;

maybeword:
       %empty    { printf ("empty maybeword\n"); }
     | word      { printf ("single word %s\n", $1); }
     ;

So look at your grammar and find those reduce-reduce conflicts!

A thing you should consider if you are just learning parsing, try to roll your own parser first (like a top-down recursive-descent parser)! It's not that bad, the most complex algorithm you will use is probably a shunting-yard algorithm for mathematical expressions. Plus you'll benefit from the better error handling and custom grammar that cannot be parsed with parser generators (without hacking).