Simple semantic analyser for multiple and redeclaration in yacc gives parsing error

331 Views Asked by At

Here is my code for a simple semantic analyser that only checks for re-declarations and multiple declarations. This is the lex file

%{
        /* Token codes returned by the rules (FOR, INT, ID, ...) come from this header. */
        #include "y.tab.h"
%}

/* Named sub-patterns used by the identifier rule below.
 * `digits` is written with `*`, so it can also match the empty string. */
digits [0-9]*
letters [a-zA-Z]

%%

        /* Rules section. Lex picks the longest match; when two rules match the
         * same length of input, the rule listed first wins. No rule here
         * assigns yylval, so only the token code reaches the parser. */
"for" return FOR;
"if" return IF;
"else" return ELSE;
"while" return WHILE;
"do" return DO;
"switch" return SWITCH;
"case" return CASE;
"break" return BREAK;
"default" return DEFAULT;
"void" return VOID;
"int"  return INT;
"float"  return FLOAT;
"char"  return CHAR;
"double"  return DOUBLE;
"(" return LPAREN;
")" return RPAREN;
"{" return LCURLY;
"}" return RCURLY;

[0-9]+ return INT; /* NOTE(review): integer literals return the same token code as the "int" keyword */
[0-9]*\.?[0-9]+|[0-9]+ return FLOAT; /* NOTE(review): same token code as the "float" keyword; all-digit input ties with the rule above, which is listed first */
("_")*{letters}("_"|{letters}|{digits})* return alpha; /* identifier: optional leading underscores, a letter, then letters, digits or underscores */
[a-z]+ return ID; /* NOTE(review): every [a-z]+ string is also matched, at the same length, by the alpha rule listed first, so this rule cannot be selected */
[ \n\t] {;} /* whitespace: empty action, no token returned */
"+"  return PLUS;
"-"  return MINUS;
"/"  return DIVIDE;
"*"  return MULT;
"^"  return XOR;
"="  return ASSIGN;
"==" return EQUAL;
"++" return INC;
"--" return DEC;
">=" return GE;
"<=" return LE;
">"  return GT;
"<"  return LT;
"!=" return NE;
"&&" return AND;
"||" return OR;
"$"  return END;
","  return COMMA;
";"  return SEMICOLON;
.    return yytext[0]; /* any other single character is returned as its own character code */
%%

Here is my yacc file

%{
/* C prologue: headers, scanner/parser externs and the symbol-table state
 * shared by the grammar actions, insert() and main(). */
#include<stdio.h>
#include<stdlib.h>
#include<string.h>
extern void yyerror(char *);
extern FILE *yyin;
extern int yylineno;
extern char *yytext;
/* err_no : set to 1 by insert() when a name reappears with a different type
 * fl     : "found" flag used inside insert()
 * i      : number of symbols stored so far
 * j      : shared loop index
 * type[] : type code per symbol (0=int, 1=float, 2=char, 3=double, as passed
 *          by the L1..L4 actions below) */
int err_no=0,fl=0,i=0,j=0,type[100];
/* symbol[] holds the stored names; temp is the name handed to insert(). */
char symbol[100][100],temp[100];
void insert(int);
%}

/* Token and precedence declarations. Only INT, FLOAT, CHAR, DOUBLE, ID, COMMA
 * and SEMICOLON appear in the rules below; the remaining tokens and the
 * precedence lines are declared but not referenced by any rule in this file. */
%token FOR EQUAL INC DEC alpha num GE LE GT LT NE AND OR END IF ELSE  WHILE DO SWITCH CASE BREAK DEFAULT VOID INT FLOAT CHAR SHORT DOUBLE ID
%token LPAREN RPAREN LCURLY RCURLY COMMA SEMICOLON ASSIGN
%token PLUS MINUS MULT DIVIDE XOR
%left PLUS MINUS MULT DIVIDE
%right ASSIGN
%right XOR
%nonassoc UMINUS
%left LT GT LE GE EQUAL NE
%left AND OR

%start STRT

%%
/* Start symbol: a list of declarations; the action returns 0 from yyparse(). */
STRT: S  {return 0;}
;

/* One or more declaration statements. */
S: ST S | ST
;


/* A declaration: a type keyword, a comma-separated identifier list and a
 * semicolon, optionally followed by further declarations.
 * NOTE(review): both S and ST can chain declarations, so a sequence such as
 * two declarations appears derivable in more than one way - check the
 * generator's conflict report. */
ST:             INT L1 SEMICOLON { printf("Inside int declaration rule");} | FLOAT L2 SEMICOLON | CHAR L3 SEMICOLON | DOUBLE L4 SEMICOLON |INT L1 SEMICOLON ST | FLOAT L2 SEMICOLON ST |
                                CHAR L3 SEMICOLON ST | DOUBLE L4 SEMICOLON ST;

/* Identifier lists, one per type. Each action copies the identifier's semantic
 * value into temp and calls insert() with the type code (0=int, 1=float,
 * 2=char, 3=double).
 * NOTE(review): the actions cast $1/$3 to char *, but this file declares no
 * %union or YYSTYPE, so the value is only meaningful if the scanner stores a
 * string pointer in yylval for ID - confirm against the lexer. */
L1:             L1 COMMA ID {strcpy(temp,(char *)$3); printf("%s", temp); insert(0);}| ID {strcpy(temp,(char *)$1); printf("%s", temp); insert(0);};
L2:             L2 COMMA ID {strcpy(temp,(char *)$3); insert(1);}| ID {strcpy(temp,(char *)$1); insert(1);};
L3:             L3 COMMA ID {strcpy(temp,(char *)$3); insert(2);}| ID {strcpy(temp,(char *)$1); insert(2);};
L4:             L4 COMMA ID {strcpy(temp,(char *)$3); insert(3);}| ID {strcpy(temp,(char *)$1); insert(3);};

%%
/*
 * Record the identifier currently held in the global `temp` with type code
 * `type1` (0=int, 1=float, 2=char, 3=double, as passed by the grammar actions).
 *
 * If the name is already stored, nothing is added: a matching type is reported
 * as a redeclaration, a different type as a multiple declaration (which also
 * sets err_no to 1). A new name is appended at index i, unless the table is
 * already full, in which case the name is reported and dropped instead of
 * writing past the end of symbol[]/type[].
 */
void insert(int type1) {
        const int capacity = (int)(sizeof symbol / sizeof symbol[0]);

        printf("Inside insert function \n");
        fl=0;
        for(j=0;j<i;j++) {
                if(strcmp(temp,symbol[j])!=0) {
                        continue;
                }
                printf("Symbol encountered previously \n");
                if(type[j]==type1) {
                        printf("Redeclaration of variable -> %s\n",temp);
                }
                else {
                        printf("Multiple Declaration of Variable\n");
                        err_no=1;
                }
                fl=1;
                /* Names are stored at most once, so no later entry can match. */
                break;
        }
        if(fl!=0) {
                return;
        }
        if(i>=capacity) {
                fprintf(stderr,"Symbol table full, cannot record %s\n",temp);
                return;
        }
        type[i]=type1;
        strcpy(symbol[i],temp);
        i++;
}


/*
 * Entry point: parse the file named by argv[1].
 *
 * Prints a success message when yyparse() returns 0. Otherwise prints the line
 * number and current token text and, if err_no is still 0, dumps the recorded
 * symbols with their type names.
 *
 * Returns EXIT_FAILURE when no file name is given or the file cannot be
 * opened; otherwise returns 0, as before.
 */
int main(int argc, char **argv){
        static const char *const type_names[] = { "INT ", "FLOAT ", "CHAR ", "DOUBLE " };
        const int type_count = (int)(sizeof type_names / sizeof type_names[0]);
        FILE *fp1;

        if (argc < 2) {
                fprintf(stderr, "Usage: %s <input-file>\n", argc > 0 ? argv[0] : "parser");
                return EXIT_FAILURE;
        }
        fp1=fopen(argv[1],"r");
        if (fp1 == NULL) {
                perror(argv[1]);
                return EXIT_FAILURE;
        }
        yyin=fp1;
        if (yyparse()==0) printf("Parsed Successfully for string %s\n", yytext);
        else{
                printf("\nParsing Error at line %d for string %s\n", yylineno, yytext);
                if(err_no==0) {
                        for(j=0;j<i;j++) {
                                /* Unknown type codes print only the name, as before. */
                                if(type[j]>=0 && type[j]<type_count) printf("%s", type_names[type[j]]);
                                printf("%s\n",symbol[j]);
                        }
                }
        }
        fclose(fp1);
        return 0;
}

/*
 * Called by the scanner when it reaches end of input. Returning 1 tells it
 * there is no further input, so the parser receives end-of-file. Returning 0
 * would claim that another input has been set up, which this program never
 * does.
 */
int yywrap(){
        return 1;
}

/* Error callback: writes the message `s` to standard output, prefixed with "Error: ". */
void yyerror(char*s){
        fprintf(stdout, "Error: %s \n", s);
}


The contents of the input file are:

int a,b,c;
float a;

I get the following error:

Error: syntax error 

Parsing Error at line 1 for string a

I initially thought it was due to the space between int and a. If I remove the space I get the following error

Error: syntax error 

Parsing Error at line 1 for string inta

I can't figure out where the problem lies. I've been stuck on it for a long time.

Edit Modified rules:

ST:             INT SPACE L1 SEMICOLON { printf("Inside int declaration rule");} | FLOAT SPACE L2 SEMICOLON | CHAR SPACE L3 SEMICOLON | DOUBLE SPACE L4 SEMICOLON |INT SPACE L1 SEMICOLON ST | FLOAT SPACE L2 SEMICOLON ST | CHAR SPACE L3 SEMICOLON ST | DOUBLE SPACE L4 SEMICOLON ST;

Modified input

int a,a,b,c;
1

There is 1 best solution below

1
On

You have two rules in your lex file:

("_")*{letters}("_"|{letters}|{digits})* return alpha;
[a-z]+ return ID;

Because the first rule matches everything that the second one does, and lex prefers the earlier rule when two rules match the same length of input, the second rule will never match. So you'll never get an ID token, just alpha tokens. As your grammar depends on ID tokens, it will therefore never match.

If you swap these lines, you'll see ID tokens for things that match that pattern and only get alpha tokens for things that don't match the ID rule.