Getting output of convolutional PE as XXX instead of a number?

739 Views Asked by At
`timescale 1ns / 1ps
    // Description: This device performs convolution operation with a 3x3 kernel. 
//////////////////////////////////////////////////////////////////////////////////    

module PE#(
    bit_width = 3,
    out_bit_width = 9
    )(
    input logic ready,
    input logic clock,
    input logic signed [bit_width-1:0] weight [2:0][2:0],
    input logic unsigned [bit_width-1:0] fmap [2:0][2:0],
    output logic signed [out_bit_width-1:0] sum
  );
  logic signed [2*bit_width:0] intermediate1;
  logic signed [2*bit_width:0] intermediate2;
  logic signed [2*bit_width:0] intermediate3;
  logic signed [2*bit_width:0] intermediate4;
  logic signed [2*bit_width+1:0] intermediate5;
  logic signed [2*bit_width+1:0] intermediate6;
  logic signed [2*bit_width+2:0] intermediate7;
  logic signed [2*bit_width-1:0] psum [2:0][2:0];
  
  always_ff @(posedge clock)
  begin
    if (ready == 1'b1)
    begin
        psum[0][0] <= weight[0][0] * fmap[0][0];
        psum[0][1] <= weight[0][1] * fmap[0][1];
        psum[0][2] <= weight[0][2] * fmap[0][2];
        psum[1][0] <= weight[1][0] * fmap[1][0];
        psum[1][1] <= weight[1][1] * fmap[1][1];
        psum[1][2] <= weight[1][2] * fmap[1][2];
        psum[2][0] <= weight[2][0] * fmap[2][0];
        psum[2][1] <= weight[2][1] * fmap[2][1];
        psum[2][2] <= weight[2][2] * fmap[2][2];
        
        intermediate1 <= psum[0][0] + psum[0][1];
        intermediate2 <= psum[0][2] + psum[1][0];
        intermediate3 <= psum[1][1] + psum[1][2];
        intermediate4 <= psum[2][0] + psum[2][1];
        
        intermediate5 <= intermediate1 + intermediate2;
        intermediate6 <= intermediate3 + intermediate4;
        
        intermediate7 <= intermediate5 + intermediate6; 
        
        sum <= psum[2][2] + intermediate7;
        
    end
  end
endmodule   //PE


`timescale 1ns / 1ps
//////////////////////////////////////////////////////////////////////////////////    

module PE_tb();
    logic [2:0] weight [2:0][2:0];
    logic [2:0] inputfmap [2:0][2:0];
    logic [2:0] weight [2:0][2:0];
    logic [8:0] sum;
    logic clock;
    logic ready;
    
    PE PE1 (ready, clock, weight, inputfmap,sum);
    
    initial begin
        weight[0][0] = 3'b1;
        weight[0][1] = 3'b1;
        weight[0][2] = 3'b1;
        weight[1][0] = 3'b1;
        weight[1][1] = 3'b1;
        weight[1][2] = 3'b1;
        weight[2][0] = 3'b1;
        weight[2][1] = 3'b1;
        weight[2][2] = 3'b1;
        inputfmap[0][0] = 3'b0;
        inputfmap[0][1] = 3'b0;
        inputfmap[0][2] = 3'b0;
        inputfmap[1][0] = 3'b0;
        inputfmap[1][1] = 3'b0;
        inputfmap[1][2] = 3'b0;
        inputfmap[2][0] = 3'b0;
        inputfmap[2][1] = 3'b0;
        inputfmap[2][2] = 3'b0;
        clock = 1'b0;
        ready = 1'b0;
        
        # 25
        weight[0][0] = 3'd1;
        weight[0][1] = 3'd1;
        weight[0][2] = 3'd1;
        weight[1][0] = 3'd1;
        weight[1][1] = 3'd1;
        weight[1][2] = 3'd1;
        weight[2][0] = 3'd1;
        weight[2][1] = 3'd1;
        weight[2][2] = 3'd1;
        inputfmap[0][0] = 3'd1;
        inputfmap[0][1] = 3'd1;
        inputfmap[0][2] = 3'd1;
        inputfmap[1][0] = 3'd1;
        inputfmap[1][1] = 3'd1;
        inputfmap[1][2] = 3'd1;
        inputfmap[2][0] = 3'd1;
        inputfmap[2][1] = 3'd1;
        inputfmap[2][2] = 3'd1;
        
        # 25
        weight[0][0] = 3'd1;
        weight[0][1] = 3'd1;
        weight[0][2] = 3'd1;
        weight[1][0] = 3'd1;
        weight[1][1] = 3'd1;
        weight[1][2] = 3'd1;
        weight[2][0] = 3'd1;
        weight[2][1] = 3'd1;
        weight[2][2] = 3'd1;
        inputfmap[0][0] = 3'd2;
        inputfmap[0][1] = 3'd2;
        inputfmap[0][2] = 3'd2;
        inputfmap[1][0] = 3'd2;
        inputfmap[1][1] = 3'd2;
        inputfmap[1][2] = 3'd2;
        inputfmap[2][0] = 3'd2;
        inputfmap[2][1] = 3'd2;
        inputfmap[2][2] = 3'd2;
        
        # 25
        weight[0][0] = 3'd1;
        weight[0][1] = 3'd1;
        weight[0][2] = 3'd1;
        weight[1][0] = 3'd1;
        weight[1][1] = 3'd1;
        weight[1][2] = 3'd1;
        weight[2][0] = 3'd1;
        weight[2][1] = 3'd1;
        weight[2][2] = 3'd1;
        inputfmap[0][0] = 3'd3;
        inputfmap[0][1] = 3'd3;
        inputfmap[0][2] = 3'd3;
        inputfmap[1][0] = 3'd3;
        inputfmap[1][1] = 3'd3;
        inputfmap[1][2] = 3'd3;
        inputfmap[2][0] = 3'd3;
        inputfmap[2][1] = 3'd3;
        inputfmap[2][2] = 3'd3;
        
        #30 ready = 1'b1;
        
        #100 $stop;
    end
    
    always @* begin
        #10 clock = ~clock;  // Clock Period = 20
    
        #100 $stop;
    end
    
endmodule

This convolutional engine performs a 3x3 convolution on a 3x3 input feature map with //a 3x3 weight or kernel matrix. This performs element-wise multiplication of each element and then adds up the 9 multiplication results to provide the convolutional sum as output.

Here the convolution is performed on the posedge clock when ready = 1. If ready = 0, the convolution is not performed.

For some reason, when I run the test bench, the output is XXX instead of a number. I am not sure where I went wrong, but the code isn't working as I had expected.

1

There are 1 best solutions below

1
On

The simulation did not run long enough because of the $stop in the always block for clock. Also, you should not use a sensitivity list (@*) for that block. When I use the following for the clock, I see sum become a known value ('h1b) at time 190ns:

always #10 clock = ~clock;  // Clock Period = 20

If you didn't get any warnings when you compiled your code, you should try different simulators like the ones on edaplayground. When I run your code on VCS, I get this warning inside module PE_tb:

Warning-[IPDW] Identifier previously declared
  Second declaration for identifier 'weight' ignored
  Identifier 'weight' previously declared as logic.

I also get more compile errors with Cadence.