HEXAGON DSP Porting

264 Views Asked by At

I want to port our algorithm to the Hexagon DSP using HVX intrinsics, but I am unable to understand how to use them. I also have a second question: I have used the 64-bit vector intrinsics, but when I profile the code, the plain C version takes fewer cycles than the version using vector intrinsics. I am using the Hexagon timer APIs to measure the cycles. This is the code: C code: cycles consumed is 5452

/*
 * Scalar reference loop. Runs for i = 0..128 inclusive (129 iterations) and
 * stores, for each element:
 *
 *   value[i] = 32767 * (hs_int16)(window[i] >> 15)
 *            + ((32767 * (hs_int16)(window[i] & 0x00007fff)) >> 15)
 *
 * i.e. window[i] is split at bit 15 into an upper part and a 15-bit lower
 * part, each part is multiplied by the constant 32767, the lower product is
 * shifted right by 15, and the two results are added.
 *
 * NOTE(review): this looks like a 32x16-bit fixed-point multiply done in two
 * 16-bit halves -- confirm against the definitions of hs_int16 / hs_int32,
 * which are not shown here.
 */
for(i=0;i<=128;i++){
value[i]=((hs_int32)((((hs_int32)(hs_int16)((32767)))*((hs_int32) 
  (hs_int16)((((window[i])) >> (15))))))+(hs_int32)((((((hs_int32) 
   (hs_int16)((32767)))*((hs_int32)(hs_int16) 
    (((window[i])&0x00007fff))))) >> (15))));
  }

Hexagon intrinsics: Cycles consumed are 8766

/*
 * Intrinsics version of the loop. Runs for i = 0..64 inclusive (65
 * iterations); j is incremented in lockstep with i, so the index i+j used
 * for shift_1 / shift_2 equals 2*i and i+1+j equals 2*i+1.
 *
 * Per iteration it:
 *   - ANDs the 64-bit element R_E_VECTOR_1[i] with dummy,
 *   - shifts two consecutive entries of shift_1 and of shift_2 right by 15
 *     in place,
 *   - multiplies leak2_64 with R_E_VECTOR_1[i] and with the masked value
 *     using Q6_P_vmpyweh_PP_sat,
 *   - adds the two products with Q6_P_vaddw_PP and stores the sum in out[i].
 *
 * NOTE(review): the shifted shift_1 / shift_2 values are never read when
 * computing out[i] in this loop, and first_op uses the unshifted
 * R_E_VECTOR_1[i] -- confirm this matches the scalar formula.
 * NOTE(review): presumably each 64-bit element holds two 32-bit words, in
 * which case 65 iterations cover 130 words versus 129 in the scalar loop --
 * verify the bound and the array sizes.
 * NOTE(review): the disassembly posted with this code reloads i, j and the
 * array base pointers from the stack frame (r30-relative loads) for every
 * statement, which suggests an unoptimized build -- confirm the optimization
 * level before comparing cycle counts.
 */
for(i=0,j=0;i<=128/2;i++,j++) 
{   
   /* Mask the 64-bit input element with dummy (value of dummy not shown here). */
   Word64 and_op=Q6_P_and_PP(R_E_VECTOR_1[i],dummy);
   /* In-place arithmetic shift right by 15 of shift_1[2*i] and shift_1[2*i+1]. */
   shift_1[i+j]=Q6_R_asr_RI(shift_1[i+j],15);
   shift_1[i+1+j]=Q6_R_asr_RI(shift_1[i+1+j],15);
   /* Product of leak2_64 with the unmasked input element. */
   Word64 first_op=Q6_P_vmpyweh_PP_sat(leak2_64,R_E_VECTOR_1[i]);
   /* Product of leak2_64 with the masked input element; overwritten by the sum below. */
   out[i]=Q6_P_vmpyweh_PP_sat(leak2_64,and_op);
   /* In-place arithmetic shift right by 15 of shift_2[2*i] and shift_2[2*i+1]. */
   shift_2[i+j]=Q6_R_asr_RI(shift_2[i+j],15);
   shift_2[i+1+j]=Q6_R_asr_RI(shift_2[i+1+j],15);
   /* Final result: sum of the two products. */
   out[i]=Q6_P_vaddw_PP(first_op,out[i]);
}

The C code takes fewer cycles than the version using Hexagon intrinsics. Can anyone help me with this problem?

@Brain cain, this is the disassembly of the intrinsics version:

                      r1:0 = memd(r30+#-48)

 000000000000c400:    r2 = memw(r30+#-52)
 000000000000c404:    r3 = memw(r30+#-20)
 000000000000c408:    r5:4 = memd(r2+r3<<#3)
 000000000000c40c:    r1:0 = vmpyweh(r1:0,r5:4):sat
 000000000000c410:    memd(r30+#-192) = r1:0
 194                   out[i]=Q6_P_vmpyweh_PP_sat(leak2_64,and_op);
 000000000000c414:    r1:0 = memd(r30+#-48)
 000000000000c418:    r5:4 = memd(r30+#-184)
 000000000000c41c:    r1:0 = vmpyweh(r1:0,r5:4):sat
 000000000000c420:    r2 = memw(r30+#-84)
 000000000000c424:    r3 = memw(r30+#-20)
 000000000000c428:    memd(r2+r3<<#3) = r1:0 
195                   shift_2[i+j]=Q6_R_asr_RI(shift_2[i+j],15);
  000000000000c42c:    r2 = memw(r30+#-148)
  000000000000c430:    r3 = memw(r30+#-20)
  000000000000c434:    r6 = memw(r30+#-24)
  000000000000c438:    r3 = add(r3,r6)
000000000000c43c:    r6 = memw(r2+r3<<#2)
000000000000c440:    r6 = asr(r6,#15)
000000000000c444:    memw(r2+r3<<#2) = r6
 196                   shift_2[i+1+j]=Q6_R_asr_RI(shift_2[i+1+j],15);
 000000000000c448:    r2 = memw(r30+#-148)
 000000000000c44c:    r3 = memw(r30+#-20)
 000000000000c450:    r6 = memw(r30+#-24)
 000000000000c454:    r3 = add(r3,r6)
              mt_cv_mec_power_spectrum_fixed_hexagon:
 000000000000c458:    r2 = addasl(r2,r3,#2)
 000000000000c45c:    r3 = memw(r2+#4)
 000000000000c460:    r3 = asr(r3,#15)
 000000000000c464:    memw(r2+#4) = r3
 197                out[i]=Q6_P_vaddw_PP(first_op,out[i]);
 000000000000c468:    r1:0 = memd(r30+#-192)
 000000000000c46c:    r2 = memw(r30+#-84)
 000000000000c470:    r3 = memw(r30+#-20)
 000000000000c474:    r5:4 = memd(r2+r3<<#3)
 000000000000c478:    r1:0 = vaddw(r1:0,r5:4)
 000000000000c47c:    memd(r2+r3<<#3) = r1:0
                }

I am new to DSP programming and am facing a lot of difficulty understanding the Hexagon DSP. Your help would be greatly appreciated.

0

There are 0 best solutions below