I have simulated bare-metal general-purpose RISC-V programs on Spike. Now I want to run a bare-metal vector program on Spike, but I think the C program that is cross-compiled and run through Spike to generate a log file has to be built differently than for general-purpose code. I can't find any documentation for simulating vector programs on Spike. Can anyone please guide me through this?
I have configured my toolchain and Spike for rv64gcv and have tried writing main functions that use vector intrinsics, but despite getting vector instructions in the assembly file generated by the toolchain, my log file contains only general-purpose instructions.
This is my C program:
#include <stddef.h>
#include <riscv_vector.h>
// Two volatile 64-bit mailbox words, each placed in its own linker section
// (.tohost / .fromhost). saxpy() stores 1 to tohost when its loop finishes;
// fromhost is declared but never accessed in this program.
// NOTE(review): presumably these are the HTIF symbols the simulator looks up
// by name to detect program completion - confirm against the linker script.
volatile uint64_t tohost __attribute__((section(".tohost")));
volatile uint64_t fromhost __attribute__((section(".fromhost")));
// Computes y[i] = a * x[i] + y[i] for i in [0, n) with RVV intrinsics
// (double precision, despite the BLAS-style "saxpy" name).
//   n : number of elements in x and y
//   a : scalar multiplier
//   x : input vector (read only)
//   y : input/output vector, updated in place
// Side effect: stores 1 to the global `tohost` once the loop has finished.
void saxpy(size_t n, const double a, const double *x, double *y) {
    size_t vl;
    vfloat64m8_t vx, vy;
    for (; n > 0; n -= vl) {
        // Number of e64/m8 elements handled in this pass (at most n).
        vl = vsetvl_e64m8(n);
        vx = vle64_v_f64m8(x, vl);
        vy = vle64_v_f64m8(y, vl);
        // Multiply-accumulate: vy = a * vx + vy.
        // (vfmsac would compute a * vx - vy, which is not AXPY.)
        vy = vfmacc_vf_f64m8(vy, a, vx, vl);
        vse64_v_f64m8(y, vy, vl);
        x += vl;
        y += vl;
    }
    // Signal completion to the "tohost" register after the loop
    tohost = 1;
}
// Program entry: fills two 8-element arrays (x = 1..8, y = 10, 20, ..., 80),
// calls saxpy(8, 2.0, x, y) on them and returns 0.
int main() {
    const double a = 2.0;
    const size_t array_size = 8;
    double x[array_size];
    double y[array_size];
    // Initialize arrays x and y with data
    size_t i = 0;
    while (i < array_size) {
        const size_t k = i + 1;
        x[i] = (double)k;         // Values 1.0, 2.0, 3.0, ..., 8.0
        y[i] = (double)(k * 10);  // Values 10.0, 20.0, 30.0, ..., 80.0
        ++i;
    }
    saxpy(array_size, a, x, y);
    return 0;
}
This is the assembly generated by toolchain:
.file "vector.c"
.option nopic
.attribute arch, "rv64i2p0_m2p0_a2p0_f2p0_d2p0_c2p0_v1p0_zve32f1p0_zve32x1p0_zve64d1p0_zve64f1p0_zve64x1p0_zvl128b1p0_zvl32b1p0_zvl64b1p0"
.attribute unaligned_access, 0
.attribute stack_align, 16
.text
# tohost / fromhost: two global 8-byte, zero-filled data objects. Each one is
# emitted into its own allocatable+writable section (.tohost / .fromhost) and
# aligned to 2^3 = 8 bytes. saxpy stores 1 to tohost; fromhost is never
# referenced by the code in this listing.
.globl tohost
.section .tohost,"aw"
.align 3
.type tohost, @object
.size tohost, 8
tohost:
.zero 8
.globl fromhost
.section .fromhost,"aw"
.align 3
.type fromhost, @object
.size fromhost, 8
fromhost:
.zero 8
.text
.align 1
.globl saxpy
.type saxpy, @function
# saxpy - unoptimised compiler output for the C function
#   void saxpy(size_t n, const double a, const double *x, double *y)
#
# Arguments on entry: a0 = n, a1 = a (raw 64-bit pattern of the double),
# a2 = x, a3 = y.
#
# Stack frame, with s0 = value of sp on entry:
#   s0 - 8                  saved s0
#   s0 - 24                 vl   (result of the vsetvli at the top of the loop)
#   s0 -  8*vlenb - 32      spill area for vx (8 vector registers)
#   s0 - 16*vlenb - 32      spill area for vy (8 vector registers)
#   s0 - 16*vlenb - 40      n
#   s0 - 16*vlenb - 48      a
#   s0 - 16*vlenb - 56      x
#   s0 - 16*vlenb - 64      y
# Every access to a vlenb-relative slot recomputes its address with the
# sequence csrr / neg / slli / addi / addi / add.
saxpy:
# Prologue: 64-byte fixed frame, save s0, set s0 to the entry sp.
addi sp,sp,-64
sd s0,56(sp)
addi s0,sp,64
# Reserve a further 16*vlenb bytes for the two m8 spill areas.
csrr t0,vlenb
slli t1,t0,4
sub sp,sp,t1
# Spill n (a0) to s0 - 16*vlenb - 40.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
sd a0,0(a5)
# Spill a (a1) to s0 - 16*vlenb - 48.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-32
addi a5,a5,-16
add a5,a5,s0
sd a1,0(a5)
# Spill x (a2) to s0 - 16*vlenb - 56.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a5,a5,s0
sd a2,0(a5)
# Spill y (a3) to s0 - 16*vlenb - 64.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a5,a5,s0
sd a3,0(a5)
# Jump to the loop condition first (for-loop with the test at the bottom).
j .L2
.L3:
# --- vl = vsetvl_e64m8(n) ---
# Reload n, request SEW=64 / LMUL=8, and store the granted vl at s0 - 24.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
vsetvli a5,a5,e64,m8,ta,mu
sd a5,-24(s0)
# --- vx = vle64(x, vl) ---
# a3 = address of the vx spill area.
csrr a5,vlenb
neg a5,a5
slli a5,a5,3
addi a5,a5,-16
addi a5,a5,-16
add a3,a5,s0
# a2 = x, a4 = vl.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a5,a5,s0
ld a2,0(a5)
ld a4,-24(s0)
# Unit-stride load of vl doubles from x, then whole-register store to the spill area.
vsetvli zero,a4,e64,m8,ta,mu
vle64.v v24,(a2)
vs8r.v v24,0(a3)
# --- vy = vle64(y, vl) ---
# a3 = address of the vy spill area.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a3,a5,s0
# a2 = y. vl is reloaded into a4 but no new vsetvli is issued here; the
# vl/vtype set by the previous vsetvli are still in effect.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a5,a5,s0
ld a2,0(a5)
ld a4,-24(s0)
vle64.v v24,(a2)
vs8r.v v24,0(a3)
# --- vy = vfmsac(vy, a, vx, vl) ---
# a3 = vy spill area (destination of the result).
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a3,a5,s0
# a2 = vy spill area (source operand).
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a2,a5,s0
# a4 = address of the slot holding a.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-32
addi a5,a5,-16
add a4,a5,s0
# a5 = vx spill area.
csrr a5,vlenb
neg a5,a5
slli a5,a5,3
addi a5,a5,-16
addi a5,a5,-16
add a5,a5,s0
# Reload vy into v24..v31, a into fa5, vx into v8..v15.
vl8re64.v v24,0(a2)
fld fa5,0(a4)
vl8re64.v v8,0(a5)
ld a4,-24(s0)
# Tail-undisturbed policy for the accumulate, then
# vfmsac.vf: v24[i] = (fa5 * v8[i]) - v24[i], i.e. a*x - y per the RVV
# definition of vfmsac (a multiply-SUBTRACT, not a multiply-add).
vsetvli zero,a4,e64,m8,tu,mu
vfmsac.vf v24,fa5,v8
vs8r.v v24,0(a3)
# --- vse64(y, vy, vl) ---
# a4 = address of the slot holding y, a5 = vy spill area.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-16
addi a5,a5,-16
add a5,a5,s0
# a3 = y; reload the result and store vl doubles back to y.
ld a3,0(a4)
vl8re64.v v24,0(a5)
ld a4,-24(s0)
vsetvli zero,a4,e64,m8,ta,mu
vse64.v v24,(a3)
# --- x += vl ---  (a3 = vl * 8 bytes)
ld a5,-24(s0)
slli a3,a5,3
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-40
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
add a5,a5,a3
sd a5,0(a4)
# --- y += vl ---  (a3 = vl * 8 bytes)
ld a5,-24(s0)
slli a3,a5,3
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-48
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
add a5,a5,a3
sd a5,0(a4)
# --- n -= vl ---
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a4,a5,s0
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
ld a3,0(a5)
ld a5,-24(s0)
sub a5,a3,a5
sd a5,0(a4)
.L2:
# Loop condition: reload n and continue while n != 0.
csrr a5,vlenb
neg a5,a5
slli a5,a5,4
addi a5,a5,-24
addi a5,a5,-16
add a5,a5,s0
ld a5,0(a5)
bne a5,zero,.L3
# tohost = 1 (absolute %hi/%lo addressing of the global).
lui a5,%hi(tohost)
li a4,1
sd a4,%lo(tohost)(a5)
nop
# Epilogue: release the 16*vlenb spill area, restore s0, pop the fixed frame.
csrr t0,vlenb
slli t1,t0,4
add sp,sp,t1
ld s0,56(sp)
addi sp,sp,64
jr ra
.size saxpy, .-saxpy
.align 1
.globl main
.type main, @function
# main - unoptimised compiler output for the C main().
#
# Frame slots relative to s0 (= sp on entry):
#   -56  loop index i            -64  array_size (8)
#   -72  a (double loaded from .LC0)
#   -80  array_size - 1          -88  base address of x
#   -96  array_size - 1          -104 base address of y
# x and y are variable-length arrays carved out of the stack at run time.
# Returns 0 in a0.
main:
# Prologue: 112-byte frame; save ra and s0-s3; s0 = entry sp.
addi sp,sp,-112
sd ra,104(sp)
sd s0,96(sp)
sd s1,88(sp)
sd s2,80(sp)
sd s3,72(sp)
addi s0,sp,112
# Remember sp in s1 so the dynamic array space can be released before return.
mv t0,sp
mv s1,t0
# array_size = 8
li t0,8
sd t0,-64(s0)
# a = constant at .LC0
lui t0,%hi(.LC0)
fld fa5,%lo(.LC0)(t0)
fsd fa5,-72(s0)
# Upper bound for x: array_size - 1.
ld t0,-64(s0)
addi t0,t0,-1
sd t0,-80(s0)
# Shifts array_size left by 6 as a 128-bit quantity (register pairs).
# None of the values produced in the next two groups is read again
# before being overwritten.
ld t0,-64(s0)
mv s2,t0
li s3,0
srli t0,s2,58
slli a7,s3,6
or a7,t0,a7
slli a6,s2,6
ld a6,-64(s0)
mv t5,a6
li t6,0
srli a6,t5,58
slli a1,t6,6
or a1,a6,a1
slli a0,t5,6
# Allocate x: byte size = array_size * 8, rounded up to a multiple of 16,
# subtracted from sp.
ld a1,-64(s0)
slli a1,a1,3
addi a1,a1,15
srli a1,a1,4
slli a1,a1,4
sub sp,sp,a1
# x base = sp rounded up to a multiple of 8; store at -88(s0).
mv a1,sp
addi a1,a1,7
srli a1,a1,3
slli a1,a1,3
sd a1,-88(s0)
# Upper bound for y: array_size - 1.
ld a1,-64(s0)
addi a1,a1,-1
sd a1,-96(s0)
# Same 128-bit shift bookkeeping as above, again with unused results.
ld a1,-64(s0)
mv t3,a1
li t4,0
srli a1,t3,58
slli a3,t4,6
or a3,a1,a3
slli a2,t3,6
ld a3,-64(s0)
mv t1,a3
li t2,0
srli a3,t1,58
slli a5,t2,6
or a5,a3,a5
slli a4,t1,6
# Allocate y the same way as x; base address stored at -104(s0).
ld a5,-64(s0)
slli a5,a5,3
addi a5,a5,15
srli a5,a5,4
slli a5,a5,4
sub sp,sp,a5
mv a5,sp
addi a5,a5,7
srli a5,a5,3
slli a5,a5,3
sd a5,-104(s0)
# i = 0, then jump to the loop test.
sd zero,-56(s0)
j .L5
.L6:
# x[i] = (double)(i + 1)   (unsigned 64-bit to double conversion)
ld a5,-56(s0)
addi a5,a5,1
fcvt.d.lu fa5,a5
ld a4,-88(s0)
ld a5,-56(s0)
slli a5,a5,3
add a5,a4,a5
fsd fa5,0(a5)
# y[i] = (double)((i + 1) * 10); the multiply is done as ((k << 2) + k) << 1.
ld a5,-56(s0)
addi a4,a5,1
mv a5,a4
slli a5,a5,2
add a5,a5,a4
slli a5,a5,1
fcvt.d.lu fa5,a5
ld a4,-104(s0)
ld a5,-56(s0)
slli a5,a5,3
add a5,a4,a5
fsd fa5,0(a5)
# i++
ld a5,-56(s0)
addi a5,a5,1
sd a5,-56(s0)
.L5:
# Loop while i < array_size (unsigned compare).
ld a4,-56(s0)
ld a5,-64(s0)
bltu a4,a5,.L6
# saxpy(array_size, a, x, y): a0 = n, a1 = a as an integer-register bit
# pattern, a2 = x, a3 = y.
ld a3,-104(s0)
ld a2,-88(s0)
ld a1,-72(s0)
ld a0,-64(s0)
call saxpy
# Return value 0; release the dynamic arrays by restoring sp from s1.
li a5,0
mv sp,s1
mv a0,a5
# Epilogue: recompute sp from s0, restore saved registers, pop the frame.
addi sp,s0,-112
ld ra,104(sp)
ld s0,96(sp)
ld s1,88(sp)
ld s2,80(sp)
ld s3,72(sp)
addi sp,sp,112
jr ra
.size main, .-main
# .LC0: 8-byte-aligned read-only constant that main loads with fld.
# The two 32-bit words are 0x00000000 followed by 0x40000000 (1073741824);
# read as one little-endian 64-bit value that is 0x4000000000000000, the
# IEEE-754 double 2.0 - the value of `a` in the C source.
.section .rodata
.align 3
.LC0:
.word 0
.word 1073741824
.ident "GCC: () 12.0.1 20220505 (prerelease)"
This is the log file generated by spike
core 0: 0x0000000000001000 (0x00000297) auipc t0, 0x0
core 0: 3 0x0000000000001000 (0x00000297) x5 0x0000000000001000
core 0: 0x0000000000001004 (0x02028593) addi a1, t0, 32
core 0: 3 0x0000000000001004 (0x02028593) x11 0x0000000000001020
core 0: 0x0000000000001008 (0xf1402573) csrr a0, mhartid
core 0: 3 0x0000000000001008 (0xf1402573) x10 0x0000000000000000
core 0: 0x000000000000100c (0x0182b283) ld t0, 24(t0)
core 0: 3 0x000000000000100c (0x0182b283) x5 0x0000000000010116 mem 0x0000000000001018
core 0: 0x0000000000001010 (0x00028067) jr t0
core 0: 3 0x0000000000001010 (0x00028067)
core 0: >>>> _start
core 0: 0x0000000000010116 (0x00002197) auipc gp, 0x2
core 0: 3 0x0000000000010116 (0x00002197) x3 0x0000000000012116
core 0: 0x000000000001011a (0xeea18193) addi gp, gp, -278
core 0: 3 0x000000000001011a (0xeea18193) x3 0x0000000000012000
core 0: 0x000000000001011e (0xf7018513) addi a0, gp, -144
core 0: 3 0x000000000001011e (0xf7018513) x10 0x0000000000011f70
core 0: 0x0000000000010122 (0xfa818613) addi a2, gp, -88
core 0: 3 0x0000000000010122 (0xfa818613) x12 0x0000000000011fa8
core 0: 0x0000000000010126 (0x00008e09) c.sub a2, a0
core 0: 3 0x0000000000010126 (0x8e09) x12 0x0000000000000038
core 0: 0x0000000000010128 (0x00004581) c.li a1, 0
core 0: 3 0x0000000000010128 (0x4581) x11 0x0000000000000000
core 0: 0x000000000001012a (0x464000ef) jal pc + 0x464
core 0: 3 0x000000000001012a (0x464000ef) x1 0x000000000001012e
core 0: >>>> memset
core 0: 0x000000000001058e (0x0000433d) c.li t1, 15
core 0: 3 0x000000000001058e (0x433d) x6 0x000000000000000f
core 0: 0x0000000000010590 (0x0000872a) c.mv a4, a0
core 0: 3 0x0000000000010590 (0x872a) x14 0x0000000000011f70
core 0: 0x0000000000010592 (0x02c37163) bgeu t1, a2, pc + 34
core 0: 3 0x0000000000010592 (0x02c37163)
core 0: 0x0000000000010596 (0x00f77793) andi a5, a4, 15
core 0: 3 0x0000000000010596 (0x00f77793) x15 0x0000000000000000
core 0: 0x000000000001059a (0x0000e3c1) c.bnez a5, pc + 128
core 0: 3 0x000000000001059a (0xe3c1)
core 0: 0x000000000001059c (0x0000e1bd) c.bnez a1, pc + 102
core 0: 3 0x000000000001059c (0xe1bd)
core 0: 0x000000000001059e (0xff067693) andi a3, a2, -16
core 0: 3 0x000000000001059e (0xff067693) x13 0x0000000000000030
core 0: 0x00000000000105a2 (0x00008a3d) c.andi a2, 15
core 0: 3 0x00000000000105a2 (0x8a3d) x12 0x0000000000000008
core 0: 0x00000000000105a4 (0x000096ba) c.add a3, a4
core 0: 3 0x00000000000105a4 (0x96ba) x13 0x0000000000011fa0
core 0: 0x00000000000105a6 (0x0000e30c) c.sd a1, 0(a4)
core 0: 3 0x00000000000105a6 (0xe30c) mem 0x0000000000011f70 0x0000000000000000
core 0: 0x00000000000105a8 (0x0000e70c) c.sd a1, 8(a4)
core 0: 3 0x00000000000105a8 (0xe70c) mem 0x0000000000011f78 0x0000000000000000
core 0: 0x00000000000105aa (0x00000741) c.addi a4, 16
core 0: 3 0x00000000000105aa (0x0741) x14 0x0000000000011f80
core 0: 0x00000000000105ac (0xfed76de3) bltu a4, a3, pc - 6
core 0: 3 0x00000000000105ac (0xfed76de3)
core 0: 0x00000000000105a6 (0x0000e30c) c.sd a1, 0(a4)
core 0: 3 0x00000000000105a6 (0xe30c) mem 0x0000000000011f80 0x0000000000000000
core 0: 0x00000000000105a8 (0x0000e70c) c.sd a1, 8(a4)
core 0: 3 0x00000000000105a8 (0xe70c) mem 0x0000000000011f88 0x0000000000000000
core 0: 0x00000000000105aa (0x00000741) c.addi a4, 16
core 0: 3 0x00000000000105aa (0x0741) x14 0x0000000000011f90
core 0: 0x00000000000105ac (0xfed76de3) bltu a4, a3, pc - 6
core 0: 3 0x00000000000105ac (0xfed76de3)
core 0: 0x00000000000105a6 (0x0000e30c) c.sd a1, 0(a4)
core 0: 3 0x00000000000105a6 (0xe30c) mem 0x0000000000011f90 0x0000000000000000
core 0: 0x00000000000105a8 (0x0000e70c) c.sd a1, 8(a4)
core 0: 3 0x00000000000105a8 (0xe70c) mem 0x0000000000011f98 0x0000000000000000
core 0: 0x00000000000105aa (0x00000741) c.addi a4, 16
core 0: 3 0x00000000000105aa (0x0741) x14 0x0000000000011fa0
core 0: 0x00000000000105ac (0xfed76de3) bltu a4, a3, pc - 6
core 0: 3 0x00000000000105ac (0xfed76de3)
core 0: 0x00000000000105b0 (0x0000e211) c.bnez a2, pc + 4
core 0: 3 0x00000000000105b0 (0xe211)
core 0: 0x00000000000105b4 (0x40c306b3) sub a3, t1, a2
core 0: 3 0x00000000000105b4 (0x40c306b3) x13 0x0000000000000007
core 0: 0x00000000000105b8 (0x0000068a) c.slli a3, 2
core 0: 3 0x00000000000105b8 (0x068a) x13 0x000000000000001c
core 0: 0x00000000000105ba (0x00000297) auipc t0, 0x0
core 0: 3 0x00000000000105ba (0x00000297) x5 0x00000000000105ba
core 0: 0x00000000000105be (0x00009696) c.add a3, t0
core 0: 3 0x00000000000105be (0x9696) x13 0x00000000000105d6
core 0: 0x00000000000105c0 (0x00a68067) jalr zero, a3, 10
core 0: 3 0x00000000000105c0 (0x00a68067)
core 0: 0x00000000000105e0 (0x00b703a3) sb a1, 7(a4)
core 0: 3 0x00000000000105e0 (0x00b703a3) mem 0x0000000000011fa7 0x00
core 0: 0x00000000000105e4 (0x00b70323) sb a1, 6(a4)
core 0: 3 0x00000000000105e4 (0x00b70323) mem 0x0000000000011fa6 0x00
core 0: 0x00000000000105e8 (0x00b702a3) sb a1, 5(a4)
core 0: 3 0x00000000000105e8 (0x00b702a3) mem 0x0000000000011fa5 0x00
core 0: 0x00000000000105ec (0x00b70223) sb a1, 4(a4)
core 0: 3 0x00000000000105ec (0x00b70223) mem 0x0000000000011fa4 0x00
core 0: 0x00000000000105f0 (0x00b701a3) sb a1, 3(a4)
core 0: 3 0x00000000000105f0 (0x00b701a3) mem 0x0000000000011fa3 0x00
core 0: 0x00000000000105f4 (0x00b70123) sb a1, 2(a4)
core 0: 3 0x00000000000105f4 (0x00b70123) mem 0x0000000000011fa2 0x00
core 0: 0x00000000000105f8 (0x00b700a3) sb a1, 1(a4)
core 0: 3 0x00000000000105f8 (0x00b700a3) mem 0x0000000000011fa1 0x00
core 0: 0x00000000000105fc (0x00b70023) sb a1, 0(a4)
core 0: 3 0x00000000000105fc (0x00b70023) mem 0x0000000000011fa0 0x00
core 0: 0x0000000000010600 (0x00008082) ret
core 0: 3 0x0000000000010600 (0x8082)
core 0: 0x000000000001012e (0x00000517) auipc a0, 0x0
core 0: 3 0x000000000001012e (0x00000517) x10 0x000000000001012e
core 0: 0x0000000000010132 (0x60050513) addi a0, a0, 1536
core 0: 3 0x0000000000010132 (0x60050513) x10 0x000000000001072e
core 0: 0x0000000000010136 (0x0000c519) c.beqz a0, pc + 14
core 0: 3 0x0000000000010136 (0xc519)
core 0: 0x0000000000010138 (0x00000517) auipc a0, 0x0
core 0: 3 0x0000000000010138 (0x00000517) x10 0x0000000000010138
core 0: 0x000000000001013c (0x5c250513) addi a0, a0, 1474
core 0: 3 0x000000000001013c (0x5c250513) x10 0x00000000000106fa
core 0: 0x0000000000010140 (0x5ee000ef) jal pc + 0x5ee
core 0: 3 0x0000000000010140 (0x5ee000ef) x1 0x0000000000010144
core 0: >>>> atexit
core 0: 0x000000000001072e (0x000085aa) c.mv a1, a0
core 0: 3 0x000000000001072e (0x85aa) x11 0x00000000000106fa
core 0: 0x0000000000010730 (0x00004681) c.li a3, 0
core 0: 3 0x0000000000010730 (0x4681) x13 0x0000000000000000
core 0: 0x0000000000010732 (0x00004601) c.li a2, 0
core 0: 3 0x0000000000010732 (0x4601) x12 0x0000000000000000
core 0: 0x0000000000010734 (0x00004501) c.li a0, 0
core 0: 3 0x0000000000010734 (0x4501) x10 0x0000000000000000
core 0: 0x0000000000010736 (0x0000a009) c.j pc + 2
core 0: 3 0x0000000000010736 (0xa009)
core 0: >>>> __register_exitproc
core 0: 0x0000000000010738 (0xf581b703) ld a4, -168(gp)
core 0: 3 0x0000000000010738 (0xf581b703) x14 0x0000000000011800 mem 0x0000000000011f58
core 0: 0x000000000001073c (0x1f873783) ld a5, 504(a4)
core 0: 3 0x000000000001073c (0x1f873783) x15 0x0000000000000000 mem 0x00000000000119f8
core 0: 0x0000000000010740 (0x0000c3b1) c.beqz a5, pc + 68
core 0: 3 0x0000000000010740 (0xc3b1)
core 0: 0x0000000000010784 (0x20070793) addi a5, a4, 512
core 0: 3 0x0000000000010784 (0x20070793) x15 0x0000000000011a00
core 0: 0x0000000000010788 (0x1ef73c23) sd a5, 504(a4)
core 0: 3 0x0000000000010788 (0x1ef73c23) mem 0x00000000000119f8 0x0000000000011a00
core 0: 0x000000000001078c (0x0000bf5d) c.j pc - 74
core 0: 3 0x000000000001078c (0xbf5d)
core 0: 0x0000000000010742 (0x00004798) c.lw a4, 8(a5)
core 0: 3 0x0000000000010742 (0x4798) x14 0x0000000000000000 mem 0x0000000000011a08
core 0: 0x0000000000010744 (0x0000487d) c.li a6, 31
core 0: 3 0x0000000000010744 (0x487d) x16 0x000000000000001f
core 0: 0x0000000000010746 (0x06e84263) blt a6, a4, pc + 100
core 0: 3 0x0000000000010746 (0x06e84263)
core 0: 0x000000000001074a (0x0000c505) c.beqz a0, pc + 40
core 0: 3 0x000000000001074a (0xc505)
core 0: 0x0000000000010772 (0x00270693) addi a3, a4, 2
core 0: 3 0x0000000000010772 (0x00270693) x13 0x0000000000000002
core 0: 0x0000000000010776 (0x0000068e) c.slli a3, 3
core 0: 3 0x0000000000010776 (0x068e) x13 0x0000000000000010
core 0: 0x0000000000010778 (0x00002705) c.addiw a4, 1
core 0: 3 0x0000000000010778 (0x2705) x14 0x0000000000000001
core 0: 0x000000000001077a (0x0000c798) c.sw a4, 8(a5)
core 0: 3 0x000000000001077a (0xc798) mem 0x0000000000011a08 0x00000001
core 0: 0x000000000001077c (0x000097b6) c.add a5, a3
core 0: 3 0x000000000001077c (0x97b6) x15 0x0000000000011a10
core 0: 0x000000000001077e (0x0000e38c) c.sd a1, 0(a5)
core 0: 3 0x000000000001077e (0xe38c) mem 0x0000000000011a10 0x00000000000106fa
core 0: 0x0000000000010780 (0x00004501) c.li a0, 0
core 0: 3 0x0000000000010780 (0x4501) x10 0x0000000000000000
core 0: 0x0000000000010782 (0x00008082) ret
core 0: 3 0x0000000000010782 (0x8082)
core 0: 0x0000000000010144 (0x3e0000ef) jal pc + 0x3e0
core 0: 3 0x0000000000010144 (0x3e0000ef) x1 0x0000000000010148
core 0: >>>> __libc_init_array
core 0: 0x0000000000010524 (0x00001101) c.addi sp, -32
core 0: 3 0x0000000000010524 (0x1101) x2 0xffffffffffffffe0
core 0: 0x0000000000010526 (0x0000e822) c.sdsp s0, 16(sp)
my toolchain configuration was:
../configure --prefix=$HOME/rvv64 --with-arch=rv64gcv --with-abi=lp64 --enable-multilib
my spike configuration is:
../configure --prefix=$HOME/rvv64 --with-varch=vlen:128,elen=32 --with-isa=rv64iv --with-target=riscv64-unknown-elf