Sort fixes: support for repeated trials.
[riscv-tests.git] / benchmarks / vec-vvadd / vec_vvadd_asm.S
#*****************************************************************************
# vvadd function (assembly version)
#-----------------------------------------------------------------------------


#--------------------------------------------------------------------------
# Headers and Defines
#--------------------------------------------------------------------------

# Register aliases that make the assembly code below easier to write.
#
# The aliases rely on the calling convention for functions: the first
# argument (n) is placed in register a0, the second (a) in a1, the third (b)
# in a2 and the fourth (c) in a3.

#define rN a0
#define rA a1
#define rB a2
#define rC a3

# Holds the vector length that the vector unit granted (see vt_vvadd_asm).
#define rVLen a4

# WARNING: do not write to the s0,...,s9 registers without first saving them to
# the stack.
24
#--------------------------------------------------------------------------
# void scalar_vvadd_asm( int n, float a[], float b[], float c[] )
#
# Scalar version: c[i] = a[i] + b[i] for every i in [0, n).
#
# In:    rN (a0) = n, number of elements; nothing is done when n <= 0
#        rA (a1) = a, first source array
#        rB (a2) = b, second source array
#        rC (a3) = c, destination array
# Out:   none (the sums are stored through rC)
# Clobb: a0-a3 (all four are advanced in place), f2, f3
#--------------------------------------------------------------------------

        .text
        .align  2
        .globl  scalar_vvadd_asm
        .type   scalar_vvadd_asm,@function

scalar_vvadd_asm:

        # ***** Scalar Example *****

        # Signed test: a negative n must not enter the loop, because the
        # bne-based countdown below would then run through the whole
        # integer range before reaching zero.
        bge     zero, rN, done          # exit early if n <= 0

loop:
        flw     f2, 0(rA)               # f2 = current element of a
        flw     f3, 0(rB)               # f3 = current element of b
        fadd.s  f2, f2, f3              # f2 = sum of the two elements
        fsw     f2, 0(rC)               # store the sum into c
        addi    rN, rN, -1              # one fewer element remaining
        addi    rA, rA, 4               # step each pointer to the next float
        addi    rB, rB, 4
        addi    rC, rC, 4
        bne     rN, zero, loop          # repeat while elements remain
done:
        ret

        .size   scalar_vvadd_asm, .-scalar_vvadd_asm
52
53
#--------------------------------------------------------------------------
# void vt_vvadd_asm( int n, float a[], float b[], float c[] )
#
# Vector-thread version: c[i] = a[i] + b[i] for every i in [0, n), handled
# in strips of rVLen elements per loop iteration.
#
# In:    rN (a0) = n, number of elements; when n <= 0 only the final
#                  fence at cpdone is executed
#        rA (a1) = a, first source array
#        rB (a2) = b, second source array
#        rC (a3) = c, destination array
# Out:   none (the sums are stored through rC)
# Clobb: a0-a3 (advanced in place), a4 (rVLen), a5, a6, vf0, vf1
#--------------------------------------------------------------------------


# ***** Vector-Thread Example *****

        .globl  vt_vvadd_asm
        .type   vt_vvadd_asm,@function

vt_vvadd_asm:

        # Signed test: a negative n must not reach the vector configuration
        # or the strip-mine loop, whose exit test only checks for zero.
        bge     zero, rN, cpdone        # nothing to add if n <= 0
        la      a5, vtcode              # a5 = address of the vector-fetch code

        # First, configure the vector unit.
        # Operands: rd (given vlen), desired vlen, num of x-regs, num of f-regs
        # For vvadd, we do not need to use any x-registers, and only two
        # floating point registers. By using fewer registers, hwacha can give
        # us a longer vector length!
        # But make sure to use registers starting from x0, f0!
        # WARNING: there is a BUG if you tell it you want 0 registers of any type!
        # So this asks for 1 x-register, even though none of them is used.
        vvcfgivl rVLen, rN, 1, 2


stripmineloop:
        vsetvl  rVLen, rN               # set the vector length
                                        # rN is the desired (application) vector length
                                        # rVLen is what vector length we were given

        vflw    vf0, rA                 # vector loads
        vflw    vf1, rB
        vf      0(a5)                   # jump to vector-fetch code (vtcode)
        vfsw    vf0, rC                 # vector store

        sub     rN, rN, rVLen           # book keeping: elements still to process
        slli    a6, rVLen, 2            # turn num_elements into num_bytes
        add     rA, rA, a6              # advance all three pointers by one strip
        add     rB, rB, a6
        add     rC, rC, a6
        bne     rN, zero, stripmineloop

cpdone:
        fence.v.l                       # make stores visible to the control processor
        ret

        .size   vt_vvadd_asm, .-vt_vvadd_asm
99
# Vector-fetch code that vt_vvadd_asm starts with "vf 0(a5)".
# Presumably run once per vector element, with f0/f1 standing for the
# element of vf0/vf1 -- TODO confirm against the Hwacha documentation.
vtcode:
        fadd.s  f0, f0, f1              # f0 = f0 + f1
        stop                            # ends the vector-fetch code

        # NOTE(review): the ret below sits after stop and after the ret at
        # cpdone, so it looks unreachable; it is kept unchanged. The original
        # comment presented it as the return to the caller: the C code uses a
        # jalr instruction to call the function, so a jr (also known as
        # "ret", for "return") goes back to where the function was called.

        ret