Add a top-level make clean target.
[riscv-tests.git] / benchmarks / vec-vvadd / vec_vvadd_asm.S
1 # See LICENSE for license details.
2
3 #*****************************************************************************
4 # vvadd function (assembly version)
5 #-----------------------------------------------------------------------------
6
7
8 #--------------------------------------------------------------------------
9 # Headers and Defines
10 #--------------------------------------------------------------------------
11
# Symbolic names for the registers used by the routines below.
#
# Both routines take (int n, float a[], float b[], float c[]).  The calling
# convention passes the first argument in a0, the second in a1, and so on,
# so each argument can be named after the register it arrives in.

#define rN a0
#define rA a1
#define rB a2
#define rC a3

# Vector length granted by the vector unit (written by vvcfgivl / vsetvl).

#define rVLen a4

# WARNING: do not write to the callee-saved s-registers (s0, s1, ...) without
# first saving them to the stack.
26
#--------------------------------------------------------------------------
# void scalar_vvadd_asm( int n, float a[], float b[], float c[] )
#
# Scalar element-wise single-precision add:
#     c[i] = a[i] + b[i]   for 0 <= i < n
#
# In:       a0 (rN) = n, number of elements; n <= 0 does nothing
#           a1 (rA) = a, first source array
#           a2 (rB) = b, second source array
#           a3 (rC) = c, destination array
# Out:      nothing in registers; the sums are stored to c[]
# Clobbers: a0-a3 (count and pointers are consumed), f2, f3
#--------------------------------------------------------------------------

        .text
        .align  2
        .globl  scalar_vvadd_asm
        .type   scalar_vvadd_asm,@function

scalar_vvadd_asm:

        # ***** Scalar Example *****

        # Reject n <= 0 up front.  The loop counts down and only exits when
        # the counter is exactly zero, so a negative count must never enter it.
        blez    rN, .Lscalar_done

.Lscalar_loop:
        flw     f2, 0(rA)                   # f2 = *a
        flw     f3, 0(rB)                   # f3 = *b
        fadd.s  f2, f2, f3                  # f2 = *a + *b
        fsw     f2, 0(rC)                   # *c = f2
        addi    rN, rN, -1                  # one element fewer to go
        addi    rA, rA, 4                   # step all three pointers by one float
        addi    rB, rB, 4
        addi    rC, rC, 4
        bne     rN, zero, .Lscalar_loop     # count was > 0 on entry, so it reaches 0
.Lscalar_done:
        ret

        .size   scalar_vvadd_asm, .-scalar_vvadd_asm
54
55
#--------------------------------------------------------------------------
# void vt_vvadd_asm( int n, float a[], float b[], float c[] )
#
# Vector-thread element-wise single-precision add:
#     c[i] = a[i] + b[i]   for 0 <= i < n
# The arrays are processed in strips of whatever vector length the vector
# unit grants on each pass.
#
# In:       a0 (rN) = n, number of elements; n <= 0 does nothing
#           a1 (rA) = a, first source array
#           a2 (rB) = b, second source array
#           a3 (rC) = c, destination array
# Out:      nothing in registers; the sums are stored to c[]
# Clobbers: a0-a3 (count and pointers are consumed), a4 (rVLen),
#           a5 (address of vtcode), a6 (strip size in bytes), vf0, vf1
#--------------------------------------------------------------------------


        # ***** Vector-Thread Example *****

        .align  2
        .globl  vt_vvadd_asm
        .type   vt_vvadd_asm,@function

vt_vvadd_asm:

        # Reject n <= 0 up front.  The strip-mine loop only exits when the
        # remaining count is exactly zero, so a negative count must not enter it.
        blez    rN, .Lvt_done
        la      a5, vtcode                  # a5 = address of the vector-fetch block

        # First, configure the vector unit.
        #   rd (given vlen), desired vlen, num of x-regs, num of f-regs
        # For vvadd, we do not need to use any x-registers, and only two
        # floating point registers.  By using fewer registers, hwacha can give
        # us a longer vector length!
        # But make sure to use registers starting from x0, f0!
        # WARNING: there is a BUG if you tell it you want 0 registers of any type!
        # So this asks for one x-register even though none of them is used.
        vvcfgivl rVLen, rN, 1, 2

.Lvt_stripmine:
        vsetvl  rVLen, rN                   # set the vector length:
                                            #   rN    = desired (application) vector length
                                            #   rVLen = vector length we were given

        vflw    vf0, rA                     # vector loads
        vflw    vf1, rB
        vf      0(a5)                       # run the vector-fetch code at vtcode
        vfsw    vf0, rC                     # vector store

        sub     rN, rN, rVLen               # book keeping: elements still to do
        slli    a6, rVLen, 2                # turn num_elements into num_bytes
        add     rA, rA, a6                  # advance all three pointers past this strip
        add     rB, rB, a6
        add     rC, rC, a6
        bne     rN, zero, .Lvt_stripmine

.Lvt_done:
        fence.v.l                           # make stores visible to the control processor
        ret

        .size   vt_vvadd_asm, .-vt_vvadd_asm
101
# Vector-fetch block used by vt_vvadd_asm.  The control processor never
# branches here; it only takes this address and hands it to the vf
# instruction.  The block adds the two values loaded by the caller
# (vf0 and vf1 on the control-processor side) and leaves the sum in the
# register the caller stores from.  The block ends at stop, so nothing
# placed after it would ever execute.
vtcode:
        fadd.s  f0, f0, f1
        stop