1 # See LICENSE for license details.
3 #*****************************************************************************
4 # cmplxmult function (assembly version)
5 #-----------------------------------------------------------------------------
8 #--------------------------------------------------------------------------
10 #--------------------------------------------------------------------------
12 # Here are some defines that make writing assembly code easier.
14 # I'm using the knowledge that rN will be placed in register a0, rA will be
15 # placed into register a1, etc., based on the calling convention for functions.
30 # WARNING: do not write to the s0,...,s9 registers without first saving them to
33 #--------------------------------------------------------------------------
34 # void scalar_cmplxmult_asm( int n, float a[], float b[], float c[] )
35 #--------------------------------------------------------------------------
39 .globl scalar_cmplxmult_asm # make the entry point visible to the linker
40 .type scalar_cmplxmult_asm,@function
44 # ***** Scalar Example *****
46 blez rN, done # exit early if n <= 0 (blez also branches when n == 0)
49 # The following code is a naive implementation...
50 # Re-ordering instructions may increase performance. Also,
51 # RISC-V supports fused instructions such as multiply-add ("fmadd") and multiply-subtract ("fmsub"), e.g.:
52 # fmsub.s fa2,fa4,fa3,ft1   (computes fa2 = fa4*fa3 - ft1)
53 # Finally, unrolling and other fun transformations can also provide
77 #--------------------------------------------------------------------------
78 # void vt_cmplxmult_asm( int n, float a[], float b[], float c[] )
79 #--------------------------------------------------------------------------
82 # ***** Vector-Thread Example *****
84 .globl vt_cmplxmult_asm # make the entry point visible to the linker
85 .type vt_cmplxmult_asm,@function
87 # HINT: because you are dealing with an array of structures, a regular,
88 # vanilla vector-load/vector-store won't work here!
96 vvcfgivl rVlen, rN, 1, 7 # NOTE(review): presumably configures 1 integer and 7 FP vector registers and requests vector length rN -- confirm against the Hwacha ISA documentation
100 # ADD YOUR CODE HERE....
101 vsetvl rVlen, rN # set the vector length
102 # rN is the desired (application) vector length
103 # rVlen is the vector length we were actually given
105 vflstw vf2, rA, rStride # strided vector load: real parts of A
107 vflstw vf4, rB, rStride # strided vector load: real parts of B
109 vflstw vf3, rAI, rStride # strided vector load: imaginary parts of A
110 vflstw vf5, rBI, rStride # strided vector load: imaginary parts of B
112 vf 0(a4) # vector-fetch the code at address 0(a4); assumes a4 was set to the vector-element code outside this view -- TODO confirm
114 vfsstw vf0, rC, rStride # strided vector store: real parts of C
116 vfsstw vf1, rCI, rStride # strided vector store: imaginary parts of C
119 sub rN, rN, rVlen # book keeping: rN = number of elements still to process
123 bne rN, zero, stripmineloop # loop back while elements remain (rN != 0)
124 # Step 0: set the vector length
125 # Step 1: perform your vector loads
126 # Step 2: jump to the vector-fetch code to perform the calculation
127 # Step 3: perform the vector store
128 # Step 4: book keeping, update the pointers, etc.
135 # ADD YOUR VECTOR-ELEMENT CODE HERE ...
# NOTE(review): f2/f3 and f4/f5 are presumably the real/imaginary parts of A and B
# (loaded into vf2..vf5 by the strided loads); the instructions that initialise f0
# and f1 are not visible here. If f0 holds re(A)*re(B) on entry, the fmsub.s below
# yields the NEGATED real part; fnmsub.s f0, f3, f5, f0 would give f0 - f3*f5 -- confirm.
137 fmsub.s f0, f3, f5, f0 # f0 = f3*f5 - f0 (fused multiply-subtract)
140 fmadd.s f1, f3, f4, f1 # f1 = f3*f4 + f1 (fused multiply-add); presumably the imaginary part of C
143 # The C code uses a jalr instruction to call this function
144 # so we can use a jr to return back to where the function
145 # was called. Also known as "ret", for "return".