1 # See LICENSE for license details.
25 #elif defined(FFT_FLOATING)
31 #define DATA_STORE fsh
33 #define FFT_MUL fmul.h
34 #define FFT_ADD fadd.h
35 #define FFT_SUB fsub.h
36 #elif defined(FP_SINGLE)
41 #define DATA_STORE fsw
43 #define FFT_MUL fmul.s
44 #define FFT_ADD fadd.s
45 #define FFT_SUB fsub.s
46 #elif defined(FP_DOUBLE)
51 #define DATA_STORE fsd
53 #define FFT_MUL fmul.d
54 #define FFT_ADD fadd.d
55 #define FFT_SUB fsub.d
65 #error FFT_FIXED or FFT_FLOATING not defined
# Operand-position step: from the op index, derive the positions of the two
# butterfly operands and of the twiddle factor (positions, not byte offsets).
# Inputs (i_*):
78 # x1: lane start (utidx=0 actually has this pos due to stripmining)
79 # x2: bit mask to select FFT block from op idx
80 # x3: bit mask to select operand in FFT block from op idx
81 # x4: necessary shift to adjust TF appropriately ( REMOVED )
82 # x5: half the current FFT size (add to get the second op)
# x6: value added to the lane start to form opid (presumably the utidx; its setup is not visible here -- TODO confirm)
# Outputs (o_*):
84 # x1: Has the first operand pos = ((opid & i_x2) << 1) + (opid & i_x3)
85 # x2: Has the second operand pos = o_x1 + i_x5
86 # x3: Has the twiddle factor pos = opid & i_x3 (NOTE(review): the former << i_x4 adjustment is marked REMOVED above and no such shift is visible here -- confirm)
88 add x6, x1, x6 # x6 <= opid = lane start + incoming x6
89 and x2, x2, x6 # x2 <= opid & i_x2 (FFT block bits)
90 and x3, x3, x6 # x3 <= opid & i_x3 (operand-in-block bits, also the twiddle factor pos)
91 slli x2, x2, 1 # x2 <= (opid & i_x2) << 1
93 add x1, x2, x3 # x1 <= ((opid & i_x2) << 1) + (opid & i_x3): first operand pos
94 add x2, x1, x5 # x2 <= o_x1 + i_x5: second operand pos (x5 is half the FFT size)
# Scale step: turn the positions into memory offsets, load the twiddle factor
# (a + bi) and the second operand (c + di), and form their complex product,
# called the scale factor below.
# Inputs (i_*):
102 # x1: Has the first operand pos (reused)
103 # x2: Has the second operand pos (reused)
104 # x3: Has the twiddle factor pos (reused)
105 # x4: Has the tf real ptr
106 # x5: Has the tf imag ptr
107 # x6: Has the workspace real ptr
108 # x7: Has the workspace imag ptr
109 # x8: Has the fixed point shift ( REMOVED ) (NOTE(review): the shifts below still read REG5 and x8 -- confirm which register carries the shift)
# Outputs (o_*):
111 # x1: Has the first operand offset = i_x1 << PTR_SHIFT
112 # x2: Has the second operand offset = i_x2 << PTR_SHIFT
113 # x3: Has the scale factor real (NOTE(review): the three-multiply path leaves it in REG0; only the four-multiply path writes x3 -- the REGn mapping is defined outside this view)
114 # x4: Has the scale factor imag (NOTE(review): the three-multiply path leaves it in REG1; only the four-multiply path writes x4)
115 # Convert positions into actual memory offsets from table start
116 slli x1, x1, PTR_SHIFT # x1 <= i_x1 << PTR_SHIFT (proper result)
117 slli x2, x2, PTR_SHIFT # x2 <= i_x2 << PTR_SHIFT (proper result)
118 slli x3, x3, PTR_SHIFT # x3 <= i_x3 << PTR_SHIFT (tf offset)
120 # Compute memory locations
121 add x4, x4, x3 # x4 <= tf real ptr + tf offset: load address for tf real
122 add x5, x5, x3 # x5 <= tf imag ptr + tf offset: load address for tf imag
123 add x6, x6, x2 # x6 <= workspace real ptr + second operand offset: load address for op2 real
124 add x7, x7, x2 # x7 <= workspace imag ptr + second operand offset: load address for op2 imag
126 # Actually read memory
127 DATA_LOAD REG1, 0(x4) # tf real (a)
128 DATA_LOAD REG2, 0(x5) # tf imag (bi)
129 DATA_LOAD REG3, 0(x6) # op2 real (c)
130 DATA_LOAD REG4, 0(x7) # op2 imag (di)
132 # Do the math using 3 multiplies:
# real = a(c+d) - (a+b)d = ac - bd, imag = a(c+d) + (b-a)c = ad + bc
133 FFT_ADD REG0, REG1, REG2 # REG0 <= a + b
134 FFT_SUB REG2, REG2, REG1 # REG2 <= b - a
135 FFT_MUL REG0, REG0, REG4 # REG0 <= (a+b)d
137 sra REG0, REG0, REG5 # DO NOT SHIFT FOR FLOATING (presumably inside a fixed-point-only conditional that is not visible here)
139 FFT_MUL REG2, REG2, REG3 # REG2 <= (b-a)c
141 sra REG2, REG2, REG5 # DO NOT SHIFT FOR FLOATING
143 FFT_ADD REG3, REG3, REG4 # REG3 <= c + d
144 FFT_MUL REG4, REG1, REG3 # REG4 <= a(c+d)
146 sra REG4, REG4, REG5 # DO NOT SHIFT FOR FLOATING
149 # Prepare final result
150 FFT_SUB REG0, REG4, REG0 # REG0 <= a(c+d) - (a+b)d = ac - bd (scale real)
151 FFT_ADD REG1, REG4, REG2 # REG1 <= a(c+d) + (b-a)c = ad + bc (scale imag)
155 # Four multiply version (integer mul; per the comments below it reads a, b, c, d from x4, x5, x6, x7 -- the loads and any enclosing conditional are not visible here)
156 # Do the multiplications (a+bi)(c+di) needs ac ad bc bd
157 mul x3, x4, x6 # x3 <= ac
158 mul x4, x4, x7 # x4 <= ad (from a*di)
159 mul x6, x5, x6 # x6 <= bc (from bi*c)
160 mul x5, x5, x7 # x5 <= bd (from bi*di; subtracted below since i*i = -1)
161 sra x3, x3, x8 # First of the 4 shifts that keep the fixed point properly aligned (the other three are not visible in this excerpt)
166 # Do the additions (ac - bd) and (bc + ad)
167 sub x3, x3, x5 # x3 <= ac - bd (scale real, proper result)
168 add x4, x4, x6 # x4 <= bc + ad (scale imag, proper result)
# Butterfly step: load the first operand and combine it with the scale factor,
# res1 = op1 + scale and res2 = op1 - scale.
# Inputs (i_*):
174 # x1: Has the first operand offset (reused)
175 # x2: Has the second operand offset (reused)
176 # x3: Has the scale factor real (reused)
177 # x4: Has the scale factor imag (reused)
178 # x5: Has the workspace real ptr
179 # x6: Has the workspace imag ptr
# Outputs (o_*):
181 # x1: Has the first operand offset (carry)
182 # x2: Has the second operand offset (carry)
183 # x5: Has the first result real
184 # x6: Has the first result imag
185 # x7: Has the second result real
186 # x8: Has the second result imag
# NOTE(review): the visible code reads the scale factor from REG0/REG1 and
# writes the results to REG2..REG5; the x-register names in the lists above
# only hold if the REGn macros (defined outside this view) map onto them.
187 # Compute first operand memory locations
188 add x5, x5, x1 # x5 <= workspace real ptr + first operand offset: load address for op1 real
189 add x6, x6, x1 # x6 <= workspace imag ptr + first operand offset: load address for op1 imag
191 # Actually read memory
192 DATA_LOAD REG2, 0(x5) # op1 real
193 DATA_LOAD REG3, 0(x6) # op1 imag
195 # Do the add/subs (res1=op1+scale), (res2=op1-scale)
# The subtractions come first because the additions overwrite op1 in REG2/REG3.
196 FFT_SUB REG4, REG2, REG0 # REG4 <= op1 real - scale real (res2 real)
197 FFT_SUB REG5, REG3, REG1 # REG5 <= op1 imag - scale imag (res2 imag)
198 FFT_ADD REG2, REG2, REG0 # REG2 <= op1 real + scale real (res1 real)
199 FFT_ADD REG3, REG3, REG1 # REG3 <= op1 imag + scale imag (res1 imag)
# Store step for the first result: writes REG2 and REG3 through x3 and x4.
# Inputs (i_*):
206 # x1: Has the first operand offset (reused)
207 # x2: Has the second operand offset (reused)
208 # x3: Has the workspace real ptr
209 # x4: Has the workspace imag ptr
210 # x5: Has the first result real (reused)
211 # x6: Has the first result imag (reused)
212 # x7: Has the second result real (reused)
213 # x8: Has the second result imag (reused)
# Outputs (o_*):
215 # x2: Has the second operand offset (carry)
216 # x7: Has the second result real (carry)
217 # x8: Has the second result imag (carry)
218 # Compute first result memory locations
# NOTE(review): the address computation that belongs under the heading above
# is not visible in this excerpt; presumably it adds the first operand offset
# (x1) to x3 and x4 -- confirm against the full file.
222 # Actually write memory
223 DATA_STORE REG2, 0(x3) # store REG2 (first result real per the butterfly step) at 0(x3)
224 DATA_STORE REG3, 0(x4) # store REG3 (first result imag per the butterfly step) at 0(x4)
# Store step for the second result: writes REG4 and REG5 through x3 and x4.
# Inputs (i_*):
231 # x2: Has the second operand offset
232 # x3: Has the workspace real ptr
233 # x4: Has the workspace imag ptr
234 # x7: Has the second result real
235 # x8: Has the second result imag
237 # Compute second result memory locations
# NOTE(review): the address computation that belongs under the heading above
# is not visible in this excerpt; presumably it adds the second operand offset
# (x2) to x3 and x4 -- confirm against the full file.
241 # Actually write memory
242 DATA_STORE REG4, 0(x3) # store REG4 (second result real per the butterfly step) at 0(x3)
243 DATA_STORE REG5, 0(x4) # store REG5 (second result imag per the butterfly step) at 0(x4)