23 #elif defined(FFT_FLOATING)
29 #define DATA_STORE fsh
31 #define FFT_MUL fmul.h
32 #define FFT_ADD fadd.h
33 #define FFT_SUB fsub.h
34 #elif defined(FP_SINGLE)
39 #define DATA_STORE fsw
41 #define FFT_MUL fmul.s
42 #define FFT_ADD fadd.s
43 #define FFT_SUB fsub.s
44 #elif defined(FP_DOUBLE)
49 #define DATA_STORE fsd
51 #define FFT_MUL fmul.d
52 #define FFT_ADD fadd.d
53 #define FFT_SUB fsub.d
63 #error FFT_FIXED or FFT_FLOATING not defined
# Purpose: from the operation index (opid) derive the element positions of
# the two butterfly operands and of the twiddle factor.
# On entry:
76 # x1: lane start (utidx=0 actually has this pos due to stripmining)
77 # x2: bit mask to select FFT block from op idx
78 # x3: bit mask to select operand in FFT block from op idx
79 # x4: necessary shift to adjust TF appropriately ( REMOVED )
80 # x5: half the current FFT size (add to get the second op)
# On exit:
82 # x1: Has the first operand pos = ((opid & i_x2) << 1) + (opid & i_x3)
83 # x2: Has the second operand pos = o_x1 + i_x5
84 # x3: Has the twiddle factor pos = (opid & i_x3); the shift by i_x4 is REMOVED
# NOTE(review): x6 is read below before any visible write; presumably it holds
# the per-element index on entry -- confirm against the full file.
86 add x6, x1, x6 # x6 <= opid = lane start + incoming x6
87 and x2, x2, x6 # x2 <= opid & i_x2
88 and x3, x3, x6 # x3 <= opid & i_x3
89 slli x2, x2, 1 # x2 <= (opid & i_x2) << 1
91 add x1, x2, x3 # x1 <= ((opid & i_x2) << 1) + (opid & i_x3) (first operand pos)
92 add x2, x1, x5 # x2 <= o_x1 + i_x5 (second operand pos)
# Purpose: load the twiddle factor (a + bi) and the second operand (c + di)
# and form their complex product (the scale factor) with three multiplies.
# On entry:
100 # x1: Has the first operand pos (reused)
101 # x2: Has the second operand pos (reused)
102 # x3: Has the twiddle factor pos (reused)
103 # x4: Has the tf real ptr
104 # x5: Has the tf imag ptr
105 # x6: Has the workspace real ptr
106 # x7: Has the workspace imag ptr
107 # x8: Has the fixed point shift ( REMOVED )
# On exit:
109 # x1: Has the first operand offset = i_x1 << PTR_SHIFT
110 # x2: Has the second operand offset = i_x2 << PTR_SHIFT
111 # x3: Has the scale factor real
112 # x4: Has the scale factor imag
# NOTE(review): the visible code leaves the scale factor in REG0 (real) and
# REG1 (imag); how REGn map onto x3/x4 is not visible here -- confirm.
113 # Convert positions into actual memory offsets from table start
114 slli x1, x1, PTR_SHIFT # x1 <= i_x1 << PTR_SHIFT (proper result)
115 slli x2, x2, PTR_SHIFT # x2 <= i_x2 << PTR_SHIFT (proper result)
116 slli x3, x3, PTR_SHIFT # x3 <= i_x3 << PTR_SHIFT (tf offset)
118 # Compute memory locations
119 add x4, x4, x3 # x4 <= load address for tf real
120 add x5, x5, x3 # x5 <= load address for tf imag
121 add x6, x6, x2 # x6 <= load address for op2 real
122 add x7, x7, x2 # x7 <= load address for op2 imag
124 # Actually read memory
125 DATA_LOAD REG1, 0(x4) # tf real (a)
126 DATA_LOAD REG2, 0(x5) # tf imag (bi)
127 DATA_LOAD REG3, 0(x6) # op2 real (c)
128 DATA_LOAD REG4, 0(x7) # op2 imag (di)
130 # Do the math using 3 multiplies
# The three products are (a+b)d, (b-a)c and a(c+d); they combine into
# real = ac - bd and imag = ad + bc in the final two instructions.
131 FFT_ADD REG0, REG1, REG2 # REG0 <= a + b
132 FFT_SUB REG2, REG2, REG1 # REG2 <= b - a
133 FFT_MUL REG0, REG0, REG4 # REG0 <= (a+b)d
# NOTE(review): each sra below shifts a product right by REG5 and is marked
# DO NOT SHIFT FOR FLOATING; presumably these are assembled only in the
# fixed-point build, but no guard is visible here -- confirm. The entry list
# marks the x8 shift as REMOVED, so the source of REG5 also needs confirming.
135 sra REG0, REG0, REG5 # DO NOT SHIFT FOR FLOATING
137 FFT_MUL REG2, REG2, REG3 # REG2 <= (b-a)c
139 sra REG2, REG2, REG5 # DO NOT SHIFT FOR FLOATING
141 FFT_ADD REG3, REG3, REG4 # REG3 <= c + d
142 FFT_MUL REG4, REG1, REG3 # REG4 <= a(c+d)
144 sra REG4, REG4, REG5 # DO NOT SHIFT FOR FLOATING
147 # Prepare final result
148 FFT_SUB REG0, REG4, REG0 # REG0 <= a(c+d) - (a+b)d = ac - bd (scale real)
149 FFT_ADD REG1, REG4, REG2 # REG1 <= a(c+d) + (b-a)c = ad + bc (scale imag)
153 # Four multiply version
154 # Do the multiplications (a+bi)(c+di) needs ac ad bc bd
# Operands as used below: x4 = a, x5 = b, x6 = c, x7 = d, x8 = shift amount.
# NOTE(review): this variant multiplies x4..x7 as values, so they must hold
# loaded data rather than addresses; the loads are not visible here -- confirm.
155 mul x3, x4, x6 # x3 <= ac
156 mul x4, x4, x7 # x4 <= ad (imaginary part contribution)
157 mul x6, x5, x6 # x6 <= bc (imaginary part contribution)
158 mul x5, x5, x7 # x5 <= bd (b, d are the imaginary coefficients)
# NOTE(review): the remark below mentions 4 shifts but only the shift of x3
# is visible here -- confirm that x4, x5 and x6 are shifted by x8 as well.
159 sra x3, x3, x8 # These 4 shifts make sure the fixed pt properly aligned
164 # Do the additions (ac - bd) and (bc + ad)
165 sub x3, x3, x5 # x3 <= ac - bd (proper result)
166 add x4, x4, x6 # x4 <= bc + ad (proper result)
# Purpose: load the first operand and combine it with the scale factor to
# form both butterfly results: res1 = op1 + scale, res2 = op1 - scale.
# On entry:
172 # x1: Has the first operand offset (reused)
173 # x2: Has the second operand offset (reused)
174 # x3: Has the scale factor real (reused)
175 # x4: Has the scale factor imag (reused)
176 # x5: Has the workspace real ptr
177 # x6: Has the workspace imag ptr
# On exit:
179 # x1: Has the first operand offset (carry)
180 # x2: Has the second operand offset (carry)
181 # x5: Has the first result real
182 # x6: Has the first result imag
183 # x7: Has the second result real
184 # x8: Has the second result imag
# NOTE(review): the visible code reads the scale factor from REG0/REG1 and
# leaves res1 in REG2/REG3 and res2 in REG4/REG5; how REGn map onto the
# x registers listed above is not visible here -- confirm.
185 # Compute first operand memory locations
186 add x5, x5, x1 # x5 <= load address for op1 real
187 add x6, x6, x1 # x6 <= load address for op1 imag
189 #actually read memory
190 DATA_LOAD REG2, 0(x5) # op1 real
191 DATA_LOAD REG3, 0(x6) # op1 imag
193 # Do the add/subs (res1=op1+scale), (res2=op1-scale)
# The subtractions come first because the additions overwrite op1 in REG2/REG3.
194 FFT_SUB REG4, REG2, REG0 # res2 real = op1 real - scale real
195 FFT_SUB REG5, REG3, REG1 # res2 imag = op1 imag - scale imag
196 FFT_ADD REG2, REG2, REG0 # res1 real = op1 real + scale real
197 FFT_ADD REG3, REG3, REG1 # res1 imag = op1 imag + scale imag
# Purpose: write the first butterfly result (real and imaginary parts).
# On entry:
204 # x1: Has the first operand offset (reused)
205 # x2: Has the second operand offset (reused)
206 # x3: Has the workspace real ptr
207 # x4: Has the workspace imag ptr
208 # x5: Has the first result real (reused)
209 # x6: Has the first result imag (reused)
210 # x7: Has the second result real (reused)
211 # x8: Has the second result imag (reused)
# On exit:
213 # x2: Has the second operand offset (carry)
214 # x7: Has the second result real (carry)
215 # x8: Has the second result imag (carry)
216 # Compute first result memory locations
# NOTE(review): no address computation is visible here; the stores below use
# x3 and x4 directly as addresses. Presumably the first operand offset in x1
# is added to both pointers first -- confirm against the full file.
220 # actually write memory
221 DATA_STORE REG2, 0(x3) # store first result real at 0(x3)
222 DATA_STORE REG3, 0(x4) # store first result imag at 0(x4)
# Purpose: write the second butterfly result (real and imaginary parts).
# On entry:
229 # x2: Has the second operand offset
230 # x3: Has the workspace real ptr
231 # x4: Has the workspace imag ptr
232 # x7: Has the second result real
233 # x8: Has the second result imag
235 # Compute second result memory locations
# NOTE(review): no address computation is visible here; the stores below use
# x3 and x4 directly as addresses. Presumably the second operand offset in x2
# is added to both pointers first -- confirm against the full file.
239 # actually write memory
240 DATA_STORE REG4, 0(x3) # store second result real at 0(x3)
241 DATA_STORE REG5, 0(x4) # store second result imag at 0(x4)