1 <!-- This defines Draft SVP64 instructions to augment PowerISA Version 3.0 -->
2 <!-- These are not described in book 1 -->
13 step <- SVSTATE_NEXT(SVi, vf)
16 Special Registers Altered:
24 * setvl RT,RA,SVi,vf,vs,ms
25 * setvl. RT,RA,SVi,vf,vs,ms
29 if (vf & (¬vs) & ¬(ms)) = 1 then
30 step <- SVSTATE_NEXT(SVi, 0b0)
31 if _RT != 0b00000 then
32 GPR(_RT) <- [0]*57 || step
50 if _RT != 0b00000 then
51 GPR(_RT) <- [0]*57 || VL
52 # set requested Vertical-First mode, clear persist
56 Special Registers Altered:
64 * svremap SVme,mi0,mi1,mi2,mo0,mo1,pst
68 # registers RA RB RC RT EA/FRS SVSHAPE0-3 indices
74 # enable bit for RA RB RC RT EA/FRS
75 SVSTATE[42:46] <- SVme
76 # persistence bit (applies to more than one instruction)
79 Special Registers Altered:
87 * svshape SVxd,SVyd,SVzd,SVRM,vf
91 # for convenience, VL to be calculated and stored in SVSTATE
93 itercount[0:6] <- [0] * 7
94 SVSTATE[0:31] <- [0] * 32
95 # only overwrite REMAP if "persistence" is zero
96 if (SVSTATE[62] = 0b0) then
97 SVSTATE[32:33] <- 0b00
98 SVSTATE[34:35] <- 0b00
99 SVSTATE[36:37] <- 0b00
100 SVSTATE[38:39] <- 0b00
101 SVSTATE[40:41] <- 0b00
102 SVSTATE[42:46] <- 0b00000
105 # clear out all SVSHAPEs
106 SVSHAPE0[0:31] <- [0] * 32
107 SVSHAPE1[0:31] <- [0] * 32
108 SVSHAPE2[0:31] <- [0] * 32
109 SVSHAPE3[0:31] <- [0] * 32
110 # set schedule up for matrix multiply
111 if (SVRM = 0b0000) then
112 # VL in Matrix Multiply is xd*yd*zd
113 n <- (0b00 || SVxd) * (0b00 || SVyd) * (0b00 || SVzd)
114 vlen[0:6] <- n[14:20]
115 # set up template in SVSHAPE0, then copy to 1-3
116 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
117 SVSHAPE0[6:11] <- (0b0 || SVyd) # ydim
118 SVSHAPE0[12:17] <- (0b0 || SVzd) # zdim
119 SVSHAPE0[28:29] <- 0b11 # skip z
121 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
122 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
123 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
125 SVSHAPE1[18:20] <- 0b001 # permute x,z,y
126 SVSHAPE1[28:29] <- 0b01 # skip z
128 SVSHAPE2[18:20] <- 0b001 # permute x,z,y
129 SVSHAPE2[28:29] <- 0b11 # skip y
130 # set schedule up for FFT butterfly
131 if (SVRM = 0b0001) then
132 # calculate O(N log2 N)
135 if SVxd[4-n] = 0 then
138 n <- ((0b0 || SVxd) + 1) * n
140 # set up template in SVSHAPE0, then copy to 1-2
142 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
143 SVSHAPE0[30:31] <- 0b01 # Butterfly mode
145 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
146 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
148 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
150 SVSHAPE2[28:29] <- 0b10 # k schedule
151 # set schedule up for DCT Inner butterfly
152 # SVRM Mode 2 is for pre-calculated coefficients,
153 # SVRM Mode 4 is for on-the-fly (Vertical-First Mode)
154 if (SVRM = 0b0010) | (SVRM = 0b0100) then
155 # calculate O(N log2 N)
158 if SVxd[4-n] = 0 then
161 n <- ((0b0 || SVxd) + 1) * n
163 # set up template in SVSHAPE0, then copy to 1-3
165 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
166 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
167 if (SVRM = 0b0100) then
168 SVSHAPE0[6:11] <- 0b000011 # DCT Inner Butterfly mode 4
170 SVSHAPE0[6:11] <- 0b000001 # DCT Inner Butterfly mode 2
171 SVSHAPE0[18:20] <- 0b001 # DCT Inner Butterfly sub-mode
172 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
174 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
175 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
176 if (SVRM != 0b0100) then
177 SVSHAPE3[0:31] <- SVSHAPE0[0:31]
179 SVSHAPE0[28:29] <- 0b01 # j+halfstep schedule
180 # for cos coefficient
181 SVSHAPE2[28:29] <- 0b10 # ci (k for mode 4) schedule
182 if (SVRM != 0b0100) then
183 SVSHAPE3[28:29] <- 0b11 # size schedule
184 # set schedule up for DCT Outer butterfly
185 if (SVRM = 0b0011) then
186 # calculate O(N log2 N) number of outer butterfly overlapping adds
190 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
191 itercount[0:6] <- (0b0 || itercount[0:5])
193 if SVxd[4-n] = 0 then
196 count <- (itercount - 0b0000001) * size
197 vlen[0:6] <- vlen + count[7:13]
198 size[0:6] <- (size[1:6] || 0b0)
199 itercount[0:6] <- (0b0 || itercount[0:5])
200 # set up template in SVSHAPE0, then copy to 1-2
202 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
203 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
204 SVSHAPE0[6:11] <- 0b000010 # DCT Butterfly mode
205 SVSHAPE0[18:20] <- 0b100 # DCT Outer Butterfly sub-mode
207 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
208 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
210 SVSHAPE1[28:29] <- 0b01 # j+halfstep schedule
211 # set schedule up for DCT COS table generation
212 if (SVRM = 0b0101) then
213 # calculate O(N log2 N)
215 itercount[0:6] <- (0b00 || SVxd) + 0b0000001
216 itercount[0:6] <- (0b0 || itercount[0:5])
219 if SVxd[4-n] = 0 then
222 vlen[0:6] <- vlen + itercount
223 itercount[0:6] <- (0b0 || itercount[0:5])
224 # set up template in SVSHAPE0, then copy to 1-2
226 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
227 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
228 SVSHAPE0[6:11] <- 0b000100 # DCT Inner Butterfly COS-gen mode
229 SVSHAPE0[21:23] <- 0b001 # "inverse" on outer loop
231 SVSHAPE1[0:31] <- SVSHAPE0[0:31]
232 SVSHAPE2[0:31] <- SVSHAPE0[0:31]
233 # for cos coefficient
234 SVSHAPE1[28:29] <- 0b10 # ci schedule
235 SVSHAPE2[28:29] <- 0b11 # size schedule
236 # set schedule up for DCT inverse of half-swapped ordering
237 if (SVRM = 0b0110) then
238 vlen[0:6] <- (0b00 || SVxd) + 0b0000001
239 # set up template in SVSHAPE0
240 SVSHAPE0[0:5] <- (0b0 || SVxd) # xdim
241 SVSHAPE0[30:31] <- 0b01 # DCT/FFT mode
242 SVSHAPE0[6:11] <- 0b000101 # DCT "half-swap" mode
243 # set VL, MVL and Vertical-First
245 SVSTATE[7:13] <- vlen
248 Special Registers Altered: