02aa1b86aa4c935c2fcd4835cb6a4acc6765ec4f
[mesa.git] / src / broadcom / qpu / qpu_pack.c
1 /*
2 * Copyright © 2016 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <string.h>
25 #include "util/macros.h"
26
27 #include "broadcom/common/v3d_device_info.h"
28 #include "qpu_instr.h"
29
#ifndef QPU_MASK
/* Mask covering bits low..high (inclusive) of a 64-bit word.  The span
 * (high - low + 1) is used as a shift count on a uint64_t, so it must be
 * smaller than 64.
 */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts value up by field##_SHIFT and yields it masked by field##_MASK,
 * asserting first that no bit of the shifted value lies outside the mask.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts field from word and truncates the result to 32 bits. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

/* Yields inst with the bits of field cleared and replaced by value. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
45
/* Bit-field layout of the 64-bit instruction word, listed from the most
 * significant field down.  Each FOO_SHIFT/FOO_MASK pair is consumed through
 * QPU_GET_FIELD()/QPU_SET_FIELD().  The *_BRANCH_* fields occupy bit ranges
 * that overlap the ALU fields; they are only accessed by the branch
 * pack/unpack functions in this file.
 */

/* Mul ALU opcode, bits 63..58. */
#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

/* Signal field, bits 57..53. */
#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)
# define VC5_QPU_SIG_THRSW_BIT              0x1
# define VC5_QPU_SIG_LDUNIF_BIT             0x2
# define VC5_QPU_SIG_LDTMU_BIT              0x4
# define VC5_QPU_SIG_LDVARY_BIT             0x8

/* Condition/flags field, bits 52..46. */
#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)

#define VC5_QPU_COND_IFA                    0
#define VC5_QPU_COND_IFB                    1
#define VC5_QPU_COND_IFNA                   2
#define VC5_QPU_COND_IFNB                   3

/* Single-bit fields stored into/read from the mul (MM) and add (MA)
 * magic_write members.
 */
#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

/* Mul ALU write address, bits 43..38. */
#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

/* Branch offset bits 23..3 live in instruction bits 55..35 (this range
 * overlaps the low three bits of the SIG field).
 */
#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

/* Add ALU write address, bits 37..32. */
#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

/* Branch offset bits 31..24 live in instruction bits 31..24. */
#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

/* Add ALU opcode, bits 31..24. */
#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

/* Single bit; the BDU field is only packed/unpacked when it is set. */
#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
113
/* Designated-initializer shorthands used to keep the signal table below
 * compact: each expands to ".member = true" for struct v3d_qpu_sig.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

/* Unpacked signal set for each value of the packed signal field; the array
 * index is the packed value.  Indices without an initializer (18-21) are
 * zero-filled, and v3d_qpu_sig_unpack() rejects any all-zero entry other
 * than [0] as reserved.  Entries [15] and [31] are identical, so
 * v3d_qpu_sig_pack(), which returns the first match, yields 15 for them.
 */
static const struct v3d_qpu_sig v33_sig_map[] = {
        /*      MISC   R3       R4      R5 */
        [0]  = {                               },
        [1]  = { THRSW,                        },
        [2]  = {                        LDUNIF },
        [3]  = { THRSW,                 LDUNIF },
        [4]  = {        LDTMU,                 },
        [5]  = { THRSW, LDTMU,                 },
        [6]  = {        LDTMU,          LDUNIF },
        [7]  = { THRSW, LDTMU,          LDUNIF },
        [8]  = {        LDVARY,                },
        [9]  = { THRSW, LDVARY,                },
        [10] = {        LDVARY,         LDUNIF },
        [11] = { THRSW, LDVARY,         LDUNIF },
        [12] = {        LDVARY, LDTMU,         },
        [13] = { THRSW, LDVARY, LDTMU,         },
        [14] = { SMIMM, LDVARY,                },
        [15] = { SMIMM,                        },
        [16] = {        LDTLB,                 },
        [17] = {        LDTLBU,                },
        /* 18-21 reserved */
        [22] = { UCB,                          },
        [23] = { ROT,                          },
        [24] = {        LDVPM,                 },
        [25] = { THRSW, LDVPM,                 },
        [26] = {        LDVPM,          LDUNIF },
        [27] = { THRSW, LDVPM,          LDUNIF },
        [28] = {        LDVPM, LDTMU,          },
        [29] = { THRSW, LDVPM, LDTMU,          },
        [30] = { SMIMM, LDVPM,                 },
        [31] = { SMIMM,                        },
};
158
159 bool
160 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
161 uint32_t packed_sig,
162 struct v3d_qpu_sig *sig)
163 {
164 if (packed_sig >= ARRAY_SIZE(v33_sig_map))
165 return false;
166
167 *sig = v33_sig_map[packed_sig];
168
169 /* Signals with zeroed unpacked contents after element 0 are reserved. */
170 return (packed_sig == 0 ||
171 memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
172 }
173
174 bool
175 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
176 const struct v3d_qpu_sig *sig,
177 uint32_t *packed_sig)
178 {
179 static const struct v3d_qpu_sig *map;
180
181 map = v33_sig_map;
182
183 for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
184 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
185 *packed_sig = i;
186 return true;
187 }
188 }
189
190 return false;
191 }
192
193 bool
194 v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
195 uint32_t packed_cond,
196 struct v3d_qpu_flags *cond)
197 {
198 static const enum v3d_qpu_cond cond_map[4] = {
199 [0] = V3D_QPU_COND_IFA,
200 [1] = V3D_QPU_COND_IFB,
201 [2] = V3D_QPU_COND_IFNA,
202 [3] = V3D_QPU_COND_IFNB,
203 };
204
205 cond->ac = V3D_QPU_COND_NONE;
206 cond->mc = V3D_QPU_COND_NONE;
207 cond->apf = V3D_QPU_PF_NONE;
208 cond->mpf = V3D_QPU_PF_NONE;
209 cond->auf = V3D_QPU_UF_NONE;
210 cond->muf = V3D_QPU_UF_NONE;
211
212 if (packed_cond == 0) {
213 return true;
214 } else if (packed_cond >> 2 == 0) {
215 cond->apf = packed_cond & 0x3;
216 } else if (packed_cond >> 4 == 0) {
217 cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
218 } else if (packed_cond == 0x10) {
219 return false;
220 } else if (packed_cond >> 2 == 0x4) {
221 cond->mpf = packed_cond & 0x3;
222 } else if (packed_cond >> 4 == 0x1) {
223 cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
224 } else if (packed_cond >> 4 == 0x2) {
225 cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
226 cond->mpf = packed_cond & 0x3;
227 } else if (packed_cond >> 4 == 0x3) {
228 cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
229 cond->apf = packed_cond & 0x3;
230 } else if (packed_cond >> 6) {
231 cond->mc = cond_map[(packed_cond >> 4) & 0x3];
232 if (((packed_cond >> 2) & 0x3) == 0) {
233 cond->ac = cond_map[packed_cond & 0x3];
234 } else {
235 cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
236 }
237 }
238
239 return true;
240 }
241
242 bool
243 v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
244 const struct v3d_qpu_flags *cond,
245 uint32_t *packed_cond)
246 {
247 #define AC (1 << 0)
248 #define MC (1 << 1)
249 #define APF (1 << 2)
250 #define MPF (1 << 3)
251 #define AUF (1 << 4)
252 #define MUF (1 << 5)
253 static const struct {
254 uint8_t flags_present;
255 uint8_t bits;
256 } flags_table[] = {
257 { 0, 0 },
258 { APF, 0 },
259 { AUF, 0 },
260 { MPF, (1 << 4) },
261 { MUF, (1 << 4) },
262 { AC, (1 << 5) },
263 { AC | MPF, (1 << 5) },
264 { MC, (1 << 5) | (1 << 4) },
265 { MC | APF, (1 << 5) | (1 << 4) },
266 { MC | AC, (1 << 6) },
267 { MC | AUF, (1 << 6) },
268 };
269
270 uint8_t flags_present = 0;
271 if (cond->ac != V3D_QPU_COND_NONE)
272 flags_present |= AC;
273 if (cond->mc != V3D_QPU_COND_NONE)
274 flags_present |= MC;
275 if (cond->apf != V3D_QPU_PF_NONE)
276 flags_present |= APF;
277 if (cond->mpf != V3D_QPU_PF_NONE)
278 flags_present |= MPF;
279 if (cond->auf != V3D_QPU_UF_NONE)
280 flags_present |= AUF;
281 if (cond->muf != V3D_QPU_UF_NONE)
282 flags_present |= MUF;
283
284 for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
285 if (flags_table[i].flags_present != flags_present)
286 continue;
287
288 *packed_cond = flags_table[i].bits;
289
290 *packed_cond |= cond->apf;
291 *packed_cond |= cond->mpf;
292
293 if (flags_present & AUF)
294 *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
295 if (flags_present & MUF)
296 *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;
297
298 if (flags_present & AC)
299 *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;
300
301 if (flags_present & MC) {
302 if (*packed_cond & (1 << 6))
303 *packed_cond |= (cond->mc -
304 V3D_QPU_COND_IFA) << 4;
305 else
306 *packed_cond |= (cond->mc -
307 V3D_QPU_COND_IFA) << 2;
308 }
309
310 return true;
311 }
312
313 return false;
314 }
315
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */

/* Bitmask with one bit per mux value: bits bot..top (inclusive) are set.
 * Both arguments are fully parenthesized so that expression arguments
 * expand with the intended precedence.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
/* All eight mux values are accepted. */
#define ANYMUX MUX_MASK(0, 7)

/* One row of an opcode table: the inclusive range of opcode field values
 * the row covers, the sets of mux_b/mux_a values it accepts (bit n set means
 * mux value n matches), and the operation it decodes to.
 */
struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
332
/* Opcode table for the add ALU.  Columns are: first opcode, last opcode
 * (inclusive), accepted mux_b values, accepted mux_a values, decoded op.
 * lookup_opcode() returns the first row that matches, and the pack path
 * uses the first row whose op matches, so row order matters where several
 * rows share an opcode range.
 */
static const struct opcode_desc add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: the operation is selected by mux_b. */
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: the operation is selected by mux_b and mux_a. */
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        /* Opcodes 245-247: v3d_qpu_add_unpack() folds 249-251 and 253-255
         * onto these rows before the lookup.
         */
        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};
418
/* Opcode table for the mul ALU, in the same column layout as add_ops: first
 * opcode, last opcode (inclusive), accepted mux_b values, accepted mux_a
 * values, decoded op.  Opcode 15 is split between FMOV, NOP and MOV by the
 * mux_b value.
 */
static const struct opcode_desc mul_ops[] = {
        { 1, 1, ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2, 2, ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3, 3, ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4, 8, ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9, 9, ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};
432
433 static const struct opcode_desc *
434 lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
435 uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
436 {
437 for (int i = 0; i < num_opcodes; i++) {
438 const struct opcode_desc *op_desc = &opcodes[i];
439
440 if (opcode < op_desc->opcode_first ||
441 opcode > op_desc->opcode_last)
442 continue;
443
444 if (!(op_desc->mux_b_mask & (1 << mux_b)))
445 continue;
446
447 if (!(op_desc->mux_a_mask & (1 << mux_a)))
448 continue;
449
450 return op_desc;
451 }
452
453 return NULL;
454 }
455
456 static bool
457 v3d_qpu_float32_unpack_unpack(uint32_t packed,
458 enum v3d_qpu_input_unpack *unpacked)
459 {
460 switch (packed) {
461 case 0:
462 *unpacked = V3D_QPU_UNPACK_ABS;
463 return true;
464 case 1:
465 *unpacked = V3D_QPU_UNPACK_NONE;
466 return true;
467 case 2:
468 *unpacked = V3D_QPU_UNPACK_L;
469 return true;
470 case 3:
471 *unpacked = V3D_QPU_UNPACK_H;
472 return true;
473 default:
474 return false;
475 }
476 }
477
478 static bool
479 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
480 uint32_t *packed)
481 {
482 switch (unpacked) {
483 case V3D_QPU_UNPACK_ABS:
484 *packed = 0;
485 return true;
486 case V3D_QPU_UNPACK_NONE:
487 *packed = 1;
488 return true;
489 case V3D_QPU_UNPACK_L:
490 *packed = 2;
491 return true;
492 case V3D_QPU_UNPACK_H:
493 *packed = 3;
494 return true;
495 default:
496 return false;
497 }
498 }
499
500 static bool
501 v3d_qpu_float16_unpack_unpack(uint32_t packed,
502 enum v3d_qpu_input_unpack *unpacked)
503 {
504 switch (packed) {
505 case 0:
506 *unpacked = V3D_QPU_UNPACK_NONE;
507 return true;
508 case 1:
509 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
510 return true;
511 case 2:
512 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
513 return true;
514 case 3:
515 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
516 return true;
517 case 4:
518 *unpacked = V3D_QPU_UNPACK_SWAP_16;
519 return true;
520 default:
521 return false;
522 }
523 }
524
525 static bool
526 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
527 uint32_t *packed)
528 {
529 switch (unpacked) {
530 case V3D_QPU_UNPACK_NONE:
531 *packed = 0;
532 return true;
533 case V3D_QPU_UNPACK_REPLICATE_32F_16:
534 *packed = 1;
535 return true;
536 case V3D_QPU_UNPACK_REPLICATE_L_16:
537 *packed = 2;
538 return true;
539 case V3D_QPU_UNPACK_REPLICATE_H_16:
540 *packed = 3;
541 return true;
542 case V3D_QPU_UNPACK_SWAP_16:
543 *packed = 4;
544 return true;
545 default:
546 return false;
547 }
548 }
549
550 static bool
551 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked,
552 uint32_t *packed)
553 {
554 switch (unpacked) {
555 case V3D_QPU_PACK_NONE:
556 *packed = 0;
557 return true;
558 case V3D_QPU_PACK_L:
559 *packed = 1;
560 return true;
561 case V3D_QPU_PACK_H:
562 *packed = 2;
563 return true;
564 default:
565 return false;
566 }
567 }
568
569 static bool
570 v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
571 struct v3d_qpu_instr *instr)
572 {
573 uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
574 uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
575 uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
576 uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
577
578 uint32_t map_op = op;
579 /* Some big clusters of opcodes are replicated with unpack
580 * flags
581 */
582 if (map_op >= 249 && map_op <= 251)
583 map_op = (map_op - 249 + 245);
584 if (map_op >= 253 && map_op <= 255)
585 map_op = (map_op - 253 + 245);
586
587 const struct opcode_desc *desc =
588 lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
589 map_op, mux_a, mux_b);
590 if (!desc)
591 return false;
592
593 instr->alu.add.op = desc->op;
594
595 /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
596 * operands.
597 */
598 if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
599 if (instr->alu.add.op == V3D_QPU_A_FMIN)
600 instr->alu.add.op = V3D_QPU_A_FMAX;
601 if (instr->alu.add.op == V3D_QPU_A_FADD)
602 instr->alu.add.op = V3D_QPU_A_FADDNF;
603 }
604
605 /* Some QPU ops require a bit more than just basic opcode and mux a/b
606 * comparisons to distinguish them.
607 */
608 switch (instr->alu.add.op) {
609 case V3D_QPU_A_STVPMV:
610 case V3D_QPU_A_STVPMD:
611 case V3D_QPU_A_STVPMP:
612 switch (waddr) {
613 case 0:
614 instr->alu.add.op = V3D_QPU_A_STVPMV;
615 break;
616 case 1:
617 instr->alu.add.op = V3D_QPU_A_STVPMD;
618 break;
619 case 2:
620 instr->alu.add.op = V3D_QPU_A_STVPMP;
621 break;
622 default:
623 return false;
624 }
625 break;
626 default:
627 break;
628 }
629
630 switch (instr->alu.add.op) {
631 case V3D_QPU_A_FADD:
632 case V3D_QPU_A_FADDNF:
633 case V3D_QPU_A_FSUB:
634 case V3D_QPU_A_FMIN:
635 case V3D_QPU_A_FMAX:
636 case V3D_QPU_A_FCMP:
637 instr->alu.add.output_pack = (op >> 4) & 0x3;
638
639 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
640 &instr->alu.add.a_unpack)) {
641 return false;
642 }
643
644 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
645 &instr->alu.add.b_unpack)) {
646 return false;
647 }
648 break;
649
650 case V3D_QPU_A_FFLOOR:
651 case V3D_QPU_A_FROUND:
652 case V3D_QPU_A_FTRUNC:
653 case V3D_QPU_A_FCEIL:
654 case V3D_QPU_A_FDX:
655 case V3D_QPU_A_FDY:
656 instr->alu.add.output_pack = mux_b & 0x3;
657
658 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
659 &instr->alu.add.a_unpack)) {
660 return false;
661 }
662 break;
663
664 case V3D_QPU_A_FTOIN:
665 case V3D_QPU_A_FTOIZ:
666 case V3D_QPU_A_FTOUZ:
667 case V3D_QPU_A_FTOC:
668 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
669
670 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
671 &instr->alu.add.a_unpack)) {
672 return false;
673 }
674 break;
675
676 case V3D_QPU_A_VFMIN:
677 case V3D_QPU_A_VFMAX:
678 if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
679 &instr->alu.add.a_unpack)) {
680 return false;
681 }
682
683 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
684 instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
685 break;
686
687 default:
688 instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
689 instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
690 instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
691 break;
692 }
693
694 instr->alu.add.a = mux_a;
695 instr->alu.add.b = mux_b;
696 instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
697 instr->alu.add.magic_write = packed_inst & VC5_QPU_MA;
698
699 return true;
700 }
701
702 static bool
703 v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
704 struct v3d_qpu_instr *instr)
705 {
706 uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
707 uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
708 uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);
709
710 {
711 const struct opcode_desc *desc =
712 lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
713 op, mux_a, mux_b);
714 if (!desc)
715 return false;
716
717 instr->alu.mul.op = desc->op;
718 }
719
720 switch (instr->alu.mul.op) {
721 case V3D_QPU_M_FMUL:
722 instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;
723
724 if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
725 &instr->alu.mul.a_unpack)) {
726 return false;
727 }
728
729 if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
730 &instr->alu.mul.b_unpack)) {
731 return false;
732 }
733
734 break;
735
736 case V3D_QPU_M_FMOV:
737 instr->alu.mul.output_pack = (((op & 1) << 1) +
738 ((mux_b >> 2) & 1));
739
740 if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
741 &instr->alu.mul.a_unpack)) {
742 return false;
743 }
744
745 break;
746
747 case V3D_QPU_M_VFMUL:
748 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
749
750 if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
751 &instr->alu.mul.a_unpack)) {
752 return false;
753 }
754
755 instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
756
757 break;
758
759 default:
760 instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
761 instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
762 instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
763 break;
764 }
765
766 instr->alu.mul.a = mux_a;
767 instr->alu.mul.b = mux_b;
768 instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
769 instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;
770
771 return true;
772 }
773
774 static bool
775 v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
776 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
777 {
778 uint32_t waddr = instr->alu.add.waddr;
779 uint32_t mux_a = instr->alu.add.a;
780 uint32_t mux_b = instr->alu.add.b;
781 int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
782 const struct opcode_desc *desc;
783
784 int opcode;
785 for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
786 desc++) {
787 if (desc->op == instr->alu.add.op)
788 break;
789 }
790 if (desc == &add_ops[ARRAY_SIZE(add_ops)])
791 return false;
792
793 opcode = desc->opcode_first;
794
795 /* If an operation doesn't use an arg, its mux values may be used to
796 * identify the operation type.
797 */
798 if (nsrc < 2)
799 mux_b = ffs(desc->mux_b_mask) - 1;
800
801 if (nsrc < 1)
802 mux_a = ffs(desc->mux_a_mask) - 1;
803
804 switch (instr->alu.add.op) {
805 case V3D_QPU_A_STVPMV:
806 waddr = 0;
807 break;
808 case V3D_QPU_A_STVPMD:
809 waddr = 1;
810 break;
811 case V3D_QPU_A_STVPMP:
812 waddr = 2;
813 break;
814 default:
815 break;
816 }
817
818 switch (instr->alu.add.op) {
819 case V3D_QPU_A_FADD:
820 case V3D_QPU_A_FADDNF:
821 case V3D_QPU_A_FSUB:
822 case V3D_QPU_A_FMIN:
823 case V3D_QPU_A_FMAX:
824 case V3D_QPU_A_FCMP: {
825 uint32_t output_pack;
826 uint32_t a_unpack;
827 uint32_t b_unpack;
828
829 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
830 &output_pack)) {
831 return false;
832 }
833 opcode |= output_pack << 4;
834
835 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
836 &a_unpack)) {
837 return false;
838 }
839
840 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
841 &b_unpack)) {
842 return false;
843 }
844
845 /* These operations with commutative operands are
846 * distinguished by which order their operands come in.
847 */
848 bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
849 if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
850 instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
851 ((instr->alu.add.op == V3D_QPU_A_FMAX ||
852 instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
853 uint32_t temp;
854
855 temp = a_unpack;
856 a_unpack = b_unpack;
857 b_unpack = temp;
858
859 temp = mux_a;
860 mux_a = mux_b;
861 mux_b = temp;
862 }
863
864 opcode |= a_unpack << 2;
865 opcode |= b_unpack << 0;
866 break;
867 }
868
869 case V3D_QPU_A_FFLOOR:
870 case V3D_QPU_A_FROUND:
871 case V3D_QPU_A_FTRUNC:
872 case V3D_QPU_A_FCEIL:
873 case V3D_QPU_A_FDX:
874 case V3D_QPU_A_FDY: {
875 uint32_t packed;
876
877 if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
878 &packed)) {
879 return false;
880 }
881 mux_b |= packed;
882
883 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
884 &packed)) {
885 return false;
886 }
887 if (packed == 0)
888 return false;
889 opcode |= packed << 2;
890 break;
891 }
892
893 case V3D_QPU_A_FTOIN:
894 case V3D_QPU_A_FTOIZ:
895 case V3D_QPU_A_FTOUZ:
896 case V3D_QPU_A_FTOC:
897 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
898 return false;
899
900 uint32_t packed;
901 if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
902 &packed)) {
903 return false;
904 }
905 if (packed == 0)
906 return false;
907 opcode |= packed << 2;
908
909 break;
910
911 case V3D_QPU_A_VFMIN:
912 case V3D_QPU_A_VFMAX:
913 if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
914 instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
915 return false;
916 }
917
918 if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
919 &packed)) {
920 return false;
921 }
922 opcode |= packed;
923 break;
924
925 default:
926 if (instr->alu.add.op != V3D_QPU_A_NOP &&
927 (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
928 instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
929 instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
930 return false;
931 }
932 break;
933 }
934
935 *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
936 *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
937 *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
938 *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
939 if (instr->alu.add.magic_write)
940 *packed_instr |= VC5_QPU_MA;
941
942 return true;
943 }
944
945 static bool
946 v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
947 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
948 {
949 uint32_t mux_a = instr->alu.mul.a;
950 uint32_t mux_b = instr->alu.mul.b;
951 int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
952 const struct opcode_desc *desc;
953
954 for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
955 desc++) {
956 if (desc->op == instr->alu.mul.op)
957 break;
958 }
959 if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
960 return false;
961
962 uint32_t opcode = desc->opcode_first;
963
964 /* Some opcodes have a single valid value for their mux a/b, so set
965 * that here. If mux a/b determine packing, it will be set below.
966 */
967 if (nsrc < 2)
968 mux_b = ffs(desc->mux_b_mask) - 1;
969
970 if (nsrc < 1)
971 mux_a = ffs(desc->mux_a_mask) - 1;
972
973 switch (instr->alu.mul.op) {
974 case V3D_QPU_M_FMUL: {
975 uint32_t packed;
976
977 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
978 &packed)) {
979 return false;
980 }
981 /* No need for a +1 because desc->opcode_first has a 1 in this
982 * field.
983 */
984 opcode += packed << 4;
985
986 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
987 &packed)) {
988 return false;
989 }
990 opcode |= packed << 2;
991
992 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
993 &packed)) {
994 return false;
995 }
996 opcode |= packed << 0;
997 break;
998 }
999
1000 case V3D_QPU_M_FMOV: {
1001 uint32_t packed;
1002
1003 if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
1004 &packed)) {
1005 return false;
1006 }
1007 opcode |= (packed >> 1) & 1;
1008 mux_b = (packed & 1) << 2;
1009
1010 if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
1011 &packed)) {
1012 return false;
1013 }
1014 mux_b |= packed;
1015 break;
1016 }
1017
1018 case V3D_QPU_M_VFMUL: {
1019 uint32_t packed;
1020
1021 if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
1022 return false;
1023
1024 if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
1025 &packed)) {
1026 return false;
1027 }
1028 if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
1029 opcode = 8;
1030 else
1031 opcode |= (packed + 4) & 7;
1032
1033 if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
1034 return false;
1035
1036 break;
1037 }
1038
1039 default:
1040 break;
1041 }
1042
1043 *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
1044 *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);
1045
1046 *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
1047 *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
1048 if (instr->alu.mul.magic_write)
1049 *packed_instr |= VC5_QPU_MM;
1050
1051 return true;
1052 }
1053
1054 static bool
1055 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
1056 uint64_t packed_instr,
1057 struct v3d_qpu_instr *instr)
1058 {
1059 instr->type = V3D_QPU_INSTR_TYPE_ALU;
1060
1061 if (!v3d_qpu_sig_unpack(devinfo,
1062 QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
1063 &instr->sig))
1064 return false;
1065
1066 if (!v3d_qpu_flags_unpack(devinfo,
1067 QPU_GET_FIELD(packed_instr, VC5_QPU_COND),
1068 &instr->flags))
1069 return false;
1070
1071 instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
1072 instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);
1073
1074 if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
1075 return false;
1076
1077 if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
1078 return false;
1079
1080 return true;
1081 }
1082
1083 static bool
1084 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
1085 uint64_t packed_instr,
1086 struct v3d_qpu_instr *instr)
1087 {
1088 instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
1089
1090 uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
1091 if (cond == 0)
1092 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
1093 else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
1094 V3D_QPU_BRANCH_COND_ALLNA)
1095 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
1096 else
1097 return false;
1098
1099 uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
1100 if (msfign == 3)
1101 return false;
1102 instr->branch.msfign = msfign;
1103
1104 instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);
1105
1106 instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
1107 if (instr->branch.ub) {
1108 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
1109 VC5_QPU_BRANCH_BDU);
1110 }
1111
1112 instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
1113 VC5_QPU_RADDR_A);
1114
1115 instr->branch.offset = 0;
1116
1117 instr->branch.offset +=
1118 QPU_GET_FIELD(packed_instr,
1119 VC5_QPU_BRANCH_ADDR_LOW) << 3;
1120
1121 instr->branch.offset +=
1122 QPU_GET_FIELD(packed_instr,
1123 VC5_QPU_BRANCH_ADDR_HIGH) << 24;
1124
1125 return true;
1126 }
1127
1128 bool
1129 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
1130 uint64_t packed_instr,
1131 struct v3d_qpu_instr *instr)
1132 {
1133 if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
1134 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
1135 } else {
1136 uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);
1137
1138 if ((sig & 24) == 16) {
1139 return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
1140 instr);
1141 } else {
1142 return false;
1143 }
1144 }
1145 }
1146
1147 static bool
1148 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
1149 const struct v3d_qpu_instr *instr,
1150 uint64_t *packed_instr)
1151 {
1152 uint32_t sig;
1153 if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
1154 return false;
1155 *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);
1156
1157 if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
1158 *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
1159 *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);
1160
1161 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
1162 return false;
1163 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
1164 return false;
1165
1166 uint32_t flags;
1167 if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
1168 return false;
1169 *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
1170 }
1171
1172 return true;
1173 }
1174
1175 static bool
1176 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
1177 const struct v3d_qpu_instr *instr,
1178 uint64_t *packed_instr)
1179 {
1180 *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);
1181
1182 if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
1183 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
1184 V3D_QPU_BRANCH_COND_A0),
1185 VC5_QPU_BRANCH_COND);
1186 }
1187
1188 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1189 VC5_QPU_BRANCH_MSFIGN);
1190
1191 *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
1192 VC5_QPU_BRANCH_BDI);
1193
1194 if (instr->branch.ub) {
1195 *packed_instr |= VC5_QPU_BRANCH_UB;
1196 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
1197 VC5_QPU_BRANCH_BDU);
1198 }
1199
1200 switch (instr->branch.bdi) {
1201 case V3D_QPU_BRANCH_DEST_ABS:
1202 case V3D_QPU_BRANCH_DEST_REL:
1203 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
1204 VC5_QPU_BRANCH_MSFIGN);
1205
1206 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
1207 ~0xff000000) >> 3,
1208 VC5_QPU_BRANCH_ADDR_LOW);
1209
1210 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
1211 VC5_QPU_BRANCH_ADDR_HIGH);
1212
1213 case V3D_QPU_BRANCH_DEST_REGFILE:
1214 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
1215 VC5_QPU_RADDR_A);
1216 break;
1217
1218 default:
1219 break;
1220 }
1221
1222 return true;
1223 }
1224
1225 bool
1226 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
1227 const struct v3d_qpu_instr *instr,
1228 uint64_t *packed_instr)
1229 {
1230 *packed_instr = 0;
1231
1232 switch (instr->type) {
1233 case V3D_QPU_INSTR_TYPE_ALU:
1234 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
1235 case V3D_QPU_INSTR_TYPE_BRANCH:
1236 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
1237 default:
1238 return false;
1239 }
1240 }