broadcom/vc5: Add support for V3Dv4 signal bits.
[mesa.git] src/broadcom/qpu/qpu_pack.c
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
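
/* A worked example of how these macros compose: the signal field occupies
 * bits 57:53, so QPU_MASK(57, 53) is 0x03e0000000000000ull and
 * QPU_GET_FIELD(inst, VC5_QPU_SIG) evaluates to ((inst >> 53) & 0x1f).
 */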

#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)
# define VC5_QPU_SIG_THRSW_BIT              0x1
# define VC5_QPU_SIG_LDUNIF_BIT             0x2
# define VC5_QPU_SIG_LDTMU_BIT              0x4
# define VC5_QPU_SIG_LDVARY_BIT             0x8

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

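/* Each 5-bit packed signal value selects a fixed combination of signal
 * flags; the tables below give that combination for each hardware version.
 * The shorthand macros above are designated initializers, so for example
 * { THRSW, LDUNIF } expands to { .thrsw = true, .ldunif = true }.
 */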
static const struct v3d_qpu_sig v33_sig_map[] = {
        /* MISC    R3       R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};

static const struct v3d_qpu_sig v40_sig_map[] = {
        /* MISC    R3       R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

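/* Relative to v4.0: the previously reserved encodings 12-13 now carry
 * ldunifrf, and 24-25 carry ldunifa/ldunifarf.
 */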
static const struct v3d_qpu_sig v41_sig_map[] = {
        /* MISC    phys    R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Entries other than [0] that unpack to an all-zero struct are
         * reserved encodings.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}

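/* The 7-bit cond field multiplexes several encodings (as decoded below):
 * 0 means no flags, 1-3 set the add-ALU pushed flag (apf), 4-15 the
 * add-ALU update flag (auf), 0x11-0x13 the mul-ALU pushed flag (mpf),
 * 0x14-0x1f the mul-ALU update flag (muf), 0x20-0x3f an add or mul
 * condition plus the other ALU's pushed flag, and 0x40-0x7f a mul
 * condition plus an add condition or update flag.  0x10 is reserved.
 */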
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                /* With a mul condition present (bit 6 set), the add
                 * condition lives in the low bits; otherwise it is
                 * shifted past the other ALU's pushed flag, matching
                 * the decode in v3d_qpu_flags_unpack().
                 */
                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}

/* Make a mapping of the table of opcodes in the spec. The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
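
/* For example, MUX_MASK(0, 2) is 0b00000111, matching mux values 0-2, and
 * ANYMUX is 0xff, matching any of the eight possible mux values.
 */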

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD decodes as FADDNF depending on the order of mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN decodes as FMAX depending on the order of mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops[] = {
        { 1,  1,  ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2,  2,  ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3,  3,  ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4,  8,  ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9,  9,  ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};

static const struct opcode_desc *
lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }
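
        /* (The comparison above keys each operand by its unpack bits --
         * op bits 3:2 for A, 1:0 for B -- and its mux value; the "greater"
         * ordering selects the FMAX/FADDNF variant, mirroring the operand
         * swap performed on the pack side.)
         */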

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
                instr->alu.add.output_pack = (op >> 4) & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
        instr->alu.add.magic_write = packed_inst & VC5_QPU_MA;

        return true;
}

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;
                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here. If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
                                &instr->sig))
                return false;

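        /* When the signal writes a register (see
         * v3d_qpu_sig_writes_address()), the cond field is reused as the
         * signal's destination: the low six bits are the waddr and bit 6
         * selects a magic register.
         */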
        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  VC5_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              VC5_QPU_RADDR_A);

        instr->branch.offset = 0;

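        /* The 21-bit ADDR_LOW field carries offset bits 23:3 (branch
         * targets are 8-byte aligned), and the 8-bit ADDR_HIGH field
         * carries offset bits 31:24.
         */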
        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);

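                /* With a zero mul opcode, a signal field of the form
                 * 0b10xxx (16-23) identifies a branch encoding.
                 */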
                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo,
                                                           packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               VC5_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       VC5_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       VC5_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= VC5_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               VC5_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               VC5_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               VC5_QPU_BRANCH_ADDR_HIGH);
                break;

        case V3D_QPU_BRANCH_DEST_REGFILE:
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               VC5_QPU_RADDR_A);
                break;

        default:
                break;
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}