broadcom/vc5: Drop signal bit #defines.
mesa.git: src/broadcom/qpu/qpu_pack.c
/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <string.h>
#include "util/macros.h"

#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"

#ifndef QPU_MASK
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
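
/*
 * Worked example (illustrative, not from the original file): for a field
 * with _SHIFT 53 and _MASK QPU_MASK(57, 53), QPU_SET_FIELD(0x1f, field)
 * produces 0x03e0000000000000ull, QPU_GET_FIELD() of that word returns
 * 0x1f again, and QPU_UPDATE_FIELD() clears the old field bits before
 * OR-ing in the new value, so it can rewrite a field of an already-packed
 * instruction.
 */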

#define VC5_QPU_OP_MUL_SHIFT            58
#define VC5_QPU_OP_MUL_MASK             QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT               53
#define VC5_QPU_SIG_MASK                QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT              46
#define VC5_QPU_COND_MASK               QPU_MASK(52, 46)
#define VC5_QPU_COND_SIG_MAGIC_ADDR     (1 << 6)

#define VC5_QPU_MM                      QPU_MASK(45, 45)
#define VC5_QPU_MA                      QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT           38
#define V3D_QPU_WADDR_M_MASK            QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT   35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK    QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT           32
#define V3D_QPU_WADDR_A_MASK            QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT       32
#define VC5_QPU_BRANCH_COND_MASK        QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT  24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK   QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT            24
#define VC5_QPU_OP_ADD_MASK             QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT             21
#define VC5_QPU_MUL_B_MASK              QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT     21
#define VC5_QPU_BRANCH_MSFIGN_MASK      QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT             18
#define VC5_QPU_MUL_A_MASK              QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT             15
#define VC5_QPU_ADD_B_MASK              QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT        15
#define VC5_QPU_BRANCH_BDU_MASK         QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB               QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT             12
#define VC5_QPU_ADD_A_MASK              QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT        12
#define VC5_QPU_BRANCH_BDI_MASK         QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT           6
#define VC5_QPU_RADDR_A_MASK            QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT           0
#define VC5_QPU_RADDR_B_MASK            QPU_MASK(5, 0)

#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true

static const struct v3d_qpu_sig v33_sig_map[] = {
        /* MISC   R3       R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDVARY, LDTMU, },
        [13] = { THRSW, LDVARY, LDTMU, },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        /* 18-21 reserved */
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDVPM, },
        [25] = { THRSW, LDVPM, },
        [26] = { LDVPM, LDUNIF },
        [27] = { THRSW, LDVPM, LDUNIF },
        [28] = { LDVPM, LDTMU, },
        [29] = { THRSW, LDVPM, LDTMU, },
        [30] = { SMIMM, LDVPM, },
        [31] = { SMIMM, },
};
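
/*
 * Reading these tables (illustrative note): the packed 5-bit signal value
 * indexes the array, so e.g. packed signal 3 on v3.3 decodes to
 * { thrsw = true, ldunif = true }.  Reserved encodings are left as
 * all-zero entries, which is what v3d_qpu_sig_unpack() keys on below.
 */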

static const struct v3d_qpu_sig v40_sig_map[] = {
        /* MISC   R3       R4      R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        /* 12-13 reserved */
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        /* 24-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

static const struct v3d_qpu_sig v41_sig_map[] = {
        /* MISC   phys   R5 */
        [0]  = { },
        [1]  = { THRSW, },
        [2]  = { LDUNIF },
        [3]  = { THRSW, LDUNIF },
        [4]  = { LDTMU, },
        [5]  = { THRSW, LDTMU, },
        [6]  = { LDTMU, LDUNIF },
        [7]  = { THRSW, LDTMU, LDUNIF },
        [8]  = { LDVARY, },
        [9]  = { THRSW, LDVARY, },
        [10] = { LDVARY, LDUNIF },
        [11] = { THRSW, LDVARY, LDUNIF },
        [12] = { LDUNIFRF },
        [13] = { THRSW, LDUNIFRF },
        [14] = { SMIMM, LDVARY, },
        [15] = { SMIMM, },
        [16] = { LDTLB, },
        [17] = { LDTLBU, },
        [18] = { WRTMUC },
        [19] = { THRSW, WRTMUC },
        [20] = { LDVARY, WRTMUC },
        [21] = { THRSW, LDVARY, WRTMUC },
        [22] = { UCB, },
        [23] = { ROT, },
        [24] = { LDUNIFA },
        [25] = { LDUNIFARF },
        /* 26-30 reserved */
        [31] = { SMIMM, LDTMU, },
};

bool
v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
                   uint32_t packed_sig,
                   struct v3d_qpu_sig *sig)
{
        if (packed_sig >= ARRAY_SIZE(v33_sig_map))
                return false;

        if (devinfo->ver >= 41)
                *sig = v41_sig_map[packed_sig];
        else if (devinfo->ver == 40)
                *sig = v40_sig_map[packed_sig];
        else
                *sig = v33_sig_map[packed_sig];

        /* Reserved encodings map to all-zero structs: any packed value
         * other than 0 that unpacks to the empty signal set is invalid.
         */
        return (packed_sig == 0 ||
                memcmp(sig, &v33_sig_map[0], sizeof(*sig)) != 0);
}

bool
v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_sig *sig,
                 uint32_t *packed_sig)
{
        const struct v3d_qpu_sig *map;

        if (devinfo->ver >= 41)
                map = v41_sig_map;
        else if (devinfo->ver == 40)
                map = v40_sig_map;
        else
                map = v33_sig_map;

        for (int i = 0; i < ARRAY_SIZE(v33_sig_map); i++) {
                if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
                        *packed_sig = i;
                        return true;
                }
        }

        return false;
}
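
/*
 * Minimal usage sketch (illustrative, not part of the original file): for
 * any non-reserved encoding, unpacking and re-packing a signal should
 * reproduce the original 5-bit value.
 */
static inline bool
sig_round_trips(const struct v3d_device_info *devinfo, uint32_t packed)
{
        struct v3d_qpu_sig sig;
        uint32_t repacked;

        return v3d_qpu_sig_unpack(devinfo, packed, &sig) &&
               v3d_qpu_sig_pack(devinfo, &sig, &repacked) &&
               repacked == packed;
}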

bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                return true;
        } else if (packed_cond >> 2 == 0) {
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
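
/*
 * Worked example (illustrative): packed_cond 0x24 falls into the
 * "packed_cond >> 4 == 0x2" arm above, so it decodes to an add-ALU
 * condition of V3D_QPU_COND_IFB ((0x24 >> 2) & 0x3 == 1) with no flag
 * pushes (0x24 & 0x3 == 0); everything else stays at its *_NONE default.
 */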

bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                if (flags_present & AC)
                        *packed_cond |= (cond->ac - V3D_QPU_COND_IFA) << 2;

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
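
/*
 * Minimal sketch (illustrative, not part of the original file): any flags
 * combination listed in flags_table should survive a pack/unpack round
 * trip; combinations missing from the table are unencodable, and pack
 * returns false for them.  Assumes the v3d_qpu_flags struct compares
 * bytewise once every field has been written, as the unpack above does.
 */
static inline bool
flags_round_trip(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_flags *cond)
{
        uint32_t packed;
        struct v3d_qpu_flags unpacked;

        return v3d_qpu_flags_pack(devinfo, cond, &packed) &&
               v3d_qpu_flags_unpack(devinfo, packed, &unpacked) &&
               memcmp(&unpacked, cond, sizeof(unpacked)) == 0;
}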

/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field and, in the case of 0- or
 * 1-argument opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
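
/*
 * Illustrative note: MUX_MASK(bot, top) builds a bitmask of allowed mux
 * values, e.g. MUX_MASK(0, 2) == 0x7 (mux 0-2 allowed) and ANYMUX == 0xff
 * (all eight mux encodings allowed).
 */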

struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};

static const struct opcode_desc add_ops[] = {
        /* FADD decodes as FADDNF depending on the order of mux_a/mux_b. */
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  ANYMUX, ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  ANYMUX, ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  ANYMUX, ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  ANYMUX, ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, ANYMUX, ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, ANYMUX, ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, ANYMUX, ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, ANYMUX, ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, ANYMUX, ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, ANYMUX, ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, ANYMUX, ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, ANYMUX, ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, ANYMUX, ANYMUX, V3D_QPU_A_ROR },
        /* FMIN decodes as FMAX depending on the order of mux_a/mux_b. */
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, ANYMUX, ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, ANYMUX, ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, ANYMUX, ANYMUX, V3D_QPU_A_AND },
        { 182, 182, ANYMUX, ANYMUX, V3D_QPU_A_OR },
        { 183, 183, ANYMUX, ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, ANYMUX, ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, ANYMUX, ANYMUX, V3D_QPU_A_VSUB },
        { 186, 186, 1 << 0, ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, 1 << 1, ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLBPOP },
        { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
        { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
        { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX },
        { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX },
        { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR },
        { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA },
        { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA },
        { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB },
        { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB },

        { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD },
        { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD },

        { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF },
        { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF },
        { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT },
        { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
        { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },

        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, ANYMUX, ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, ANYMUX, ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, 1 << 3, ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, 1 << 7, ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, 1 << 3, ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, 1 << 7, ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, ANYMUX, ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, MUX_MASK(0, 2), ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, 1 << 3, ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, MUX_MASK(4, 6), ANYMUX, V3D_QPU_A_UTOF },
};

static const struct opcode_desc mul_ops[] = {
        { 1,  1,  ANYMUX, ANYMUX, V3D_QPU_M_ADD },
        { 2,  2,  ANYMUX, ANYMUX, V3D_QPU_M_SUB },
        { 3,  3,  ANYMUX, ANYMUX, V3D_QPU_M_UMUL24 },
        { 4,  8,  ANYMUX, ANYMUX, V3D_QPU_M_VFMUL },
        { 9,  9,  ANYMUX, ANYMUX, V3D_QPU_M_SMUL24 },
        { 10, 10, ANYMUX, ANYMUX, V3D_QPU_M_MULTOP },
        { 14, 14, ANYMUX, ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, MUX_MASK(0, 3), ANYMUX, V3D_QPU_M_FMOV },
        { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP, 0 },
        { 15, 15, 1 << 7, ANYMUX, V3D_QPU_M_MOV },
        { 16, 63, ANYMUX, ANYMUX, V3D_QPU_M_FMUL },
};

static const struct opcode_desc *
lookup_opcode(const struct opcode_desc *opcodes, size_t num_opcodes,
              uint32_t opcode, uint32_t mux_a, uint32_t mux_b)
{
        for (int i = 0; i < num_opcodes; i++) {
                const struct opcode_desc *op_desc = &opcodes[i];

                if (opcode < op_desc->opcode_first ||
                    opcode > op_desc->opcode_last)
                        continue;

                if (!(op_desc->mux_b_mask & (1 << mux_b)))
                        continue;

                if (!(op_desc->mux_a_mask & (1 << mux_a)))
                        continue;

                return op_desc;
        }

        return NULL;
}
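
/*
 * Worked example (illustrative): lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
 * 187, 1, 0) first rejects the V3D_QPU_A_NOP row, whose mux_a_mask only
 * allows mux_a == 0, and then matches the V3D_QPU_A_TIDX row, whose masks
 * require mux_b == 0 and mux_a == 1.
 */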

static bool
v3d_qpu_float32_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_ABS;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_L;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_H;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_ABS:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_NONE:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_L:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_H:
                *packed = 3;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,
                              enum v3d_qpu_input_unpack *unpacked)
{
        switch (packed) {
        case 0:
                *unpacked = V3D_QPU_UNPACK_NONE;
                return true;
        case 1:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
                return true;
        case 2:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
                return true;
        case 3:
                *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
                return true;
        case 4:
                *unpacked = V3D_QPU_UNPACK_SWAP_16;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
                            uint32_t *packed)
{
        switch (unpacked) {
        case V3D_QPU_UNPACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_32F_16:
                *packed = 1;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_L_16:
                *packed = 2;
                return true;
        case V3D_QPU_UNPACK_REPLICATE_H_16:
                *packed = 3;
                return true;
        case V3D_QPU_UNPACK_SWAP_16:
                *packed = 4;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
                          uint32_t *packed)
{
        switch (pack) {
        case V3D_QPU_PACK_NONE:
                *packed = 0;
                return true;
        case V3D_QPU_PACK_L:
                *packed = 1;
                return true;
        case V3D_QPU_PACK_H:
                *packed = 2;
                return true;
        default:
                return false;
        }
}

static bool
v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags.
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode(add_ops, ARRAY_SIZE(add_ops),
                              map_op, mux_a, mux_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
                instr->alu.add.output_pack = (op >> 4) & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.add.b_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a_unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a = mux_a;
        instr->alu.add.b = mux_b;
        instr->alu.add.waddr = waddr;
        instr->alu.add.magic_write = packed_inst & VC5_QPU_MA;

        return true;
}
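
/*
 * Worked example (illustrative): for op == 4 with mux_a == mux_b == 0, the
 * table lookup finds V3D_QPU_A_FADD first, but the operand-order key
 * ((op >> 2) & 3) * 8 + mux_a == 8 exceeds (op & 3) * 8 + mux_b == 0, so
 * the instruction actually decodes as V3D_QPU_A_FADDNF with a_unpack ==
 * V3D_QPU_UNPACK_NONE and b_unpack == V3D_QPU_UNPACK_ABS.
 */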

static bool
v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                   struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, VC5_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, VC5_QPU_MUL_B);

        {
                const struct opcode_desc *desc =
                        lookup_opcode(mul_ops, ARRAY_SIZE(mul_ops),
                                      op, mux_a, mux_b);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                if (!v3d_qpu_float32_unpack_unpack((op >> 2) & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack((op >> 0) & 0x3,
                                                   &instr->alu.mul.b_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                if (!v3d_qpu_float32_unpack_unpack(mux_b & 0x3,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a_unpack)) {
                        return false;
                }

                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a_unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b_unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a = mux_a;
        instr->alu.mul.b = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & VC5_QPU_MM;

        return true;
}
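
/*
 * Worked example (illustrative): for V3D_QPU_M_FMOV the 2-bit output pack
 * is split across the encoding: opcode bit 0 supplies its high bit and
 * mux_b bit 2 its low bit, so opcode 15 with mux_b == 1 unpacks as
 * output_pack == 2 (V3D_QPU_PACK_H) with a_unpack == V3D_QPU_UNPACK_NONE
 * (from mux_b & 0x3 == 1).
 */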

static bool
v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a;
        uint32_t mux_b = instr->alu.add.b;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc;

        int opcode;
        for (desc = add_ops; desc != &add_ops[ARRAY_SIZE(add_ops)];
             desc++) {
                if (desc->op == instr->alu.add.op)
                        break;
        }
        if (desc == &add_ops[ARRAY_SIZE(add_ops)])
                return false;

        opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                break;
        default:
                break;
        }

        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.b_unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;
                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a_unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b_unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write)
                *packed_instr |= VC5_QPU_MA;

        return true;
}

static bool
v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
                 const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a;
        uint32_t mux_b = instr->alu.mul.b;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);
        const struct opcode_desc *desc;

        for (desc = mul_ops; desc != &mul_ops[ARRAY_SIZE(mul_ops)];
             desc++) {
                if (desc->op == instr->alu.mul.op)
                        break;
        }
        if (desc == &mul_ops[ARRAY_SIZE(mul_ops)])
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux_b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux_a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.b_unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a_unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a_unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b_unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, VC5_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= VC5_QPU_MM;

        return true;
}

static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
                         uint64_t packed_instr,
                         struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_ALU;

        if (!v3d_qpu_sig_unpack(devinfo,
                                QPU_GET_FIELD(packed_instr, VC5_QPU_SIG),
                                &instr->sig))
                return false;

        uint32_t packed_cond = QPU_GET_FIELD(packed_instr, VC5_QPU_COND);
        if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                instr->sig_addr = packed_cond & ~VC5_QPU_COND_SIG_MAGIC_ADDR;
                instr->sig_magic = packed_cond & VC5_QPU_COND_SIG_MAGIC_ADDR;

                instr->flags.ac = V3D_QPU_COND_NONE;
                instr->flags.mc = V3D_QPU_COND_NONE;
                instr->flags.apf = V3D_QPU_PF_NONE;
                instr->flags.mpf = V3D_QPU_PF_NONE;
                instr->flags.auf = V3D_QPU_UF_NONE;
                instr->flags.muf = V3D_QPU_UF_NONE;
        } else {
                if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
                        return false;
        }

        instr->raddr_a = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_A);
        instr->raddr_b = QPU_GET_FIELD(packed_instr, VC5_QPU_RADDR_B);

        if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
                return false;

        if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
                return false;

        return true;
}

static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
                            uint64_t packed_instr,
                            struct v3d_qpu_instr *instr)
{
        instr->type = V3D_QPU_INSTR_TYPE_BRANCH;

        uint32_t cond = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_COND);
        if (cond == 0)
                instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
        else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
                 V3D_QPU_BRANCH_COND_ALLNA)
                instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
        else
                return false;

        uint32_t msfign = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_MSFIGN);
        if (msfign == 3)
                return false;
        instr->branch.msfign = msfign;

        instr->branch.bdi = QPU_GET_FIELD(packed_instr, VC5_QPU_BRANCH_BDI);

        instr->branch.ub = packed_instr & VC5_QPU_BRANCH_UB;
        if (instr->branch.ub) {
                instr->branch.bdu = QPU_GET_FIELD(packed_instr,
                                                  VC5_QPU_BRANCH_BDU);
        }

        instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
                                              VC5_QPU_RADDR_A);

        instr->branch.offset = 0;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_LOW) << 3;

        instr->branch.offset +=
                QPU_GET_FIELD(packed_instr,
                              VC5_QPU_BRANCH_ADDR_HIGH) << 24;

        return true;
}

bool
v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
                     uint64_t packed_instr,
                     struct v3d_qpu_instr *instr)
{
        if (QPU_GET_FIELD(packed_instr, VC5_QPU_OP_MUL) != 0) {
                return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
        } else {
                uint32_t sig = QPU_GET_FIELD(packed_instr, VC5_QPU_SIG);

                if ((sig & 24) == 16) {
                        return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
                                                           instr);
                } else {
                        return false;
                }
        }
}
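
/*
 * Illustrative note on the dispatch above: a zero mul opcode field marks a
 * non-ALU instruction, and within that space signal values with bit 4 set
 * and bit 3 clear ((sig & 24) == 16, i.e. sig 16-23) are branches, matching
 * the fixed sig value of 16 written by v3d_qpu_instr_pack_branch() below.
 */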

static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
                       const struct v3d_qpu_instr *instr,
                       uint64_t *packed_instr)
{
        uint32_t sig;
        if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
                return false;
        *packed_instr |= QPU_SET_FIELD(sig, VC5_QPU_SIG);

        if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
                *packed_instr |= QPU_SET_FIELD(instr->raddr_a, VC5_QPU_RADDR_A);
                *packed_instr |= QPU_SET_FIELD(instr->raddr_b, VC5_QPU_RADDR_B);

                if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
                        return false;
                if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
                        return false;

                uint32_t flags;
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
                        if (instr->flags.ac != V3D_QPU_COND_NONE ||
                            instr->flags.mc != V3D_QPU_COND_NONE ||
                            instr->flags.apf != V3D_QPU_PF_NONE ||
                            instr->flags.mpf != V3D_QPU_PF_NONE ||
                            instr->flags.auf != V3D_QPU_UF_NONE ||
                            instr->flags.muf != V3D_QPU_UF_NONE) {
                                return false;
                        }

                        flags = instr->sig_addr;
                        if (instr->sig_magic)
                                flags |= VC5_QPU_COND_SIG_MAGIC_ADDR;
                } else {
                        if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
                                return false;
                }

                *packed_instr |= QPU_SET_FIELD(flags, VC5_QPU_COND);
        } else {
                if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
                        return false;
        }

        return true;
}

static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
                          const struct v3d_qpu_instr *instr,
                          uint64_t *packed_instr)
{
        *packed_instr |= QPU_SET_FIELD(16, VC5_QPU_SIG);

        if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
                *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
                                                    V3D_QPU_BRANCH_COND_A0),
                                               VC5_QPU_BRANCH_COND);
        }

        *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                       VC5_QPU_BRANCH_MSFIGN);

        *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
                                       VC5_QPU_BRANCH_BDI);

        if (instr->branch.ub) {
                *packed_instr |= VC5_QPU_BRANCH_UB;
                *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
                                               VC5_QPU_BRANCH_BDU);
        }

        switch (instr->branch.bdi) {
        case V3D_QPU_BRANCH_DEST_ABS:
        case V3D_QPU_BRANCH_DEST_REL:
                *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
                                               VC5_QPU_BRANCH_MSFIGN);

                *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
                                                ~0xff000000) >> 3,
                                               VC5_QPU_BRANCH_ADDR_LOW);

                *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
                                               VC5_QPU_BRANCH_ADDR_HIGH);
                break;

        case V3D_QPU_BRANCH_DEST_REGFILE:
                *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
                                               VC5_QPU_RADDR_A);
                break;

        default:
                break;
        }

        return true;
}

bool
v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr,
                   uint64_t *packed_instr)
{
        *packed_instr = 0;

        switch (instr->type) {
        case V3D_QPU_INSTR_TYPE_ALU:
                return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
        case V3D_QPU_INSTR_TYPE_BRANCH:
                return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
        default:
                return false;
        }
}
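
/*
 * Minimal usage sketch (illustrative, not part of the original file): the
 * pack/unpack pair is intended to be a lossless round trip for any valid
 * encoding, so a self-check like this can validate an instruction word.
 */
static inline bool
instr_round_trips(const struct v3d_device_info *devinfo, uint64_t packed)
{
        struct v3d_qpu_instr instr;
        uint64_t repacked;

        return v3d_qpu_instr_unpack(devinfo, packed, &instr) &&
               v3d_qpu_instr_pack(devinfo, &instr, &repacked) &&
               repacked == packed;
}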