/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "util/macros.h"
26 #include "util/bitscan.h"
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
/* Builds a mask covering bits [high:low] of a 64-bit instruction word. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))

/* Using the GNU statement expression extension.
 *
 * Shifts @value into place for @field and asserts (debug builds) that it
 * doesn't overflow the field's mask.
 */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
        })

/* Extracts @field from a packed 64-bit instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))

/* Replaces @field of @inst with @value, leaving other bits untouched. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
/* Bit positions of the fields of a packed 64-bit QPU instruction.
 * Branch instructions overlay the ALU fields (same bit ranges, different
 * meaning), hence the duplicate shifts below.
 */
#define VC5_QPU_OP_MUL_SHIFT                58
#define VC5_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define VC5_QPU_SIG_SHIFT                   53
#define VC5_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define VC5_QPU_COND_SHIFT                  46
#define VC5_QPU_COND_MASK                   QPU_MASK(52, 46)
/* When a signal writes an address, the COND field instead holds the
 * destination; this bit selects the magic register space.
 */
#define VC5_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define VC5_QPU_MM                          QPU_MASK(45, 45)
#define VC5_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define VC5_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define VC5_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define VC5_QPU_BRANCH_COND_SHIFT           32
#define VC5_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define VC5_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define VC5_QPU_OP_ADD_SHIFT                24
#define VC5_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define VC5_QPU_MUL_B_SHIFT                 21
#define VC5_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define VC5_QPU_BRANCH_MSFIGN_SHIFT         21
#define VC5_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define VC5_QPU_MUL_A_SHIFT                 18
#define VC5_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define VC5_QPU_ADD_B_SHIFT                 15
#define VC5_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_BDU_SHIFT            15
#define VC5_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define VC5_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define VC5_QPU_ADD_A_SHIFT                 12
#define VC5_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define VC5_QPU_BRANCH_BDI_SHIFT            12
#define VC5_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define VC5_QPU_RADDR_A_SHIFT               6
#define VC5_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define VC5_QPU_RADDR_B_SHIFT               0
#define VC5_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
/* Shorthand designated-initializer macros so the signal tables below read
 * like the hardware spec's signal names.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define SMIMM .small_imm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
122 static const struct v3d_qpu_sig v33_sig_map
[] = {
127 [3] = { THRSW
, LDUNIF
},
129 [5] = { THRSW
, LDTMU
, },
130 [6] = { LDTMU
, LDUNIF
},
131 [7] = { THRSW
, LDTMU
, LDUNIF
},
133 [9] = { THRSW
, LDVARY
, },
134 [10] = { LDVARY
, LDUNIF
},
135 [11] = { THRSW
, LDVARY
, LDUNIF
},
136 [12] = { LDVARY
, LDTMU
, },
137 [13] = { THRSW
, LDVARY
, LDTMU
, },
138 [14] = { SMIMM
, LDVARY
, },
146 [25] = { THRSW
, LDVPM
, },
147 [26] = { LDVPM
, LDUNIF
},
148 [27] = { THRSW
, LDVPM
, LDUNIF
},
149 [28] = { LDVPM
, LDTMU
, },
150 [29] = { THRSW
, LDVPM
, LDTMU
, },
151 [30] = { SMIMM
, LDVPM
, },
155 static const struct v3d_qpu_sig v40_sig_map
[] = {
160 [3] = { THRSW
, LDUNIF
},
162 [5] = { THRSW
, LDTMU
, },
163 [6] = { LDTMU
, LDUNIF
},
164 [7] = { THRSW
, LDTMU
, LDUNIF
},
166 [9] = { THRSW
, LDVARY
, },
167 [10] = { LDVARY
, LDUNIF
},
168 [11] = { THRSW
, LDVARY
, LDUNIF
},
170 [14] = { SMIMM
, LDVARY
, },
175 [19] = { THRSW
, WRTMUC
},
176 [20] = { LDVARY
, WRTMUC
},
177 [21] = { THRSW
, LDVARY
, WRTMUC
},
181 [31] = { SMIMM
, LDTMU
, },
184 static const struct v3d_qpu_sig v41_sig_map
[] = {
189 [3] = { THRSW
, LDUNIF
},
191 [5] = { THRSW
, LDTMU
, },
192 [6] = { LDTMU
, LDUNIF
},
193 [7] = { THRSW
, LDTMU
, LDUNIF
},
195 [9] = { THRSW
, LDVARY
, },
196 [10] = { LDVARY
, LDUNIF
},
197 [11] = { THRSW
, LDVARY
, LDUNIF
},
199 [13] = { THRSW
, LDUNIFRF
},
200 [14] = { SMIMM
, LDVARY
, },
205 [19] = { THRSW
, WRTMUC
},
206 [20] = { LDVARY
, WRTMUC
},
207 [21] = { THRSW
, LDVARY
, WRTMUC
},
212 [25] = { LDUNIFARF
},
213 [31] = { SMIMM
, LDTMU
, },
217 v3d_qpu_sig_unpack(const struct v3d_device_info
*devinfo
,
219 struct v3d_qpu_sig
*sig
)
221 if (packed_sig
>= ARRAY_SIZE(v33_sig_map
))
224 if (devinfo
->ver
>= 41)
225 *sig
= v41_sig_map
[packed_sig
];
226 else if (devinfo
->ver
== 40)
227 *sig
= v40_sig_map
[packed_sig
];
229 *sig
= v33_sig_map
[packed_sig
];
231 /* Signals with zeroed unpacked contents after element 0 are reserved. */
232 return (packed_sig
== 0 ||
233 memcmp(sig
, &v33_sig_map
[0], sizeof(*sig
)) != 0);
237 v3d_qpu_sig_pack(const struct v3d_device_info
*devinfo
,
238 const struct v3d_qpu_sig
*sig
,
239 uint32_t *packed_sig
)
241 static const struct v3d_qpu_sig
*map
;
243 if (devinfo
->ver
>= 41)
245 else if (devinfo
->ver
== 40)
250 for (int i
= 0; i
< ARRAY_SIZE(v33_sig_map
); i
++) {
251 if (memcmp(&map
[i
], sig
, sizeof(*sig
)) == 0) {
259 static inline unsigned
262 union {float f
; unsigned ui
;} fi
;
/* Decode table for the 48 small-immediate encodings: integers 0..15,
 * integers -16..-1 (as two's complement uint32), then the IEEE 754 bit
 * patterns for the powers of two 2^-8 .. 2^7.
 *
 * NOTE(review): the 32 integer entries were dropped by the extraction and
 * restored here; the count of 48 is pinned by the STATIC_ASSERT in
 * v3d_qpu_small_imm_pack().
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
295 v3d_qpu_small_imm_unpack(const struct v3d_device_info
*devinfo
,
296 uint32_t packed_small_immediate
,
297 uint32_t *small_immediate
)
299 if (packed_small_immediate
>= ARRAY_SIZE(small_immediates
))
302 *small_immediate
= small_immediates
[packed_small_immediate
];
307 v3d_qpu_small_imm_pack(const struct v3d_device_info
*devinfo
,
309 uint32_t *packed_small_immediate
)
311 STATIC_ASSERT(ARRAY_SIZE(small_immediates
) == 48);
313 for (int i
= 0; i
< ARRAY_SIZE(small_immediates
); i
++) {
314 if (small_immediates
[i
] == value
) {
315 *packed_small_immediate
= i
;
324 v3d_qpu_flags_unpack(const struct v3d_device_info
*devinfo
,
325 uint32_t packed_cond
,
326 struct v3d_qpu_flags
*cond
)
328 static const enum v3d_qpu_cond cond_map
[4] = {
329 [0] = V3D_QPU_COND_IFA
,
330 [1] = V3D_QPU_COND_IFB
,
331 [2] = V3D_QPU_COND_IFNA
,
332 [3] = V3D_QPU_COND_IFNB
,
335 cond
->ac
= V3D_QPU_COND_NONE
;
336 cond
->mc
= V3D_QPU_COND_NONE
;
337 cond
->apf
= V3D_QPU_PF_NONE
;
338 cond
->mpf
= V3D_QPU_PF_NONE
;
339 cond
->auf
= V3D_QPU_UF_NONE
;
340 cond
->muf
= V3D_QPU_UF_NONE
;
342 if (packed_cond
== 0) {
344 } else if (packed_cond
>> 2 == 0) {
345 cond
->apf
= packed_cond
& 0x3;
346 } else if (packed_cond
>> 4 == 0) {
347 cond
->auf
= (packed_cond
& 0xf) - 4 + V3D_QPU_UF_ANDZ
;
348 } else if (packed_cond
== 0x10) {
350 } else if (packed_cond
>> 2 == 0x4) {
351 cond
->mpf
= packed_cond
& 0x3;
352 } else if (packed_cond
>> 4 == 0x1) {
353 cond
->muf
= (packed_cond
& 0xf) - 4 + V3D_QPU_UF_ANDZ
;
354 } else if (packed_cond
>> 4 == 0x2) {
355 cond
->ac
= ((packed_cond
>> 2) & 0x3) + V3D_QPU_COND_IFA
;
356 cond
->mpf
= packed_cond
& 0x3;
357 } else if (packed_cond
>> 4 == 0x3) {
358 cond
->mc
= ((packed_cond
>> 2) & 0x3) + V3D_QPU_COND_IFA
;
359 cond
->apf
= packed_cond
& 0x3;
360 } else if (packed_cond
>> 6) {
361 cond
->mc
= cond_map
[(packed_cond
>> 4) & 0x3];
362 if (((packed_cond
>> 2) & 0x3) == 0) {
363 cond
->ac
= cond_map
[packed_cond
& 0x3];
365 cond
->auf
= (packed_cond
& 0xf) - 4 + V3D_QPU_UF_ANDZ
;
373 v3d_qpu_flags_pack(const struct v3d_device_info
*devinfo
,
374 const struct v3d_qpu_flags
*cond
,
375 uint32_t *packed_cond
)
383 static const struct {
384 uint8_t flags_present
;
393 { AC
| MPF
, (1 << 5) },
394 { MC
, (1 << 5) | (1 << 4) },
395 { MC
| APF
, (1 << 5) | (1 << 4) },
396 { MC
| AC
, (1 << 6) },
397 { MC
| AUF
, (1 << 6) },
400 uint8_t flags_present
= 0;
401 if (cond
->ac
!= V3D_QPU_COND_NONE
)
403 if (cond
->mc
!= V3D_QPU_COND_NONE
)
405 if (cond
->apf
!= V3D_QPU_PF_NONE
)
406 flags_present
|= APF
;
407 if (cond
->mpf
!= V3D_QPU_PF_NONE
)
408 flags_present
|= MPF
;
409 if (cond
->auf
!= V3D_QPU_UF_NONE
)
410 flags_present
|= AUF
;
411 if (cond
->muf
!= V3D_QPU_UF_NONE
)
412 flags_present
|= MUF
;
414 for (int i
= 0; i
< ARRAY_SIZE(flags_table
); i
++) {
415 if (flags_table
[i
].flags_present
!= flags_present
)
418 *packed_cond
= flags_table
[i
].bits
;
420 *packed_cond
|= cond
->apf
;
421 *packed_cond
|= cond
->mpf
;
423 if (flags_present
& AUF
)
424 *packed_cond
|= cond
->auf
- V3D_QPU_UF_ANDZ
+ 4;
425 if (flags_present
& MUF
)
426 *packed_cond
|= cond
->muf
- V3D_QPU_UF_ANDZ
+ 4;
428 if (flags_present
& AC
)
429 *packed_cond
|= (cond
->ac
- V3D_QPU_COND_IFA
) << 2;
431 if (flags_present
& MC
) {
432 if (*packed_cond
& (1 << 6))
433 *packed_cond
|= (cond
->mc
-
434 V3D_QPU_COND_IFA
) << 4;
436 *packed_cond
|= (cond
->mc
-
437 V3D_QPU_COND_IFA
) << 2;
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */
#define MUX_MASK(bot, top) (((1 << (top + 1)) - 1) - ((1 << (bot)) - 1))
#define ANYMUX MUX_MASK(0, 7)
/* One row of the opcode decode tables: an inclusive opcode range plus the
 * sets of mux_a/mux_b values that select this op.
 *
 * NOTE(review): the struct's braces and most fields were dropped by the
 * extraction; field names/order were recovered from their uses in
 * lookup_opcode() and the table initializers (opcode range, mux_b mask
 * before mux_a mask, op, then optional version).
 */
struct opcode_desc {
        uint8_t opcode_first;
        uint8_t opcode_last;
        uint8_t mux_b_mask;
        uint8_t mux_a_mask;
        uint8_t op;
        /* 0 if it's the same across V3D versions, or a specific V3D version. */
        uint8_t ver;
};
463 static const struct opcode_desc add_ops
[] = {
464 /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
465 { 0, 47, ANYMUX
, ANYMUX
, V3D_QPU_A_FADD
},
466 { 0, 47, ANYMUX
, ANYMUX
, V3D_QPU_A_FADDNF
},
467 { 53, 55, ANYMUX
, ANYMUX
, V3D_QPU_A_VFPACK
},
468 { 56, 56, ANYMUX
, ANYMUX
, V3D_QPU_A_ADD
},
469 { 57, 59, ANYMUX
, ANYMUX
, V3D_QPU_A_VFPACK
},
470 { 60, 60, ANYMUX
, ANYMUX
, V3D_QPU_A_SUB
},
471 { 61, 63, ANYMUX
, ANYMUX
, V3D_QPU_A_VFPACK
},
472 { 64, 111, ANYMUX
, ANYMUX
, V3D_QPU_A_FSUB
},
473 { 120, 120, ANYMUX
, ANYMUX
, V3D_QPU_A_MIN
},
474 { 121, 121, ANYMUX
, ANYMUX
, V3D_QPU_A_MAX
},
475 { 122, 122, ANYMUX
, ANYMUX
, V3D_QPU_A_UMIN
},
476 { 123, 123, ANYMUX
, ANYMUX
, V3D_QPU_A_UMAX
},
477 { 124, 124, ANYMUX
, ANYMUX
, V3D_QPU_A_SHL
},
478 { 125, 125, ANYMUX
, ANYMUX
, V3D_QPU_A_SHR
},
479 { 126, 126, ANYMUX
, ANYMUX
, V3D_QPU_A_ASR
},
480 { 127, 127, ANYMUX
, ANYMUX
, V3D_QPU_A_ROR
},
481 /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
482 { 128, 175, ANYMUX
, ANYMUX
, V3D_QPU_A_FMIN
},
483 { 128, 175, ANYMUX
, ANYMUX
, V3D_QPU_A_FMAX
},
484 { 176, 180, ANYMUX
, ANYMUX
, V3D_QPU_A_VFMIN
},
486 { 181, 181, ANYMUX
, ANYMUX
, V3D_QPU_A_AND
},
487 { 182, 182, ANYMUX
, ANYMUX
, V3D_QPU_A_OR
},
488 { 183, 183, ANYMUX
, ANYMUX
, V3D_QPU_A_XOR
},
490 { 184, 184, ANYMUX
, ANYMUX
, V3D_QPU_A_VADD
},
491 { 185, 185, ANYMUX
, ANYMUX
, V3D_QPU_A_VSUB
},
492 { 186, 186, 1 << 0, ANYMUX
, V3D_QPU_A_NOT
},
493 { 186, 186, 1 << 1, ANYMUX
, V3D_QPU_A_NEG
},
494 { 186, 186, 1 << 2, ANYMUX
, V3D_QPU_A_FLAPUSH
},
495 { 186, 186, 1 << 3, ANYMUX
, V3D_QPU_A_FLBPUSH
},
496 { 186, 186, 1 << 4, ANYMUX
, V3D_QPU_A_FLPOP
},
497 { 186, 186, 1 << 5, ANYMUX
, V3D_QPU_A_RECIP
},
498 { 186, 186, 1 << 6, ANYMUX
, V3D_QPU_A_SETMSF
},
499 { 186, 186, 1 << 7, ANYMUX
, V3D_QPU_A_SETREVF
},
500 { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP
, 0 },
501 { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX
},
502 { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX
},
503 { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR
},
504 { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA
},
505 { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA
},
506 { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB
},
507 { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB
},
509 { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD
},
510 { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD
},
511 { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD
},
512 { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD
},
514 { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF
},
515 { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF
},
516 { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT
, 33 },
517 { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_IID
, 40 },
518 { 187, 187, 1 << 2, 1 << 3, V3D_QPU_A_SAMPID
, 40 },
519 { 187, 187, 1 << 2, 1 << 4, V3D_QPU_A_BARRIERID
, 40 },
520 { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT
},
521 { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT
},
523 { 187, 187, 1 << 3, ANYMUX
, V3D_QPU_A_VPMSETUP
, 33 },
524 { 188, 188, 1 << 0, ANYMUX
, V3D_QPU_A_LDVPMV_IN
, 40 },
525 { 188, 188, 1 << 0, ANYMUX
, V3D_QPU_A_LDVPMV_OUT
, 40 },
526 { 188, 188, 1 << 1, ANYMUX
, V3D_QPU_A_LDVPMD_IN
, 40 },
527 { 188, 188, 1 << 1, ANYMUX
, V3D_QPU_A_LDVPMD_OUT
, 40 },
528 { 188, 188, 1 << 2, ANYMUX
, V3D_QPU_A_LDVPMP
, 40 },
529 { 188, 188, 1 << 3, ANYMUX
, V3D_QPU_A_RSQRT
, 41 },
530 { 188, 188, 1 << 4, ANYMUX
, V3D_QPU_A_EXP
, 41 },
531 { 188, 188, 1 << 5, ANYMUX
, V3D_QPU_A_LOG
, 41 },
532 { 188, 188, 1 << 6, ANYMUX
, V3D_QPU_A_SIN
, 41 },
533 { 188, 188, 1 << 7, ANYMUX
, V3D_QPU_A_RSQRT2
, 41 },
534 { 189, 189, ANYMUX
, ANYMUX
, V3D_QPU_A_LDVPMG_IN
, 40 },
535 { 189, 189, ANYMUX
, ANYMUX
, V3D_QPU_A_LDVPMG_OUT
, 40 },
537 /* FIXME: MORE COMPLICATED */
538 /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
540 { 192, 239, ANYMUX
, ANYMUX
, V3D_QPU_A_FCMP
},
541 { 240, 244, ANYMUX
, ANYMUX
, V3D_QPU_A_VFMAX
},
543 { 245, 245, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_FROUND
},
544 { 245, 245, 1 << 3, ANYMUX
, V3D_QPU_A_FTOIN
},
545 { 245, 245, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_FTRUNC
},
546 { 245, 245, 1 << 7, ANYMUX
, V3D_QPU_A_FTOIZ
},
547 { 246, 246, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_FFLOOR
},
548 { 246, 246, 1 << 3, ANYMUX
, V3D_QPU_A_FTOUZ
},
549 { 246, 246, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_FCEIL
},
550 { 246, 246, 1 << 7, ANYMUX
, V3D_QPU_A_FTOC
},
552 { 247, 247, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_FDX
},
553 { 247, 247, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_FDY
},
555 /* The stvpms are distinguished by the waddr field. */
556 { 248, 248, ANYMUX
, ANYMUX
, V3D_QPU_A_STVPMV
},
557 { 248, 248, ANYMUX
, ANYMUX
, V3D_QPU_A_STVPMD
},
558 { 248, 248, ANYMUX
, ANYMUX
, V3D_QPU_A_STVPMP
},
560 { 252, 252, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_ITOF
},
561 { 252, 252, 1 << 3, ANYMUX
, V3D_QPU_A_CLZ
},
562 { 252, 252, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_UTOF
},
565 static const struct opcode_desc mul_ops
[] = {
566 { 1, 1, ANYMUX
, ANYMUX
, V3D_QPU_M_ADD
},
567 { 2, 2, ANYMUX
, ANYMUX
, V3D_QPU_M_SUB
},
568 { 3, 3, ANYMUX
, ANYMUX
, V3D_QPU_M_UMUL24
},
569 { 4, 8, ANYMUX
, ANYMUX
, V3D_QPU_M_VFMUL
},
570 { 9, 9, ANYMUX
, ANYMUX
, V3D_QPU_M_SMUL24
},
571 { 10, 10, ANYMUX
, ANYMUX
, V3D_QPU_M_MULTOP
},
572 { 14, 14, ANYMUX
, ANYMUX
, V3D_QPU_M_FMOV
},
573 { 15, 15, MUX_MASK(0, 3), ANYMUX
, V3D_QPU_M_FMOV
},
574 { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP
, 0 },
575 { 15, 15, 1 << 7, ANYMUX
, V3D_QPU_M_MOV
},
576 { 16, 63, ANYMUX
, ANYMUX
, V3D_QPU_M_FMUL
},
579 static const struct opcode_desc
*
580 lookup_opcode(const struct opcode_desc
*opcodes
, size_t num_opcodes
,
581 uint32_t opcode
, uint32_t mux_a
, uint32_t mux_b
)
583 for (int i
= 0; i
< num_opcodes
; i
++) {
584 const struct opcode_desc
*op_desc
= &opcodes
[i
];
586 if (opcode
< op_desc
->opcode_first
||
587 opcode
> op_desc
->opcode_last
)
590 if (!(op_desc
->mux_b_mask
& (1 << mux_b
)))
593 if (!(op_desc
->mux_a_mask
& (1 << mux_a
)))
603 v3d_qpu_float32_unpack_unpack(uint32_t packed
,
604 enum v3d_qpu_input_unpack
*unpacked
)
608 *unpacked
= V3D_QPU_UNPACK_ABS
;
611 *unpacked
= V3D_QPU_UNPACK_NONE
;
614 *unpacked
= V3D_QPU_UNPACK_L
;
617 *unpacked
= V3D_QPU_UNPACK_H
;
625 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked
,
629 case V3D_QPU_UNPACK_ABS
:
632 case V3D_QPU_UNPACK_NONE
:
635 case V3D_QPU_UNPACK_L
:
638 case V3D_QPU_UNPACK_H
:
647 v3d_qpu_float16_unpack_unpack(uint32_t packed
,
648 enum v3d_qpu_input_unpack
*unpacked
)
652 *unpacked
= V3D_QPU_UNPACK_NONE
;
655 *unpacked
= V3D_QPU_UNPACK_REPLICATE_32F_16
;
658 *unpacked
= V3D_QPU_UNPACK_REPLICATE_L_16
;
661 *unpacked
= V3D_QPU_UNPACK_REPLICATE_H_16
;
664 *unpacked
= V3D_QPU_UNPACK_SWAP_16
;
672 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked
,
676 case V3D_QPU_UNPACK_NONE
:
679 case V3D_QPU_UNPACK_REPLICATE_32F_16
:
682 case V3D_QPU_UNPACK_REPLICATE_L_16
:
685 case V3D_QPU_UNPACK_REPLICATE_H_16
:
688 case V3D_QPU_UNPACK_SWAP_16
:
697 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked
,
701 case V3D_QPU_PACK_NONE
:
716 v3d_qpu_add_unpack(const struct v3d_device_info
*devinfo
, uint64_t packed_inst
,
717 struct v3d_qpu_instr
*instr
)
719 uint32_t op
= QPU_GET_FIELD(packed_inst
, VC5_QPU_OP_ADD
);
720 uint32_t mux_a
= QPU_GET_FIELD(packed_inst
, VC5_QPU_ADD_A
);
721 uint32_t mux_b
= QPU_GET_FIELD(packed_inst
, VC5_QPU_ADD_B
);
722 uint32_t waddr
= QPU_GET_FIELD(packed_inst
, V3D_QPU_WADDR_A
);
724 uint32_t map_op
= op
;
725 /* Some big clusters of opcodes are replicated with unpack
728 if (map_op
>= 249 && map_op
<= 251)
729 map_op
= (map_op
- 249 + 245);
730 if (map_op
>= 253 && map_op
<= 255)
731 map_op
= (map_op
- 253 + 245);
733 const struct opcode_desc
*desc
=
734 lookup_opcode(add_ops
, ARRAY_SIZE(add_ops
),
735 map_op
, mux_a
, mux_b
);
739 instr
->alu
.add
.op
= desc
->op
;
741 /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
744 if (((op
>> 2) & 3) * 8 + mux_a
> (op
& 3) * 8 + mux_b
) {
745 if (instr
->alu
.add
.op
== V3D_QPU_A_FMIN
)
746 instr
->alu
.add
.op
= V3D_QPU_A_FMAX
;
747 if (instr
->alu
.add
.op
== V3D_QPU_A_FADD
)
748 instr
->alu
.add
.op
= V3D_QPU_A_FADDNF
;
751 /* Some QPU ops require a bit more than just basic opcode and mux a/b
752 * comparisons to distinguish them.
754 switch (instr
->alu
.add
.op
) {
755 case V3D_QPU_A_STVPMV
:
756 case V3D_QPU_A_STVPMD
:
757 case V3D_QPU_A_STVPMP
:
760 instr
->alu
.add
.op
= V3D_QPU_A_STVPMV
;
763 instr
->alu
.add
.op
= V3D_QPU_A_STVPMD
;
766 instr
->alu
.add
.op
= V3D_QPU_A_STVPMP
;
776 switch (instr
->alu
.add
.op
) {
778 case V3D_QPU_A_FADDNF
:
783 case V3D_QPU_A_VFPACK
:
784 if (instr
->alu
.add
.op
!= V3D_QPU_A_VFPACK
)
785 instr
->alu
.add
.output_pack
= (op
>> 4) & 0x3;
787 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
789 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
790 &instr
->alu
.add
.a_unpack
)) {
794 if (!v3d_qpu_float32_unpack_unpack((op
>> 0) & 0x3,
795 &instr
->alu
.add
.b_unpack
)) {
800 case V3D_QPU_A_FFLOOR
:
801 case V3D_QPU_A_FROUND
:
802 case V3D_QPU_A_FTRUNC
:
803 case V3D_QPU_A_FCEIL
:
806 instr
->alu
.add
.output_pack
= mux_b
& 0x3;
808 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
809 &instr
->alu
.add
.a_unpack
)) {
814 case V3D_QPU_A_FTOIN
:
815 case V3D_QPU_A_FTOIZ
:
816 case V3D_QPU_A_FTOUZ
:
818 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
820 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
821 &instr
->alu
.add
.a_unpack
)) {
826 case V3D_QPU_A_VFMIN
:
827 case V3D_QPU_A_VFMAX
:
828 if (!v3d_qpu_float16_unpack_unpack(op
& 0x7,
829 &instr
->alu
.add
.a_unpack
)) {
833 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
834 instr
->alu
.add
.b_unpack
= V3D_QPU_UNPACK_NONE
;
838 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
839 instr
->alu
.add
.a_unpack
= V3D_QPU_UNPACK_NONE
;
840 instr
->alu
.add
.b_unpack
= V3D_QPU_UNPACK_NONE
;
844 instr
->alu
.add
.a
= mux_a
;
845 instr
->alu
.add
.b
= mux_b
;
846 instr
->alu
.add
.waddr
= QPU_GET_FIELD(packed_inst
, V3D_QPU_WADDR_A
);
848 instr
->alu
.add
.magic_write
= false;
849 if (packed_inst
& VC5_QPU_MA
) {
850 switch (instr
->alu
.add
.op
) {
851 case V3D_QPU_A_LDVPMV_IN
:
852 instr
->alu
.add
.op
= V3D_QPU_A_LDVPMV_OUT
;
854 case V3D_QPU_A_LDVPMD_IN
:
855 instr
->alu
.add
.op
= V3D_QPU_A_LDVPMD_OUT
;
857 case V3D_QPU_A_LDVPMG_IN
:
858 instr
->alu
.add
.op
= V3D_QPU_A_LDVPMG_OUT
;
861 instr
->alu
.add
.magic_write
= true;
870 v3d_qpu_mul_unpack(const struct v3d_device_info
*devinfo
, uint64_t packed_inst
,
871 struct v3d_qpu_instr
*instr
)
873 uint32_t op
= QPU_GET_FIELD(packed_inst
, VC5_QPU_OP_MUL
);
874 uint32_t mux_a
= QPU_GET_FIELD(packed_inst
, VC5_QPU_MUL_A
);
875 uint32_t mux_b
= QPU_GET_FIELD(packed_inst
, VC5_QPU_MUL_B
);
878 const struct opcode_desc
*desc
=
879 lookup_opcode(mul_ops
, ARRAY_SIZE(mul_ops
),
884 instr
->alu
.mul
.op
= desc
->op
;
887 switch (instr
->alu
.mul
.op
) {
889 instr
->alu
.mul
.output_pack
= ((op
>> 4) & 0x3) - 1;
891 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
892 &instr
->alu
.mul
.a_unpack
)) {
896 if (!v3d_qpu_float32_unpack_unpack((op
>> 0) & 0x3,
897 &instr
->alu
.mul
.b_unpack
)) {
904 instr
->alu
.mul
.output_pack
= (((op
& 1) << 1) +
907 if (!v3d_qpu_float32_unpack_unpack(mux_b
& 0x3,
908 &instr
->alu
.mul
.a_unpack
)) {
914 case V3D_QPU_M_VFMUL
:
915 instr
->alu
.mul
.output_pack
= V3D_QPU_PACK_NONE
;
917 if (!v3d_qpu_float16_unpack_unpack(((op
& 0x7) - 4) & 7,
918 &instr
->alu
.mul
.a_unpack
)) {
922 instr
->alu
.mul
.b_unpack
= V3D_QPU_UNPACK_NONE
;
927 instr
->alu
.mul
.output_pack
= V3D_QPU_PACK_NONE
;
928 instr
->alu
.mul
.a_unpack
= V3D_QPU_UNPACK_NONE
;
929 instr
->alu
.mul
.b_unpack
= V3D_QPU_UNPACK_NONE
;
933 instr
->alu
.mul
.a
= mux_a
;
934 instr
->alu
.mul
.b
= mux_b
;
935 instr
->alu
.mul
.waddr
= QPU_GET_FIELD(packed_inst
, V3D_QPU_WADDR_M
);
936 instr
->alu
.mul
.magic_write
= packed_inst
& VC5_QPU_MM
;
942 v3d_qpu_add_pack(const struct v3d_device_info
*devinfo
,
943 const struct v3d_qpu_instr
*instr
, uint64_t *packed_instr
)
945 uint32_t waddr
= instr
->alu
.add
.waddr
;
946 uint32_t mux_a
= instr
->alu
.add
.a
;
947 uint32_t mux_b
= instr
->alu
.add
.b
;
948 int nsrc
= v3d_qpu_add_op_num_src(instr
->alu
.add
.op
);
949 const struct opcode_desc
*desc
;
952 for (desc
= add_ops
; desc
!= &add_ops
[ARRAY_SIZE(add_ops
)];
954 if (desc
->op
== instr
->alu
.add
.op
)
957 if (desc
== &add_ops
[ARRAY_SIZE(add_ops
)])
960 opcode
= desc
->opcode_first
;
962 /* If an operation doesn't use an arg, its mux values may be used to
963 * identify the operation type.
966 mux_b
= ffs(desc
->mux_b_mask
) - 1;
969 mux_a
= ffs(desc
->mux_a_mask
) - 1;
971 bool no_magic_write
= false;
973 switch (instr
->alu
.add
.op
) {
974 case V3D_QPU_A_STVPMV
:
976 no_magic_write
= true;
978 case V3D_QPU_A_STVPMD
:
980 no_magic_write
= true;
982 case V3D_QPU_A_STVPMP
:
984 no_magic_write
= true;
987 case V3D_QPU_A_LDVPMV_IN
:
988 case V3D_QPU_A_LDVPMD_IN
:
989 case V3D_QPU_A_LDVPMP
:
990 case V3D_QPU_A_LDVPMG_IN
:
991 assert(!instr
->alu
.add
.magic_write
);
994 case V3D_QPU_A_LDVPMV_OUT
:
995 case V3D_QPU_A_LDVPMD_OUT
:
996 case V3D_QPU_A_LDVPMG_OUT
:
997 assert(!instr
->alu
.add
.magic_write
);
998 *packed_instr
|= VC5_QPU_MA
;
1005 switch (instr
->alu
.add
.op
) {
1006 case V3D_QPU_A_FADD
:
1007 case V3D_QPU_A_FADDNF
:
1008 case V3D_QPU_A_FSUB
:
1009 case V3D_QPU_A_FMIN
:
1010 case V3D_QPU_A_FMAX
:
1011 case V3D_QPU_A_FCMP
: {
1012 uint32_t output_pack
;
1016 if (!v3d_qpu_float32_pack_pack(instr
->alu
.add
.output_pack
,
1020 opcode
|= output_pack
<< 4;
1022 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
1027 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.b_unpack
,
1032 /* These operations with commutative operands are
1033 * distinguished by which order their operands come in.
1035 bool ordering
= a_unpack
* 8 + mux_a
> b_unpack
* 8 + mux_b
;
1036 if (((instr
->alu
.add
.op
== V3D_QPU_A_FMIN
||
1037 instr
->alu
.add
.op
== V3D_QPU_A_FADD
) && ordering
) ||
1038 ((instr
->alu
.add
.op
== V3D_QPU_A_FMAX
||
1039 instr
->alu
.add
.op
== V3D_QPU_A_FADDNF
) && !ordering
)) {
1043 a_unpack
= b_unpack
;
1051 opcode
|= a_unpack
<< 2;
1052 opcode
|= b_unpack
<< 0;
1057 case V3D_QPU_A_VFPACK
: {
1061 if (instr
->alu
.add
.a_unpack
== V3D_QPU_UNPACK_ABS
||
1062 instr
->alu
.add
.b_unpack
== V3D_QPU_UNPACK_ABS
) {
1066 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
1071 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.b_unpack
,
1076 opcode
= (opcode
& ~(1 << 2)) | (a_unpack
<< 2);
1077 opcode
= (opcode
& ~(1 << 0)) | (b_unpack
<< 0);
1082 case V3D_QPU_A_FFLOOR
:
1083 case V3D_QPU_A_FROUND
:
1084 case V3D_QPU_A_FTRUNC
:
1085 case V3D_QPU_A_FCEIL
:
1087 case V3D_QPU_A_FDY
: {
1090 if (!v3d_qpu_float32_pack_pack(instr
->alu
.add
.output_pack
,
1096 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
1102 opcode
= (opcode
& ~(1 << 2)) | packed
<< 2;
1106 case V3D_QPU_A_FTOIN
:
1107 case V3D_QPU_A_FTOIZ
:
1108 case V3D_QPU_A_FTOUZ
:
1109 case V3D_QPU_A_FTOC
:
1110 if (instr
->alu
.add
.output_pack
!= V3D_QPU_PACK_NONE
)
1114 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
1120 opcode
|= packed
<< 2;
1124 case V3D_QPU_A_VFMIN
:
1125 case V3D_QPU_A_VFMAX
:
1126 if (instr
->alu
.add
.output_pack
!= V3D_QPU_PACK_NONE
||
1127 instr
->alu
.add
.b_unpack
!= V3D_QPU_UNPACK_NONE
) {
1131 if (!v3d_qpu_float16_unpack_pack(instr
->alu
.add
.a_unpack
,
1139 if (instr
->alu
.add
.op
!= V3D_QPU_A_NOP
&&
1140 (instr
->alu
.add
.output_pack
!= V3D_QPU_PACK_NONE
||
1141 instr
->alu
.add
.a_unpack
!= V3D_QPU_UNPACK_NONE
||
1142 instr
->alu
.add
.b_unpack
!= V3D_QPU_UNPACK_NONE
)) {
1148 *packed_instr
|= QPU_SET_FIELD(mux_a
, VC5_QPU_ADD_A
);
1149 *packed_instr
|= QPU_SET_FIELD(mux_b
, VC5_QPU_ADD_B
);
1150 *packed_instr
|= QPU_SET_FIELD(opcode
, VC5_QPU_OP_ADD
);
1151 *packed_instr
|= QPU_SET_FIELD(waddr
, V3D_QPU_WADDR_A
);
1152 if (instr
->alu
.add
.magic_write
&& !no_magic_write
)
1153 *packed_instr
|= VC5_QPU_MA
;
1159 v3d_qpu_mul_pack(const struct v3d_device_info
*devinfo
,
1160 const struct v3d_qpu_instr
*instr
, uint64_t *packed_instr
)
1162 uint32_t mux_a
= instr
->alu
.mul
.a
;
1163 uint32_t mux_b
= instr
->alu
.mul
.b
;
1164 int nsrc
= v3d_qpu_mul_op_num_src(instr
->alu
.mul
.op
);
1165 const struct opcode_desc
*desc
;
1167 for (desc
= mul_ops
; desc
!= &mul_ops
[ARRAY_SIZE(mul_ops
)];
1169 if (desc
->op
== instr
->alu
.mul
.op
)
1172 if (desc
== &mul_ops
[ARRAY_SIZE(mul_ops
)])
1175 uint32_t opcode
= desc
->opcode_first
;
1177 /* Some opcodes have a single valid value for their mux a/b, so set
1178 * that here. If mux a/b determine packing, it will be set below.
1181 mux_b
= ffs(desc
->mux_b_mask
) - 1;
1184 mux_a
= ffs(desc
->mux_a_mask
) - 1;
1186 switch (instr
->alu
.mul
.op
) {
1187 case V3D_QPU_M_FMUL
: {
1190 if (!v3d_qpu_float32_pack_pack(instr
->alu
.mul
.output_pack
,
1194 /* No need for a +1 because desc->opcode_first has a 1 in this
1197 opcode
+= packed
<< 4;
1199 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.mul
.a_unpack
,
1203 opcode
|= packed
<< 2;
1205 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.mul
.b_unpack
,
1209 opcode
|= packed
<< 0;
1213 case V3D_QPU_M_FMOV
: {
1216 if (!v3d_qpu_float32_pack_pack(instr
->alu
.mul
.output_pack
,
1220 opcode
|= (packed
>> 1) & 1;
1221 mux_b
= (packed
& 1) << 2;
1223 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.mul
.a_unpack
,
1231 case V3D_QPU_M_VFMUL
: {
1234 if (instr
->alu
.mul
.output_pack
!= V3D_QPU_PACK_NONE
)
1237 if (!v3d_qpu_float16_unpack_pack(instr
->alu
.mul
.a_unpack
,
1241 if (instr
->alu
.mul
.a_unpack
== V3D_QPU_UNPACK_SWAP_16
)
1244 opcode
|= (packed
+ 4) & 7;
1246 if (instr
->alu
.mul
.b_unpack
!= V3D_QPU_UNPACK_NONE
)
1256 *packed_instr
|= QPU_SET_FIELD(mux_a
, VC5_QPU_MUL_A
);
1257 *packed_instr
|= QPU_SET_FIELD(mux_b
, VC5_QPU_MUL_B
);
1259 *packed_instr
|= QPU_SET_FIELD(opcode
, VC5_QPU_OP_MUL
);
1260 *packed_instr
|= QPU_SET_FIELD(instr
->alu
.mul
.waddr
, V3D_QPU_WADDR_M
);
1261 if (instr
->alu
.mul
.magic_write
)
1262 *packed_instr
|= VC5_QPU_MM
;
1268 v3d_qpu_instr_unpack_alu(const struct v3d_device_info
*devinfo
,
1269 uint64_t packed_instr
,
1270 struct v3d_qpu_instr
*instr
)
1272 instr
->type
= V3D_QPU_INSTR_TYPE_ALU
;
1274 if (!v3d_qpu_sig_unpack(devinfo
,
1275 QPU_GET_FIELD(packed_instr
, VC5_QPU_SIG
),
1279 uint32_t packed_cond
= QPU_GET_FIELD(packed_instr
, VC5_QPU_COND
);
1280 if (v3d_qpu_sig_writes_address(devinfo
, &instr
->sig
)) {
1281 instr
->sig_addr
= packed_cond
& ~VC5_QPU_COND_SIG_MAGIC_ADDR
;
1282 instr
->sig_magic
= packed_cond
& VC5_QPU_COND_SIG_MAGIC_ADDR
;
1284 instr
->flags
.ac
= V3D_QPU_COND_NONE
;
1285 instr
->flags
.mc
= V3D_QPU_COND_NONE
;
1286 instr
->flags
.apf
= V3D_QPU_PF_NONE
;
1287 instr
->flags
.mpf
= V3D_QPU_PF_NONE
;
1288 instr
->flags
.auf
= V3D_QPU_UF_NONE
;
1289 instr
->flags
.muf
= V3D_QPU_UF_NONE
;
1291 if (!v3d_qpu_flags_unpack(devinfo
, packed_cond
, &instr
->flags
))
1295 instr
->raddr_a
= QPU_GET_FIELD(packed_instr
, VC5_QPU_RADDR_A
);
1296 instr
->raddr_b
= QPU_GET_FIELD(packed_instr
, VC5_QPU_RADDR_B
);
1298 if (!v3d_qpu_add_unpack(devinfo
, packed_instr
, instr
))
1301 if (!v3d_qpu_mul_unpack(devinfo
, packed_instr
, instr
))
1308 v3d_qpu_instr_unpack_branch(const struct v3d_device_info
*devinfo
,
1309 uint64_t packed_instr
,
1310 struct v3d_qpu_instr
*instr
)
1312 instr
->type
= V3D_QPU_INSTR_TYPE_BRANCH
;
1314 uint32_t cond
= QPU_GET_FIELD(packed_instr
, VC5_QPU_BRANCH_COND
);
1316 instr
->branch
.cond
= V3D_QPU_BRANCH_COND_ALWAYS
;
1317 else if (V3D_QPU_BRANCH_COND_A0
+ (cond
- 2) <=
1318 V3D_QPU_BRANCH_COND_ALLNA
)
1319 instr
->branch
.cond
= V3D_QPU_BRANCH_COND_A0
+ (cond
- 2);
1323 uint32_t msfign
= QPU_GET_FIELD(packed_instr
, VC5_QPU_BRANCH_MSFIGN
);
1326 instr
->branch
.msfign
= msfign
;
1328 instr
->branch
.bdi
= QPU_GET_FIELD(packed_instr
, VC5_QPU_BRANCH_BDI
);
1330 instr
->branch
.ub
= packed_instr
& VC5_QPU_BRANCH_UB
;
1331 if (instr
->branch
.ub
) {
1332 instr
->branch
.bdu
= QPU_GET_FIELD(packed_instr
,
1333 VC5_QPU_BRANCH_BDU
);
1336 instr
->branch
.raddr_a
= QPU_GET_FIELD(packed_instr
,
1339 instr
->branch
.offset
= 0;
1341 instr
->branch
.offset
+=
1342 QPU_GET_FIELD(packed_instr
,
1343 VC5_QPU_BRANCH_ADDR_LOW
) << 3;
1345 instr
->branch
.offset
+=
1346 QPU_GET_FIELD(packed_instr
,
1347 VC5_QPU_BRANCH_ADDR_HIGH
) << 24;
1353 v3d_qpu_instr_unpack(const struct v3d_device_info
*devinfo
,
1354 uint64_t packed_instr
,
1355 struct v3d_qpu_instr
*instr
)
1357 if (QPU_GET_FIELD(packed_instr
, VC5_QPU_OP_MUL
) != 0) {
1358 return v3d_qpu_instr_unpack_alu(devinfo
, packed_instr
, instr
);
1360 uint32_t sig
= QPU_GET_FIELD(packed_instr
, VC5_QPU_SIG
);
1362 if ((sig
& 24) == 16) {
1363 return v3d_qpu_instr_unpack_branch(devinfo
, packed_instr
,
1372 v3d_qpu_instr_pack_alu(const struct v3d_device_info
*devinfo
,
1373 const struct v3d_qpu_instr
*instr
,
1374 uint64_t *packed_instr
)
1377 if (!v3d_qpu_sig_pack(devinfo
, &instr
->sig
, &sig
))
1379 *packed_instr
|= QPU_SET_FIELD(sig
, VC5_QPU_SIG
);
1381 if (instr
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
1382 *packed_instr
|= QPU_SET_FIELD(instr
->raddr_a
, VC5_QPU_RADDR_A
);
1383 *packed_instr
|= QPU_SET_FIELD(instr
->raddr_b
, VC5_QPU_RADDR_B
);
1385 if (!v3d_qpu_add_pack(devinfo
, instr
, packed_instr
))
1387 if (!v3d_qpu_mul_pack(devinfo
, instr
, packed_instr
))
1391 if (v3d_qpu_sig_writes_address(devinfo
, &instr
->sig
)) {
1392 if (instr
->flags
.ac
!= V3D_QPU_COND_NONE
||
1393 instr
->flags
.mc
!= V3D_QPU_COND_NONE
||
1394 instr
->flags
.apf
!= V3D_QPU_PF_NONE
||
1395 instr
->flags
.mpf
!= V3D_QPU_PF_NONE
||
1396 instr
->flags
.auf
!= V3D_QPU_UF_NONE
||
1397 instr
->flags
.muf
!= V3D_QPU_UF_NONE
) {
1401 flags
= instr
->sig_addr
;
1402 if (instr
->sig_magic
)
1403 flags
|= VC5_QPU_COND_SIG_MAGIC_ADDR
;
1405 if (!v3d_qpu_flags_pack(devinfo
, &instr
->flags
, &flags
))
1409 *packed_instr
|= QPU_SET_FIELD(flags
, VC5_QPU_COND
);
1411 if (v3d_qpu_sig_writes_address(devinfo
, &instr
->sig
))
1419 v3d_qpu_instr_pack_branch(const struct v3d_device_info
*devinfo
,
1420 const struct v3d_qpu_instr
*instr
,
1421 uint64_t *packed_instr
)
1423 *packed_instr
|= QPU_SET_FIELD(16, VC5_QPU_SIG
);
1425 if (instr
->branch
.cond
!= V3D_QPU_BRANCH_COND_ALWAYS
) {
1426 *packed_instr
|= QPU_SET_FIELD(2 + (instr
->branch
.cond
-
1427 V3D_QPU_BRANCH_COND_A0
),
1428 VC5_QPU_BRANCH_COND
);
1431 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.msfign
,
1432 VC5_QPU_BRANCH_MSFIGN
);
1434 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.bdi
,
1435 VC5_QPU_BRANCH_BDI
);
1437 if (instr
->branch
.ub
) {
1438 *packed_instr
|= VC5_QPU_BRANCH_UB
;
1439 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.bdu
,
1440 VC5_QPU_BRANCH_BDU
);
1443 switch (instr
->branch
.bdi
) {
1444 case V3D_QPU_BRANCH_DEST_ABS
:
1445 case V3D_QPU_BRANCH_DEST_REL
:
1446 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.msfign
,
1447 VC5_QPU_BRANCH_MSFIGN
);
1449 *packed_instr
|= QPU_SET_FIELD((instr
->branch
.offset
&
1451 VC5_QPU_BRANCH_ADDR_LOW
);
1453 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.offset
>> 24,
1454 VC5_QPU_BRANCH_ADDR_HIGH
);
1456 case V3D_QPU_BRANCH_DEST_REGFILE
:
1457 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.raddr_a
,
1469 v3d_qpu_instr_pack(const struct v3d_device_info
*devinfo
,
1470 const struct v3d_qpu_instr
*instr
,
1471 uint64_t *packed_instr
)
1475 switch (instr
->type
) {
1476 case V3D_QPU_INSTR_TYPE_ALU
:
1477 return v3d_qpu_instr_pack_alu(devinfo
, instr
, packed_instr
);
1478 case V3D_QPU_INSTR_TYPE_BRANCH
:
1479 return v3d_qpu_instr_pack_branch(devinfo
, instr
, packed_instr
);