2 * Copyright © 2016 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 #include "util/macros.h"
27 #include "broadcom/common/v3d_device_info.h"
28 #include "qpu_instr.h"
/*
 * Build a 64-bit mask with bits [high:low] (inclusive) set.
 *
 * The mask is formed by shifting an all-ones value right until exactly
 * (high - low + 1) bits remain and then moving them up to bit `low`.  Unlike
 * the ((1 << width) - 1) formulation, this never shifts by 64, so a
 * full-width mask such as QPU_MASK(63, 0) is well defined.
 *
 * Requires 0 <= low <= high <= 63.  `low` is evaluated twice.
 */
#define QPU_MASK(high, low) \
        ((~(uint64_t)0 >> (63 - ((high) - (low)))) << (low))
32 /* Using the GNU statement expression extension */
33 #define QPU_SET_FIELD(value, field) \
35 uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
36 assert((fieldval & ~ field ## _MASK) == 0); \
37 fieldval & field ## _MASK; \
/*
 * Extract `field` from the packed word: mask with <field>_MASK, shift down
 * by <field>_SHIFT, and truncate the result to uint32_t.  `field` is the
 * common prefix of the _MASK/_SHIFT pair (token-pasted).
 */
40 #define QPU_GET_FIELD(word, field) ((uint32_t)(((word) & field ## _MASK) >> field ## _SHIFT))
/*
 * Yield `inst` with the bits of `field` cleared and replaced by `value`
 * (positioned via QPU_SET_FIELD).  `inst` itself is not modified; the new
 * word is the value of the expression.
 */
42 #define QPU_UPDATE_FIELD(inst, value, field) \
43 (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
/*
 * Bit-field layout of the 64-bit packed QPU instruction.
 *
 * Each field FOO is described by FOO_SHIFT (position of its lowest bit) and
 * FOO_MASK (built with QPU_MASK(high, low)); this naming is what the
 * QPU_GET_FIELD()/QPU_SET_FIELD() macros rely on through token pasting.
 * Single-bit flags (VC5_QPU_MM, VC5_QPU_MA, VC5_QPU_BRANCH_UB) are plain
 * masks without a _SHIFT.
 *
 * The VC5_QPU_BRANCH_* fields occupy bit ranges that are also used by ALU
 * fields (for example BRANCH_ADDR_HIGH and OP_ADD are both bits 31:24).
 */

/* Mul ALU opcode, bits 63:58. */
46 #define VC5_QPU_OP_MUL_SHIFT 58
47 #define VC5_QPU_OP_MUL_MASK QPU_MASK(63, 58)
/* Packed signal field, bits 57:53, followed by individual signal bit values. */
49 #define VC5_QPU_SIG_SHIFT 53
50 #define VC5_QPU_SIG_MASK QPU_MASK(57, 53)
51 # define VC5_QPU_SIG_THRSW_BIT 0x1
52 # define VC5_QPU_SIG_LDUNIF_BIT 0x2
53 # define VC5_QPU_SIG_LDTMU_BIT 0x4
54 # define VC5_QPU_SIG_LDVARY_BIT 0x8
/* Packed condition/flags field, bits 52:46. */
56 #define VC5_QPU_COND_SHIFT 46
57 #define VC5_QPU_COND_MASK QPU_MASK(52, 46)
/* Two-bit condition encodings. */
59 #define VC5_QPU_COND_IFA 0
60 #define VC5_QPU_COND_IFB 1
61 #define VC5_QPU_COND_IFNA 2
62 #define VC5_QPU_COND_IFNB 3
/* Magic-write flags: MM (bit 45) for the mul ALU, MA (bit 44) for the add ALU. */
64 #define VC5_QPU_MM QPU_MASK(45, 45)
65 #define VC5_QPU_MA QPU_MASK(44, 44)
/* Mul ALU write address, bits 43:38. */
67 #define V3D_QPU_WADDR_M_SHIFT 38
68 #define V3D_QPU_WADDR_M_MASK QPU_MASK(43, 38)
/* Branch: low part of the branch address, bits 55:35. */
70 #define VC5_QPU_BRANCH_ADDR_LOW_SHIFT 35
71 #define VC5_QPU_BRANCH_ADDR_LOW_MASK QPU_MASK(55, 35)
/* Add ALU write address, bits 37:32. */
73 #define V3D_QPU_WADDR_A_SHIFT 32
74 #define V3D_QPU_WADDR_A_MASK QPU_MASK(37, 32)
/* Branch: condition, bits 34:32. */
76 #define VC5_QPU_BRANCH_COND_SHIFT 32
77 #define VC5_QPU_BRANCH_COND_MASK QPU_MASK(34, 32)
/* Branch: high part of the branch address, bits 31:24. */
79 #define VC5_QPU_BRANCH_ADDR_HIGH_SHIFT 24
80 #define VC5_QPU_BRANCH_ADDR_HIGH_MASK QPU_MASK(31, 24)
/* Add ALU opcode, bits 31:24. */
82 #define VC5_QPU_OP_ADD_SHIFT 24
83 #define VC5_QPU_OP_ADD_MASK QPU_MASK(31, 24)
/* Mul ALU input mux B, bits 23:21. */
85 #define VC5_QPU_MUL_B_SHIFT 21
86 #define VC5_QPU_MUL_B_MASK QPU_MASK(23, 21)
/* Branch: msfign field, bits 22:21. */
88 #define VC5_QPU_BRANCH_MSFIGN_SHIFT 21
89 #define VC5_QPU_BRANCH_MSFIGN_MASK QPU_MASK(22, 21)
/* Mul ALU input mux A, bits 20:18. */
91 #define VC5_QPU_MUL_A_SHIFT 18
92 #define VC5_QPU_MUL_A_MASK QPU_MASK(20, 18)
/* Add ALU input mux B, bits 17:15. */
94 #define VC5_QPU_ADD_B_SHIFT 15
95 #define VC5_QPU_ADD_B_MASK QPU_MASK(17, 15)
/* Branch: bdu field, bits 17:15 (only read when the UB flag is set). */
97 #define VC5_QPU_BRANCH_BDU_SHIFT 15
98 #define VC5_QPU_BRANCH_BDU_MASK QPU_MASK(17, 15)
/* Branch: UB flag, bit 14. */
100 #define VC5_QPU_BRANCH_UB QPU_MASK(14, 14)
/* Add ALU input mux A, bits 14:12. */
102 #define VC5_QPU_ADD_A_SHIFT 12
103 #define VC5_QPU_ADD_A_MASK QPU_MASK(14, 12)
/* Branch: bdi field, bits 13:12. */
105 #define VC5_QPU_BRANCH_BDI_SHIFT 12
106 #define VC5_QPU_BRANCH_BDI_MASK QPU_MASK(13, 12)
/* Read address A, bits 11:6. */
108 #define VC5_QPU_RADDR_A_SHIFT 6
109 #define VC5_QPU_RADDR_A_MASK QPU_MASK(11, 6)
/* Read address B, bits 5:0. */
111 #define VC5_QPU_RADDR_B_SHIFT 0
112 #define VC5_QPU_RADDR_B_MASK QPU_MASK(5, 0)
/*
 * Shorthand designated initializers for the flag members of
 * struct v3d_qpu_sig, so that entries of the signal map table can be written
 * as a list of signal names, e.g. { THRSW, LDUNIF }.
 */
114 #define THRSW .thrsw = true
115 #define LDUNIF .ldunif = true
116 #define LDTMU .ldtmu = true
117 #define LDVARY .ldvary = true
118 #define LDVPM .ldvpm = true
119 #define SMIMM .small_imm = true
120 #define LDTLB .ldtlb = true
121 #define LDTLBU .ldtlbu = true
122 #define UCB .ucb = true
123 #define ROT .rotate = true
124 #define WRTMUC .wrtmuc = true
126 static const struct v3d_qpu_sig v33_sig_map
[] = {
131 [3] = { THRSW
, LDUNIF
},
133 [5] = { THRSW
, LDTMU
, },
134 [6] = { LDTMU
, LDUNIF
},
135 [7] = { THRSW
, LDTMU
, LDUNIF
},
137 [9] = { THRSW
, LDVARY
, },
138 [10] = { LDVARY
, LDUNIF
},
139 [11] = { THRSW
, LDVARY
, LDUNIF
},
140 [12] = { LDVARY
, LDTMU
, },
141 [13] = { THRSW
, LDVARY
, LDTMU
, },
142 [14] = { SMIMM
, LDVARY
, },
150 [25] = { THRSW
, LDVPM
, },
151 [26] = { LDVPM
, LDUNIF
},
152 [27] = { THRSW
, LDVPM
, LDUNIF
},
153 [28] = { LDVPM
, LDTMU
, },
154 [29] = { THRSW
, LDVPM
, LDTMU
, },
155 [30] = { SMIMM
, LDVPM
, },
160 v3d_qpu_sig_unpack(const struct v3d_device_info
*devinfo
,
162 struct v3d_qpu_sig
*sig
)
164 if (packed_sig
>= ARRAY_SIZE(v33_sig_map
))
167 *sig
= v33_sig_map
[packed_sig
];
169 /* Signals with zeroed unpacked contents after element 0 are reserved. */
170 return (packed_sig
== 0 ||
171 memcmp(sig
, &v33_sig_map
[0], sizeof(*sig
)) != 0);
175 v3d_qpu_sig_pack(const struct v3d_device_info
*devinfo
,
176 const struct v3d_qpu_sig
*sig
,
177 uint32_t *packed_sig
)
179 static const struct v3d_qpu_sig
*map
;
183 for (int i
= 0; i
< ARRAY_SIZE(v33_sig_map
); i
++) {
184 if (memcmp(&map
[i
], sig
, sizeof(*sig
)) == 0) {
194 v3d_qpu_flags_unpack(const struct v3d_device_info
*devinfo
,
195 uint32_t packed_cond
,
196 struct v3d_qpu_flags
*cond
)
198 static const enum v3d_qpu_cond cond_map
[4] = {
199 [0] = V3D_QPU_COND_IFA
,
200 [1] = V3D_QPU_COND_IFB
,
201 [2] = V3D_QPU_COND_IFNA
,
202 [3] = V3D_QPU_COND_IFNB
,
205 cond
->ac
= V3D_QPU_COND_NONE
;
206 cond
->mc
= V3D_QPU_COND_NONE
;
207 cond
->apf
= V3D_QPU_PF_NONE
;
208 cond
->mpf
= V3D_QPU_PF_NONE
;
209 cond
->auf
= V3D_QPU_UF_NONE
;
210 cond
->muf
= V3D_QPU_UF_NONE
;
212 if (packed_cond
== 0) {
214 } else if (packed_cond
>> 2 == 0) {
215 cond
->apf
= packed_cond
& 0x3;
216 } else if (packed_cond
>> 4 == 0) {
217 cond
->auf
= (packed_cond
& 0xf) - 4 + V3D_QPU_UF_ANDZ
;
218 } else if (packed_cond
== 0x10) {
220 } else if (packed_cond
>> 2 == 0x4) {
221 cond
->mpf
= packed_cond
& 0x3;
222 } else if (packed_cond
>> 4 == 0x1) {
223 cond
->muf
= (packed_cond
& 0xf) - 4 + V3D_QPU_UF_ANDZ
;
224 } else if (packed_cond
>> 4 == 0x2) {
225 cond
->ac
= ((packed_cond
>> 2) & 0x3) + V3D_QPU_COND_IFA
;
226 cond
->mpf
= packed_cond
& 0x3;
227 } else if (packed_cond
>> 4 == 0x3) {
228 cond
->mc
= ((packed_cond
>> 2) & 0x3) + V3D_QPU_COND_IFA
;
229 cond
->apf
= packed_cond
& 0x3;
230 } else if (packed_cond
>> 6) {
231 cond
->mc
= cond_map
[(packed_cond
>> 4) & 0x3];
232 if (((packed_cond
>> 2) & 0x3) == 0) {
233 cond
->ac
= cond_map
[packed_cond
& 0x3];
235 cond
->auf
= (packed_cond
& 0xf) - 4 + V3D_QPU_UF_ANDZ
;
243 v3d_qpu_flags_pack(const struct v3d_device_info
*devinfo
,
244 const struct v3d_qpu_flags
*cond
,
245 uint32_t *packed_cond
)
253 static const struct {
254 uint8_t flags_present
;
263 { AC
| MPF
, (1 << 5) },
264 { MC
, (1 << 5) | (1 << 4) },
265 { MC
| APF
, (1 << 5) | (1 << 4) },
266 { MC
| AC
, (1 << 6) },
267 { MC
| AUF
, (1 << 6) },
270 uint8_t flags_present
= 0;
271 if (cond
->ac
!= V3D_QPU_COND_NONE
)
273 if (cond
->mc
!= V3D_QPU_COND_NONE
)
275 if (cond
->apf
!= V3D_QPU_PF_NONE
)
276 flags_present
|= APF
;
277 if (cond
->mpf
!= V3D_QPU_PF_NONE
)
278 flags_present
|= MPF
;
279 if (cond
->auf
!= V3D_QPU_UF_NONE
)
280 flags_present
|= AUF
;
281 if (cond
->muf
!= V3D_QPU_UF_NONE
)
282 flags_present
|= MUF
;
284 for (int i
= 0; i
< ARRAY_SIZE(flags_table
); i
++) {
285 if (flags_table
[i
].flags_present
!= flags_present
)
288 *packed_cond
= flags_table
[i
].bits
;
290 *packed_cond
|= cond
->apf
;
291 *packed_cond
|= cond
->mpf
;
293 if (flags_present
& AUF
)
294 *packed_cond
|= cond
->auf
- V3D_QPU_UF_ANDZ
+ 4;
295 if (flags_present
& MUF
)
296 *packed_cond
|= cond
->muf
- V3D_QPU_UF_ANDZ
+ 4;
298 if (flags_present
& AC
)
299 *packed_cond
|= (cond
->ac
- V3D_QPU_COND_IFA
) << 2;
301 if (flags_present
& MC
) {
302 if (*packed_cond
& (1 << 6))
303 *packed_cond
|= (cond
->mc
-
304 V3D_QPU_COND_IFA
) << 4;
306 *packed_cond
|= (cond
->mc
-
307 V3D_QPU_COND_IFA
) << 2;
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux_b field as well.
 */

/* Bitmask with one bit per mux value, covering mux values bot..top
 * (inclusive).  Both arguments are parenthesized so that expression
 * arguments (e.g. MUX_MASK(0, a | b)) are evaluated as a unit.
 */
#define MUX_MASK(bot, top) (((1 << ((top) + 1)) - 1) - ((1 << (bot)) - 1))
/* Mask that accepts all eight mux values (0..7). */
#define ANYMUX MUX_MASK(0, 7)
324 uint8_t opcode_first
;
329 /* 0 if it's the same across V3D versions, or a specific V3D version. */
333 static const struct opcode_desc add_ops
[] = {
334 /* FADD is instead FADDNF depending on the order of the mux_a/mux_b. */
335 { 0, 47, ANYMUX
, ANYMUX
, V3D_QPU_A_FADD
},
336 { 0, 47, ANYMUX
, ANYMUX
, V3D_QPU_A_FADDNF
},
337 { 53, 55, ANYMUX
, ANYMUX
, V3D_QPU_A_VFPACK
},
338 { 56, 56, ANYMUX
, ANYMUX
, V3D_QPU_A_ADD
},
339 { 57, 59, ANYMUX
, ANYMUX
, V3D_QPU_A_VFPACK
},
340 { 60, 60, ANYMUX
, ANYMUX
, V3D_QPU_A_SUB
},
341 { 61, 63, ANYMUX
, ANYMUX
, V3D_QPU_A_VFPACK
},
342 { 64, 111, ANYMUX
, ANYMUX
, V3D_QPU_A_FSUB
},
343 { 120, 120, ANYMUX
, ANYMUX
, V3D_QPU_A_MIN
},
344 { 121, 121, ANYMUX
, ANYMUX
, V3D_QPU_A_MAX
},
345 { 122, 122, ANYMUX
, ANYMUX
, V3D_QPU_A_UMIN
},
346 { 123, 123, ANYMUX
, ANYMUX
, V3D_QPU_A_UMAX
},
347 { 124, 124, ANYMUX
, ANYMUX
, V3D_QPU_A_SHL
},
348 { 125, 125, ANYMUX
, ANYMUX
, V3D_QPU_A_SHR
},
349 { 126, 126, ANYMUX
, ANYMUX
, V3D_QPU_A_ASR
},
350 { 127, 127, ANYMUX
, ANYMUX
, V3D_QPU_A_ROR
},
351 /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
352 { 128, 175, ANYMUX
, ANYMUX
, V3D_QPU_A_FMIN
},
353 { 128, 175, ANYMUX
, ANYMUX
, V3D_QPU_A_FMAX
},
354 { 176, 180, ANYMUX
, ANYMUX
, V3D_QPU_A_VFMIN
},
356 { 181, 181, ANYMUX
, ANYMUX
, V3D_QPU_A_AND
},
357 { 182, 182, ANYMUX
, ANYMUX
, V3D_QPU_A_OR
},
358 { 183, 183, ANYMUX
, ANYMUX
, V3D_QPU_A_XOR
},
360 { 184, 184, ANYMUX
, ANYMUX
, V3D_QPU_A_VADD
},
361 { 185, 185, ANYMUX
, ANYMUX
, V3D_QPU_A_VSUB
},
362 { 186, 186, 1 << 0, ANYMUX
, V3D_QPU_A_NOT
},
363 { 186, 186, 1 << 1, ANYMUX
, V3D_QPU_A_NEG
},
364 { 186, 186, 1 << 2, ANYMUX
, V3D_QPU_A_FLAPUSH
},
365 { 186, 186, 1 << 3, ANYMUX
, V3D_QPU_A_FLBPUSH
},
366 { 186, 186, 1 << 4, ANYMUX
, V3D_QPU_A_FLBPOP
},
367 { 186, 186, 1 << 6, ANYMUX
, V3D_QPU_A_SETMSF
},
368 { 186, 186, 1 << 7, ANYMUX
, V3D_QPU_A_SETREVF
},
369 { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP
, 0 },
370 { 187, 187, 1 << 0, 1 << 1, V3D_QPU_A_TIDX
},
371 { 187, 187, 1 << 0, 1 << 2, V3D_QPU_A_EIDX
},
372 { 187, 187, 1 << 0, 1 << 3, V3D_QPU_A_LR
},
373 { 187, 187, 1 << 0, 1 << 4, V3D_QPU_A_VFLA
},
374 { 187, 187, 1 << 0, 1 << 5, V3D_QPU_A_VFLNA
},
375 { 187, 187, 1 << 0, 1 << 6, V3D_QPU_A_VFLB
},
376 { 187, 187, 1 << 0, 1 << 7, V3D_QPU_A_VFLNB
},
378 { 187, 187, 1 << 1, MUX_MASK(0, 2), V3D_QPU_A_FXCD
},
379 { 187, 187, 1 << 1, 1 << 3, V3D_QPU_A_XCD
},
380 { 187, 187, 1 << 1, MUX_MASK(4, 6), V3D_QPU_A_FYCD
},
381 { 187, 187, 1 << 1, 1 << 7, V3D_QPU_A_YCD
},
383 { 187, 187, 1 << 2, 1 << 0, V3D_QPU_A_MSF
},
384 { 187, 187, 1 << 2, 1 << 1, V3D_QPU_A_REVF
},
385 { 187, 187, 1 << 2, 1 << 2, V3D_QPU_A_VDWWT
},
386 { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT
},
387 { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT
},
389 { 187, 187, 1 << 3, ANYMUX
, V3D_QPU_A_VPMSETUP
},
391 /* FIXME: MORE COMPLICATED */
392 /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
394 { 192, 239, ANYMUX
, ANYMUX
, V3D_QPU_A_FCMP
},
395 { 240, 244, ANYMUX
, ANYMUX
, V3D_QPU_A_VFMAX
},
397 { 245, 245, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_FROUND
},
398 { 245, 245, 1 << 3, ANYMUX
, V3D_QPU_A_FTOIN
},
399 { 245, 245, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_FTRUNC
},
400 { 245, 245, 1 << 7, ANYMUX
, V3D_QPU_A_FTOIZ
},
401 { 246, 246, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_FFLOOR
},
402 { 246, 246, 1 << 3, ANYMUX
, V3D_QPU_A_FTOUZ
},
403 { 246, 246, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_FCEIL
},
404 { 246, 246, 1 << 7, ANYMUX
, V3D_QPU_A_FTOC
},
406 { 247, 247, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_FDX
},
407 { 247, 247, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_FDY
},
409 /* The stvpms are distinguished by the waddr field. */
410 { 248, 248, ANYMUX
, ANYMUX
, V3D_QPU_A_STVPMV
},
411 { 248, 248, ANYMUX
, ANYMUX
, V3D_QPU_A_STVPMD
},
412 { 248, 248, ANYMUX
, ANYMUX
, V3D_QPU_A_STVPMP
},
414 { 252, 252, MUX_MASK(0, 2), ANYMUX
, V3D_QPU_A_ITOF
},
415 { 252, 252, 1 << 3, ANYMUX
, V3D_QPU_A_CLZ
},
416 { 252, 252, MUX_MASK(4, 6), ANYMUX
, V3D_QPU_A_UTOF
},
419 static const struct opcode_desc mul_ops
[] = {
420 { 1, 1, ANYMUX
, ANYMUX
, V3D_QPU_M_ADD
},
421 { 2, 2, ANYMUX
, ANYMUX
, V3D_QPU_M_SUB
},
422 { 3, 3, ANYMUX
, ANYMUX
, V3D_QPU_M_UMUL24
},
423 { 4, 8, ANYMUX
, ANYMUX
, V3D_QPU_M_VFMUL
},
424 { 9, 9, ANYMUX
, ANYMUX
, V3D_QPU_M_SMUL24
},
425 { 10, 10, ANYMUX
, ANYMUX
, V3D_QPU_M_MULTOP
},
426 { 14, 14, ANYMUX
, ANYMUX
, V3D_QPU_M_FMOV
},
427 { 15, 15, MUX_MASK(0, 3), ANYMUX
, V3D_QPU_M_FMOV
},
428 { 15, 15, 1 << 4, 1 << 0, V3D_QPU_M_NOP
, 0 },
429 { 15, 15, 1 << 7, ANYMUX
, V3D_QPU_M_MOV
},
430 { 16, 63, ANYMUX
, ANYMUX
, V3D_QPU_M_FMUL
},
433 static const struct opcode_desc
*
434 lookup_opcode(const struct opcode_desc
*opcodes
, size_t num_opcodes
,
435 uint32_t opcode
, uint32_t mux_a
, uint32_t mux_b
)
437 for (int i
= 0; i
< num_opcodes
; i
++) {
438 const struct opcode_desc
*op_desc
= &opcodes
[i
];
440 if (opcode
< op_desc
->opcode_first
||
441 opcode
> op_desc
->opcode_last
)
444 if (!(op_desc
->mux_b_mask
& (1 << mux_b
)))
447 if (!(op_desc
->mux_a_mask
& (1 << mux_a
)))
457 v3d_qpu_float32_unpack_unpack(uint32_t packed
,
458 enum v3d_qpu_input_unpack
*unpacked
)
462 *unpacked
= V3D_QPU_UNPACK_ABS
;
465 *unpacked
= V3D_QPU_UNPACK_NONE
;
468 *unpacked
= V3D_QPU_UNPACK_L
;
471 *unpacked
= V3D_QPU_UNPACK_H
;
479 v3d_qpu_float32_unpack_pack(enum v3d_qpu_input_unpack unpacked
,
483 case V3D_QPU_UNPACK_ABS
:
486 case V3D_QPU_UNPACK_NONE
:
489 case V3D_QPU_UNPACK_L
:
492 case V3D_QPU_UNPACK_H
:
501 v3d_qpu_float16_unpack_unpack(uint32_t packed
,
502 enum v3d_qpu_input_unpack
*unpacked
)
506 *unpacked
= V3D_QPU_UNPACK_NONE
;
509 *unpacked
= V3D_QPU_UNPACK_REPLICATE_32F_16
;
512 *unpacked
= V3D_QPU_UNPACK_REPLICATE_L_16
;
515 *unpacked
= V3D_QPU_UNPACK_REPLICATE_H_16
;
518 *unpacked
= V3D_QPU_UNPACK_SWAP_16
;
526 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked
,
530 case V3D_QPU_UNPACK_NONE
:
533 case V3D_QPU_UNPACK_REPLICATE_32F_16
:
536 case V3D_QPU_UNPACK_REPLICATE_L_16
:
539 case V3D_QPU_UNPACK_REPLICATE_H_16
:
542 case V3D_QPU_UNPACK_SWAP_16
:
551 v3d_qpu_float32_pack_pack(enum v3d_qpu_input_unpack unpacked
,
555 case V3D_QPU_PACK_NONE
:
570 v3d_qpu_add_unpack(const struct v3d_device_info
*devinfo
, uint64_t packed_inst
,
571 struct v3d_qpu_instr
*instr
)
573 uint32_t op
= QPU_GET_FIELD(packed_inst
, VC5_QPU_OP_ADD
);
574 uint32_t mux_a
= QPU_GET_FIELD(packed_inst
, VC5_QPU_ADD_A
);
575 uint32_t mux_b
= QPU_GET_FIELD(packed_inst
, VC5_QPU_ADD_B
);
576 uint32_t waddr
= QPU_GET_FIELD(packed_inst
, V3D_QPU_WADDR_A
);
578 uint32_t map_op
= op
;
579 /* Some big clusters of opcodes are replicated with unpack
582 if (map_op
>= 249 && map_op
<= 251)
583 map_op
= (map_op
- 249 + 245);
584 if (map_op
>= 253 && map_op
<= 255)
585 map_op
= (map_op
- 253 + 245);
587 const struct opcode_desc
*desc
=
588 lookup_opcode(add_ops
, ARRAY_SIZE(add_ops
),
589 map_op
, mux_a
, mux_b
);
593 instr
->alu
.add
.op
= desc
->op
;
595 /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
598 if (((op
>> 2) & 3) * 8 + mux_a
> (op
& 3) * 8 + mux_b
) {
599 if (instr
->alu
.add
.op
== V3D_QPU_A_FMIN
)
600 instr
->alu
.add
.op
= V3D_QPU_A_FMAX
;
601 if (instr
->alu
.add
.op
== V3D_QPU_A_FADD
)
602 instr
->alu
.add
.op
= V3D_QPU_A_FADDNF
;
605 /* Some QPU ops require a bit more than just basic opcode and mux a/b
606 * comparisons to distinguish them.
608 switch (instr
->alu
.add
.op
) {
609 case V3D_QPU_A_STVPMV
:
610 case V3D_QPU_A_STVPMD
:
611 case V3D_QPU_A_STVPMP
:
614 instr
->alu
.add
.op
= V3D_QPU_A_STVPMV
;
617 instr
->alu
.add
.op
= V3D_QPU_A_STVPMD
;
620 instr
->alu
.add
.op
= V3D_QPU_A_STVPMP
;
630 switch (instr
->alu
.add
.op
) {
632 case V3D_QPU_A_FADDNF
:
637 instr
->alu
.add
.output_pack
= (op
>> 4) & 0x3;
639 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
640 &instr
->alu
.add
.a_unpack
)) {
644 if (!v3d_qpu_float32_unpack_unpack((op
>> 0) & 0x3,
645 &instr
->alu
.add
.b_unpack
)) {
650 case V3D_QPU_A_FFLOOR
:
651 case V3D_QPU_A_FROUND
:
652 case V3D_QPU_A_FTRUNC
:
653 case V3D_QPU_A_FCEIL
:
656 instr
->alu
.add
.output_pack
= mux_b
& 0x3;
658 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
659 &instr
->alu
.add
.a_unpack
)) {
664 case V3D_QPU_A_FTOIN
:
665 case V3D_QPU_A_FTOIZ
:
666 case V3D_QPU_A_FTOUZ
:
668 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
670 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
671 &instr
->alu
.add
.a_unpack
)) {
676 case V3D_QPU_A_VFMIN
:
677 case V3D_QPU_A_VFMAX
:
678 if (!v3d_qpu_float16_unpack_unpack(op
& 0x7,
679 &instr
->alu
.add
.a_unpack
)) {
683 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
684 instr
->alu
.add
.b_unpack
= V3D_QPU_UNPACK_NONE
;
688 instr
->alu
.add
.output_pack
= V3D_QPU_PACK_NONE
;
689 instr
->alu
.add
.a_unpack
= V3D_QPU_UNPACK_NONE
;
690 instr
->alu
.add
.b_unpack
= V3D_QPU_UNPACK_NONE
;
694 instr
->alu
.add
.a
= mux_a
;
695 instr
->alu
.add
.b
= mux_b
;
696 instr
->alu
.add
.waddr
= QPU_GET_FIELD(packed_inst
, V3D_QPU_WADDR_A
);
697 instr
->alu
.add
.magic_write
= packed_inst
& VC5_QPU_MA
;
703 v3d_qpu_mul_unpack(const struct v3d_device_info
*devinfo
, uint64_t packed_inst
,
704 struct v3d_qpu_instr
*instr
)
706 uint32_t op
= QPU_GET_FIELD(packed_inst
, VC5_QPU_OP_MUL
);
707 uint32_t mux_a
= QPU_GET_FIELD(packed_inst
, VC5_QPU_MUL_A
);
708 uint32_t mux_b
= QPU_GET_FIELD(packed_inst
, VC5_QPU_MUL_B
);
711 const struct opcode_desc
*desc
=
712 lookup_opcode(mul_ops
, ARRAY_SIZE(mul_ops
),
717 instr
->alu
.mul
.op
= desc
->op
;
720 switch (instr
->alu
.mul
.op
) {
722 instr
->alu
.mul
.output_pack
= ((op
>> 4) & 0x3) - 1;
724 if (!v3d_qpu_float32_unpack_unpack((op
>> 2) & 0x3,
725 &instr
->alu
.mul
.a_unpack
)) {
729 if (!v3d_qpu_float32_unpack_unpack((op
>> 0) & 0x3,
730 &instr
->alu
.mul
.b_unpack
)) {
737 instr
->alu
.mul
.output_pack
= (((op
& 1) << 1) +
740 if (!v3d_qpu_float32_unpack_unpack(mux_b
& 0x3,
741 &instr
->alu
.mul
.a_unpack
)) {
747 case V3D_QPU_M_VFMUL
:
748 instr
->alu
.mul
.output_pack
= V3D_QPU_PACK_NONE
;
750 if (!v3d_qpu_float16_unpack_unpack(((op
& 0x7) - 4) & 7,
751 &instr
->alu
.mul
.a_unpack
)) {
755 instr
->alu
.mul
.b_unpack
= V3D_QPU_UNPACK_NONE
;
760 instr
->alu
.mul
.output_pack
= V3D_QPU_PACK_NONE
;
761 instr
->alu
.mul
.a_unpack
= V3D_QPU_UNPACK_NONE
;
762 instr
->alu
.mul
.b_unpack
= V3D_QPU_UNPACK_NONE
;
766 instr
->alu
.mul
.a
= mux_a
;
767 instr
->alu
.mul
.b
= mux_b
;
768 instr
->alu
.mul
.waddr
= QPU_GET_FIELD(packed_inst
, V3D_QPU_WADDR_M
);
769 instr
->alu
.mul
.magic_write
= packed_inst
& VC5_QPU_MM
;
775 v3d_qpu_add_pack(const struct v3d_device_info
*devinfo
,
776 const struct v3d_qpu_instr
*instr
, uint64_t *packed_instr
)
778 uint32_t waddr
= instr
->alu
.add
.waddr
;
779 uint32_t mux_a
= instr
->alu
.add
.a
;
780 uint32_t mux_b
= instr
->alu
.add
.b
;
781 int nsrc
= v3d_qpu_add_op_num_src(instr
->alu
.add
.op
);
782 const struct opcode_desc
*desc
;
785 for (desc
= add_ops
; desc
!= &add_ops
[ARRAY_SIZE(add_ops
)];
787 if (desc
->op
== instr
->alu
.add
.op
)
790 if (desc
== &add_ops
[ARRAY_SIZE(add_ops
)])
793 opcode
= desc
->opcode_first
;
795 /* If an operation doesn't use an arg, its mux values may be used to
796 * identify the operation type.
799 mux_b
= ffs(desc
->mux_b_mask
) - 1;
802 mux_a
= ffs(desc
->mux_a_mask
) - 1;
804 switch (instr
->alu
.add
.op
) {
805 case V3D_QPU_A_STVPMV
:
808 case V3D_QPU_A_STVPMD
:
811 case V3D_QPU_A_STVPMP
:
818 switch (instr
->alu
.add
.op
) {
820 case V3D_QPU_A_FADDNF
:
824 case V3D_QPU_A_FCMP
: {
825 uint32_t output_pack
;
829 if (!v3d_qpu_float32_pack_pack(instr
->alu
.add
.output_pack
,
833 opcode
|= output_pack
<< 4;
835 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
840 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.b_unpack
,
845 /* These operations with commutative operands are
846 * distinguished by which order their operands come in.
848 bool ordering
= a_unpack
* 8 + mux_a
> b_unpack
* 8 + mux_b
;
849 if (((instr
->alu
.add
.op
== V3D_QPU_A_FMIN
||
850 instr
->alu
.add
.op
== V3D_QPU_A_FADD
) && ordering
) ||
851 ((instr
->alu
.add
.op
== V3D_QPU_A_FMAX
||
852 instr
->alu
.add
.op
== V3D_QPU_A_FADDNF
) && !ordering
)) {
864 opcode
|= a_unpack
<< 2;
865 opcode
|= b_unpack
<< 0;
869 case V3D_QPU_A_FFLOOR
:
870 case V3D_QPU_A_FROUND
:
871 case V3D_QPU_A_FTRUNC
:
872 case V3D_QPU_A_FCEIL
:
874 case V3D_QPU_A_FDY
: {
877 if (!v3d_qpu_float32_pack_pack(instr
->alu
.add
.output_pack
,
883 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
889 opcode
|= packed
<< 2;
893 case V3D_QPU_A_FTOIN
:
894 case V3D_QPU_A_FTOIZ
:
895 case V3D_QPU_A_FTOUZ
:
897 if (instr
->alu
.add
.output_pack
!= V3D_QPU_PACK_NONE
)
901 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.add
.a_unpack
,
907 opcode
|= packed
<< 2;
911 case V3D_QPU_A_VFMIN
:
912 case V3D_QPU_A_VFMAX
:
913 if (instr
->alu
.add
.output_pack
!= V3D_QPU_PACK_NONE
||
914 instr
->alu
.add
.b_unpack
!= V3D_QPU_UNPACK_NONE
) {
918 if (!v3d_qpu_float16_unpack_pack(instr
->alu
.add
.a_unpack
,
926 if (instr
->alu
.add
.op
!= V3D_QPU_A_NOP
&&
927 (instr
->alu
.add
.output_pack
!= V3D_QPU_PACK_NONE
||
928 instr
->alu
.add
.a_unpack
!= V3D_QPU_UNPACK_NONE
||
929 instr
->alu
.add
.b_unpack
!= V3D_QPU_UNPACK_NONE
)) {
935 *packed_instr
|= QPU_SET_FIELD(mux_a
, VC5_QPU_ADD_A
);
936 *packed_instr
|= QPU_SET_FIELD(mux_b
, VC5_QPU_ADD_B
);
937 *packed_instr
|= QPU_SET_FIELD(opcode
, VC5_QPU_OP_ADD
);
938 *packed_instr
|= QPU_SET_FIELD(waddr
, V3D_QPU_WADDR_A
);
939 if (instr
->alu
.add
.magic_write
)
940 *packed_instr
|= VC5_QPU_MA
;
946 v3d_qpu_mul_pack(const struct v3d_device_info
*devinfo
,
947 const struct v3d_qpu_instr
*instr
, uint64_t *packed_instr
)
949 uint32_t mux_a
= instr
->alu
.mul
.a
;
950 uint32_t mux_b
= instr
->alu
.mul
.b
;
951 int nsrc
= v3d_qpu_mul_op_num_src(instr
->alu
.mul
.op
);
952 const struct opcode_desc
*desc
;
954 for (desc
= mul_ops
; desc
!= &mul_ops
[ARRAY_SIZE(mul_ops
)];
956 if (desc
->op
== instr
->alu
.mul
.op
)
959 if (desc
== &mul_ops
[ARRAY_SIZE(mul_ops
)])
962 uint32_t opcode
= desc
->opcode_first
;
964 /* Some opcodes have a single valid value for their mux a/b, so set
965 * that here. If mux a/b determine packing, it will be set below.
968 mux_b
= ffs(desc
->mux_b_mask
) - 1;
971 mux_a
= ffs(desc
->mux_a_mask
) - 1;
973 switch (instr
->alu
.mul
.op
) {
974 case V3D_QPU_M_FMUL
: {
977 if (!v3d_qpu_float32_pack_pack(instr
->alu
.mul
.output_pack
,
981 /* No need for a +1 because desc->opcode_first has a 1 in this
984 opcode
+= packed
<< 4;
986 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.mul
.a_unpack
,
990 opcode
|= packed
<< 2;
992 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.mul
.b_unpack
,
996 opcode
|= packed
<< 0;
1000 case V3D_QPU_M_FMOV
: {
1003 if (!v3d_qpu_float32_pack_pack(instr
->alu
.mul
.output_pack
,
1007 opcode
|= (packed
>> 1) & 1;
1008 mux_b
= (packed
& 1) << 2;
1010 if (!v3d_qpu_float32_unpack_pack(instr
->alu
.mul
.a_unpack
,
1018 case V3D_QPU_M_VFMUL
: {
1021 if (instr
->alu
.mul
.output_pack
!= V3D_QPU_PACK_NONE
)
1024 if (!v3d_qpu_float16_unpack_pack(instr
->alu
.mul
.a_unpack
,
1028 if (instr
->alu
.mul
.a_unpack
== V3D_QPU_UNPACK_SWAP_16
)
1031 opcode
|= (packed
+ 4) & 7;
1033 if (instr
->alu
.mul
.b_unpack
!= V3D_QPU_UNPACK_NONE
)
1043 *packed_instr
|= QPU_SET_FIELD(mux_a
, VC5_QPU_MUL_A
);
1044 *packed_instr
|= QPU_SET_FIELD(mux_b
, VC5_QPU_MUL_B
);
1046 *packed_instr
|= QPU_SET_FIELD(opcode
, VC5_QPU_OP_MUL
);
1047 *packed_instr
|= QPU_SET_FIELD(instr
->alu
.mul
.waddr
, V3D_QPU_WADDR_M
);
1048 if (instr
->alu
.mul
.magic_write
)
1049 *packed_instr
|= VC5_QPU_MM
;
1055 v3d_qpu_instr_unpack_alu(const struct v3d_device_info
*devinfo
,
1056 uint64_t packed_instr
,
1057 struct v3d_qpu_instr
*instr
)
1059 instr
->type
= V3D_QPU_INSTR_TYPE_ALU
;
1061 if (!v3d_qpu_sig_unpack(devinfo
,
1062 QPU_GET_FIELD(packed_instr
, VC5_QPU_SIG
),
1066 if (!v3d_qpu_flags_unpack(devinfo
,
1067 QPU_GET_FIELD(packed_instr
, VC5_QPU_COND
),
1071 instr
->raddr_a
= QPU_GET_FIELD(packed_instr
, VC5_QPU_RADDR_A
);
1072 instr
->raddr_b
= QPU_GET_FIELD(packed_instr
, VC5_QPU_RADDR_B
);
1074 if (!v3d_qpu_add_unpack(devinfo
, packed_instr
, instr
))
1077 if (!v3d_qpu_mul_unpack(devinfo
, packed_instr
, instr
))
1084 v3d_qpu_instr_unpack_branch(const struct v3d_device_info
*devinfo
,
1085 uint64_t packed_instr
,
1086 struct v3d_qpu_instr
*instr
)
1088 instr
->type
= V3D_QPU_INSTR_TYPE_BRANCH
;
1090 uint32_t cond
= QPU_GET_FIELD(packed_instr
, VC5_QPU_BRANCH_COND
);
1092 instr
->branch
.cond
= V3D_QPU_BRANCH_COND_ALWAYS
;
1093 else if (V3D_QPU_BRANCH_COND_A0
+ (cond
- 2) <=
1094 V3D_QPU_BRANCH_COND_ALLNA
)
1095 instr
->branch
.cond
= V3D_QPU_BRANCH_COND_A0
+ (cond
- 2);
1099 uint32_t msfign
= QPU_GET_FIELD(packed_instr
, VC5_QPU_BRANCH_MSFIGN
);
1102 instr
->branch
.msfign
= msfign
;
1104 instr
->branch
.bdi
= QPU_GET_FIELD(packed_instr
, VC5_QPU_BRANCH_BDI
);
1106 instr
->branch
.ub
= packed_instr
& VC5_QPU_BRANCH_UB
;
1107 if (instr
->branch
.ub
) {
1108 instr
->branch
.bdu
= QPU_GET_FIELD(packed_instr
,
1109 VC5_QPU_BRANCH_BDU
);
1112 instr
->branch
.raddr_a
= QPU_GET_FIELD(packed_instr
,
1115 instr
->branch
.offset
= 0;
1117 instr
->branch
.offset
+=
1118 QPU_GET_FIELD(packed_instr
,
1119 VC5_QPU_BRANCH_ADDR_LOW
) << 3;
1121 instr
->branch
.offset
+=
1122 QPU_GET_FIELD(packed_instr
,
1123 VC5_QPU_BRANCH_ADDR_HIGH
) << 24;
1129 v3d_qpu_instr_unpack(const struct v3d_device_info
*devinfo
,
1130 uint64_t packed_instr
,
1131 struct v3d_qpu_instr
*instr
)
1133 if (QPU_GET_FIELD(packed_instr
, VC5_QPU_OP_MUL
) != 0) {
1134 return v3d_qpu_instr_unpack_alu(devinfo
, packed_instr
, instr
);
1136 uint32_t sig
= QPU_GET_FIELD(packed_instr
, VC5_QPU_SIG
);
1138 if ((sig
& 24) == 16) {
1139 return v3d_qpu_instr_unpack_branch(devinfo
, packed_instr
,
1148 v3d_qpu_instr_pack_alu(const struct v3d_device_info
*devinfo
,
1149 const struct v3d_qpu_instr
*instr
,
1150 uint64_t *packed_instr
)
1153 if (!v3d_qpu_sig_pack(devinfo
, &instr
->sig
, &sig
))
1155 *packed_instr
|= QPU_SET_FIELD(sig
, VC5_QPU_SIG
);
1157 if (instr
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
1158 *packed_instr
|= QPU_SET_FIELD(instr
->raddr_a
, VC5_QPU_RADDR_A
);
1159 *packed_instr
|= QPU_SET_FIELD(instr
->raddr_b
, VC5_QPU_RADDR_B
);
1161 if (!v3d_qpu_add_pack(devinfo
, instr
, packed_instr
))
1163 if (!v3d_qpu_mul_pack(devinfo
, instr
, packed_instr
))
1167 if (!v3d_qpu_flags_pack(devinfo
, &instr
->flags
, &flags
))
1169 *packed_instr
|= QPU_SET_FIELD(flags
, VC5_QPU_COND
);
1176 v3d_qpu_instr_pack_branch(const struct v3d_device_info
*devinfo
,
1177 const struct v3d_qpu_instr
*instr
,
1178 uint64_t *packed_instr
)
1180 *packed_instr
|= QPU_SET_FIELD(16, VC5_QPU_SIG
);
1182 if (instr
->branch
.cond
!= V3D_QPU_BRANCH_COND_ALWAYS
) {
1183 *packed_instr
|= QPU_SET_FIELD(2 + (instr
->branch
.cond
-
1184 V3D_QPU_BRANCH_COND_A0
),
1185 VC5_QPU_BRANCH_COND
);
1188 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.msfign
,
1189 VC5_QPU_BRANCH_MSFIGN
);
1191 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.bdi
,
1192 VC5_QPU_BRANCH_BDI
);
1194 if (instr
->branch
.ub
) {
1195 *packed_instr
|= VC5_QPU_BRANCH_UB
;
1196 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.bdu
,
1197 VC5_QPU_BRANCH_BDU
);
1200 switch (instr
->branch
.bdi
) {
1201 case V3D_QPU_BRANCH_DEST_ABS
:
1202 case V3D_QPU_BRANCH_DEST_REL
:
1203 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.msfign
,
1204 VC5_QPU_BRANCH_MSFIGN
);
1206 *packed_instr
|= QPU_SET_FIELD((instr
->branch
.offset
&
1208 VC5_QPU_BRANCH_ADDR_LOW
);
1210 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.offset
>> 24,
1211 VC5_QPU_BRANCH_ADDR_HIGH
);
1213 case V3D_QPU_BRANCH_DEST_REGFILE
:
1214 *packed_instr
|= QPU_SET_FIELD(instr
->branch
.raddr_a
,
1226 v3d_qpu_instr_pack(const struct v3d_device_info
*devinfo
,
1227 const struct v3d_qpu_instr
*instr
,
1228 uint64_t *packed_instr
)
1232 switch (instr
->type
) {
1233 case V3D_QPU_INSTR_TYPE_ALU
:
1234 return v3d_qpu_instr_pack_alu(devinfo
, instr
, packed_instr
);
1235 case V3D_QPU_INSTR_TYPE_BRANCH
:
1236 return v3d_qpu_instr_pack_branch(devinfo
, instr
, packed_instr
);