2 * Copyright © 2016 Broadcom
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
#include <stddef.h>

#include "util/macros.h"
#include "broadcom/common/v3d_device_info.h"
#include "qpu_instr.h"
30 v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr
)
32 static const char *waddr_magic
[] = {
33 [V3D_QPU_WADDR_R0
] = "r0",
34 [V3D_QPU_WADDR_R1
] = "r1",
35 [V3D_QPU_WADDR_R2
] = "r2",
36 [V3D_QPU_WADDR_R3
] = "r3",
37 [V3D_QPU_WADDR_R4
] = "r4",
38 [V3D_QPU_WADDR_R5
] = "r5",
39 [V3D_QPU_WADDR_NOP
] = "-",
40 [V3D_QPU_WADDR_TLB
] = "tlb",
41 [V3D_QPU_WADDR_TLBU
] = "tlbu",
42 [V3D_QPU_WADDR_TMU
] = "tmu",
43 [V3D_QPU_WADDR_TMUL
] = "tmul",
44 [V3D_QPU_WADDR_TMUD
] = "tmud",
45 [V3D_QPU_WADDR_TMUA
] = "tmua",
46 [V3D_QPU_WADDR_TMUAU
] = "tmuau",
47 [V3D_QPU_WADDR_VPM
] = "vpm",
48 [V3D_QPU_WADDR_VPMU
] = "vpmu",
49 [V3D_QPU_WADDR_SYNC
] = "sync",
50 [V3D_QPU_WADDR_SYNCU
] = "syncu",
51 [V3D_QPU_WADDR_RECIP
] = "recip",
52 [V3D_QPU_WADDR_RSQRT
] = "rsqrt",
53 [V3D_QPU_WADDR_EXP
] = "exp",
54 [V3D_QPU_WADDR_LOG
] = "log",
55 [V3D_QPU_WADDR_SIN
] = "sin",
56 [V3D_QPU_WADDR_RSQRT2
] = "rsqrt2",
59 return waddr_magic
[waddr
];
/**
 * Looks up the mnemonic string of an add-ALU opcode.
 *
 * The static op_names table is indexed by enum v3d_qpu_add_op through
 * designated initializers; any slot without an initializer holds NULL.
 * op is compared against ARRAY_SIZE(op_names) before the table is used.
 *
 * NOTE(review): the statement guarded by that bounds check and the final
 * return are not visible in this excerpt -- presumably out-of-range values
 * yield NULL and everything else yields op_names[op]; confirm.
 */
63 v3d_qpu_add_op_name(enum v3d_qpu_add_op op
)
65 static const char *op_names
[] = {
66 [V3D_QPU_A_FADD
] = "fadd",
67 [V3D_QPU_A_FADDNF
] = "faddnf",
68 [V3D_QPU_A_VFPACK
] = "vfpack",
69 [V3D_QPU_A_ADD
] = "add",
70 [V3D_QPU_A_SUB
] = "sub",
71 [V3D_QPU_A_FSUB
] = "fsub",
72 [V3D_QPU_A_MIN
] = "min",
73 [V3D_QPU_A_MAX
] = "max",
74 [V3D_QPU_A_UMIN
] = "umin",
75 [V3D_QPU_A_UMAX
] = "umax",
76 [V3D_QPU_A_SHL
] = "shl",
77 [V3D_QPU_A_SHR
] = "shr",
78 [V3D_QPU_A_ASR
] = "asr",
79 [V3D_QPU_A_ROR
] = "ror",
80 [V3D_QPU_A_FMIN
] = "fmin",
81 [V3D_QPU_A_FMAX
] = "fmax",
82 [V3D_QPU_A_VFMIN
] = "vfmin",
83 [V3D_QPU_A_AND
] = "and",
84 [V3D_QPU_A_OR
] = "or",
85 [V3D_QPU_A_XOR
] = "xor",
86 [V3D_QPU_A_VADD
] = "vadd",
87 [V3D_QPU_A_VSUB
] = "vsub",
88 [V3D_QPU_A_NOT
] = "not",
89 [V3D_QPU_A_NEG
] = "neg",
90 [V3D_QPU_A_FLAPUSH
] = "flapush",
91 [V3D_QPU_A_FLBPUSH
] = "flbpush",
92 [V3D_QPU_A_FLBPOP
] = "flbpop",
93 [V3D_QPU_A_SETMSF
] = "setmsf",
94 [V3D_QPU_A_SETREVF
] = "setrevf",
95 [V3D_QPU_A_NOP
] = "nop",
96 [V3D_QPU_A_TIDX
] = "tidx",
97 [V3D_QPU_A_EIDX
] = "eidx",
98 [V3D_QPU_A_LR
] = "lr",
99 [V3D_QPU_A_VFLA
] = "vfla",
100 [V3D_QPU_A_VFLNA
] = "vflna",
101 [V3D_QPU_A_VFLB
] = "vflb",
102 [V3D_QPU_A_VFLNB
] = "vflnb",
103 [V3D_QPU_A_FXCD
] = "fxcd",
104 [V3D_QPU_A_XCD
] = "xcd",
105 [V3D_QPU_A_FYCD
] = "fycd",
106 [V3D_QPU_A_YCD
] = "ycd",
107 [V3D_QPU_A_MSF
] = "msf",
108 [V3D_QPU_A_REVF
] = "revf",
109 [V3D_QPU_A_VDWWT
] = "vdwwt",
110 [V3D_QPU_A_IID
] = "iid",
111 [V3D_QPU_A_SAMPID
] = "sampid",
112 [V3D_QPU_A_PATCHID
] = "patchid",
113 [V3D_QPU_A_TMUWT
] = "tmuwt",
114 [V3D_QPU_A_VPMSETUP
] = "vpmsetup",
115 [V3D_QPU_A_VPMWT
] = "vpmwt",
116 [V3D_QPU_A_LDVPMV
] = "ldvpmv",
117 [V3D_QPU_A_LDVPMD
] = "ldvpmd",
118 [V3D_QPU_A_LDVPMP
] = "ldvpmp",
119 [V3D_QPU_A_LDVPMG
] = "ldvpmg",
120 [V3D_QPU_A_FCMP
] = "fcmp",
121 [V3D_QPU_A_VFMAX
] = "vfmax",
122 [V3D_QPU_A_FROUND
] = "fround",
123 [V3D_QPU_A_FTOIN
] = "ftoin",
124 [V3D_QPU_A_FTRUNC
] = "ftrunc",
125 [V3D_QPU_A_FTOIZ
] = "ftoiz",
126 [V3D_QPU_A_FFLOOR
] = "ffloor",
127 [V3D_QPU_A_FTOUZ
] = "ftouz",
128 [V3D_QPU_A_FCEIL
] = "fceil",
129 [V3D_QPU_A_FTOC
] = "ftoc",
130 [V3D_QPU_A_FDX
] = "fdx",
131 [V3D_QPU_A_FDY
] = "fdy",
132 [V3D_QPU_A_STVPMV
] = "stvpmv",
133 [V3D_QPU_A_STVPMD
] = "stvpmd",
134 [V3D_QPU_A_STVPMP
] = "stvpmp",
135 [V3D_QPU_A_ITOF
] = "itof",
136 [V3D_QPU_A_CLZ
] = "clz",
137 [V3D_QPU_A_UTOF
] = "utof",
140 if (op
>= ARRAY_SIZE(op_names
))
/**
 * Looks up the mnemonic string of a mul-ALU opcode.
 *
 * The static op_names table is indexed by enum v3d_qpu_mul_op through
 * designated initializers, and op is compared against
 * ARRAY_SIZE(op_names) before the table is used.
 *
 * NOTE(review): the statement guarded by that bounds check and the final
 * return are not visible in this excerpt -- presumably out-of-range values
 * yield NULL and everything else yields op_names[op]; confirm.
 */
147 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op
)
149 static const char *op_names
[] = {
150 [V3D_QPU_M_ADD
] = "add",
151 [V3D_QPU_M_SUB
] = "sub",
152 [V3D_QPU_M_UMUL24
] = "umul24",
153 [V3D_QPU_M_VFMUL
] = "vfmul",
154 [V3D_QPU_M_SMUL24
] = "smul24",
155 [V3D_QPU_M_MULTOP
] = "multop",
156 [V3D_QPU_M_FMOV
] = "fmov",
157 [V3D_QPU_M_MOV
] = "mov",
158 [V3D_QPU_M_NOP
] = "nop",
159 [V3D_QPU_M_FMUL
] = "fmul",
162 if (op
>= ARRAY_SIZE(op_names
))
/**
 * Names an instruction condition code (enum v3d_qpu_cond).
 *
 * Case labels exist for NONE, IFA, IFB, IFNA and IFNB; after them the
 * function calls unreachable("bad cond value").
 *
 * NOTE(review): the value returned for each case is not visible in this
 * excerpt.
 */
169 v3d_qpu_cond_name(enum v3d_qpu_cond cond
)
172 case V3D_QPU_COND_NONE
:
174 case V3D_QPU_COND_IFA
:
176 case V3D_QPU_COND_IFB
:
178 case V3D_QPU_COND_IFNA
:
180 case V3D_QPU_COND_IFNB
:
183 unreachable("bad cond value");
/**
 * Names a branch condition (enum v3d_qpu_branch_cond).
 *
 * Case labels exist for ALWAYS, A0, NA0, ALLA, ANYNA, ANYA and ALLNA;
 * after them the function calls unreachable("bad branch cond value").
 *
 * NOTE(review): the value returned for each case is not visible in this
 * excerpt.
 */
188 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond
)
191 case V3D_QPU_BRANCH_COND_ALWAYS
:
193 case V3D_QPU_BRANCH_COND_A0
:
195 case V3D_QPU_BRANCH_COND_NA0
:
197 case V3D_QPU_BRANCH_COND_ALLA
:
199 case V3D_QPU_BRANCH_COND_ANYNA
:
201 case V3D_QPU_BRANCH_COND_ANYA
:
203 case V3D_QPU_BRANCH_COND_ALLNA
:
206 unreachable("bad branch cond value");
/**
 * Names an msfign value (enum v3d_qpu_msfign).
 *
 * Case labels exist for NONE, P and Q.
 *
 * NOTE(review): the value returned for each case is not visible in this
 * excerpt.
 * NOTE(review): the unreachable() text reads "bad branch cond value" even
 * though this function switches on msfign -- looks copied from the branch
 * condition lookup; consider "bad msfign value".
 */
211 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign
)
214 case V3D_QPU_MSFIGN_NONE
:
216 case V3D_QPU_MSFIGN_P
:
218 case V3D_QPU_MSFIGN_Q
:
221 unreachable("bad branch cond value");
/**
 * Names a pf value (enum v3d_qpu_pf).
 *
 * Case labels exist for NONE, PUSHZ, PUSHN and PUSHC; after them the
 * function calls unreachable("bad pf value").
 *
 * NOTE(review): the value returned for each case is not visible in this
 * excerpt.
 */
226 v3d_qpu_pf_name(enum v3d_qpu_pf pf
)
229 case V3D_QPU_PF_NONE
:
231 case V3D_QPU_PF_PUSHZ
:
233 case V3D_QPU_PF_PUSHN
:
235 case V3D_QPU_PF_PUSHC
:
238 unreachable("bad pf value");
/**
 * Names a uf value (enum v3d_qpu_uf).
 *
 * Case labels exist for NONE and for the AND/NOR variants with the
 * Z, NZ, N, NN, C and NC suffixes.
 *
 * NOTE(review): the value returned for each case is not visible in this
 * excerpt.
 * NOTE(review): the unreachable() text reads "bad pf value" even though
 * this function switches on uf -- looks copied from the pf lookup;
 * consider "bad uf value".
 */
243 v3d_qpu_uf_name(enum v3d_qpu_uf uf
)
246 case V3D_QPU_UF_NONE
:
248 case V3D_QPU_UF_ANDZ
:
250 case V3D_QPU_UF_ANDNZ
:
252 case V3D_QPU_UF_NORZ
:
254 case V3D_QPU_UF_NORNZ
:
256 case V3D_QPU_UF_ANDN
:
258 case V3D_QPU_UF_ANDNN
:
260 case V3D_QPU_UF_NORN
:
262 case V3D_QPU_UF_NORNN
:
264 case V3D_QPU_UF_ANDC
:
266 case V3D_QPU_UF_ANDNC
:
268 case V3D_QPU_UF_NORC
:
270 case V3D_QPU_UF_NORNC
:
273 unreachable("bad pf value");
/**
 * Names an output pack mode (enum v3d_qpu_output_pack).
 *
 * Only the V3D_QPU_PACK_NONE case label and the trailing
 * unreachable("bad pack value") call are visible in this excerpt.
 *
 * NOTE(review): any further case labels and all returned values are not
 * visible here.
 */
278 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack
)
281 case V3D_QPU_PACK_NONE
:
288 unreachable("bad pack value");
/**
 * Names an input unpack mode (enum v3d_qpu_input_unpack).
 *
 * Case labels exist for NONE, L, H, ABS, REPLICATE_32F_16,
 * REPLICATE_L_16, REPLICATE_H_16 and SWAP_16; after them the function
 * calls unreachable("bad unpack value").
 *
 * NOTE(review): the value returned for each case is not visible in this
 * excerpt.
 */
293 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack
)
296 case V3D_QPU_UNPACK_NONE
:
298 case V3D_QPU_UNPACK_L
:
300 case V3D_QPU_UNPACK_H
:
302 case V3D_QPU_UNPACK_ABS
:
304 case V3D_QPU_UNPACK_REPLICATE_32F_16
:
306 case V3D_QPU_UNPACK_REPLICATE_L_16
:
308 case V3D_QPU_UNPACK_REPLICATE_H_16
:
310 case V3D_QPU_UNPACK_SWAP_16
:
313 unreachable("bad unpack value");
320 static const uint8_t add_op_args
[] = {
321 [V3D_QPU_A_FADD
] = D
| A
| B
,
322 [V3D_QPU_A_FADDNF
] = D
| A
| B
,
323 [V3D_QPU_A_VFPACK
] = D
| A
| B
,
324 [V3D_QPU_A_ADD
] = D
| A
| B
,
325 [V3D_QPU_A_VFPACK
] = D
| A
| B
,
326 [V3D_QPU_A_SUB
] = D
| A
| B
,
327 [V3D_QPU_A_VFPACK
] = D
| A
| B
,
328 [V3D_QPU_A_FSUB
] = D
| A
| B
,
329 [V3D_QPU_A_MIN
] = D
| A
| B
,
330 [V3D_QPU_A_MAX
] = D
| A
| B
,
331 [V3D_QPU_A_UMIN
] = D
| A
| B
,
332 [V3D_QPU_A_UMAX
] = D
| A
| B
,
333 [V3D_QPU_A_SHL
] = D
| A
| B
,
334 [V3D_QPU_A_SHR
] = D
| A
| B
,
335 [V3D_QPU_A_ASR
] = D
| A
| B
,
336 [V3D_QPU_A_ROR
] = D
| A
| B
,
337 [V3D_QPU_A_FMIN
] = D
| A
| B
,
338 [V3D_QPU_A_FMAX
] = D
| A
| B
,
339 [V3D_QPU_A_VFMIN
] = D
| A
| B
,
341 [V3D_QPU_A_AND
] = D
| A
| B
,
342 [V3D_QPU_A_OR
] = D
| A
| B
,
343 [V3D_QPU_A_XOR
] = D
| A
| B
,
345 [V3D_QPU_A_VADD
] = D
| A
| B
,
346 [V3D_QPU_A_VSUB
] = D
| A
| B
,
347 [V3D_QPU_A_NOT
] = D
| A
,
348 [V3D_QPU_A_NEG
] = D
| A
,
349 [V3D_QPU_A_FLAPUSH
] = D
| A
,
350 [V3D_QPU_A_FLBPUSH
] = D
| A
,
351 [V3D_QPU_A_FLBPOP
] = D
| A
,
352 [V3D_QPU_A_SETMSF
] = D
| A
,
353 [V3D_QPU_A_SETREVF
] = D
| A
,
355 [V3D_QPU_A_TIDX
] = D
,
356 [V3D_QPU_A_EIDX
] = D
,
358 [V3D_QPU_A_VFLA
] = D
,
359 [V3D_QPU_A_VFLNA
] = D
,
360 [V3D_QPU_A_VFLB
] = D
,
361 [V3D_QPU_A_VFLNB
] = D
,
363 [V3D_QPU_A_FXCD
] = D
,
365 [V3D_QPU_A_FYCD
] = D
,
369 [V3D_QPU_A_REVF
] = D
,
370 [V3D_QPU_A_VDWWT
] = D
,
372 [V3D_QPU_A_SAMPID
] = D
,
373 [V3D_QPU_A_PATCHID
] = D
,
374 [V3D_QPU_A_TMUWT
] = D
,
375 [V3D_QPU_A_VPMWT
] = D
,
377 [V3D_QPU_A_VPMSETUP
] = D
| A
,
379 [V3D_QPU_A_LDVPMV
] = D
| A
,
380 [V3D_QPU_A_LDVPMD
] = D
| A
,
381 [V3D_QPU_A_LDVPMP
] = D
| A
,
382 [V3D_QPU_A_LDVPMG
] = D
| A
| B
,
384 /* FIXME: MOVABSNEG */
386 [V3D_QPU_A_FCMP
] = D
| A
| B
,
387 [V3D_QPU_A_VFMAX
] = D
| A
| B
,
389 [V3D_QPU_A_FROUND
] = D
| A
,
390 [V3D_QPU_A_FTOIN
] = D
| A
,
391 [V3D_QPU_A_FTRUNC
] = D
| A
,
392 [V3D_QPU_A_FTOIZ
] = D
| A
,
393 [V3D_QPU_A_FFLOOR
] = D
| A
,
394 [V3D_QPU_A_FTOUZ
] = D
| A
,
395 [V3D_QPU_A_FCEIL
] = D
| A
,
396 [V3D_QPU_A_FTOC
] = D
| A
,
398 [V3D_QPU_A_FDX
] = D
| A
,
399 [V3D_QPU_A_FDY
] = D
| A
,
401 [V3D_QPU_A_STVPMV
] = A
| B
,
402 [V3D_QPU_A_STVPMD
] = A
| B
,
403 [V3D_QPU_A_STVPMP
] = A
| B
,
405 [V3D_QPU_A_ITOF
] = D
| A
,
406 [V3D_QPU_A_CLZ
] = D
| A
,
407 [V3D_QPU_A_UTOF
] = D
| A
,
410 static const uint8_t mul_op_args
[] = {
411 [V3D_QPU_M_ADD
] = D
| A
| B
,
412 [V3D_QPU_M_SUB
] = D
| A
| B
,
413 [V3D_QPU_M_UMUL24
] = D
| A
| B
,
414 [V3D_QPU_M_VFMUL
] = D
| A
| B
,
415 [V3D_QPU_M_SMUL24
] = D
| A
| B
,
416 [V3D_QPU_M_MULTOP
] = D
| A
| B
,
417 [V3D_QPU_M_FMOV
] = D
| A
,
419 [V3D_QPU_M_MOV
] = D
| A
,
420 [V3D_QPU_M_FMUL
] = D
| A
| B
,
424 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op
)
426 assert(op
< ARRAY_SIZE(add_op_args
));
428 return add_op_args
[op
] & D
;
432 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op
)
434 assert(op
< ARRAY_SIZE(mul_op_args
));
436 return mul_op_args
[op
] & D
;
/**
 * Reports how many source operands an add-ALU opcode takes.
 *
 * Asserts that op indexes within add_op_args, then loads the opcode's
 * operand mask into args.
 *
 * NOTE(review): the code that turns args into a count is not visible in
 * this excerpt -- presumably it inspects the A and B bits; confirm.
 */
440 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op
)
442 assert(op
< ARRAY_SIZE(add_op_args
));
444 uint8_t args
= add_op_args
[op
];
/**
 * Reports how many source operands a mul-ALU opcode takes.
 *
 * Asserts that op indexes within mul_op_args, then loads the opcode's
 * operand mask into args.
 *
 * NOTE(review): the code that turns args into a count is not visible in
 * this excerpt -- presumably it inspects the A and B bits; confirm.
 */
454 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op
)
456 assert(op
< ARRAY_SIZE(mul_op_args
));
458 uint8_t args
= mul_op_args
[op
];
/**
 * Classifies a magic write address as an SFU target.
 *
 * The case labels group RECIP, RSQRT, EXP, LOG, SIN and RSQRT2 together.
 *
 * NOTE(review): the statements the labels lead to are not visible in this
 * excerpt -- presumably the listed addresses yield true and all others
 * false; confirm.
 */
468 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr
)
471 case V3D_QPU_WADDR_RECIP
:
472 case V3D_QPU_WADDR_RSQRT
:
473 case V3D_QPU_WADDR_EXP
:
474 case V3D_QPU_WADDR_LOG
:
475 case V3D_QPU_WADDR_SIN
:
476 case V3D_QPU_WADDR_RSQRT2
:
/**
 * Classifies a magic write address as a TMU target.
 *
 * The case labels group TMU, TMUL, TMUD, TMUA and TMUAU together.
 *
 * NOTE(review): the statements the labels lead to are not visible in this
 * excerpt -- presumably the listed addresses yield true and all others
 * false; confirm.
 */
484 v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr
)
487 case V3D_QPU_WADDR_TMU
:
488 case V3D_QPU_WADDR_TMUL
:
489 case V3D_QPU_WADDR_TMUD
:
490 case V3D_QPU_WADDR_TMUA
:
491 case V3D_QPU_WADDR_TMUAU
:
499 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr
)
501 return (waddr
== V3D_QPU_WADDR_TLB
||
502 waddr
== V3D_QPU_WADDR_TLBU
);
506 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr
)
508 return (waddr
== V3D_QPU_WADDR_VPM
||
509 waddr
== V3D_QPU_WADDR_VPMU
);
513 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr
)
515 return (waddr
== V3D_QPU_WADDR_SYNC
||
516 waddr
== V3D_QPU_WADDR_SYNCU
);
/**
 * Decides whether an instruction writes r3.
 *
 * Visible checks, in order:
 *  - for ALU-type instructions, an add or mul magic write whose waddr is
 *    V3D_QPU_WADDR_R3;
 *  - a signal that writes an address (v3d_qpu_sig_writes_address()) with
 *    sig_magic set and sig_addr equal to V3D_QPU_WADDR_R3;
 *  - otherwise the result is whether the ldvary or ldvpm signal is set.
 *
 * NOTE(review): the bodies of the if statements are not visible in this
 * excerpt -- presumably each returns true; confirm.
 */
520 v3d_qpu_writes_r3(const struct v3d_device_info
*devinfo
,
521 const struct v3d_qpu_instr
*inst
)
523 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
524 if (inst
->alu
.add
.magic_write
&&
525 inst
->alu
.add
.waddr
== V3D_QPU_WADDR_R3
) {
529 if (inst
->alu
.mul
.magic_write
&&
530 inst
->alu
.mul
.waddr
== V3D_QPU_WADDR_R3
) {
535 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
) &&
536 inst
->sig_magic
&& inst
->sig_addr
== V3D_QPU_WADDR_R3
) {
540 return inst
->sig
.ldvary
|| inst
->sig
.ldvpm
;
/**
 * Decides whether an instruction writes r4.
 *
 * Visible checks, in order:
 *  - for ALU-type instructions, an add or mul magic write whose waddr is
 *    V3D_QPU_WADDR_R4 or satisfies v3d_qpu_magic_waddr_is_sfu();
 *  - a signal that writes an address (v3d_qpu_sig_writes_address()) with
 *    sig_magic set and sig_addr equal to V3D_QPU_WADDR_R4.
 *
 * NOTE(review): the bodies of the if statements, anything preceding the
 * ALU check, and the final return are not visible in this excerpt.
 */
544 v3d_qpu_writes_r4(const struct v3d_device_info
*devinfo
,
545 const struct v3d_qpu_instr
*inst
)
550 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
551 if (inst
->alu
.add
.magic_write
&&
552 (inst
->alu
.add
.waddr
== V3D_QPU_WADDR_R4
||
553 v3d_qpu_magic_waddr_is_sfu(inst
->alu
.add
.waddr
))) {
557 if (inst
->alu
.mul
.magic_write
&&
558 (inst
->alu
.mul
.waddr
== V3D_QPU_WADDR_R4
||
559 v3d_qpu_magic_waddr_is_sfu(inst
->alu
.mul
.waddr
))) {
564 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
) &&
565 inst
->sig_magic
&& inst
->sig_addr
== V3D_QPU_WADDR_R4
) {
/**
 * Decides whether an instruction writes r5.
 *
 * Visible checks, in order:
 *  - for ALU-type instructions, an add or mul magic write whose waddr is
 *    V3D_QPU_WADDR_R5;
 *  - a signal that writes an address (v3d_qpu_sig_writes_address()) with
 *    sig_magic set and sig_addr equal to V3D_QPU_WADDR_R5;
 *  - otherwise the result is whether any of the ldvary, ldunif or ldunifa
 *    signals is set.
 *
 * NOTE(review): the bodies of the if statements are not visible in this
 * excerpt -- presumably each returns true; confirm.
 */
573 v3d_qpu_writes_r5(const struct v3d_device_info
*devinfo
,
574 const struct v3d_qpu_instr
*inst
)
576 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
577 if (inst
->alu
.add
.magic_write
&&
578 inst
->alu
.add
.waddr
== V3D_QPU_WADDR_R5
) {
582 if (inst
->alu
.mul
.magic_write
&&
583 inst
->alu
.mul
.waddr
== V3D_QPU_WADDR_R5
) {
588 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
) &&
589 inst
->sig_magic
&& inst
->sig_addr
== V3D_QPU_WADDR_R5
) {
593 return inst
->sig
.ldvary
|| inst
->sig
.ldunif
|| inst
->sig
.ldunifa
;
597 v3d_qpu_uses_mux(const struct v3d_qpu_instr
*inst
, enum v3d_qpu_mux mux
)
599 int add_nsrc
= v3d_qpu_add_op_num_src(inst
->alu
.add
.op
);
600 int mul_nsrc
= v3d_qpu_mul_op_num_src(inst
->alu
.mul
.op
);
602 return ((add_nsrc
> 0 && inst
->alu
.add
.a
== mux
) ||
603 (add_nsrc
> 1 && inst
->alu
.add
.b
== mux
) ||
604 (mul_nsrc
> 0 && inst
->alu
.mul
.a
== mux
) ||
605 (mul_nsrc
> 1 && inst
->alu
.mul
.b
== mux
));
609 v3d_qpu_sig_writes_address(const struct v3d_device_info
*devinfo
,
610 const struct v3d_qpu_sig
*sig
)
612 if (devinfo
->ver
< 41)
615 return (sig
->ldunifrf
||