/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "util/macros.h"
26 #include "broadcom/common/v3d_device_info.h"
27 #include "qpu_instr.h"
30 v3d_qpu_magic_waddr_name(enum v3d_qpu_waddr waddr
)
32 static const char *waddr_magic
[] = {
33 [V3D_QPU_WADDR_R0
] = "r0",
34 [V3D_QPU_WADDR_R1
] = "r1",
35 [V3D_QPU_WADDR_R2
] = "r2",
36 [V3D_QPU_WADDR_R3
] = "r3",
37 [V3D_QPU_WADDR_R4
] = "r4",
38 [V3D_QPU_WADDR_R5
] = "r5",
39 [V3D_QPU_WADDR_NOP
] = "-",
40 [V3D_QPU_WADDR_TLB
] = "tlb",
41 [V3D_QPU_WADDR_TLBU
] = "tlbu",
42 [V3D_QPU_WADDR_TMU
] = "tmu",
43 [V3D_QPU_WADDR_TMUL
] = "tmul",
44 [V3D_QPU_WADDR_TMUD
] = "tmud",
45 [V3D_QPU_WADDR_TMUA
] = "tmua",
46 [V3D_QPU_WADDR_TMUAU
] = "tmuau",
47 [V3D_QPU_WADDR_VPM
] = "vpm",
48 [V3D_QPU_WADDR_VPMU
] = "vpmu",
49 [V3D_QPU_WADDR_SYNC
] = "sync",
50 [V3D_QPU_WADDR_SYNCU
] = "syncu",
51 [V3D_QPU_WADDR_SYNCB
] = "syncb",
52 [V3D_QPU_WADDR_RECIP
] = "recip",
53 [V3D_QPU_WADDR_RSQRT
] = "rsqrt",
54 [V3D_QPU_WADDR_EXP
] = "exp",
55 [V3D_QPU_WADDR_LOG
] = "log",
56 [V3D_QPU_WADDR_SIN
] = "sin",
57 [V3D_QPU_WADDR_RSQRT2
] = "rsqrt2",
58 [V3D_QPU_WADDR_TMUC
] = "tmuc",
59 [V3D_QPU_WADDR_TMUS
] = "tmus",
60 [V3D_QPU_WADDR_TMUT
] = "tmut",
61 [V3D_QPU_WADDR_TMUR
] = "tmur",
62 [V3D_QPU_WADDR_TMUI
] = "tmui",
63 [V3D_QPU_WADDR_TMUB
] = "tmub",
64 [V3D_QPU_WADDR_TMUDREF
] = "tmudref",
65 [V3D_QPU_WADDR_TMUOFF
] = "tmuoff",
66 [V3D_QPU_WADDR_TMUSCM
] = "tmuscm",
67 [V3D_QPU_WADDR_TMUSF
] = "tmusf",
68 [V3D_QPU_WADDR_TMUSLOD
] = "tmuslod",
69 [V3D_QPU_WADDR_TMUHS
] = "tmuhs",
70 [V3D_QPU_WADDR_TMUHSCM
] = "tmuscm",
71 [V3D_QPU_WADDR_TMUHSF
] = "tmuhsf",
72 [V3D_QPU_WADDR_TMUHSLOD
] = "tmuhslod",
73 [V3D_QPU_WADDR_R5REP
] = "r5rep",
76 return waddr_magic
[waddr
];
80 v3d_qpu_add_op_name(enum v3d_qpu_add_op op
)
82 static const char *op_names
[] = {
83 [V3D_QPU_A_FADD
] = "fadd",
84 [V3D_QPU_A_FADDNF
] = "faddnf",
85 [V3D_QPU_A_VFPACK
] = "vfpack",
86 [V3D_QPU_A_ADD
] = "add",
87 [V3D_QPU_A_SUB
] = "sub",
88 [V3D_QPU_A_FSUB
] = "fsub",
89 [V3D_QPU_A_MIN
] = "min",
90 [V3D_QPU_A_MAX
] = "max",
91 [V3D_QPU_A_UMIN
] = "umin",
92 [V3D_QPU_A_UMAX
] = "umax",
93 [V3D_QPU_A_SHL
] = "shl",
94 [V3D_QPU_A_SHR
] = "shr",
95 [V3D_QPU_A_ASR
] = "asr",
96 [V3D_QPU_A_ROR
] = "ror",
97 [V3D_QPU_A_FMIN
] = "fmin",
98 [V3D_QPU_A_FMAX
] = "fmax",
99 [V3D_QPU_A_VFMIN
] = "vfmin",
100 [V3D_QPU_A_AND
] = "and",
101 [V3D_QPU_A_OR
] = "or",
102 [V3D_QPU_A_XOR
] = "xor",
103 [V3D_QPU_A_VADD
] = "vadd",
104 [V3D_QPU_A_VSUB
] = "vsub",
105 [V3D_QPU_A_NOT
] = "not",
106 [V3D_QPU_A_NEG
] = "neg",
107 [V3D_QPU_A_FLAPUSH
] = "flapush",
108 [V3D_QPU_A_FLBPUSH
] = "flbpush",
109 [V3D_QPU_A_FLPOP
] = "flpop",
110 [V3D_QPU_A_RECIP
] = "recip",
111 [V3D_QPU_A_SETMSF
] = "setmsf",
112 [V3D_QPU_A_SETREVF
] = "setrevf",
113 [V3D_QPU_A_NOP
] = "nop",
114 [V3D_QPU_A_TIDX
] = "tidx",
115 [V3D_QPU_A_EIDX
] = "eidx",
116 [V3D_QPU_A_LR
] = "lr",
117 [V3D_QPU_A_VFLA
] = "vfla",
118 [V3D_QPU_A_VFLNA
] = "vflna",
119 [V3D_QPU_A_VFLB
] = "vflb",
120 [V3D_QPU_A_VFLNB
] = "vflnb",
121 [V3D_QPU_A_FXCD
] = "fxcd",
122 [V3D_QPU_A_XCD
] = "xcd",
123 [V3D_QPU_A_FYCD
] = "fycd",
124 [V3D_QPU_A_YCD
] = "ycd",
125 [V3D_QPU_A_MSF
] = "msf",
126 [V3D_QPU_A_REVF
] = "revf",
127 [V3D_QPU_A_VDWWT
] = "vdwwt",
128 [V3D_QPU_A_IID
] = "iid",
129 [V3D_QPU_A_SAMPID
] = "sampid",
130 [V3D_QPU_A_BARRIERID
] = "barrierid",
131 [V3D_QPU_A_TMUWT
] = "tmuwt",
132 [V3D_QPU_A_VPMSETUP
] = "vpmsetup",
133 [V3D_QPU_A_VPMWT
] = "vpmwt",
134 [V3D_QPU_A_LDVPMV_IN
] = "ldvpmv_in",
135 [V3D_QPU_A_LDVPMV_OUT
] = "ldvpmv_out",
136 [V3D_QPU_A_LDVPMD_IN
] = "ldvpmd_in",
137 [V3D_QPU_A_LDVPMD_OUT
] = "ldvpmd_out",
138 [V3D_QPU_A_LDVPMP
] = "ldvpmp",
139 [V3D_QPU_A_RSQRT
] = "rsqrt",
140 [V3D_QPU_A_EXP
] = "exp",
141 [V3D_QPU_A_LOG
] = "log",
142 [V3D_QPU_A_SIN
] = "sin",
143 [V3D_QPU_A_RSQRT2
] = "rsqrt2",
144 [V3D_QPU_A_LDVPMG_IN
] = "ldvpmg_in",
145 [V3D_QPU_A_LDVPMG_OUT
] = "ldvpmg_out",
146 [V3D_QPU_A_FCMP
] = "fcmp",
147 [V3D_QPU_A_VFMAX
] = "vfmax",
148 [V3D_QPU_A_FROUND
] = "fround",
149 [V3D_QPU_A_FTOIN
] = "ftoin",
150 [V3D_QPU_A_FTRUNC
] = "ftrunc",
151 [V3D_QPU_A_FTOIZ
] = "ftoiz",
152 [V3D_QPU_A_FFLOOR
] = "ffloor",
153 [V3D_QPU_A_FTOUZ
] = "ftouz",
154 [V3D_QPU_A_FCEIL
] = "fceil",
155 [V3D_QPU_A_FTOC
] = "ftoc",
156 [V3D_QPU_A_FDX
] = "fdx",
157 [V3D_QPU_A_FDY
] = "fdy",
158 [V3D_QPU_A_STVPMV
] = "stvpmv",
159 [V3D_QPU_A_STVPMD
] = "stvpmd",
160 [V3D_QPU_A_STVPMP
] = "stvpmp",
161 [V3D_QPU_A_ITOF
] = "itof",
162 [V3D_QPU_A_CLZ
] = "clz",
163 [V3D_QPU_A_UTOF
] = "utof",
166 if (op
>= ARRAY_SIZE(op_names
))
173 v3d_qpu_mul_op_name(enum v3d_qpu_mul_op op
)
175 static const char *op_names
[] = {
176 [V3D_QPU_M_ADD
] = "add",
177 [V3D_QPU_M_SUB
] = "sub",
178 [V3D_QPU_M_UMUL24
] = "umul24",
179 [V3D_QPU_M_VFMUL
] = "vfmul",
180 [V3D_QPU_M_SMUL24
] = "smul24",
181 [V3D_QPU_M_MULTOP
] = "multop",
182 [V3D_QPU_M_FMOV
] = "fmov",
183 [V3D_QPU_M_MOV
] = "mov",
184 [V3D_QPU_M_NOP
] = "nop",
185 [V3D_QPU_M_FMUL
] = "fmul",
188 if (op
>= ARRAY_SIZE(op_names
))
195 v3d_qpu_cond_name(enum v3d_qpu_cond cond
)
198 case V3D_QPU_COND_NONE
:
200 case V3D_QPU_COND_IFA
:
202 case V3D_QPU_COND_IFB
:
204 case V3D_QPU_COND_IFNA
:
206 case V3D_QPU_COND_IFNB
:
209 unreachable("bad cond value");
214 v3d_qpu_branch_cond_name(enum v3d_qpu_branch_cond cond
)
217 case V3D_QPU_BRANCH_COND_ALWAYS
:
219 case V3D_QPU_BRANCH_COND_A0
:
221 case V3D_QPU_BRANCH_COND_NA0
:
223 case V3D_QPU_BRANCH_COND_ALLA
:
225 case V3D_QPU_BRANCH_COND_ANYNA
:
227 case V3D_QPU_BRANCH_COND_ANYA
:
229 case V3D_QPU_BRANCH_COND_ALLNA
:
232 unreachable("bad branch cond value");
237 v3d_qpu_msfign_name(enum v3d_qpu_msfign msfign
)
240 case V3D_QPU_MSFIGN_NONE
:
242 case V3D_QPU_MSFIGN_P
:
244 case V3D_QPU_MSFIGN_Q
:
247 unreachable("bad branch cond value");
252 v3d_qpu_pf_name(enum v3d_qpu_pf pf
)
255 case V3D_QPU_PF_NONE
:
257 case V3D_QPU_PF_PUSHZ
:
259 case V3D_QPU_PF_PUSHN
:
261 case V3D_QPU_PF_PUSHC
:
264 unreachable("bad pf value");
269 v3d_qpu_uf_name(enum v3d_qpu_uf uf
)
272 case V3D_QPU_UF_NONE
:
274 case V3D_QPU_UF_ANDZ
:
276 case V3D_QPU_UF_ANDNZ
:
278 case V3D_QPU_UF_NORZ
:
280 case V3D_QPU_UF_NORNZ
:
282 case V3D_QPU_UF_ANDN
:
284 case V3D_QPU_UF_ANDNN
:
286 case V3D_QPU_UF_NORN
:
288 case V3D_QPU_UF_NORNN
:
290 case V3D_QPU_UF_ANDC
:
292 case V3D_QPU_UF_ANDNC
:
294 case V3D_QPU_UF_NORC
:
296 case V3D_QPU_UF_NORNC
:
299 unreachable("bad pf value");
304 v3d_qpu_pack_name(enum v3d_qpu_output_pack pack
)
307 case V3D_QPU_PACK_NONE
:
314 unreachable("bad pack value");
319 v3d_qpu_unpack_name(enum v3d_qpu_input_unpack unpack
)
322 case V3D_QPU_UNPACK_NONE
:
324 case V3D_QPU_UNPACK_L
:
326 case V3D_QPU_UNPACK_H
:
328 case V3D_QPU_UNPACK_ABS
:
330 case V3D_QPU_UNPACK_REPLICATE_32F_16
:
332 case V3D_QPU_UNPACK_REPLICATE_L_16
:
334 case V3D_QPU_UNPACK_REPLICATE_H_16
:
336 case V3D_QPU_UNPACK_SWAP_16
:
339 unreachable("bad unpack value");
346 static const uint8_t add_op_args
[] = {
347 [V3D_QPU_A_FADD
] = D
| A
| B
,
348 [V3D_QPU_A_FADDNF
] = D
| A
| B
,
349 [V3D_QPU_A_VFPACK
] = D
| A
| B
,
350 [V3D_QPU_A_ADD
] = D
| A
| B
,
351 [V3D_QPU_A_VFPACK
] = D
| A
| B
,
352 [V3D_QPU_A_SUB
] = D
| A
| B
,
353 [V3D_QPU_A_VFPACK
] = D
| A
| B
,
354 [V3D_QPU_A_FSUB
] = D
| A
| B
,
355 [V3D_QPU_A_MIN
] = D
| A
| B
,
356 [V3D_QPU_A_MAX
] = D
| A
| B
,
357 [V3D_QPU_A_UMIN
] = D
| A
| B
,
358 [V3D_QPU_A_UMAX
] = D
| A
| B
,
359 [V3D_QPU_A_SHL
] = D
| A
| B
,
360 [V3D_QPU_A_SHR
] = D
| A
| B
,
361 [V3D_QPU_A_ASR
] = D
| A
| B
,
362 [V3D_QPU_A_ROR
] = D
| A
| B
,
363 [V3D_QPU_A_FMIN
] = D
| A
| B
,
364 [V3D_QPU_A_FMAX
] = D
| A
| B
,
365 [V3D_QPU_A_VFMIN
] = D
| A
| B
,
367 [V3D_QPU_A_AND
] = D
| A
| B
,
368 [V3D_QPU_A_OR
] = D
| A
| B
,
369 [V3D_QPU_A_XOR
] = D
| A
| B
,
371 [V3D_QPU_A_VADD
] = D
| A
| B
,
372 [V3D_QPU_A_VSUB
] = D
| A
| B
,
373 [V3D_QPU_A_NOT
] = D
| A
,
374 [V3D_QPU_A_NEG
] = D
| A
,
375 [V3D_QPU_A_FLAPUSH
] = D
| A
,
376 [V3D_QPU_A_FLBPUSH
] = D
| A
,
377 [V3D_QPU_A_FLPOP
] = D
| A
,
378 [V3D_QPU_A_RECIP
] = D
| A
,
379 [V3D_QPU_A_SETMSF
] = D
| A
,
380 [V3D_QPU_A_SETREVF
] = D
| A
,
382 [V3D_QPU_A_TIDX
] = D
,
383 [V3D_QPU_A_EIDX
] = D
,
385 [V3D_QPU_A_VFLA
] = D
,
386 [V3D_QPU_A_VFLNA
] = D
,
387 [V3D_QPU_A_VFLB
] = D
,
388 [V3D_QPU_A_VFLNB
] = D
,
390 [V3D_QPU_A_FXCD
] = D
,
392 [V3D_QPU_A_FYCD
] = D
,
396 [V3D_QPU_A_REVF
] = D
,
397 [V3D_QPU_A_VDWWT
] = D
,
399 [V3D_QPU_A_SAMPID
] = D
,
400 [V3D_QPU_A_BARRIERID
] = D
,
401 [V3D_QPU_A_TMUWT
] = D
,
402 [V3D_QPU_A_VPMWT
] = D
,
404 [V3D_QPU_A_VPMSETUP
] = D
| A
,
406 [V3D_QPU_A_LDVPMV_IN
] = D
| A
,
407 [V3D_QPU_A_LDVPMV_OUT
] = D
| A
,
408 [V3D_QPU_A_LDVPMD_IN
] = D
| A
,
409 [V3D_QPU_A_LDVPMD_OUT
] = D
| A
,
410 [V3D_QPU_A_LDVPMP
] = D
| A
,
411 [V3D_QPU_A_RSQRT
] = D
| A
,
412 [V3D_QPU_A_EXP
] = D
| A
,
413 [V3D_QPU_A_LOG
] = D
| A
,
414 [V3D_QPU_A_SIN
] = D
| A
,
415 [V3D_QPU_A_RSQRT2
] = D
| A
,
416 [V3D_QPU_A_LDVPMG_IN
] = D
| A
| B
,
417 [V3D_QPU_A_LDVPMG_OUT
] = D
| A
| B
,
419 /* FIXME: MOVABSNEG */
421 [V3D_QPU_A_FCMP
] = D
| A
| B
,
422 [V3D_QPU_A_VFMAX
] = D
| A
| B
,
424 [V3D_QPU_A_FROUND
] = D
| A
,
425 [V3D_QPU_A_FTOIN
] = D
| A
,
426 [V3D_QPU_A_FTRUNC
] = D
| A
,
427 [V3D_QPU_A_FTOIZ
] = D
| A
,
428 [V3D_QPU_A_FFLOOR
] = D
| A
,
429 [V3D_QPU_A_FTOUZ
] = D
| A
,
430 [V3D_QPU_A_FCEIL
] = D
| A
,
431 [V3D_QPU_A_FTOC
] = D
| A
,
433 [V3D_QPU_A_FDX
] = D
| A
,
434 [V3D_QPU_A_FDY
] = D
| A
,
436 [V3D_QPU_A_STVPMV
] = A
| B
,
437 [V3D_QPU_A_STVPMD
] = A
| B
,
438 [V3D_QPU_A_STVPMP
] = A
| B
,
440 [V3D_QPU_A_ITOF
] = D
| A
,
441 [V3D_QPU_A_CLZ
] = D
| A
,
442 [V3D_QPU_A_UTOF
] = D
| A
,
445 static const uint8_t mul_op_args
[] = {
446 [V3D_QPU_M_ADD
] = D
| A
| B
,
447 [V3D_QPU_M_SUB
] = D
| A
| B
,
448 [V3D_QPU_M_UMUL24
] = D
| A
| B
,
449 [V3D_QPU_M_VFMUL
] = D
| A
| B
,
450 [V3D_QPU_M_SMUL24
] = D
| A
| B
,
451 [V3D_QPU_M_MULTOP
] = D
| A
| B
,
452 [V3D_QPU_M_FMOV
] = D
| A
,
454 [V3D_QPU_M_MOV
] = D
| A
,
455 [V3D_QPU_M_FMUL
] = D
| A
| B
,
459 v3d_qpu_add_op_has_dst(enum v3d_qpu_add_op op
)
461 assert(op
< ARRAY_SIZE(add_op_args
));
463 return add_op_args
[op
] & D
;
467 v3d_qpu_mul_op_has_dst(enum v3d_qpu_mul_op op
)
469 assert(op
< ARRAY_SIZE(mul_op_args
));
471 return mul_op_args
[op
] & D
;
475 v3d_qpu_add_op_num_src(enum v3d_qpu_add_op op
)
477 assert(op
< ARRAY_SIZE(add_op_args
));
479 uint8_t args
= add_op_args
[op
];
489 v3d_qpu_mul_op_num_src(enum v3d_qpu_mul_op op
)
491 assert(op
< ARRAY_SIZE(mul_op_args
));
493 uint8_t args
= mul_op_args
[op
];
503 v3d_qpu_cond_invert(enum v3d_qpu_cond cond
)
506 case V3D_QPU_COND_IFA
:
507 return V3D_QPU_COND_IFNA
;
508 case V3D_QPU_COND_IFNA
:
509 return V3D_QPU_COND_IFA
;
510 case V3D_QPU_COND_IFB
:
511 return V3D_QPU_COND_IFNB
;
512 case V3D_QPU_COND_IFNB
:
513 return V3D_QPU_COND_IFB
;
515 unreachable("Non-invertible cond");
520 v3d_qpu_magic_waddr_is_sfu(enum v3d_qpu_waddr waddr
)
523 case V3D_QPU_WADDR_RECIP
:
524 case V3D_QPU_WADDR_RSQRT
:
525 case V3D_QPU_WADDR_EXP
:
526 case V3D_QPU_WADDR_LOG
:
527 case V3D_QPU_WADDR_SIN
:
528 case V3D_QPU_WADDR_RSQRT2
:
536 v3d_qpu_magic_waddr_is_tmu(enum v3d_qpu_waddr waddr
)
538 /* XXX: WADDR_TMU changed to UNIFA on 4.x */
539 return ((waddr
>= V3D_QPU_WADDR_TMU
&&
540 waddr
<= V3D_QPU_WADDR_TMUAU
) ||
541 (waddr
>= V3D_QPU_WADDR_TMUC
&&
542 waddr
<= V3D_QPU_WADDR_TMUHSLOD
));
546 v3d_qpu_waits_on_tmu(const struct v3d_qpu_instr
*inst
)
548 return (inst
->sig
.ldtmu
||
549 (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
&&
550 inst
->alu
.add
.op
== V3D_QPU_A_TMUWT
));
554 v3d_qpu_magic_waddr_is_tlb(enum v3d_qpu_waddr waddr
)
556 return (waddr
== V3D_QPU_WADDR_TLB
||
557 waddr
== V3D_QPU_WADDR_TLBU
);
561 v3d_qpu_magic_waddr_is_vpm(enum v3d_qpu_waddr waddr
)
563 return (waddr
== V3D_QPU_WADDR_VPM
||
564 waddr
== V3D_QPU_WADDR_VPMU
);
568 v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr
)
570 return (waddr
== V3D_QPU_WADDR_SYNC
||
571 waddr
== V3D_QPU_WADDR_SYNCB
||
572 waddr
== V3D_QPU_WADDR_SYNCU
);
576 v3d_qpu_magic_waddr_loads_unif(enum v3d_qpu_waddr waddr
)
579 case V3D_QPU_WADDR_VPMU
:
580 case V3D_QPU_WADDR_TLBU
:
581 case V3D_QPU_WADDR_TMUAU
:
582 case V3D_QPU_WADDR_SYNCU
:
590 v3d_qpu_add_op_reads_vpm(enum v3d_qpu_add_op op
)
593 case V3D_QPU_A_VPMSETUP
:
594 case V3D_QPU_A_LDVPMV_IN
:
595 case V3D_QPU_A_LDVPMV_OUT
:
596 case V3D_QPU_A_LDVPMD_IN
:
597 case V3D_QPU_A_LDVPMD_OUT
:
598 case V3D_QPU_A_LDVPMP
:
599 case V3D_QPU_A_LDVPMG_IN
:
600 case V3D_QPU_A_LDVPMG_OUT
:
608 v3d_qpu_add_op_writes_vpm(enum v3d_qpu_add_op op
)
611 case V3D_QPU_A_VPMSETUP
:
612 case V3D_QPU_A_STVPMV
:
613 case V3D_QPU_A_STVPMD
:
614 case V3D_QPU_A_STVPMP
:
622 v3d_qpu_uses_tlb(const struct v3d_qpu_instr
*inst
)
624 if (inst
->sig
.ldtlb
||
628 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
629 if (inst
->alu
.add
.magic_write
&&
630 v3d_qpu_magic_waddr_is_tlb(inst
->alu
.add
.waddr
)) {
634 if (inst
->alu
.mul
.magic_write
&&
635 v3d_qpu_magic_waddr_is_tlb(inst
->alu
.mul
.waddr
)) {
644 v3d_qpu_uses_sfu(const struct v3d_qpu_instr
*inst
)
646 if (v3d_qpu_instr_is_sfu(inst
))
649 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
650 if (inst
->alu
.add
.magic_write
&&
651 v3d_qpu_magic_waddr_is_sfu(inst
->alu
.add
.waddr
)) {
655 if (inst
->alu
.mul
.magic_write
&&
656 v3d_qpu_magic_waddr_is_sfu(inst
->alu
.mul
.waddr
)) {
665 v3d_qpu_instr_is_sfu(const struct v3d_qpu_instr
*inst
)
667 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
668 switch (inst
->alu
.add
.op
) {
669 case V3D_QPU_A_RECIP
:
670 case V3D_QPU_A_RSQRT
:
674 case V3D_QPU_A_RSQRT2
:
684 v3d_qpu_writes_tmu(const struct v3d_qpu_instr
*inst
)
686 return (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
&&
687 ((inst
->alu
.add
.magic_write
&&
688 v3d_qpu_magic_waddr_is_tmu(inst
->alu
.add
.waddr
)) ||
689 (inst
->alu
.mul
.magic_write
&&
690 v3d_qpu_magic_waddr_is_tmu(inst
->alu
.mul
.waddr
))));
694 v3d_qpu_writes_tmu_not_tmuc(const struct v3d_qpu_instr
*inst
)
696 return v3d_qpu_writes_tmu(inst
) &&
697 (!inst
->alu
.add
.magic_write
||
698 inst
->alu
.add
.waddr
!= V3D_QPU_WADDR_TMUC
) &&
699 (!inst
->alu
.mul
.magic_write
||
700 inst
->alu
.mul
.waddr
!= V3D_QPU_WADDR_TMUC
);
704 v3d_qpu_reads_vpm(const struct v3d_qpu_instr
*inst
)
709 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
710 if (v3d_qpu_add_op_reads_vpm(inst
->alu
.add
.op
))
718 v3d_qpu_writes_vpm(const struct v3d_qpu_instr
*inst
)
720 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
721 if (v3d_qpu_add_op_writes_vpm(inst
->alu
.add
.op
))
724 if (inst
->alu
.add
.magic_write
&&
725 v3d_qpu_magic_waddr_is_vpm(inst
->alu
.add
.waddr
)) {
729 if (inst
->alu
.mul
.magic_write
&&
730 v3d_qpu_magic_waddr_is_vpm(inst
->alu
.mul
.waddr
)) {
739 v3d_qpu_waits_vpm(const struct v3d_qpu_instr
*inst
)
741 return inst
->type
== V3D_QPU_INSTR_TYPE_ALU
&&
742 inst
->alu
.add
.op
== V3D_QPU_A_VPMWT
;
/**
 * Returns whether this instruction either reads or writes the VPM.
 */
bool
v3d_qpu_reads_or_writes_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;
        return v3d_qpu_writes_vpm(inst);
}
/**
 * Returns whether this instruction interacts with the VPM in any way:
 * read, write, or wait.
 */
bool
v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
{
        if (v3d_qpu_reads_vpm(inst))
                return true;
        if (v3d_qpu_writes_vpm(inst))
                return true;
        return v3d_qpu_waits_vpm(inst);
}
760 v3d_qpu_writes_r3(const struct v3d_device_info
*devinfo
,
761 const struct v3d_qpu_instr
*inst
)
763 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
764 if (inst
->alu
.add
.magic_write
&&
765 inst
->alu
.add
.waddr
== V3D_QPU_WADDR_R3
) {
769 if (inst
->alu
.mul
.magic_write
&&
770 inst
->alu
.mul
.waddr
== V3D_QPU_WADDR_R3
) {
775 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
) &&
776 inst
->sig_magic
&& inst
->sig_addr
== V3D_QPU_WADDR_R3
) {
780 return inst
->sig
.ldvary
|| inst
->sig
.ldvpm
;
784 v3d_qpu_writes_r4(const struct v3d_device_info
*devinfo
,
785 const struct v3d_qpu_instr
*inst
)
787 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
788 if (inst
->alu
.add
.magic_write
&&
789 (inst
->alu
.add
.waddr
== V3D_QPU_WADDR_R4
||
790 v3d_qpu_magic_waddr_is_sfu(inst
->alu
.add
.waddr
))) {
794 if (inst
->alu
.mul
.magic_write
&&
795 (inst
->alu
.mul
.waddr
== V3D_QPU_WADDR_R4
||
796 v3d_qpu_magic_waddr_is_sfu(inst
->alu
.mul
.waddr
))) {
801 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
)) {
802 if (inst
->sig_magic
&& inst
->sig_addr
== V3D_QPU_WADDR_R4
)
804 } else if (inst
->sig
.ldtmu
) {
812 v3d_qpu_writes_r5(const struct v3d_device_info
*devinfo
,
813 const struct v3d_qpu_instr
*inst
)
815 if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
816 if (inst
->alu
.add
.magic_write
&&
817 inst
->alu
.add
.waddr
== V3D_QPU_WADDR_R5
) {
821 if (inst
->alu
.mul
.magic_write
&&
822 inst
->alu
.mul
.waddr
== V3D_QPU_WADDR_R5
) {
827 if (v3d_qpu_sig_writes_address(devinfo
, &inst
->sig
) &&
828 inst
->sig_magic
&& inst
->sig_addr
== V3D_QPU_WADDR_R5
) {
832 return inst
->sig
.ldvary
|| inst
->sig
.ldunif
|| inst
->sig
.ldunifa
;
836 v3d_qpu_uses_mux(const struct v3d_qpu_instr
*inst
, enum v3d_qpu_mux mux
)
838 int add_nsrc
= v3d_qpu_add_op_num_src(inst
->alu
.add
.op
);
839 int mul_nsrc
= v3d_qpu_mul_op_num_src(inst
->alu
.mul
.op
);
841 return ((add_nsrc
> 0 && inst
->alu
.add
.a
== mux
) ||
842 (add_nsrc
> 1 && inst
->alu
.add
.b
== mux
) ||
843 (mul_nsrc
> 0 && inst
->alu
.mul
.a
== mux
) ||
844 (mul_nsrc
> 1 && inst
->alu
.mul
.b
== mux
));
848 v3d_qpu_sig_writes_address(const struct v3d_device_info
*devinfo
,
849 const struct v3d_qpu_sig
*sig
)
851 if (devinfo
->ver
< 41)
854 return (sig
->ldunifrf
||
863 v3d_qpu_reads_flags(const struct v3d_qpu_instr
*inst
)
865 if (inst
->type
== V3D_QPU_INSTR_TYPE_BRANCH
) {
866 return inst
->branch
.cond
!= V3D_QPU_BRANCH_COND_ALWAYS
;
867 } else if (inst
->type
== V3D_QPU_INSTR_TYPE_ALU
) {
868 if (inst
->flags
.ac
!= V3D_QPU_COND_NONE
||
869 inst
->flags
.mc
!= V3D_QPU_COND_NONE
||
870 inst
->flags
.auf
!= V3D_QPU_UF_NONE
||
871 inst
->flags
.muf
!= V3D_QPU_UF_NONE
)
874 switch (inst
->alu
.add
.op
) {
876 case V3D_QPU_A_VFLNA
:
878 case V3D_QPU_A_VFLNB
:
879 case V3D_QPU_A_FLAPUSH
:
880 case V3D_QPU_A_FLBPUSH
:
891 v3d_qpu_writes_flags(const struct v3d_qpu_instr
*inst
)
893 if (inst
->flags
.apf
!= V3D_QPU_PF_NONE
||
894 inst
->flags
.mpf
!= V3D_QPU_PF_NONE
||
895 inst
->flags
.auf
!= V3D_QPU_UF_NONE
||
896 inst
->flags
.muf
!= V3D_QPU_UF_NONE
) {
904 v3d_qpu_unpacks_f32(const struct v3d_qpu_instr
*inst
)
906 if (inst
->type
!= V3D_QPU_INSTR_TYPE_ALU
)
909 switch (inst
->alu
.add
.op
) {
911 case V3D_QPU_A_FADDNF
:
916 case V3D_QPU_A_FROUND
:
917 case V3D_QPU_A_FTRUNC
:
918 case V3D_QPU_A_FFLOOR
:
919 case V3D_QPU_A_FCEIL
:
922 case V3D_QPU_A_FTOIN
:
923 case V3D_QPU_A_FTOIZ
:
924 case V3D_QPU_A_FTOUZ
:
926 case V3D_QPU_A_VFPACK
:
933 switch (inst
->alu
.mul
.op
) {
945 v3d_qpu_unpacks_f16(const struct v3d_qpu_instr
*inst
)
947 if (inst
->type
!= V3D_QPU_INSTR_TYPE_ALU
)
950 switch (inst
->alu
.add
.op
) {
951 case V3D_QPU_A_VFMIN
:
952 case V3D_QPU_A_VFMAX
:
959 switch (inst
->alu
.mul
.op
) {
960 case V3D_QPU_M_VFMUL
: