/*
 * Copyright © 2014 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
25 #include "util/ralloc.h"
30 set_src_raddr(uint64_t inst
, struct qpu_reg src
)
32 if (src
.mux
== QPU_MUX_A
) {
33 assert(QPU_GET_FIELD(inst
, QPU_RADDR_A
) == QPU_R_NOP
||
34 QPU_GET_FIELD(inst
, QPU_RADDR_A
) == src
.addr
);
35 return ((inst
& ~QPU_RADDR_A_MASK
) |
36 QPU_SET_FIELD(src
.addr
, QPU_RADDR_A
));
39 if (src
.mux
== QPU_MUX_B
) {
40 assert(QPU_GET_FIELD(inst
, QPU_RADDR_B
) == QPU_R_NOP
||
41 QPU_GET_FIELD(inst
, QPU_RADDR_B
) == src
.addr
);
42 return ((inst
& ~QPU_RADDR_B_MASK
) |
43 QPU_SET_FIELD(src
.addr
, QPU_RADDR_B
));
54 inst
|= QPU_SET_FIELD(QPU_A_NOP
, QPU_OP_ADD
);
55 inst
|= QPU_SET_FIELD(QPU_M_NOP
, QPU_OP_MUL
);
57 /* Note: These field values are actually non-zero */
58 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_ADD
);
59 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_MUL
);
60 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
);
61 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_B
);
62 inst
|= QPU_SET_FIELD(QPU_SIG_NONE
, QPU_SIG
);
68 qpu_a_dst(struct qpu_reg dst
)
72 if (dst
.mux
<= QPU_MUX_R5
) {
73 /* Translate the mux to the ACCn values. */
74 inst
|= QPU_SET_FIELD(32 + dst
.mux
, QPU_WADDR_ADD
);
76 inst
|= QPU_SET_FIELD(dst
.addr
, QPU_WADDR_ADD
);
77 if (dst
.mux
== QPU_MUX_B
)
85 qpu_m_dst(struct qpu_reg dst
)
89 if (dst
.mux
<= QPU_MUX_R5
) {
90 /* Translate the mux to the ACCn values. */
91 inst
|= QPU_SET_FIELD(32 + dst
.mux
, QPU_WADDR_MUL
);
93 inst
|= QPU_SET_FIELD(dst
.addr
, QPU_WADDR_MUL
);
94 if (dst
.mux
== QPU_MUX_A
)
102 qpu_a_MOV(struct qpu_reg dst
, struct qpu_reg src
)
106 inst
|= QPU_SET_FIELD(QPU_A_OR
, QPU_OP_ADD
);
107 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
);
108 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_B
);
109 inst
|= qpu_a_dst(dst
);
110 inst
|= QPU_SET_FIELD(QPU_COND_ALWAYS
, QPU_COND_ADD
);
111 inst
|= QPU_SET_FIELD(src
.mux
, QPU_ADD_A
);
112 inst
|= QPU_SET_FIELD(src
.mux
, QPU_ADD_B
);
113 inst
= set_src_raddr(inst
, src
);
114 inst
|= QPU_SET_FIELD(QPU_SIG_NONE
, QPU_SIG
);
115 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_MUL
);
121 qpu_m_MOV(struct qpu_reg dst
, struct qpu_reg src
)
125 inst
|= QPU_SET_FIELD(QPU_M_V8MIN
, QPU_OP_MUL
);
126 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
);
127 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_B
);
128 inst
|= qpu_m_dst(dst
);
129 inst
|= QPU_SET_FIELD(QPU_COND_ALWAYS
, QPU_COND_MUL
);
130 inst
|= QPU_SET_FIELD(src
.mux
, QPU_MUL_A
);
131 inst
|= QPU_SET_FIELD(src
.mux
, QPU_MUL_B
);
132 inst
= set_src_raddr(inst
, src
);
133 inst
|= QPU_SET_FIELD(QPU_SIG_NONE
, QPU_SIG
);
134 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_ADD
);
140 qpu_load_imm_ui(struct qpu_reg dst
, uint32_t val
)
144 inst
|= qpu_a_dst(dst
);
145 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_MUL
);
146 inst
|= QPU_SET_FIELD(QPU_COND_ALWAYS
, QPU_COND_ADD
);
147 inst
|= QPU_SET_FIELD(QPU_COND_ALWAYS
, QPU_COND_MUL
);
148 inst
|= QPU_SET_FIELD(QPU_SIG_LOAD_IMM
, QPU_SIG
);
155 qpu_a_alu2(enum qpu_op_add op
,
156 struct qpu_reg dst
, struct qpu_reg src0
, struct qpu_reg src1
)
160 inst
|= QPU_SET_FIELD(op
, QPU_OP_ADD
);
161 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
);
162 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_B
);
163 inst
|= qpu_a_dst(dst
);
164 inst
|= QPU_SET_FIELD(QPU_COND_ALWAYS
, QPU_COND_ADD
);
165 inst
|= QPU_SET_FIELD(src0
.mux
, QPU_ADD_A
);
166 inst
= set_src_raddr(inst
, src0
);
167 inst
|= QPU_SET_FIELD(src1
.mux
, QPU_ADD_B
);
168 inst
= set_src_raddr(inst
, src1
);
169 inst
|= QPU_SET_FIELD(QPU_SIG_NONE
, QPU_SIG
);
170 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_MUL
);
176 qpu_m_alu2(enum qpu_op_mul op
,
177 struct qpu_reg dst
, struct qpu_reg src0
, struct qpu_reg src1
)
181 inst
|= QPU_SET_FIELD(op
, QPU_OP_MUL
);
182 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
);
183 inst
|= QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_B
);
184 inst
|= qpu_m_dst(dst
);
185 inst
|= QPU_SET_FIELD(QPU_COND_ALWAYS
, QPU_COND_MUL
);
186 inst
|= QPU_SET_FIELD(src0
.mux
, QPU_MUL_A
);
187 inst
= set_src_raddr(inst
, src0
);
188 inst
|= QPU_SET_FIELD(src1
.mux
, QPU_MUL_B
);
189 inst
= set_src_raddr(inst
, src1
);
190 inst
|= QPU_SET_FIELD(QPU_SIG_NONE
, QPU_SIG
);
191 inst
|= QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_ADD
);
197 merge_fields(uint64_t *merge
,
198 uint64_t a
, uint64_t b
,
199 uint64_t mask
, uint64_t ignore
)
201 if ((a
& mask
) == ignore
) {
202 *merge
= (*merge
& ~mask
) | (b
& mask
);
203 } else if ((b
& mask
) == ignore
) {
204 *merge
= (*merge
& ~mask
) | (a
& mask
);
206 if ((a
& mask
) != (b
& mask
))
214 qpu_num_sf_accesses(uint64_t inst
)
217 static const uint32_t specials
[] = {
234 uint32_t waddr_add
= QPU_GET_FIELD(inst
, QPU_WADDR_ADD
);
235 uint32_t waddr_mul
= QPU_GET_FIELD(inst
, QPU_WADDR_MUL
);
236 uint32_t raddr_a
= QPU_GET_FIELD(inst
, QPU_RADDR_A
);
237 uint32_t raddr_b
= QPU_GET_FIELD(inst
, QPU_RADDR_B
);
239 for (int j
= 0; j
< ARRAY_SIZE(specials
); j
++) {
240 if (waddr_add
== specials
[j
])
242 if (waddr_mul
== specials
[j
])
246 if (raddr_a
== QPU_R_MUTEX_ACQUIRE
)
248 if (raddr_b
== QPU_R_MUTEX_ACQUIRE
)
251 /* XXX: semaphore, combined color read/write? */
252 switch (QPU_GET_FIELD(inst
, QPU_SIG
)) {
253 case QPU_SIG_COLOR_LOAD
:
254 case QPU_SIG_COLOR_LOAD_END
:
255 case QPU_SIG_LOAD_TMU0
:
256 case QPU_SIG_LOAD_TMU1
:
264 qpu_waddr_ignores_pm(uint32_t waddr
)
272 case QPU_W_TLB_COLOR_MS
:
273 case QPU_W_TLB_COLOR_ALL
:
274 case QPU_W_TLB_ALPHA_MASK
:
276 case QPU_W_SFU_RECIP
:
277 case QPU_W_SFU_RECIPSQRT
:
295 swap_ra_file_mux_helper(uint64_t *merge
, uint64_t *a
, uint32_t mux_shift
)
297 uint64_t mux_mask
= (uint64_t)0x7 << mux_shift
;
298 uint64_t mux_a_val
= (uint64_t)QPU_MUX_A
<< mux_shift
;
299 uint64_t mux_b_val
= (uint64_t)QPU_MUX_B
<< mux_shift
;
301 if ((*a
& mux_mask
) == mux_a_val
) {
302 *a
= (*a
& ~mux_mask
) | mux_b_val
;
303 *merge
= (*merge
& ~mux_mask
) | mux_b_val
;
308 try_swap_ra_file(uint64_t *merge
, uint64_t *a
, uint64_t *b
)
310 uint32_t raddr_a_a
= QPU_GET_FIELD(*a
, QPU_RADDR_A
);
311 uint32_t raddr_a_b
= QPU_GET_FIELD(*a
, QPU_RADDR_B
);
312 uint32_t raddr_b_a
= QPU_GET_FIELD(*b
, QPU_RADDR_A
);
313 uint32_t raddr_b_b
= QPU_GET_FIELD(*b
, QPU_RADDR_B
);
315 if (raddr_a_b
!= QPU_R_NOP
)
326 if (raddr_b_b
!= QPU_R_NOP
&&
327 raddr_b_b
!= raddr_a_a
)
330 /* Move raddr A to B in instruction a. */
331 *a
= (*a
& ~QPU_RADDR_A_MASK
) | QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
);
332 *a
= (*a
& ~QPU_RADDR_B_MASK
) | QPU_SET_FIELD(raddr_a_a
, QPU_RADDR_B
);
333 *merge
= ((*merge
& ~QPU_RADDR_A_MASK
) | QPU_SET_FIELD(raddr_b_a
, QPU_RADDR_A
));
334 *merge
= ((*merge
& ~QPU_RADDR_B_MASK
) | QPU_SET_FIELD(raddr_a_a
, QPU_RADDR_B
));
335 swap_ra_file_mux_helper(merge
, a
, QPU_ADD_A_SHIFT
);
336 swap_ra_file_mux_helper(merge
, a
, QPU_ADD_B_SHIFT
);
337 swap_ra_file_mux_helper(merge
, a
, QPU_MUL_A_SHIFT
);
338 swap_ra_file_mux_helper(merge
, a
, QPU_MUL_B_SHIFT
);
344 qpu_merge_inst(uint64_t a
, uint64_t b
)
346 uint64_t merge
= a
| b
;
349 if (QPU_GET_FIELD(a
, QPU_OP_ADD
) != QPU_A_NOP
&&
350 QPU_GET_FIELD(b
, QPU_OP_ADD
) != QPU_A_NOP
)
353 if (QPU_GET_FIELD(a
, QPU_OP_MUL
) != QPU_M_NOP
&&
354 QPU_GET_FIELD(b
, QPU_OP_MUL
) != QPU_M_NOP
)
357 if (qpu_num_sf_accesses(a
) && qpu_num_sf_accesses(b
))
360 if (QPU_GET_FIELD(a
, QPU_SIG
) == QPU_SIG_LOAD_IMM
||
361 QPU_GET_FIELD(b
, QPU_SIG
) == QPU_SIG_LOAD_IMM
) {
365 ok
= ok
&& merge_fields(&merge
, a
, b
, QPU_SIG_MASK
,
366 QPU_SET_FIELD(QPU_SIG_NONE
, QPU_SIG
));
368 /* Misc fields that have to match exactly. */
369 ok
= ok
&& merge_fields(&merge
, a
, b
, QPU_SF
| QPU_PM
,
372 if (!merge_fields(&merge
, a
, b
, QPU_RADDR_A_MASK
,
373 QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_A
))) {
374 /* Since we tend to use regfile A by default both for register
375 * allocation and for our special values (uniforms and
376 * varyings), try swapping uniforms and varyings to regfile B
377 * to resolve raddr A conflicts.
379 if (!try_swap_ra_file(&merge
, &a
, &b
) &&
380 !try_swap_ra_file(&merge
, &b
, &a
)) {
385 ok
= ok
&& merge_fields(&merge
, a
, b
, QPU_RADDR_B_MASK
,
386 QPU_SET_FIELD(QPU_R_NOP
, QPU_RADDR_B
));
388 ok
= ok
&& merge_fields(&merge
, a
, b
, QPU_WADDR_ADD_MASK
,
389 QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_ADD
));
390 ok
= ok
&& merge_fields(&merge
, a
, b
, QPU_WADDR_MUL_MASK
,
391 QPU_SET_FIELD(QPU_W_NOP
, QPU_WADDR_MUL
));
393 /* Allow disagreement on WS (swapping A vs B physical reg file as the
394 * destination for ADD/MUL) if one of the original instructions
395 * ignores it (probably because it's just writing to accumulators).
397 if (qpu_waddr_ignores_pm(QPU_GET_FIELD(a
, QPU_WADDR_ADD
)) &&
398 qpu_waddr_ignores_pm(QPU_GET_FIELD(a
, QPU_WADDR_MUL
))) {
399 merge
= (merge
& ~QPU_WS
) | (b
& QPU_WS
);
400 } else if (qpu_waddr_ignores_pm(QPU_GET_FIELD(b
, QPU_WADDR_ADD
)) &&
401 qpu_waddr_ignores_pm(QPU_GET_FIELD(b
, QPU_WADDR_MUL
))) {
402 merge
= (merge
& ~QPU_WS
) | (a
& QPU_WS
);
404 if ((a
& QPU_WS
) != (b
& QPU_WS
))
415 qpu_set_sig(uint64_t inst
, uint32_t sig
)
417 assert(QPU_GET_FIELD(inst
, QPU_SIG
) == QPU_SIG_NONE
);
418 return (inst
& ~QPU_SIG_MASK
) | QPU_SET_FIELD(sig
, QPU_SIG
);
422 qpu_set_cond_add(uint64_t inst
, uint32_t sig
)
424 assert(QPU_GET_FIELD(inst
, QPU_COND_ADD
) == QPU_COND_ALWAYS
);
425 return (inst
& ~QPU_COND_ADD_MASK
) | QPU_SET_FIELD(sig
, QPU_COND_ADD
);
429 qpu_set_cond_mul(uint64_t inst
, uint32_t sig
)
431 assert(QPU_GET_FIELD(inst
, QPU_COND_MUL
) == QPU_COND_ALWAYS
);
432 return (inst
& ~QPU_COND_MUL_MASK
) | QPU_SET_FIELD(sig
, QPU_COND_MUL
);
436 qpu_waddr_is_tlb(uint32_t waddr
)
439 case QPU_W_TLB_COLOR_ALL
:
440 case QPU_W_TLB_COLOR_MS
:
449 qpu_inst_is_tlb(uint64_t inst
)
451 uint32_t sig
= QPU_GET_FIELD(inst
, QPU_SIG
);
453 return (qpu_waddr_is_tlb(QPU_GET_FIELD(inst
, QPU_WADDR_ADD
)) ||
454 qpu_waddr_is_tlb(QPU_GET_FIELD(inst
, QPU_WADDR_MUL
)) ||
455 sig
== QPU_SIG_COLOR_LOAD
||
456 sig
== QPU_SIG_WAIT_FOR_SCOREBOARD
);
460 qpu_serialize_one_inst(struct vc4_compile
*c
, uint64_t inst
)
462 if (c
->qpu_inst_count
>= c
->qpu_inst_size
) {
463 c
->qpu_inst_size
= MAX2(16, c
->qpu_inst_size
* 2);
464 c
->qpu_insts
= reralloc(c
, c
->qpu_insts
,
465 uint64_t, c
->qpu_inst_size
);
467 c
->qpu_insts
[c
->qpu_inst_count
++] = inst
;