/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "compiler/v3d_compiler.h"
25 #include "qpu/qpu_instr.h"
26 #include "qpu/qpu_disasm.h"
28 static inline struct qpu_reg
31 struct qpu_reg reg
= {
38 static inline struct qpu_reg
39 qpu_magic(enum v3d_qpu_waddr waddr
)
41 struct qpu_reg reg
= {
48 static inline struct qpu_reg
51 return qpu_magic(V3D_QPU_WADDR_R0
+ acc
);
57 struct v3d_qpu_instr instr
= {
58 .type
= V3D_QPU_INSTR_TYPE_ALU
,
62 .waddr
= V3D_QPU_WADDR_NOP
,
67 .waddr
= V3D_QPU_WADDR_NOP
,
79 struct qreg undef
= vir_nop_reg();
80 struct qinst
*qinst
= vir_add_inst(V3D_QPU_A_NOP
, undef
, undef
, undef
);
86 new_qpu_nop_before(struct qinst
*inst
)
88 struct qinst
*q
= vir_nop();
90 list_addtail(&q
->link
, &inst
->link
);
96 * Allocates the src register (accumulator or register file) into the RADDR
97 * fields of the instruction.
100 set_src(struct v3d_qpu_instr
*instr
, enum v3d_qpu_mux
*mux
, struct qpu_reg src
)
103 assert(instr
->sig
.small_imm
);
104 *mux
= V3D_QPU_MUX_B
;
109 assert(src
.index
>= V3D_QPU_WADDR_R0
&&
110 src
.index
<= V3D_QPU_WADDR_R5
);
111 *mux
= src
.index
- V3D_QPU_WADDR_R0
+ V3D_QPU_MUX_R0
;
115 if (instr
->alu
.add
.a
!= V3D_QPU_MUX_A
&&
116 instr
->alu
.add
.b
!= V3D_QPU_MUX_A
&&
117 instr
->alu
.mul
.a
!= V3D_QPU_MUX_A
&&
118 instr
->alu
.mul
.b
!= V3D_QPU_MUX_A
) {
119 instr
->raddr_a
= src
.index
;
120 *mux
= V3D_QPU_MUX_A
;
122 if (instr
->raddr_a
== src
.index
) {
123 *mux
= V3D_QPU_MUX_A
;
125 assert(!(instr
->alu
.add
.a
== V3D_QPU_MUX_B
&&
126 instr
->alu
.add
.b
== V3D_QPU_MUX_B
&&
127 instr
->alu
.mul
.a
== V3D_QPU_MUX_B
&&
128 instr
->alu
.mul
.b
== V3D_QPU_MUX_B
) ||
129 src
.index
== instr
->raddr_b
);
131 instr
->raddr_b
= src
.index
;
132 *mux
= V3D_QPU_MUX_B
;
138 is_no_op_mov(struct qinst
*qinst
)
140 static const struct v3d_qpu_sig no_sig
= {0};
142 /* Make sure it's just a lone MOV. */
143 if (qinst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
||
144 qinst
->qpu
.alu
.mul
.op
!= V3D_QPU_M_MOV
||
145 qinst
->qpu
.alu
.add
.op
!= V3D_QPU_A_NOP
||
146 memcmp(&qinst
->qpu
.sig
, &no_sig
, sizeof(no_sig
)) != 0) {
150 /* Check if it's a MOV from a register to itself. */
151 enum v3d_qpu_waddr waddr
= qinst
->qpu
.alu
.mul
.waddr
;
152 if (qinst
->qpu
.alu
.mul
.magic_write
) {
153 if (waddr
< V3D_QPU_WADDR_R0
|| waddr
> V3D_QPU_WADDR_R4
)
156 if (qinst
->qpu
.alu
.mul
.a
!=
157 V3D_QPU_MUX_R0
+ (waddr
- V3D_QPU_WADDR_R0
)) {
163 switch (qinst
->qpu
.alu
.mul
.a
) {
165 raddr
= qinst
->qpu
.raddr_a
;
168 raddr
= qinst
->qpu
.raddr_b
;
177 /* No packing or flags updates, or we need to execute the
180 if (qinst
->qpu
.alu
.mul
.a_unpack
!= V3D_QPU_UNPACK_NONE
||
181 qinst
->qpu
.alu
.mul
.output_pack
!= V3D_QPU_PACK_NONE
||
182 qinst
->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
||
183 qinst
->qpu
.flags
.mpf
!= V3D_QPU_PF_NONE
||
184 qinst
->qpu
.flags
.muf
!= V3D_QPU_UF_NONE
) {
192 v3d_generate_code_block(struct v3d_compile
*c
,
193 struct qblock
*block
,
194 struct qpu_reg
*temp_registers
)
196 int last_vpm_read_index
= -1;
198 vir_for_each_inst_safe(qinst
, block
) {
200 fprintf(stderr
, "translating qinst to qpu: ");
201 vir_dump_inst(c
, qinst
);
202 fprintf(stderr
, "\n");
207 if (vir_has_uniform(qinst
))
210 int nsrc
= vir_get_nsrc(qinst
);
211 struct qpu_reg src
[ARRAY_SIZE(qinst
->src
)];
212 for (int i
= 0; i
< nsrc
; i
++) {
213 int index
= qinst
->src
[i
].index
;
214 switch (qinst
->src
[i
].file
) {
216 src
[i
] = qpu_reg(qinst
->src
[i
].index
);
219 src
[i
] = qpu_magic(qinst
->src
[i
].index
);
226 src
[i
] = temp_registers
[index
];
228 case QFILE_SMALL_IMM
:
233 assert((int)qinst
->src
[i
].index
>=
234 last_vpm_read_index
);
235 (void)last_vpm_read_index
;
236 last_vpm_read_index
= qinst
->src
[i
].index
;
238 temp
= new_qpu_nop_before(qinst
);
239 temp
->qpu
.sig
.ldvpm
= true;
247 switch (qinst
->dst
.file
) {
249 dst
= qpu_magic(V3D_QPU_WADDR_NOP
);
253 dst
= qpu_reg(qinst
->dst
.index
);
257 dst
= qpu_magic(qinst
->dst
.index
);
261 dst
= temp_registers
[qinst
->dst
.index
];
265 dst
= qpu_magic(V3D_QPU_WADDR_VPM
);
268 case QFILE_SMALL_IMM
:
270 assert(!"not reached");
274 if (qinst
->qpu
.type
== V3D_QPU_INSTR_TYPE_ALU
) {
275 if (qinst
->qpu
.sig
.ldunif
) {
276 assert(qinst
->qpu
.alu
.add
.op
== V3D_QPU_A_NOP
);
277 assert(qinst
->qpu
.alu
.mul
.op
== V3D_QPU_M_NOP
);
280 dst
.index
!= V3D_QPU_WADDR_R5
) {
281 assert(c
->devinfo
->ver
>= 40);
283 qinst
->qpu
.sig
.ldunif
= false;
284 qinst
->qpu
.sig
.ldunifrf
= true;
285 qinst
->qpu
.sig_addr
= dst
.index
;
286 qinst
->qpu
.sig_magic
= dst
.magic
;
288 } else if (v3d_qpu_sig_writes_address(c
->devinfo
,
290 assert(qinst
->qpu
.alu
.add
.op
== V3D_QPU_A_NOP
);
291 assert(qinst
->qpu
.alu
.mul
.op
== V3D_QPU_M_NOP
);
293 qinst
->qpu
.sig_addr
= dst
.index
;
294 qinst
->qpu
.sig_magic
= dst
.magic
;
295 } else if (qinst
->qpu
.alu
.add
.op
!= V3D_QPU_A_NOP
) {
296 assert(qinst
->qpu
.alu
.mul
.op
== V3D_QPU_M_NOP
);
299 &qinst
->qpu
.alu
.add
.a
, src
[0]);
303 &qinst
->qpu
.alu
.add
.b
, src
[1]);
306 qinst
->qpu
.alu
.add
.waddr
= dst
.index
;
307 qinst
->qpu
.alu
.add
.magic_write
= dst
.magic
;
311 &qinst
->qpu
.alu
.mul
.a
, src
[0]);
315 &qinst
->qpu
.alu
.mul
.b
, src
[1]);
318 qinst
->qpu
.alu
.mul
.waddr
= dst
.index
;
319 qinst
->qpu
.alu
.mul
.magic_write
= dst
.magic
;
321 if (is_no_op_mov(qinst
)) {
322 vir_remove_instruction(c
, qinst
);
327 assert(qinst
->qpu
.type
== V3D_QPU_INSTR_TYPE_BRANCH
);
333 reads_uniform(const struct v3d_device_info
*devinfo
, uint64_t instruction
)
335 struct v3d_qpu_instr qpu
;
336 MAYBE_UNUSED
bool ok
= v3d_qpu_instr_unpack(devinfo
, instruction
, &qpu
);
339 if (qpu
.sig
.ldunif
||
345 if (qpu
.type
== V3D_QPU_INSTR_TYPE_BRANCH
)
348 if (qpu
.type
== V3D_QPU_INSTR_TYPE_ALU
) {
349 if (qpu
.alu
.add
.magic_write
&&
350 v3d_qpu_magic_waddr_loads_unif(qpu
.alu
.add
.waddr
)) {
354 if (qpu
.alu
.mul
.magic_write
&&
355 v3d_qpu_magic_waddr_loads_unif(qpu
.alu
.mul
.waddr
)) {
364 v3d_dump_qpu(struct v3d_compile
*c
)
366 fprintf(stderr
, "%s prog %d/%d QPU:\n",
367 vir_get_stage_name(c
),
368 c
->program_id
, c
->variant_id
);
370 int next_uniform
= 0;
371 for (int i
= 0; i
< c
->qpu_inst_count
; i
++) {
372 const char *str
= v3d_qpu_disasm(c
->devinfo
, c
->qpu_insts
[i
]);
373 fprintf(stderr
, "0x%016"PRIx64
" %s", c
->qpu_insts
[i
], str
);
375 /* We can only do this on 4.x, because we're not tracking TMU
376 * implicit uniforms here on 3.x.
378 if (c
->devinfo
->ver
>= 40 &&
379 reads_uniform(c
->devinfo
, c
->qpu_insts
[i
])) {
380 fprintf(stderr
, " (");
381 vir_dump_uniform(c
->uniform_contents
[next_uniform
],
382 c
->uniform_data
[next_uniform
]);
383 fprintf(stderr
, ")");
386 fprintf(stderr
, "\n");
387 ralloc_free((void *)str
);
390 /* Make sure our dumping lined up. */
391 if (c
->devinfo
->ver
>= 40)
392 assert(next_uniform
== c
->num_uniforms
);
394 fprintf(stderr
, "\n");
398 v3d_vir_to_qpu(struct v3d_compile
*c
, struct qpu_reg
*temp_registers
)
400 /* Reset the uniform count to how many will be actually loaded by the
401 * generated QPU code.
405 vir_for_each_block(block
, c
)
406 v3d_generate_code_block(c
, block
, temp_registers
);
408 v3d_qpu_schedule_instructions(c
);
410 c
->qpu_insts
= rzalloc_array(c
, uint64_t, c
->qpu_inst_count
);
412 vir_for_each_inst_inorder(inst
, c
) {
413 bool ok
= v3d_qpu_instr_pack(c
->devinfo
, &inst
->qpu
,
416 fprintf(stderr
, "Failed to pack instruction:\n");
417 vir_dump_inst(c
, inst
);
418 fprintf(stderr
, "\n");
423 assert(i
== c
->qpu_inst_count
);
425 if (V3D_DEBUG
& (V3D_DEBUG_QPU
|
426 v3d_debug_flag_for_shader_stage(c
->s
->info
.stage
))) {
432 free(temp_registers
);