/*
 * Copyright © 2016 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <assert.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "compiler/v3d_compiler.h"
#include "qpu/qpu_instr.h"
#include "qpu/qpu_disasm.h"
28 static inline struct qpu_reg
31 struct qpu_reg reg
= {
38 static inline struct qpu_reg
39 qpu_magic(enum v3d_qpu_waddr waddr
)
41 struct qpu_reg reg
= {
48 static inline struct qpu_reg
51 return qpu_magic(V3D_QPU_WADDR_R0
+ acc
);
57 struct v3d_qpu_instr instr
= {
58 .type
= V3D_QPU_INSTR_TYPE_ALU
,
62 .waddr
= V3D_QPU_WADDR_NOP
,
67 .waddr
= V3D_QPU_WADDR_NOP
,
79 struct qreg undef
= vir_nop_reg();
80 struct qinst
*qinst
= vir_add_inst(V3D_QPU_A_NOP
, undef
, undef
, undef
);
86 new_qpu_nop_before(struct qinst
*inst
)
88 struct qinst
*q
= vir_nop();
90 list_addtail(&q
->link
, &inst
->link
);
96 new_ldunif_instr(struct qinst
*inst
, int i
)
98 struct qinst
*ldunif
= new_qpu_nop_before(inst
);
100 ldunif
->qpu
.sig
.ldunif
= true;
101 assert(inst
->src
[i
].file
== QFILE_UNIF
);
102 ldunif
->uniform
= inst
->src
[i
].index
;
106 * Allocates the src register (accumulator or register file) into the RADDR
107 * fields of the instruction.
110 set_src(struct v3d_qpu_instr
*instr
, enum v3d_qpu_mux
*mux
, struct qpu_reg src
)
113 assert(instr
->sig
.small_imm
);
114 *mux
= V3D_QPU_MUX_B
;
119 assert(src
.index
>= V3D_QPU_WADDR_R0
&&
120 src
.index
<= V3D_QPU_WADDR_R5
);
121 *mux
= src
.index
- V3D_QPU_WADDR_R0
+ V3D_QPU_MUX_R0
;
125 if (instr
->alu
.add
.a
!= V3D_QPU_MUX_A
&&
126 instr
->alu
.add
.b
!= V3D_QPU_MUX_A
&&
127 instr
->alu
.mul
.a
!= V3D_QPU_MUX_A
&&
128 instr
->alu
.mul
.b
!= V3D_QPU_MUX_A
) {
129 instr
->raddr_a
= src
.index
;
130 *mux
= V3D_QPU_MUX_A
;
132 if (instr
->raddr_a
== src
.index
) {
133 *mux
= V3D_QPU_MUX_A
;
135 assert(!(instr
->alu
.add
.a
== V3D_QPU_MUX_B
&&
136 instr
->alu
.add
.b
== V3D_QPU_MUX_B
&&
137 instr
->alu
.mul
.a
== V3D_QPU_MUX_B
&&
138 instr
->alu
.mul
.b
== V3D_QPU_MUX_B
) ||
139 src
.index
== instr
->raddr_b
);
141 instr
->raddr_b
= src
.index
;
142 *mux
= V3D_QPU_MUX_B
;
148 is_no_op_mov(struct qinst
*qinst
)
150 static const struct v3d_qpu_sig no_sig
= {0};
152 /* Make sure it's just a lone MOV. */
153 if (qinst
->qpu
.type
!= V3D_QPU_INSTR_TYPE_ALU
||
154 qinst
->qpu
.alu
.mul
.op
!= V3D_QPU_M_MOV
||
155 qinst
->qpu
.alu
.add
.op
!= V3D_QPU_A_NOP
||
156 memcmp(&qinst
->qpu
.sig
, &no_sig
, sizeof(no_sig
)) != 0) {
160 /* Check if it's a MOV from a register to itself. */
161 enum v3d_qpu_waddr waddr
= qinst
->qpu
.alu
.mul
.waddr
;
162 if (qinst
->qpu
.alu
.mul
.magic_write
) {
163 if (waddr
< V3D_QPU_WADDR_R0
|| waddr
> V3D_QPU_WADDR_R4
)
166 if (qinst
->qpu
.alu
.mul
.a
!=
167 V3D_QPU_MUX_R0
+ (waddr
- V3D_QPU_WADDR_R0
)) {
173 switch (qinst
->qpu
.alu
.mul
.a
) {
175 raddr
= qinst
->qpu
.raddr_a
;
178 raddr
= qinst
->qpu
.raddr_b
;
187 /* No packing or flags updates, or we need to execute the
190 if (qinst
->qpu
.alu
.mul
.a_unpack
!= V3D_QPU_UNPACK_NONE
||
191 qinst
->qpu
.alu
.mul
.output_pack
!= V3D_QPU_PACK_NONE
||
192 qinst
->qpu
.flags
.mc
!= V3D_QPU_COND_NONE
||
193 qinst
->qpu
.flags
.mpf
!= V3D_QPU_PF_NONE
||
194 qinst
->qpu
.flags
.muf
!= V3D_QPU_UF_NONE
) {
202 v3d_generate_code_block(struct v3d_compile
*c
,
203 struct qblock
*block
,
204 struct qpu_reg
*temp_registers
)
206 int last_vpm_read_index
= -1;
208 vir_for_each_inst_safe(qinst
, block
) {
210 fprintf(stderr
, "translating qinst to qpu: ");
211 vir_dump_inst(c
, qinst
);
212 fprintf(stderr
, "\n");
217 if (vir_has_uniform(qinst
))
220 int nsrc
= vir_get_nsrc(qinst
);
221 struct qpu_reg src
[ARRAY_SIZE(qinst
->src
)];
222 bool emitted_ldunif
= false;
223 for (int i
= 0; i
< nsrc
; i
++) {
224 int index
= qinst
->src
[i
].index
;
225 switch (qinst
->src
[i
].file
) {
227 src
[i
] = qpu_reg(qinst
->src
[i
].index
);
230 src
[i
] = qpu_magic(qinst
->src
[i
].index
);
237 src
[i
] = temp_registers
[index
];
240 /* XXX perf: If the last ldunif we emitted was
241 * the same uniform value, skip it. Common
242 * for multop/umul24 sequences.
244 if (!emitted_ldunif
) {
245 new_ldunif_instr(qinst
, i
);
247 emitted_ldunif
= true;
252 case QFILE_SMALL_IMM
:
257 assert((int)qinst
->src
[i
].index
>=
258 last_vpm_read_index
);
259 (void)last_vpm_read_index
;
260 last_vpm_read_index
= qinst
->src
[i
].index
;
262 temp
= new_qpu_nop_before(qinst
);
263 temp
->qpu
.sig
.ldvpm
= true;
270 unreachable("bad vir src file");
275 switch (qinst
->dst
.file
) {
277 dst
= qpu_magic(V3D_QPU_WADDR_NOP
);
281 dst
= qpu_reg(qinst
->dst
.index
);
285 dst
= qpu_magic(qinst
->dst
.index
);
289 dst
= temp_registers
[qinst
->dst
.index
];
293 dst
= qpu_magic(V3D_QPU_WADDR_VPM
);
297 dst
= qpu_magic(V3D_QPU_WADDR_TLB
);
301 dst
= qpu_magic(V3D_QPU_WADDR_TLBU
);
305 case QFILE_SMALL_IMM
:
307 assert(!"not reached");
311 if (qinst
->qpu
.type
== V3D_QPU_INSTR_TYPE_ALU
) {
312 if (qinst
->qpu
.sig
.ldunif
) {
313 assert(qinst
->qpu
.alu
.add
.op
== V3D_QPU_A_NOP
);
314 assert(qinst
->qpu
.alu
.mul
.op
== V3D_QPU_M_NOP
);
317 dst
.index
!= V3D_QPU_WADDR_R5
) {
318 assert(c
->devinfo
->ver
>= 40);
320 qinst
->qpu
.sig
.ldunif
= false;
321 qinst
->qpu
.sig
.ldunifrf
= true;
322 qinst
->qpu
.sig_addr
= dst
.index
;
323 qinst
->qpu
.sig_magic
= dst
.magic
;
325 } else if (v3d_qpu_sig_writes_address(c
->devinfo
,
327 assert(qinst
->qpu
.alu
.add
.op
== V3D_QPU_A_NOP
);
328 assert(qinst
->qpu
.alu
.mul
.op
== V3D_QPU_M_NOP
);
330 qinst
->qpu
.sig_addr
= dst
.index
;
331 qinst
->qpu
.sig_magic
= dst
.magic
;
332 } else if (qinst
->qpu
.alu
.add
.op
!= V3D_QPU_A_NOP
) {
333 assert(qinst
->qpu
.alu
.mul
.op
== V3D_QPU_M_NOP
);
336 &qinst
->qpu
.alu
.add
.a
, src
[0]);
340 &qinst
->qpu
.alu
.add
.b
, src
[1]);
343 qinst
->qpu
.alu
.add
.waddr
= dst
.index
;
344 qinst
->qpu
.alu
.add
.magic_write
= dst
.magic
;
348 &qinst
->qpu
.alu
.mul
.a
, src
[0]);
352 &qinst
->qpu
.alu
.mul
.b
, src
[1]);
355 qinst
->qpu
.alu
.mul
.waddr
= dst
.index
;
356 qinst
->qpu
.alu
.mul
.magic_write
= dst
.magic
;
358 if (is_no_op_mov(qinst
)) {
359 vir_remove_instruction(c
, qinst
);
364 assert(qinst
->qpu
.type
== V3D_QPU_INSTR_TYPE_BRANCH
);
370 reads_uniform(const struct v3d_device_info
*devinfo
, uint64_t instruction
)
372 struct v3d_qpu_instr qpu
;
373 MAYBE_UNUSED
bool ok
= v3d_qpu_instr_unpack(devinfo
, instruction
, &qpu
);
376 if (qpu
.sig
.ldunif
||
382 if (qpu
.type
== V3D_QPU_INSTR_TYPE_BRANCH
)
385 if (qpu
.type
== V3D_QPU_INSTR_TYPE_ALU
) {
386 if (qpu
.alu
.add
.magic_write
&&
387 v3d_qpu_magic_waddr_loads_unif(qpu
.alu
.add
.waddr
)) {
391 if (qpu
.alu
.mul
.magic_write
&&
392 v3d_qpu_magic_waddr_loads_unif(qpu
.alu
.mul
.waddr
)) {
401 v3d_dump_qpu(struct v3d_compile
*c
)
403 fprintf(stderr
, "%s prog %d/%d QPU:\n",
404 vir_get_stage_name(c
),
405 c
->program_id
, c
->variant_id
);
407 int next_uniform
= 0;
408 for (int i
= 0; i
< c
->qpu_inst_count
; i
++) {
409 const char *str
= v3d_qpu_disasm(c
->devinfo
, c
->qpu_insts
[i
]);
410 fprintf(stderr
, "0x%016"PRIx64
" %s", c
->qpu_insts
[i
], str
);
412 /* We can only do this on 4.x, because we're not tracking TMU
413 * implicit uniforms here on 3.x.
415 if (c
->devinfo
->ver
>= 40 &&
416 reads_uniform(c
->devinfo
, c
->qpu_insts
[i
])) {
417 fprintf(stderr
, " (");
418 vir_dump_uniform(c
->uniform_contents
[next_uniform
],
419 c
->uniform_data
[next_uniform
]);
420 fprintf(stderr
, ")");
423 fprintf(stderr
, "\n");
424 ralloc_free((void *)str
);
427 /* Make sure our dumping lined up. */
428 if (c
->devinfo
->ver
>= 40)
429 assert(next_uniform
== c
->num_uniforms
);
431 fprintf(stderr
, "\n");
435 v3d_vir_to_qpu(struct v3d_compile
*c
, struct qpu_reg
*temp_registers
)
437 /* Reset the uniform count to how many will be actually loaded by the
438 * generated QPU code.
442 vir_for_each_block(block
, c
)
443 v3d_generate_code_block(c
, block
, temp_registers
);
445 uint32_t cycles
= v3d_qpu_schedule_instructions(c
);
447 c
->qpu_insts
= rzalloc_array(c
, uint64_t, c
->qpu_inst_count
);
449 vir_for_each_inst_inorder(inst
, c
) {
450 bool ok
= v3d_qpu_instr_pack(c
->devinfo
, &inst
->qpu
,
453 fprintf(stderr
, "Failed to pack instruction:\n");
454 vir_dump_inst(c
, inst
);
455 fprintf(stderr
, "\n");
460 assert(i
== c
->qpu_inst_count
);
462 if (V3D_DEBUG
& V3D_DEBUG_SHADERDB
) {
463 fprintf(stderr
, "SHADER-DB: %s prog %d/%d: %d instructions\n",
464 vir_get_stage_name(c
),
465 c
->program_id
, c
->variant_id
,
469 /* The QPU cycle estimates are pretty broken (see waddr_latency()), so
470 * don't report them for now.
473 fprintf(stderr
, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
474 vir_get_stage_name(c
),
475 c
->program_id
, c
->variant_id
,
479 if (V3D_DEBUG
& (V3D_DEBUG_QPU
|
480 v3d_debug_flag_for_shader_stage(c
->s
->info
.stage
))) {
486 free(temp_registers
);