return q;
}
-static void
-new_ldunif_instr(struct qinst *inst, int i)
-{
- struct qinst *ldunif = new_qpu_nop_before(inst);
-
- ldunif->qpu.sig.ldunif = true;
- assert(inst->src[i].file == QFILE_UNIF);
- ldunif->uniform = inst->src[i].index;
-}
-
/**
* Allocates the src register (accumulator or register file) into the RADDR
* fields of the instruction.
struct qinst *temp;
- if (vir_has_implicit_uniform(qinst)) {
- int src = vir_get_implicit_uniform_src(qinst);
- assert(qinst->src[src].file == QFILE_UNIF);
- qinst->uniform = qinst->src[src].index;
+ if (vir_has_uniform(qinst))
c->num_uniforms++;
- }
- int nsrc = vir_get_non_sideband_nsrc(qinst);
+ int nsrc = vir_get_nsrc(qinst);
struct qpu_reg src[ARRAY_SIZE(qinst->src)];
- bool emitted_ldunif = false;
for (int i = 0; i < nsrc; i++) {
int index = qinst->src[i].index;
switch (qinst->src[i].file) {
case QFILE_TEMP:
src[i] = temp_registers[index];
break;
- case QFILE_UNIF:
- /* XXX perf: If the last ldunif we emitted was
- * the same uniform value, skip it. Common
- * for multop/umul24 sequences.
- */
- if (!emitted_ldunif) {
- new_ldunif_instr(qinst, i);
- c->num_uniforms++;
- emitted_ldunif = true;
- }
-
- src[i] = qpu_acc(5);
- break;
case QFILE_SMALL_IMM:
src[i].smimm = true;
break;
src[i] = qpu_acc(3);
break;
-
- case QFILE_TLB:
- case QFILE_TLBU:
- unreachable("bad vir src file");
}
}
dst = qpu_magic(V3D_QPU_WADDR_VPM);
break;
- case QFILE_TLB:
- dst = qpu_magic(V3D_QPU_WADDR_TLB);
- break;
-
- case QFILE_TLBU:
- dst = qpu_magic(V3D_QPU_WADDR_TLBU);
- break;
-
- case QFILE_UNIF:
case QFILE_SMALL_IMM:
case QFILE_LOAD_IMM:
assert(!"not reached");
}
if (qinst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
- if (v3d_qpu_sig_writes_address(c->devinfo,
+ if (qinst->qpu.sig.ldunif) {
+ assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
+ assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
+
+ if (!dst.magic ||
+ dst.index != V3D_QPU_WADDR_R5) {
+ assert(c->devinfo->ver >= 40);
+
+ qinst->qpu.sig.ldunif = false;
+ qinst->qpu.sig.ldunifrf = true;
+ qinst->qpu.sig_addr = dst.index;
+ qinst->qpu.sig_magic = dst.magic;
+ }
+ } else if (v3d_qpu_sig_writes_address(c->devinfo,
&qinst->qpu.sig)) {
assert(qinst->qpu.alu.add.op == V3D_QPU_A_NOP);
assert(qinst->qpu.alu.mul.op == V3D_QPU_M_NOP);
assert(ok);
if (qpu.sig.ldunif ||
- qpu.sig.ldunifarf ||
+ qpu.sig.ldunifrf ||
qpu.sig.wrtmuc) {
return true;
}
vir_for_each_block(block, c)
v3d_generate_code_block(c, block, temp_registers);
- uint32_t cycles = v3d_qpu_schedule_instructions(c);
+ v3d_qpu_schedule_instructions(c);
c->qpu_insts = rzalloc_array(c, uint64_t, c->qpu_inst_count);
int i = 0;
}
assert(i == c->qpu_inst_count);
- if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
- fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
- vir_get_stage_name(c),
- c->program_id, c->variant_id,
- c->qpu_inst_count);
- }
-
- /* The QPU cycle estimates are pretty broken (see waddr_latency()), so
- * don't report them for now.
- */
- if (false) {
- fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
- vir_get_stage_name(c),
- c->program_id, c->variant_id,
- cycles);
- }
-
if (V3D_DEBUG & (V3D_DEBUG_QPU |
v3d_debug_flag_for_shader_stage(c->s->info.stage))) {
v3d_dump_qpu(c);