v3d: Switch implicit uniforms over to being any qinst->uniform != ~0.
[mesa.git] / src / broadcom / compiler / vir_to_qpu.c
index 83b1936cbd9d76b5bc91b6bb21e55e6b33776c7a..c82c1aae6e6033462e7aded281b8e38e7b1fedb0 100644 (file)
@@ -76,7 +76,7 @@ v3d_qpu_nop(void)
 static struct qinst *
 vir_nop(void)
 {
-        struct qreg undef = { QFILE_NULL, 0 };
+        struct qreg undef = vir_nop_reg();
         struct qinst *qinst = vir_add_inst(V3D_QPU_A_NOP, undef, undef, undef);
 
         return qinst;
@@ -109,6 +109,12 @@ new_ldunif_instr(struct qinst *inst, int i)
 static void
 set_src(struct v3d_qpu_instr *instr, enum v3d_qpu_mux *mux, struct qpu_reg src)
 {
+        if (src.smimm) {
+                assert(instr->sig.small_imm);
+                *mux = V3D_QPU_MUX_B;
+                return;
+        }
+
         if (src.magic) {
                 assert(src.index >= V3D_QPU_WADDR_R0 &&
                        src.index <= V3D_QPU_WADDR_R5);
@@ -208,14 +214,10 @@ v3d_generate_code_block(struct v3d_compile *c,
 
                 struct qinst *temp;
 
-                if (vir_has_implicit_uniform(qinst)) {
-                        int src = vir_get_implicit_uniform_src(qinst);
-                        assert(qinst->src[src].file == QFILE_UNIF);
-                        qinst->uniform = qinst->src[src].index;
+                if (vir_has_uniform(qinst))
                         c->num_uniforms++;
-                }
 
-                int nsrc = vir_get_non_sideband_nsrc(qinst);
+                int nsrc = vir_get_nsrc(qinst);
                 struct qpu_reg src[ARRAY_SIZE(qinst->src)];
                 bool emitted_ldunif = false;
                 for (int i = 0; i < nsrc; i++) {
@@ -235,6 +237,10 @@ v3d_generate_code_block(struct v3d_compile *c,
                                 src[i] = temp_registers[index];
                                 break;
                         case QFILE_UNIF:
+                                /* XXX perf: If the last ldunif we emitted was
+                                 * the same uniform value, skip it.  Common
+                                 * for multop/umul24 sequences.
+                                 */
                                 if (!emitted_ldunif) {
                                         new_ldunif_instr(qinst, i);
                                         c->num_uniforms++;
@@ -244,15 +250,7 @@ v3d_generate_code_block(struct v3d_compile *c,
                                 src[i] = qpu_acc(5);
                                 break;
                         case QFILE_SMALL_IMM:
-                                abort(); /* XXX */
-#if 0
-                                src[i].mux = QPU_MUX_SMALL_IMM;
-                                src[i].addr = qpu_encode_small_immediate(qinst->src[i].index);
-                                /* This should only have returned a valid
-                                 * small immediate field, not ~0 for failure.
-                                 */
-                                assert(src[i].addr <= 47);
-#endif
+                                src[i].smimm = true;
                                 break;
 
                         case QFILE_VPM:
@@ -355,6 +353,36 @@ v3d_generate_code_block(struct v3d_compile *c,
         }
 }
 
+static bool
+reads_uniform(const struct v3d_device_info *devinfo, uint64_t instruction)
+{
+        struct v3d_qpu_instr qpu;
+        MAYBE_UNUSED bool ok = v3d_qpu_instr_unpack(devinfo, instruction, &qpu);
+        assert(ok);
+        /* True means this instruction reads a uniform; check signals first. */
+        if (qpu.sig.ldunif ||
+            qpu.sig.ldunifarf ||
+            qpu.sig.wrtmuc) {
+                return true;
+        }
+        /* Every branch instruction is counted as reading a uniform. */
+        if (qpu.type == V3D_QPU_INSTR_TYPE_BRANCH)
+                return true;
+        /* ALU: add/mul magic write, per v3d_qpu_magic_waddr_loads_unif(). */
+        if (qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+                if (qpu.alu.add.magic_write &&
+                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.add.waddr)) {
+                        return true;
+                }
+
+                if (qpu.alu.mul.magic_write &&
+                    v3d_qpu_magic_waddr_loads_unif(qpu.alu.mul.waddr)) {
+                        return true;
+                }
+        }
+
+        return false;
+}
 
 static void
 v3d_dump_qpu(struct v3d_compile *c)
@@ -363,10 +391,30 @@ v3d_dump_qpu(struct v3d_compile *c)
                 vir_get_stage_name(c),
                 c->program_id, c->variant_id);
 
+        int next_uniform = 0;
         for (int i = 0; i < c->qpu_inst_count; i++) {
                 const char *str = v3d_qpu_disasm(c->devinfo, c->qpu_insts[i]);
-                fprintf(stderr, "0x%016"PRIx64" %s\n", c->qpu_insts[i], str);
+                fprintf(stderr, "0x%016"PRIx64" %s", c->qpu_insts[i], str);
+
+                /* We can only do this on 4.x, because we're not tracking TMU
+                 * implicit uniforms here on 3.x.
+                 */
+                if (c->devinfo->ver >= 40 &&
+                    reads_uniform(c->devinfo, c->qpu_insts[i])) {
+                        fprintf(stderr, " (");
+                        vir_dump_uniform(c->uniform_contents[next_uniform],
+                                         c->uniform_data[next_uniform]);
+                        fprintf(stderr, ")");
+                        next_uniform++;
+                }
+                fprintf(stderr, "\n");
+                ralloc_free((void *)str);
         }
+
+        /* Make sure our dumping lined up. */
+        if (c->devinfo->ver >= 40)
+                assert(next_uniform == c->num_uniforms);
+
         fprintf(stderr, "\n");
 }
 
@@ -405,7 +453,10 @@ v3d_vir_to_qpu(struct v3d_compile *c, struct qpu_reg *temp_registers)
                         c->qpu_inst_count);
         }
 
-        if (V3D_DEBUG & V3D_DEBUG_SHADERDB) {
+        /* The QPU cycle estimates are pretty broken (see waddr_latency()), so
+         * don't report them for now.
+         */
+        if (false) {
                 fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d estimated cycles\n",
                         vir_get_stage_name(c),
                         c->program_id, c->variant_id,