vc4: Restructure texture insts as ALU ops with tex_[strb] as the dst.
author: Eric Anholt <eric@anholt.net>
Tue, 15 Nov 2016 20:54:26 +0000 (12:54 -0800)
committer: Eric Anholt <eric@anholt.net>
Tue, 29 Nov 2016 16:38:59 +0000 (08:38 -0800)
For now we're still just generating MOVs, but this will let us fold into
other ops in the future.  No difference on shader-db.

src/gallium/drivers/vc4/vc4_opt_algebraic.c
src/gallium/drivers/vc4/vc4_opt_small_immediates.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qir_lower_uniforms.c
src/gallium/drivers/vc4/vc4_qir_schedule.c
src/gallium/drivers/vc4/vc4_qir_validate.c
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 01ad05d2759e96361e7d8cf49ecd6148fd3dc5b3..5e7d26923de822921cf39ff567cb0a79f3cc4b36 100644 (file)
@@ -94,14 +94,17 @@ static void
 replace_with_mov(struct vc4_compile *c, struct qinst *inst, struct qreg arg)
 {
         dump_from(c, inst);
+
+        inst->src[0] = arg;
+        if (qir_has_implicit_tex_uniform(inst))
+                inst->src[1] = inst->src[qir_get_tex_uniform_src(inst)];
+
         if (qir_is_mul(inst))
                 inst->op = QOP_MMOV;
         else if (qir_is_float_input(inst))
                 inst->op = QOP_FMOV;
         else
                 inst->op = QOP_MOV;
-        inst->src[0] = arg;
-        inst->src[1] = c->undef;
         dump_to(c, inst);
 }
 
@@ -172,8 +175,12 @@ qir_opt_algebraic(struct vc4_compile *c)
                         break;
 
                 case QOP_ADD:
-                        if (replace_x_0_with_x(c, inst, 0) ||
-                            replace_x_0_with_x(c, inst, 1)) {
+                        /* Kernel validation requires that we use an actual
+                         * add instruction.
+                         */
+                        if (inst->dst.file != QFILE_TEX_S_DIRECT &&
+                            (replace_x_0_with_x(c, inst, 0) ||
+                             replace_x_0_with_x(c, inst, 1))) {
                                 progress = true;
                                 break;
                         }
index 15cbd12773fb883892609ffda500b5f3624e516f..89c48578021e7c01adfb586c0258bd750b9017ad 100644 (file)
@@ -62,11 +62,8 @@ qir_opt_small_immediates(struct vc4_compile *c)
                                 continue;
                         }
 
-                        if (i == 1 &&
-                            (inst->op == QOP_TEX_S ||
-                             inst->op == QOP_TEX_T ||
-                             inst->op == QOP_TEX_R ||
-                             inst->op == QOP_TEX_B)) {
+                        if (qir_is_tex(inst) &&
+                            i == qir_get_tex_uniform_src(inst)) {
                                 /* No turning the implicit uniform read into
                                  * an immediate.
                                  */
index 97cbabbd511b57d72f256d096c8c3277136f8d39..66fd902b69521a43ccca8bdfe75ba26bdb987c36 100644 (file)
@@ -120,7 +120,10 @@ indirect_uniform_load(struct vc4_compile *c, nir_intrinsic_instr *intr)
                                   qir_uniform_ui(c, (range->dst_offset +
                                                      range->size - 4)));
 
-        qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
+        qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
+                     indirect_offset,
+                     qir_uniform(c, QUNIFORM_UBO_ADDR, 0));
+
         c->num_texture_samples++;
 
         ntq_emit_thrsw(c);
@@ -381,7 +384,8 @@ ntq_emit_txf(struct vc4_compile *c, nir_tex_instr *instr)
         addr = qir_MAX(c, addr, qir_uniform_ui(c, 0));
         addr = qir_MIN(c, addr,  qir_uniform_ui(c, size - 4));
 
-        qir_TEX_DIRECT(c, addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
+        qir_ADD_dest(c, qir_reg(QFILE_TEX_S_DIRECT, 0),
+                     addr, qir_uniform(c, QUNIFORM_TEXTURE_MSAA_ADDR, unit));
 
         ntq_emit_thrsw(c);
 
@@ -479,14 +483,20 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
                                            unit | (is_txl << 16));
         }
 
+        struct qinst *tmu;
         if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
-                qir_TEX_R(c, r, texture_u[next_texture_u++]);
+                tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_R, 0), r);
+                tmu->src[qir_get_tex_uniform_src(tmu)] =
+                        texture_u[next_texture_u++];
         } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                    c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
                    c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                    c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
-                qir_TEX_R(c, qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
-                          texture_u[next_texture_u++]);
+                tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_R, 0),
+                                   qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR,
+                                               unit));
+                tmu->src[qir_get_tex_uniform_src(tmu)] =
+                        texture_u[next_texture_u++];
         }
 
         if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
@@ -497,12 +507,18 @@ ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
                 t = qir_SAT(c, t);
         }
 
-        qir_TEX_T(c, t, texture_u[next_texture_u++]);
+        tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_T, 0), t);
+        tmu->src[qir_get_tex_uniform_src(tmu)] =
+                texture_u[next_texture_u++];
 
-        if (is_txl || is_txb)
-                qir_TEX_B(c, lod, texture_u[next_texture_u++]);
+        if (is_txl || is_txb) {
+                tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_B, 0), lod);
+                tmu->src[qir_get_tex_uniform_src(tmu)] =
+                        texture_u[next_texture_u++];
+        }
 
-        qir_TEX_S(c, s, texture_u[next_texture_u++]);
+        tmu = qir_MOV_dest(c, qir_reg(QFILE_TEX_S, 0), s);
+        tmu->src[qir_get_tex_uniform_src(tmu)] = texture_u[next_texture_u++];
 
         c->num_texture_samples++;
 
index 2c9119d9ccf4c72d9533b139b993ae7b173935b6..7c556a98ea2b1fd884b7b0f5411bdfbb7797924f 100644 (file)
@@ -75,11 +75,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_FRAG_Z] = { "frag_z", 1, 0 },
         [QOP_FRAG_W] = { "frag_w", 1, 0 },
 
-        [QOP_TEX_S] = { "tex_s", 0, 2, true },
-        [QOP_TEX_T] = { "tex_t", 0, 2, true },
-        [QOP_TEX_R] = { "tex_r", 0, 2, true },
-        [QOP_TEX_B] = { "tex_b", 0, 2, true },
-        [QOP_TEX_DIRECT] = { "tex_direct", 0, 2, true },
         [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
 
         [QOP_THRSW] = { "thrsw", 0, 0, true },
@@ -104,12 +99,37 @@ qir_get_op_name(enum qop qop)
 }
 
 int
-qir_get_nsrc(struct qinst *inst)
+qir_get_non_sideband_nsrc(struct qinst *inst)
 {
         assert(qir_op_info[inst->op].name);
         return qir_op_info[inst->op].nsrc;
 }
 
+int
+qir_get_nsrc(struct qinst *inst)
+{
+        assert(qir_op_info[inst->op].name);
+
+        int nsrc = qir_get_non_sideband_nsrc(inst);
+
+        /* Normal (non-direct) texture coordinate writes also implicitly load
+         * a uniform for the texture parameters.
+         */
+        if (qir_is_tex(inst) && inst->dst.file != QFILE_TEX_S_DIRECT)
+                nsrc++;
+
+        return nsrc;
+}
+
+/* The sideband uniform for textures gets stored after the normal ALU
+ * arguments.
+ */
+int
+qir_get_tex_uniform_src(struct qinst *inst)
+{
+        return qir_get_nsrc(inst) - 1;
+}
+
 /**
  * Returns whether the instruction has any side effects that must be
  * preserved.
@@ -122,6 +142,11 @@ qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
         case QFILE_TLB_COLOR_WRITE:
         case QFILE_TLB_COLOR_WRITE_MS:
         case QFILE_TLB_STENCIL_SETUP:
+        case QFILE_TEX_S_DIRECT:
+        case QFILE_TEX_S:
+        case QFILE_TEX_T:
+        case QFILE_TEX_R:
+        case QFILE_TEX_B:
                 return true;
         default:
                 break;
@@ -206,7 +231,30 @@ qir_is_raw_mov(struct qinst *inst)
 bool
 qir_is_tex(struct qinst *inst)
 {
-        return inst->op >= QOP_TEX_S && inst->op <= QOP_TEX_DIRECT;
+        switch (inst->dst.file) {
+        case QFILE_TEX_S_DIRECT:
+        case QFILE_TEX_S:
+        case QFILE_TEX_T:
+        case QFILE_TEX_R:
+        case QFILE_TEX_B:
+                return true;
+        default:
+                return false;
+        }
+}
+
+bool
+qir_has_implicit_tex_uniform(struct qinst *inst)
+{
+        switch (inst->dst.file) {
+        case QFILE_TEX_S:
+        case QFILE_TEX_T:
+        case QFILE_TEX_R:
+        case QFILE_TEX_B:
+                return true;
+        default:
+                return false;
+        }
 }
 
 bool
@@ -298,6 +346,11 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
                 [QFILE_FRAG_Y] = "frag_y",
                 [QFILE_FRAG_REV_FLAG] = "frag_rev_flag",
                 [QFILE_QPU_ELEMENT] = "elem",
+                [QFILE_TEX_S_DIRECT] = "tex_s_direct",
+                [QFILE_TEX_S] = "tex_s",
+                [QFILE_TEX_T] = "tex_t",
+                [QFILE_TEX_R] = "tex_r",
+                [QFILE_TEX_B] = "tex_b",
         };
 
         switch (reg.file) {
@@ -330,6 +383,11 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
         case QFILE_TLB_COLOR_WRITE_MS:
         case QFILE_TLB_Z_WRITE:
         case QFILE_TLB_STENCIL_SETUP:
+        case QFILE_TEX_S_DIRECT:
+        case QFILE_TEX_S:
+        case QFILE_TEX_T:
+        case QFILE_TEX_R:
+        case QFILE_TEX_B:
                 fprintf(stderr, "%s", files[reg.file]);
                 break;
 
index a3b8762951d78d8dcbe87cdb5a0df9f00f5de6e9..99cc957853a52c0be2501b1b92280f14da0af419 100644 (file)
@@ -55,6 +55,18 @@ enum qfile {
         QFILE_TLB_Z_WRITE,
         QFILE_TLB_STENCIL_SETUP,
 
+        /* If tex_s is written on its own without preceding t/r/b setup, it's
+         * a direct memory access using the input value, without the sideband
+         * uniform load.  We represent these in QIR as a separate write
+         * destination so we can tell if the sideband uniform is present.
+         */
+        QFILE_TEX_S_DIRECT,
+
+        QFILE_TEX_S,
+        QFILE_TEX_T,
+        QFILE_TEX_R,
+        QFILE_TEX_B,
+
         /* Payload registers that aren't in the physical register file, so we
          * can just use the corresponding qpu_reg at qpu_emit time.
          */
@@ -132,24 +144,6 @@ enum qop {
         QOP_FRAG_Z,
         QOP_FRAG_W,
 
-        /** Texture x coordinate parameter write */
-        QOP_TEX_S,
-        /** Texture y coordinate parameter write */
-        QOP_TEX_T,
-        /** Texture border color parameter or cube map z coordinate write */
-        QOP_TEX_R,
-        /** Texture LOD bias parameter write */
-        QOP_TEX_B,
-
-        /**
-         * Texture-unit 4-byte read with address provided direct in S
-         * cooordinate.
-         *
-         * The first operand is the offset from the start of the UBO, and the
-         * second is the uniform that has the UBO's base pointer.
-         */
-        QOP_TEX_DIRECT,
-
         /**
          * Signal of texture read being necessary and then reading r4 into
          * the destination
@@ -203,7 +197,7 @@ struct qinst {
 
         enum qop op;
         struct qreg dst;
-        struct qreg src[2];
+        struct qreg src[3];
         bool sf;
         bool cond_is_exec_mask;
         uint8_t cond;
@@ -578,12 +572,15 @@ struct qinst *qir_emit_nondef(struct vc4_compile *c, struct qinst *inst);
 struct qreg qir_get_temp(struct vc4_compile *c);
 void qir_calculate_live_intervals(struct vc4_compile *c);
 int qir_get_nsrc(struct qinst *inst);
+int qir_get_non_sideband_nsrc(struct qinst *inst);
+int qir_get_tex_uniform_src(struct qinst *inst);
 bool qir_reg_equals(struct qreg a, struct qreg b);
 bool qir_has_side_effects(struct vc4_compile *c, struct qinst *inst);
 bool qir_has_side_effect_reads(struct vc4_compile *c, struct qinst *inst);
 bool qir_is_mul(struct qinst *inst);
 bool qir_is_raw_mov(struct qinst *inst);
 bool qir_is_tex(struct qinst *inst);
+bool qir_has_implicit_tex_uniform(struct qinst *inst);
 bool qir_is_float_input(struct qinst *inst);
 bool qir_depends_on_flags(struct qinst *inst);
 bool qir_writes_r4(struct qinst *inst);
@@ -737,11 +734,6 @@ QIR_ALU1(RSQ)
 QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
 QIR_ALU1(VARY_ADD_C)
-QIR_NODST_2(TEX_S)
-QIR_NODST_2(TEX_T)
-QIR_NODST_2(TEX_R)
-QIR_NODST_2(TEX_B)
-QIR_NODST_2(TEX_DIRECT)
 QIR_PAYLOAD(FRAG_Z)
 QIR_PAYLOAD(FRAG_W)
 QIR_ALU0(TEX_RESULT)
index 1884cfa5b784c1f015c35d1b3953be5e06d5954c..9ecfe65211e981b44daa8f635451ab1ff1084bf3 100644 (file)
@@ -77,7 +77,7 @@ is_lowerable_uniform(struct qinst *inst, int i)
         if (inst->src[i].file != QFILE_UNIF)
                 return false;
         if (qir_is_tex(inst))
-                return i != 1;
+                return i != qir_get_tex_uniform_src(inst);
         return true;
 }
 
index c1a2db5e3c5fdb226113be1b8ad7d1d843622cbb..a8ef189e583395d019fe310cfbe5a61cd5601186 100644 (file)
@@ -212,18 +212,6 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 add_dep(dir, state->last_vary_read, n);
                 break;
 
-        case QOP_TEX_S:
-        case QOP_TEX_T:
-        case QOP_TEX_R:
-        case QOP_TEX_B:
-        case QOP_TEX_DIRECT:
-                /* Texturing setup gets scheduled in order, because
-                 * the uniforms referenced by them have to land in a
-                 * specific order.
-                 */
-                add_write_dep(dir, &state->last_tex_coord, n);
-                break;
-
         case QOP_TEX_RESULT:
                 /* Results have to be fetched in order. */
                 add_write_dep(dir, &state->last_tex_result, n);
@@ -278,6 +266,18 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 add_write_dep(dir, &state->last_tlb, n);
                 break;
 
+        case QFILE_TEX_S_DIRECT:
+        case QFILE_TEX_S:
+        case QFILE_TEX_T:
+        case QFILE_TEX_R:
+        case QFILE_TEX_B:
+                /* Texturing setup gets scheduled in order, because
+                 * the uniforms referenced by them have to land in a
+                 * specific order.
+                 */
+                add_write_dep(dir, &state->last_tex_coord, n);
+                break;
+
         default:
                 break;
         }
@@ -315,12 +315,12 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
                         }
                 }
 
-                switch (inst->op) {
-                case QOP_TEX_S:
-                case QOP_TEX_T:
-                case QOP_TEX_R:
-                case QOP_TEX_B:
-                case QOP_TEX_DIRECT:
+                switch (inst->dst.file) {
+                case QFILE_TEX_S_DIRECT:
+                case QFILE_TEX_S:
+                case QFILE_TEX_T:
+                case QFILE_TEX_R:
+                case QFILE_TEX_B:
                         /* From the VC4 spec:
                          *
                          *     "The TFREQ input FIFO holds two full lots of s,
@@ -364,8 +364,8 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
                          * If the texture result fifo is full, block adding
                          * any more to it until the last QOP_TEX_RESULT.
                          */
-                        if (inst->op == QOP_TEX_S ||
-                            inst->op == QOP_TEX_DIRECT) {
+                        if (inst->dst.file == QFILE_TEX_S ||
+                            inst->dst.file == QFILE_TEX_S_DIRECT) {
                                 if (state.tfrcv_count ==
                                     (c->fs_threaded ? 2 : 4))
                                         block_until_tex_result(&state, n);
@@ -376,6 +376,11 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
                         state.tfreq_count++;
                         break;
 
+                default:
+                        break;
+                }
+
+                switch (inst->op) {
                 case QOP_TEX_RESULT:
                         /* Results have to be fetched after the
                          * coordinate setup.  Note that we're assuming
@@ -398,7 +403,6 @@ calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
                         break;
 
                 default:
-                        assert(!qir_is_tex(inst));
                         break;
                 }
         }
@@ -560,8 +564,8 @@ dump_state(struct vc4_compile *c, struct schedule_state *state)
 static uint32_t
 latency_between(struct schedule_node *before, struct schedule_node *after)
 {
-        if ((before->inst->op == QOP_TEX_S ||
-             before->inst->op == QOP_TEX_DIRECT) &&
+        if ((before->inst->dst.file == QFILE_TEX_S ||
+             before->inst->dst.file == QFILE_TEX_S_DIRECT) &&
             after->inst->op == QOP_TEX_RESULT)
                 return 100;
 
index 9579f7a15cbdf4f2e8f74fd28281cb86c17ff00a..302eb48265c35e48bdcd1f49c864dff73c3e2e8c 100644 (file)
@@ -84,6 +84,25 @@ void qir_validate(struct vc4_compile *c)
                 case QFILE_LOAD_IMM:
                         fail_instr(c, inst, "Bad dest file");
                         break;
+
+                case QFILE_TEX_S:
+                case QFILE_TEX_T:
+                case QFILE_TEX_R:
+                case QFILE_TEX_B:
+                        if (inst->src[qir_get_tex_uniform_src(inst)].file !=
+                            QFILE_UNIF) {
+                                fail_instr(c, inst,
+                                           "tex op missing implicit uniform");
+                        }
+                        break;
+
+                case QFILE_TEX_S_DIRECT:
+                        if (inst->op != QOP_ADD) {
+                                fail_instr(c, inst,
+                                           "kernel validation requires that "
+                                           "direct texture lookups use an ADD");
+                        }
+                        break;
                 }
 
                 for (int i = 0; i < qir_get_nsrc(inst); i++) {
@@ -119,6 +138,11 @@ void qir_validate(struct vc4_compile *c)
                         case QFILE_TLB_COLOR_WRITE_MS:
                         case QFILE_TLB_Z_WRITE:
                         case QFILE_TLB_STENCIL_SETUP:
+                        case QFILE_TEX_S_DIRECT:
+                        case QFILE_TEX_S:
+                        case QFILE_TEX_T:
+                        case QFILE_TEX_R:
+                        case QFILE_TEX_B:
                                 fail_instr(c, inst, "Bad src file");
                                 break;
                         }
index 9d9e5d84ecd9178dbbe537a93508dabc5fddb83a..47fc0b0928b2ad3dacdded4fc8839b6f37596df5 100644 (file)
@@ -347,6 +347,11 @@ vc4_generate_code_block(struct vc4_compile *c,
                         case QFILE_TLB_COLOR_WRITE_MS:
                         case QFILE_TLB_Z_WRITE:
                         case QFILE_TLB_STENCIL_SETUP:
+                        case QFILE_TEX_S:
+                        case QFILE_TEX_S_DIRECT:
+                        case QFILE_TEX_T:
+                        case QFILE_TEX_R:
+                        case QFILE_TEX_B:
                                 unreachable("bad qir src file");
                         }
                 }
@@ -379,6 +384,23 @@ vc4_generate_code_block(struct vc4_compile *c,
                         dst = qpu_ra(QPU_W_TLB_STENCIL_SETUP);
                         break;
 
+                case QFILE_TEX_S:
+                case QFILE_TEX_S_DIRECT:
+                        dst = qpu_rb(QPU_W_TMU0_S);
+                        break;
+
+                case QFILE_TEX_T:
+                        dst = qpu_rb(QPU_W_TMU0_T);
+                        break;
+
+                case QFILE_TEX_R:
+                        dst = qpu_rb(QPU_W_TMU0_R);
+                        break;
+
+                case QFILE_TEX_B:
+                        dst = qpu_rb(QPU_W_TMU0_B);
+                        break;
+
                 case QFILE_VARY:
                 case QFILE_UNIF:
                 case QFILE_SMALL_IMM:
@@ -477,21 +499,6 @@ vc4_generate_code_block(struct vc4_compile *c,
                         queue(block, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
                         break;
 
-                case QOP_TEX_S:
-                case QOP_TEX_T:
-                case QOP_TEX_R:
-                case QOP_TEX_B:
-                        queue(block, qpu_a_MOV(qpu_rb(QPU_W_TMU0_S +
-                                                      (qinst->op - QOP_TEX_S)),
-                                               src[0]) | unpack);
-                        break;
-
-                case QOP_TEX_DIRECT:
-                        fixup_raddr_conflict(block, dst, &src[0], &src[1],
-                                             qinst, &unpack);
-                        queue(block, qpu_a_ADD(qpu_rb(QPU_W_TMU0_S),
-                                               src[0], src[1]) | unpack);
-                        break;
 
                 case QOP_TEX_RESULT:
                         queue(block, qpu_NOP());
@@ -538,7 +545,7 @@ vc4_generate_code_block(struct vc4_compile *c,
                          * argument slot as well so that we don't take up
                          * another raddr just to get unused data.
                          */
-                        if (qir_get_nsrc(qinst) == 1)
+                        if (qir_get_non_sideband_nsrc(qinst) == 1)
                                 src[1] = src[0];
 
                         fixup_raddr_conflict(block, dst, &src[0], &src[1],