vc4: Add support for the 2-bit LOAD_IMM variants.
author: Eric Anholt <eric@anholt.net>
Thu, 25 Aug 2016 19:15:29 +0000 (12:15 -0700)
committer: Eric Anholt <eric@anholt.net>
Fri, 26 Aug 2016 00:24:11 +0000 (17:24 -0700)
Extracted and fixed up from a patch by jonasarrow on GitHub.  This ended
up not getting used for ddx/ddy, but it seems like it might still be useful.

src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu.c
src/gallium/drivers/vc4/vc4_qpu.h
src/gallium/drivers/vc4/vc4_qpu_defines.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 0919d32a5284594b4ac91d4f63fceacd5a3d2ea7..9b4a28ebab6b8ed0f8eaf10a915b46de091f717f 100644 (file)
@@ -83,6 +83,8 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_TEX_RESULT] = { "tex_result", 1, 0, true },
 
         [QOP_LOAD_IMM] = { "load_imm", 0, 1 },
+        [QOP_LOAD_IMM_U2] = { "load_imm_u2", 0, 1 },
+        [QOP_LOAD_IMM_I2] = { "load_imm_i2", 0, 1 },
 
         [QOP_BRANCH] = { "branch", 0, 0, true },
         [QOP_UNIFORMS_RESET] = { "uniforms_reset", 0, 2, true },
index 9e61200ef649f1ceec37d8e272d3f51a01298425..90cc138504352df4cccd27a86d45cb49fec0411b 100644 (file)
@@ -156,8 +156,18 @@ enum qop {
          */
         QOP_TEX_RESULT,
 
+        /* 32-bit immediate loaded to each SIMD channel */
         QOP_LOAD_IMM,
 
+        /* 32-bit immediate divided into 16 2-bit unsigned int values and
+         * loaded to each corresponding SIMD channel.
+         */
+        QOP_LOAD_IMM_U2,
+        /* 32-bit immediate divided into 16 2-bit signed int values and
+         * loaded to each corresponding SIMD channel.
+         */
+        QOP_LOAD_IMM_I2,
+
         /* Jumps to block->successor[0] if the qinst->cond (as a
          * QPU_COND_BRANCH_*) passes, or block->successor[1] if not.  Note
          * that block->successor[1] may be unset if the condition is ALWAYS.
@@ -796,6 +806,22 @@ qir_LOAD_IMM(struct vc4_compile *c, uint32_t val)
                                         qir_reg(QFILE_LOAD_IMM, val), c->undef));
 }
 
+/**
+ * Emits a QOP_LOAD_IMM_U2 instruction and returns the value produced by
+ * qir_emit_def() for it.
+ *
+ * @param c    compile context the instruction is emitted into
+ * @param val  raw 32-bit immediate; it is carried as the index of a
+ *             QFILE_LOAD_IMM source register
+ */
+static inline struct qreg
+qir_LOAD_IMM_U2(struct vc4_compile *c, uint32_t val)
+{
+        /* The immediate is the instruction's first source; the destination
+         * and second source are left as c->undef.
+         */
+        struct qreg imm = qir_reg(QFILE_LOAD_IMM, val);
+
+        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_U2, c->undef,
+                                        imm, c->undef));
+}
+
+/**
+ * Emits a QOP_LOAD_IMM_I2 instruction and returns the value produced by
+ * qir_emit_def() for it.
+ *
+ * @param c    compile context the instruction is emitted into
+ * @param val  raw 32-bit immediate; it is carried as the index of a
+ *             QFILE_LOAD_IMM source register
+ */
+static inline struct qreg
+qir_LOAD_IMM_I2(struct vc4_compile *c, uint32_t val)
+{
+        /* The immediate is the instruction's first source; the destination
+         * and second source are left as c->undef.
+         */
+        struct qreg imm = qir_reg(QFILE_LOAD_IMM, val);
+
+        return qir_emit_def(c, qir_inst(QOP_LOAD_IMM_I2, c->undef,
+                                        imm, c->undef));
+}
+
 static inline void
 qir_MOV_cond(struct vc4_compile *c, uint8_t cond,
              struct qreg dest, struct qreg src)
index cf74c42439100c61f28be8e759654af1c2ea1e73..d022d107eb3fa585788f6c21048e07194b56c0fc 100644 (file)
@@ -164,6 +164,20 @@ qpu_load_imm_ui(struct qpu_reg dst, uint32_t val)
         return inst;
 }
 
+/**
+ * Encodes a load-immediate of val into dst with the QPU_LOAD_IMM_MODE field
+ * set to QPU_LOAD_IMM_MODE_U2.
+ *
+ * @param dst  destination register, passed through to qpu_load_imm_ui()
+ * @param val  raw 32-bit immediate, passed through to qpu_load_imm_ui()
+ * @return the instruction built by qpu_load_imm_ui() with the U2 mode bits
+ *         ORed in
+ */
+uint64_t
+qpu_load_imm_u2(struct qpu_reg dst, uint32_t val)
+{
+        /* Start from the plain load-immediate encoding... */
+        uint64_t inst = qpu_load_imm_ui(dst, val);
+
+        /* ...then OR the mode selector on top of it. */
+        inst |= QPU_SET_FIELD(QPU_LOAD_IMM_MODE_U2, QPU_LOAD_IMM_MODE);
+
+        return inst;
+}
+
+/**
+ * Encodes a load-immediate of val into dst with the QPU_LOAD_IMM_MODE field
+ * set to QPU_LOAD_IMM_MODE_I2.
+ *
+ * @param dst  destination register, passed through to qpu_load_imm_ui()
+ * @param val  raw 32-bit immediate, passed through to qpu_load_imm_ui()
+ * @return the instruction built by qpu_load_imm_ui() with the I2 mode bits
+ *         ORed in
+ */
+uint64_t
+qpu_load_imm_i2(struct qpu_reg dst, uint32_t val)
+{
+        /* Start from the plain load-immediate encoding... */
+        uint64_t inst = qpu_load_imm_ui(dst, val);
+
+        /* ...then OR the mode selector on top of it. */
+        inst |= QPU_SET_FIELD(QPU_LOAD_IMM_MODE_I2, QPU_LOAD_IMM_MODE);
+
+        return inst;
+}
+
 uint64_t
 qpu_branch(uint32_t cond, uint32_t target)
 {
index a0aac1587decd0cc17b71751da16411376e4392c..437e4f5e5a4c54e669f9ceeefcc8225767815d5e 100644 (file)
@@ -143,6 +143,8 @@ uint64_t qpu_m_alu2(enum qpu_op_mul op, struct qpu_reg dst,
                     struct qpu_reg src0, struct qpu_reg src1) ATTRIBUTE_CONST;
 uint64_t qpu_merge_inst(uint64_t a, uint64_t b) ATTRIBUTE_CONST;
 uint64_t qpu_load_imm_ui(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_u2(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
+uint64_t qpu_load_imm_i2(struct qpu_reg dst, uint32_t val) ATTRIBUTE_CONST;
 uint64_t qpu_branch(uint32_t cond, uint32_t target) ATTRIBUTE_CONST;
 uint64_t qpu_set_sig(uint64_t inst, uint32_t sig) ATTRIBUTE_CONST;
 uint64_t qpu_set_cond_add(uint64_t inst, uint32_t cond) ATTRIBUTE_CONST;
index 3ca5aba76c95eb24da13e81e2e05087c6fe80b54..e6ca345c3b2286ffd229e88d48c66ae0a698adbc 100644 (file)
@@ -246,6 +246,12 @@ enum qpu_unpack {
 #define QPU_UNPACK_SHIFT                57
 #define QPU_UNPACK_MASK                 QPU_MASK(59, 57)
 
+#define QPU_LOAD_IMM_MODE_SHIFT         57 /* same value as QPU_UNPACK_SHIFT */
+#define QPU_LOAD_IMM_MODE_MASK          QPU_MASK(59, 57) /* same bit range as QPU_UNPACK_MASK */
+# define QPU_LOAD_IMM_MODE_U32          0 /* NOTE(review): presumably what qpu_load_imm_ui() leaves in the field -- confirm */
+# define QPU_LOAD_IMM_MODE_I2           1 /* ORed in by qpu_load_imm_i2() */
+# define QPU_LOAD_IMM_MODE_U2           3 /* ORed in by qpu_load_imm_u2() */
+
 /**
  * If set, the pack field means PACK_MUL or R4 packing, instead of normal
  * regfile a packing.
index 77aa4f674ae740dfd8ce4685570b1fedb2461e91..f5a5b8a862ac8fc5db2212e0ba2228af4cd3a0ab 100644 (file)
@@ -428,6 +428,14 @@ vc4_generate_code_block(struct vc4_compile *c,
                         queue(block, qpu_load_imm_ui(dst, qinst->src[0].index));
                         break;
 
+                case QOP_LOAD_IMM_U2:
+                        queue(block, qpu_load_imm_u2(dst, qinst->src[0].index));
+                        break;
+
+                case QOP_LOAD_IMM_I2:
+                        queue(block, qpu_load_imm_i2(dst, qinst->src[0].index));
+                        break;
+
                 case QOP_MS_MASK:
                         src[1] = qpu_ra(QPU_R_MS_REV_FLAGS);
                         fixup_raddr_conflict(block, dst, &src[0], &src[1],