vc4: Switch to using native integers.
author: Eric Anholt <eric@anholt.net>
Sun, 24 Aug 2014 21:05:37 +0000 (14:05 -0700)
committer: Eric Anholt <eric@anholt.net>
Thu, 4 Sep 2014 18:39:51 +0000 (11:39 -0700)
Without native integers there were problems with bools: st_glsl_to_tgsi
sometimes seemed to treat bool true as 1.0f, even though as a uniform it's
stored as ~0. Since I've got native integer operations for everything other
than divide, I might as well just support them.

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_screen.c

index aaa7eb346f328a3a7e5f64d7cd32f91406b7ae2a..82766ff1fc622cc727d136552646fc4890c38dde 100644 (file)
@@ -30,6 +30,7 @@
 #include "util/u_memory.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_info.h"
 
 #include "vc4_context.h"
 #include "vc4_qpu.h"
@@ -129,7 +130,8 @@ qir_uniform_f(struct tgsi_to_qir *trans, float f)
 }
 
 static struct qreg
-get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
+get_src(struct tgsi_to_qir *trans, unsigned tgsi_op,
+        struct tgsi_src_register *src, int i)
 {
         struct qcompile *c = trans->c;
         struct qreg r = c->undef;
@@ -182,8 +184,17 @@ get_src(struct tgsi_to_qir *trans, struct tgsi_src_register *src, int i)
         if (src->Absolute)
                 r = qir_FMAXABS(c, r, r);
 
-        if (src->Negate)
-                r = qir_FSUB(c, qir_uniform_f(trans, 0), r);
+        if (src->Negate) {
+                switch (tgsi_opcode_infer_src_type(tgsi_op)) {
+                case TGSI_TYPE_SIGNED:
+                case TGSI_TYPE_UNSIGNED:
+                        r = qir_SUB(c, qir_uniform_ui(trans, 0), r);
+                        break;
+                default:
+                        r = qir_FSUB(c, qir_uniform_f(trans, 0.0), r);
+                        break;
+                }
+        }
 
         return r;
 };
@@ -248,6 +259,51 @@ tgsi_to_qir_alu(struct tgsi_to_qir *trans,
         return dst;
 }
 
+static struct qreg
+tgsi_to_qir_umul(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+
+        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
+                                      qir_uniform_ui(trans, 16));
+        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
+                                      qir_uniform_ui(trans, 0xffff));
+        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
+                                      qir_uniform_ui(trans, 16));
+        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
+                                      qir_uniform_ui(trans, 0xffff));
+
+        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
+        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
+        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
+
+        return qir_ADD(c, lolo, qir_SHL(c,
+                                        qir_ADD(c, hilo, lohi),
+                                        qir_uniform_ui(trans, 16)));
+}
+
+static struct qreg
+tgsi_to_qir_idiv(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        return qir_FTOI(c, qir_FMUL(c,
+                                    qir_ITOF(c, src[0 * 4 + i]),
+                                    qir_RCP(c, qir_ITOF(c, src[1 * 4 + i]))));
+}
+
+static struct qreg
+tgsi_to_qir_ineg(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        return qir_SUB(c, qir_uniform_ui(trans, 0), src[0 * 4 + i]);
+}
+
 static struct qreg
 tgsi_to_qir_seq(struct tgsi_to_qir *trans,
                 struct tgsi_full_instruction *tgsi_inst,
@@ -288,6 +344,86 @@ tgsi_to_qir_sge(struct tgsi_to_qir *trans,
         return qir_SEL_X_0_NC(c, qir_uniform_f(trans, 1.0));
 }
 
+static struct qreg
+tgsi_to_qir_fseq(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_ZS(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_fsne(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_ZC(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_fslt(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_NS(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_fsge(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_FSUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_NC(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_useq(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_ZS(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_usne(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_ZC(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_islt(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_NS(c, qir_uniform_ui(trans, ~0));
+}
+
+static struct qreg
+tgsi_to_qir_isge(struct tgsi_to_qir *trans,
+                 struct tgsi_full_instruction *tgsi_inst,
+                 enum qop op, struct qreg *src, int i)
+{
+        struct qcompile *c = trans->c;
+        qir_SF(c, qir_SUB(c, src[0 * 4 + i], src[1 * 4 + i]));
+        return qir_SEL_X_0_NC(c, qir_uniform_ui(trans, ~0));
+}
+
 static struct qreg
 tgsi_to_qir_cmp(struct tgsi_to_qir *trans,
                 struct tgsi_full_instruction *tgsi_inst,
@@ -754,11 +890,37 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
                 [TGSI_OPCODE_SUB] = { QOP_FSUB, tgsi_to_qir_alu },
                 [TGSI_OPCODE_MIN] = { QOP_FMIN, tgsi_to_qir_alu },
                 [TGSI_OPCODE_MAX] = { QOP_FMAX, tgsi_to_qir_alu },
+                [TGSI_OPCODE_F2I] = { QOP_FTOI, tgsi_to_qir_alu },
+                [TGSI_OPCODE_I2F] = { QOP_ITOF, tgsi_to_qir_alu },
+                [TGSI_OPCODE_UADD] = { QOP_ADD, tgsi_to_qir_alu },
+                [TGSI_OPCODE_USHR] = { QOP_SHR, tgsi_to_qir_alu },
+                [TGSI_OPCODE_ISHR] = { QOP_ASR, tgsi_to_qir_alu },
+                [TGSI_OPCODE_SHL] = { QOP_SHL, tgsi_to_qir_alu },
+                [TGSI_OPCODE_IMIN] = { QOP_MIN, tgsi_to_qir_alu },
+                [TGSI_OPCODE_IMAX] = { QOP_MAX, tgsi_to_qir_alu },
+                [TGSI_OPCODE_AND] = { QOP_AND, tgsi_to_qir_alu },
+                [TGSI_OPCODE_OR] = { QOP_OR, tgsi_to_qir_alu },
+                [TGSI_OPCODE_XOR] = { QOP_XOR, tgsi_to_qir_alu },
+                [TGSI_OPCODE_NOT] = { QOP_NOT, tgsi_to_qir_alu },
+
+                [TGSI_OPCODE_UMUL] = { 0, tgsi_to_qir_umul },
+                [TGSI_OPCODE_IDIV] = { 0, tgsi_to_qir_idiv },
+                [TGSI_OPCODE_INEG] = { 0, tgsi_to_qir_ineg },
+
                 [TGSI_OPCODE_RSQ] = { QOP_RSQ, tgsi_to_qir_alu },
                 [TGSI_OPCODE_SEQ] = { 0, tgsi_to_qir_seq },
                 [TGSI_OPCODE_SNE] = { 0, tgsi_to_qir_sne },
                 [TGSI_OPCODE_SGE] = { 0, tgsi_to_qir_sge },
                 [TGSI_OPCODE_SLT] = { 0, tgsi_to_qir_slt },
+                [TGSI_OPCODE_FSEQ] = { 0, tgsi_to_qir_fseq },
+                [TGSI_OPCODE_FSNE] = { 0, tgsi_to_qir_fsne },
+                [TGSI_OPCODE_FSGE] = { 0, tgsi_to_qir_fsge },
+                [TGSI_OPCODE_FSLT] = { 0, tgsi_to_qir_fslt },
+                [TGSI_OPCODE_USEQ] = { 0, tgsi_to_qir_useq },
+                [TGSI_OPCODE_USNE] = { 0, tgsi_to_qir_usne },
+                [TGSI_OPCODE_ISGE] = { 0, tgsi_to_qir_isge },
+                [TGSI_OPCODE_ISLT] = { 0, tgsi_to_qir_islt },
+
                 [TGSI_OPCODE_CMP] = { 0, tgsi_to_qir_cmp },
                 [TGSI_OPCODE_MAD] = { 0, tgsi_to_qir_mad },
                 [TGSI_OPCODE_DP2] = { 0, tgsi_to_qir_dp2 },
@@ -787,7 +949,8 @@ emit_tgsi_instruction(struct tgsi_to_qir *trans,
         for (int s = 0; s < 3; s++) {
                 for (int i = 0; i < 4; i++) {
                         src_regs[4 * s + i] =
-                                get_src(trans, &tgsi_inst->Src[s].Register, i);
+                                get_src(trans, tgsi_inst->Instruction.Opcode,
+                                        &tgsi_inst->Src[s].Register, i);
                 }
         }
 
index 72149908422cdab9c5a84a488ae5e43d470641f7..93f97c219f7b42dac97071ed220d1648b2f9e172 100644 (file)
@@ -38,10 +38,24 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_FADD] = { "fadd", 1, 2 },
         [QOP_FSUB] = { "fsub", 1, 2 },
         [QOP_FMUL] = { "fmul", 1, 2 },
+        [QOP_MUL24] = { "mul24", 1, 2 },
         [QOP_FMIN] = { "fmin", 1, 2 },
         [QOP_FMAX] = { "fmax", 1, 2 },
         [QOP_FMINABS] = { "fminabs", 1, 2 },
         [QOP_FMAXABS] = { "fmaxabs", 1, 2 },
+        [QOP_FTOI] = { "ftoi", 1, 1 },
+        [QOP_ITOF] = { "itof", 1, 1 },
+        [QOP_ADD] = { "add", 1, 2 },
+        [QOP_SUB] = { "sub", 1, 2 },
+        [QOP_SHR] = { "shr", 1, 2 },
+        [QOP_ASR] = { "asr", 1, 2 },
+        [QOP_SHL] = { "shl", 1, 2 },
+        [QOP_MIN] = { "min", 1, 2 },
+        [QOP_MAX] = { "max", 1, 2 },
+        [QOP_AND] = { "and", 1, 2 },
+        [QOP_OR] = { "or", 1, 2 },
+        [QOP_XOR] = { "xor", 1, 2 },
+        [QOP_NOT] = { "not", 1, 1 },
 
         [QOP_SF] = { "sf", 0, 1 },
         [QOP_SEL_X_0_NS] = { "fsel_x_0_ns", 1, 1 },
@@ -53,8 +67,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_SEL_X_Y_ZS] = { "fsel_x_y_zs", 1, 2 },
         [QOP_SEL_X_Y_ZC] = { "fsel_x_y_zc", 1, 2 },
 
-        [QOP_FTOI] = { "ftoi", 1, 1 },
-        [QOP_ITOF] = { "itof", 1, 1 },
         [QOP_RCP] = { "rcp", 1, 1 },
         [QOP_RSQ] = { "rsq", 1, 1 },
         [QOP_EXP2] = { "exp2", 1, 2 },
index 99df99c1a073104d4ddfb5b0ae23ecd517e44cfe..2e210c3bd60ae24f4bfe36fee6c404ff4d064078 100644 (file)
@@ -49,10 +49,22 @@ enum qop {
         QOP_FADD,
         QOP_FSUB,
         QOP_FMUL,
+        QOP_MUL24,
         QOP_FMIN,
         QOP_FMAX,
         QOP_FMINABS,
         QOP_FMAXABS,
+        QOP_ADD,
+        QOP_SUB,
+        QOP_SHL,
+        QOP_SHR,
+        QOP_ASR,
+        QOP_MIN,
+        QOP_MAX,
+        QOP_AND,
+        QOP_OR,
+        QOP_XOR,
+        QOP_NOT,
 
         /* Sets the flag register according to src. */
         QOP_SF,
@@ -270,6 +282,7 @@ QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
 QIR_ALU2(FMUL)
+QIR_ALU2(MUL24)
 QIR_NODST_1(SF)
 QIR_ALU1(SEL_X_0_ZS)
 QIR_ALU1(SEL_X_0_ZC)
@@ -285,6 +298,19 @@ QIR_ALU2(FMINABS)
 QIR_ALU2(FMAXABS)
 QIR_ALU1(FTOI)
 QIR_ALU1(ITOF)
+
+QIR_ALU2(ADD)
+QIR_ALU2(SUB)
+QIR_ALU2(SHL)
+QIR_ALU2(SHR)
+QIR_ALU2(ASR)
+QIR_ALU2(MIN)
+QIR_ALU2(MAX)
+QIR_ALU2(AND)
+QIR_ALU2(OR)
+QIR_ALU2(XOR)
+QIR_ALU1(NOT)
+
 QIR_ALU1(RCP)
 QIR_ALU1(RSQ)
 QIR_ALU1(EXP2)
@@ -310,4 +336,12 @@ qir_R4_UNPACK(struct qcompile *c, int i)
         return t;
 }
 
+static inline struct qreg
+qir_SEL_X_0_COND(struct qcompile *c, int i)
+{
+        struct qreg t = qir_get_temp(c);
+        qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i, t, c->undef, c->undef));
+        return t;
+}
+
 #endif /* VC4_QIR_H */
index 6d2c34f2d1fc5e7c00c0fc5ec90b8b5c60ce9b6d..579bfdc1aa145d6ac6951f7a8a5d52c85b13215f 100644 (file)
@@ -280,8 +280,20 @@ vc4_generate_code(struct qcompile *c)
                         A(FMAXABS),
                         A(FTOI),
                         A(ITOF),
+                        A(ADD),
+                        A(SUB),
+                        A(SHL),
+                        A(SHR),
+                        A(ASR),
+                        A(MIN),
+                        A(MAX),
+                        A(AND),
+                        A(OR),
+                        A(XOR),
+                        A(NOT),
 
                         M(FMUL),
+                        M(MUL24),
                 };
 
                 struct qpu_reg src[4];
index 46cd4c55b971c9cab3fdd5306c3e2b8f8bb647d2..b0f97103deef0078b5b05baf089a26373bcf079b 100644 (file)
@@ -291,6 +291,7 @@ vc4_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
         case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
                 return 0;
         case PIPE_SHADER_CAP_INTEGERS:
+                return 1;
         case PIPE_SHADER_CAP_DOUBLES:
                 return 0;
         case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS: