vc4: Add support for 16-bit signed/unsigned norm/scaled vertex attrs.
author Eric Anholt <eric@anholt.net>
Mon, 15 Dec 2014 20:30:26 +0000 (12:30 -0800)
committer Eric Anholt <eric@anholt.net>
Mon, 15 Dec 2014 22:33:01 +0000 (14:33 -0800)
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_register_allocate.c
src/gallium/drivers/vc4/vc4_screen.c

index 1efdf37097f5399fe7de63f76a46fa20d167316e..e37303fb69b25ea480b8e97369f2131cd4d6ac22 100644 (file)
@@ -1026,6 +1026,37 @@ get_channel_from_vpm(struct vc4_compile *c,
                                 return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
                         }
                 }
+        } else if (chan->size == 16 &&
+                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
+                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
+                struct qreg vpm = vpm_reads[swiz / 2];
+
+                /* Note that UNPACK_16F eats a half float, not ints, so we use
+                 * UNPACK_16_I for all of these.
+                 */
+                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
+                        temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
+                        if (chan->normalized) {
+                                return qir_FMUL(c, temp,
+                                                qir_uniform_f(c, 1/32768.0f));
+                        } else {
+                                return temp;
+                        }
+                } else {
+                        /* UNPACK_16I sign-extends, so we have to emit ANDs. */
+                        temp = vpm;
+                        if (swiz == 1 || swiz == 3)
+                                temp = qir_UNPACK_16_I(c, temp, 1);
+                        temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
+                        temp = qir_ITOF(c, temp);
+
+                        if (chan->normalized) {
+                                return qir_FMUL(c, temp,
+                                                qir_uniform_f(c, 1 / 65535.0));
+                        } else {
+                                return temp;
+                        }
+                }
         } else {
                 return c->undef;
         }
index 8cd571d5b77dc8c99c40a8a58a3a28ee71842d31..49b79014c095e19f2b96e88d402cd0105f517ed1 100644 (file)
@@ -103,10 +103,14 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_UNPACK_8B_F] = { "unpack_8b_f", 1, 1 },
         [QOP_UNPACK_8C_F] = { "unpack_8c_f", 1, 1 },
         [QOP_UNPACK_8D_F] = { "unpack_8d_f", 1, 1 },
+        [QOP_UNPACK_16A_F] = { "unpack_16a_f", 1, 1 },
+        [QOP_UNPACK_16B_F] = { "unpack_16b_f", 1, 1 },
         [QOP_UNPACK_8A_I] = { "unpack_8a_i", 1, 1 },
         [QOP_UNPACK_8B_I] = { "unpack_8b_i", 1, 1 },
         [QOP_UNPACK_8C_I] = { "unpack_8c_i", 1, 1 },
         [QOP_UNPACK_8D_I] = { "unpack_8d_i", 1, 1 },
+        [QOP_UNPACK_16A_I] = { "unpack_16a_i", 1, 1 },
+        [QOP_UNPACK_16B_I] = { "unpack_16b_i", 1, 1 },
 };
 
 static const char *
index 9da120ab91253d01ccb41569e16acd49cfb3dec8..46f4c12b22c54fdb5e9e960fb043e12f7e7a71a4 100644 (file)
@@ -113,11 +113,15 @@ enum qop {
         QOP_UNPACK_8B_F,
         QOP_UNPACK_8C_F,
         QOP_UNPACK_8D_F,
+        QOP_UNPACK_16A_F,
+        QOP_UNPACK_16B_F,
 
         QOP_UNPACK_8A_I,
         QOP_UNPACK_8B_I,
         QOP_UNPACK_8C_I,
         QOP_UNPACK_8D_I,
+        QOP_UNPACK_16A_I,
+        QOP_UNPACK_16B_I,
 
         /** Texture x coordinate parameter write */
         QOP_TEX_S,
@@ -509,6 +513,22 @@ qir_UNPACK_8_I(struct vc4_compile *c, struct qreg src, int i)
         return t;
 }
 
+static inline struct qreg
+qir_UNPACK_16_F(struct vc4_compile *c, struct qreg src, int i)
+{ /* Emit a QOP_UNPACK_16{A,B}_F instruction reading src and return the temp it writes. Per the note in get_channel_from_vpm, the 16F unpack consumes a half float, not an integer. */
+        struct qreg t = qir_get_temp(c); /* destination temp, obtained from the compile context */
+        qir_emit(c, qir_inst(QOP_UNPACK_16A_F + i, t, src, c->undef)); /* i = 0 -> 16A, i = 1 -> 16B (adjacent in enum qop); any other i would land on a different opcode. c->undef is passed as the remaining operand. */
+        return t;
+}
+
+static inline struct qreg
+qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
+{ /* Emit a QOP_UNPACK_16{A,B}_I instruction reading src and return the temp it writes. Per the note in get_channel_from_vpm, this unpack sign-extends, so unsigned callers must mask the result themselves. */
+        struct qreg t = qir_get_temp(c); /* destination temp, obtained from the compile context */
+        qir_emit(c, qir_inst(QOP_UNPACK_16A_I + i, t, src, c->undef)); /* i = 0 -> 16A, i = 1 -> 16B (adjacent in enum qop); any other i would land on a different opcode. c->undef is passed as the remaining operand. */
+        return t;
+}
+
 static inline struct qreg
 qir_POW(struct vc4_compile *c, struct qreg x, struct qreg y)
 {
index f88072766601687fd17b809f77abe1a55823ba69..530ec8bf501dde840ea94bd3913e284d79c3a9c5 100644 (file)
@@ -141,6 +141,15 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
         uint32_t vpm_read_offset = 0;
         bool written_r3 = false;
         bool needs_restore;
+        /* Map from the QIR ops enum order to QPU unpack bits. */
+        static const uint32_t unpack_map[] = {
+                QPU_UNPACK_8A,
+                QPU_UNPACK_8B,
+                QPU_UNPACK_8C,
+                QPU_UNPACK_8D,
+                QPU_UNPACK_16A_TO_F32,
+                QPU_UNPACK_16B_TO_F32,
+        };
 
         make_empty_list(&c->qpu_inst_list);
 
@@ -472,6 +481,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_UNPACK_8B_F:
                 case QOP_UNPACK_8C_F:
                 case QOP_UNPACK_8D_F:
+                case QOP_UNPACK_16A_F:
+                case QOP_UNPACK_16B_F: {
                         assert(src[0].mux == QPU_MUX_A);
 
                         /* Since we're setting the pack bits, if the
@@ -480,20 +491,22 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         queue(c, qpu_a_FMAX((dst.mux == QPU_MUX_A ?
                                              qpu_rb(31) : dst),
                                             src[0], src[0]));
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
-                                                       (qinst->op -
-                                                        QOP_UNPACK_8A_F),
+                        *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
+                                                                  QOP_UNPACK_8A_F],
                                                        QPU_UNPACK);
 
                         if (dst.mux == QPU_MUX_A) {
                                 queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                         }
+                }
                         break;
 
                 case QOP_UNPACK_8A_I:
                 case QOP_UNPACK_8B_I:
                 case QOP_UNPACK_8C_I:
                 case QOP_UNPACK_8D_I:
+                case QOP_UNPACK_16A_I:
+                case QOP_UNPACK_16B_I: {
                         assert(src[0].mux == QPU_MUX_A);
 
                         /* Since we're setting the pack bits, if the
@@ -501,14 +514,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                          */
                         queue(c, qpu_a_MOV((dst.mux == QPU_MUX_A ?
                                             qpu_rb(31) : dst), src[0]));
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_UNPACK_8A +
-                                                       (qinst->op -
-                                                        QOP_UNPACK_8A_I),
+                        *last_inst(c) |= QPU_SET_FIELD(unpack_map[qinst->op -
+                                                                  QOP_UNPACK_8A_I],
                                                        QPU_UNPACK);
 
                         if (dst.mux == QPU_MUX_A) {
                                 queue(c, qpu_a_MOV(dst, qpu_rb(31)));
                         }
+                }
                         break;
 
                 default:
index 8f8c1899071711de5f9c7a2ca7ab958e6fe41eea..9eae7fca758f5bc7cd001e10ee36c7111e90eb15 100644 (file)
@@ -258,10 +258,14 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_UNPACK_8B_F:
                 case QOP_UNPACK_8C_F:
                 case QOP_UNPACK_8D_F:
+                case QOP_UNPACK_16A_F:
+                case QOP_UNPACK_16B_F:
                 case QOP_UNPACK_8A_I:
                 case QOP_UNPACK_8B_I:
                 case QOP_UNPACK_8C_I:
                 case QOP_UNPACK_8D_I:
+                case QOP_UNPACK_16A_I:
+                case QOP_UNPACK_16B_I:
                         /* The unpack flags require an A-file src register. */
                         ra_set_node_class(g, temp_to_node[inst->src[0].index],
                                           vc4->reg_class_a);
index 6bb158b59900dda616b1ba5099ce3bb444c74bf3..62912d84eb68d083b23f1877b3b2f2b341b9529e 100644 (file)
@@ -346,6 +346,22 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
                 case PIPE_FORMAT_R32G32B32_FLOAT:
                 case PIPE_FORMAT_R32G32_FLOAT:
                 case PIPE_FORMAT_R32_FLOAT:
+                case PIPE_FORMAT_R16G16B16A16_UNORM:
+                case PIPE_FORMAT_R16G16B16_UNORM:
+                case PIPE_FORMAT_R16G16_UNORM:
+                case PIPE_FORMAT_R16_UNORM:
+                case PIPE_FORMAT_R16G16B16A16_SNORM:
+                case PIPE_FORMAT_R16G16B16_SNORM:
+                case PIPE_FORMAT_R16G16_SNORM:
+                case PIPE_FORMAT_R16_SNORM:
+                case PIPE_FORMAT_R16G16B16A16_USCALED:
+                case PIPE_FORMAT_R16G16B16_USCALED:
+                case PIPE_FORMAT_R16G16_USCALED:
+                case PIPE_FORMAT_R16_USCALED:
+                case PIPE_FORMAT_R16G16B16A16_SSCALED:
+                case PIPE_FORMAT_R16G16B16_SSCALED:
+                case PIPE_FORMAT_R16G16_SSCALED:
+                case PIPE_FORMAT_R16_SSCALED:
                 case PIPE_FORMAT_R8G8B8A8_UNORM:
                 case PIPE_FORMAT_R8G8B8_UNORM:
                 case PIPE_FORMAT_R8G8_UNORM: