broadcom/vc5: Use the new LDVPM/STVPM opcodes on V3D 4.1.
authorEric Anholt <eric@anholt.net>
Thu, 4 Jan 2018 23:35:28 +0000 (15:35 -0800)
committerEric Anholt <eric@anholt.net>
Sat, 13 Jan 2018 05:54:33 +0000 (21:54 -0800)
Now, instead of a magic write register for VPM stores we have an
instruction to do them (which means no packing of other ALU ops into it),
with the ability to reorder the VPM stores due to the offset being baked
into the instruction.

VPM loads also gain the ability to be reordered by packing the row into
the A argument.  They also no longer write to the r3 accumulator, and
instead must be stored to a physical register.

src/broadcom/compiler/nir_to_vir.c
src/broadcom/compiler/qpu_schedule.c
src/broadcom/compiler/v3d_compiler.h
src/broadcom/compiler/vir.c
src/broadcom/compiler/vir_register_allocate.c
src/broadcom/qpu/qpu_instr.c
src/broadcom/qpu/qpu_instr.h
src/broadcom/qpu/qpu_pack.c
src/broadcom/qpu/tests/qpu_disasm.c

index 208ee1b86a886d45a598d98a6d90470ccccd36e3..87ce06a49cc4a00a01e5af6e42b3c27101d2c599 100644 (file)
@@ -29,6 +29,7 @@
 #include "util/hash_table.h"
 #include "compiler/nir/nir.h"
 #include "compiler/nir/nir_builder.h"
+#include "common/v3d_device_info.h"
 #include "v3d_compiler.h"
 
 /* We don't do any address packing. */
@@ -1224,7 +1225,21 @@ emit_frag_end(struct v3d_compile *c)
 }
 
 static void
-emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w)
+vir_VPM_WRITE(struct v3d_compile *c, struct qreg val, uint32_t *vpm_index)
+{
+        if (c->devinfo->ver >= 40) {
+                vir_STVPMV(c, vir_uniform_ui(c, *vpm_index), val);
+                *vpm_index = *vpm_index + 1;
+        } else {
+                vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
+        }
+
+        c->num_vpm_writes++;
+}
+
+static void
+emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w,
+                           uint32_t *vpm_index)
 {
         for (int i = 0; i < 2; i++) {
                 struct qreg coord = c->outputs[c->output_position_index + i];
@@ -1232,34 +1247,32 @@ emit_scaled_viewport_write(struct v3d_compile *c, struct qreg rcp_w)
                                  vir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i,
                                              0));
                 coord = vir_FMUL(c, coord, rcp_w);
-                vir_FTOIN_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM),
-                               coord);
+                vir_VPM_WRITE(c, vir_FTOIN(c, coord), vpm_index);
         }
 
 }
 
 static void
-emit_zs_write(struct v3d_compile *c, struct qreg rcp_w)
+emit_zs_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
 {
         struct qreg zscale = vir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0);
         struct qreg zoffset = vir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0);
 
-        vir_FADD_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM),
-                      vir_FMUL(c, vir_FMUL(c,
-                                           c->outputs[c->output_position_index + 2],
-                                           zscale),
-                               rcp_w),
-                      zoffset);
+        struct qreg z = c->outputs[c->output_position_index + 2];
+        z = vir_FMUL(c, z, zscale);
+        z = vir_FMUL(c, z, rcp_w);
+        z = vir_FADD(c, z, zoffset);
+        vir_VPM_WRITE(c, z, vpm_index);
 }
 
 static void
-emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w)
+emit_rcp_wc_write(struct v3d_compile *c, struct qreg rcp_w, uint32_t *vpm_index)
 {
-        vir_VPM_WRITE(c, rcp_w);
+        vir_VPM_WRITE(c, rcp_w, vpm_index);
 }
 
 static void
-emit_point_size_write(struct v3d_compile *c)
+emit_point_size_write(struct v3d_compile *c, uint32_t *vpm_index)
 {
         struct qreg point_size;
 
@@ -1273,12 +1286,15 @@ emit_point_size_write(struct v3d_compile *c)
          */
         point_size = vir_FMAX(c, point_size, vir_uniform_f(c, .125));
 
-        vir_VPM_WRITE(c, point_size);
+        vir_VPM_WRITE(c, point_size, vpm_index);
 }
 
 static void
 emit_vpm_write_setup(struct v3d_compile *c)
 {
+        if (c->devinfo->ver >= 40)
+                return;
+
         uint32_t packed;
         struct V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP unpacked = {
                 V3D33_VPM_GENERIC_BLOCK_WRITE_SETUP_header,
@@ -1300,6 +1316,7 @@ emit_vpm_write_setup(struct v3d_compile *c)
 static void
 emit_vert_end(struct v3d_compile *c)
 {
+        uint32_t vpm_index = 0;
         struct qreg rcp_w = vir_SFU(c, V3D_QPU_WADDR_RECIP,
                                     c->outputs[c->output_position_index + 3]);
 
@@ -1307,21 +1324,22 @@ emit_vert_end(struct v3d_compile *c)
 
         if (c->vs_key->is_coord) {
                 for (int i = 0; i < 4; i++)
-                        vir_VPM_WRITE(c, c->outputs[c->output_position_index + i]);
-                emit_scaled_viewport_write(c, rcp_w);
+                        vir_VPM_WRITE(c, c->outputs[c->output_position_index + i],
+                                      &vpm_index);
+                emit_scaled_viewport_write(c, rcp_w, &vpm_index);
                 if (c->vs_key->per_vertex_point_size) {
-                        emit_point_size_write(c);
+                        emit_point_size_write(c, &vpm_index);
                         /* emit_rcp_wc_write(c, rcp_w); */
                 }
                 /* XXX: Z-only rendering */
                 if (0)
-                        emit_zs_write(c, rcp_w);
+                        emit_zs_write(c, rcp_w, &vpm_index);
         } else {
-                emit_scaled_viewport_write(c, rcp_w);
-                emit_zs_write(c, rcp_w);
-                emit_rcp_wc_write(c, rcp_w);
+                emit_scaled_viewport_write(c, rcp_w, &vpm_index);
+                emit_zs_write(c, rcp_w, &vpm_index);
+                emit_rcp_wc_write(c, rcp_w, &vpm_index);
                 if (c->vs_key->per_vertex_point_size)
-                        emit_point_size_write(c);
+                        emit_point_size_write(c, &vpm_index);
         }
 
         for (int i = 0; i < c->vs_key->num_fs_inputs; i++) {
@@ -1332,7 +1350,8 @@ emit_vert_end(struct v3d_compile *c)
                         struct v3d_varying_slot output = c->output_slots[j];
 
                         if (!memcmp(&input, &output, sizeof(input))) {
-                                vir_VPM_WRITE(c, c->outputs[j]);
+                                vir_VPM_WRITE(c, c->outputs[j],
+                                              &vpm_index);
                                 break;
                         }
                 }
@@ -1340,7 +1359,8 @@ emit_vert_end(struct v3d_compile *c)
                  * this FS input.
                  */
                 if (j == c->num_outputs)
-                        vir_VPM_WRITE(c, vir_uniform_f(c, 0.0));
+                        vir_VPM_WRITE(c, vir_uniform_f(c, 0.0),
+                                      &vpm_index);
         }
 }
 
@@ -1384,6 +1404,12 @@ ntq_emit_vpm_read(struct v3d_compile *c,
 {
         struct qreg vpm = vir_reg(QFILE_VPM, vpm_index);
 
+        if (c->devinfo->ver >= 40 ) {
+                return vir_LDVPMV_IN(c,
+                                     vir_uniform_ui(c,
+                                                    (*num_components_queued)++));
+        }
+
         if (*num_components_queued != 0) {
                 (*num_components_queued)--;
                 c->num_inputs++;
@@ -1501,8 +1527,12 @@ ntq_setup_inputs(struct v3d_compile *c)
         }
 
         if (c->s->info.stage == MESA_SHADER_VERTEX) {
-                assert(vpm_components_queued == 0);
-                assert(num_components == 0);
+                if (c->devinfo->ver >= 40) {
+                        assert(vpm_components_queued == num_components);
+                } else {
+                        assert(vpm_components_queued == 0);
+                        assert(num_components == 0);
+                }
         }
 }
 
index 7fe46202636081de961ae5d0002fd690b565c78a..cab117b523eebf42da787b934f4bca1c8111a4c6 100644 (file)
@@ -594,6 +594,9 @@ qpu_magic_waddr_is_periph(enum v3d_qpu_waddr waddr)
 static bool
 qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
 {
+        if (v3d_qpu_uses_vpm(inst))
+                return true;
+
         if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
                 if (inst->alu.add.op != V3D_QPU_A_NOP &&
                     inst->alu.add.magic_write &&
@@ -601,9 +604,6 @@ qpu_accesses_peripheral(const struct v3d_qpu_instr *inst)
                         return true;
                 }
 
-                if (inst->alu.add.op == V3D_QPU_A_VPMSETUP)
-                        return true;
-
                 if (inst->alu.mul.op != V3D_QPU_M_NOP &&
                     inst->alu.mul.magic_write &&
                     qpu_magic_waddr_is_periph(inst->alu.mul.waddr)) {
index 4ced588fbbe80bfb046f7e6beaf5d6e71b8b23ab..0a6638b95c5780c786c2acbdbcbb13b29cbe5533 100644 (file)
@@ -791,6 +791,7 @@ VIR_A_ALU2(OR)
 VIR_A_ALU2(XOR)
 VIR_A_ALU2(VADD)
 VIR_A_ALU2(VSUB)
+VIR_A_ALU2(STVPMV)
 VIR_A_ALU1(NOT)
 VIR_A_ALU1(NEG)
 VIR_A_ALU1(FLAPUSH)
@@ -800,6 +801,8 @@ VIR_A_ALU1(SETMSF)
 VIR_A_ALU1(SETREVF)
 VIR_A_ALU1(TIDX)
 VIR_A_ALU1(EIDX)
+VIR_A_ALU1(LDVPMV_IN)
+VIR_A_ALU1(LDVPMV_OUT)
 
 VIR_A_ALU0(FXCD)
 VIR_A_ALU0(XCD)
@@ -854,12 +857,6 @@ vir_SEL(struct v3d_compile *c, enum v3d_qpu_cond cond,
         return t;
 }
 
-static inline void
-vir_VPM_WRITE(struct v3d_compile *c, struct qreg val)
-{
-        vir_MOV_dest(c, vir_reg(QFILE_MAGIC, V3D_QPU_WADDR_VPM), val);
-}
-
 static inline struct qinst *
 vir_NOP(struct v3d_compile *c)
 {
index c129bb047e679ce354c21397645af1ea45775ca0..80d11aaf7f6bfc50f2a82ba68d1a83231176cdbf 100644 (file)
@@ -92,6 +92,9 @@ vir_has_side_effects(struct v3d_compile *c, struct qinst *inst)
                 case V3D_QPU_A_SETREVF:
                 case V3D_QPU_A_SETMSF:
                 case V3D_QPU_A_VPMSETUP:
+                case V3D_QPU_A_STVPMV:
+                case V3D_QPU_A_STVPMD:
+                case V3D_QPU_A_STVPMP:
                         return true;
                 default:
                         break;
@@ -412,10 +415,6 @@ static void
 vir_emit(struct v3d_compile *c, struct qinst *inst)
 {
         list_addtail(&inst->link, &c->cur_block->instructions);
-
-        if (inst->dst.file == QFILE_MAGIC &&
-            inst->dst.index == V3D_QPU_WADDR_VPM)
-                c->num_vpm_writes++;
 }
 
 /* Updates inst to write to a new temporary, emits it, and notes the def. */
index f39f0c2829b9855f84836be857e41072ba4c2977..0889937f4248fd2b5389e3a5c2e15eba236fd1e2 100644 (file)
@@ -160,6 +160,28 @@ v3d_register_allocate(struct v3d_compile *c)
                         }
                 }
 
+                if (inst->qpu.type == V3D_QPU_INSTR_TYPE_ALU) {
+                        switch (inst->qpu.alu.add.op) {
+                        case V3D_QPU_A_LDVPMV_IN:
+                        case V3D_QPU_A_LDVPMV_OUT:
+                        case V3D_QPU_A_LDVPMD_IN:
+                        case V3D_QPU_A_LDVPMD_OUT:
+                        case V3D_QPU_A_LDVPMP:
+                        case V3D_QPU_A_LDVPMG_IN:
+                        case V3D_QPU_A_LDVPMG_OUT:
+                                /* LDVPMs only store to temps (the MA flag
+                                 * decides whether the LDVPM is in or out)
+                                 */
+                                assert(inst->dst.file == QFILE_TEMP);
+                                class_bits[temp_to_node[inst->dst.index]] &=
+                                        CLASS_BIT_PHYS;
+                                break;
+
+                        default:
+                                break;
+                        }
+                }
+
                 if (inst->src[0].file == QFILE_REG) {
                         switch (inst->src[0].index) {
                         case 0:
index 682fee740a000e5d853cd49a9aa3e76e4dfc045c..85d6cf75d81766d10911ab1e77a5a736aeaf7c6a 100644 (file)
@@ -113,10 +113,13 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
                 [V3D_QPU_A_TMUWT] = "tmuwt",
                 [V3D_QPU_A_VPMSETUP] = "vpmsetup",
                 [V3D_QPU_A_VPMWT] = "vpmwt",
-                [V3D_QPU_A_LDVPMV] = "ldvpmv",
-                [V3D_QPU_A_LDVPMD] = "ldvpmd",
+                [V3D_QPU_A_LDVPMV_IN] = "ldvpmv_in",
+                [V3D_QPU_A_LDVPMV_OUT] = "ldvpmv_out",
+                [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
+                [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
                 [V3D_QPU_A_LDVPMP] = "ldvpmp",
-                [V3D_QPU_A_LDVPMG] = "ldvpmg",
+                [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
+                [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
                 [V3D_QPU_A_FCMP] = "fcmp",
                 [V3D_QPU_A_VFMAX] = "vfmax",
                 [V3D_QPU_A_FROUND] = "fround",
@@ -376,10 +379,13 @@ static const uint8_t add_op_args[] = {
 
         [V3D_QPU_A_VPMSETUP] = D | A,
 
-        [V3D_QPU_A_LDVPMV] = D | A,
-        [V3D_QPU_A_LDVPMD] = D | A,
+        [V3D_QPU_A_LDVPMV_IN] = D | A,
+        [V3D_QPU_A_LDVPMV_OUT] = D | A,
+        [V3D_QPU_A_LDVPMD_IN] = D | A,
+        [V3D_QPU_A_LDVPMD_OUT] = D | A,
         [V3D_QPU_A_LDVPMP] = D | A,
-        [V3D_QPU_A_LDVPMG] = D | A | B,
+        [V3D_QPU_A_LDVPMG_IN] = D | A | B,
+        [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
 
         /* FIXME: MOVABSNEG */
 
@@ -516,6 +522,49 @@ v3d_qpu_magic_waddr_is_tsy(enum v3d_qpu_waddr waddr)
                 waddr == V3D_QPU_WADDR_SYNCU);
 }
 
+static bool
+v3d_qpu_add_op_uses_vpm(enum  v3d_qpu_add_op op)
+{
+        switch (op) {
+        case V3D_QPU_A_VPMSETUP:
+        case V3D_QPU_A_VPMWT:
+        case V3D_QPU_A_LDVPMV_IN:
+        case V3D_QPU_A_LDVPMV_OUT:
+        case V3D_QPU_A_LDVPMD_IN:
+        case V3D_QPU_A_LDVPMD_OUT:
+        case V3D_QPU_A_LDVPMP:
+        case V3D_QPU_A_LDVPMG_IN:
+        case V3D_QPU_A_LDVPMG_OUT:
+        case V3D_QPU_A_STVPMV:
+        case V3D_QPU_A_STVPMD:
+        case V3D_QPU_A_STVPMP:
+                return true;
+        default:
+                return false;
+        }
+}
+
+bool
+v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst)
+{
+        if (inst->type == V3D_QPU_INSTR_TYPE_ALU) {
+                if (v3d_qpu_add_op_uses_vpm(inst->alu.add.op))
+                        return true;
+
+                if (inst->alu.add.magic_write &&
+                    v3d_qpu_magic_waddr_is_vpm(inst->alu.add.waddr)) {
+                        return true;
+                }
+
+                if (inst->alu.mul.magic_write &&
+                    v3d_qpu_magic_waddr_is_vpm(inst->alu.mul.waddr)) {
+                        return true;
+                }
+        }
+
+        return false;
+}
+
 bool
 v3d_qpu_writes_r3(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *inst)
index 468fe89facdedaee9791050e047b8554811f59ee..0bd79ca68da99487fcfddaee536d18d3d4a7e8f4 100644 (file)
@@ -173,10 +173,13 @@ enum v3d_qpu_add_op {
         V3D_QPU_A_TMUWT,
         V3D_QPU_A_VPMSETUP,
         V3D_QPU_A_VPMWT,
-        V3D_QPU_A_LDVPMV,
-        V3D_QPU_A_LDVPMD,
+        V3D_QPU_A_LDVPMV_IN,
+        V3D_QPU_A_LDVPMV_OUT,
+        V3D_QPU_A_LDVPMD_IN,
+        V3D_QPU_A_LDVPMD_OUT,
         V3D_QPU_A_LDVPMP,
-        V3D_QPU_A_LDVPMG,
+        V3D_QPU_A_LDVPMG_IN,
+        V3D_QPU_A_LDVPMG_OUT,
         V3D_QPU_A_FCMP,
         V3D_QPU_A_VFMAX,
         V3D_QPU_A_FROUND,
@@ -425,6 +428,7 @@ bool v3d_qpu_writes_r4(const struct v3d_device_info *devinfo,
 bool v3d_qpu_writes_r5(const struct v3d_device_info *devinfo,
                        const struct v3d_qpu_instr *instr) ATTRIBUTE_CONST;
 bool v3d_qpu_uses_mux(const struct v3d_qpu_instr *inst, enum v3d_qpu_mux mux);
+bool v3d_qpu_uses_vpm(const struct v3d_qpu_instr *inst);
 bool v3d_qpu_sig_writes_address(const struct v3d_device_info *devinfo,
                                 const struct v3d_qpu_sig *sig) ATTRIBUTE_CONST;
 
index 161e24f03008bdea4d35be5f17e87857fc5638e0..ffabc9a969d1a9d82b406436c0b7bb97590b2faa 100644 (file)
@@ -515,7 +515,11 @@ static const struct opcode_desc add_ops[] = {
         { 187, 187, 1 << 2, 1 << 5, V3D_QPU_A_TMUWT },
         { 187, 187, 1 << 2, 1 << 6, V3D_QPU_A_VPMWT },
 
-        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP },
+        { 187, 187, 1 << 3, ANYMUX, V3D_QPU_A_VPMSETUP, 33 },
+        { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
+        { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
+        { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
+        { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
 
         /* FIXME: MORE COMPLICATED */
         /* { 190, 191, ANYMUX, ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */
@@ -823,7 +827,24 @@ v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
         instr->alu.add.a = mux_a;
         instr->alu.add.b = mux_b;
         instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
-        instr->alu.add.magic_write = packed_inst & VC5_QPU_MA;
+
+        instr->alu.add.magic_write = false;
+        if (packed_inst & VC5_QPU_MA) {
+                switch (instr->alu.add.op) {
+                case V3D_QPU_A_LDVPMV_IN:
+                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
+                        break;
+                case V3D_QPU_A_LDVPMD_IN:
+                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
+                        break;
+                case V3D_QPU_A_LDVPMG_IN:
+                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
+                        break;
+                default:
+                        instr->alu.add.magic_write = true;
+                        break;
+                }
+        }
 
         return true;
 }
@@ -930,16 +951,36 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
         if (nsrc < 1)
                 mux_a = ffs(desc->mux_a_mask) - 1;
 
+        bool no_magic_write = false;
+
         switch (instr->alu.add.op) {
         case V3D_QPU_A_STVPMV:
                 waddr = 0;
+                no_magic_write = true;
                 break;
         case V3D_QPU_A_STVPMD:
                 waddr = 1;
+                no_magic_write = true;
                 break;
         case V3D_QPU_A_STVPMP:
                 waddr = 2;
+                no_magic_write = true;
+                break;
+
+        case V3D_QPU_A_LDVPMV_IN:
+        case V3D_QPU_A_LDVPMD_IN:
+        case V3D_QPU_A_LDVPMP:
+        case V3D_QPU_A_LDVPMG_IN:
+                assert(!instr->alu.add.magic_write);
                 break;
+
+        case V3D_QPU_A_LDVPMV_OUT:
+        case V3D_QPU_A_LDVPMD_OUT:
+        case V3D_QPU_A_LDVPMG_OUT:
+                assert(!instr->alu.add.magic_write);
+                *packed_instr |= VC5_QPU_MA;
+                break;
+
         default:
                 break;
         }
@@ -1065,7 +1106,7 @@ v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
         *packed_instr |= QPU_SET_FIELD(mux_b, VC5_QPU_ADD_B);
         *packed_instr |= QPU_SET_FIELD(opcode, VC5_QPU_OP_ADD);
         *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
-        if (instr->alu.add.magic_write)
+        if (instr->alu.add.magic_write && !no_magic_write)
                 *packed_instr |= VC5_QPU_MA;
 
         return true;
index 27dc184f76e88ea8d43c053034367e66f3f6235f..8d77740edce25d723a842a14330e68750afd7eb7 100644 (file)
@@ -76,6 +76,10 @@ static const struct {
         { 41, 0xdb3048eb9d533780ull, "fmax  rf43.l, r3.h, rf30; fmul  rf35.h, r4, r2.l; ldunifarf.r1" },
         { 41, 0x733620471e6ce700ull, "faddnf  rf7.l, rf28.h, r1.l; fmul  r1, r3.h, r3.abs; ldunifarf.rsqrt2" },
         { 41, 0x9c094adef634b000ull, "ffloor.ifb  rf30.l, r3; fmul.pushz  rf43.l, r5, r1.h" },
+
+        /* v4.1 opcodes */
+        { 41, 0x3de020c7bdfd200dull, "ldvpmg_in  rf7, r2, r2; mov  r3, 13" },
+        { 41, 0x3de02040f8ff7201ull, "stvpmv  1, rf8       ; mov  r1, 1" },
 };
 
 static void