v3d: Add QPU pack/unpack for the new SFU instructions.
authorEric Anholt <eric@anholt.net>
Fri, 20 Jul 2018 19:19:36 +0000 (12:19 -0700)
committerEric Anholt <eric@anholt.net>
Mon, 23 Jul 2018 17:21:43 +0000 (10:21 -0700)
These instructions allow writing the result to any register, instead of a
special writeback to r4.

src/broadcom/qpu/qpu_instr.c
src/broadcom/qpu/qpu_instr.h
src/broadcom/qpu/qpu_pack.c
src/broadcom/qpu/tests/qpu_disasm.c

index 9e051e6c433baff83453365e59d57240b58a1c04..deaa533c8aed722baa38a9ef26df7fe9ea9d3820 100644 (file)
@@ -107,6 +107,7 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
                 [V3D_QPU_A_FLAPUSH] = "flapush",
                 [V3D_QPU_A_FLBPUSH] = "flbpush",
                 [V3D_QPU_A_FLPOP] = "flpop",
+                [V3D_QPU_A_RECIP] = "recip",
                 [V3D_QPU_A_SETMSF] = "setmsf",
                 [V3D_QPU_A_SETREVF] = "setrevf",
                 [V3D_QPU_A_NOP] = "nop",
@@ -135,6 +136,11 @@ v3d_qpu_add_op_name(enum v3d_qpu_add_op op)
                 [V3D_QPU_A_LDVPMD_IN] = "ldvpmd_in",
                 [V3D_QPU_A_LDVPMD_OUT] = "ldvpmd_out",
                 [V3D_QPU_A_LDVPMP] = "ldvpmp",
+                [V3D_QPU_A_RSQRT] = "rsqrt",
+                [V3D_QPU_A_EXP] = "exp",
+                [V3D_QPU_A_LOG] = "log",
+                [V3D_QPU_A_SIN] = "sin",
+                [V3D_QPU_A_RSQRT2] = "rsqrt2",
                 [V3D_QPU_A_LDVPMG_IN] = "ldvpmg_in",
                 [V3D_QPU_A_LDVPMG_OUT] = "ldvpmg_out",
                 [V3D_QPU_A_FCMP] = "fcmp",
@@ -369,6 +375,7 @@ static const uint8_t add_op_args[] = {
         [V3D_QPU_A_FLAPUSH] = D | A,
         [V3D_QPU_A_FLBPUSH] = D | A,
         [V3D_QPU_A_FLPOP] = D | A,
+        [V3D_QPU_A_RECIP] = D | A,
         [V3D_QPU_A_SETMSF] = D | A,
         [V3D_QPU_A_SETREVF] = D | A,
         [V3D_QPU_A_NOP] = 0,
@@ -401,6 +408,11 @@ static const uint8_t add_op_args[] = {
         [V3D_QPU_A_LDVPMD_IN] = D | A,
         [V3D_QPU_A_LDVPMD_OUT] = D | A,
         [V3D_QPU_A_LDVPMP] = D | A,
+        [V3D_QPU_A_RSQRT] = D | A,
+        [V3D_QPU_A_EXP] = D | A,
+        [V3D_QPU_A_LOG] = D | A,
+        [V3D_QPU_A_SIN] = D | A,
+        [V3D_QPU_A_RSQRT2] = D | A,
         [V3D_QPU_A_LDVPMG_IN] = D | A | B,
         [V3D_QPU_A_LDVPMG_OUT] = D | A | B,
 
index d99bb9db53be33de7829f1757a78bdd03b702da9..09dbf3eb4fa557cbefa5021d1f6e9a48da7b642f 100644 (file)
@@ -166,6 +166,7 @@ enum v3d_qpu_add_op {
         V3D_QPU_A_FLAPUSH,
         V3D_QPU_A_FLBPUSH,
         V3D_QPU_A_FLPOP,
+        V3D_QPU_A_RECIP,
         V3D_QPU_A_SETMSF,
         V3D_QPU_A_SETREVF,
         V3D_QPU_A_NOP,
@@ -194,6 +195,11 @@ enum v3d_qpu_add_op {
         V3D_QPU_A_LDVPMD_IN,
         V3D_QPU_A_LDVPMD_OUT,
         V3D_QPU_A_LDVPMP,
+        V3D_QPU_A_RSQRT,
+        V3D_QPU_A_EXP,
+        V3D_QPU_A_LOG,
+        V3D_QPU_A_SIN,
+        V3D_QPU_A_RSQRT2,
         V3D_QPU_A_LDVPMG_IN,
         V3D_QPU_A_LDVPMG_OUT,
         V3D_QPU_A_FCMP,
index d2e878d70e81ba64b052d614a31331694811084f..70f31d7340030295de31f20645914241d694a3ed 100644 (file)
@@ -493,6 +493,7 @@ static const struct opcode_desc add_ops[] = {
         { 186, 186, 1 << 2, ANYMUX, V3D_QPU_A_FLAPUSH },
         { 186, 186, 1 << 3, ANYMUX, V3D_QPU_A_FLBPUSH },
         { 186, 186, 1 << 4, ANYMUX, V3D_QPU_A_FLPOP },
+        { 186, 186, 1 << 5, ANYMUX, V3D_QPU_A_RECIP },
         { 186, 186, 1 << 6, ANYMUX, V3D_QPU_A_SETMSF },
         { 186, 186, 1 << 7, ANYMUX, V3D_QPU_A_SETREVF },
         { 187, 187, 1 << 0, 1 << 0, V3D_QPU_A_NOP, 0 },
@@ -522,6 +523,11 @@ static const struct opcode_desc add_ops[] = {
         { 188, 188, 1 << 0, ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
         { 188, 188, 1 << 1, ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
         { 188, 188, 1 << 2, ANYMUX, V3D_QPU_A_LDVPMP, 40 },
+        { 188, 188, 1 << 3, ANYMUX, V3D_QPU_A_RSQRT, 41 },
+        { 188, 188, 1 << 4, ANYMUX, V3D_QPU_A_EXP, 41 },
+        { 188, 188, 1 << 5, ANYMUX, V3D_QPU_A_LOG, 41 },
+        { 188, 188, 1 << 6, ANYMUX, V3D_QPU_A_SIN, 41 },
+        { 188, 188, 1 << 7, ANYMUX, V3D_QPU_A_RSQRT2, 41 },
         { 189, 189, ANYMUX, ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
 
         /* FIXME: MORE COMPLICATED */
index 7237912b8bdf0f15b452d522bd76e1befbcd8904..2e8d980581aaddccf05854268ff2c8cea22971e5 100644 (file)
@@ -84,6 +84,14 @@ static const struct {
         { 41, 0x3de02040f8ff7201ull, "stvpmv  1, rf8       ; mov  r1, 1" },
         { 41, 0xd8000e50bb2d3000ull, "sampid  rf16         ; fmul  rf57.h, r3, r1.l" },
 
+        /* v4.1 SFU instructions. */
+        { 41, 0xe98d60c1ba2aef80ull, "recip  rf1, rf62     ; fmul  r3.h, r2.l, r1.l; ldunifrf.rf53" },
+        { 41, 0x7d87c2debc51c000ull, "rsqrt  rf30, r4      ; fmul  rf11, r4.h, r2.h; ldunifrf.rf31" },
+        { 41, 0xb182475abc2bb000ull, "rsqrt2  rf26, r3     ; fmul  rf29.l, r2.h, r1.abs; ldunifrf.rf9" },
+        { 41, 0x79880808bc0b6900ull, "sin  rf8, rf36       ; fmul  rf32, r2.h, r0.l; ldunifrf.rf32" },
+        { 41, 0x04092094bc5a28c0ull, "exp.ifb  rf20, r2    ; add  r2, rf35, r2" },
+        { 41, 0xe00648bfbc32a000ull, "log  rf63, r2        ; fmul.andnn  rf34.h, r4.l, r1.abs" },
+
         /* v4.2 changes */
         { 42, 0x3c203192bb814000ull, "barrierid  syncb     ; nop               ; thrsw" },
 };