From: Ben Skeggs <bskeggs@redhat.com>
Date: Sat, 6 Jun 2020 23:51:53 +0000 (+1000)
Subject: nvir: introduce OP_SHF
X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=e1e4d1d373aa3090ed3cd186fe3158cee38d1e31;p=mesa.git

nvir: introduce OP_SHF

We already use a hack from NVC0LegalizeSSA::handleShift() on GK110 and
newer which encodes SHF into the existing SHL/SHR opcodes, but there's
a couple of problems with it:

- LO/HI are swapped in one of the directions, which is very confusing.
- The initial SM70 code will emit this from NIR->NVIR, and using the
  existing encodings will confuse the optimisation passes.

As I want to limit the impact on other GPUs from the initial bring-up
of Volta/Turing, let's add an explicit representation of SHF in the IR.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
Reviewed-by: Karol Herbst <kherbst@redhat.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5377>
---

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index e4bbc8edafb..b03cab82a03 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -70,6 +70,7 @@ enum operation
    OP_LOP3_LUT,
    OP_SHL,
    OP_SHR,
+   OP_SHF,
    OP_MAX,
    OP_MIN,
    OP_SAT, // CLAMP(f32, 0.0, 1.0)
@@ -271,6 +272,13 @@ enum operation
 #define NV50_IR_SUBOP_MINMAX_MED  2
 #define NV50_IR_SUBOP_MINMAX_HIGH 3
 
+#define NV50_IR_SUBOP_SHF_L  (0 << 0)
+#define NV50_IR_SUBOP_SHF_R  (1 << 0)
+#define NV50_IR_SUBOP_SHF_LO (0 << 1)
+#define NV50_IR_SUBOP_SHF_HI (1 << 1)
+#define NV50_IR_SUBOP_SHF_C  (0 << 2)
+#define NV50_IR_SUBOP_SHF_W  (1 << 2)
+
 // xmad(src0, src1, 0) << 16 + src2
 #define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
 // (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
index e3bac648761..c6550ca187f 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
@@ -96,6 +96,7 @@ const char *operationStr[OP_LAST + 1] =
    "lop3 lut",
    "shl",
    "shr",
+   "shf",
    "max",
    "min",
    "sat",
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 77edf41e124..888dc7fb876 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
    3, 3,                   // SHLADD, XMAD
    1, 1, 1,                // ABS, NEG, NOT
-   2, 2, 2, 3, 2, 2,       // AND, OR, XOR, LOP3_LUT, SHL, SHR
+   2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
    2, 2, 1,                // MAX, MIN, SAT
    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
@@ -76,10 +76,10 @@ const OpClass Target::operationClass[] =
    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
    OPCLASS_ARITH, OPCLASS_ARITH,
    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
-   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR
+   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
    OPCLASS_CONVERT, OPCLASS_CONVERT,
    OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
-   OPCLASS_SHIFT, OPCLASS_SHIFT,
+   OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
    // MAX, MIN
    OPCLASS_COMPARE, OPCLASS_COMPARE,
    // SAT, CEIL, FLOOR, TRUNC; CVT